mirror of
https://github.com/systemd/systemd
synced 2026-03-29 03:04:52 +02:00
Compare commits
11 Commits
2e016f3a0b
...
964ccab828
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
964ccab828 | ||
|
|
8e9b3bcf12 | ||
|
|
6ca3d087e2 | ||
|
|
1924f26d2a | ||
|
|
528dd6a423 | ||
|
|
e72be068b1 | ||
|
|
c5f26a0250 | ||
|
|
83f0ec7978 | ||
|
|
e975a94559 | ||
|
|
08bf703cc1 | ||
|
|
f833df3848 |
@ -18,6 +18,7 @@ charset = utf-8
|
||||
[*.{c,h}]
|
||||
indent_style = space
|
||||
indent_size = 8
|
||||
max_line_length = 109
|
||||
|
||||
[*.sh]
|
||||
indent_style = space
|
||||
@ -30,3 +31,4 @@ indent_size = 8
|
||||
[man/*.xml]
|
||||
indent_size = 2
|
||||
indent_style = space
|
||||
max_line_length = 109
|
||||
|
||||
@ -191,10 +191,10 @@ evdev:name:Elan Touchpad:dmi:*:svnASUSTeKCOMPUTERINC.:pnUX305UA:*
|
||||
|
||||
# Asus UX362FA
|
||||
evdev:name:ELAN1401:00 04F3:30DC Touchpad:dmi:*:svnASUSTeKCOMPUTERINC.:pnZenBookUX362FA_UX362FA:*
|
||||
EVDEV_ABS_00=:::16
|
||||
EVDEV_ABS_01=:::16
|
||||
EVDEV_ABS_35=:::16
|
||||
EVDEV_ABS_36=:::16
|
||||
EVDEV_ABS_00=:::8
|
||||
EVDEV_ABS_01=:::8
|
||||
EVDEV_ABS_35=:::8
|
||||
EVDEV_ABS_36=:::8
|
||||
|
||||
#########################################
|
||||
# Bangho
|
||||
|
||||
@ -157,6 +157,19 @@ static bool filename_possibly_with_slash_suffix(const char *s) {
|
||||
return filename_is_valid(copied);
|
||||
}
|
||||
|
||||
/* Classifies a negative errno-style result of name_to_handle_at(): returns true when the error is
 * NOT one of the tolerated, context-dependent errors listed in the IN_SET() check below (i.e. it
 * is considered fatal), and false when it is one of them. The argument must be negative (asserted). */
static bool is_name_to_handle_at_fatal_error(int err) {
|
||||
/* name_to_handle_at() can return "acceptable" errors that are due to the context. For
|
||||
* example the kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall
|
||||
* was blocked (EACCES/EPERM; maybe through seccomp, because we are running inside of a
|
||||
* container), or the mount point is not triggered yet (EOVERFLOW, think nfs4), or some
|
||||
* general name_to_handle_at() flakiness (EINVAL). However other errors are not supposed to
|
||||
* happen and therefore are considered fatal ones. */
/* EOPNOTSUPP (the kernel or file system does not support name_to_handle_at()) is part of the
 * tolerated set as well, so it is reported as non-fatal here too. */
|
||||
|
||||
assert(err < 0);
|
||||
|
||||
return !IN_SET(err, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL);
|
||||
}
|
||||
|
||||
int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
_cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
|
||||
int mount_id = -1, mount_id_parent = -1;
|
||||
@ -206,39 +219,40 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
return false; /* symlinks are never mount points */
|
||||
|
||||
r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
|
||||
if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
|
||||
/* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
|
||||
* (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
|
||||
* point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
|
||||
* (EINVAL): fall back to simpler logic. */
|
||||
goto fallback_fdinfo;
|
||||
else if (r == -EOPNOTSUPP)
|
||||
/* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
|
||||
* supports it (in which case it is a mount point), otherwise fall back to the traditional stat()
|
||||
* logic */
|
||||
nosupp = true;
|
||||
else if (r < 0)
|
||||
if (r < 0) {
|
||||
if (is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
if (r != -EOPNOTSUPP)
|
||||
goto fallback_fdinfo;
|
||||
|
||||
/* This kernel or file system does not support name_to_handle_at(), hence let's see
|
||||
* if the upper fs supports it (in which case it is a mount point), otherwise fall
|
||||
* back to the traditional stat() logic */
|
||||
nosupp = true;
|
||||
}
|
||||
|
||||
r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
|
||||
if (r == -EOPNOTSUPP) {
|
||||
if (nosupp)
|
||||
/* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
|
||||
goto fallback_fdinfo;
|
||||
else
|
||||
/* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
|
||||
* it must be a mount point. */
|
||||
return 1;
|
||||
} else if (r < 0)
|
||||
if (r < 0) {
|
||||
if (is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
if (r != -EOPNOTSUPP)
|
||||
goto fallback_fdinfo;
|
||||
if (nosupp)
|
||||
/* Both the parent and the directory can't do name_to_handle_at() */
|
||||
goto fallback_fdinfo;
|
||||
|
||||
/* The parent can do name_to_handle_at() but the directory we are interested in can't? If so, it must
|
||||
* be a mount point. */
|
||||
/* The parent can't do name_to_handle_at() but the directory we are
|
||||
* interested in can? If so, it must be a mount point. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* The parent can do name_to_handle_at() but the directory we are interested in can't? If
|
||||
* so, it must be a mount point. */
|
||||
if (nosupp)
|
||||
return 1;
|
||||
|
||||
/* If the file handle for the directory we are interested in and its parent are identical, we assume
|
||||
* this is the root directory, which is a mount point. */
|
||||
/* If the file handle for the directory we are interested in and its parent are identical,
|
||||
* we assume this is the root directory, which is a mount point. */
|
||||
|
||||
if (h->handle_bytes == h_parent->handle_bytes &&
|
||||
h->handle_type == h_parent->handle_type &&
|
||||
@ -338,10 +352,10 @@ int path_get_mnt_id(const char *path, int *ret) {
|
||||
}
|
||||
|
||||
r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
|
||||
if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
|
||||
return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
|
||||
|
||||
if (r == 0 || is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
|
||||
return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
|
||||
}
|
||||
|
||||
bool fstype_is_network(const char *fstype) {
|
||||
|
||||
@ -1789,6 +1789,10 @@ int seccomp_restrict_archs(Set *archs) {
|
||||
for (unsigned i = 0; seccomp_local_archs[i] != SECCOMP_LOCAL_ARCH_END; ++i) {
|
||||
uint32_t arch = seccomp_local_archs[i];
|
||||
|
||||
/* See above comment, our "native" architecture is never blocked. */
|
||||
if (arch == seccomp_arch_native())
|
||||
continue;
|
||||
|
||||
/* That architecture might have already been blocked by a previous call to seccomp_restrict_archs. */
|
||||
if (arch == SECCOMP_LOCAL_ARCH_BLOCKED)
|
||||
continue;
|
||||
|
||||
@ -713,8 +713,10 @@ static void test_exec_systemcallfilter(Manager *m) {
|
||||
|
||||
test(m, "exec-systemcallfilter-not-failing.service", 0, CLD_EXITED);
|
||||
test(m, "exec-systemcallfilter-not-failing2.service", 0, CLD_EXITED);
|
||||
test(m, "exec-systemcallfilter-not-failing3.service", 0, CLD_EXITED);
|
||||
test(m, "exec-systemcallfilter-failing.service", SIGSYS, CLD_KILLED);
|
||||
test(m, "exec-systemcallfilter-failing2.service", SIGSYS, CLD_KILLED);
|
||||
test(m, "exec-systemcallfilter-failing3.service", SIGSYS, CLD_KILLED);
|
||||
|
||||
r = find_executable("python3", NULL);
|
||||
if (r < 0) {
|
||||
|
||||
@ -890,6 +890,66 @@ static void test_load_syscall_filter_set_raw(void) {
|
||||
assert_se(wait_for_terminate_and_check("syscallrawseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/* Forks a child that restricts the allowed syscall architectures and then loads seccomp filters
 * on itself, checking that (a) access() and poll() keep working after seccomp_restrict_archs()
 * and after loading a filter without any syscall entries, and (b) a filter listing the access()
 * family with SCMP_ACT_ERRNO(EUCLEAN) really makes access() fail with EUCLEAN. The test is
 * skipped when seccomp is unavailable or the process lacks the privileges to use it. */
static void test_native_syscalls_filtered(void) {
|
||||
pid_t pid;
|
||||
|
||||
log_info("/* %s */", __func__);
|
||||
|
||||
if (!is_seccomp_available()) {
|
||||
log_notice("Seccomp not available, skipping %s", __func__);
|
||||
return;
|
||||
}
|
||||
if (!have_seccomp_privs()) {
|
||||
log_notice("Not privileged, skipping %s", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
assert_se(pid >= 0);
|
||||
|
||||
if (pid == 0) {
|
||||
_cleanup_set_free_ Set *arch_s = NULL;
|
||||
_cleanup_hashmap_free_ Hashmap *s = NULL;
|
||||
|
||||
/* Passing "native" or an empty set is equivalent, just do both here. */
|
||||
assert_se(arch_s = set_new(NULL));
|
||||
assert_se(seccomp_restrict_archs(arch_s) >= 0);
|
||||
assert_se(set_put(arch_s, SCMP_ARCH_NATIVE) >= 0);
|
||||
assert_se(seccomp_restrict_archs(arch_s) >= 0);
|
||||
|
||||
/* Sanity check: plain syscalls must still succeed after the architecture restriction. */
assert_se(access("/", F_OK) >= 0);
|
||||
assert_se(poll(NULL, 0, 0) == 0);
|
||||
|
||||
/* Load a filter with no syscall entries (NULL set); access() and poll() must keep working. */
assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, scmp_act_kill_process(), true) >= 0);
|
||||
assert_se(access("/", F_OK) >= 0);
|
||||
assert_se(poll(NULL, 0, 0) == 0);
|
||||
|
||||
/* Collect whichever of access()/faccessat()/faccessat2() this build defines. Keys are stored
 * as syscall number + 1 — presumably so that syscall number 0 never becomes a NULL key;
 * confirm against seccomp_load_syscall_filter_set_raw(). */
assert_se(s = hashmap_new(NULL));
|
||||
#if defined __NR_access && __NR_access >= 0
|
||||
assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_access + 1), INT_TO_PTR(-1)) >= 0);
|
||||
log_debug("has access()");
|
||||
#endif
|
||||
#if defined __NR_faccessat && __NR_faccessat >= 0
|
||||
assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_faccessat + 1), INT_TO_PTR(-1)) >= 0);
|
||||
log_debug("has faccessat()");
|
||||
#endif
|
||||
#if defined __NR_faccessat2 && __NR_faccessat2 >= 0
|
||||
assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_faccessat2 + 1), INT_TO_PTR(-1)) >= 0);
|
||||
log_debug("has faccessat2()");
|
||||
#endif
|
||||
|
||||
/* At least one of the syscalls above must exist, otherwise the check below proves nothing. */
assert_se(!hashmap_isempty(s));
|
||||
/* Load the collected syscalls with SCMP_ACT_ERRNO(EUCLEAN) as the action; access() is then
 * expected to fail with exactly that errno. */
assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUCLEAN), true) >= 0);
|
||||
|
||||
assert_se(access("/", F_OK) < 0);
|
||||
assert_se(errno == EUCLEAN);
|
||||
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/* Parent: the child has to terminate with EXIT_SUCCESS for the test to pass. */
assert_se(wait_for_terminate_and_check("nativeseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
static void test_lock_personality(void) {
|
||||
unsigned long current;
|
||||
pid_t pid;
|
||||
@ -1171,6 +1231,7 @@ int main(int argc, char *argv[]) {
|
||||
test_memory_deny_write_execute_shmat();
|
||||
test_restrict_archs();
|
||||
test_load_syscall_filter_set_raw();
|
||||
test_native_syscalls_filtered();
|
||||
test_lock_personality();
|
||||
test_restrict_suid_sgid();
|
||||
|
||||
|
||||
@ -635,7 +635,7 @@ Name={}
|
||||
[Network]
|
||||
DHCP=ipv4
|
||||
IPv6AcceptRA=False
|
||||
DNSSECNegativeTrustAnchors=megasearch.net
|
||||
DNSSECNegativeTrustAnchors=search.example.com
|
||||
'''.format(self.iface))
|
||||
|
||||
# create second device/dnsmasq for a .company/.lab VPN interface
|
||||
@ -681,8 +681,8 @@ DNSSECNegativeTrustAnchors=company lab
|
||||
self.assertIn(b'kettle.cantina.company: 10.241.4.4', out)
|
||||
|
||||
# test general domains
|
||||
out = subprocess.check_output(['resolvectl', 'query', 'megasearch.net'])
|
||||
self.assertIn(b'megasearch.net: 192.168.42.1', out)
|
||||
out = subprocess.check_output(['resolvectl', 'query', 'search.example.com'])
|
||||
self.assertIn(b'search.example.com: 192.168.42.1', out)
|
||||
|
||||
with open(self.dnsmasq_log) as f:
|
||||
general_log = f.read()
|
||||
@ -696,8 +696,8 @@ DNSSECNegativeTrustAnchors=company lab
|
||||
self.assertNotIn('.company', general_log)
|
||||
|
||||
# general domains should not be sent to the VPN DNS
|
||||
self.assertRegex(general_log, 'query.*megasearch.net')
|
||||
self.assertNotIn('megasearch.net', vpn_log)
|
||||
self.assertRegex(general_log, 'query.*search.example.com')
|
||||
self.assertNotIn('search.example.com', vpn_log)
|
||||
|
||||
def test_resolved_etc_hosts(self):
|
||||
'''resolved queries to /etc/hosts'''
|
||||
|
||||
9
test/test-execute/exec-systemcallfilter-failing3.service
Normal file
9
test/test-execute/exec-systemcallfilter-failing3.service
Normal file
@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Test for SystemCallFilter
|
||||
|
||||
[Service]
|
||||
ExecStart=/bin/sh -c '/bin/echo "This should not be seen"'
|
||||
Type=oneshot
|
||||
LimitCORE=0
|
||||
SystemCallArchitectures=native
|
||||
SystemCallFilter=~write open execve fexecve execveat exit_group close mmap munmap fstat DONOTEXIST
|
||||
@ -0,0 +1,8 @@
|
||||
[Unit]
|
||||
Description=Test for SystemCallFilter
|
||||
|
||||
[Service]
|
||||
ExecStart=/bin/sh -c 'echo "Foo bar"'
|
||||
Type=oneshot
|
||||
SystemCallArchitectures=native
|
||||
SystemCallFilter=
|
||||
@ -4,6 +4,13 @@
|
||||
set -eux
|
||||
set -o pipefail
|
||||
|
||||
ARGS=()
|
||||
if [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then
|
||||
# If we're running under sanitizers, we need to use a less restrictive
|
||||
# profile, otherwise LSan's syscalls would get blocked by seccomp
|
||||
ARGS+=(--profile=trusted)
|
||||
fi
|
||||
|
||||
export SYSTEMD_LOG_LEVEL=debug
|
||||
mkdir -p /run/systemd/system/systemd-portabled.service.d/
|
||||
cat <<EOF >/run/systemd/system/systemd-portabled.service.d/override.conf
|
||||
@ -11,7 +18,7 @@ cat <<EOF >/run/systemd/system/systemd-portabled.service.d/override.conf
|
||||
Environment=SYSTEMD_LOG_LEVEL=debug
|
||||
EOF
|
||||
|
||||
portablectl attach --now --runtime /usr/share/minimal_0.raw app0
|
||||
portablectl "${ARGS[@]}" attach --now --runtime /usr/share/minimal_0.raw app0
|
||||
|
||||
systemctl is-active app0.service
|
||||
systemctl is-active app0-foo.service
|
||||
@ -21,7 +28,7 @@ systemctl is-active app0-bar.service && exit 1
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
portablectl reattach --now --runtime /usr/share/minimal_1.raw app0
|
||||
portablectl "${ARGS[@]}" reattach --now --runtime /usr/share/minimal_1.raw app0
|
||||
|
||||
systemctl is-active app0.service
|
||||
systemctl is-active app0-bar.service
|
||||
@ -42,7 +49,7 @@ portablectl list | grep -q -F "No images."
|
||||
unsquashfs -dest /tmp/minimal_0 /usr/share/minimal_0.raw
|
||||
unsquashfs -dest /tmp/minimal_1 /usr/share/minimal_1.raw
|
||||
|
||||
portablectl attach --copy=symlink --now --runtime /tmp/minimal_0 app0
|
||||
portablectl "${ARGS[@]}" attach --copy=symlink --now --runtime /tmp/minimal_0 app0
|
||||
|
||||
systemctl is-active app0.service
|
||||
systemctl is-active app0-foo.service
|
||||
@ -52,7 +59,7 @@ systemctl is-active app0-bar.service && exit 1
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
portablectl reattach --now --enable --runtime /tmp/minimal_1 app0
|
||||
portablectl "${ARGS[@]}" reattach --now --enable --runtime /tmp/minimal_1 app0
|
||||
|
||||
systemctl is-active app0.service
|
||||
systemctl is-active app0-bar.service
|
||||
@ -68,21 +75,21 @@ portablectl detach --now --enable --runtime /tmp/minimal_1 app0
|
||||
|
||||
portablectl list | grep -q -F "No images."
|
||||
|
||||
portablectl attach --now --runtime --extension /usr/share/app0.raw /usr/share/minimal_0.raw app0
|
||||
portablectl "${ARGS[@]}" attach --now --runtime --extension /usr/share/app0.raw /usr/share/minimal_0.raw app0
|
||||
|
||||
systemctl is-active app0.service
|
||||
|
||||
portablectl reattach --now --runtime --extension /usr/share/app0.raw /usr/share/minimal_1.raw app0
|
||||
portablectl "${ARGS[@]}" reattach --now --runtime --extension /usr/share/app0.raw /usr/share/minimal_1.raw app0
|
||||
|
||||
systemctl is-active app0.service
|
||||
|
||||
portablectl detach --now --runtime --extension /usr/share/app0.raw /usr/share/minimal_1.raw app0
|
||||
|
||||
portablectl attach --now --runtime --extension /usr/share/app1.raw /usr/share/minimal_0.raw app1
|
||||
portablectl "${ARGS[@]}" attach --now --runtime --extension /usr/share/app1.raw /usr/share/minimal_0.raw app1
|
||||
|
||||
systemctl is-active app1.service
|
||||
|
||||
portablectl reattach --now --runtime --extension /usr/share/app1.raw /usr/share/minimal_1.raw app1
|
||||
portablectl "${ARGS[@]}" reattach --now --runtime --extension /usr/share/app1.raw /usr/share/minimal_1.raw app1
|
||||
|
||||
systemctl is-active app1.service
|
||||
|
||||
@ -95,7 +102,7 @@ mount /usr/share/app1.raw /tmp/app1
|
||||
mount /usr/share/minimal_0.raw /tmp/rootdir
|
||||
mount -t overlay overlay -o lowerdir=/tmp/app1:/tmp/rootdir /tmp/overlay
|
||||
|
||||
portablectl attach --copy=symlink --now --runtime /tmp/overlay app1
|
||||
portablectl "${ARGS[@]}" attach --copy=symlink --now --runtime /tmp/overlay app1
|
||||
|
||||
systemctl is-active app1.service
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user