1
0
mirror of https://github.com/systemd/systemd synced 2026-04-25 16:34:50 +02:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Luca Boccassi
7d40544643
Merge pull request #23126 from keszybz/clone3-prohibit
Prohibit clone3() when RestrictNamespaces is used
2022-04-20 11:27:58 +02:00
Zbigniew Jędrzejewski-Szmek
30193fe817 manager: prohibit clone3() in seccomp filters
RestrictNamespaces should block clone3(), as flatpak does in commit
a10f52a756.

clone3() passes arguments in a structure referenced by a pointer, so we can't
filter on the flags as with clone(). Let's disallow the whole function call.
2022-04-19 22:04:31 +02:00
Zbigniew Jędrzejewski-Szmek
c5503601e2 shared/seccomp: add note about clone2() being unimportant
In case anyone else starts wondering, as I did, whether it should be
listed…
2022-04-19 11:56:23 +02:00

View File

@ -718,6 +718,9 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.value =
"capget\0" /* Able to query arbitrary processes */
"clone\0"
/* ia64 is the only architecture that has clone2, a replacement for clone, but ia64 doesn't
* implement seccomp, so we don't need to list it at all. Cf.
* acce2f71779c54086962fefce3833d886c655f62 in the kernel. */
"clone3\0"
"execveat\0"
"fork\0"
@ -1227,6 +1230,21 @@ int seccomp_restrict_namespaces(unsigned long retain) {
if (r < 0)
return r;
/* We cannot filter on individual flags to clone3(), so we need to disable the
* syscall altogether. ENOSYS is used instead of EPERM, so that glibc and other
* users fall back to clone(), as if on an older kernel.
*
* C.f. https://github.com/flatpak/flatpak/commit/a10f52a7565c549612c92b8e736a6698a53db330,
* https://github.com/moby/moby/issues/42680. */
r = seccomp_rule_add_exact(
seccomp,
SCMP_ACT_ERRNO(ENOSYS),
SCMP_SYS(clone3),
0);
if (r < 0)
log_debug_errno(r, "Failed to add clone3() rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
if ((retain & NAMESPACE_FLAGS_ALL) == 0)
/* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
* altogether. */