1
0
mirror of https://github.com/systemd/systemd synced 2025-10-06 04:04:46 +02:00

Compare commits

..

No commits in common. "467b1c4e4b3ececef629823797bd5ce9823b7ecc" and "2e5e17a5707ad6538d67e4d43088a6eb33f2d852" have entirely different histories.

5 changed files with 98 additions and 114 deletions

View File

@ -2270,51 +2270,20 @@ static int setup_private_users_child(int unshare_ready_fd, const char *uid_map,
return 0; return 0;
} }
/* Configures and creates a bpffs superblock on a filesystem context received over a socket.
 *
 * Receives one file descriptor from socket_fd, sets the four delegation options on it (each formatted
 * as a "0x"-prefixed hexadecimal string taken from the matching field of the ExecContext), and finally
 * issues FSCONFIG_CMD_CREATE on it.
 *
 * Returns 0 on success. On failure the value produced by log_debug_errno() for the failing step is
 * returned and no further steps are attempted. */
static int bpffs_helper(const ExecContext *c, int socket_fd) {
        assert(c);
        assert(socket_fd >= 0);

        /* The descriptor to configure is handed to us through the socket. */
        _cleanup_close_ int fs_fd = receive_one_fd(socket_fd, /* flags = */ 0);
        if (fs_fd < 0)
                return log_debug_errno(fs_fd, "Failed to receive file descriptor from parent: %m");

        /* Option key and the mask to store under it. The entries are applied in exactly this order. */
        const struct {
                const char *key;
                uint64_t mask;
        } delegations[] = {
                { "delegate_cmds",    c->bpf_delegate_commands    },
                { "delegate_maps",    c->bpf_delegate_maps        },
                { "delegate_progs",   c->bpf_delegate_programs    },
                { "delegate_attachs", c->bpf_delegate_attachments },
        };

        for (size_t i = 0; i < sizeof(delegations) / sizeof(delegations[0]); i++) {
                /* Room for "0x", two hex digits per byte of the mask, and the trailing NUL. */
                char number[STRLEN("0x") + sizeof(c->bpf_delegate_commands) * 2 + 1];

                xsprintf(number, "0x%"PRIx64, delegations[i].mask);

                if (fsconfig(fs_fd, FSCONFIG_SET_STRING, delegations[i].key, number, /* aux = */ 0) < 0)
                        return log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
        }

        /* All options are in place, now create the superblock. */
        if (fsconfig(fs_fd, FSCONFIG_CMD_CREATE, /* key = */ NULL, /* value = */ NULL, /* aux = */ 0) < 0)
                return log_debug_errno(errno, "Failed to create bpffs superblock: %m");

        return 0;
}
static int bpffs_prepare( static int bpffs_prepare(
const ExecContext *c, const ExecContext *c,
PidRef *ret_pid, PidRef *ret_pid,
int *ret_sock_fd, int *ret_sock_fd,
int *ret_errno_pipe) { int *ret_errno_pipe) {
_cleanup_close_pair_ int socket_fds[2] = EBADF_PAIR, errno_pipe[2] = EBADF_PAIR; _cleanup_close_pair_ int socket_fds[2] = EBADF_PAIR, bpffs_errno_pipe[2] = EBADF_PAIR;
int r; int r;
assert(ret_sock_fd); assert(ret_sock_fd);
assert(ret_pid); assert(ret_pid);
assert(ret_errno_pipe); assert(ret_errno_pipe);
r = pipe2(errno_pipe, O_CLOEXEC|O_NONBLOCK); r = pipe2(bpffs_errno_pipe, O_CLOEXEC|O_NONBLOCK);
if (r < 0) if (r < 0)
return log_debug_errno(errno, "Failed to create pipe: %m"); return log_debug_errno(errno, "Failed to create pipe: %m");
@ -2326,13 +2295,67 @@ static int bpffs_prepare(
if (r < 0) if (r < 0)
return log_debug_errno(r, "Failed to fork bpffs privileged helper: %m"); return log_debug_errno(r, "Failed to fork bpffs privileged helper: %m");
if (r == 0) { if (r == 0) {
errno_pipe[0] = safe_close(errno_pipe[0]); _cleanup_close_ int fs_fd = -EBADF;
char number[STRLEN("0x") + sizeof(c->bpf_delegate_commands) * 2 + 1];
bpffs_errno_pipe[0] = safe_close(bpffs_errno_pipe[0]);
socket_fds[0] = safe_close(socket_fds[0]); socket_fds[0] = safe_close(socket_fds[0]);
report_errno_and_exit(errno_pipe[1], bpffs_helper(c, socket_fds[1]));
fs_fd = receive_one_fd(socket_fds[1], /* flags = */ 0);
if (fs_fd < 0) {
log_debug_errno(fs_fd, "Failed to receive file descriptor from parent: %m");
report_errno_and_exit(bpffs_errno_pipe[1], fs_fd);
}
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_commands);
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", number, /* aux = */ 0);
if (r < 0) {
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
report_errno_and_exit(bpffs_errno_pipe[1], errno);
}
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_maps);
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_maps", number, /* aux = */ 0);
if (r < 0) {
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
report_errno_and_exit(bpffs_errno_pipe[1], errno);
}
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_programs);
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_progs", number, /* aux = */ 0);
if (r < 0) {
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
report_errno_and_exit(bpffs_errno_pipe[1], errno);
}
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_attachments);
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_attachs", number, /* aux = */ 0);
if (r < 0) {
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
report_errno_and_exit(bpffs_errno_pipe[1], errno);
}
r = fsconfig(fs_fd, FSCONFIG_CMD_CREATE, /* key = */ NULL, /* value = */ NULL, /* aux = */ 0);
if (r < 0) {
log_debug_errno(errno, "Failed to create bpffs superblock: %m");
report_errno_and_exit(bpffs_errno_pipe[1], errno);
}
if (write(socket_fds[1], (uint8_t[1]) {}, 1) < 0) {
log_debug_errno(errno, "Failed to send data to child: %m");
report_errno_and_exit(bpffs_errno_pipe[1], errno);
}
_exit(EXIT_SUCCESS);
} }
*ret_sock_fd = TAKE_FD(socket_fds[0]); *ret_sock_fd = TAKE_FD(socket_fds[0]);
*ret_errno_pipe = TAKE_FD(errno_pipe[0]); *ret_errno_pipe = TAKE_FD(bpffs_errno_pipe[0]);
return 0; return 0;
} }
@ -3668,9 +3691,7 @@ static int apply_mount_namespace(
bool needs_sandboxing, bool needs_sandboxing,
uid_t exec_directory_uid, uid_t exec_directory_uid,
gid_t exec_directory_gid, gid_t exec_directory_gid,
PidRef *bpffs_pidref,
int bpffs_socket_fd, int bpffs_socket_fd,
int bpffs_errno_pipe,
char **reterr_path) { char **reterr_path) {
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT; _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
@ -3885,9 +3906,7 @@ static int apply_mount_namespace(
.proc_subset = needs_sandboxing ? context->proc_subset : PROC_SUBSET_ALL, .proc_subset = needs_sandboxing ? context->proc_subset : PROC_SUBSET_ALL,
.private_bpf = needs_sandboxing ? context->private_bpf : PRIVATE_BPF_NO, .private_bpf = needs_sandboxing ? context->private_bpf : PRIVATE_BPF_NO,
.bpffs_pidref = bpffs_pidref,
.bpffs_socket_fd = bpffs_socket_fd, .bpffs_socket_fd = bpffs_socket_fd,
.bpffs_errno_pipe = bpffs_errno_pipe,
}; };
r = setup_namespace(&parameters, reterr_path); r = setup_namespace(&parameters, reterr_path);
@ -4528,9 +4547,7 @@ static int setup_delegated_namespaces(
const ExecCommand *command, const ExecCommand *command,
bool needs_sandboxing, bool needs_sandboxing,
bool have_cap_sys_admin, bool have_cap_sys_admin,
PidRef *bpffs_pidref,
int bpffs_socket_fd, int bpffs_socket_fd,
int bpffs_errno_pipe,
int *reterr_exit_status) { int *reterr_exit_status) {
int r; int r;
@ -4653,9 +4670,7 @@ static int setup_delegated_namespaces(
needs_sandboxing, needs_sandboxing,
uid, uid,
gid, gid,
bpffs_pidref,
bpffs_socket_fd, bpffs_socket_fd,
bpffs_errno_pipe,
&error_path); &error_path);
if (r < 0) { if (r < 0) {
*reterr_exit_status = EXIT_NAMESPACE; *reterr_exit_status = EXIT_NAMESPACE;
@ -5767,9 +5782,7 @@ int exec_invoke(
command, command,
needs_sandboxing, needs_sandboxing,
have_cap_sys_admin, have_cap_sys_admin,
&bpffs_pidref,
bpffs_socket_fd, bpffs_socket_fd,
bpffs_errno_pipe,
exit_status); exit_status);
if (r < 0) if (r < 0)
return r; return r;
@ -5829,15 +5842,29 @@ int exec_invoke(
command, command,
needs_sandboxing, needs_sandboxing,
have_cap_sys_admin, have_cap_sys_admin,
&bpffs_pidref,
bpffs_socket_fd, bpffs_socket_fd,
bpffs_errno_pipe,
exit_status); exit_status);
if (r < 0) if (r < 0)
return r; return r;
/* Kill unnecessary process, for the case that e.g. when the bpffs mount point is hidden. */ if (context->private_bpf != PRIVATE_BPF_NO) {
pidref_done_sigkill_wait(&bpffs_pidref); r = pidref_wait_for_terminate_and_check("(sd-bpffs)", &bpffs_pidref, /* flags = */ 0);
if (r < 0) {
*exit_status = EXIT_BPF;
return r;
}
/* If something strange happened with the child, let's consider this fatal, too */
if (r != EXIT_SUCCESS) {
*exit_status = EXIT_BPF;
ssize_t ss = read(bpffs_errno_pipe, &r, sizeof(r));
if (ss == sizeof(r))
return log_debug_errno(r, "bpffs helper exited with error: %m");
if (ss < 0)
return log_debug_errno(errno, "Failed to read from the bpffs helper errno pipe: %m");
return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short read from the bpffs helper errno pipe.");
}
pidref_done(&bpffs_pidref);
}
if (needs_sandboxing && exec_needs_cgroup_namespace(context) && params->cgroup_path) { if (needs_sandboxing && exec_needs_cgroup_namespace(context) && params->cgroup_path) {
/* Move ourselves into the subcgroup now *after* we've unshared the cgroup namespace, which /* Move ourselves into the subcgroup now *after* we've unshared the cgroup namespace, which

View File

@ -957,7 +957,6 @@ static int append_private_bpf(
*me = (MountEntry) { *me = (MountEntry) {
.path_const = "/sys/fs/bpf", .path_const = "/sys/fs/bpf",
.mode = MOUNT_BPFFS, .mode = MOUNT_BPFFS,
.ignore = !protect_kernel_tunables, /* indicate whether we should fall back to MOUNT_READ_ONLY on failure. */
}; };
return 0; return 0;
} }
@ -1736,13 +1735,11 @@ static int mount_overlay(const MountEntry *m) {
return 1; return 1;
} }
static int mount_bpffs(const MountEntry *m, PidRef *pidref, int socket_fd, int errno_pipe) { static int mount_bpffs(const MountEntry *m, int socket_fd) {
int r; int r;
assert(m); assert(m);
assert(pidref_is_set(pidref));
assert(socket_fd >= 0); assert(socket_fd >= 0);
assert(errno_pipe >= 0);
_cleanup_close_ int fs_fd = fsopen("bpf", FSOPEN_CLOEXEC); _cleanup_close_ int fs_fd = fsopen("bpf", FSOPEN_CLOEXEC);
if (fs_fd < 0) if (fs_fd < 0)
@ -1752,21 +1749,8 @@ static int mount_bpffs(const MountEntry *m, PidRef *pidref, int socket_fd, int e
if (r < 0) if (r < 0)
return log_debug_errno(r, "Failed to send bpffs fd to child: %m"); return log_debug_errno(r, "Failed to send bpffs fd to child: %m");
r = pidref_wait_for_terminate_and_check("(sd-bpffs)", pidref, /* flags = */ 0); if (read(socket_fd, (uint8_t[1]) {}, 1) < 0)
if (r < 0) return log_debug_errno(errno, "Failed to receive data from child: %m");
return r;
/* If something strange happened with the child, let's consider this fatal, too */
if (r != EXIT_SUCCESS) {
ssize_t ss = read(errno_pipe, &r, sizeof(r));
if (ss < 0)
return log_debug_errno(errno, "Failed to read from the bpffs helper errno pipe: %m");
if (ss != sizeof(r))
return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short read from the bpffs helper errno pipe.");
return log_debug_errno(r, "bpffs helper exited with error: %m");
}
pidref_done(pidref);
_cleanup_close_ int mnt_fd = fsmount(fs_fd, /* flags = */ 0, /* mount_attrs = */ 0); _cleanup_close_ int mnt_fd = fsmount(fs_fd, /* flags = */ 0, /* mount_attrs = */ 0);
if (mnt_fd < 0) if (mnt_fd < 0)
@ -1833,23 +1817,6 @@ static int apply_one_mount(
log_debug("Applying namespace mount on %s", mount_entry_path(m)); log_debug("Applying namespace mount on %s", mount_entry_path(m));
if (m->mode == MOUNT_BPFFS) {
r = mount_bpffs(m, p->bpffs_pidref, p->bpffs_socket_fd, p->bpffs_errno_pipe);
if (r >= 0 ||
(!ERRNO_IS_NEG_NOT_SUPPORTED(r) && /* old kernel? */
!ERRNO_IS_NEG_PRIVILEGE(r))) /* ubuntu kernel bug? See issue #38225 */
return r;
if (m->ignore) {
log_debug_errno(r, "Failed to mount new bpffs instance, ignoring: %m");
return 0;
}
log_debug_errno(r, "Failed to mount new bpffs instance, fallback to making %s read-only, ignoring: %m", mount_entry_path(m));
m->mode = MOUNT_READ_ONLY;
m->ignore = true;
}
switch (m->mode) { switch (m->mode) {
case MOUNT_INACCESSIBLE: { case MOUNT_INACCESSIBLE: {
@ -2052,6 +2019,9 @@ static int apply_one_mount(
case MOUNT_OVERLAY: case MOUNT_OVERLAY:
return mount_overlay(m); return mount_overlay(m);
case MOUNT_BPFFS:
return mount_bpffs(m, p->bpffs_socket_fd);
default: default:
assert_not_reached(); assert_not_reached();
} }

View File

@ -200,9 +200,7 @@ typedef struct NamespaceParameters {
PrivateTmp private_var_tmp; PrivateTmp private_var_tmp;
PrivatePIDs private_pids; PrivatePIDs private_pids;
PidRef *bpffs_pidref;
int bpffs_socket_fd; int bpffs_socket_fd;
int bpffs_errno_pipe;
} NamespaceParameters; } NamespaceParameters;
int setup_namespace(const NamespaceParameters *p, char **reterr_path); int setup_namespace(const NamespaceParameters *p, char **reterr_path);

View File

@ -4,22 +4,25 @@
#include <fcntl.h> #include <fcntl.h>
#include "fd-util.h" #include "fd-util.h"
#include "main-func.h"
#include "tests.h" #include "tests.h"
static int intro(void) { static int run(int argc, char *argv[]) {
#if __LIBBPF_CURRENT_VERSION_GEQ(1, 5) #if __LIBBPF_CURRENT_VERSION_GEQ(1, 5)
_cleanup_close_ int bpffs_fd = open("/sys/fs/bpf", O_RDONLY); _cleanup_close_ int bpffs_fd = -EBADF, token_fd = -EBADF;
bpffs_fd = open("/sys/fs/bpf", O_RDONLY);
if (bpffs_fd < 0) if (bpffs_fd < 0)
return log_error_errno(errno, "Failed to open '/sys/fs/bpf': %m"); return -errno;
_cleanup_close_ int token_fd = bpf_token_create(bpffs_fd, /* opts = */ NULL); token_fd = bpf_token_create(bpffs_fd, /* opts = */ NULL);
if (token_fd < 0) if (token_fd < 0)
return log_error_errno(errno, "Failed to create bpf token: %m"); return -errno;
return EXIT_SUCCESS; return 0;
#else #else
return log_tests_skipped("libbpf is older than v1.5"); exit(77);
#endif #endif
} }
DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro); DEFINE_MAIN_FUNCTION(run);

View File

@ -13,26 +13,12 @@ systemd-run --wait \
grep -q '/sys/fs/bpf .* ro,' /proc/mounts grep -q '/sys/fs/bpf .* ro,' /proc/mounts
# Check that with PrivateBPF=yes, a new bpffs instance is mounted # Check that with PrivateBPF=yes, a new bpffs instance is mounted
if ! systemd-run --wait \ systemd-run --wait \
-p PrivateUsers=yes \ -p PrivateUsers=yes \
-p PrivateMounts=yes \ -p PrivateMounts=yes \
-p DelegateNamespaces=mnt \ -p DelegateNamespaces=mnt \
-p PrivateBPF=yes \ -p PrivateBPF=yes \
grep -q '^none /sys/fs/bpf bpf rw' /proc/mounts; then grep -q '^none /sys/fs/bpf bpf rw' /proc/mounts
# If it does not work, maybe the kernel is old or the system has buggy ubuntu kernel.
# Let's check if PrivateBPF=yes is ignored gracefully in that case.
systemd-run --wait \
-p PrivateUsers=yes \
-p PrivateMounts=yes \
-p DelegateNamespaces=mnt \
-p ProtectKernelTunables=yes \
-p PrivateBPF=yes \
grep -q '/sys/fs/bpf .* ro,' /proc/mounts
# Skip all remaining tests.
exit 0
fi
# Check that when specifying the delegate arguments, the mount options are set properly # Check that when specifying the delegate arguments, the mount options are set properly
check_mount_opts() { check_mount_opts() {
@ -77,9 +63,9 @@ systemd-run --wait \
/usr/lib/systemd/tests/unit-tests/manual/test-bpf-token /usr/lib/systemd/tests/unit-tests/manual/test-bpf-token
# Check that without the delegates, the helper aborts trying to get a token # Check that without the delegates, the helper aborts trying to get a token
(! systemd-run --wait \ ! systemd-run --wait \
-p PrivateUsers=yes \ -p PrivateUsers=yes \
-p PrivateMounts=yes \ -p PrivateMounts=yes \
-p DelegateNamespaces=mnt \ -p DelegateNamespaces=mnt \
-p PrivateBPF=yes \ -p PrivateBPF=yes \
/usr/lib/systemd/tests/unit-tests/manual/test-bpf-token) /usr/lib/systemd/tests/unit-tests/manual/test-bpf-token