mirror of
https://github.com/systemd/systemd
synced 2025-10-05 19:54:46 +02:00
Compare commits
7 Commits
2e5e17a570
...
467b1c4e4b
Author | SHA1 | Date | |
---|---|---|---|
![]() |
467b1c4e4b | ||
![]() |
a673826003 | ||
![]() |
8abdceac77 | ||
![]() |
f1eed4e592 | ||
![]() |
0e8e655c52 | ||
![]() |
9eabf82750 | ||
![]() |
8509ceea10 |
@ -2270,20 +2270,51 @@ static int setup_private_users_child(int unshare_ready_fd, const char *uid_map,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpffs_helper(const ExecContext *c, int socket_fd) {
|
||||
assert(c);
|
||||
assert(socket_fd >= 0);
|
||||
|
||||
_cleanup_close_ int fs_fd = receive_one_fd(socket_fd, /* flags = */ 0);
|
||||
if (fs_fd < 0)
|
||||
return log_debug_errno(fs_fd, "Failed to receive file descriptor from parent: %m");
|
||||
|
||||
char number[STRLEN("0x") + sizeof(c->bpf_delegate_commands) * 2 + 1];
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_commands);
|
||||
if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", number, /* aux = */ 0) < 0)
|
||||
return log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_maps);
|
||||
if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_maps", number, /* aux = */ 0) < 0)
|
||||
return log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_programs);
|
||||
if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_progs", number, /* aux = */ 0) < 0)
|
||||
return log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_attachments);
|
||||
if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_attachs", number, /* aux = */ 0) < 0)
|
||||
return log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
|
||||
if (fsconfig(fs_fd, FSCONFIG_CMD_CREATE, /* key = */ NULL, /* value = */ NULL, /* aux = */ 0) < 0)
|
||||
return log_debug_errno(errno, "Failed to create bpffs superblock: %m");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpffs_prepare(
|
||||
const ExecContext *c,
|
||||
PidRef *ret_pid,
|
||||
int *ret_sock_fd,
|
||||
int *ret_errno_pipe) {
|
||||
|
||||
_cleanup_close_pair_ int socket_fds[2] = EBADF_PAIR, bpffs_errno_pipe[2] = EBADF_PAIR;
|
||||
_cleanup_close_pair_ int socket_fds[2] = EBADF_PAIR, errno_pipe[2] = EBADF_PAIR;
|
||||
int r;
|
||||
|
||||
assert(ret_sock_fd);
|
||||
assert(ret_pid);
|
||||
assert(ret_errno_pipe);
|
||||
|
||||
r = pipe2(bpffs_errno_pipe, O_CLOEXEC|O_NONBLOCK);
|
||||
r = pipe2(errno_pipe, O_CLOEXEC|O_NONBLOCK);
|
||||
if (r < 0)
|
||||
return log_debug_errno(errno, "Failed to create pipe: %m");
|
||||
|
||||
@ -2295,67 +2326,13 @@ static int bpffs_prepare(
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to fork bpffs privileged helper: %m");
|
||||
if (r == 0) {
|
||||
_cleanup_close_ int fs_fd = -EBADF;
|
||||
char number[STRLEN("0x") + sizeof(c->bpf_delegate_commands) * 2 + 1];
|
||||
|
||||
bpffs_errno_pipe[0] = safe_close(bpffs_errno_pipe[0]);
|
||||
errno_pipe[0] = safe_close(errno_pipe[0]);
|
||||
socket_fds[0] = safe_close(socket_fds[0]);
|
||||
|
||||
fs_fd = receive_one_fd(socket_fds[1], /* flags = */ 0);
|
||||
if (fs_fd < 0) {
|
||||
log_debug_errno(fs_fd, "Failed to receive file descriptor from parent: %m");
|
||||
report_errno_and_exit(bpffs_errno_pipe[1], fs_fd);
|
||||
}
|
||||
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_commands);
|
||||
|
||||
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", number, /* aux = */ 0);
|
||||
if (r < 0) {
|
||||
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
report_errno_and_exit(bpffs_errno_pipe[1], errno);
|
||||
}
|
||||
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_maps);
|
||||
|
||||
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_maps", number, /* aux = */ 0);
|
||||
if (r < 0) {
|
||||
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
report_errno_and_exit(bpffs_errno_pipe[1], errno);
|
||||
}
|
||||
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_programs);
|
||||
|
||||
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_progs", number, /* aux = */ 0);
|
||||
if (r < 0) {
|
||||
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
report_errno_and_exit(bpffs_errno_pipe[1], errno);
|
||||
}
|
||||
|
||||
xsprintf(number, "0x%"PRIx64, c->bpf_delegate_attachments);
|
||||
|
||||
r = fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_attachs", number, /* aux = */ 0);
|
||||
if (r < 0) {
|
||||
log_debug_errno(errno, "Failed to FSCONFIG_SET_STRING: %m");
|
||||
report_errno_and_exit(bpffs_errno_pipe[1], errno);
|
||||
}
|
||||
|
||||
r = fsconfig(fs_fd, FSCONFIG_CMD_CREATE, /* key = */ NULL, /* value = */ NULL, /* aux = */ 0);
|
||||
if (r < 0) {
|
||||
log_debug_errno(errno, "Failed to create bpffs superblock: %m");
|
||||
report_errno_and_exit(bpffs_errno_pipe[1], errno);
|
||||
}
|
||||
|
||||
if (write(socket_fds[1], (uint8_t[1]) {}, 1) < 0) {
|
||||
log_debug_errno(errno, "Failed to send data to child: %m");
|
||||
report_errno_and_exit(bpffs_errno_pipe[1], errno);
|
||||
}
|
||||
|
||||
_exit(EXIT_SUCCESS);
|
||||
report_errno_and_exit(errno_pipe[1], bpffs_helper(c, socket_fds[1]));
|
||||
}
|
||||
|
||||
*ret_sock_fd = TAKE_FD(socket_fds[0]);
|
||||
*ret_errno_pipe = TAKE_FD(bpffs_errno_pipe[0]);
|
||||
|
||||
*ret_errno_pipe = TAKE_FD(errno_pipe[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3691,7 +3668,9 @@ static int apply_mount_namespace(
|
||||
bool needs_sandboxing,
|
||||
uid_t exec_directory_uid,
|
||||
gid_t exec_directory_gid,
|
||||
PidRef *bpffs_pidref,
|
||||
int bpffs_socket_fd,
|
||||
int bpffs_errno_pipe,
|
||||
char **reterr_path) {
|
||||
|
||||
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
|
||||
@ -3906,7 +3885,9 @@ static int apply_mount_namespace(
|
||||
.proc_subset = needs_sandboxing ? context->proc_subset : PROC_SUBSET_ALL,
|
||||
.private_bpf = needs_sandboxing ? context->private_bpf : PRIVATE_BPF_NO,
|
||||
|
||||
.bpffs_pidref = bpffs_pidref,
|
||||
.bpffs_socket_fd = bpffs_socket_fd,
|
||||
.bpffs_errno_pipe = bpffs_errno_pipe,
|
||||
};
|
||||
|
||||
r = setup_namespace(¶meters, reterr_path);
|
||||
@ -4547,7 +4528,9 @@ static int setup_delegated_namespaces(
|
||||
const ExecCommand *command,
|
||||
bool needs_sandboxing,
|
||||
bool have_cap_sys_admin,
|
||||
PidRef *bpffs_pidref,
|
||||
int bpffs_socket_fd,
|
||||
int bpffs_errno_pipe,
|
||||
int *reterr_exit_status) {
|
||||
|
||||
int r;
|
||||
@ -4670,7 +4653,9 @@ static int setup_delegated_namespaces(
|
||||
needs_sandboxing,
|
||||
uid,
|
||||
gid,
|
||||
bpffs_pidref,
|
||||
bpffs_socket_fd,
|
||||
bpffs_errno_pipe,
|
||||
&error_path);
|
||||
if (r < 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
@ -5782,7 +5767,9 @@ int exec_invoke(
|
||||
command,
|
||||
needs_sandboxing,
|
||||
have_cap_sys_admin,
|
||||
&bpffs_pidref,
|
||||
bpffs_socket_fd,
|
||||
bpffs_errno_pipe,
|
||||
exit_status);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -5842,29 +5829,15 @@ int exec_invoke(
|
||||
command,
|
||||
needs_sandboxing,
|
||||
have_cap_sys_admin,
|
||||
&bpffs_pidref,
|
||||
bpffs_socket_fd,
|
||||
bpffs_errno_pipe,
|
||||
exit_status);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (context->private_bpf != PRIVATE_BPF_NO) {
|
||||
r = pidref_wait_for_terminate_and_check("(sd-bpffs)", &bpffs_pidref, /* flags = */ 0);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_BPF;
|
||||
return r;
|
||||
}
|
||||
/* If something strange happened with the child, let's consider this fatal, too */
|
||||
if (r != EXIT_SUCCESS) {
|
||||
*exit_status = EXIT_BPF;
|
||||
ssize_t ss = read(bpffs_errno_pipe, &r, sizeof(r));
|
||||
if (ss == sizeof(r))
|
||||
return log_debug_errno(r, "bpffs helper exited with error: %m");
|
||||
if (ss < 0)
|
||||
return log_debug_errno(errno, "Failed to read from the bpffs helper errno pipe: %m");
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short read from the bpffs helper errno pipe.");
|
||||
}
|
||||
pidref_done(&bpffs_pidref);
|
||||
}
|
||||
/* Kill the now unnecessary helper process, e.g. in the case where the bpffs mount point is hidden. */
|
||||
pidref_done_sigkill_wait(&bpffs_pidref);
|
||||
|
||||
if (needs_sandboxing && exec_needs_cgroup_namespace(context) && params->cgroup_path) {
|
||||
/* Move ourselves into the subcgroup now *after* we've unshared the cgroup namespace, which
|
||||
|
@ -957,6 +957,7 @@ static int append_private_bpf(
|
||||
*me = (MountEntry) {
|
||||
.path_const = "/sys/fs/bpf",
|
||||
.mode = MOUNT_BPFFS,
|
||||
.ignore = !protect_kernel_tunables, /* indicate whether we should fall back to MOUNT_READ_ONLY on failure. */
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
@ -1735,11 +1736,13 @@ static int mount_overlay(const MountEntry *m) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int mount_bpffs(const MountEntry *m, int socket_fd) {
|
||||
static int mount_bpffs(const MountEntry *m, PidRef *pidref, int socket_fd, int errno_pipe) {
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
assert(pidref_is_set(pidref));
|
||||
assert(socket_fd >= 0);
|
||||
assert(errno_pipe >= 0);
|
||||
|
||||
_cleanup_close_ int fs_fd = fsopen("bpf", FSOPEN_CLOEXEC);
|
||||
if (fs_fd < 0)
|
||||
@ -1749,8 +1752,21 @@ static int mount_bpffs(const MountEntry *m, int socket_fd) {
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to send bpffs fd to child: %m");
|
||||
|
||||
if (read(socket_fd, (uint8_t[1]) {}, 1) < 0)
|
||||
return log_debug_errno(errno, "Failed to receive data from child: %m");
|
||||
r = pidref_wait_for_terminate_and_check("(sd-bpffs)", pidref, /* flags = */ 0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* If something strange happened with the child, let's consider this fatal, too */
|
||||
if (r != EXIT_SUCCESS) {
|
||||
ssize_t ss = read(errno_pipe, &r, sizeof(r));
|
||||
if (ss < 0)
|
||||
return log_debug_errno(errno, "Failed to read from the bpffs helper errno pipe: %m");
|
||||
if (ss != sizeof(r))
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short read from the bpffs helper errno pipe.");
|
||||
return log_debug_errno(r, "bpffs helper exited with error: %m");
|
||||
}
|
||||
|
||||
pidref_done(pidref);
|
||||
|
||||
_cleanup_close_ int mnt_fd = fsmount(fs_fd, /* flags = */ 0, /* mount_attrs = */ 0);
|
||||
if (mnt_fd < 0)
|
||||
@ -1817,6 +1833,23 @@ static int apply_one_mount(
|
||||
|
||||
log_debug("Applying namespace mount on %s", mount_entry_path(m));
|
||||
|
||||
if (m->mode == MOUNT_BPFFS) {
|
||||
r = mount_bpffs(m, p->bpffs_pidref, p->bpffs_socket_fd, p->bpffs_errno_pipe);
|
||||
if (r >= 0 ||
|
||||
(!ERRNO_IS_NEG_NOT_SUPPORTED(r) && /* old kernel? */
|
||||
!ERRNO_IS_NEG_PRIVILEGE(r))) /* ubuntu kernel bug? See issue #38225 */
|
||||
return r;
|
||||
|
||||
if (m->ignore) {
|
||||
log_debug_errno(r, "Failed to mount new bpffs instance, ignoring: %m");
|
||||
return 0;
|
||||
}
|
||||
|
||||
log_debug_errno(r, "Failed to mount new bpffs instance, fallback to making %s read-only, ignoring: %m", mount_entry_path(m));
|
||||
m->mode = MOUNT_READ_ONLY;
|
||||
m->ignore = true;
|
||||
}
|
||||
|
||||
switch (m->mode) {
|
||||
|
||||
case MOUNT_INACCESSIBLE: {
|
||||
@ -2019,9 +2052,6 @@ static int apply_one_mount(
|
||||
case MOUNT_OVERLAY:
|
||||
return mount_overlay(m);
|
||||
|
||||
case MOUNT_BPFFS:
|
||||
return mount_bpffs(m, p->bpffs_socket_fd);
|
||||
|
||||
default:
|
||||
assert_not_reached();
|
||||
}
|
||||
|
@ -200,7 +200,9 @@ typedef struct NamespaceParameters {
|
||||
PrivateTmp private_var_tmp;
|
||||
PrivatePIDs private_pids;
|
||||
|
||||
PidRef *bpffs_pidref;
|
||||
int bpffs_socket_fd;
|
||||
int bpffs_errno_pipe;
|
||||
} NamespaceParameters;
|
||||
|
||||
int setup_namespace(const NamespaceParameters *p, char **reterr_path);
|
||||
|
@ -4,25 +4,22 @@
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "fd-util.h"
|
||||
#include "main-func.h"
|
||||
#include "tests.h"
|
||||
|
||||
static int run(int argc, char *argv[]) {
|
||||
static int intro(void) {
|
||||
#if __LIBBPF_CURRENT_VERSION_GEQ(1, 5)
|
||||
_cleanup_close_ int bpffs_fd = -EBADF, token_fd = -EBADF;
|
||||
|
||||
bpffs_fd = open("/sys/fs/bpf", O_RDONLY);
|
||||
_cleanup_close_ int bpffs_fd = open("/sys/fs/bpf", O_RDONLY);
|
||||
if (bpffs_fd < 0)
|
||||
return -errno;
|
||||
return log_error_errno(errno, "Failed to open '/sys/fs/bpf': %m");
|
||||
|
||||
token_fd = bpf_token_create(bpffs_fd, /* opts = */ NULL);
|
||||
_cleanup_close_ int token_fd = bpf_token_create(bpffs_fd, /* opts = */ NULL);
|
||||
if (token_fd < 0)
|
||||
return -errno;
|
||||
return log_error_errno(errno, "Failed to create bpf token: %m");
|
||||
|
||||
return 0;
|
||||
return EXIT_SUCCESS;
|
||||
#else
|
||||
exit(77);
|
||||
return log_tests_skipped("libbpf is older than v1.5");
|
||||
#endif
|
||||
}
|
||||
|
||||
DEFINE_MAIN_FUNCTION(run);
|
||||
DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro);
|
||||
|
@ -13,12 +13,26 @@ systemd-run --wait \
|
||||
grep -q '/sys/fs/bpf .* ro,' /proc/mounts
|
||||
|
||||
# Check that with PrivateBPF=yes, a new bpffs instance is mounted
|
||||
systemd-run --wait \
|
||||
if ! systemd-run --wait \
|
||||
-p PrivateUsers=yes \
|
||||
-p PrivateMounts=yes \
|
||||
-p DelegateNamespaces=mnt \
|
||||
-p PrivateBPF=yes \
|
||||
grep -q '^none /sys/fs/bpf bpf rw' /proc/mounts
|
||||
grep -q '^none /sys/fs/bpf bpf rw' /proc/mounts; then
|
||||
|
||||
# If it does not work, maybe the kernel is old or the system has a buggy Ubuntu kernel.
|
||||
# Let's check if PrivateBPF=yes is ignored gracefully in that case.
|
||||
systemd-run --wait \
|
||||
-p PrivateUsers=yes \
|
||||
-p PrivateMounts=yes \
|
||||
-p DelegateNamespaces=mnt \
|
||||
-p ProtectKernelTunables=yes \
|
||||
-p PrivateBPF=yes \
|
||||
grep -q '/sys/fs/bpf .* ro,' /proc/mounts
|
||||
|
||||
# Skip all remaining tests.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check that when specifying the delegate arguments, the mount options are set properly
|
||||
check_mount_opts() {
|
||||
@ -63,9 +77,9 @@ systemd-run --wait \
|
||||
/usr/lib/systemd/tests/unit-tests/manual/test-bpf-token
|
||||
|
||||
# Check that without the delegates, the helper aborts trying to get a token
|
||||
! systemd-run --wait \
|
||||
(! systemd-run --wait \
|
||||
-p PrivateUsers=yes \
|
||||
-p PrivateMounts=yes \
|
||||
-p DelegateNamespaces=mnt \
|
||||
-p PrivateBPF=yes \
|
||||
/usr/lib/systemd/tests/unit-tests/manual/test-bpf-token
|
||||
/usr/lib/systemd/tests/unit-tests/manual/test-bpf-token)
|
||||
|
Loading…
x
Reference in New Issue
Block a user