Compare commits

..

No commits in common. "2f2b28ab35e80855042c69e324feaf7418636aa2" and "437f48a471f51ac9dd2697ee3b848a71b4f101df" have entirely different histories.

33 changed files with 576 additions and 977 deletions

5
TODO
View File

@ -30,6 +30,9 @@ Before v244:
Features: Features:
* localed: if UTC is selected but timezone file for it doesn't exist, delete
/etc/localtime instead of creating a symlink
* socket units: allow creating a udev monitor socket with ListenDevices= or so, * socket units: allow creating a udev monitor socket with ListenDevices= or so,
with matches, then actviate app thorugh that passing socket oveer with matches, then actviate app thorugh that passing socket oveer
@ -37,8 +40,6 @@ Features:
may be used to mark a whole binary as non-coredumpable. Would fix: may be used to mark a whole binary as non-coredumpable. Would fix:
https://bugs.freedesktop.org/show_bug.cgi?id=69447 https://bugs.freedesktop.org/show_bug.cgi?id=69447
* teach parse_timestamp() timezones like the calendar spec already knows it
* beef up hibernation to optionally do swapon/swapoff immediately before/after * beef up hibernation to optionally do swapon/swapoff immediately before/after
the hibernation the hibernation

View File

@ -210,9 +210,8 @@
send for setting up an encrypted connection, and thus results send for setting up an encrypted connection, and thus results
in a small DNS look-up time penalty.</para> in a small DNS look-up time penalty.</para>
<para>Note that in <literal>opportunistic</literal> mode the <para>Note as the resolver is not capable of authenticating
resolver is not capable of authenticating the server, so it is the server, it is vulnerable for "man-in-the-middle" attacks.</para>
vulnerable to "man-in-the-middle" attacks.</para>
<para>In addition to this global DNSOverTLS setting <para>In addition to this global DNSOverTLS setting
<citerefentry><refentrytitle>systemd-networkd.service</refentrytitle><manvolnum>8</manvolnum></citerefentry> <citerefentry><refentrytitle>systemd-networkd.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>

View File

@ -778,13 +778,6 @@ Service b@0.service not loaded, b.socket cannot be started.
iterations the specified calendar expression will elapse next. Defaults to 1.</para></listitem> iterations the specified calendar expression will elapse next. Defaults to 1.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><option>--base-time=<replaceable>TIMESTAMP</replaceable></option></term>
<listitem><para>When used with the <command>calendar</command> command, show next iterations relative
to the specified point in time. If not specified defaults to the current time.</para></listitem>
</varlistentry>
<xi:include href="user-system-options.xml" xpointer="host" /> <xi:include href="user-system-options.xml" xpointer="host" />
<xi:include href="user-system-options.xml" xpointer="machine" /> <xi:include href="user-system-options.xml" xpointer="machine" />

View File

@ -399,13 +399,8 @@
<refsect1> <refsect1>
<title>Exit status</title> <title>Exit status</title>
<para>On success, 0 is returned. If <command>systemd-run</command> failed to start the service, a <para>On success, 0 is returned, a non-zero failure
non-zero return value will be returned. If <command>systemd-run</command> waits for the service to code otherwise.</para>
terminate, the return value will be propagated from the service. 0 will be returned on success, including
all the cases where systemd considers a service to have exited cleanly, see the discussion of
<varname>SuccessExitStatus=</varname> in
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
</para>
</refsect1> </refsect1>
<refsect1> <refsect1>
@ -508,16 +503,6 @@ There is a screen on:
<programlisting>$ loginctl enable-linger</programlisting> <programlisting>$ loginctl enable-linger</programlisting>
</example> </example>
<example>
<title>Return value</title>
<programlisting>$ systemd-run --user --wait true
$ systemd-run --user --wait -p SuccessExitStatus=11 bash -c 'exit 11'
$ systemd-run --user --wait -p SuccessExitStatus=SIGUSR1 bash -c 'kill -SIGUSR1 $$$$'</programlisting>
<para>Those three invocations will succeed, i.e. terminate with an exit code of 0.</para>
</example>
</refsect1> </refsect1>
<refsect1> <refsect1>

View File

@ -271,12 +271,9 @@ int verify_units(char **filenames, UnitFileScope scope, bool check_man, bool run
} }
k = manager_load_startable_unit_or_warn(m, NULL, prepared, &units[count]); k = manager_load_startable_unit_or_warn(m, NULL, prepared, &units[count]);
if (k < 0) { if (k < 0 && r == 0)
if (r == 0)
r = k; r = k;
continue; else
}
count++; count++;
} }

View File

@ -84,7 +84,6 @@ static bool arg_man = true;
static bool arg_generators = false; static bool arg_generators = false;
static const char *arg_root = NULL; static const char *arg_root = NULL;
static unsigned arg_iterations = 1; static unsigned arg_iterations = 1;
static usec_t arg_base_time = USEC_INFINITY;
STATIC_DESTRUCTOR_REGISTER(arg_dot_from_patterns, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_dot_from_patterns, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_dot_to_patterns, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_dot_to_patterns, strv_freep);
@ -2137,9 +2136,6 @@ static int test_calendar(int argc, char *argv[], void *userdata) {
char **p; char **p;
usec_t n; usec_t n;
if (arg_base_time != USEC_INFINITY)
n = arg_base_time;
else
n = now(CLOCK_REALTIME); /* We want to use the same "base" for all expressions */ n = now(CLOCK_REALTIME); /* We want to use the same "base" for all expressions */
STRV_FOREACH(p, strv_skip(argv, 1)) { STRV_FOREACH(p, strv_skip(argv, 1)) {
@ -2262,7 +2258,6 @@ static int help(int argc, char *argv[], void *userdata) {
" --man[=BOOL] Do [not] check for existence of man pages\n" " --man[=BOOL] Do [not] check for existence of man pages\n"
" --generators[=BOOL] Do [not] run unit generators (requires privileges)\n" " --generators[=BOOL] Do [not] run unit generators (requires privileges)\n"
" --iterations=N Show the specified number of iterations\n" " --iterations=N Show the specified number of iterations\n"
" --base-time=TIMESTAMP Calculate calendar times relative to specified time\n"
"\nCommands:\n" "\nCommands:\n"
" time Print time spent in the kernel\n" " time Print time spent in the kernel\n"
" blame Print list of running units ordered by time to init\n" " blame Print list of running units ordered by time to init\n"
@ -2312,7 +2307,6 @@ static int parse_argv(int argc, char *argv[]) {
ARG_MAN, ARG_MAN,
ARG_GENERATORS, ARG_GENERATORS,
ARG_ITERATIONS, ARG_ITERATIONS,
ARG_BASE_TIME,
}; };
static const struct option options[] = { static const struct option options[] = {
@ -2333,7 +2327,6 @@ static int parse_argv(int argc, char *argv[]) {
{ "host", required_argument, NULL, 'H' }, { "host", required_argument, NULL, 'H' },
{ "machine", required_argument, NULL, 'M' }, { "machine", required_argument, NULL, 'M' },
{ "iterations", required_argument, NULL, ARG_ITERATIONS }, { "iterations", required_argument, NULL, ARG_ITERATIONS },
{ "base-time", required_argument, NULL, ARG_BASE_TIME },
{} {}
}; };
@ -2440,13 +2433,6 @@ static int parse_argv(int argc, char *argv[]) {
break; break;
case ARG_BASE_TIME:
r = parse_timestamp(optarg, &arg_base_time);
if (r < 0)
return log_error_errno(r, "Failed to parse --base-time= parameter: %s", optarg);
break;
case '?': case '?':
return -EINVAL; return -EINVAL;
@ -2486,6 +2472,7 @@ static int run(int argc, char *argv[]) {
{ "get-log-level", VERB_ANY, 1, 0, get_log_level }, { "get-log-level", VERB_ANY, 1, 0, get_log_level },
{ "set-log-target", 2, 2, 0, set_log_target }, { "set-log-target", 2, 2, 0, set_log_target },
{ "get-log-target", VERB_ANY, 1, 0, get_log_target }, { "get-log-target", VERB_ANY, 1, 0, get_log_target },
{ "dump", VERB_ANY, 1, 0, dump }, { "dump", VERB_ANY, 1, 0, dump },
{ "cat-config", 2, VERB_ANY, 0, cat_config }, { "cat-config", 2, VERB_ANY, 0, cat_config },
{ "unit-files", VERB_ANY, VERB_ANY, 0, do_unit_files }, { "unit-files", VERB_ANY, VERB_ANY, 0, do_unit_files },

View File

@ -1391,22 +1391,13 @@ bool clock_supported(clockid_t clock) {
} }
} }
int get_timezone(char **ret) { int get_timezone(char **tz) {
_cleanup_free_ char *t = NULL; _cleanup_free_ char *t = NULL;
const char *e; const char *e;
char *z; char *z;
int r; int r;
r = readlink_malloc("/etc/localtime", &t); r = readlink_malloc("/etc/localtime", &t);
if (r == -ENOENT) {
/* If the symlink does not exist, assume "UTC", like glibc does*/
z = strdup("UTC");
if (!z)
return -ENOMEM;
*ret = z;
return 0;
}
if (r < 0) if (r < 0)
return r; /* returns EINVAL if not a symlink */ return r; /* returns EINVAL if not a symlink */
@ -1421,7 +1412,7 @@ int get_timezone(char **ret) {
if (!z) if (!z)
return -ENOMEM; return -ENOMEM;
*ret = z; *tz = z;
return 0; return 0;
} }

View File

@ -1,17 +1,8 @@
/* SPDX-License-Identifier: LGPL-2.1+ */ /* SPDX-License-Identifier: LGPL-2.1+ */
#include <fnmatch.h>
#include <linux/bpf_insn.h> #include <linux/bpf_insn.h>
#include "bpf-devices.h" #include "bpf-devices.h"
#include "bpf-program.h" #include "bpf-program.h"
#include "fd-util.h"
#include "fileio.h"
#include "nulstr-util.h"
#include "parse-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "string-util.h"
#define PASS_JUMP_OFF 4096 #define PASS_JUMP_OFF 4096
@ -38,35 +29,27 @@ static int bpf_access_type(const char *acc) {
return r; return r;
} }
static int bpf_prog_whitelist_device(BPFProgram *prog, char type, int major, int minor, const char *acc) { int cgroup_bpf_whitelist_device(BPFProgram *prog, int type, int major, int minor, const char *acc) {
int r, access; struct bpf_insn insn[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 6), /* compare device type */
assert(prog); BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
assert(acc); BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
assert(IN_SET(type, 'b', 'c'));
const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
const struct bpf_insn insn[] = {
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
}; };
int r, access;
assert(prog);
assert(acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
insn[2].imm = access;
if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
else
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (r < 0) if (r < 0)
log_error_errno(r, "Extending device control BPF program failed: %m"); log_error_errno(r, "Extending device control BPF program failed: %m");
@ -74,68 +57,26 @@ static int bpf_prog_whitelist_device(BPFProgram *prog, char type, int major, int
return r; return r;
} }
static int bpf_prog_whitelist_major(BPFProgram *prog, char type, int major, const char *acc) { int cgroup_bpf_whitelist_major(BPFProgram *prog, int type, int major, const char *acc) {
int r, access; struct bpf_insn insn[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */
assert(prog); BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
assert(acc); BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
log_trace("%s: %c %d:* %s", __func__, type, major, acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
assert(IN_SET(type, 'b', 'c'));
const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
const struct bpf_insn insn[] = {
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
}; };
if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
else
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (r < 0)
log_error_errno(r, "Extending device control BPF program failed: %m");
return r;
}
static int bpf_prog_whitelist_class(BPFProgram *prog, char type, const char *acc) {
int r, access; int r, access;
assert(prog); assert(prog);
assert(acc); assert(acc);
log_trace("%s: %c *:* %s", __func__, type, acc);
access = bpf_access_type(acc); access = bpf_access_type(acc);
if (access <= 0) if (access <= 0)
return -EINVAL; return -EINVAL;
assert(IN_SET(type, 'b', 'c')); insn[2].imm = access;
const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
const struct bpf_insn insn[] = {
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
};
if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
else
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (r < 0) if (r < 0)
log_error_errno(r, "Extending device control BPF program failed: %m"); log_error_errno(r, "Extending device control BPF program failed: %m");
@ -143,12 +84,37 @@ static int bpf_prog_whitelist_class(BPFProgram *prog, char type, const char *acc
return r; return r;
} }
int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) { int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc) {
const struct bpf_insn pre_insn[] = { struct bpf_insn insn[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 1), /* compare access type */
BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
};
int r, access;
assert(prog);
assert(acc);
access = bpf_access_type(acc);
if (access <= 0)
return -EINVAL;
insn[2].imm = access;
r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
if (r < 0)
log_error_errno(r, "Extending device control BPF program failed: %m");
return r;
}
int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) {
struct bpf_insn pre_insn[] = {
/* load device type to r2 */ /* load device type to r2 */
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
offsetof(struct bpf_cgroup_dev_ctx, access_type)), offsetof(struct bpf_cgroup_dev_ctx, access_type)),
BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
/* load access type to r3 */ /* load access type to r3 */
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
@ -169,14 +135,14 @@ int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool wh
assert(ret); assert(ret);
if (policy == CGROUP_DEVICE_POLICY_AUTO && !whitelist) if (policy == CGROUP_AUTO && !whitelist)
return 0; return 0;
r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog); r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
if (r < 0) if (r < 0)
return log_error_errno(r, "Loading device control BPF program failed: %m"); return log_error_errno(r, "Loading device control BPF program failed: %m");
if (policy == CGROUP_DEVICE_POLICY_CLOSED || whitelist) { if (policy == CGROUP_CLOSED || whitelist) {
r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn)); r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
if (r < 0) if (r < 0)
return log_error_errno(r, "Extending device control BPF program failed: %m"); return log_error_errno(r, "Extending device control BPF program failed: %m");
@ -187,73 +153,70 @@ int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool wh
return 0; return 0;
} }
int bpf_devices_apply_policy( int cgroup_apply_device_bpf(Unit *u, BPFProgram *prog, CGroupDevicePolicy policy, bool whitelist) {
BPFProgram *prog, struct bpf_insn post_insn[] = {
CGroupDevicePolicy policy,
bool whitelist,
const char *cgroup_path,
BPFProgram **prog_installed) {
_cleanup_free_ char *controller_path = NULL;
int r;
/* This will assign *keep_program if everything goes well. */
if (!prog)
goto finish;
const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !whitelist;
const struct bpf_insn post_insn[] = {
/* return DENY */ /* return DENY */
BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1), BPF_JMP_A(1),
}; };
const struct bpf_insn exit_insn[] = { struct bpf_insn exit_insn[] = {
/* finally return DENY if deny_everything else ALLOW */ /* else return ALLOW */
BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1), BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN() BPF_EXIT_INSN()
}; };
if (!deny_everything) { _cleanup_free_ char *path = NULL;
int r;
if (!prog) {
/* Remove existing program. */
u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
return 0;
}
if (policy != CGROUP_STRICT || whitelist) {
size_t off;
r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn)); r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
if (r < 0) if (r < 0)
return log_error_errno(r, "Extending device control BPF program failed: %m"); return log_error_errno(r, "Extending device control BPF program failed: %m");
/* Fixup PASS_JUMP_OFF jump offsets. */ /* Fixup PASS_JUMP_OFF jump offsets. */
for (size_t off = 0; off < prog->n_instructions; off++) { for (off = 0; off < prog->n_instructions; off++) {
struct bpf_insn *ins = &prog->instructions[off]; struct bpf_insn *ins = &prog->instructions[off];
if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF) if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
ins->off = prog->n_instructions - off - 1; ins->off = prog->n_instructions - off - 1;
} }
} } else
/* Explicitly forbid everything. */
exit_insn[0].imm = 0;
r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn)); r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
if (r < 0) if (r < 0)
return log_error_errno(r, "Extending device control BPF program failed: %m"); return log_error_errno(r, "Extending device control BPF program failed: %m");
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path); r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to determine cgroup path: %m"); return log_error_errno(r, "Failed to determine cgroup path: %m");
r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI); r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, path, BPF_F_ALLOW_MULTI);
if (r < 0) if (r < 0)
return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", path);
cgroup_path);
finish:
/* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */ /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
if (prog_installed) { u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
bpf_program_unref(*prog_installed);
*prog_installed = bpf_program_ref(prog); /* Remember that this BPF program is installed now. */
} u->bpf_device_control_installed = bpf_program_ref(prog);
return 0; return 0;
} }
int bpf_devices_supported(void) { int bpf_devices_supported(void) {
const struct bpf_insn trivial[] = { struct bpf_insn trivial[] = {
BPF_MOV64_IMM(BPF_REG_0, 1), BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN() BPF_EXIT_INSN()
}; };
@ -305,185 +268,3 @@ int bpf_devices_supported(void) {
return supported = 1; return supported = 1;
} }
static int whitelist_device_pattern(BPFProgram *prog, const char *path, char type, const unsigned *maj, const unsigned *min, const char *acc) {
assert(IN_SET(type, 'b', 'c'));
if (cg_all_unified() > 0) {
if (!prog)
return 0;
if (maj && min)
return bpf_prog_whitelist_device(prog, type, *maj, *min, acc);
else if (maj)
return bpf_prog_whitelist_major(prog, type, *maj, acc);
else
return bpf_prog_whitelist_class(prog, type, acc);
} else {
char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
int r;
if (maj && min)
xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);
else if (maj)
xsprintf(buf, "%c %u:* %s", type, *maj, acc);
else
xsprintf(buf, "%c *:* %s", type, acc);
/* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
* EINVAL here. */
r = cg_set_attribute("devices", path, "devices.allow", buf);
if (r < 0)
log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
r, "Failed to set devices.allow on %s: %m", path);
return r;
}
}
int bpf_devices_whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
mode_t mode;
dev_t rdev;
int r;
assert(path);
assert(acc);
assert(strlen(acc) <= 3);
log_trace("%s: %s %s", __func__, node, acc);
/* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
* /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
* means clients can use these path without the device node actually around */
r = device_path_parse_major_minor(node, &mode, &rdev);
if (r < 0) {
if (r != -ENODEV)
return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
struct stat st;
if (stat(node, &st) < 0)
return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);
mode = st.st_mode;
rdev = (dev_t) st.st_rdev;
}
unsigned maj = major(rdev), min = minor(rdev);
return whitelist_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, acc);
}
int bpf_devices_whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
unsigned maj;
int r;
assert(path);
assert(acc);
assert(IN_SET(type, 'b', 'c'));
if (streq(name, "*"))
/* If the name is a wildcard, then apply this list to all devices of this type */
return whitelist_device_pattern(prog, path, type, NULL, NULL, acc);
if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
/* The name is numeric and suitable as major. In that case, let's take its major, and create
* the entry directly. */
return whitelist_device_pattern(prog, path, type, &maj, NULL, acc);
_cleanup_fclose_ FILE *f = NULL;
bool good = false, any = false;
f = fopen("/proc/devices", "re");
if (!f)
return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);
for (;;) {
_cleanup_free_ char *line = NULL;
char *w, *p;
r = read_line(f, LONG_LINE_MAX, &line);
if (r < 0)
return log_warning_errno(r, "Failed to read /proc/devices: %m");
if (r == 0)
break;
if (type == 'c' && streq(line, "Character devices:")) {
good = true;
continue;
}
if (type == 'b' && streq(line, "Block devices:")) {
good = true;
continue;
}
if (isempty(line)) {
good = false;
continue;
}
if (!good)
continue;
p = strstrip(line);
w = strpbrk(p, WHITESPACE);
if (!w)
continue;
*w = 0;
r = safe_atou(p, &maj);
if (r < 0)
continue;
if (maj <= 0)
continue;
w++;
w += strspn(w, WHITESPACE);
if (fnmatch(name, w, 0) != 0)
continue;
any = true;
(void) whitelist_device_pattern(prog, path, type, &maj, NULL, acc);
}
if (!any)
return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
"Device whitelist pattern \"%s\" did not match anything.", name);
return 0;
}
int bpf_devices_whitelist_static(BPFProgram *prog, const char *path) {
static const char auto_devices[] =
"/dev/null\0" "rwm\0"
"/dev/zero\0" "rwm\0"
"/dev/full\0" "rwm\0"
"/dev/random\0" "rwm\0"
"/dev/urandom\0" "rwm\0"
"/dev/tty\0" "rwm\0"
"/dev/ptmx\0" "rwm\0"
/* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
"/run/systemd/inaccessible/chr\0" "rwm\0"
"/run/systemd/inaccessible/blk\0" "rwm\0";
int r = 0, k;
const char *node, *acc;
NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
k = bpf_devices_whitelist_device(prog, path, node, acc);
if (r >= 0 && k < 0)
r = k;
}
/* PTS (/dev/pts) devices may not be duplicated, but accessed */
k = bpf_devices_whitelist_major(prog, path, "pts", 'c', "rw");
if (r >= 0 && k < 0)
r = k;
return r;
}

View File

@ -3,19 +3,15 @@
#include <inttypes.h> #include <inttypes.h>
#include "cgroup.h" #include "unit.h"
typedef struct BPFProgram BPFProgram; struct BPFProgram;
int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist);
int bpf_devices_apply_policy(
BPFProgram *prog,
CGroupDevicePolicy policy,
bool whitelist,
const char *cgroup_path,
BPFProgram **prog_installed);
int bpf_devices_supported(void); int bpf_devices_supported(void);
int bpf_devices_whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc);
int bpf_devices_whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc); int cgroup_bpf_whitelist_device(BPFProgram *p, int type, int major, int minor, const char *acc);
int bpf_devices_whitelist_static(BPFProgram *prog, const char *path); int cgroup_bpf_whitelist_major(BPFProgram *p, int type, int major, const char *acc);
int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc);
int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist);
int cgroup_apply_device_bpf(Unit *u, BPFProgram *p, CGroupDevicePolicy policy, bool whitelist);

View File

@ -132,7 +132,7 @@ static int add_instructions_for_ip_any(
assert(p); assert(p);
const struct bpf_insn insn[] = { struct bpf_insn insn[] = {
BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict), BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
}; };
@ -150,7 +150,7 @@ static int bpf_firewall_compile_bpf(
bool ip_allow_any, bool ip_allow_any,
bool ip_deny_any) { bool ip_deny_any) {
const struct bpf_insn pre_insn[] = { struct bpf_insn pre_insn[] = {
/* /*
* When the eBPF program is entered, R1 contains the address of the skb. * When the eBPF program is entered, R1 contains the address of the skb.
* However, R1-R5 are scratch registers that are not preserved when calling * However, R1-R5 are scratch registers that are not preserved when calling
@ -186,7 +186,7 @@ static int bpf_firewall_compile_bpf(
* This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
* is allowed to pass. * is allowed to pass.
*/ */
const struct bpf_insn post_insn[] = { struct bpf_insn post_insn[] = {
BPF_MOV64_IMM(BPF_REG_0, 1), BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1), BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 0),
@ -321,7 +321,7 @@ static int bpf_firewall_compile_bpf(
* Exit from the eBPF program, R0 contains the verdict. * Exit from the eBPF program, R0 contains the verdict.
* 0 means the packet is denied, 1 means the packet may pass. * 0 means the packet is denied, 1 means the packet may pass.
*/ */
const struct bpf_insn insn[] = { struct bpf_insn insn[] = {
BPF_EXIT_INSN() BPF_EXIT_INSN()
}; };
@ -795,7 +795,7 @@ int bpf_firewall_reset_accounting(int map_fd) {
static int bpf_firewall_unsupported_reason = 0; static int bpf_firewall_unsupported_reason = 0;
int bpf_firewall_supported(void) { int bpf_firewall_supported(void) {
const struct bpf_insn trivial[] = { struct bpf_insn trivial[] = {
BPF_MOV64_IMM(BPF_REG_0, 1), BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN() BPF_EXIT_INSN()
}; };

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1+ */ /* SPDX-License-Identifier: LGPL-2.1+ */
#include <fcntl.h> #include <fcntl.h>
#include <fnmatch.h>
#include "sd-messages.h" #include "sd-messages.h"
@ -16,6 +17,7 @@
#include "fd-util.h" #include "fd-util.h"
#include "fileio.h" #include "fileio.h"
#include "fs-util.h" #include "fs-util.h"
#include "nulstr-util.h"
#include "parse-util.h" #include "parse-util.h"
#include "path-util.h" #include "path-util.h"
#include "process-util.h" #include "process-util.h"
@ -665,6 +667,192 @@ static int lookup_block_device(const char *p, dev_t *ret) {
return 0; return 0;
} }
static int whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
dev_t rdev;
mode_t mode;
int r;
assert(path);
assert(acc);
/* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
* /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
* means clients can use these path without the device node actually around */
r = device_path_parse_major_minor(node, &mode, &rdev);
if (r < 0) {
if (r != -ENODEV)
return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
struct stat st;
if (stat(node, &st) < 0)
return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
log_warning("%s is not a device.", node);
return -ENODEV;
}
rdev = (dev_t) st.st_rdev;
mode = st.st_mode;
}
if (cg_all_unified() > 0) {
if (!prog)
return 0;
return cgroup_bpf_whitelist_device(prog, S_ISCHR(mode) ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
major(rdev), minor(rdev), acc);
} else {
char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
sprintf(buf,
"%c %u:%u %s",
S_ISCHR(mode) ? 'c' : 'b',
major(rdev), minor(rdev),
acc);
/* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL here. */
r = cg_set_attribute("devices", path, "devices.allow", buf);
if (r < 0)
return log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
r, "Failed to set devices.allow on %s: %m", path);
return 0;
}
}
/* Whitelists a whole device major, or a whole device class, for the cgroup at 'path'.
 *
 * 'name' is one of:
 *   - "*": every device of the given type,
 *   - a decimal major number,
 *   - an fnmatch() pattern that is matched against the driver names listed in /proc/devices.
 * 'type' is 'c' for character devices or 'b' for block devices. 'acc' is the access string (built from
 * the letters 'r', 'w' and 'm' by the callers).
 *
 * On the unified hierarchy the rules are appended to 'prog'; if 'prog' is NULL nothing is installed. On
 * the legacy hierarchy the rules are written to the "devices.allow" attribute.
 *
 * Failures to install an individual rule are only logged. A negative errno-style value is returned
 * solely when /proc/devices cannot be opened or read; otherwise 0 is returned. */
static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
        _cleanup_fclose_ FILE *f = NULL;
        /* "<type> " + major + ":* " + up to three access letters and the trailing NUL */
        char buf[2+DECIMAL_STR_MAX(unsigned)+3+4];
        bool good = false;
        unsigned maj;
        int r;

        assert(path);
        assert(name);
        assert(acc);
        assert(IN_SET(type, 'b', 'c'));

        if (streq(name, "*")) {
                /* If the name is a wildcard, then apply this list to all devices of this type */

                if (cg_all_unified() > 0) {
                        if (prog)
                                (void) cgroup_bpf_whitelist_class(prog, type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, acc);
                } else {
                        xsprintf(buf, "%c *:* %s", type, acc);

                        /* Changing the devices list of a populated cgroup might result in EINVAL, hence
                         * ignore EINVAL here. */
                        r = cg_set_attribute("devices", path, "devices.allow", buf);
                        if (r < 0)
                                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set devices.allow on %s: %m", path);
                }

                /* The whole class is covered now. Do not fall through to the /proc/devices scan below:
                 * there "*" would match every driver name and add one redundant rule per major. */
                return 0;
        }

        if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) {
                /* The name is numeric and suitable as major. In that case, let's take it as the major,
                 * and create the entry directly */

                if (cg_all_unified() > 0) {
                        if (!prog)
                                return 0;

                        (void) cgroup_bpf_whitelist_major(prog,
                                                          type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
                                                          maj, acc);
                } else {
                        xsprintf(buf, "%c %u:* %s", type, maj, acc);

                        r = cg_set_attribute("devices", path, "devices.allow", buf);
                        if (r < 0)
                                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set devices.allow on %s: %m", path);
                }

                return 0;
        }

        /* Otherwise treat the name as a pattern and resolve it to majors via /proc/devices */
        f = fopen("/proc/devices", "re");
        if (!f)
                return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);

        for (;;) {
                _cleanup_free_ char *line = NULL;
                char *w, *p;

                r = read_line(f, LONG_LINE_MAX, &line);
                if (r < 0)
                        return log_warning_errno(r, "Failed to read /proc/devices: %m");
                if (r == 0)
                        break;

                /* Only lines in the section that belongs to the requested device type are considered;
                 * an empty line ends the current section. */
                if (type == 'c' && streq(line, "Character devices:")) {
                        good = true;
                        continue;
                }

                if (type == 'b' && streq(line, "Block devices:")) {
                        good = true;
                        continue;
                }

                if (isempty(line)) {
                        good = false;
                        continue;
                }

                if (!good)
                        continue;

                /* Split "<major> <driver name>" at the first whitespace */
                p = strstrip(line);

                w = strpbrk(p, WHITESPACE);
                if (!w)
                        continue;
                *w = 0;

                r = safe_atou(p, &maj);
                if (r < 0)
                        continue;
                if (maj == 0)
                        continue;

                w++;
                w += strspn(w, WHITESPACE);

                if (fnmatch(name, w, 0) != 0)
                        continue;

                if (cg_all_unified() > 0) {
                        if (!prog)
                                continue;

                        (void) cgroup_bpf_whitelist_major(prog,
                                                          type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
                                                          maj, acc);
                } else {
                        xsprintf(buf, "%c %u:* %s", type, maj, acc);

                        /* Changing the devices list of a populated cgroup might result in EINVAL, hence
                         * ignore EINVAL here. */
                        r = cg_set_attribute("devices", path, "devices.allow", buf);
                        if (r < 0)
                                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                                               r, "Failed to set devices.allow on %s: %m", path);
                }
        }

        return 0;
}
static bool cgroup_context_has_cpu_weight(CGroupContext *c) { static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
return c->cpu_weight != CGROUP_WEIGHT_INVALID || return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
c->startup_cpu_weight != CGROUP_WEIGHT_INVALID; c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
@ -955,95 +1143,6 @@ static void cgroup_apply_firewall(Unit *u) {
(void) bpf_firewall_install(u); (void) bpf_firewall_install(u);
} }
/* Applies the device policy and device allow list of the unit's cgroup context to the unit's cgroup.
 *
 * On the unified hierarchy a BPF program is prepared, filled with rules and handed to
 * bpf_devices_apply_policy(). On the legacy hierarchy the device list is reset through the
 * devices.allow/devices.deny attributes first and the same whitelist helpers are then invoked.
 *
 * Returns the result of bpf_devices_apply_policy(), or the (logged) error of the BPF program
 * initialization. */
static int cgroup_apply_devices(Unit *u) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        CGroupDevicePolicy policy;
        CGroupDeviceAllow *entry;
        bool use_static, matched;
        CGroupContext *c;
        const char *path;
        int r;

        assert_se(c = unit_get_cgroup_context(u));
        assert_se(path = u->cgroup_path);

        policy = c->device_policy;

        if (cg_all_unified() > 0) {
                r = bpf_devices_cgroup_init(&prog, policy, c->device_allow);
                if (r < 0)
                        return log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
        } else {
                /* Start from "deny all" when anything is restricted, and from "allow all" otherwise.
                 * Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
                 * EINVAL here. */
                const char *reset_attribute =
                        (c->device_allow || policy != CGROUP_DEVICE_POLICY_AUTO) ? "devices.deny" : "devices.allow";

                r = cg_set_attribute("devices", path, reset_attribute, "a");
                if (r < 0)
                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
                                      "Failed to reset devices.allow/devices.deny: %m");
        }

        /* The static whitelist is used for the "closed" policy, and for "auto" when devices are listed */
        use_static = policy == CGROUP_DEVICE_POLICY_CLOSED ||
                (policy == CGROUP_DEVICE_POLICY_AUTO && c->device_allow);
        if (use_static)
                (void) bpf_devices_whitelist_static(prog, path);

        /* Tracks whether at least one rule made it into the filter */
        matched = use_static;

        LIST_FOREACH(device_allow, entry, c->device_allow) {
                char acc[4], *suffix;
                unsigned n = 0;

                /* Build the access string from the entry's flags, in "rwm" order */
                if (entry->r)
                        acc[n++] = 'r';
                if (entry->w)
                        acc[n++] = 'w';
                if (entry->m)
                        acc[n++] = 'm';

                if (n == 0)
                        continue;

                acc[n] = 0;

                if (path_startswith(entry->path, "/dev/"))
                        r = bpf_devices_whitelist_device(prog, path, entry->path, acc);
                else if ((suffix = startswith(entry->path, "block-")))
                        r = bpf_devices_whitelist_major(prog, path, suffix, 'b', acc);
                else if ((suffix = startswith(entry->path, "char-")))
                        r = bpf_devices_whitelist_major(prog, path, suffix, 'c', acc);
                else {
                        log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", entry->path);
                        continue;
                }

                matched = matched || r >= 0;
        }

        if (prog && !matched) {
                log_unit_warning_errno(u, SYNTHETIC_ERRNO(ENODEV), "No devices matched by device filter.");

                /* The kernel verifier would reject a program we would build with the normal intro and outro
                   but no whitelisting rules (outro would contain an unreachable instruction for successful
                   return). */
                policy = CGROUP_DEVICE_POLICY_STRICT;
        }

        r = bpf_devices_apply_policy(prog, policy, matched, path, &u->bpf_device_control_installed);
        if (r < 0) {
                /* Only the first failure is logged at warning level, later ones at debug level */
                static bool warned = false;

                log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
                               "Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
                               "Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
                               "(This warning is only shown for the first loaded unit using device ACL.)", u->id);

                warned = true;
        }

        return r;
}
static void cgroup_context_apply( static void cgroup_context_apply(
Unit *u, Unit *u,
CGroupMask apply_mask, CGroupMask apply_mask,
@ -1320,8 +1419,88 @@ static void cgroup_context_apply(
/* On cgroup v2 we can apply BPF everywhere. On cgroup v1 we apply it everywhere except for the root of /* On cgroup v2 we can apply BPF everywhere. On cgroup v1 we apply it everywhere except for the root of
* containers, where we leave this to the manager */ * containers, where we leave this to the manager */
if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) && if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) &&
(is_host_root || cg_all_unified() > 0 || !is_local_root)) (is_host_root || cg_all_unified() > 0 || !is_local_root)) {
(void) cgroup_apply_devices(u); _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
CGroupDeviceAllow *a;
if (cg_all_unified() > 0) {
r = cgroup_init_device_bpf(&prog, c->device_policy, c->device_allow);
if (r < 0)
log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
} else {
/* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
* here. */
if (c->device_allow || c->device_policy != CGROUP_AUTO)
r = cg_set_attribute("devices", path, "devices.deny", "a");
else
r = cg_set_attribute("devices", path, "devices.allow", "a");
if (r < 0)
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to reset devices.allow/devices.deny: %m");
}
if (c->device_policy == CGROUP_CLOSED ||
(c->device_policy == CGROUP_AUTO && c->device_allow)) {
static const char auto_devices[] =
"/dev/null\0" "rwm\0"
"/dev/zero\0" "rwm\0"
"/dev/full\0" "rwm\0"
"/dev/random\0" "rwm\0"
"/dev/urandom\0" "rwm\0"
"/dev/tty\0" "rwm\0"
"/dev/ptmx\0" "rwm\0"
/* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
"/run/systemd/inaccessible/chr\0" "rwm\0"
"/run/systemd/inaccessible/blk\0" "rwm\0";
const char *x, *y;
NULSTR_FOREACH_PAIR(x, y, auto_devices)
(void) whitelist_device(prog, path, x, y);
/* PTS (/dev/pts) devices may not be duplicated, but accessed */
(void) whitelist_major(prog, path, "pts", 'c', "rw");
}
LIST_FOREACH(device_allow, a, c->device_allow) {
char acc[4], *val;
unsigned k = 0;
if (a->r)
acc[k++] = 'r';
if (a->w)
acc[k++] = 'w';
if (a->m)
acc[k++] = 'm';
if (k == 0)
continue;
acc[k++] = 0;
if (path_startswith(a->path, "/dev/"))
(void) whitelist_device(prog, path, a->path, acc);
else if ((val = startswith(a->path, "block-")))
(void) whitelist_major(prog, path, val, 'b', acc);
else if ((val = startswith(a->path, "char-")))
(void) whitelist_major(prog, path, val, 'c', acc);
else
log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
}
r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow);
if (r < 0) {
static bool warned = false;
log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
"Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
"Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
"(This warning is only shown for the first loaded unit using device ACL.)", u->id);
warned = true;
}
}
if (apply_mask & CGROUP_MASK_PIDS) { if (apply_mask & CGROUP_MASK_PIDS) {
@ -1430,7 +1609,7 @@ static CGroupMask unit_get_cgroup_mask(Unit *u) {
mask |= CGROUP_MASK_MEMORY; mask |= CGROUP_MASK_MEMORY;
if (c->device_allow || if (c->device_allow ||
c->device_policy != CGROUP_DEVICE_POLICY_AUTO) c->device_policy != CGROUP_AUTO)
mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES; mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
if (c->tasks_accounting || if (c->tasks_accounting ||
@ -3568,9 +3747,9 @@ int compare_job_priority(const void *a, const void *b) {
} }
static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
[CGROUP_DEVICE_POLICY_AUTO] = "auto", [CGROUP_AUTO] = "auto",
[CGROUP_DEVICE_POLICY_CLOSED] = "closed", [CGROUP_CLOSED] = "closed",
[CGROUP_DEVICE_POLICY_STRICT] = "strict", [CGROUP_STRICT] = "strict",
}; };
int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) { int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {

View File

@ -18,15 +18,16 @@ typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth; typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
typedef enum CGroupDevicePolicy { typedef enum CGroupDevicePolicy {
/* When devices listed, will allow those, plus built-in ones, if none are listed will allow
* everything. */ /* When devices listed, will allow those, plus built-in ones,
CGROUP_DEVICE_POLICY_AUTO, if none are listed will allow everything. */
CGROUP_AUTO,
/* Everything forbidden, except built-in ones and listed ones. */ /* Everything forbidden, except built-in ones and listed ones. */
CGROUP_DEVICE_POLICY_CLOSED, CGROUP_CLOSED,
/* Everything forbidden, except for the listed devices */ /* Everything forbidden, except for the listed devices */
CGROUP_DEVICE_POLICY_STRICT, CGROUP_STRICT,
_CGROUP_DEVICE_POLICY_MAX, _CGROUP_DEVICE_POLICY_MAX,
_CGROUP_DEVICE_POLICY_INVALID = -1 _CGROUP_DEVICE_POLICY_INVALID = -1

View File

@ -4303,11 +4303,11 @@ int unit_patch_contexts(Unit *u) {
if (cc && ec) { if (cc && ec) {
if (ec->private_devices && if (ec->private_devices &&
cc->device_policy == CGROUP_DEVICE_POLICY_AUTO) cc->device_policy == CGROUP_AUTO)
cc->device_policy = CGROUP_DEVICE_POLICY_CLOSED; cc->device_policy = CGROUP_CLOSED;
if (ec->root_image && if (ec->root_image &&
(cc->device_policy != CGROUP_DEVICE_POLICY_AUTO || cc->device_allow)) { (cc->device_policy != CGROUP_AUTO || cc->device_allow)) {
/* When RootImage= is specified, the following devices are touched. */ /* When RootImage= is specified, the following devices are touched. */
r = cgroup_add_device_allow(cc, "/dev/loop-control", "rw"); r = cgroup_add_device_allow(cc, "/dev/loop-control", "rw");

View File

@ -16,7 +16,6 @@
#include "bus-wait-for-jobs.h" #include "bus-wait-for-jobs.h"
#include "calendarspec.h" #include "calendarspec.h"
#include "env-util.h" #include "env-util.h"
#include "exit-status.h"
#include "fd-util.h" #include "fd-util.h"
#include "format-util.h" #include "format-util.h"
#include "main-func.h" #include "main-func.h"
@ -1257,7 +1256,7 @@ static int start_transient_service(
if (arg_wait && !arg_quiet) { if (arg_wait && !arg_quiet) {
/* Explicitly destroy the PTY forwarder, so that the PTY device is usable again, with its /* Explicitly destroy the PTY forwarder, so that the PTY device is usable again, in its
* original settings (i.e. proper line breaks), so that we can show the summary in a pretty * original settings (i.e. proper line breaks), so that we can show the summary in a pretty
* way. */ * way. */
c.forward = pty_forward_free(c.forward); c.forward = pty_forward_free(c.forward);
@ -1266,52 +1265,43 @@ static int start_transient_service(
log_info("Finished with result: %s", strna(c.result)); log_info("Finished with result: %s", strna(c.result));
if (c.exit_code == CLD_EXITED) if (c.exit_code == CLD_EXITED)
log_info("Main processes terminated with: code=%s/status=%i", log_info("Main processes terminated with: code=%s/status=%i", sigchld_code_to_string(c.exit_code), c.exit_status);
sigchld_code_to_string(c.exit_code), c.exit_status);
else if (c.exit_code > 0) else if (c.exit_code > 0)
log_info("Main processes terminated with: code=%s/status=%s", log_info("Main processes terminated with: code=%s/status=%s", sigchld_code_to_string(c.exit_code), signal_to_string(c.exit_status));
sigchld_code_to_string(c.exit_code), signal_to_string(c.exit_status));
if (timestamp_is_set(c.inactive_enter_usec) && if (timestamp_is_set(c.inactive_enter_usec) &&
timestamp_is_set(c.inactive_exit_usec) && timestamp_is_set(c.inactive_exit_usec) &&
c.inactive_enter_usec > c.inactive_exit_usec) { c.inactive_enter_usec > c.inactive_exit_usec) {
char ts[FORMAT_TIMESPAN_MAX]; char ts[FORMAT_TIMESPAN_MAX];
log_info("Service runtime: %s", log_info("Service runtime: %s", format_timespan(ts, sizeof(ts), c.inactive_enter_usec - c.inactive_exit_usec, USEC_PER_MSEC));
format_timespan(ts, sizeof ts, c.inactive_enter_usec - c.inactive_exit_usec, USEC_PER_MSEC));
} }
if (c.cpu_usage_nsec != NSEC_INFINITY) { if (c.cpu_usage_nsec != NSEC_INFINITY) {
char ts[FORMAT_TIMESPAN_MAX]; char ts[FORMAT_TIMESPAN_MAX];
log_info("CPU time consumed: %s", log_info("CPU time consumed: %s", format_timespan(ts, sizeof(ts), (c.cpu_usage_nsec + NSEC_PER_USEC - 1) / NSEC_PER_USEC, USEC_PER_MSEC));
format_timespan(ts, sizeof ts, (c.cpu_usage_nsec + NSEC_PER_USEC - 1) / NSEC_PER_USEC, USEC_PER_MSEC));
} }
if (c.ip_ingress_bytes != UINT64_MAX) { if (c.ip_ingress_bytes != UINT64_MAX) {
char bytes[FORMAT_BYTES_MAX]; char bytes[FORMAT_BYTES_MAX];
log_info("IP traffic received: %s", format_bytes(bytes, sizeof bytes, c.ip_ingress_bytes)); log_info("IP traffic received: %s", format_bytes(bytes, sizeof(bytes), c.ip_ingress_bytes));
} }
if (c.ip_egress_bytes != UINT64_MAX) { if (c.ip_egress_bytes != UINT64_MAX) {
char bytes[FORMAT_BYTES_MAX]; char bytes[FORMAT_BYTES_MAX];
log_info("IP traffic sent: %s", format_bytes(bytes, sizeof bytes, c.ip_egress_bytes)); log_info("IP traffic sent: %s", format_bytes(bytes, sizeof(bytes), c.ip_egress_bytes));
} }
if (c.io_read_bytes != UINT64_MAX) { if (c.io_read_bytes != UINT64_MAX) {
char bytes[FORMAT_BYTES_MAX]; char bytes[FORMAT_BYTES_MAX];
log_info("IO bytes read: %s", format_bytes(bytes, sizeof bytes, c.io_read_bytes)); log_info("IO bytes read: %s", format_bytes(bytes, sizeof(bytes), c.io_read_bytes));
} }
if (c.io_write_bytes != UINT64_MAX) { if (c.io_write_bytes != UINT64_MAX) {
char bytes[FORMAT_BYTES_MAX]; char bytes[FORMAT_BYTES_MAX];
log_info("IO bytes written: %s", format_bytes(bytes, sizeof bytes, c.io_write_bytes)); log_info("IO bytes written: %s", format_bytes(bytes, sizeof(bytes), c.io_write_bytes));
} }
} }
/* Try to propagate the service's return value. But if the service defines /* Try to propagate the service's return value */
* e.g. SuccessExitStatus, honour this, and return 0 to mean "success". */ if (c.result && STR_IN_SET(c.result, "success", "exit-code") && c.exit_code == CLD_EXITED)
if (streq_ptr(c.result, "success"))
*retval = 0;
else if (streq_ptr(c.result, "exit-code") && c.exit_status > 0)
*retval = c.exit_status; *retval = c.exit_status;
else if (streq_ptr(c.result, "signal"))
*retval = EXIT_EXCEPTION;
else else
*retval = EXIT_FAILURE; *retval = EXIT_FAILURE;
} }

View File

@ -3,7 +3,6 @@
#include <sched.h> #include <sched.h>
#include <signal.h> #include <signal.h>
#include <stdlib.h> #include <stdlib.h>
#include <sys/mman.h>
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <util.h> #include <util.h>
@ -15,14 +14,11 @@
#undef basename #undef basename
#include "alloc-util.h" #include "alloc-util.h"
#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "env-file.h" #include "env-file.h"
#include "env-util.h" #include "env-util.h"
#include "fs-util.h" #include "fs-util.h"
#include "log.h" #include "log.h"
#include "path-util.h" #include "path-util.h"
#include "random-util.h"
#include "strv.h" #include "strv.h"
#include "tests.h" #include "tests.h"
@ -153,50 +149,3 @@ bool have_namespaces(void) {
assert_not_reached("unexpected exit code"); assert_not_reached("unexpected exit code");
} }
/* Reports whether a blob of CAN_MEMLOCK_SIZE bytes can be mmap()ed and mlock()ed.
 *
 * BPF programs are charged against RLIMIT_MEMLOCK, hence tests first make sure memory can be locked
 * at all and skip themselves otherwise. RLIMIT_MEMLOCK is deliberately not inspected directly: in
 * container environments the value we see might not match the value actually in effect. */
bool can_memlock(void) {
        void *p;

        p = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
        if (p == MAP_FAILED)
                return false;

        if (mlock(p, CAN_MEMLOCK_SIZE) < 0) {
                /* Locking is not possible; just drop the mapping again */
                assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0);
                return false;
        }

        assert_se(munlock(p, CAN_MEMLOCK_SIZE) >= 0);
        assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0);
        return true;
}
/* Creates a randomly named cgroup below the cgroup of the calling process and moves the process
 * into it.
 *
 * If 'ret_cgroup' is non-NULL, the path of the new cgroup is stored there on success; the string is
 * handed over to the caller.
 *
 * Returns 0 on success. Returns -ENOMEDIUM (after logging a warning) if cg_pid_get_path() reports
 * it, and otherwise the negative error of creating or attaching to the new cgroup. */
int enter_cgroup_subroot(char **ret_cgroup) {
        _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL;
        CGroupMask supported;
        int r;

        r = cg_pid_get_path(NULL, 0, &cgroup_root);
        if (r == -ENOMEDIUM)
                return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m");
        /* cgroup_root is formatted with %s right below, hence this check must not be compiled out */
        assert_se(r >= 0);

        assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0);
        assert_se(cg_mask_supported(&supported) >= 0);

        /* Failures from here on are not fatal for us: they are passed back and it is up to the caller
         * to decide how to handle them. */
        r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot);
        if (r < 0)
                return r;

        r = cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL);
        if (r < 0)
                return r;

        if (ret_cgroup)
                *ret_cgroup = TAKE_PTR(cgroup_subroot);

        return 0;
}

View File

@ -3,23 +3,7 @@
#include <stdbool.h> #include <stdbool.h>
#include "sd-daemon.h"
#include "macro.h"
/* Returns true if the absolute value of 'r' is one of the errno values listed below, false
 * otherwise. The sign of 'r' does not matter.
 * NOTE(review): judging by the name, callers presumably skip a test on these errors — confirm
 * against the call sites. */
static inline bool manager_errno_skip_test(int r) {
        switch (abs(r)) {

        case EPERM:
        case EACCES:
        case EADDRINUSE:
        case EHOSTDOWN:
        case ENOENT:
        case ENOMEDIUM: /* cannot determine cgroup */
                return true;

        default:
                return false;
        }
}
char* setup_fake_runtime_dir(void); char* setup_fake_runtime_dir(void);
int enter_cgroup_subroot(char **ret_cgroup);
const char* get_testdata_dir(void); const char* get_testdata_dir(void);
const char* get_catalog_dir(void); const char* get_catalog_dir(void);
bool slow_tests_enabled(void); bool slow_tests_enabled(void);
@ -28,14 +12,3 @@ int log_tests_skipped(const char *message);
int log_tests_skipped_errno(int r, const char *message); int log_tests_skipped_errno(int r, const char *message);
bool have_namespaces(void); bool have_namespaces(void);
/* We use the small but non-trivial limit here */
#define CAN_MEMLOCK_SIZE (512 * 1024U)
bool can_memlock(void);
/* Executes the statement 'x' only if sd_booted() returns a positive value; otherwise prints a
 * message that names the skipped statement.
 * NOTE(review): this expands to a bare if/else block rather than do { ... } while (0), so a
 * trailing ';' at the call site becomes an extra empty statement. Take care when using the macro
 * as the body of an unbraced if/else. */
#define TEST_REQ_RUNNING_SYSTEMD(x) \
if (sd_booted() > 0) { \
x; \
} else { \
printf("systemd not booted skipping '%s'\n", #x); \
}

View File

@ -50,7 +50,8 @@ tests += [
[], [],
[]], []],
[['src/test/test-engine.c'], [['src/test/test-engine.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libudev, libudev,
libshared], libshared],
@ -139,7 +140,8 @@ tests += [
[], [],
[]], []],
[['src/test/test-unit-name.c'], [['src/test/test-unit-name.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[threads, [threads,
@ -149,7 +151,8 @@ tests += [
libmount, libmount,
libblkid]], libblkid]],
[['src/test/test-load-fragment.c'], [['src/test/test-load-fragment.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[threads, [threads,
@ -424,7 +427,8 @@ tests += [
[libbasic], [libbasic],
[]], []],
[['src/test/test-bpf-devices.c'], [['src/test/test-bpf.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[libmount, [libmount,
@ -434,17 +438,8 @@ tests += [
libselinux, libselinux,
libblkid]], libblkid]],
[['src/test/test-bpf-firewall.c'], [['src/test/test-watch-pid.c',
[libcore, 'src/test/test-helper.c'],
libshared],
[libmount,
threads,
librt,
libseccomp,
libselinux,
libblkid]],
[['src/test/test-watch-pid.c'],
[libcore, [libcore,
libshared], libshared],
[libmount, [libmount,
@ -594,12 +589,14 @@ tests += [
libshared], libshared],
[]], []],
[['src/test/test-cgroup-unit-default.c'], [['src/test/test-cgroup-unit-default.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[]], []],
[['src/test/test-cgroup-mask.c'], [['src/test/test-cgroup-mask.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[threads, [threads,
@ -646,7 +643,8 @@ tests += [
[], [],
'', 'manual'], '', 'manual'],
[['src/test/test-path.c'], [['src/test/test-path.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[threads, [threads,
@ -656,7 +654,8 @@ tests += [
libmount, libmount,
libblkid]], libblkid]],
[['src/test/test-execute.c'], [['src/test/test-execute.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[threads, [threads,
@ -685,7 +684,8 @@ tests += [
[], [],
[]], []],
[['src/test/test-sched-prio.c'], [['src/test/test-sched-prio.c',
'src/test/test-helper.c'],
[libcore, [libcore,
libshared], libshared],
[threads, [threads,

View File

@ -1,306 +0,0 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include "alloc-util.h"
#include "bpf-devices.h"
#include "bpf-program.h"
#include "cgroup-setup.h"
#include "errno-list.h"
#include "fd-util.h"
#include "fs-util.h"
#include "path-util.h"
#include "tests.h"
/* Installs a CGROUP_DEVICE_POLICY_CLOSED program that contains only the static whitelist, then
 * opens a set of standard device nodes for reading and for writing. The test fails if any of
 * those opens is refused with EPERM. */
static void test_policy_closed(const char *cgroup_path, BPFProgram **installed_prog) {
        static const char *const nodes[] = {
                "/dev/null",
                "/dev/zero",
                "/dev/full",
                "/dev/random",
                "/dev/urandom",
                "/dev/tty",
                "/dev/ptmx",
        };
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s */", __func__);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_CLOSED, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_static(prog, cgroup_path);
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_CLOSED, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        for (size_t i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++) {
                _cleanup_close_ int fd = -1, fd2 = -1;
                const char *s = nodes[i];

                /* We ignore errors other than EPERM, e.g. ENOENT or ENXIO */

                fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
                log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
                if (fd < 0 && errno == EPERM)
                        wrong++;

                fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
                log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
                if (fd2 < 0 && errno == EPERM)
                        wrong++;
        }

        assert_se(wrong == 0);
}
/* Installs a CGROUP_DEVICE_POLICY_STRICT program that whitelists /dev/null ("rw"), /dev/random
 * ("r") and /dev/zero ("w"), then checks for each probed node that opening it for reading and for
 * writing succeeds or fails as listed in the table below. */
static void test_policy_strict(const char *cgroup_path, BPFProgram **installed_prog) {
        static const struct {
                const char *node;
                bool readable;
                bool writable;
        } expected[] = {
                { "/dev/null",   true,  true  },
                { "/dev/random", true,  false },
                { "/dev/zero",   false, true  },
                { "/dev/full",   false, false }, /* not whitelisted at all */
        };
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s */", __func__);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_device(prog, cgroup_path, "/dev/null", "rw");
        assert_se(r >= 0);

        r = bpf_devices_whitelist_device(prog, cgroup_path, "/dev/random", "r");
        assert_se(r >= 0);

        r = bpf_devices_whitelist_device(prog, cgroup_path, "/dev/zero", "w");
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        for (size_t i = 0; i < sizeof(expected) / sizeof(expected[0]); i++) {
                _cleanup_close_ int fd = -1, fd2 = -1;
                const char *s = expected[i].node;

                /* Every mismatch between the outcome of open() and the expectation counts as one error */

                fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
                log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
                wrong += (fd >= 0) != expected[i].readable;

                fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
                log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
                wrong += (fd2 >= 0) != expected[i].writable;
        }

        assert_se(wrong == 0);
}
/* Installs a CGROUP_DEVICE_POLICY_STRICT program that whitelists character devices matching
 * 'pattern' with "rw" access, then checks that /dev/null and /dev/full can be opened for reading
 * and writing while /dev/tty cannot. */
static void test_policy_whitelist_major(const char *pattern, const char *cgroup_path, BPFProgram **installed_prog) {
        /* /dev/null, /dev/full have major==1, /dev/tty has major==5 */
        static const struct {
                const char *node;
                bool accessible;
        } expected[] = {
                { "/dev/null", true  },
                { "/dev/full", true  },
                { "/dev/tty",  false },
        };
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        unsigned wrong = 0;
        int r;

        log_info("/* %s(%s) */", __func__, pattern);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_major(prog, cgroup_path, pattern, 'c', "rw");
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        for (size_t i = 0; i < sizeof(expected) / sizeof(expected[0]); i++) {
                _cleanup_close_ int fd = -1, fd2 = -1;
                const char *s = expected[i].node;

                fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
                log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
                wrong += (fd >= 0) != expected[i].accessible;

                fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
                log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
                wrong += (fd2 >= 0) != expected[i].accessible;
        }

        assert_se(wrong == 0);
}
/* Installs a CGROUP_DEVICE_POLICY_STRICT program that whitelists "*" for the given device type
 * with "rw" access. /dev/null must then be openable read-write when 'type' is 'c', and must not be
 * openable for any other type. */
static void test_policy_whitelist_major_star(char type, const char *cgroup_path, BPFProgram **installed_prog) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        const bool expect_open = type == 'c';
        const char *s = "/dev/null";
        _cleanup_close_ int fd = -1;
        unsigned wrong = 0;
        int r;

        log_info("/* %s(type=%c) */", __func__, type);

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
        assert_se(r >= 0);

        r = bpf_devices_whitelist_major(prog, cgroup_path, "*", type, "rw");
        assert_se(r >= 0);

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
        assert_se(r >= 0);

        fd = open(s, O_CLOEXEC|O_RDWR|O_NOCTTY);
        log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
        wrong += (fd >= 0) != expect_open;

        assert_se(wrong == 0);
}
/* Installs a CGROUP_DEVICE_POLICY_STRICT program without any matching rule and verifies that
 * /dev/null cannot be opened afterwards. With 'add_mismatched' set, a whitelist request for a
 * pattern that is expected to match nothing is issued first and must fail. */
static void test_policy_empty(bool add_mismatched, const char *cgroup_path, BPFProgram **installed_prog) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        const char *s = "/dev/null";
        _cleanup_close_ int fd = -1;
        unsigned wrong = 0;
        int r;

        log_info("/* %s(add_mismatched=%s) */", __func__, yes_no(add_mismatched));

        r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, add_mismatched);
        assert_se(r >= 0);

        if (add_mismatched) {
                r = bpf_devices_whitelist_major(prog, cgroup_path, "foobarxxx", 'c', "rw");
                assert_se(r < 0);
        }

        r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, false, cgroup_path, installed_prog);
        assert_se(r >= 0);

        fd = open(s, O_CLOEXEC|O_RDWR|O_NOCTTY);
        log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
        if (fd >= 0)
                wrong++;

        assert_se(wrong == 0);
}
/* Entry point of the BPF device filter test.
 *
 * Raises RLIMIT_MEMLOCK on a best-effort basis and skips the test if the unified cgroup hierarchy,
 * mlock(), cgroupfs or the BPF device filter is unavailable. Otherwise it moves into a private
 * cgroup, runs all policy tests there and finally attaches the process to the parent cgroup. */
int main(int argc, char *argv[]) {
        static const char *const major_patterns[] = { "mem", "1" };
        static const char wildcard_types[] = { 'c', 'b' };
        _cleanup_free_ char *cgroup = NULL, *parent = NULL;
        _cleanup_(rmdir_and_freep) char *controller_path = NULL;
        CGroupMask supported;
        struct rlimit rl;
        int r;

        test_setup_logging(LOG_DEBUG);

        assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
        rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
        rl.rlim_cur = rl.rlim_max;
        (void) setrlimit(RLIMIT_MEMLOCK, &rl);

        if (cg_all_unified() <= 0)
                return log_tests_skipped("We don't seem to be running with unified cgroup hierarchy");

        if (!can_memlock())
                return log_tests_skipped("Can't use mlock()");

        r = enter_cgroup_subroot(&cgroup);
        if (r == -ENOMEDIUM)
                return log_tests_skipped("cgroupfs not available");

        r = bpf_devices_supported();
        if (!r)
                return log_tests_skipped("BPF device filter not supported");
        assert_se(r == 1);

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup, NULL, &controller_path);
        assert_se(r >= 0);

        /* Receives the program installed by each test, via their 'installed_prog' parameter */
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;

        test_policy_closed(cgroup, &prog);
        test_policy_strict(cgroup, &prog);

        for (size_t i = 0; i < sizeof(major_patterns) / sizeof(major_patterns[0]); i++)
                test_policy_whitelist_major(major_patterns[i], cgroup, &prog);

        for (size_t i = 0; i < sizeof(wildcard_types) / sizeof(wildcard_types[0]); i++)
                test_policy_whitelist_major_star(wildcard_types[i], cgroup, &prog);

        test_policy_empty(false, cgroup, &prog);
        test_policy_empty(true, cgroup, &prog);

        /* Move back to the parent of the test cgroup before leaving */
        assert_se(parent = dirname_malloc(cgroup));

        assert_se(cg_mask_supported(&supported) >= 0);
        r = cg_attach_everywhere(supported, parent, 0, NULL, NULL);
        assert_se(r >= 0);

        return 0;
}

View File

@ -2,6 +2,7 @@
#include <linux/bpf_insn.h> #include <linux/bpf_insn.h>
#include <string.h> #include <string.h>
#include <sys/mman.h>
#include <unistd.h> #include <unistd.h>
#include "bpf-firewall.h" #include "bpf-firewall.h"
@ -10,12 +11,37 @@
#include "manager.h" #include "manager.h"
#include "rm-rf.h" #include "rm-rf.h"
#include "service.h" #include "service.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "unit.h" #include "unit.h"
#include "virt.h" #include "virt.h"
/* We use the same limit here that PID 1 bumps RLIMIT_MEMLOCK to if it can */
#define CAN_MEMLOCK_SIZE (64U*1024U*1024U)
/* Checks whether CAN_MEMLOCK_SIZE bytes of freshly mapped memory can be mlock()ed.
 *
 * BPF programs are charged against RLIMIT_MEMLOCK, so the test is skipped if memory cannot be
 * locked at all. The limit itself is not queried, because in container environments the
 * RLIMIT_MEMLOCK value we see might not match the value actually in effect. */
static bool can_memlock(void) {
        void *p = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
        if (p == MAP_FAILED)
                return false;

        const bool lockable = mlock(p, CAN_MEMLOCK_SIZE) >= 0;

        /* Only unlock what was actually locked; the mapping is released in either case */
        if (lockable)
                assert_se(munlock(p, CAN_MEMLOCK_SIZE) >= 0);
        assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0);

        return lockable;
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
const struct bpf_insn exit_insn[] = { struct bpf_insn exit_insn[] = {
BPF_MOV64_IMM(BPF_REG_0, 0), /* drop */ BPF_MOV64_IMM(BPF_REG_0, 0), /* drop */
BPF_EXIT_INSN() BPF_EXIT_INSN()
}; };
@ -35,16 +61,16 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_DEBUG); test_setup_logging(LOG_DEBUG);
if (detect_container() > 0) if (detect_container() > 0)
return log_tests_skipped("test-bpf-firewall fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666"); return log_tests_skipped("test-bpf fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");
assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0); assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE); rl.rlim_cur = rl.rlim_max = MAX3(rl.rlim_cur, rl.rlim_max, CAN_MEMLOCK_SIZE);
(void) setrlimit(RLIMIT_MEMLOCK, &rl); (void) setrlimit(RLIMIT_MEMLOCK, &rl);
if (!can_memlock()) if (!can_memlock())
return log_tests_skipped("Can't use mlock()"); return log_tests_skipped("Can't use mlock(), skipping.");
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");

View File

@ -8,6 +8,7 @@
#include "manager.h" #include "manager.h"
#include "rm-rf.h" #include "rm-rf.h"
#include "string-util.h" #include "string-util.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "unit.h" #include "unit.h"
@ -33,7 +34,7 @@ static int test_cgroup_mask(void) {
int r; int r;
CGroupMask cpu_accounting_mask = get_cpu_accounting_mask(); CGroupMask cpu_accounting_mask = get_cpu_accounting_mask();
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");

View File

@ -5,6 +5,7 @@
#include "cgroup.h" #include "cgroup.h"
#include "manager.h" #include "manager.h"
#include "rm-rf.h" #include "rm-rf.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "unit.h" #include "unit.h"
@ -18,7 +19,7 @@ static int test_default_memory_low(void) {
uint64_t dml_tree_default; uint64_t dml_tree_default;
int r; int r;
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");

View File

@ -13,6 +13,7 @@
#include "stat-util.h" #include "stat-util.h"
#include "string-util.h" #include "string-util.h"
#include "strv.h" #include "strv.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "user-util.h" #include "user-util.h"
#include "util.h" #include "util.h"

View File

@ -7,6 +7,7 @@
#include "manager.h" #include "manager.h"
#include "rm-rf.h" #include "rm-rf.h"
#include "strv.h" #include "strv.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "service.h" #include "service.h"
@ -21,7 +22,7 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_DEBUG); test_setup_logging(LOG_DEBUG);
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");
@ -29,7 +30,7 @@ int main(int argc, char *argv[]) {
assert_se(set_unit_path(get_testdata_dir()) >= 0); assert_se(set_unit_path(get_testdata_dir()) >= 0);
assert_se(runtime_dir = setup_fake_runtime_dir()); assert_se(runtime_dir = setup_fake_runtime_dir());
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m); r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
if (manager_errno_skip_test(r)) if (MANAGER_SKIP_TEST(r))
return log_tests_skipped_errno(r, "manager_new"); return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0); assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0); assert_se(manager_startup(m, NULL, NULL) >= 0);

View File

@ -20,6 +20,7 @@
#endif #endif
#include "service.h" #include "service.h"
#include "stat-util.h" #include "stat-util.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "unit.h" #include "unit.h"
#include "user-util.h" #include "user-util.h"
@ -36,11 +37,6 @@ static int cld_dumped_to_killed(int code) {
return code == CLD_DUMPED ? CLD_KILLED : code; return code == CLD_DUMPED ? CLD_KILLED : code;
} }
_unused_ static bool is_run_on_travis_ci(void) {
/* https://docs.travis-ci.com/user/environment-variables#default-environment-variables */
return streq_ptr(getenv("TRAVIS"), "true");
}
static void wait_for_service_finish(Manager *m, Unit *unit) { static void wait_for_service_finish(Manager *m, Unit *unit) {
Service *service = NULL; Service *service = NULL;
usec_t ts; usec_t ts;
@ -785,7 +781,7 @@ static int run_tests(UnitFileScope scope, const test_entry tests[], char **patte
assert_se(tests); assert_se(tests);
r = manager_new(scope, MANAGER_TEST_RUN_BASIC, &m); r = manager_new(scope, MANAGER_TEST_RUN_BASIC, &m);
if (manager_errno_skip_test(r)) if (MANAGER_SKIP_TEST(r))
return log_tests_skipped_errno(r, "manager_new"); return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0); assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0); assert_se(manager_startup(m, NULL, NULL) >= 0);
@ -869,7 +865,7 @@ int main(int argc, char *argv[]) {
if (getuid() != 0) if (getuid() != 0)
return log_tests_skipped("not root"); return log_tests_skipped("not root");
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");

35
src/test/test-helper.c Normal file
View File

@ -0,0 +1,35 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "test-helper.h"
#include "random-util.h"
#include "alloc-util.h"
#include "cgroup-setup.h"
#include "string-util.h"
/* Creates a randomly named cgroup below the cgroup path reported by cg_pid_get_path(NULL, 0, ...) and
 * attaches to it via cg_attach_everywhere() with PID 0 (presumably the calling process — confirm against
 * the cgroup helpers).
 *
 * Returns -ENOMEDIUM (after logging a warning) if cg_pid_get_path() reports that error, the negative
 * error from cg_create_everywhere() if creating the cgroup fails, and otherwise the result of
 * cg_attach_everywhere(). */
int enter_cgroup_subroot(void) {
        _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL;
        CGroupMask supported;
        int r;

        r = cg_pid_get_path(NULL, 0, &cgroup_root);
        if (r == -ENOMEDIUM)
                return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m");
        /* assert_se() instead of assert(): the check must survive NDEBUG builds, as otherwise a failed
         * lookup would leave cgroup_root NULL and it would be formatted with %s below. */
        assert_se(r >= 0);

        assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0);
        assert_se(cg_mask_supported(&supported) >= 0);

        /* Errors from here on are not asserted on but handed back to the caller: later cgroup
         * operations will fail too, and it's fine if errors are handled at that point. */

        r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot);
        if (r < 0)
                return r;

        return cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL);
}
/* Tells whether the tests are being executed on Travis CI, by comparing the $TRAVIS environment
 * variable against "true" with streq_ptr(). See
 * https://docs.travis-ci.com/user/environment-variables#default-environment-variables */
bool is_run_on_travis_ci(void) {
        const char *travis = getenv("TRAVIS");

        return streq_ptr(travis, "true");
}

31
src/test/test-helper.h Normal file
View File

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
/***
Copyright © 2013 Holger Hans Peter Freyther
***/
#include "sd-daemon.h"
#include "macro.h"
/* Executes x only if sd_booted() returns a positive value; otherwise prints a note that x is being
 * skipped. The body is wrapped in do { } while (0) so that the expansion forms a single statement and
 * can be used safely in unbraced if/else bodies. Invoke with a trailing semicolon. */
#define TEST_REQ_RUNNING_SYSTEMD(x)                                       \
        do {                                                              \
                if (sd_booted() > 0) {                                    \
                        x;                                                \
                } else {                                                  \
                        printf("systemd not booted skipping '%s'\n", #x); \
                }                                                         \
        } while (0)
/* Evaluates to true if r is one of the negative errno values listed below. The tests apply this to
 * the return value of manager_new() and skip the test (instead of failing it) when it matches. */
#define MANAGER_SKIP_TEST(r) \
IN_SET(r, \
-EPERM, \
-EACCES, \
-EADDRINUSE, \
-EHOSTDOWN, \
-ENOENT, \
-ENOMEDIUM /* cannot determine cgroup */ \
)
/* Creates a randomly named sub-cgroup and attaches to it. Returns -ENOMEDIUM if the cgroup path
 * lookup reports that error (callers treat this as "cgroupfs not available"), or another negative
 * value if creating the cgroup fails; otherwise the result of the attach operation. */
int enter_cgroup_subroot(void);
/* Returns whether the $TRAVIS environment variable compares equal to "true". */
bool is_run_on_travis_ci(void);

View File

@ -23,6 +23,7 @@
#include "specifier.h" #include "specifier.h"
#include "string-util.h" #include "string-util.h"
#include "strv.h" #include "strv.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "tmpfile-util.h" #include "tmpfile-util.h"
#include "user-util.h" #include "user-util.h"
@ -96,7 +97,7 @@ static void test_config_parse_exec(void) {
_cleanup_(unit_freep) Unit *u = NULL; _cleanup_(unit_freep) Unit *u = NULL;
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m); r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
if (manager_errno_skip_test(r)) { if (MANAGER_SKIP_TEST(r)) {
log_notice_errno(r, "Skipping test: manager_new: %m"); log_notice_errno(r, "Skipping test: manager_new: %m");
return; return;
} }
@ -441,7 +442,7 @@ static void test_config_parse_log_extra_fields(void) {
ExecContext c = {}; ExecContext c = {};
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m); r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
if (manager_errno_skip_test(r)) { if (MANAGER_SKIP_TEST(r)) {
log_notice_errno(r, "Skipping test: manager_new: %m"); log_notice_errno(r, "Skipping test: manager_new: %m");
return; return;
} }
@ -779,7 +780,7 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_INFO); test_setup_logging(LOG_INFO);
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");

View File

@ -16,6 +16,7 @@
#include "rm-rf.h" #include "rm-rf.h"
#include "string-util.h" #include "string-util.h"
#include "strv.h" #include "strv.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "unit.h" #include "unit.h"
#include "util.h" #include "util.h"
@ -31,12 +32,12 @@ static int setup_test(Manager **m) {
assert_se(m); assert_se(m);
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &tmp); r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &tmp);
if (manager_errno_skip_test(r)) if (MANAGER_SKIP_TEST(r))
return log_tests_skipped_errno(r, "manager_new"); return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0); assert_se(r >= 0);
assert_se(manager_startup(tmp, NULL, NULL) >= 0); assert_se(manager_startup(tmp, NULL, NULL) >= 0);

View File

@ -25,6 +25,7 @@
#include "stdio-util.h" #include "stdio-util.h"
#include "string-util.h" #include "string-util.h"
#include "terminal-util.h" #include "terminal-util.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "util.h" #include "util.h"
#include "virt.h" #include "virt.h"

View File

@ -9,6 +9,7 @@
#include "macro.h" #include "macro.h"
#include "manager.h" #include "manager.h"
#include "rm-rf.h" #include "rm-rf.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
@ -20,7 +21,7 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_INFO); test_setup_logging(LOG_INFO);
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");
@ -28,7 +29,7 @@ int main(int argc, char *argv[]) {
assert_se(set_unit_path(get_testdata_dir()) >= 0); assert_se(set_unit_path(get_testdata_dir()) >= 0);
assert_se(runtime_dir = setup_fake_runtime_dir()); assert_se(runtime_dir = setup_fake_runtime_dir());
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m); r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
if (manager_errno_skip_test(r)) if (MANAGER_SKIP_TEST(r))
return log_tests_skipped_errno(r, "manager_new"); return log_tests_skipped_errno(r, "manager_new");
assert_se(r >= 0); assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0); assert_se(manager_startup(m, NULL, NULL) >= 0);

View File

@ -15,6 +15,7 @@
#include "special.h" #include "special.h"
#include "specifier.h" #include "specifier.h"
#include "string-util.h" #include "string-util.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
#include "unit-def.h" #include "unit-def.h"
#include "unit-name.h" #include "unit-name.h"
@ -233,7 +234,7 @@ static int test_unit_printf(void) {
assert_se(get_shell(&shell) >= 0); assert_se(get_shell(&shell) >= 0);
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m); r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
if (manager_errno_skip_test(r)) if (MANAGER_SKIP_TEST(r))
return log_tests_skipped_errno(r, "manager_new"); return log_tests_skipped_errno(r, "manager_new");
assert_se(r == 0); assert_se(r == 0);
@ -870,7 +871,7 @@ int main(int argc, char* argv[]) {
test_setup_logging(LOG_INFO); test_setup_logging(LOG_INFO);
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");

View File

@ -4,6 +4,7 @@
#include "manager.h" #include "manager.h"
#include "rm-rf.h" #include "rm-rf.h"
#include "service.h" #include "service.h"
#include "test-helper.h"
#include "tests.h" #include "tests.h"
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
@ -16,7 +17,7 @@ int main(int argc, char *argv[]) {
if (getuid() != 0) if (getuid() != 0)
return log_tests_skipped("not root"); return log_tests_skipped("not root");
r = enter_cgroup_subroot(NULL); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available"); return log_tests_skipped("cgroupfs not available");

View File

@ -277,35 +277,20 @@ static int context_read_data(Context *c) {
static int context_write_data_timezone(Context *c) { static int context_write_data_timezone(Context *c) {
_cleanup_free_ char *p = NULL; _cleanup_free_ char *p = NULL;
const char *source;
assert(c); assert(c);
/* No timezone is very similar to UTC. Hence in either of these cases link the UTC file in. Except if if (isempty(c->zone)) {
* it isn't installed, in which case we remove the symlink altogether. Since glibc defaults to an
* internal version of UTC in that case behaviour is mostly equivalent. We still prefer creating the
* symlink though, since things are more self explanatory then. */
if (isempty(c->zone) || streq(c->zone, "UTC")) {
if (access("/usr/share/zoneinfo/UTC", F_OK) < 0) {
if (unlink("/etc/localtime") < 0 && errno != ENOENT) if (unlink("/etc/localtime") < 0 && errno != ENOENT)
return -errno; return -errno;
return 0; return 0;
} }
source = "../usr/share/zoneinfo/UTC";
} else {
p = path_join("../usr/share/zoneinfo", c->zone); p = path_join("../usr/share/zoneinfo", c->zone);
if (!p) if (!p)
return -ENOMEM; return log_oom();
source = p; return symlink_atomic(p, "/etc/localtime");
}
return symlink_atomic(source, "/etc/localtime");
} }
static int context_write_data_local_rtc(Context *c) { static int context_write_data_local_rtc(Context *c) {
@ -653,7 +638,7 @@ static int method_set_timezone(sd_bus_message *m, void *userdata, sd_bus_error *
return r; return r;
if (!timezone_is_valid(z, LOG_DEBUG)) if (!timezone_is_valid(z, LOG_DEBUG))
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid or not installed time zone '%s'", z); return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid time zone '%s'", z);
if (streq_ptr(z, c->zone)) if (streq_ptr(z, c->zone))
return sd_bus_reply_method_return(m, NULL); return sd_bus_reply_method_return(m, NULL);