Compare commits

...

5 Commits

Author SHA1 Message Date
leafcompost 4cfbd7344f
Merge bf2328c714 into d145d1d410 2024-11-07 01:38:15 +08:00
maia x. bf2328c714 man: document confext reload behavior for ExtensionDirectories/Images 2024-10-05 15:55:53 -07:00
maia x. 1e4e23cb7e test: check reloading notify-reload service refreshes vpick extensions 2024-10-05 15:55:53 -07:00
maia x. 77eb9bf10d core: reload confexts when reloading notify-reload services
`ExtensionImages=` and `ExtensionDirectories=` now let you specify vpick-named
extensions; however, since they just get set up once when the service is
started, you can't see newer versions without restarting the service entirely.
Here, also reload confext extensions when you reload a service. This allows you
to deploy a new version of some configuration and have it picked up at reload
time without interruption to your workload.

Right now, we would only reload confext extensions and leave the sysext ones
behind, since it didn't seem prudent to swap out what is likely program code at
reload. This is made possible by only going for the
`SYSTEMD_CONFEXT_HIERARCHIES` overlays (which only contains `/etc`). For now, we
also only do this for the notify-reload service type until more knobs are added
in the future.
2024-10-05 15:55:53 -07:00
maia x. 650e82ee3c vpick: add path_uses_vpick helper method
Add a path_uses_vpick helper method to determine if a path matches
the vpick format ('PATH/NAME.SUFFIX.v' or 'PATH.v/NAME___.SUFFIX').
2024-10-05 15:55:53 -07:00
12 changed files with 369 additions and 2 deletions

View File

@ -562,6 +562,13 @@
To disable the safety check that the extension-release file name matches the image file name, the
<varname>x-systemd.relax-extension-release-check</varname> mount option may be appended.</para>
<para>This option can be used together with a <option>notify-reload</option> service type and
<citerefentry><refentrytitle>systemd.v</refentrytitle><manvolnum>7</manvolnum></citerefentry>
to manage configuration updates. When such a system service carrying confext images is reloaded, the
confext itself will also be reloaded to pick up any changes. This only applies to confext extensions.
See also
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details.</para>
<para>When <varname>DevicePolicy=</varname> is set to <literal>closed</literal> or
<literal>strict</literal>, or set to <literal>auto</literal> and <varname>DeviceAllow=</varname> is
set, then this setting adds <filename>/dev/loop-control</filename> with <constant>rw</constant> mode,
@ -606,6 +613,14 @@
or the host. See:
<citerefentry><refentrytitle>os-release</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
<para>This option can be used together with a <option>notify-reload</option> service type and
<citerefentry><refentrytitle>systemd.v</refentrytitle><manvolnum>7</manvolnum></citerefentry>
to manage configuration updates. When such a system service carrying confext directories is reloaded,
the confext itself will also be reloaded to pick up any changes. This only applies to confext
extensions. See also
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details.</para>
<para>Note that usage from user units requires overlayfs support in unprivileged user namespaces,
which was first introduced in kernel v5.11.</para>

View File

@ -71,6 +71,7 @@
#include "unit-serialize.h"
#include "user-util.h"
#include "utmp-wtmp.h"
#include "vpick.h"
static bool is_terminal_input(ExecInput i) {
return IN_SET(i,
@ -1938,6 +1939,25 @@ char** exec_context_get_restrict_filesystems(const ExecContext *c) {
return l ? TAKE_PTR(l) : strv_new(NULL);
}
int exec_context_has_vpicked_extensions(const ExecContext *context) {
int r;
assert(context);
FOREACH_ARRAY(mi, context->extension_images, context->n_extension_images) {
r = path_uses_vpick(mi->source);
if (r != 0)
return r;
}
STRV_FOREACH(ed, context->extension_directories) {
r = path_uses_vpick(*ed);
if (r != 0)
return r;
}
return 0;
}
void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts) {
assert(s);

View File

@ -559,6 +559,8 @@ char** exec_context_get_syscall_log(const ExecContext *c);
char** exec_context_get_address_families(const ExecContext *c);
char** exec_context_get_restrict_filesystems(const ExecContext *c);
/* Returns > 0 if any extension image source or extension directory of the context is a vpick-style
 * path, 0 if none is, or a negative value if one of the paths could not be examined. */
int exec_context_has_vpicked_extensions(const ExecContext *context);
void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts);
void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
void exec_status_handoff(ExecStatus *s, const struct ucred *ucred, const dual_timestamp *ts);

View File

@ -37,6 +37,8 @@
#include "nulstr-util.h"
#include "os-util.h"
#include "path-util.h"
#include "pidref.h"
#include "process-util.h"
#include "selinux-util.h"
#include "socket-util.h"
#include "sort-util.h"
@ -3296,6 +3298,117 @@ bool ns_type_supported(NamespaceType type) {
return access(ns_proc, F_OK) == 0;
}
/* Re-creates the extension overlays for the hierarchies named by the environment variable
 * 'hierarchy_env' and moves them into the mount namespace of the process 'target'.
 *
 * The extension images/directories are taken from 'p'. The mounts are prepared in a forked helper that
 * runs in its own mount namespace; each resulting overlay is then bound into the target's namespace
 * via bind_mount_in_namespace(). An overlay that turns out to be empty (apply_one_mount() returned 0)
 * is unmounted in the target instead.
 *
 * Returns 0 if there was nothing to mount or once the helper has been waited for, and a negative value
 * if parsing the hierarchies, collecting the mounts, or safe_fork() itself reports an error.
 * NOTE(review): a failing helper is only visible through safe_fork()'s FORK_WAIT return value — confirm
 * it is reported as a negative value. */
int refresh_extensions_in_namespace(
                const PidRef *target,
                const char *hierarchy_env,
                const NamespaceParameters *p) {

        const char *overlay_prefix = "/run/systemd/mount-rootfs";
        _cleanup_(mount_list_done) MountList ml = {};
        _cleanup_strv_free_ char **hierarchies = NULL;
        int r;

        assert(pidref_is_set(target));
        assert(hierarchy_env);
        assert(p);

        log_debug("Refreshing extensions in-namespace for hierarchy '%s'", hierarchy_env);

        r = parse_env_extension_hierarchies(&hierarchies, hierarchy_env);
        if (r < 0)
                return r;

        r = append_extensions(
                        &ml,
                        overlay_prefix,
                        p->private_namespace_dir,
                        hierarchies,
                        p->extension_images,
                        p->n_extension_images,
                        p->extension_directories);
        if (r < 0)
                return r;

        if (ml.n_mounts == 0)
                return 0;

        r = safe_fork("(sd-ns-refresh-exts)",
                      FORK_DEATHSIG_SIGTERM | FORK_WAIT | FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE,
                      NULL);
        if (r < 0)
                return r;
        if (r == 0) {
                /* Child: failures are reported through the exit status only. */
                (void) mkdir_p_label(overlay_prefix, 0555);

                /* This is effectively two rounds, since all the extensions come before overlays
                 * (setup_namespace() similarly relies on this property).
                 *
                 * (1) First, set up all the extension mounts in the child, which are not visible from the
                 * process. (2) Then, set up overlays for the sysext/confext hierarchies again using the new
                 * extension mounts as layers, and move them into the namespace. */
                FOREACH_ARRAY(m, ml.mounts, ml.n_mounts) {
                        if (IN_SET(m->mode, MOUNT_EXTENSION_DIRECTORY, MOUNT_EXTENSION_IMAGE)) {
                                r = apply_one_mount(p->root_directory, m, p);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to apply extension mount: %m");
                                        _exit(EXIT_FAILURE);
                                }

                        } else if (m->mode == MOUNT_OVERLAY) {
                                _cleanup_free_ char *path_relative = NULL, *path_in_namespace = NULL;
                                /* The flags are computed per overlay, so that one empty overlay does not
                                 * turn the handling of every following overlay into an unmount. */
                                MountInNamespaceFlags min_flags = MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY;

                                r = apply_one_mount(p->root_directory, m, p);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to apply overlay mount: %m");
                                        _exit(EXIT_FAILURE);
                                }
                                if (r == 0)
                                        /* Tried to mount overlay, but it is now empty - umount it then. */
                                        min_flags |= MOUNT_IN_NAMESPACE_UMOUNT;

                                /* bind_mount_in_namespace takes a src on the outside and a dest evaluated
                                 * within the namespace. First, figure out where we want the overlay on top
                                 * of within the namespace. */
                                r = path_make_relative(overlay_prefix, mount_entry_path(m), &path_relative);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to make path relative: %m");
                                        _exit(EXIT_FAILURE);
                                }

                                r = asprintf(&path_in_namespace, "%s/%s", empty_to_root(p->root_directory), path_relative);
                                if (r < 0) {
                                        log_oom_debug();
                                        _exit(EXIT_FAILURE);
                                }

                                r = bind_mount_in_namespace(
                                                target,
                                                p->propagate_dir,
                                                p->incoming_dir,
                                                /* src= */ mount_entry_path(m),
                                                /* dest= */ path_in_namespace,
                                                min_flags);
                                if (r < 0) {
                                        log_debug_errno(
                                                        r,
                                                        "Failed to move overlay within %s->%s: %m",
                                                        mount_entry_path(m),
                                                        path_in_namespace);
                                        _exit(EXIT_FAILURE);
                                }
                        }
                }

                _exit(EXIT_SUCCESS);
        }

        return 0;
}
static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
[PROTECT_HOME_NO] = "no",
[PROTECT_HOME_YES] = "yes",

View File

@ -16,6 +16,7 @@ typedef struct MountImage MountImage;
#include "fs-util.h"
#include "macro.h"
#include "namespace-util.h"
#include "pidref.h"
#include "runtime-scope.h"
#include "string-util.h"
@ -250,3 +251,8 @@ const char* namespace_type_to_string(NamespaceType t) _const_;
NamespaceType namespace_type_from_string(const char *s) _pure_;
bool ns_type_supported(NamespaceType type);
/* Rebuilds the extension overlays for the hierarchies listed in the environment variable named by
 * 'hierarchy_env', using the extension images/directories from 'p', and moves them into the mount
 * namespace of the process 'target'. 'target' must be a set PidRef. */
int refresh_extensions_in_namespace(
const PidRef *target,
const char *hierarchy_env,
const NamespaceParameters *p);

View File

@ -21,6 +21,7 @@
#include "devnum-util.h"
#include "env-util.h"
#include "escape.h"
#include "execute.h"
#include "exec-credential.h"
#include "exit-status.h"
#include "fd-util.h"
@ -33,11 +34,13 @@
#include "manager.h"
#include "missing_audit.h"
#include "mount-util.h"
#include "namespace.h"
#include "open-file.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "random-util.h"
#include "runtime-scope.h"
#include "selinux-util.h"
#include "serialize.h"
#include "service.h"
@ -2709,6 +2712,67 @@ static void service_enter_reload_by_notify(Service *s) {
log_unit_warning(UNIT(s), "Failed to schedule propagation of reload, ignoring: %s", bus_error_message(&error, r));
}
/* Tells whether reloading this service should also refresh its extensions: true only for
 * notify-reload services of the system manager that reference at least one vpick'd extension. */
static bool service_should_reload_extensions(Service *s) {
        int has_vpick;

        assert(s);

        /* Only support this for notify-reload service types. */
        if (s->type != SERVICE_NOTIFY_RELOAD)
                return false;

        /* TODO: Add support for user services, which can use
         * ExtensionDirectories= + notify-reload. For now, skip for user
         * services. */
        if (UNIT(s)->manager->runtime_scope != RUNTIME_SCOPE_SYSTEM) {
                log_unit_debug(UNIT(s), "Not reloading extensions for user services.");
                return false;
        }

        has_vpick = exec_context_has_vpicked_extensions(&s->exec_context);
        if (has_vpick >= 0)
                return has_vpick > 0;

        /* The lookup itself failed: warn and behave as if there was nothing to reload. */
        log_unit_warning_errno(UNIT(s), has_vpick, "Failed to determine if service should reload extensions, assuming false: %m");
        return false;
}
/* Refreshes the confext overlays of a running service as part of a reload.
 *
 * Does nothing (returns 0) when service_should_reload_extensions() says no, or when the service has no
 * main PID whose namespace could be entered. Otherwise returns the result of
 * refresh_extensions_in_namespace(), or -ENOMEM if the propagate directory path cannot be allocated. */
static int service_reload_extensions(Service *s) {
        /* TODO: do this asynchronously */
        _cleanup_free_ char *propagate_dir = NULL;
        const PidRef *main_pid;

        assert(s);

        /* This check has to come first: it is what filters out user services, so asserting on the runtime
         * scope before it would abort a user manager on every service reload. */
        if (!service_should_reload_extensions(s))
                return 0;

        /* TODO: remove after adding support for user services */
        assert(UNIT(s)->manager->runtime_scope == RUNTIME_SCOPE_SYSTEM);

        /* refresh_extensions_in_namespace() asserts that the target PID is set, so bail out gracefully
         * here if there is no main process to refresh. */
        main_pid = unit_main_pid(UNIT(s));
        if (!main_pid || !pidref_is_set(main_pid)) {
                log_unit_debug(UNIT(s), "Not reloading extensions, service has no main PID.");
                return 0;
        }

        propagate_dir = path_join("/run/systemd/propagate/", UNIT(s)->id);
        if (!propagate_dir)
                return -ENOMEM;

        NamespaceParameters p = {
                .private_namespace_dir = "/run/systemd",
                .incoming_dir = "/run/systemd/incoming",
                .propagate_dir = propagate_dir,
                .runtime_scope = UNIT(s)->manager->runtime_scope,
                .root_directory = s->exec_context.root_directory,
                .extension_images = s->exec_context.extension_images,
                .n_extension_images = s->exec_context.n_extension_images,
                .extension_directories = s->exec_context.extension_directories,
                .extension_image_policy = s->exec_context.extension_image_policy
        };

        /* Only reload confext, and not sysext, because it doesn't make sense
           for program code to be swapped at reload. */
        return refresh_extensions_in_namespace(
                        main_pid,
                        "SYSTEMD_CONFEXT_HIERARCHIES",
                        &p);
}
static void service_enter_reload(Service *s) {
bool killed = false;
int r;
@ -2720,6 +2784,14 @@ static void service_enter_reload(Service *s) {
usec_t ts = now(CLOCK_MONOTONIC);
/* If the service carries confexts, try to refresh its vpick'd confext extensions. This is particularly
 * beneficial for notify-reload services, which can then pick up a new version of their configuration.
 */
r = service_reload_extensions(s);
if (r < 0)
log_unit_warning_errno(UNIT(s), r, "Failed to reload confexts, ignoring: %m");
if (s->type == SERVICE_NOTIFY_RELOAD && pidref_is_set(&s->main_pid)) {
r = pidref_kill_and_sigcont(&s->main_pid, s->reload_signal);
if (r < 0) {

View File

@ -1132,7 +1132,7 @@ static int mount_in_namespace(
_cleanup_close_pair_ int errno_pipe_fd[2] = EBADF_PAIR;
pid_t child;
if (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) {
if (!(flags & MOUNT_IN_NAMESPACE_UMOUNT) && flags & MOUNT_IN_NAMESPACE_IS_IMAGE) {
r = verity_dissect_and_mount(
chased_src_fd,
chased_src_path,
@ -1150,7 +1150,7 @@ static int mount_in_namespace(
return log_debug_errno(r,
"Failed to dissect and mount image '%s': %m",
chased_src_path);
} else {
} else if (!(flags & MOUNT_IN_NAMESPACE_UMOUNT)) {
new_mount_fd = open_tree(
chased_src_fd,
"",
@ -1189,6 +1189,18 @@ static int mount_in_namespace(
if (r == 0) {
errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
if (flags & MOUNT_IN_NAMESPACE_UMOUNT) {
r = umount_verbose(LOG_DEBUG, dest, UMOUNT_NOFOLLOW);
if (r < 0) {
(void) write(errno_pipe_fd[1], &r, sizeof(r));
errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
_exit(EXIT_FAILURE);
}
_exit(EXIT_SUCCESS);
}
if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY)
(void) mkdir_parents(dest, 0755);

View File

@ -119,6 +119,7 @@ typedef enum MountInNamespaceFlags {
MOUNT_IN_NAMESPACE_READ_ONLY = 1 << 0,
MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY = 1 << 1,
MOUNT_IN_NAMESPACE_IS_IMAGE = 1 << 2,
MOUNT_IN_NAMESPACE_UMOUNT = 1 << 3,
} MountInNamespaceFlags;
int bind_mount_in_namespace(

View File

@ -681,6 +681,41 @@ int path_pick_update_warn(
return 1;
}
int path_uses_vpick(const char *path) {
_cleanup_free_ char *dir = NULL, *parent = NULL, *fname = NULL;
int r;
assert(path);
r = path_extract_filename(path, &fname);
if (r == -EADDRNOTAVAIL)
return 0;
if (r < 0)
return r;
/* ...PATH/NAME.SUFFIX.v */
if (endswith(fname, ".v"))
return 1;
/* ...PATH.v/NAME___.SUFFIX */
if (!strrstr(fname, "___"))
return 0;
r = path_extract_directory(path, &dir);
if (IN_SET(r, -EDESTADDRREQ, -EADDRNOTAVAIL)) /* only filename specified (no dir), or root or "." */
return 0;
if (r < 0)
return r;
r = path_extract_filename(dir, &parent);
if (r == -EADDRNOTAVAIL)
return 0;
if (r < 0)
return r;
return !!endswith(parent, ".v");
}
const PickFilter pick_filter_image_raw = {
.type_mask = (UINT32_C(1) << DT_REG) | (UINT32_C(1) << DT_BLK),
.architecture = _ARCHITECTURE_INVALID,

View File

@ -56,6 +56,8 @@ int path_pick_update_warn(
PickFlags flags,
PickResult *ret);
/* Returns > 0 if the path matches the vpick format ('PATH/NAME.SUFFIX.v' or 'PATH.v/NAME___.SUFFIX'),
 * 0 if it does not, or a negative value if the path could not be examined. */
int path_uses_vpick(const char *path);
extern const PickFilter pick_filter_image_raw;
extern const PickFilter pick_filter_image_dir;
extern const PickFilter pick_filter_image_any;

View File

@ -168,4 +168,27 @@ TEST(path_pick) {
assert_se(result.architecture == ARCHITECTURE_S390);
}
TEST(path_uses_vpick) {
        /* Last component ends in ".v" */
        assert_se(path_uses_vpick("foo.v") > 0);
        assert_se(path_uses_vpick("path/to/foo.v") > 0);
        assert_se(path_uses_vpick("./path/to/foo.v") > 0);
        assert_se(path_uses_vpick("path/to.v/foo.v") > 0);
        assert_se(path_uses_vpick("path/to/foo.raw.v") > 0);
        assert_se(path_uses_vpick("/var/lib/machines/mymachine.raw.v/") > 0);
        assert_se(path_uses_vpick("path/to.v/foo___.hi/a.v") > 0);

        /* "___" filename directly inside a ".v" directory */
        assert_se(path_uses_vpick("to.v/foo___.raw") > 0);
        assert_se(path_uses_vpick("path/to.v/foo___.raw") > 0);

        /* Near misses on the ".v" suffix */
        assert_se(path_uses_vpick("path/to/foo.mp4.vtt") == 0);
        assert_se(path_uses_vpick("path/to/foo.mp4.v.1") == 0);
        assert_se(path_uses_vpick("path/to.v/a") == 0);

        /* "___" without a ".v" parent, or too few underscores */
        assert_se(path_uses_vpick("path/to/foo___.raw") == 0);
        assert_se(path_uses_vpick("path/to.v/foo__") == 0);
        assert_se(path_uses_vpick("foo___.raw") == 0);

        /* Degenerate paths must never be reported as vpick paths */
        assert_se(path_uses_vpick("/") < 1);
        assert_se(path_uses_vpick(".") < 1);
        assert_se(path_uses_vpick("") < 1);
}
DEFINE_TEST_MAIN(LOG_DEBUG);

View File

@ -518,6 +518,72 @@ rm -rf "$VDIR" "$EMPTY_VDIR"
systemd-dissect --umount "$IMAGE_DIR/app0"
systemd-dissect --umount "$IMAGE_DIR/app1"
# Check reloading refreshes vpick extensions
#
# A notify-reload service lists /etc for "marker" files each time it receives SIGHUP. A new version of
# the extension is dropped into the vpick directory between reloads, and the journal is checked for the
# marker of that version.
VBASE="vtest$RANDOM"
VDIR="/tmp/${VBASE}.v"
mkdir "$VDIR"
cat >/run/systemd/system/testservice-50g.service <<EOF
[Service]
Type=notify-reload
EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
ExtensionDirectories=${VDIR}
ExecStart=bash -c ' \\
trap "{ \\
systemd-notify --reloading; \\
ls /etc | grep marker; \\
systemd-notify --ready; \\
}" SIGHUP; \\
systemd-notify --ready; \\
while true; do sleep 1; done; \\
'
EOF
mkdir -p "$VDIR/${VBASE}_1/etc/extension-release.d/"
echo "ID=_any" >"$VDIR/${VBASE}_1/etc/extension-release.d/extension-release.${VBASE}_1"
touch "$VDIR/${VBASE}_1/etc/${VBASE}_1.marker"
systemctl start testservice-50g.service
systemctl is-active testservice-50g.service
# First reload; at reload time, the marker file in /etc should be picked up.
systemctl try-reload-or-restart testservice-50g.service
# Flush the journal before grepping, like every later check does, so the marker line is not missed
journalctl --sync
journalctl -b -u testservice-50g | grep -q -F "${VBASE}_1.marker"
# Make a version 2 and reload again; this time we should see the v2 marker
mkdir -p "$VDIR/${VBASE}_2/etc/extension-release.d/"
echo "ID=_any" >"$VDIR/${VBASE}_2/etc/extension-release.d/extension-release.${VBASE}_2"
touch "$VDIR/${VBASE}_2/etc/${VBASE}_2.marker"
systemctl try-reload-or-restart testservice-50g.service
journalctl --sync
journalctl -b -u testservice-50g | grep -q -F "${VBASE}_2.marker"
# Do it for a couple more times (to make sure we're tearing down old overlays)
for _ in {1..5}; do systemctl reload testservice-50g.service; done
systemctl stop testservice-50g.service
# Repeat the same vpick notify-reload test with ExtensionImages= (keeping the
# same VBASE and reusing VDIR files for convenience, but using .raw extensions
# this time)
VDIR2="/tmp/${VBASE}.raw.v"
mkdir "$VDIR2"
cp /run/systemd/system/testservice-50g.service /run/systemd/system/testservice-50h.service
sed -i "s%ExtensionDirectories=.*%ExtensionImages=$VDIR2%g" \
/run/systemd/system/testservice-50h.service
mksquashfs "$VDIR/${VBASE}_1" "$VDIR2/${VBASE}_1.raw"
systemctl start testservice-50h.service
systemctl is-active testservice-50h.service
# First reload should pick up the v1 marker
systemctl try-reload-or-restart testservice-50h.service
journalctl --sync
journalctl -b -u testservice-50h | grep -q -F "${VBASE}_1.marker"
# Second reload should pick up the v2 marker
mksquashfs "$VDIR/${VBASE}_2" "$VDIR2/${VBASE}_2.raw"
systemctl try-reload-or-restart testservice-50h.service
journalctl --sync
journalctl -b -u testservice-50h | grep -q -F "${VBASE}_2.marker"
# Test that removing all the extensions doesn't cause any issues
rm -rf "${VDIR2:?}"/*
systemctl try-reload-or-restart testservice-50h.service
systemctl is-active testservice-50h.service
systemctl stop testservice-50h.service
rm -rf "$VDIR" "$VDIR2"
# Test that an extension consisting of an empty directory under /etc/extensions/ takes precedence
mkdir -p /var/lib/extensions/
ln -s /tmp/app-nodistro.raw /var/lib/extensions/app-nodistro.raw