Compare commits

...

4 Commits

Author SHA1 Message Date
leafcompost 383b0d6997
Merge 77aa738875 into 7ac1ad90d0 2024-09-17 22:57:07 -07:00
maia x. 77aa738875 man: document confext reload behavior for ExtensionDirectories/Images 2024-09-03 12:57:06 -07:00
maia x. f659808442 test: check reloading notify-reload service refreshes vpick extensions 2024-09-03 12:57:06 -07:00
maia x. a9dbf27342 core: reload confexts when reloading notify-reload services
`ExtensionImages=` and `ExtensionDirectories=` now let you specify vpick-named
extensions; however, since they just get set up once when the service is
started, you can't see newer versions without restarting the service entirely.
Here, also reload confext extensions when you reload a service. This allows you
to deploy a new version of some configuration and have it picked up at reload
time without interruption to your workload.

Right now, we would only reload confext extensions and leave the sysext ones
behind, since it didn't seem prudent to swap out what is likely program code at
reload. This is made possible by only going for the
`SYSTEMD_CONFEXT_HIERARCHIES` overlays (which only contains `/etc`). For now, we
also only do this for the notify-reload service type until more knobs are added
in the future.
2024-09-03 12:57:06 -07:00
7 changed files with 303 additions and 0 deletions

View File

@ -562,6 +562,13 @@
To disable the safety check that the extension-release file name matches the image file name, the
<varname>x-systemd.relax-extension-release-check</varname> mount option may be appended.</para>
<para>This option can be used together with the <option>notify-reload</option> service type and
<citerefentry><refentrytitle>systemd.v</refentrytitle><manvolnum>7</manvolnum></citerefentry>
to manage configuration updates. When a system service carrying confext images is reloaded, the
confext images are refreshed as well, so that a newer version is picked up. This only applies to
confext extensions; sysext extensions are left untouched. See also
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details.</para>
<para>When <varname>DevicePolicy=</varname> is set to <literal>closed</literal> or
<literal>strict</literal>, or set to <literal>auto</literal> and <varname>DeviceAllow=</varname> is
set, then this setting adds <filename>/dev/loop-control</filename> with <constant>rw</constant> mode,
@ -606,6 +613,14 @@
or the host. See:
<citerefentry><refentrytitle>os-release</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
<para>This option can be used together with the <option>notify-reload</option> service type and
<citerefentry><refentrytitle>systemd.v</refentrytitle><manvolnum>7</manvolnum></citerefentry>
to manage configuration updates. When a system service carrying confext directories is reloaded,
the confext directories are refreshed as well, so that a newer version is picked up. This only
applies to confext extensions; sysext extensions are left untouched. See also
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details.</para>
<para>Note that usage from user units requires overlayfs support in unprivileged user namespaces,
which was first introduced in kernel v5.11.</para>

View File

@ -1875,6 +1875,40 @@ char** exec_context_get_restrict_filesystems(const ExecContext *c) {
return l ? TAKE_PTR(l) : strv_new(NULL);
}
static int extension_name_is_vpicked(const char *path) {
_cleanup_free_ char *dir = NULL;
int r;
assert(path);
r = path_extract_filename(path, &dir);
if (r == -EADDRNOTAVAIL)
return 0;
if (r < 0)
return r;
return !!endswith(dir, ".v");
}
int exec_context_has_vpicked_extensions(const ExecContext *context) {
int r;
assert(context);
FOREACH_ARRAY(mi, context->extension_images, context->n_extension_images) {
r = extension_name_is_vpicked(mi->source);
if (r != 0)
return r;
}
STRV_FOREACH(ed, context->extension_directories) {
r = extension_name_is_vpicked(*ed);
if (r != 0)
return r;
}
return 0;
}
void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts) {
assert(s);

View File

@ -547,6 +547,8 @@ char** exec_context_get_syscall_log(const ExecContext *c);
char** exec_context_get_address_families(const ExecContext *c);
char** exec_context_get_restrict_filesystems(const ExecContext *c);
/* Returns > 0 if any configured extension image/directory path has a last component ending in ".v",
 * 0 if none does, and a negative errno-style value on failure. */
int exec_context_has_vpicked_extensions(const ExecContext *context);
void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts);
void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
void exec_status_handoff(ExecStatus *s, const struct ucred *ucred, const dual_timestamp *ts);

View File

@ -37,6 +37,8 @@
#include "nulstr-util.h"
#include "os-util.h"
#include "path-util.h"
#include "pidref.h"
#include "process-util.h"
#include "selinux-util.h"
#include "socket-util.h"
#include "sort-util.h"
@ -3181,6 +3183,118 @@ bool ns_type_supported(NamespaceType type) {
return access(ns_proc, F_OK) == 0;
}
/* Re-creates the extension overlays for the hierarchies named by 'hierarchy_env' and places them
 * into the mount namespace of the already running process 'target'.
 *
 * target:        process whose namespace receives the refreshed overlays; must be set.
 * hierarchy_env: name of the environment variable handed to parse_env_extension_hierarchies() to
 *                obtain the list of hierarchies to refresh (e.g. "SYSTEMD_CONFEXT_HIERARCHIES").
 * p:             namespace parameters; the extension images/directories, root directory,
 *                private namespace dir, propagate dir and incoming dir are read from it.
 *
 * The mounts are assembled in a forked helper that runs in a new mount namespace, and the parent
 * waits for it (FORK_WAIT). Returns 0 on success or if there is nothing to mount, and a negative
 * errno-style value on failure. */
int refresh_extensions_in_namespace(
                const PidRef *target,
                const char *hierarchy_env,
                const NamespaceParameters *p) {

        const char *overlay_prefix = "/run/systemd/mount-rootfs";
        _cleanup_(mount_list_done) MountList ml = {};
        _cleanup_strv_free_ char **hierarchies = NULL;
        MountInNamespaceFlags min_flags = MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY;
        int r;

        assert(pidref_is_set(target));
        assert(hierarchy_env);
        assert(p);

        log_debug("Refreshing extensions in-namespace for hierarchy '%s'", hierarchy_env);

        r = parse_env_extension_hierarchies(&hierarchies, hierarchy_env);
        if (r < 0)
                return r;

        r = append_extensions(
                        &ml,
                        overlay_prefix,
                        p->private_namespace_dir,
                        hierarchies,
                        p->extension_images,
                        p->n_extension_images,
                        p->extension_directories);
        if (r < 0)
                return r;

        /* No extensions configured for these hierarchies, nothing to refresh. */
        if (ml.n_mounts == 0)
                return 0;

        r = safe_fork("(sd-ns-refresh-exts)",
                      FORK_DEATHSIG_SIGTERM | FORK_WAIT | FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE,
                      NULL);
        if (r < 0)
                return r;
        if (r == 0) {
                /* Child: everything below happens in the helper's own mount namespace. */

                (void) mkdir_p_label(overlay_prefix, 0555);

                /* This is effectively two rounds, since all the extensions come
                 * before overlays (setup_namespace() similarly relies on this
                 * property).
                 *
                 * (1) First, set up all the extension mounts in the child, which
                 * are not visible from the process. (2) Then, set up overlays
                 * for the sysext/confext hierarchies again using the new
                 * extension mounts as layers, and move them into the namespace. */
                FOREACH_ARRAY(m, ml.mounts, ml.n_mounts) {
                        if (m->mode == MOUNT_EXTENSION_DIRECTORY || m->mode == MOUNT_EXTENSION_IMAGE) {
                                r = apply_one_mount(p->root_directory, m, p);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to apply extension mount: %m");
                                        _exit(EXIT_FAILURE);
                                }

                        } else if (m->mode == MOUNT_OVERLAY) {
                                _cleanup_free_ char *path_relative = NULL, *path_in_namespace = NULL;

                                r = mount_overlay(m);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to mount overlay: %m");
                                        _exit(EXIT_FAILURE);
                                }
                                if (r == 0) /* Tried to mount overlay, but skipped. */
                                        continue;

                                /* bind_mount_in_namespace takes a src on the outside
                                 * and a dest evaluated within the namespace. First,
                                 * figure out where we want the overlay on top of within
                                 * the namespace. */
                                r = path_make_relative(overlay_prefix, mount_entry_path(m), &path_relative);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to make path relative: %m");
                                        _exit(EXIT_FAILURE);
                                }

                                /* path_join() rather than "%s/%s" so that an empty/"/" root
                                 * does not produce a doubled leading slash. */
                                path_in_namespace = path_join(empty_to_root(p->root_directory), path_relative);
                                if (!path_in_namespace) {
                                        log_oom_debug();
                                        _exit(EXIT_FAILURE);
                                }

                                r = bind_mount_in_namespace(
                                                target,
                                                p->propagate_dir,
                                                p->incoming_dir,
                                                /* src= */ mount_entry_path(m),
                                                /* dest= */ path_in_namespace,
                                                min_flags);
                                if (r < 0) {
                                        log_debug_errno(
                                                        r,
                                                        "Failed to move overlay within %s->%s: %m",
                                                        mount_entry_path(m),
                                                        path_in_namespace);
                                        _exit(EXIT_FAILURE);
                                }
                        }
                }

                _exit(EXIT_SUCCESS);
        }

        return 0;
}
static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
[PROTECT_HOME_NO] = "no",
[PROTECT_HOME_YES] = "yes",

View File

@ -16,6 +16,7 @@ typedef struct MountImage MountImage;
#include "fs-util.h"
#include "macro.h"
#include "namespace-util.h"
#include "pidref.h"
#include "runtime-scope.h"
#include "string-util.h"
@ -224,3 +225,8 @@ const char* namespace_type_to_string(NamespaceType t) _const_;
NamespaceType namespace_type_from_string(const char *s) _pure_;
bool ns_type_supported(NamespaceType type);
/* Rebuilds the extension overlays for the hierarchies listed in the environment variable named by
 * 'hierarchy_env' and places them into the mount namespace of 'target', using the extension
 * images/directories and paths from 'p'. Returns 0 on success (or if there is nothing to mount),
 * negative errno-style value on failure. */
int refresh_extensions_in_namespace(
const PidRef *target,
const char *hierarchy_env,
const NamespaceParameters *p);

View File

@ -21,6 +21,7 @@
#include "devnum-util.h"
#include "env-util.h"
#include "escape.h"
#include "execute.h"
#include "exec-credential.h"
#include "exit-status.h"
#include "fd-util.h"
@ -33,11 +34,13 @@
#include "manager.h"
#include "missing_audit.h"
#include "mount-util.h"
#include "namespace.h"
#include "open-file.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "random-util.h"
#include "runtime-scope.h"
#include "selinux-util.h"
#include "serialize.h"
#include "service.h"
@ -2787,6 +2790,63 @@ static void service_run_next_main(Service *s) {
(void) service_set_main_pidref(s, TAKE_PIDREF(pidref), &s->main_command->exec_status.start_timestamp);
}
/* Decides whether a reload of this service should also refresh its extensions. This is the case
 * only for notify-reload services of the system manager that have at least one vpicked extension
 * configured. Failure to inspect the extensions is logged and treated as "no". */
static bool service_should_reload_extensions(Unit *u) {
        Service *s = ASSERT_PTR(SERVICE(u));
        int has_vpicked;

        /* Only support this for notify-reload service types. */
        if (s->type != SERVICE_NOTIFY_RELOAD)
                return false;

        /* TODO: Add support for user services, which can use
         * ExtensionDirectories= + notify-reload. For now, skip for user
         * services. */
        if (u->manager->runtime_scope != RUNTIME_SCOPE_SYSTEM) {
                log_unit_debug(UNIT(s), "Not reloading extensions for user services.");
                return false;
        }

        has_vpicked = exec_context_has_vpicked_extensions(&s->exec_context);
        if (has_vpicked >= 0)
                return has_vpicked > 0;

        log_unit_warning_errno(u, has_vpicked, "Failed to determine if service should reload extensions, assuming false: %m");
        return false;
}
/* Refreshes the confext overlays inside the namespace of the service's main process, if the
 * service qualifies (see service_should_reload_extensions()).
 *
 * Returns 0 if nothing had to be done or the refresh succeeded, and a negative errno-style value
 * on failure. */
static int service_reload_extensions(Unit *u) {
        /* TODO: do this asynchronously */
        Service *s = ASSERT_PTR(SERVICE(u));
        _cleanup_free_ char *propagate_dir = NULL;
        const PidRef *main_pid;

        if (!service_should_reload_extensions(u))
                return 0;

        /* service_reload() may be invoked in SERVICE_EXITED state, where no main process needs to
         * be around anymore. refresh_extensions_in_namespace() asserts on a set PidRef, hence
         * bail out gracefully here instead of tripping that assertion. */
        main_pid = unit_main_pid(u);
        if (!pidref_is_set(main_pid)) {
                log_unit_debug(u, "Service has no main process, not reloading extensions.");
                return 0;
        }

        propagate_dir = path_join("/run/systemd/propagate/", u->id);
        if (!propagate_dir)
                return -ENOMEM;

        NamespaceParameters p = {
                .private_namespace_dir = "/run/systemd",
                .incoming_dir = "/run/systemd/incoming",
                .propagate_dir = propagate_dir,
                .runtime_scope = RUNTIME_SCOPE_SYSTEM,
                .root_directory = s->exec_context.root_directory,
                .extension_images = s->exec_context.extension_images,
                .n_extension_images = s->exec_context.n_extension_images,
                .extension_directories = s->exec_context.extension_directories,
                .extension_image_policy = s->exec_context.extension_image_policy
        };

        /* Only reload confext, and not sysext, because it doesn't make sense
           for program code to be swapped at reload. */
        return refresh_extensions_in_namespace(
                        main_pid,
                        "SYSTEMD_CONFEXT_HIERARCHIES",
                        &p);
}
static int service_start(Unit *u) {
Service *s = ASSERT_PTR(SERVICE(u));
int r;
@ -2941,9 +3001,19 @@ static int service_stop(Unit *u) {
/* Unit vtable entry point for reloading a service. Must only be called while the service is in
 * SERVICE_RUNNING or SERVICE_EXITED state. Always returns 1. */
static int service_reload(Unit *u) {
        Service *s = ASSERT_PTR(SERVICE(u));

        assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));

        /* Before entering the reload state, refresh vpick'd confext extensions so that a
         * notify-reload service can pick up a new version of its configuration. A failure
         * here is only logged; the reload itself proceeds regardless. */
        int r = service_reload_extensions(u);
        if (r < 0)
                log_unit_error_errno(u, r, "Failed to reload confexts, ignoring: %m");

        service_enter_reload(s);
        return 1;
}

View File

@ -516,6 +516,68 @@ rm -rf "$VDIR" "$EMPTY_VDIR"
systemd-dissect --umount "$IMAGE_DIR/app0"
systemd-dissect --umount "$IMAGE_DIR/app1"
# Check reloading refreshes vpick extensions
#
# A notify-reload service is started with ExtensionDirectories= pointing at a
# vpick directory (${VBASE}.v). On SIGHUP the service lists the marker files
# it sees in /etc, so the journal tells us which confext version was visible
# at reload time.
VBASE="vtest$RANDOM"
VDIR="/tmp/${VBASE}.v"
mkdir "$VDIR"
cat >/run/systemd/system/testservice-50g.service <<EOF
[Service]
Type=notify-reload
EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
ExtensionDirectories=${VDIR}
ExecStart=bash -c ' \\
trap "{ \\
systemd-notify --reloading; \\
ls /etc | grep marker; \\
systemd-notify --ready; \\
}" SIGHUP; \\
systemd-notify --ready; \\
while true; do sleep 1; done; \\
'
EOF
mkdir -p "$VDIR/${VBASE}_1/etc/extension-release.d/"
echo "ID=_any" >"$VDIR/${VBASE}_1/etc/extension-release.d/extension-release.${VBASE}_1"
touch "$VDIR/${VBASE}_1/etc/${VBASE}_1.marker"
systemctl start testservice-50g.service
systemctl is-active testservice-50g.service
# First reload; at reload time, the marker file in /etc should be picked up.
systemctl try-reload-or-restart testservice-50g.service
# Flush the journal first, otherwise the marker line may not be readable yet
journalctl --sync
journalctl -b -u testservice-50g | grep -q -F "${VBASE}_1.marker"
# Make a version 2 and reload again; this time we should see the v2 marker
mkdir -p "$VDIR/${VBASE}_2/etc/extension-release.d/"
echo "ID=_any" >"$VDIR/${VBASE}_2/etc/extension-release.d/extension-release.${VBASE}_2"
touch "$VDIR/${VBASE}_2/etc/${VBASE}_2.marker"
systemctl try-reload-or-restart testservice-50g.service
journalctl --sync
journalctl -b -u testservice-50g | grep -q -F "${VBASE}_2.marker"
# Do it for a couple more times (to make sure we're tearing down old overlays)
for _ in {1..5}; do systemctl reload testservice-50g.service; done
systemctl stop testservice-50g.service
# Repeat the same vpick notify-reload test with ExtensionImages= (keeping the
# same VBASE and reusing VDIR files for convenience, but using .raw extensions
# this time)
VDIR2="/tmp/${VBASE}.raw.v"
mkdir "$VDIR2"
# Derive the new unit from the 50g one, only swapping the extension setting
cp /run/systemd/system/testservice-50g.service /run/systemd/system/testservice-50h.service
sed -i "s%ExtensionDirectories=.*%ExtensionImages=$VDIR2%g" \
/run/systemd/system/testservice-50h.service
# Build the version 1 image from the version 1 directory tree created earlier
mksquashfs "$VDIR/${VBASE}_1" "$VDIR2/${VBASE}_1.raw"
systemctl start testservice-50h.service
systemctl is-active testservice-50h.service
# First reload should pick up the v1 marker
systemctl try-reload-or-restart testservice-50h.service
journalctl --sync
journalctl -b -u testservice-50h | grep -q -F "${VBASE}_1.marker"
# Second reload should pick up the v2 marker
mksquashfs "$VDIR/${VBASE}_2" "$VDIR2/${VBASE}_2.raw"
systemctl try-reload-or-restart testservice-50h.service
journalctl --sync
journalctl -b -u testservice-50h | grep -q -F "${VBASE}_2.marker"
systemctl stop testservice-50h.service
# Clean up both the directory-based and the image-based vpick directories
rm -rf "$VDIR" "$VDIR2"
# Test that an extension consisting of an empty directory under /etc/extensions/ takes precedence
mkdir -p /var/lib/extensions/
ln -s /tmp/app-nodistro.raw /var/lib/extensions/app-nodistro.raw