2025-10-08 13:14:45 +02:00
9 changed files with 48 additions and 135 deletions
--- a/62
+++ b/62
@ -310,44 +310,6 @@ CHANGES WITH 258 in spe:
          SO_PASSPIDFD socket option for AF_UNIX socket. There's also a new
          setting AcceptFileDescriptors= that controls the new SO_PASSRIGHTS.

-        * A new job type "lenient" has been added, that is similar to the
-          existing "fail" job mode, and which will fail the submitted
-          transaction immediately if it would stop any currently running unit.
-
-        * .socket units gained a new pair of settings DeferTrigger= and
-          DeferTriggerMaxSec= which modify triggering behaviour of the
-          socket. When used this will cause the triggered unit to be enqueued
-          with the new "lenient" job mode, and if the submission of the
-          transaction fails it is later retried to be submitted (up to a
-          configurable timeout), whenever a unit is stopped.
-
-        * The "preset" logic has been extended so that there are now three
-          preset directories: one that declares the default enablement state
-          for per-system services run on the host, one for per-user services,
-          and – now new – one for per-system services that are run in the
-          initrd. This reflects the fact that in many cases services that shall
-          be enabled by default on the host should not be enabled by default in
-          the initrd, or vice versa. Note that while the regular per-system
-          preset policy defaults to enabled, the one for the initrd defaults to
-          disabled.
-
-        * There are now new per-service settings
-          StateDirectoryQuota=/StateDirectoryAccounting=,
-          CacheDirectoryQuota=/CacheDirectoryAccounting=,
-          LogsDirectoryQuota=/LogsDirectoryAccounting= which allow doing
-          per-unit quota of the indicated per-unit directories. This is
-          implemented via project quota, as supported by xfs and ext4. This
-          does not support btrfs, currently. If quota accounting is enabled
-          this information is shown in the usual "systemctl status" output.
-
-        * The service manager gained a new KillUnitSubgroup() D-Bus method which may
-          be used to send a signal to a sub-control group of the unit's control
-          group. systemctl kill gained a new --kill-subgroup= switch to make
-          this available from the shell.
-
-        * A new PrivateBPF= switch has been added for unit files, which may be
-          used to mount a private bpffs instance for the unit's processes.
-
        systemd-journald & journal-remote:

        * journalctl's --setup-keys command now supports JSON output.
@ -643,10 +605,6 @@ CHANGES WITH 258 in spe:
          servers. Delegate zones can be configured via drop-ins below
          /etc/systemd/dns-delegate.d/*.dns-delegate.

-        * "resolvectl query -t sshfp" will now decode the returned RR
-          information, and show the cryptographic algorithms by name instead of
-          number.
-
        systemd-hostnamed:

        * The system hardware's serial number may now be read from DeviceTree
@ -1203,15 +1161,6 @@ CHANGES WITH 258 in spe:
          Hardware IDs" (CHIDs) of the local system. This is useful for
          preparing CHID-to-DeviceTree mappings when building UKIs.

-        * systemd-analyze gained a new "transient-settings" verb, which shows
-          all unit settings one can configure dynamically via the "-p" switch
-          when invoking transient units.
-
-        * systemd-analyze gained a new "unit-shell" verb that invokes an
-          interactive shell inside the processes namespaces of the main process
-          of a specified unit. This is useful for debugging unit sandboxes, and
-          getting an idea of what things look like from the "inside" of a service.
-
        * The "package note" specification for ELF binaries has been extended to
          cover PE binaries (i.e. UEFI binaries), too.

@ -1376,17 +1325,6 @@ CHANGES WITH 258 in spe:
          specified binary is immediately invoked, and not delayed until a
          connection comes in.

-        * systemd-ssh-generator will now generate the AF_VSOCK ssh listener
-          .socket unit, so that a tiny new helper "systemd-ssh-issue" is
-          invoked when the socket is bound, that generates a drop-in file
-          /run/issue.d/50-ssh-vsock.issue that is shown by "login" and other
-          subsystems at login time. The file reports the AF_VSOCK CID of the
-          system, along with very brief information how to connect to the
-          system via ssh-over-AF_VSOCK. Or in other words: if the system is
-          booted up in an AF_VSOCK capable VM the console login screen shown
-          once boot-up is complete will tell you how to connect to the system
-          via SSH, if that's available.
-
        — <place>, <date>

 CHANGES WITH 257:
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@ -142,9 +142,6 @@ int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

-        /* NB: The kernel returns ENODEV if we tried to read from cgroup.procs of a cgroup that has been
-         * removed already. Callers should handle that! */
-
        for (;;) {
                errno = 0;
                if (fscanf(f, "%lu", &ul) != 1) {
@ -303,13 +300,6 @@ int cg_kill(
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;

                        r = cg_read_pidref(f, &pidref, flags);
-                        if (r == -ENODEV) {
-                                /* reading from cgroup.procs will result in ENODEV if the cgroup is
-                                 * concurrently removed. Just leave in that case, because a removed cgroup
-                                 * contains no processes anymore. */
-                                done = true;
-                                break;
-                        }
                        if (r < 0)
                                return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
                        if (r == 0)
--- a/src/basic/recurse-dir.c
+++ b/src/basic/recurse-dir.c
@ -175,7 +175,7 @@ int recurse_dir(

                r = func(RECURSE_DIR_ENTER,
                         path,
-                         -EBADF, /* we have no parent fd */
+                         -1, /* we have no parent fd */
                         dir_fd,
                         NULL, /* we have no dirent */
                         statx_mask != 0 ? &root_sx : NULL,
@ -191,8 +191,7 @@ int recurse_dir(
        if (r < 0)
                return r;

-        FOREACH_ARRAY(entry, de->entries, de->n_entries) {
-                struct dirent *i = *entry;
+        for (size_t i = 0; i < de->n_entries; i++) {
                _cleanup_close_ int inode_fd = -EBADF, subdir_fd = -EBADF;
                _cleanup_free_ char *joined = NULL;
                struct statx sx;
@ -207,16 +206,16 @@ int recurse_dir(
                 */

                if (path) {
-                        joined = path_join(path, i->d_name);
+                        joined = path_join(path, de->entries[i]->d_name);
                        if (!joined)
                                return -ENOMEM;

                        p = joined;
                } else
-                        p = i->d_name;
+                        p = de->entries[i]->d_name;

-                if (IN_SET(i->d_type, DT_UNKNOWN, DT_DIR)) {
-                        subdir_fd = openat(dir_fd, i->d_name, O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC);
+                if (IN_SET(de->entries[i]->d_type, DT_UNKNOWN, DT_DIR)) {
+                        subdir_fd = openat(dir_fd, de->entries[i]->d_name, O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC);
                        if (subdir_fd < 0) {
                                if (errno == ENOENT) /* Vanished by now, go for next file immediately */
                                        continue;
@ -230,9 +229,9 @@ int recurse_dir(
                                        r = func(RECURSE_DIR_SKIP_OPEN_DIR_ERROR_BASE + errno,
                                                 p,
                                                 dir_fd,
-                                                 /* inode_fd = */ -EBADF,
-                                                 i,
-                                                 /* sx = */ NULL,
+                                                 -1,
+                                                 de->entries[i],
+                                                 NULL,
                                                 userdata);
                                        if (r == RECURSE_DIR_LEAVE_DIRECTORY)
                                                break;
@ -246,7 +245,7 @@ int recurse_dir(

                        } else {
                                /* If we managed to get a DIR* off the inode, it's definitely a directory. */
-                                i->d_type = DT_DIR;
+                                de->entries[i]->d_type = DT_DIR;

                                if (statx_mask != 0 || (flags & RECURSE_DIR_SAME_MOUNT)) {
                                        if (statx(subdir_fd, "", AT_EMPTY_PATH, statx_mask, &sx) < 0)
@ -262,7 +261,7 @@ int recurse_dir(

                        if (flags & RECURSE_DIR_INODE_FD) {

-                                inode_fd = openat(dir_fd, i->d_name, O_PATH|O_NOFOLLOW|O_CLOEXEC);
+                                inode_fd = openat(dir_fd, de->entries[i]->d_name, O_PATH|O_NOFOLLOW|O_CLOEXEC);
                                if (inode_fd < 0) {
                                        if (errno == ENOENT) /* Vanished by now, go for next file immediately */
                                                continue;
@ -274,9 +273,9 @@ int recurse_dir(
                                        r = func(RECURSE_DIR_SKIP_OPEN_INODE_ERROR_BASE + errno,
                                                 p,
                                                 dir_fd,
-                                                 /* inode_fd = */ -EBADF,
-                                                 i,
-                                                 /* sx = */ NULL,
+                                                 -1,
+                                                 de->entries[i],
+                                                 NULL,
                                                 userdata);
                                        if (r == RECURSE_DIR_LEAVE_DIRECTORY)
                                                break;
@ -309,9 +308,9 @@ int recurse_dir(
                                        inode_fd = safe_close(inode_fd);
                                }

-                        } else if (statx_mask != 0 || (i->d_type == DT_UNKNOWN && (flags & RECURSE_DIR_ENSURE_TYPE))) {
+                        } else if (statx_mask != 0 || (de->entries[i]->d_type == DT_UNKNOWN && (flags & RECURSE_DIR_ENSURE_TYPE))) {

-                                if (statx(dir_fd, i->d_name, AT_SYMLINK_NOFOLLOW, statx_mask | STATX_TYPE, &sx) < 0) {
+                                if (statx(dir_fd, de->entries[i]->d_name, AT_SYMLINK_NOFOLLOW, statx_mask | STATX_TYPE, &sx) < 0) {
                                        if (errno == ENOENT) /* Vanished by now? Go for next file immediately */
                                                continue;

@ -322,9 +321,9 @@ int recurse_dir(
                                        r = func(RECURSE_DIR_SKIP_STAT_INODE_ERROR_BASE + errno,
                                                 p,
                                                 dir_fd,
-                                                 /* inode_fd = */ -EBADF,
-                                                 i,
-                                                 /* sx = */ NULL,
+                                                 -1,
+                                                 de->entries[i],
+                                                 NULL,
                                                 userdata);
                                        if (r == RECURSE_DIR_LEAVE_DIRECTORY)
                                                break;
@ -352,9 +351,9 @@ int recurse_dir(
                                        r = func(RECURSE_DIR_SKIP_STAT_INODE_ERROR_BASE + EISDIR,
                                                 p,
                                                 dir_fd,
-                                                 /* inode_fd = */ -EBADF,
-                                                 i,
-                                                 /* sx = */ NULL,
+                                                 -1,
+                                                 de->entries[i],
+                                                 NULL,
                                                 userdata);
                                        if (r == RECURSE_DIR_LEAVE_DIRECTORY)
                                                break;
@ -370,11 +369,11 @@ int recurse_dir(
                        /* Copy over the data we acquired through statx() if we acquired any */
                        if (sx.stx_mask & STATX_TYPE) {
                                assert((subdir_fd < 0) == !S_ISDIR(sx.stx_mode));
-                                i->d_type = IFTODT(sx.stx_mode);
+                                de->entries[i]->d_type = IFTODT(sx.stx_mode);
                        }

                        if (sx.stx_mask & STATX_INO)
-                                i->d_ino = sx.stx_ino;
+                                de->entries[i]->d_ino = sx.stx_ino;
                }

                if (subdir_fd >= 0) {
@ -384,7 +383,7 @@ int recurse_dir(
                                if (sx_valid && FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT))
                                        is_mount = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
                                else {
-                                        r = is_mount_point_at(dir_fd, i->d_name, /* flags = */ 0);
+                                        r = is_mount_point_at(dir_fd, de->entries[i]->d_name, 0);
                                        if (r < 0)
                                                log_debug_errno(r, "Failed to determine whether %s is a submount, assuming not: %m", p);

@ -396,7 +395,7 @@ int recurse_dir(
                                                 p,
                                                 dir_fd,
                                                 subdir_fd,
-                                                 i,
+                                                 de->entries[i],
                                                 statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */
                                                 userdata);
                                        if (r == RECURSE_DIR_LEAVE_DIRECTORY)
@ -415,7 +414,7 @@ int recurse_dir(
                                         p,
                                         dir_fd,
                                         subdir_fd,
-                                         i,
+                                         de->entries[i],
                                         statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */
                                         userdata);
                                if (r == RECURSE_DIR_LEAVE_DIRECTORY)
@ -430,7 +429,7 @@ int recurse_dir(
                                 p,
                                 dir_fd,
                                 subdir_fd,
-                                 i,
+                                 de->entries[i],
                                 statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */
                                 userdata);
                        if (r == RECURSE_DIR_LEAVE_DIRECTORY)
@ -444,7 +443,7 @@ int recurse_dir(
                                        p,
                                        statx_mask,
                                        n_depth_max - 1,
-                                        flags & ~RECURSE_DIR_TOPLEVEL, /* we already called the callback for this entry */
+                                        flags &~ RECURSE_DIR_TOPLEVEL, /* we already called the callback for this entry */
                                        func,
                                        userdata);
                        if (r != 0)
@ -454,7 +453,7 @@ int recurse_dir(
                                 p,
                                 dir_fd,
                                 subdir_fd,
-                                 i,
+                                 de->entries[i],
                                 statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */
                                 userdata);
                } else
@ -463,7 +462,7 @@ int recurse_dir(
                                 p,
                                 dir_fd,
                                 inode_fd,
-                                 i,
+                                 de->entries[i],
                                 statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */
                                 userdata);

@ -477,9 +476,9 @@ int recurse_dir(

                r = func(RECURSE_DIR_LEAVE,
                         path,
-                         -EBADF, /* we have no parent fd */
+                         -1,
                         dir_fd,
-                         NULL, /* we have no dirent */
+                         NULL,
                         statx_mask != 0 ? &root_sx : NULL,
                         userdata);
                if (!IN_SET(r, RECURSE_DIR_LEAVE_DIRECTORY, RECURSE_DIR_SKIP_ENTRY, RECURSE_DIR_CONTINUE))
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@ -1351,13 +1351,10 @@ static int append_cgroup(sd_bus_message *reply, const char *p, Set *pids) {
                /* libvirt / qemu uses threaded mode and cgroup.procs cannot be read at the lower levels.
                 * From https://docs.kernel.org/admin-guide/cgroup-v2.html#threads, “cgroup.procs” in a
                 * threaded domain cgroup contains the PIDs of all processes in the subtree and is not
-                 * readable in the subtree proper.
-                 *
-                 * We'll see ENODEV when trying to enumerate processes and the cgroup is removed at the same
-                 * time. Handle this gracefully. */
+                 * readable in the subtree proper. */

                r = cg_read_pidref(f, &pidref, /* flags = */ 0);
-                if (IN_SET(r, 0, -EOPNOTSUPP, -ENODEV))
+                if (IN_SET(r, 0, -EOPNOTSUPP))
                        break;
                if (r < 0)
                        return r;
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@ -824,7 +824,7 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u
         * caller's userns *without* any mount idmapping in place. To get that uid, we clone the
         * mount source tree and clear any existing idmapping and temporarily mount that tree over
         * the mount source before we stat the mount source to figure out the source uid. */
-        _cleanup_close_ int fd_clone = open_tree_attr_with_fallback(
+        _cleanup_close_ int fd_clone = open_tree_attr_fallback(
                        AT_FDCWD,
                        m->source,
                        OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC,
--- a/src/shared/cgroup-setup.c
+++ b/src/shared/cgroup-setup.c
@ -367,8 +367,6 @@ int cg_migrate(
                        if (r < 0)
                                return RET_GATHER(ret, r);
                }
-                if (r == -ENODEV)
-                        continue;
                if (r < 0)
                        return RET_GATHER(ret, r);
        } while (!done);
--- a/src/shared/cgroup-show.c
+++ b/src/shared/cgroup-show.c
@ -107,12 +107,9 @@ static int show_cgroup_one_by_path(
                /* libvirt / qemu uses threaded mode and cgroup.procs cannot be read at the lower levels.
                 * From https://docs.kernel.org/admin-guide/cgroup-v2.html#threads,
                 * “cgroup.procs” in a threaded domain cgroup contains the PIDs of all processes in
-                 * the subtree and is not readable in the subtree proper.
-                 *
-                 * ENODEV is generated when we enumerate processes from a cgroup and the cgroup is removed
-                 * concurrently. */
+                 * the subtree and is not readable in the subtree proper. */
                r = cg_read_pid(f, &pid, /* flags = */ 0);
-                if (IN_SET(r, 0, -EOPNOTSUPP, -ENODEV))
+                if (IN_SET(r, 0, -EOPNOTSUPP))
                        break;
                if (r < 0)
                        return r;
--- a/src/shared/mount-util.c
+++ b/src/shared/mount-util.c
@ -1441,18 +1441,10 @@ int make_userns(uid_t uid_shift,
        return TAKE_FD(userns_fd);
 }

-int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned int flags, struct mount_attr *attr) {
-        _cleanup_close_ int fd = -EBADF;
-
-        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
+int open_tree_attr_fallback(int dir_fd, const char *path, unsigned int flags, struct mount_attr *attr) {
        assert(attr);

-        if (isempty(path)) {
-                path = "";
-                flags |= AT_EMPTY_PATH;
-        }
-
-        fd = open_tree_attr(dir_fd, path, flags, attr, sizeof(struct mount_attr));
+        _cleanup_close_ int fd = open_tree_attr(dir_fd, path, flags, attr, sizeof(struct mount_attr));
        if (fd >= 0)
                return TAKE_FD(fd);
        if (!ERRNO_IS_NOT_SUPPORTED(errno))
@ -1500,8 +1492,8 @@ int remount_idmap_fd(

        for (size_t i = 0; i < n; i++) {
                /* Clone the mount point and set the user namespace mapping attribute on the cloned mount point. */
-                mount_fds[n_mounts_fds] = open_tree_attr_with_fallback(
-                                AT_FDCWD,
+                mount_fds[n_mounts_fds] = open_tree_attr_fallback(
+                                /* dir_fd= */ -EBADF,
                                paths[i],
                                OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC,
                                &(struct mount_attr) {
--- a/src/shared/mount-util.h
+++ b/src/shared/mount-util.h
@ -13,13 +13,11 @@ typedef struct SubMount {
 void sub_mount_array_free(SubMount *s, size_t n);

 int get_sub_mounts(const char *prefix, SubMount **ret_mounts, size_t *ret_n_mounts);
-int bind_mount_submounts(
-                const char *source,
-                const char *target);

 int repeat_unmount(const char *path, int flags);

 int umount_recursive_full(const char *target, int flags, char **keep);
+
 static inline int umount_recursive(const char *target, int flags) {
        return umount_recursive_full(target, flags, NULL);
 }
@ -150,12 +148,16 @@ typedef enum RemountIdmapping {
        _REMOUNT_IDMAPPING_INVALID = -EINVAL,
 } RemountIdmapping;

-int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned int flags, struct mount_attr *attr);
+int open_tree_attr_fallback(int dir_fd, const char *path, unsigned int flags, struct mount_attr *attr);

 int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
 int remount_idmap_fd(char **p, int userns_fd, uint64_t extra_mount_attr_set);
 int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);

+int bind_mount_submounts(
+                const char *source,
+                const char *target);
+
 /* Creates a mount point (without any parents) based on the source path or mode - i.e., a file or a directory */
 int make_mount_point_inode_from_mode(int dir_fd, const char *dest, mode_t source_mode, mode_t target_mode);
 int make_mount_point_inode_from_path(const char *source, const char *dest, mode_t mode);