Merge bd59565d0e into 5261c521e3

mount-util: make path_get_mount_info() work arbitrary inode
Follow-up for d49d95df0a. Replaces 9a032ec55a. Fixes #35075.
2024-11-08 13:30:15 +00:00 · 2024-11-08 13:25:17 +01:00 · 2024-11-08 12:37:40 +01:00 · 2024-11-08 10:10:11 +01:00 · 2024-11-08 09:21:25 +01:00 · 2024-11-08 13:38:35 +09:00
9 changed files with 197 additions and 78 deletions
--- a/25
+++ b/25
@ -129,6 +129,10 @@ Deprecations and removals:
 Features:
 * format-table: introduce new cell type for strings with ansi sequences in
  them. display them in regular output mode (via strip_tab_ansi()), but
  suppress them in json mode.
 * machined: when registering a machine, also take a relative cgroup path,
  relative to the machine's unit. This is useful when registering unpriv
  machines, as they might sit down the cgroup tree, below a cgroup delegation
@ -217,12 +221,8 @@ Features:
  services where mount propagation from the root fs is off, and still have
  confext/sysext propagated in.
 * support F_DUPFD_QUERY for comparing fds in same_fd (requires kernel 6.10)
 * generic interface for varlink for setting log level and stuff that all our daemons can implement
 * use pty ioctl to get peer wherever possible (TIOCGPTPEER)
 * maybe teach repart.d/ dropins a new setting MakeMountNodes= or so, which is
  just like MakeDirectories=, but uses an access mode of 0000 and sets the +i
  chattr bit. This is useful as protection against early uses of /var/ or /tmp/
@ -253,8 +253,6 @@ Features:
 * initrd: when transitioning from initrd to host, validate that
  /lib/modules/`uname -r` exists, refuse otherwise
 * tmpfiles: add "owning" flag for lines that limits effect of --purge
 * signed bpf loading: to address need for signature verification for bpf
  programs when they are loaded, and given the bpf folks don't think this is
  realistic in kernel space, maybe add small daemon that facilitates this
@ -458,9 +456,6 @@ Features:
 * introduce mntid_t, and make it 64bit, as apparently the kernel switched to
  64bit mount ids
 * use udev rule networkd ownership property to take ownership of network
  interfaces nspawn creates
 * mountfsd/nsresourced
  - userdb: maybe allow callers to map one uid to their own uid
  - bpflsm: allow writes if resulting UID on disk would be userns' owner UID
@ -647,6 +642,7 @@ Features:
  - openpt_allocate_in_namespace()
  - unit_attach_pid_to_cgroup_via_bus()
  - cg_attach() – requires new kernel feature
  - journald's process cache
 * ddi must be listed as block device fstype
@ -1470,9 +1466,6 @@ Features:
 * in sd-id128: also parse UUIDs in RFC4122 URN syntax (i.e. chop off urn:uuid: prefix)
 * DynamicUser= + StateDirectory= → use uid mapping mounts, too, in order to
  make dirs appear under right UID.
 * systemd-sysext: optionally, run it in initrd already, before transitioning
  into host, to open up possibility for services shipped like that.
@ -1644,14 +1637,6 @@ Features:
 * maybe add kernel cmdline params: to force random seed crediting
 * introduce a new per-process uuid, similar to the boot id, the machine id, the
  invocation id, that is derived from process creds, specifically a hashed
  combination of AT_RANDOM + getpid() + the starttime from
  /proc/self/status. Then add these ids implicitly when logging. Deriving this
  uuid from these three things has the benefit that it can be derived easily
  from /proc/$PID/ in a stable, and unique way that changes on both fork() and
  exec().
 * let's not GC a unit while its ratelimits are still pending
 * when killing due to service watchdog timeout maybe detect whether target
--- a/src/basic/fs-util.c
+++ b/src/basic/fs-util.c
@ -1131,6 +1131,8 @@ int xopenat_full(int dir_fd, const char *path, int open_flags, XOpenFlags xopen_
         *   • If O_CREAT is used with XO_LABEL, any created file will be immediately relabelled.
         *
         *   • If the path is specified NULL or empty, behaves like fd_reopen().
         *
         *   • If XO_NOCOW is specified will turn on the NOCOW btrfs flag on the file, if available.
         */
        if (isempty(path)) {
--- a/src/shared/mount-util.c
+++ b/src/shared/mount-util.c
@ -1808,63 +1808,81 @@ char* umount_and_unlink_and_free(char *p) {
        return mfree(p);
 }
-static int path_get_mount_info(
+static int path_get_mount_info_at(
                int dir_fd,
                const char *path,
                char **ret_fstype,
                char **ret_options) {
        _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
-        _cleanup_free_ char *fstype = NULL, *options = NULL;
+        _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
-        struct libmnt_fs *fs;
+        int r, mnt_id;
        int r;
-        assert(path);
+        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
-        table = mnt_new_table();
+        r = path_get_mnt_id_at(dir_fd, path, &mnt_id);
        if (!table)
                return -ENOMEM;
        r = mnt_table_parse_mtab(table, /* filename = */ NULL);
        if (r < 0)
-                return r;
+                return log_debug_errno(r, "Failed to get mount ID: %m");
-        fs = mnt_table_find_mountpoint(table, path, MNT_ITER_FORWARD);
+        r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
-        if (!fs)
+        if (r < 0)
-                return -EINVAL;
+                return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
-        if (ret_fstype) {
+        for (;;) {
-                fstype = strdup(strempty(mnt_fs_get_fstype(fs)));
+                struct libmnt_fs *fs;
-                if (!fstype)
+
-                        return -ENOMEM;
+                r = mnt_table_next_fs(table, iter, &fs);
                if (r == 1)
                        break; /* EOF */
                if (r < 0)
                        return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
                if (mnt_fs_get_id(fs) != mnt_id)
                        continue;
                _cleanup_free_ char *fstype = NULL, *options = NULL;
                if (ret_fstype) {
                        fstype = strdup(strempty(mnt_fs_get_fstype(fs)));
                        if (!fstype)
                                return log_oom_debug();
                }
                if (ret_options) {
                        options = strdup(strempty(mnt_fs_get_options(fs)));
                        if (!options)
                                return log_oom_debug();
                }
                if (ret_fstype)
                        *ret_fstype = TAKE_PTR(fstype);
                if (ret_options)
                        *ret_options = TAKE_PTR(options);
                return 0;
        }
-        if (ret_options) {
+        return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Cannot find mount ID %i from /proc/self/mountinfo.", mnt_id);
                options = strdup(strempty(mnt_fs_get_options(fs)));
                if (!options)
                        return -ENOMEM;
        }
        if (ret_fstype)
                *ret_fstype = TAKE_PTR(fstype);
        if (ret_options)
                *ret_options = TAKE_PTR(options);
        return 0;
 }
-int path_is_network_fs_harder(const char *path) {
+int path_is_network_fs_harder_at(int dir_fd, const char *path) {
        _cleanup_close_ int fd = -EBADF;
        int r;
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
        fd = xopenat(dir_fd, path, O_PATH | O_CLOEXEC | O_NOFOLLOW);
        if (fd < 0)
                return fd;
        r = fd_is_network_fs(fd);
        if (r != 0)
                return r;
        _cleanup_free_ char *fstype = NULL, *options = NULL;
-        int r, ret;
+        r = path_get_mount_info_at(fd, /* path = */ NULL, &fstype, &options);
        assert(path);
        ret = path_is_network_fs(path);
        if (ret > 0)
                return true;
        r = path_get_mount_info(path, &fstype, &options);
        if (r < 0)
-                return RET_GATHER(ret, r);
+                return r;
        if (fstype_is_network(fstype))
                return true;
--- a/src/shared/mount-util.h
+++ b/src/shared/mount-util.h
@ -181,4 +181,7 @@ int mount_credentials_fs(const char *path, size_t size, bool ro);
 int make_fsmount(int error_log_level, const char *what, const char *type, unsigned long flags, const char *options, int userns_fd);
-int path_is_network_fs_harder(const char *path);
+int path_is_network_fs_harder_at(int dir_fd, const char *path);
 /* Convenience wrapper: forwards to path_is_network_fs_harder_at() with AT_FDCWD as the directory
  * file descriptor and returns its result unchanged. */
 static inline int path_is_network_fs_harder(const char *path) {
        return path_is_network_fs_harder_at(AT_FDCWD, path);
 }
--- a/src/sysext/sysext.c
+++ b/src/sysext/sysext.c
@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <getopt.h>
 #include <linux/loop.h>
@ -2208,6 +2209,46 @@ static int vl_method_refresh(sd_varlink *link, sd_json_variant *parameters, sd_v
        return sd_varlink_reply(link, NULL);
 }
 /* Extracts the version part of an extension image file name.
  *
  * The version is looked up after the LAST '_' in 'filename':
  *
  *   • no '_' at all (e.g. "image.raw")                  → "none"
  *   • no '.' after the '_' (e.g. "image_1")             → everything after the '_'
  *   • last '.' followed by a digit (e.g. "image_0.1")   → everything after the '_'
  *   • last '.' followed by a letter (e.g. "image_0.1.raw", "image_1.raw")
  *                                                       → text between the '_' and that '.'
  *   • anything else (e.g. trailing '.')                 → "n/a"
  *
  * On success returns 0 and stores a newly allocated string in *version, which the caller must
  * free. On allocation failure returns -ENOMEM and sets *version to NULL. The return value is
  * always zero or a negative errno, so that callers checking 'r < 0' reliably catch failures. */
 static int parse_version(const char *filename, char **version) {
    const char *start, *last_dot_after_underscore;
    char *v;

    /* Find the last occurrence of '_'. This marks the start of the extension version. */
    start = strrchr(filename, '_');
    if (!start)
        /* No underscore: there is no version in the file name, like in 'image.raw'. */
        v = strdup("none");
    else {
        start++;

        last_dot_after_underscore = strrchr(start, '.');
        if (!last_dot_after_underscore)
            /* No dot: treat everything after the '_' as the version. */
            v = strdup(start);
        else if (isdigit((unsigned char) last_dot_after_underscore[1]))
            /* The last dot is followed by a digit, hence it is part of the version itself
             * (e.g. image_0.1): the version is everything after the underscore. The cast is
             * needed because passing a negative char to the <ctype.h> functions is undefined. */
            v = strdup(start);
        else if (isalpha((unsigned char) last_dot_after_underscore[1]))
            /* The last dot is followed by a letter, hence it starts a suffix such as ".raw":
             * the version is the text between the last '_' and the last dot, e.g. "0.1" for
             * image_0.1.raw or "1" for image_1.raw. */
            v = strndup(start, (size_t) (last_dot_after_underscore - start));
        else
            /* In all other cases (including a trailing dot) report "n/a". */
            v = strdup("n/a");
    }

    /* As before, *version is NULL on allocation failure. */
    *version = v;
    return v ? 0 : -ENOMEM;
 }
 static int verb_list(int argc, char **argv, void *userdata) {
        _cleanup_hashmap_free_ Hashmap *images = NULL;
        _cleanup_(table_unrefp) Table *t = NULL;
@ -2227,14 +2268,27 @@ static int verb_list(int argc, char **argv, void *userdata) {
                return 0;
        }
-        t = table_new("name", "type", "path", "time");
+        t = table_new("name", "version", "type", "path", "time");
        if (!t)
                return log_oom();
        HASHMAP_FOREACH(img, images) {
                _cleanup_free_ char *image_name = NULL, *version = NULL;
                /* Get the absolute file name with version info for logging. */
                r = path_extract_filename(img->path, &image_name);
                if (r < 0)
                        return log_error_errno(r, "Failed to extract filename from '%s': %m", img->path);
                // Using the above, extract just the version part from the image name
                r = parse_version(image_name, &version);
                if (r < 0)
                        return log_error_errno(r, "Failed to get version for the extension '%s': %m", image_name);
                r = table_add_many(
                                t,
                                TABLE_STRING, img->name,
                                TABLE_STRING, version,
                                TABLE_STRING, image_type_to_string(img->type),
                                TABLE_PATH, img->path,
                                TABLE_TIMESTAMP, img->mtime != 0 ? img->mtime : img->crtime);
--- a/src/test/test-mount-util.c
+++ b/src/test/test-mount-util.c
@ -538,9 +538,53 @@ TEST(bind_mount_submounts) {
 }
 TEST(path_is_network_fs_harder) {
-        ASSERT_OK_ZERO(path_is_network_fs_harder("/dev"));
+        _cleanup_close_ int dir_fd = -EBADF;
-        ASSERT_OK_ZERO(path_is_network_fs_harder("/sys"));
+        int r;
-        ASSERT_OK_ZERO(path_is_network_fs_harder("/run"));
+
        ASSERT_OK(dir_fd = open("/", O_PATH | O_CLOEXEC));
        FOREACH_STRING(s,
                       "/", "/dev/", "/proc/", "/run/", "/sys/", "/tmp/", "/usr/", "/var/tmp/",
                       "", ".", "../../../", "/this/path/should/not/exist/for/test-mount-util/") {
                r = path_is_network_fs_harder(s);
                log_debug("path_is_network_fs_harder(%s) → %i: %s", s, r, r < 0 ? STRERROR(r) : yes_no(r));
                const char *q = path_startswith(s, "/") ?: s;
                r = path_is_network_fs_harder_at(dir_fd, q);
                log_debug("path_is_network_fs_harder_at(root, %s) → %i: %s", q, r, r < 0 ? STRERROR(r) : yes_no(r));
        }
        if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0) {
                (void) log_tests_skipped("not running privileged");
                return;
        }
        _cleanup_(rm_rf_physical_and_freep) char *t = NULL;
        assert_se(mkdtemp_malloc("/tmp/test-mount-util.path_is_network_fs_harder.XXXXXXX", &t) >= 0);
        r = safe_fork("(make_mount-point)",
                      FORK_RESET_SIGNALS |
                      FORK_CLOSE_ALL_FDS |
                      FORK_DEATHSIG_SIGTERM |
                      FORK_WAIT |
                      FORK_REOPEN_LOG |
                      FORK_LOG |
                      FORK_NEW_MOUNTNS |
                      FORK_MOUNTNS_SLAVE,
                      NULL);
        ASSERT_OK(r);
        if (r == 0) {
                ASSERT_OK(mount_nofollow_verbose(LOG_INFO, "tmpfs", t, "tmpfs", 0, NULL));
                ASSERT_OK_ZERO(path_is_network_fs_harder(t));
                ASSERT_OK_ERRNO(umount(t));
                ASSERT_OK(mount_nofollow_verbose(LOG_INFO, "tmpfs", t, "tmpfs", 0, "x-systemd-growfs,x-systemd-automount"));
                ASSERT_OK_ZERO(path_is_network_fs_harder(t));
                ASSERT_OK_ERRNO(umount(t));
                _exit(EXIT_SUCCESS);
        }
 }
 DEFINE_TEST_MAIN(LOG_DEBUG);
--- a/test/meson.build
+++ b/test/meson.build
@ -142,11 +142,13 @@ endif
 ############################################################
 if install_tests
-        foreach script : ['integration-test-setup.sh', 'run-unit-tests.py']
+        install_data('run-unit-tests.py',
-                install_data(script,
+                     install_mode : 'rwxr-xr-x',
-                             install_mode : 'rwxr-xr-x',
+                     install_dir : testsdir)
-                             install_dir : testsdir)
+
-        endforeach
+        install_data('integration-test-setup.sh',
                     install_mode : 'rwxr-xr-x',
                     install_dir : testdata_dir)
 endif
 ############################################################
--- a/test/test.service.in
+++ b/test/test.service.in
@ -7,9 +7,9 @@ Before=getty-pre.target
 [Service]
 ExecStartPre=rm -f /failed /testok
-ExecStartPre=/usr/lib/systemd/tests/integration-test-setup.sh setup
+ExecStartPre=/usr/lib/systemd/tests/testdata/integration-test-setup.sh setup
 ExecStart=@command@
-ExecStopPost=/usr/lib/systemd/tests/integration-test-setup.sh finalize
+ExecStopPost=/usr/lib/systemd/tests/testdata/integration-test-setup.sh finalize
 Type=oneshot
 MemoryAccounting=@memory-accounting@
 StateDirectory=%N
--- a/test/units/TEST-07-PID1.private-pids.sh
+++ b/test/units/TEST-07-PID1.private-pids.sh
@ -132,10 +132,12 @@ testcase_unpriv() {
        return 0
    fi
-    # The kernel has a restriction for unprivileged user namespaces where they cannot mount a less restrictive
+    # IMPORTANT: For /proc/ to be remounted in pid namespace within an unprivileged user namespace, there needs to
-    # instance of /proc/. So if /proc/ is masked (e.g. /proc/kmsg is over-mounted with tmpfs as systemd-nspawn does),
+    # be at least 1 unmasked procfs mount in ANY directory. Otherwise, if /proc/ is masked (e.g. /proc/scsi is
-    # then mounting a new /proc/ will fail and we will still see the host's /proc/. Thus, to allow tests to run in
+    # over-mounted with tmpfs), then mounting a new /proc/ will fail.
-    # a VM or nspawn, we mount a new proc on a temporary directory with no masking to bypass this kernel restriction.
+    #
    # Thus, to guarantee PrivatePIDs=yes tests for unprivileged users pass, we mount a new procfs on a temporary
    # directory with no masking. This will guarantee an unprivileged user can mount a new /proc/ successfully.
    mkdir -p /tmp/TEST-07-PID1-private-pids-proc
    mount -t proc proc /tmp/TEST-07-PID1-private-pids-proc
@ -146,7 +148,16 @@ testcase_unpriv() {
    umount /tmp/TEST-07-PID1-private-pids-proc
    rm -rf /tmp/TEST-07-PID1-private-pids-proc
-    # Now verify the behavior with masking - units should fail as PrivatePIDs=yes has no graceful fallback.
+    # Now we will mask /proc/ by mounting tmpfs over /proc/scsi. This will guarantee that mounting /proc/ will fail
    # for unprivileged users when using PrivatePIDs=yes. Now units should fail as PrivatePIDs=yes has no graceful
    # fallback.
    #
    # Note some kernels do not have /proc/scsi so we verify the directory exists prior to running the test.
    if [ ! -d /proc/scsi ]; then
        echo "/proc/scsi does not exist, skipping unprivileged PrivatePIDs=yes test with masked /proc/"
        return 0
    fi
    if [[ "$HAS_EXISTING_SCSI_MOUNT" == "no" ]]; then
        mount -t tmpfs tmpfs /proc/scsi
    fi
Author	SHA1	Message	Date
Maanya Goenka	b7325800bb	Merge `bd59565d0e` into `5261c521e3`	2024-11-08 13:30:15 +00:00
Yu Watanabe	5261c521e3	mount-util: make path_get_mount_info() work arbitrary inode Follow-up for `d49d95df0a`. Replaces `9a032ec55a`. Fixes #35075.	2024-11-08 13:25:17 +01:00
Franck Bui	514d9e1665	test: install integration-test-setup.sh in testdata/ integration-test-setup.sh is an auxiliary script that tests rely on at runtime. As such, install the script in testdata/. Follow-up for `af153e36ae`.	2024-11-08 12:37:40 +01:00
Lennart Poettering	b480a4c15e	update TODO	2024-11-08 10:10:11 +01:00
Lennart Poettering	af3baf174a	fs-util: add comment about XO_NOCOW	2024-11-08 09:21:25 +01:00
Ryan Wilson	d8091e1281	Fix PrivatePIDs=yes integration test for kernels with no /proc/scsi	2024-11-08 13:38:35 +09:00
Maanya Goenka	bd59565d0e	Add version as new column in the list command table output Add a new column in the list command output table that logs the version of the image name of the extension. 'version' of the image should be the part of the image name string that includes the version and the boot counters at the end of the filename but excludes the .raw type suffix. This commit modifies the table outputted by the list command to have the version from the absolute filename displayed alongside the shortened image name.	2024-11-07 00:34:46 +00:00