1
0
mirror of https://github.com/systemd/systemd synced 2026-03-18 19:14:46 +01:00

Compare commits

..

No commits in common. "49eb2d50b4328956b0d8fdf173425e2029b7d5ee" and "b6585c811a82633c082182145ca5d5132a8228ff" have entirely different histories.

20 changed files with 465 additions and 159 deletions

22
README
View File

@ -47,25 +47,25 @@ REQUIREMENTS:
≥ 5.3 for bounded loops in BPF program, keyring namespacing, ≥ 5.3 for bounded loops in BPF program, keyring namespacing,
and nexthop support and nexthop support
≥ 5.4 for pidfd and signed Verity images ≥ 5.4 for pidfd and signed Verity images
≥ 5.6 for getrandom() GRND_INSECURE
≥ 5.7 for CLONE_INTO_CGROUP, cgroup2fs memory_recursiveprot option,
BPF links and the BPF LSM hook
≥ 5.8 for LOOP_CONFIGURE and STATX_ATTR_MOUNT_ROOT
≥ 5.9 for close_range()
≥ 5.10 for STATX_MNT_ID
⛔ Kernel versions below 5.10 ("minimum baseline") are not supported at all, ⛔ Kernel versions below 5.4 ("minimum baseline") are not supported at all,
and are missing required functionality as listed above. and are missing required functionality as listed above.
Linux kernel ≥ 5.12 for idmapped mount Linux kernel ≥ 5.6 for getrandom() GRND_INSECURE
≥ 5.14 for cgroup.kill and quotactl_fd() ≥ 5.7 for CLONE_INTO_CGROUP, cgroup2fs memory_recursiveprot option,
BPF links and the BPF LSM hook
⚠️ Kernel versions below 5.14 ("recommended baseline") have significant gaps ⚠️ Kernel versions below 5.7 ("recommended baseline") have significant gaps
in functionality and are not recommended for use with this version in functionality and are not recommended for use with this version
of systemd. Taint flag 'old-kernel' will be set. systemd will most likely of systemd. Taint flag 'old-kernel' will be set. systemd will most likely
still function, but upstream support and testing are limited. still function, but upstream support and testing are limited.
Linux kernel ≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option Linux kernel ≥ 5.8 for LOOP_CONFIGURE and STATX_ATTR_MOUNT_ROOT
≥ 5.9 for close_range()
≥ 5.12 for idmapped mount
≥ 5.14 for cgroup.kill
≥ 5.14 for quotactl_fd()
≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD, ≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD,
and MOVE_MOUNT_BENEATH and MOVE_MOUNT_BENEATH
≥ 6.6 for quota support on tmpfs ≥ 6.6 for quota support on tmpfs

View File

@ -68,4 +68,4 @@
#define VARLINK_PATH_MACHINED_RESOLVE_HOOK "/run/systemd/resolve.hook/io.systemd.Machine" #define VARLINK_PATH_MACHINED_RESOLVE_HOOK "/run/systemd/resolve.hook/io.systemd.Machine"
/* Recommended baseline - see README for details */ /* Recommended baseline - see README for details */
#define KERNEL_BASELINE_VERSION "5.14" #define KERNEL_BASELINE_VERSION "5.7"

View File

@ -16,6 +16,7 @@
#include "format-util.h" #include "format-util.h"
#include "fs-util.h" #include "fs-util.h"
#include "log.h" #include "log.h"
#include "mountpoint-util.h"
#include "parse-util.h" #include "parse-util.h"
#include "path-util.h" #include "path-util.h"
#include "process-util.h" #include "process-util.h"
@ -251,9 +252,10 @@ int close_all_fds_frugal(const int except[], size_t n_except) {
assert(except || n_except == 0); assert(except || n_except == 0);
/* This is the inner fallback core of close_all_fds(). This never calls malloc() or so and hence is /* This is the inner fallback core of close_all_fds(). This never calls malloc() or opendir() or so
* safe to be called in signal handler context. Most users should call close_all_fds(), but when we * and hence is safe to be called in signal handler context. Most users should call close_all_fds(),
* assume we are called from signal handler context, then use this simpler call instead. */ * but when we assume we are called from signal handler context, then use this simpler call
* instead. */
max_fd = get_max_fd(); max_fd = get_max_fd();
if (max_fd < 0) if (max_fd < 0)
@ -279,6 +281,44 @@ int close_all_fds_frugal(const int except[], size_t n_except) {
return r; return r;
} }
int close_all_fds_by_proc(const int except[], size_t n_except) {
_cleanup_closedir_ DIR *d = NULL;
int r = 0;
d = opendir("/proc/self/fd");
if (!d)
return close_all_fds_frugal(except, n_except); /* ultimate fallback if /proc/ is not available */
FOREACH_DIRENT(de, d, return -errno) {
int fd = -EBADF, q;
if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
continue;
fd = parse_fd(de->d_name);
if (fd < 0)
/* Let's better ignore this, just in case */
continue;
if (fd < 3)
continue;
if (fd == dirfd(d))
continue;
if (fd_in_set(fd, except, n_except))
continue;
q = close_nointr(fd);
if (q != -EBADF) /* Valgrind has its own FD and doesn't want to have it closed */
RET_GATHER(r, q);
}
return r;
}
static bool have_close_range = true; /* Assume we live in the future */
static int close_all_fds_special_case(const int except[], size_t n_except) { static int close_all_fds_special_case(const int except[], size_t n_except) {
assert(n_except == 0 || except); assert(n_except == 0 || except);
@ -286,6 +326,9 @@ static int close_all_fds_special_case(const int except[], size_t n_except) {
* nicely, since we won't need sorting for them. Returns > 0 if the special casing worked, 0 * nicely, since we won't need sorting for them. Returns > 0 if the special casing worked, 0
* otherwise. */ * otherwise. */
if (!have_close_range)
return 0;
if (n_except == 1 && except[0] < 0) /* Minor optimization: if we only got one fd, and it's invalid, if (n_except == 1 && except[0] < 0) /* Minor optimization: if we only got one fd, and it's invalid,
* we got none */ * we got none */
n_except = 0; n_except = 0;
@ -294,22 +337,31 @@ static int close_all_fds_special_case(const int except[], size_t n_except) {
case 0: case 0:
/* Close everything. Yay! */ /* Close everything. Yay! */
if (close_range(3, INT_MAX, 0) < 0)
return -errno;
return 1; if (close_range(3, INT_MAX, 0) >= 0)
return 1;
if (ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno)) {
have_close_range = false;
return 0;
}
return -errno;
case 1: case 1:
/* Close all but exactly one, then we don't need no sorting. This is a pretty common /* Close all but exactly one, then we don't need no sorting. This is a pretty common
* case, hence let's handle it specially. */ * case, hence let's handle it specially. */
if (except[0] > 3 && close_range(3, except[0] - 1, 0) < 0) if ((except[0] <= 3 || close_range(3, except[0]-1, 0) >= 0) &&
return -errno; (except[0] >= INT_MAX || close_range(MAX(3, except[0]+1), -1, 0) >= 0))
return 1;
if (except[0] < INT_MAX && close_range(MAX(3, except[0] + 1), -1, 0) < 0) if (ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno)) {
return -errno; have_close_range = false;
return 0;
}
return 1; return -errno;
default: default:
return 0; return 0;
@ -341,6 +393,9 @@ int close_all_fds(const int except[], size_t n_except) {
if (r > 0) /* special case worked! */ if (r > 0) /* special case worked! */
return 0; return 0;
if (!have_close_range)
return close_all_fds_by_proc(except, n_except);
_cleanup_free_ int *sorted_malloc = NULL; _cleanup_free_ int *sorted_malloc = NULL;
size_t n_sorted; size_t n_sorted;
int *sorted; int *sorted;
@ -360,7 +415,7 @@ int close_all_fds(const int except[], size_t n_except) {
sorted = newa(int, n_sorted); sorted = newa(int, n_sorted);
if (!sorted) /* Fallback on OOM. */ if (!sorted) /* Fallback on OOM. */
return close_all_fds_frugal(except, n_except); return close_all_fds_by_proc(except, n_except);
memcpy(sorted, except, n_except * sizeof(int)); memcpy(sorted, except, n_except * sizeof(int));
@ -382,8 +437,13 @@ int close_all_fds(const int except[], size_t n_except) {
continue; continue;
/* Close everything between the start and end fds (both of which shall stay open) */ /* Close everything between the start and end fds (both of which shall stay open) */
if (close_range(start + 1, end - 1, 0) < 0) if (close_range(start + 1, end - 1, 0) < 0) {
return -errno; if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
return -errno;
have_close_range = false;
return close_all_fds_by_proc(except, n_except);
}
} }
/* The loop succeeded. Let's now close everything beyond the end */ /* The loop succeeded. Let's now close everything beyond the end */
@ -391,8 +451,13 @@ int close_all_fds(const int except[], size_t n_except) {
if (sorted[n_sorted-1] >= INT_MAX) /* Don't let the addition below overflow */ if (sorted[n_sorted-1] >= INT_MAX) /* Don't let the addition below overflow */
return 0; return 0;
if (close_range(sorted[n_sorted-1] + 1, INT_MAX, 0) < 0) if (close_range(sorted[n_sorted-1] + 1, INT_MAX, 0) < 0) {
return -errno; if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
return -errno;
have_close_range = false;
return close_all_fds_by_proc(except, n_except);
}
return 0; return 0;
} }
@ -1054,6 +1119,7 @@ int path_is_root_at(int dir_fd, const char *path) {
int fds_are_same_mount(int fd1, int fd2) { int fds_are_same_mount(int fd1, int fd2) {
struct statx sx1 = {}, sx2 = {}; /* explicitly initialize the struct to make msan silent. */ struct statx sx1 = {}, sx2 = {}; /* explicitly initialize the struct to make msan silent. */
int r;
assert(fd1 >= 0); assert(fd1 >= 0);
assert(fd2 >= 0); assert(fd2 >= 0);
@ -1064,7 +1130,39 @@ int fds_are_same_mount(int fd1, int fd2) {
if (statx(fd2, "", AT_EMPTY_PATH, STATX_TYPE|STATX_INO|STATX_MNT_ID, &sx2) < 0) if (statx(fd2, "", AT_EMPTY_PATH, STATX_TYPE|STATX_INO|STATX_MNT_ID, &sx2) < 0)
return -errno; return -errno;
return statx_inode_same(&sx1, &sx2) && statx_mount_same(&sx1, &sx2); /* First, compare the inodes. If these differ, the two fds do not refer to the same inode, hence we are done. */
if (!statx_inode_same(&sx1, &sx2))
return false;
/* Note, statx() does not provide the mount ID when an old kernel is used. In that case, let's
* acquire the mount ID via path_get_mnt_id_at_fallback() instead, and fail if that does not work
* either. */
if (!FLAGS_SET(sx1.stx_mask, STATX_MNT_ID)) {
int mntid;
r = path_get_mnt_id_at_fallback(fd1, "", &mntid);
if (r < 0)
return r;
assert(mntid >= 0);
sx1.stx_mnt_id = mntid;
sx1.stx_mask |= STATX_MNT_ID;
}
if (!FLAGS_SET(sx2.stx_mask, STATX_MNT_ID)) {
int mntid;
r = path_get_mnt_id_at_fallback(fd2, "", &mntid);
if (r < 0)
return r;
assert(mntid >= 0);
sx2.stx_mnt_id = mntid;
sx2.stx_mask |= STATX_MNT_ID;
}
return statx_mount_same(&sx1, &sx2);
} }
char* format_proc_fd_path(char buf[static PROC_FD_PATH_MAX], int fd) { char* format_proc_fd_path(char buf[static PROC_FD_PATH_MAX], int fd) {

View File

@ -112,6 +112,7 @@ int get_max_fd(void);
int close_all_fds(const int except[], size_t n_except); int close_all_fds(const int except[], size_t n_except);
int close_all_fds_without_malloc(const int except[], size_t n_except); int close_all_fds_without_malloc(const int except[], size_t n_except);
int close_all_fds_by_proc(const int except[], size_t n_except);
int close_all_fds_frugal(const int except[], size_t n_except); int close_all_fds_frugal(const int except[], size_t n_except);
int pack_fds(int fds[], size_t n); int pack_fds(int fds[], size_t n);

View File

@ -13,8 +13,10 @@
#include "log.h" #include "log.h"
#include "mountpoint-util.h" #include "mountpoint-util.h"
#include "nulstr-util.h" #include "nulstr-util.h"
#include "parse-util.h"
#include "path-util.h" #include "path-util.h"
#include "stat-util.h" #include "stat-util.h"
#include "stdio-util.h"
#include "string-util.h" #include "string-util.h"
#include "strv.h" #include "strv.h"
@ -133,6 +135,57 @@ int name_to_handle_at_try_fid(
return name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags & ~AT_HANDLE_FID); return name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags & ~AT_HANDLE_FID);
} }
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
_cleanup_close_ int subfd = -EBADF;
int r;
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
assert(ret_mnt_id);
if ((flags & AT_EMPTY_PATH) && isempty(filename))
xsprintf(path, "/proc/self/fdinfo/%i", fd);
else {
subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
if (subfd < 0)
return -errno;
xsprintf(path, "/proc/self/fdinfo/%i", subfd);
}
_cleanup_free_ char *p = NULL;
r = get_proc_field(path, "mnt_id", &p);
if (r == -ENOENT)
return -EBADF;
if (r < 0)
return r;
return safe_atoi(p, ret_mnt_id);
}
static bool filename_possibly_with_slash_suffix(const char *s) {
const char *slash, *copied;
/* Checks whether the specified string is either a filename, or a filename with a suffix of
* slashes. But nothing else.
*
* this is OK: foo, bar, foo/, bar/, foo//, bar///
* this is not OK: "", "/", "/foo", "foo/bar", ".", ".." */
slash = strchr(s, '/');
if (!slash)
return filename_is_valid(s);
if (slash - s > PATH_MAX) /* We want to allocate on the stack below, hence do a size check first */
return false;
if (slash[strspn(slash, "/")] != 0) /* Check that the suffix consists only of one or more slashes */
return false;
copied = strndupa_safe(s, slash - s);
return filename_is_valid(copied);
}
bool file_handle_equal(const struct file_handle *a, const struct file_handle *b) { bool file_handle_equal(const struct file_handle *a, const struct file_handle *b) {
if (a == b) if (a == b)
return true; return true;
@ -144,73 +197,191 @@ bool file_handle_equal(const struct file_handle *a, const struct file_handle *b)
return memcmp_nn(a->f_handle, a->handle_bytes, b->f_handle, b->handle_bytes) == 0; return memcmp_nn(a->f_handle, a->handle_bytes, b->f_handle, b->handle_bytes) == 0;
} }
int is_mount_point_at(int dir_fd, const char *path, int flags) { int is_mount_point_at(int fd, const char *filename, int flags) {
bool fd_is_self;
int r; int r;
assert(dir_fd >= 0 || dir_fd == AT_FDCWD); assert(fd >= 0 || fd == AT_FDCWD);
assert((flags & ~AT_SYMLINK_FOLLOW) == 0); assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
if (path_equal(path, "/")) if (isempty(filename)) {
return true; if (fd == AT_FDCWD)
filename = ".";
if (isempty(path)) {
if (dir_fd == AT_FDCWD)
path = ".";
else { else {
/* If the file name is empty we'll see if the specified 'fd' is a mount point.
* That's only supported by statx(), or if the inode specified via 'fd' refers to a
* directory. Otherwise, we'll have to fail (ENOTDIR), because we have no kernel API
* to query the information we need. */
flags |= AT_EMPTY_PATH; flags |= AT_EMPTY_PATH;
path = ""; filename = "";
} }
fd_is_self = true;
} else if (STR_IN_SET(filename, ".", "./"))
fd_is_self = true;
else {
/* Insist that the specified filename is actually a filename, and not a path, i.e. some inode
* further up or down the tree than immediately below the specified directory fd. */
if (!filename_possibly_with_slash_suffix(filename))
return -EINVAL;
fd_is_self = false;
} }
/* First we will try statx()'s STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
* since kernel 5.8.
*
* If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
* an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
* all file systems are hooked up). If it works the mount id is usually good enough to tell us
* whether something is a mount point.
*
* If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
* as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
* handle is pretty useful to detect the root directory, which we should always consider a mount
* point. Hence we use this only as fallback.
*
* Note that traditionally the check is done via fstat()-based st_dev comparisons. However, various
* file systems don't guarantee same st_dev across single fs anymore, e.g. unionfs exposes file systems
* with a variety of st_dev reported. Also, btrfs subvolumes have different st_dev, even though
* they aren't real mounts of their own. */
struct statx sx = {}; /* explicitly initialize the struct to make msan silent. */ struct statx sx = {}; /* explicitly initialize the struct to make msan silent. */
if (statx(dir_fd, path, if (statx(fd, filename,
at_flags_normalize_nofollow(flags) | at_flags_normalize_nofollow(flags) |
AT_NO_AUTOMOUNT | /* don't trigger automounts – mounts are a local concept, hence no need to trigger automounts to determine STATX_ATTR_MOUNT_ROOT */ AT_NO_AUTOMOUNT | /* don't trigger automounts – mounts are a local concept, hence no need to trigger automounts to determine STATX_ATTR_MOUNT_ROOT */
AT_STATX_DONT_SYNC, /* don't go to the network for this – for similar reasons */ AT_STATX_DONT_SYNC, /* don't go to the network for this – for similar reasons */
STATX_TYPE|STATX_INO, STATX_TYPE,
&sx) < 0) &sx) < 0)
return -errno; return -errno;
r = statx_warn_mount_root(&sx, LOG_DEBUG); if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
_cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
int mount_id = -1, mount_id_parent = -1;
bool nosupp = false;
r = name_to_handle_at_try_fid(fd, filename, &h, &mount_id, flags);
if (r < 0) {
if (is_name_to_handle_at_fatal_error(r))
return r;
if (!ERRNO_IS_NOT_SUPPORTED(r))
goto fallback_fdinfo;
/* This file system does not support name_to_handle_at(), hence let's see if the upper fs
* supports it (in which case it is a mount point), otherwise fall back to the fdinfo logic. */
nosupp = true;
}
if (fd_is_self)
r = name_to_handle_at_try_fid(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */
else
r = name_to_handle_at_try_fid(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
if (r < 0) {
if (is_name_to_handle_at_fatal_error(r))
return r;
if (!ERRNO_IS_NOT_SUPPORTED(r))
goto fallback_fdinfo;
if (nosupp)
/* Both the parent and the directory can't do name_to_handle_at() */
goto fallback_fdinfo;
/* The parent can't do name_to_handle_at() but the directory we are
* interested in can? If so, it must be a mount point. */
return 1;
}
/* The parent can do name_to_handle_at() but the directory we are interested in can't? If
* so, it must be a mount point. */
if (nosupp)
return 1;
/* If the file handle for the directory we are interested in and its parent are identical,
* we assume this is the root directory, which is a mount point. */
if (file_handle_equal(h_parent, h))
return 1;
return mount_id != mount_id_parent;
fallback_fdinfo:
r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
if (r < 0) if (r < 0)
return r; return r;
if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) if (fd_is_self)
return true; r = fd_fdinfo_mnt_id(fd, "..", 0, &mount_id_parent); /* can't work for non-directories 😢 */
else
r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
if (r < 0)
return r;
/* When running on chroot environment, the root may not be a mount point, but we unconditionally if (mount_id != mount_id_parent)
* return true when the input is "/" in the above, but the shortcut may not work e.g. when the path return 1;
* is relative. */
struct statx sx2 = {}; /* explicitly initialize the struct to make msan silent. */ /* Hmm, so, the mount ids are the same. This leaves one special case though for the root file
if (statx(AT_FDCWD, "/", AT_STATX_DONT_SYNC, STATX_TYPE|STATX_INO, &sx2) < 0) * system. For that, let's see if the parent directory has the same inode as we are interested
* in. */
struct stat a, b;
/* yay for fstatat() taking a different set of flags than the other _at() above */
if (fstatat(fd, filename, &a, at_flags_normalize_nofollow(flags)) < 0)
return -errno; return -errno;
return statx_inode_same(&sx, &sx2); if (fd_is_self)
r = fstatat(fd, "..", &b, 0);
else
r = fstatat(fd, "", &b, AT_EMPTY_PATH);
if (r < 0)
return -errno;
/* A directory with same device and inode as its parent must be the root directory. Otherwise
* not a mount point.
*
* NB: we avoid inode_same_at() here because it internally attempts name_to_handle_at_try_fid() first,
* which is redundant. */
return stat_inode_same(&a, &b);
} }
/* flags can be AT_SYMLINK_FOLLOW or 0 */ /* flags can be AT_SYMLINK_FOLLOW or 0 */
int path_is_mount_point_full(const char *path, const char *root, int flags) { int path_is_mount_point_full(const char *path, const char *root, int flags) {
_cleanup_close_ int dir_fd = -EBADF; _cleanup_close_ int dfd = -EBADF;
int r; _cleanup_free_ char *fn = NULL;
assert(path); assert(path);
assert((flags & ~AT_SYMLINK_FOLLOW) == 0); assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
if (empty_or_root(root)) if (path_equal(path, "/"))
return is_mount_point_at(AT_FDCWD, path, flags); return 1;
r = chase(path, root, /* we need to resolve symlinks manually, we can't just rely on is_mount_point_at() to do that for us;
FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : CHASE_NOFOLLOW, * if we have a structure like /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
/* ret_path= */ NULL, &dir_fd); * look at needs to be /usr, not /. */
if (r < 0) dfd = chase_and_open_parent(path, root,
CHASE_TRAIL_SLASH|(FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : CHASE_NOFOLLOW),
&fn);
if (dfd < 0)
return dfd;
return is_mount_point_at(dfd, fn, flags);
}
int path_get_mnt_id_at_fallback(int dir_fd, const char *path, int *ret) {
int r;
assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
assert(ret);
r = name_to_handle_at_loop(dir_fd, path, NULL, ret, isempty(path) ? AT_EMPTY_PATH : 0);
if (r >= 0 || is_name_to_handle_at_fatal_error(r))
return r; return r;
return is_mount_point_at(dir_fd, /* path= */ NULL, flags); return fd_fdinfo_mnt_id(dir_fd, path, isempty(path) ? AT_EMPTY_PATH : 0, ret);
} }
int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) { int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) {
struct statx sx; struct statx sx;
int r;
assert(dir_fd >= 0 || dir_fd == AT_FDCWD); assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
assert(ret); assert(ret);
@ -224,12 +395,12 @@ int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) {
&sx) < 0) &sx) < 0)
return -errno; return -errno;
r = statx_warn_mount_id(&sx, LOG_DEBUG); if (FLAGS_SET(sx.stx_mask, STATX_MNT_ID)) {
if (r < 0) *ret = sx.stx_mnt_id;
return r; return 0;
}
*ret = sx.stx_mnt_id; return path_get_mnt_id_at_fallback(dir_fd, path, ret);
return 0;
} }
bool fstype_is_network(const char *fstype) { bool fstype_is_network(const char *fstype) {

View File

@ -39,12 +39,13 @@ int name_to_handle_at_try_fid(int fd, const char *path, struct file_handle **ret
bool file_handle_equal(const struct file_handle *a, const struct file_handle *b); bool file_handle_equal(const struct file_handle *a, const struct file_handle *b);
int path_get_mnt_id_at_fallback(int dir_fd, const char *path, int *ret);
int path_get_mnt_id_at(int dir_fd, const char *path, int *ret); int path_get_mnt_id_at(int dir_fd, const char *path, int *ret);
static inline int path_get_mnt_id(const char *path, int *ret) { static inline int path_get_mnt_id(const char *path, int *ret) {
return path_get_mnt_id_at(AT_FDCWD, path, ret); return path_get_mnt_id_at(AT_FDCWD, path, ret);
} }
int is_mount_point_at(int dir_fd, const char *path, int flags); int is_mount_point_at(int fd, const char *filename, int flags);
int path_is_mount_point_full(const char *path, const char *root, int flags); int path_is_mount_point_full(const char *path, const char *root, int flags);
static inline int path_is_mount_point(const char *path) { static inline int path_is_mount_point(const char *path) {
return path_is_mount_point_full(path, NULL, 0); return path_is_mount_point_full(path, NULL, 0);

View File

@ -2119,14 +2119,15 @@ int posix_spawn_wrapper(
if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0) if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0)
return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode, return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode,
turn that into something recognizable */ turn that into something recognizable */
if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r)) && if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) { FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
/* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
* need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3(). * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
* retry every time. * but not CLONE_INTO_CGROUP. */
* Note, CLONE_INTO_CGROUP is supported since kernel v5.7, but some architectures still
* do not support clone3(). Hence, we need to keep the fallback logic for a while. */ /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
* retry every time. */
have_clone_into_cgroup = false; have_clone_into_cgroup = false;
flags &= ~POSIX_SPAWN_SETCGROUP; flags &= ~POSIX_SPAWN_SETCGROUP;

View File

@ -68,6 +68,8 @@ static void fallback_random_bytes(void *p, size_t n) {
} }
void random_bytes(void *p, size_t n) { void random_bytes(void *p, size_t n) {
static bool have_grndinsecure = true;
assert(p || n == 0); assert(p || n == 0);
if (n == 0) if (n == 0)
@ -76,9 +78,15 @@ void random_bytes(void *p, size_t n) {
for (;;) { for (;;) {
ssize_t l; ssize_t l;
l = getrandom(p, n, GRND_INSECURE); l = getrandom(p, n, have_grndinsecure ? GRND_INSECURE : GRND_NONBLOCK);
if (l < 0 && errno == EINVAL && have_grndinsecure) {
/* No GRND_INSECURE; fallback to GRND_NONBLOCK. */
have_grndinsecure = false;
continue;
}
if (l <= 0) if (l <= 0)
break; /* Unexpected error. Give up and fallback to /dev/urandom. */ break; /* Will block (with GRND_NONBLOCK), or unexpected error. Give up and fallback
to /dev/urandom. */
if ((size_t) l == n) if ((size_t) l == n)
return; /* Done reading, success. */ return; /* Done reading, success. */

View File

@ -463,13 +463,8 @@ bool statx_inode_same(const struct statx *a, const struct statx *b) {
/* Same as stat_inode_same() but for struct statx */ /* Same as stat_inode_same() but for struct statx */
if (!statx_is_set(a) || !statx_is_set(b)) return statx_is_set(a) && statx_is_set(b) &&
return false; FLAGS_SET(a->stx_mask, STATX_TYPE|STATX_INO) && FLAGS_SET(b->stx_mask, STATX_TYPE|STATX_INO) &&
assert(FLAGS_SET(a->stx_mask, STATX_TYPE|STATX_INO));
assert(FLAGS_SET(b->stx_mask, STATX_TYPE|STATX_INO));
return
((a->stx_mode ^ b->stx_mode) & S_IFMT) == 0 && ((a->stx_mode ^ b->stx_mode) & S_IFMT) == 0 &&
a->stx_dev_major == b->stx_dev_major && a->stx_dev_major == b->stx_dev_major &&
a->stx_dev_minor == b->stx_dev_minor && a->stx_dev_minor == b->stx_dev_minor &&
@ -480,10 +475,13 @@ bool statx_mount_same(const struct statx *a, const struct statx *b) {
if (!statx_is_set(a) || !statx_is_set(b)) if (!statx_is_set(a) || !statx_is_set(b))
return false; return false;
assert(FLAGS_SET(a->stx_mask, STATX_MNT_ID)); /* if we have the mount ID, that's all we need */
assert(FLAGS_SET(b->stx_mask, STATX_MNT_ID)); if (FLAGS_SET(a->stx_mask, STATX_MNT_ID) && FLAGS_SET(b->stx_mask, STATX_MNT_ID))
return a->stx_mnt_id == b->stx_mnt_id;
return a->stx_mnt_id == b->stx_mnt_id; /* Otherwise, major/minor of backing device must match */
return a->stx_dev_major == b->stx_dev_major &&
a->stx_dev_minor == b->stx_dev_minor;
} }
int xstatfsat(int dir_fd, const char *path, struct statfs *ret) { int xstatfsat(int dir_fd, const char *path, struct statfs *ret) {
@ -574,25 +572,3 @@ mode_t inode_type_from_string(const char *s) {
return MODE_INVALID; return MODE_INVALID;
} }
int statx_warn_mount_root(const struct statx *sx, int log_level) {
assert(sx);
/* The STATX_ATTR_MOUNT_ROOT flag is supported since kernel v5.8. */
if (!FLAGS_SET(sx->stx_attributes_mask, STATX_ATTR_MOUNT_ROOT))
return log_full_errno(log_level, SYNTHETIC_ERRNO(ENOSYS),
"statx() did not set STATX_ATTR_MOUNT_ROOT, running on an old kernel?");
return 0;
}
int statx_warn_mount_id(const struct statx *sx, int log_level) {
assert(sx);
/* The STATX_MNT_ID flag is supported since kernel v5.10. */
if (!FLAGS_SET(sx->stx_mask, STATX_MNT_ID))
return log_full_errno(log_level, SYNTHETIC_ERRNO(ENOSYS),
"statx() does not support STATX_MNT_ID, running on an old kernel?");
return 0;
}

View File

@ -117,6 +117,3 @@ static inline bool inode_type_can_hardlink(mode_t m) {
* type). */ * type). */
return IN_SET(m & S_IFMT, S_IFSOCK, S_IFLNK, S_IFREG, S_IFBLK, S_IFCHR, S_IFIFO); return IN_SET(m & S_IFMT, S_IFSOCK, S_IFLNK, S_IFREG, S_IFBLK, S_IFCHR, S_IFIFO);
} }
int statx_warn_mount_root(const struct statx *sx, int log_level);
int statx_warn_mount_id(const struct statx *sx, int log_level);

View File

@ -3467,7 +3467,7 @@ static int is_extension_overlay(const char *path, int fd) {
fd = dfd; fd = dfd;
} }
r = is_mount_point_at(fd, /* path= */ NULL, /* flags= */ 0); r = is_mount_point_at(fd, /* filename= */ NULL, /* flags= */ 0);
if (r < 0) if (r < 0)
return log_debug_errno(r, "Unable to determine whether '%s' is a mount point: %m", path); return log_debug_errno(r, "Unable to determine whether '%s' is a mount point: %m", path);
if (r == 0) if (r == 0)

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */ /* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once #pragma once
#include_next <linux/bpf_insn.h> /* IWYU pragma: export */ #include_next <linux/bpf.h> /* IWYU pragma: export */
/* defined in linux/filter.h */ /* defined in linux/filter.h */
/* Unconditional jumps, goto pc + off16 */ /* Unconditional jumps, goto pc + off16 */

View File

@ -1113,7 +1113,7 @@ static int action_umount(sd_bus *bus, int argc, char **argv) {
if (fstat(fd, &st) < 0) if (fstat(fd, &st) < 0)
return log_error_errno(errno, "Can't stat '%s' (from %s): %m", p, argv[i]); return log_error_errno(errno, "Can't stat '%s' (from %s): %m", p, argv[i]);
r = is_mount_point_at(fd, /* path= */ NULL, /* flags= */ 0); r = is_mount_point_at(fd, /* filename= */ NULL, /* flags= */ 0);
fd = safe_close(fd); /* before continuing make sure the dir is not keeping anything busy */ fd = safe_close(fd); /* before continuing make sure the dir is not keeping anything busy */
if (r > 0) if (r > 0)
RET_GATHER(ret, stop_mounts(bus, p)); RET_GATHER(ret, stop_mounts(bus, p));

View File

@ -147,7 +147,7 @@ int mount_cgroups(const char *dest, bool accept_existing) {
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to chase %s/sys/fs/cgroup: %m", strempty(dest)); return log_error_errno(r, "Failed to chase %s/sys/fs/cgroup: %m", strempty(dest));
r = is_mount_point_at(fd, /* path= */ NULL, /* flags= */ 0); r = is_mount_point_at(fd, /* filename= */ NULL, /* flags= */ 0);
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p); return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
if (r > 0) { if (r > 0) {

View File

@ -268,7 +268,7 @@ static int verify_fsroot_dir(
bool searching = FLAGS_SET(flags, VERIFY_ESP_SEARCHING), bool searching = FLAGS_SET(flags, VERIFY_ESP_SEARCHING),
unprivileged_mode = FLAGS_SET(flags, VERIFY_ESP_UNPRIVILEGED_MODE); unprivileged_mode = FLAGS_SET(flags, VERIFY_ESP_UNPRIVILEGED_MODE);
_cleanup_free_ char *f = NULL; _cleanup_free_ char *f = NULL;
struct statx sx; struct statx sxa, sxb;
int r; int r;
/* Checks if the specified directory is at the root of its file system, and returns device /* Checks if the specified directory is at the root of its file system, and returns device
@ -287,30 +287,49 @@ static int verify_fsroot_dir(
if (statx(dir_fd, strempty(f), if (statx(dir_fd, strempty(f),
AT_SYMLINK_NOFOLLOW|(isempty(f) ? AT_EMPTY_PATH : 0), AT_SYMLINK_NOFOLLOW|(isempty(f) ? AT_EMPTY_PATH : 0),
STATX_TYPE|STATX_INO|STATX_MNT_ID, &sx) < 0) STATX_TYPE|STATX_INO|STATX_MNT_ID, &sxa) < 0)
return log_full_errno((searching && errno == ENOENT) || return log_full_errno((searching && errno == ENOENT) ||
(unprivileged_mode && ERRNO_IS_PRIVILEGE(errno)) ? LOG_DEBUG : LOG_ERR, errno, (unprivileged_mode && ERRNO_IS_PRIVILEGE(errno)) ? LOG_DEBUG : LOG_ERR, errno,
"Failed to determine block device node of \"%s\": %m", path); "Failed to determine block device node of \"%s\": %m", path);
if (!S_ISDIR(sx.stx_mode)) if (!S_ISDIR(sxa.stx_mode))
return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR), "Path \"%s\" is not a directory", path); return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR), "Path \"%s\" is not a directory", path);
r = statx_warn_mount_root(&sx, LOG_ERR); if (FLAGS_SET(sxa.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) {
if (r < 0)
return r;
if (!FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) /* If we have STATX_ATTR_MOUNT_ROOT, we are happy, that's all we need. We operate under the
* assumption that a top of a mount point is also the top of the file system. (Which of
* course is strictly speaking not always true...) */
if (!FLAGS_SET(sxa.stx_attributes, STATX_ATTR_MOUNT_ROOT))
return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
"Directory \"%s\" is not the root of the file system.", path);
goto success;
}
/* Now let's look at the parent */
if (statx(dir_fd, "", AT_EMPTY_PATH, STATX_TYPE|STATX_INO|STATX_MNT_ID, &sxb) < 0)
return log_full_errno(unprivileged_mode && ERRNO_IS_PRIVILEGE(errno) ? LOG_DEBUG : LOG_ERR, errno,
"Failed to determine block device node of parent of \"%s\": %m", path);
if (statx_inode_same(&sxa, &sxb)) /* for the root dir inode nr for both inodes will be the same */
goto success;
if (statx_mount_same(&sxa, &sxb))
return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
"Directory \"%s\" is not the root of the file system.", path); "Directory \"%s\" is not the root of the file system.", path);
success:
if (!ret_dev) if (!ret_dev)
return 0; return 0;
if (sx.stx_dev_major == 0) /* Hmm, maybe a btrfs device, and the caller asked for the backing device? Then let's try to get it. */ if (sxa.stx_dev_major == 0) /* Hmm, maybe a btrfs device, and the caller asked for the backing device? Then let's try to get it. */
return btrfs_get_block_device_at(dir_fd, strempty(f), ret_dev); return btrfs_get_block_device_at(dir_fd, strempty(f), ret_dev);
*ret_dev = makedev(sx.stx_dev_major, sx.stx_dev_minor); *ret_dev = makedev(sxa.stx_dev_major, sxa.stx_dev_minor);
return 0; return 0;
} }

View File

@ -49,20 +49,19 @@ static int cgroupfs_mount_options(int priority, const char *type, char **ret) {
assert(streq(type, "cgroup2")); assert(streq(type, "cgroup2"));
assert(ret); assert(ret);
/* memory_hugetlb_accounting mount option is since kernel v6.7 (8cba9576df601c384abd334a503c3f6e1e29eefb). */ _cleanup_free_ char *opts = NULL;
FOREACH_STRING(o, "memory_recursiveprot", "memory_hugetlb_accounting") {
r = mount_option_supported("cgroup2", "memory_hugetlb_accounting", /* value= */ NULL); r = mount_option_supported("cgroup2", o, /* value= */ NULL);
if (r <= 0) {
if (r < 0) if (r < 0)
log_full_errno(priority, r, "Failed to determine whether cgroupfs supports 'memory_hugetlb_accounting' mount option, assuming not: %m"); log_full_errno(priority, r, "Failed to determine whether cgroupfs supports '%s' mount option, assuming not: %m", o);
else else if (r == 0)
log_debug("'memory_hugetlb_accounting' not supported by cgroupfs, not using mount option."); log_debug("'%s' not supported by cgroupfs, not using mount option.", o);
else if (!strextend_with_separator(&opts, ",", o))
*ret = NULL; return log_oom_full(priority);
return 0;
} }
return strdup_to(ret, "memory_hugetlb_accounting"); *ret = TAKE_PTR(opts);
return 0;
} }
int mount_cgroupfs(const char *path) { int mount_cgroupfs(const char *path) {
@ -82,7 +81,7 @@ int mount_cgroupfs(const char *path) {
return r; return r;
/* These options shall be kept in sync with those in mount_table below. */ /* These options shall be kept in sync with those in mount_table below. */
if (!strprepend_with_separator(&opts, ",", "nsdelegate,memory_recursiveprot")) if (!strprepend_with_separator(&opts, ",", "nsdelegate"))
return log_oom(); return log_oom();
return mount_nofollow_verbose(LOG_ERR, "cgroup2", path, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts); return mount_nofollow_verbose(LOG_ERR, "cgroup2", path, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
@ -200,7 +199,7 @@ static const MountPoint mount_table[] = {
.what = "cgroup2", .what = "cgroup2",
.where = "/sys/fs/cgroup", .where = "/sys/fs/cgroup",
.type = "cgroup2", .type = "cgroup2",
.options = "nsdelegate,memory_recursiveprot", .options = "nsdelegate",
.options_fn = cgroupfs_mount_options, .options_fn = cgroupfs_mount_options,
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV, .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
.mode = MNT_FATAL|MNT_IN_CONTAINER|MNT_CHECK_WRITABLE, .mode = MNT_FATAL|MNT_IN_CONTAINER|MNT_CHECK_WRITABLE,

View File

@ -343,13 +343,19 @@ TEST(close_all_fds) {
_exit(EXIT_SUCCESS); _exit(EXIT_SUCCESS);
} }
r = ASSERT_OK(pidref_safe_fork("(caf-nomalloc)", flags, NULL)); ASSERT_OK(r = pidref_safe_fork("(caf-nomalloc)", flags, NULL));
if (r == 0) { if (r == 0) {
test_close_all_fds_inner(close_all_fds_without_malloc); test_close_all_fds_inner(close_all_fds_without_malloc);
_exit(EXIT_SUCCESS); _exit(EXIT_SUCCESS);
} }
r = ASSERT_OK(pidref_safe_fork("(caf-frugal)", flags, NULL)); ASSERT_OK(r = pidref_safe_fork("(caf-proc)", flags, NULL));
if (r == 0) {
test_close_all_fds_inner(close_all_fds_by_proc);
_exit(EXIT_SUCCESS);
}
ASSERT_OK(r = pidref_safe_fork("(caf-frugal)", flags, NULL));
if (r == 0) { if (r == 0) {
test_close_all_fds_inner(close_all_fds_frugal); test_close_all_fds_inner(close_all_fds_frugal);
_exit(EXIT_SUCCESS); _exit(EXIT_SUCCESS);

View File

@ -283,16 +283,18 @@ TEST(is_mount_point_at) {
fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY); fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY);
assert_se(fd >= 0); assert_se(fd >= 0);
ASSERT_OK_POSITIVE(is_mount_point_at(fd, "/", /* flags= */ 0)); /* Not allowed, since "/" is a path, not a plain filename */
ASSERT_OK_POSITIVE(is_mount_point_at(fd, "..", /* flags= */ 0)); assert_se(is_mount_point_at(fd, "/", 0) == -EINVAL);
ASSERT_OK_POSITIVE(is_mount_point_at(fd, "../", /* flags= */ 0)); assert_se(is_mount_point_at(fd, "..", 0) == -EINVAL);
r = ASSERT_OK(proc_mounted()); assert_se(is_mount_point_at(fd, "../", 0) == -EINVAL);
ASSERT_OK_EQ(is_mount_point_at(fd, "/proc", /* flags= */ 0), r); assert_se(is_mount_point_at(fd, "/proc", 0) == -EINVAL);
ASSERT_OK_EQ(is_mount_point_at(fd, "/proc/", /* flags= */ 0), r); assert_se(is_mount_point_at(fd, "/proc/", 0) == -EINVAL);
ASSERT_OK_EQ(is_mount_point_at(fd, "proc", /* flags= */ 0), r); assert_se(is_mount_point_at(fd, "proc/sys", 0) == -EINVAL);
ASSERT_OK_EQ(is_mount_point_at(fd, "proc/", /* flags= */ 0), r); assert_se(is_mount_point_at(fd, "proc/sys/", 0) == -EINVAL);
ASSERT_OK_ZERO(is_mount_point_at(fd, "usr/lib", /* flags= */ 0));
ASSERT_OK_ZERO(is_mount_point_at(fd, "usr/lib", /* flags= */ 0)); /* This one definitely is a mount point */
assert_se(is_mount_point_at(fd, "proc", 0) > 0);
assert_se(is_mount_point_at(fd, "proc/", 0) > 0);
safe_close(fd); safe_close(fd);
fd = open("/tmp", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY); fd = open("/tmp", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY);

View File

@ -559,7 +559,7 @@ static int opendir_and_stat(
bool *ret_mountpoint) { bool *ret_mountpoint) {
_cleanup_closedir_ DIR *d = NULL; _cleanup_closedir_ DIR *d = NULL;
struct statx sx; struct statx sx1;
int r; int r;
assert(path); assert(path);
@ -586,16 +586,21 @@ static int opendir_and_stat(
return 0; return 0;
} }
if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx) < 0) if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx1) < 0)
return log_error_errno(errno, "statx(%s) failed: %m", path); return log_error_errno(errno, "statx(%s) failed: %m", path);
r = statx_warn_mount_root(&sx, LOG_ERR); if (FLAGS_SET(sx1.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT))
if (r < 0) *ret_mountpoint = FLAGS_SET(sx1.stx_attributes, STATX_ATTR_MOUNT_ROOT);
return r; else {
struct statx sx2;
if (statx(dirfd(d), "..", 0, STATX_INO, &sx2) < 0)
return log_error_errno(errno, "statx(%s/..) failed: %m", path);
*ret_mountpoint = !statx_mount_same(&sx1, &sx2);
}
*ret_mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
*ret = TAKE_PTR(d); *ret = TAKE_PTR(d);
*ret_sx = sx; *ret_sx = sx1;
return 1; return 1;
} }
@ -708,13 +713,35 @@ static int dir_cleanup(
continue; continue;
} }
r = statx_warn_mount_root(&sx, LOG_ERR); if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) {
if (r < 0) /* Yay, we have the mount point API, use it */
return r; if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) {
log_debug("Ignoring \"%s/%s\": different mount points.", p, de->d_name);
continue;
}
} else {
/* So we might have statx() but the STATX_ATTR_MOUNT_ROOT flag is not supported, fall
* back to traditional stx_dev checking. */
if (sx.stx_dev_major != rootdev_major ||
sx.stx_dev_minor != rootdev_minor) {
log_debug("Ignoring \"%s/%s\": different filesystem.", p, de->d_name);
continue;
}
if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) { /* Try to detect bind mounts of the same filesystem instance; they do not differ in
log_debug("Ignoring \"%s/%s\": different mount points.", p, de->d_name); * device major/minors. This type of query is not supported on all kernels or
continue; * filesystem types though. */
if (S_ISDIR(sx.stx_mode)) {
int q;
q = is_mount_point_at(dirfd(d), de->d_name, 0);
if (q < 0)
log_debug_errno(q, "Failed to determine whether \"%s/%s\" is a mount point, ignoring: %m", p, de->d_name);
else if (q > 0) {
log_debug("Ignoring \"%s/%s\": different mount of the same filesystem.", p, de->d_name);
continue;
}
}
} }
atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? statx_timestamp_load_nsec(&sx.stx_atime) : 0; atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? statx_timestamp_load_nsec(&sx.stx_atime) : 0;

View File

@ -426,7 +426,7 @@ static int run(int argc, char *argv[]) {
if (target_fd < 0) if (target_fd < 0)
return log_error_errno(target_fd, "Failed to open directory '%s': %m", arg_target); return log_error_errno(target_fd, "Failed to open directory '%s': %m", arg_target);
r = is_mount_point_at(target_fd, /* path= */ NULL, /* flags= */ 0); r = is_mount_point_at(target_fd, /* filename= */ NULL, /* flags= */ 0);
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to determine whether '%s' is a mount point: %m", resolved); return log_error_errno(r, "Failed to determine whether '%s' is a mount point: %m", resolved);
if (!r) if (!r)