1
0
mirror of https://github.com/systemd/systemd synced 2026-03-14 00:54:46 +01:00

Compare commits

..

No commits in common. "23c6ab3971eb22d9c656165e8c6314135df0ee6c" and "8b6d8ec66fd65e2a4bd92257e6545b696e415f8c" have entirely different histories.

28 changed files with 419 additions and 663 deletions

2
README
View File

@ -72,11 +72,9 @@ REQUIREMENTS:
and MOVE_MOUNT_BENEATH and MOVE_MOUNT_BENEATH
≥ 6.6 for quota support on tmpfs ≥ 6.6 for quota support on tmpfs
≥ 6.7 for cgroup2fs memory_hugetlb_accounting option ≥ 6.7 for cgroup2fs memory_hugetlb_accounting option
≥ 6.8 for STATX_MNT_ID_UNIQUE
≥ 6.9 for pidfs ≥ 6.9 for pidfs
≥ 6.10 for fcntl(F_DUPFD_QUERY), unprivileged linkat(AT_EMPTY_PATH), ≥ 6.10 for fcntl(F_DUPFD_QUERY), unprivileged linkat(AT_EMPTY_PATH),
and block device 'partscan' sysfs attribute and block device 'partscan' sysfs attribute
≥ 6.12 for AT_HANDLE_MNT_ID_UNIQUE
≥ 6.13 for PIDFD_GET_INFO and {set,remove}xattrat() ≥ 6.13 for PIDFD_GET_INFO and {set,remove}xattrat()
≥ 6.16 for coredump pattern '%F' (pidfd) specifier and SO_PASSRIGHTS ≥ 6.16 for coredump pattern '%F' (pidfd) specifier and SO_PASSRIGHTS

34
TODO
View File

@ -118,36 +118,6 @@ Deprecations and removals:
Features: Features:
* mountfsd: when looking for non-foreign uid owned inodes up the tree, stop at
mounts
* downgrade the uid/gid disposition enforcement in udev
* have a signal that reloads every unit that supports reloading
* systemd: add storage API via varlink, where everyone can drop a socket in a
dir, similar, do the same thing for networking
* do a console daemon that takes stdio fds for services and allows to reconnect
to them later
* report: have something that requests cloud workload identity bearer tokens
and includes it in the report
* sysupdate: download multiple arbitrary patterns from same source
* sysupdate: SHA256SUMS format with bearer tokens for each resource to download
* sysupdate: decrypt SHA256SUMS with key from tpm
* sysupdate: clean up stuff on disk that disappears from SHA256SUMS
* sysupdate: turn http backend stuff into a plugin via varlink
* add new tool that can be used in debug mode and runs in very early boot,
generates a random password, passes it as credential to sysusers for the root
user, then displays it on screen. people can use this to remotely log in.
* Maybe introduce an InodeRef structure inspired by PidRef, which references a * Maybe introduce an InodeRef structure inspired by PidRef, which references a
specific inode, and combines: a path, an O_PATH fd, and possibly a FID into specific inode, and combines: a path, an O_PATH fd, and possibly a FID into
one. Why? We often pass around path and fd separately in chaseat() and similar one. Why? We often pass around path and fd separately in chaseat() and similar
@ -559,6 +529,10 @@ Features:
- add support to export-fs, import-fs - add support to export-fs, import-fs
- systemd-dissect should learn mappings, too, when doing mtree and such - systemd-dissect should learn mappings, too, when doing mtree and such
* format-table: introduce new cell type for strings with ansi sequences in
them. display them in regular output mode (via strip_tab_ansi()), but
suppress them in json mode.
* resolved: report ttl in resolution replies if we know it. This data is useful * resolved: report ttl in resolution replies if we know it. This data is useful
for tools such as wireguard which want to periodically re-resolve DNS names, for tools such as wireguard which want to periodically re-resolve DNS names,
and might want to use the TTL as a hint for that. and might want to use the TTL as a hint for that.

View File

@ -1077,53 +1077,34 @@ int path_is_root_at(int dir_fd, const char *path) {
} }
int fds_are_same_mount(int fd1, int fd2) { int fds_are_same_mount(int fd1, int fd2) {
struct statx sx1, sx2; struct statx sx1 = {}, sx2 = {}; /* explicitly initialize the struct to make msan silent. */
int r;
assert(fd1 >= 0 || IN_SET(fd1, AT_FDCWD, XAT_FDROOT)); assert(fd1 >= 0 || IN_SET(fd1, AT_FDCWD, XAT_FDROOT));
assert(fd2 >= 0 || IN_SET(fd2, AT_FDCWD, XAT_FDROOT)); assert(fd2 >= 0 || IN_SET(fd2, AT_FDCWD, XAT_FDROOT));
r = xstatx(fd1, /* path = */ NULL, AT_EMPTY_PATH, const char *fn1;
STATX_TYPE|STATX_INO|STATX_MNT_ID, if (fd1 == XAT_FDROOT) {
&sx1); fd1 = AT_FDCWD;
if (r < 0) fn1 = "/";
return r; } else
fn1 = "";
r = xstatx(fd2, /* path = */ NULL, AT_EMPTY_PATH, if (statx(fd1, fn1, AT_EMPTY_PATH, STATX_TYPE|STATX_INO|STATX_MNT_ID, &sx1) < 0)
STATX_TYPE|STATX_INO|STATX_MNT_ID, return -errno;
&sx2);
if (r < 0) const char *fn2;
return r; if (fd2 == XAT_FDROOT) {
fd2 = AT_FDCWD;
fn2 = "/";
} else
fn2 = "";
if (statx(fd2, fn2, AT_EMPTY_PATH, STATX_TYPE|STATX_INO|STATX_MNT_ID, &sx2) < 0)
return -errno;
return statx_inode_same(&sx1, &sx2) && statx_mount_same(&sx1, &sx2); return statx_inode_same(&sx1, &sx2) && statx_mount_same(&sx1, &sx2);
} }
/* Rewrites an (fd, path) pair that is relative to XAT_FDROOT into an equivalent pair relative to
 * AT_FDCWD, so that it can be handed to system calls that do not know XAT_FDROOT.
 *
 * If *fd is not XAT_FDROOT, neither *fd nor *path is modified. Otherwise *fd is set to AT_FDCWD and
 * *path is turned into an absolute path: an empty path becomes "/", a non-absolute path gets "/"
 * prepended via strjoin(), and an already absolute path is kept as it is.
 *
 * On success *ret_buffer is always initialized: it receives the string produced by strjoin() if one
 * was needed (in which case *path points to it and the caller has to free it), and NULL otherwise.
 *
 * Returns 1 if the pair was rewritten, 0 if *fd was not XAT_FDROOT, and -ENOMEM if strjoin() failed
 * (in which case no parameter is modified). */
int resolve_xat_fdroot(int *fd, const char **path, char **ret_buffer) {
        char *buffer = NULL;

        assert(fd);
        assert(path);
        assert(ret_buffer);

        if (*fd != XAT_FDROOT) {
                *ret_buffer = NULL;
                return 0;
        }

        if (isempty(*path))
                *path = "/";
        else if (!path_is_absolute(*path)) {
                buffer = strjoin("/", *path);
                if (!buffer)
                        return -ENOMEM;

                *path = buffer;
        }

        /* Assign the return buffer on every success path, also when the path was absolute already
         * and nothing was allocated, so that callers never see an indeterminate pointer. */
        *ret_buffer = buffer;
        *fd = AT_FDCWD;
        return 1;
}
char* format_proc_fd_path(char buf[static PROC_FD_PATH_MAX], int fd) { char* format_proc_fd_path(char buf[static PROC_FD_PATH_MAX], int fd) {
assert(buf); assert(buf);
assert(fd >= 0); assert(fd >= 0);

View File

@ -181,8 +181,6 @@ static inline int dir_fd_is_root_or_cwd(int dir_fd) {
int fds_are_same_mount(int fd1, int fd2); int fds_are_same_mount(int fd1, int fd2);
int resolve_xat_fdroot(int *fd, const char **path, char **ret_buffer);
/* The maximum length a buffer for a /proc/self/fd/<fd> path needs */ /* The maximum length a buffer for a /proc/self/fd/<fd> path needs */
#define PROC_FD_PATH_MAX \ #define PROC_FD_PATH_MAX \
(STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)) (STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int))

View File

@ -1,10 +0,0 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Prints the name of every STATX_ATTR_* macro defined once linux/stat.h is included, one name per
# line.
#
# Usage: <this script> PREPROCESSOR [EXTRA_ARGS...]
#   PREPROCESSOR  command used to run the C preprocessor (mandatory)
#   EXTRA_ARGS    passed through to the preprocessor unchanged
set -euo pipefail

CC=${1:?}
shift

# Preprocess empty input with linux/stat.h force-included, dump all macro definitions (-dM), and keep
# the second field of each "#define STATX_ATTR_..." line, i.e. the macro name.
# NOTE(review): $CC is expanded unquoted, so a value containing spaces is word-split into command
# and arguments; presumably intentional, confirm against the build system caller.
$CC -E -dM -include linux/stat.h "$@" - </dev/null |
        awk '/^#define[ \t]+STATX_ATTR_[A-Z][A-Z_]*[ \t]+/ { print $2; }'

View File

@ -1,11 +0,0 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Prints the name of every STATX_* mask macro defined once linux/stat.h is included, one name per
# line. The combined masks STATX_BASIC_STATS and STATX_ALL as well as all STATX_ATTR_* macros are
# left out.
#
# Usage: <this script> PREPROCESSOR [EXTRA_ARGS...]
#   PREPROCESSOR  command used to run the C preprocessor (mandatory)
#   EXTRA_ARGS    passed through to the preprocessor unchanged
set -euo pipefail

CC=${1:?}
shift

# Preprocess empty input with linux/stat.h force-included, dump all macro definitions (-dM), drop
# the excluded macros, and keep the second field of each remaining "#define STATX_..." line.
# NOTE(review): $CC is expanded unquoted, so a value containing spaces is word-split into command
# and arguments; presumably intentional, confirm against the build system caller.
$CC -E -dM -include linux/stat.h "$@" - </dev/null |
        grep -Ev '^#define[[:space:]]+(STATX_BASIC_STATS|STATX_ALL|STATX_ATTR_)' |
        awk '/^#define[ \t]+STATX_[A-Z][A-Z_]*[ \t]+/ { print $2; }'

View File

@ -125,12 +125,10 @@ sources += basic_sources
generated_gperf_headers = [] generated_gperf_headers = []
foreach item : [ foreach item : [
# name, source, prefix, headers # name, source, prefix, headers
['af', af_sources, '', ['<sys/socket.h>'], ], ['af', af_sources, '', ['<sys/socket.h>'], ],
['arphrd', arphrd_sources, 'ARPHRD_', ['<linux/if_arp.h>'], ], ['arphrd', arphrd_sources, 'ARPHRD_', ['<linux/if_arp.h>'], ],
['capability', capability_sources, '', ['<linux/capability.h>'], ], ['capability', capability_sources, '', ['<linux/capability.h>'], ],
['errno', [], '', ['<errno.h>'], ], ['errno', [], '', ['<errno.h>'], ],
['statx-mask', statx_sources ],
['statx-attribute', statx_sources ],
] ]
generate_list = files('generate-@0@-list.sh'.format(item[0])) generate_list = files('generate-@0@-list.sh'.format(item[0]))
@ -140,35 +138,31 @@ foreach item : [
command : [env, 'bash', generate_list, cpp, system_include_args], command : [env, 'bash', generate_list, cpp, system_include_args],
capture : true) capture : true)
gperf_file = custom_target(
input : list_txt,
output : '@0@-from-name.gperf'.format(item[0]),
command : [generate_gperfs, item[0], item[2], '@INPUT@'] + item[3],
capture : true)
target1 = custom_target(
input : gperf_file,
output : '@0@-from-name.inc'.format(item[0]),
command : [gperf,
'-L', 'ANSI-C', '-t', '--ignore-case',
'-N', 'lookup_@0@'.format(item[0]),
'-H', 'hash_@0@_name'.format(item[0]),
'-p', '-C',
'@INPUT@'],
capture : true)
awkscript = '@0@-to-name.awk'.format(item[0]) awkscript = '@0@-to-name.awk'.format(item[0])
target = custom_target( target2 = custom_target(
input : [awkscript, list_txt], input : [awkscript, list_txt],
output : '@0@-to-name.inc'.format(item[0]), output : '@0@-to-name.inc'.format(item[0]),
command : [awk, '-f', '@INPUT0@', '@INPUT1@'], command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
capture : true) capture : true)
generated_gperf_headers += [target] generated_gperf_headers += [target1, target2]
if item.length() > 2
gperf_file = custom_target(
input : list_txt,
output : '@0@-from-name.gperf'.format(item[0]),
command : [generate_gperfs, item[0], item[2], '@INPUT@'] + item[3],
capture : true)
target = custom_target(
input : gperf_file,
output : '@0@-from-name.inc'.format(item[0]),
command : [gperf,
'-L', 'ANSI-C', '-t', '--ignore-case',
'-N', 'lookup_@0@'.format(item[0]),
'-H', 'hash_@0@_name'.format(item[0]),
'-p', '-C',
'@INPUT@'],
capture : true)
generated_gperf_headers += [target]
endif
endforeach endforeach
generated_sources += generated_gperf_headers generated_sources += generated_gperf_headers

View File

@ -53,29 +53,27 @@ int name_to_handle_at_loop(
uint64_t *ret_unique_mnt_id, uint64_t *ret_unique_mnt_id,
int flags) { int flags) {
int r; size_t n = ORIGINAL_MAX_HANDLE_SZ;
assert(fd >= 0 || fd == AT_FDCWD); assert(fd >= 0 || fd == AT_FDCWD);
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH|AT_HANDLE_FID)) == 0); assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH|AT_HANDLE_FID|AT_HANDLE_MNT_ID_UNIQUE)) == 0);
/* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
* buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
* start value, it is not an upper bound on the buffer size required. * start value, it is not an upper bound on the buffer size required.
* *
* This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
* as NULL if there's no interest in either. * as NULL if there's no interest in either. */
*
* If unique mount id is requested via ret_unique_mnt_id, try AT_HANDLE_MNT_ID_UNIQUE flag first
* (needs kernel v6.12), and fall back to statx() if not supported. If neither worked, and caller
* also specifies ret_mnt_id, then the old-style mount id is returned, -EUNATCH otherwise. */
if (isempty(path)) { if (isempty(path)) {
flags |= AT_EMPTY_PATH; flags |= AT_EMPTY_PATH;
path = ""; path = "";
} }
for (size_t n = ORIGINAL_MAX_HANDLE_SZ;;) { for (;;) {
_cleanup_free_ struct file_handle *h = NULL; _cleanup_free_ struct file_handle *h = NULL;
int mnt_id = -1, r;
uint64_t unique_mnt_id = 0;
h = malloc0(offsetof(struct file_handle, f_handle) + n); h = malloc0(offsetof(struct file_handle, f_handle) + n);
if (!h) if (!h)
@ -83,61 +81,18 @@ int name_to_handle_at_loop(
h->handle_bytes = n; h->handle_bytes = n;
if (ret_unique_mnt_id) { if (FLAGS_SET(flags, AT_HANDLE_MNT_ID_UNIQUE))
uint64_t mnt_id;
/* The kernel will still use this as uint64_t pointer */ /* The kernel will still use this as uint64_t pointer */
r = name_to_handle_at(fd, path, h, (int *) &mnt_id, flags|AT_HANDLE_MNT_ID_UNIQUE); r = name_to_handle_at(fd, path, h, (int *) &unique_mnt_id, flags);
if (r >= 0) { else
if (ret_handle) r = name_to_handle_at(fd, path, h, &mnt_id, flags);
*ret_handle = TAKE_PTR(h);
*ret_unique_mnt_id = mnt_id;
if (ret_mnt_id)
*ret_mnt_id = -1;
return 1;
}
if (errno == EOVERFLOW)
goto grow;
if (errno != EINVAL)
return -errno;
}
int mnt_id;
r = name_to_handle_at(fd, path, h, &mnt_id, flags);
if (r >= 0) { if (r >= 0) {
if (ret_unique_mnt_id) {
/* Hmm, AT_HANDLE_MNT_ID_UNIQUE is not supported? Let's try to acquire
* the unique mount id from statx() then, which has a slightly lower
* kernel version requirement (6.8 vs 6.12). */
struct statx sx;
r = xstatx(fd, path,
at_flags_normalize_nofollow(flags & (AT_SYMLINK_FOLLOW|AT_EMPTY_PATH))|AT_STATX_DONT_SYNC,
STATX_MNT_ID_UNIQUE,
&sx);
if (r >= 0) {
if (ret_handle)
*ret_handle = TAKE_PTR(h);
*ret_unique_mnt_id = sx.stx_mnt_id;
if (ret_mnt_id)
*ret_mnt_id = -1;
return 1;
}
if (r != -EUNATCH || !ret_mnt_id)
return r;
*ret_unique_mnt_id = 0;
}
if (ret_handle) if (ret_handle)
*ret_handle = TAKE_PTR(h); *ret_handle = TAKE_PTR(h);
if (ret_unique_mnt_id)
*ret_unique_mnt_id = unique_mnt_id;
if (ret_mnt_id) if (ret_mnt_id)
*ret_mnt_id = mnt_id; *ret_mnt_id = mnt_id;
@ -146,7 +101,19 @@ int name_to_handle_at_loop(
if (errno != EOVERFLOW) if (errno != EOVERFLOW)
return -errno; return -errno;
grow: if (!ret_handle && ((ret_mnt_id && mnt_id >= 0) || (ret_unique_mnt_id && unique_mnt_id > 0))) {
/* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
* buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
* be filled in, and the caller was interested in only the mount ID and nothing else. */
if (ret_unique_mnt_id)
*ret_unique_mnt_id = unique_mnt_id;
if (ret_mnt_id)
*ret_mnt_id = mnt_id;
return 0;
}
/* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by
* something else (apparently EOVERFLOW is returned for untriggered nfs4 autofs mounts * something else (apparently EOVERFLOW is returned for untriggered nfs4 autofs mounts
* sometimes), not by the too small buffer. In that case propagate EOVERFLOW */ * sometimes), not by the too small buffer. In that case propagate EOVERFLOW */
@ -167,7 +134,6 @@ int name_to_handle_at_try_fid(
const char *path, const char *path,
struct file_handle **ret_handle, struct file_handle **ret_handle,
int *ret_mnt_id, int *ret_mnt_id,
uint64_t *ret_unique_mnt_id,
int flags) { int flags) {
int r; int r;
@ -178,11 +144,55 @@ int name_to_handle_at_try_fid(
* we'll try without the flag, in order to support older kernels that didn't have AT_HANDLE_FID * we'll try without the flag, in order to support older kernels that didn't have AT_HANDLE_FID
* (i.e. older than Linux 6.5). */ * (i.e. older than Linux 6.5). */
r = name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, ret_unique_mnt_id, flags | AT_HANDLE_FID); r = name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, /* ret_unique_mnt_id= */ NULL, flags | AT_HANDLE_FID);
if (r >= 0 || is_name_to_handle_at_fatal_error(r)) if (r >= 0 || is_name_to_handle_at_fatal_error(r))
return r; return r;
return name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, ret_unique_mnt_id, flags & ~AT_HANDLE_FID); return name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, /* ret_unique_mnt_id= */ NULL, flags & ~AT_HANDLE_FID);
}
int name_to_handle_at_try_unique_mntid_fid(
int fd,
const char *path,
struct file_handle **ret_handle,
uint64_t *ret_mnt_id,
int flags) {
int mnt_id = -1, r;
assert(fd >= 0 || fd == AT_FDCWD);
/* First issues name_to_handle_at() with AT_HANDLE_MNT_ID_UNIQUE and AT_HANDLE_FID.
* If this fails and this is not a fatal error we'll try without the
* AT_HANDLE_MNT_ID_UNIQUE flag because it's only available from Linux 6.12 onwards. */
r = name_to_handle_at_loop(fd, path, ret_handle, /* ret_mnt_id= */ NULL, ret_mnt_id, flags | AT_HANDLE_MNT_ID_UNIQUE | AT_HANDLE_FID);
if (r >= 0 || is_name_to_handle_at_fatal_error(r))
return r;
flags &= ~AT_HANDLE_MNT_ID_UNIQUE;
/* Then issues name_to_handle_at() with AT_HANDLE_FID. If this fails and this is not a fatal error
* we'll try without the flag, in order to support older kernels that didn't have AT_HANDLE_FID
* (i.e. older than Linux 6.5). */
r = name_to_handle_at_loop(fd, path, ret_handle, &mnt_id, /* ret_unique_mnt_id= */ NULL, flags | AT_HANDLE_FID);
if (r < 0 && is_name_to_handle_at_fatal_error(r))
return r;
if (r >= 0) {
if (ret_mnt_id && mnt_id >= 0) {
/* See if we can do better because statx can do unique mount IDs since Linux 6.8
* and only if this doesn't work we use the non-unique mnt_id as returned. */
if (path_get_unique_mnt_id_at(fd, path, ret_mnt_id) < 0)
*ret_mnt_id = mnt_id;
}
return r;
}
r = name_to_handle_at_loop(fd, path, ret_handle, &mnt_id, /* ret_unique_mnt_id= */ NULL, flags & ~AT_HANDLE_FID);
if (ret_mnt_id && mnt_id >= 0)
*ret_mnt_id = mnt_id;
return r;
} }
int name_to_handle_at_u64(int fd, const char *path, uint64_t *ret) { int name_to_handle_at_u64(int fd, const char *path, uint64_t *ret) {
@ -236,24 +246,31 @@ struct file_handle* file_handle_dup(const struct file_handle *fh) {
int is_mount_point_at(int dir_fd, const char *path, int flags) { int is_mount_point_at(int dir_fd, const char *path, int flags) {
int r; int r;
assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
assert((flags & ~AT_SYMLINK_FOLLOW) == 0); assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
if (path_equal(path, "/")) if (path_equal(path, "/"))
return true; return true;
if (dir_fd == XAT_FDROOT && isempty(path)) if (isempty(path)) {
return true; if (dir_fd == AT_FDCWD)
path = ".";
else {
flags |= AT_EMPTY_PATH;
path = "";
}
}
struct statx sx; struct statx sx = {}; /* explicitly initialize the struct to make msan silent. */
r = xstatx_full(dir_fd, path, if (statx(dir_fd, path,
at_flags_normalize_nofollow(flags) | at_flags_normalize_nofollow(flags) |
AT_NO_AUTOMOUNT | /* don't trigger automounts mounts are a local concept, hence no need to trigger automounts to determine STATX_ATTR_MOUNT_ROOT */ AT_NO_AUTOMOUNT | /* don't trigger automounts mounts are a local concept, hence no need to trigger automounts to determine STATX_ATTR_MOUNT_ROOT */
AT_STATX_DONT_SYNC, /* don't go to the network for this for similar reasons */ AT_STATX_DONT_SYNC, /* don't go to the network for this for similar reasons */
STATX_TYPE|STATX_INO, STATX_TYPE|STATX_INO,
/* optional_mask = */ 0, &sx) < 0)
STATX_ATTR_MOUNT_ROOT, return -errno;
&sx);
r = statx_warn_mount_root(&sx, LOG_DEBUG);
if (r < 0) if (r < 0)
return r; return r;
@ -290,19 +307,23 @@ int path_is_mount_point_full(const char *path, const char *root, int flags) {
return is_mount_point_at(dir_fd, /* path= */ NULL, flags); return is_mount_point_at(dir_fd, /* path= */ NULL, flags);
} }
static int path_get_mnt_id_at_internal(int dir_fd, const char *path, bool unique, uint64_t *ret) { int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) {
struct statx sx; struct statx sx;
int r; int r;
assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
assert(ret); assert(ret);
r = xstatx(dir_fd, path, if (statx(dir_fd,
AT_SYMLINK_NOFOLLOW | strempty(path),
AT_NO_AUTOMOUNT | /* don't trigger automounts, mnt_id is a local concept */ (isempty(path) ? AT_EMPTY_PATH : AT_SYMLINK_NOFOLLOW) |
AT_STATX_DONT_SYNC, /* don't go to the network, mnt_id is a local concept */ AT_NO_AUTOMOUNT | /* don't trigger automounts, mnt_id is a local concept */
unique ? STATX_MNT_ID_UNIQUE : STATX_MNT_ID, AT_STATX_DONT_SYNC, /* don't go to the network, mnt_id is a local concept */
&sx); STATX_MNT_ID,
&sx) < 0)
return -errno;
r = statx_warn_mount_id(&sx, LOG_DEBUG);
if (r < 0) if (r < 0)
return r; return r;
@ -310,21 +331,26 @@ static int path_get_mnt_id_at_internal(int dir_fd, const char *path, bool unique
return 0; return 0;
} }
int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) {
uint64_t mnt_id;
int r;
r = path_get_mnt_id_at_internal(dir_fd, path, /* unique = */ false, &mnt_id);
if (r < 0)
return r;
assert(mnt_id <= INT_MAX);
*ret = (int) mnt_id;
return 0;
}
int path_get_unique_mnt_id_at(int dir_fd, const char *path, uint64_t *ret) { int path_get_unique_mnt_id_at(int dir_fd, const char *path, uint64_t *ret) {
return path_get_mnt_id_at_internal(dir_fd, path, /* unique = */ true, ret); struct statx sx;
assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
assert(ret);
if (statx(dir_fd,
strempty(path),
(isempty(path) ? AT_EMPTY_PATH : AT_SYMLINK_NOFOLLOW) |
AT_NO_AUTOMOUNT | /* don't trigger automounts, mnt_id is a local concept */
AT_STATX_DONT_SYNC, /* don't go to the network, mnt_id is a local concept */
STATX_MNT_ID_UNIQUE,
&sx) < 0)
return -errno;
if (!FLAGS_SET(sx.stx_mask, STATX_MNT_ID_UNIQUE))
return -EOPNOTSUPP;
*ret = sx.stx_mnt_id;
return 0;
} }
bool fstype_is_network(const char *fstype) { bool fstype_is_network(const char *fstype) {

View File

@ -35,7 +35,8 @@
bool is_name_to_handle_at_fatal_error(int err); bool is_name_to_handle_at_fatal_error(int err);
int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, uint64_t *ret_unique_mnt_id, int flags); int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, uint64_t *ret_unique_mnt_id, int flags);
int name_to_handle_at_try_fid(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, uint64_t *ret_unique_mnt_id, int flags); int name_to_handle_at_try_fid(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
int name_to_handle_at_try_unique_mntid_fid(int fd, const char *path, struct file_handle **ret_handle, uint64_t *ret_mnt_id, int flags);
int name_to_handle_at_u64(int fd, const char *path, uint64_t *ret); int name_to_handle_at_u64(int fd, const char *path, uint64_t *ret);
static inline int path_to_handle_u64(const char *path, uint64_t *ret) { static inline int path_to_handle_u64(const char *path, uint64_t *ret) {
return name_to_handle_at_u64(AT_FDCWD, path, ret); return name_to_handle_at_u64(AT_FDCWD, path, ret);

View File

@ -618,7 +618,7 @@ int userns_enter_and_pin(int userns_fd, PidRef *ret) {
"(sd-pinuserns)", "(sd-pinuserns)",
/* stdio_fds= */ NULL, /* stdio_fds= */ NULL,
(int[]) { pfd[1], userns_fd }, 2, (int[]) { pfd[1], userns_fd }, 2,
FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG|FORK_DEATHSIG_SIGKILL, FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL,
&pidref); &pidref);
if (r < 0) if (r < 0)
return r; return r;

View File

@ -15,6 +15,7 @@
#include "stat-util.h" #include "stat-util.h"
#include "stdio-util.h" #include "stdio-util.h"
#include "string-util.h" #include "string-util.h"
#include "unaligned.h"
static thread_local int have_pidfs = -1; static thread_local int have_pidfs = -1;

View File

@ -6,7 +6,6 @@
#include <unistd.h> #include <unistd.h>
#include "alloc-util.h" #include "alloc-util.h"
#include "bitfield.h"
#include "chase.h" #include "chase.h"
#include "dirent-util.h" #include "dirent-util.h"
#include "errno-util.h" #include "errno-util.h"
@ -37,9 +36,19 @@ static int verify_stat_at(
assert(verify_func); assert(verify_func);
_cleanup_free_ char *p = NULL; _cleanup_free_ char *p = NULL;
r = resolve_xat_fdroot(&fd, &path, &p); if (fd == XAT_FDROOT) {
if (r < 0) fd = AT_FDCWD;
return r;
if (isempty(path))
path = "/";
else if (!path_is_absolute(path)) {
p = strjoin("/", path);
if (!p)
return -ENOMEM;
path = p;
}
}
if (fstatat(fd, strempty(path), &st, if (fstatat(fd, strempty(path), &st,
(isempty(path) ? AT_EMPTY_PATH : 0) | (follow ? 0 : AT_SYMLINK_NOFOLLOW)) < 0) (isempty(path) ? AT_EMPTY_PATH : 0) | (follow ? 0 : AT_SYMLINK_NOFOLLOW)) < 0)
@ -236,97 +245,6 @@ int null_or_empty_path_with_root(const char *fn, const char *root) {
return null_or_empty(&st); return null_or_empty(&st);
} }
static const char* statx_mask_one_to_name(unsigned mask);
static const char* statx_attribute_to_name(uint64_t attr);
#include "statx-attribute-to-name.inc"
#include "statx-mask-to-name.inc"
/* Defines a static function <prefix>_to_string() that formats the bit field 'v' (of integer type
 * 'type') as a newly allocated string: the names of all set bits that 'func' knows are joined with
 * "|", and any remaining set bits that 'func' has no name for are appended as one number formatted
 * with 'format_str'. Returns an empty string if no bit is set, and NULL if memory allocation
 * failed. */
#define DEFINE_STATX_BITS_TO_STRING(prefix, type, func, format_str)             \
        static char* prefix##_to_string(type v) {                               \
                if (v == 0)                                                     \
                        return strdup("");                                      \
                                                                                \
                _cleanup_free_ char *s = NULL;                                  \
                                                                                \
                BIT_FOREACH(i, v) {                                             \
                        /* Shift a value of 'type', not a plain int: */         \
                        /* "1 << i" is undefined for i >= 31 and can */         \
                        /* never yield the upper half of a 64-bit    */         \
                        /* field.                                    */         \
                        type f = (type) 1 << i;                                 \
                                                                                \
                        const char *n = func(f);                                \
                        if (!n)                                                 \
                                continue;                                       \
                                                                                \
                        if (!strextend_with_separator(&s, "|", n))              \
                                return NULL;                                    \
                                                                                \
                        /* Named bits are cleared, so that only the */          \
                        /* unnamed ones are left for the numeric    */          \
                        /* suffix below.                            */          \
                        v &= ~f;                                                \
                }                                                               \
                                                                                \
                if (v != 0 && strextendf_with_separator(&s, "|", format_str, v) < 0) \
                        return NULL;                                            \
                                                                                \
                return TAKE_PTR(s);                                             \
        }
DEFINE_STATX_BITS_TO_STRING(statx_mask, unsigned, statx_mask_one_to_name, "0x%x");
DEFINE_STATX_BITS_TO_STRING(statx_attributes, uint64_t, statx_attribute_to_name, "0x%" PRIx64);
int xstatx_full(int fd,
const char *path,
int flags,
unsigned mandatory_mask,
unsigned optional_mask,
uint64_t mandatory_attributes,
struct statx *ret) {
struct statx sx = {}; /* explicitly initialize the struct to make msan silent. */
int r;
/* Wrapper around statx(), with additional bells and whistles:
*
* 1. AT_EMPTY_PATH is implied on empty path
* 2. Supports XAT_FDROOT
* 3. Takes separate mandatory and optional mask params, plus mandatory attributes.
* Returns -EUNATCH if statx() does not return all masks specified as mandatory,
* > 0 if all optional masks are supported, 0 otherwise.
*/
assert(fd >= 0 || IN_SET(fd, AT_FDCWD, XAT_FDROOT));
assert((mandatory_mask & optional_mask) == 0);
assert(ret);
_cleanup_free_ char *p = NULL;
r = resolve_xat_fdroot(&fd, &path, &p);
if (r < 0)
return r;
if (statx(fd, strempty(path),
flags|(isempty(path) ? AT_EMPTY_PATH : 0),
mandatory_mask|optional_mask,
&sx) < 0)
return negative_errno();
if (!FLAGS_SET(sx.stx_mask, mandatory_mask)) {
if (DEBUG_LOGGING) {
_cleanup_free_ char *mask_str = statx_mask_to_string(mandatory_mask & ~sx.stx_mask);
log_debug("statx() does not support '%s' mask (running on an old kernel?)", strnull(mask_str));
}
return -EUNATCH;
}
if (!FLAGS_SET(sx.stx_attributes_mask, mandatory_attributes)) {
if (DEBUG_LOGGING) {
_cleanup_free_ char *attr_str = statx_attributes_to_string(mandatory_attributes & ~sx.stx_attributes_mask);
log_debug("statx() does not support '%s' attribute (running on an old kernel?)", strnull(attr_str));
}
return -EUNATCH;
}
*ret = sx;
return FLAGS_SET(sx.stx_mask, optional_mask);
}
static int xfstatfs(int fd, struct statfs *ret) { static int xfstatfs(int fd, struct statfs *ret) {
assert(ret); assert(ret);
@ -431,14 +349,12 @@ int inode_same_at(int fda, const char *filea, int fdb, const char *fileb, int fl
int ntha_flags = at_flags_normalize_follow(flags) & (AT_EMPTY_PATH|AT_SYMLINK_FOLLOW); int ntha_flags = at_flags_normalize_follow(flags) & (AT_EMPTY_PATH|AT_SYMLINK_FOLLOW);
_cleanup_free_ struct file_handle *ha = NULL, *hb = NULL; _cleanup_free_ struct file_handle *ha = NULL, *hb = NULL;
uint64_t mntida, mntidb; int mntida = -1, mntidb = -1;
int _mntida, _mntidb;
r = name_to_handle_at_try_fid( r = name_to_handle_at_try_fid(
fda, fda,
filea, filea,
&ha, &ha,
&_mntida,
&mntida, &mntida,
ntha_flags); ntha_flags);
if (r < 0) { if (r < 0) {
@ -447,15 +363,12 @@ int inode_same_at(int fda, const char *filea, int fdb, const char *fileb, int fl
goto fallback; goto fallback;
} }
if (r == 0)
mntida = _mntida;
r = name_to_handle_at_try_fid( r = name_to_handle_at_try_fid(
fdb, fdb,
fileb, fileb,
&hb, &hb,
r > 0 ? NULL : &_mntidb, /* if we managed to get unique mnt id for a, insist on that for b */ &mntidb,
r > 0 ? &mntidb : NULL,
ntha_flags); ntha_flags);
if (r < 0) { if (r < 0) {
if (is_name_to_handle_at_fatal_error(r)) if (is_name_to_handle_at_fatal_error(r))
@ -463,8 +376,6 @@ int inode_same_at(int fda, const char *filea, int fdb, const char *fileb, int fl
goto fallback; goto fallback;
} }
if (r == 0)
mntidb = _mntidb;
/* Now compare the two file handles */ /* Now compare the two file handles */
if (!file_handle_equal(ha, hb)) if (!file_handle_equal(ha, hb))
@ -701,3 +612,25 @@ mode_t inode_type_from_string(const char *s) {
return MODE_INVALID; return MODE_INVALID;
} }
/* Checks that the statx() result 'sx' lists STATX_ATTR_MOUNT_ROOT in stx_attributes_mask. Returns 0
 * if it does; otherwise logs a message at 'log_level' and returns the value of log_full_errno() for
 * a synthetic ENOSYS. */
int statx_warn_mount_root(const struct statx *sx, int log_level) {
        assert(sx);

        /* STATX_ATTR_MOUNT_ROOT is available since kernel v5.8. */
        if (FLAGS_SET(sx->stx_attributes_mask, STATX_ATTR_MOUNT_ROOT))
                return 0;

        return log_full_errno(log_level, SYNTHETIC_ERRNO(ENOSYS),
                              "statx() did not set STATX_ATTR_MOUNT_ROOT, running on an old kernel?");
}
/* Checks that the statx() result 'sx' has STATX_MNT_ID set in stx_mask. Returns 0 if it has;
 * otherwise logs a message at 'log_level' and returns the value of log_full_errno() for a synthetic
 * ENOSYS. */
int statx_warn_mount_id(const struct statx *sx, int log_level) {
        assert(sx);

        /* STATX_MNT_ID is available since kernel v5.10. */
        if (FLAGS_SET(sx->stx_mask, STATX_MNT_ID))
                return 0;

        return log_full_errno(log_level, SYNTHETIC_ERRNO(ENOSYS),
                              "statx() does not support STATX_MNT_ID, running on an old kernel?");
}

View File

@ -41,24 +41,6 @@ static inline int null_or_empty_path(const char *fn) {
return null_or_empty_path_with_root(fn, NULL); return null_or_empty_path_with_root(fn, NULL);
} }
/* Wrapper around statx() that implies AT_EMPTY_PATH for an empty path, accepts XAT_FDROOT as fd, and
 * takes the requested mask split into a mandatory and an optional part, plus mandatory attributes.
 * Returns -EUNATCH if a mandatory mask bit or attribute is not supported, > 0 if all optional mask
 * bits were reported, 0 otherwise, and a negative errno-style value on any other failure. */
int xstatx_full(int fd,
const char *path,
int flags,
unsigned mandatory_mask,
unsigned optional_mask,
uint64_t mandatory_attributes,
struct statx *ret);
/* Shortcut for xstatx_full() for callers that have neither optional mask bits nor mandatory
 * attributes to request. */
static inline int xstatx(
                int fd,
                const char *path,
                int flags,
                unsigned mandatory_mask,
                struct statx *ret) {

        return xstatx_full(
                        fd,
                        path,
                        flags,
                        mandatory_mask,
                        /* optional_mask = */ 0,
                        /* mandatory_attributes = */ 0,
                        ret);
}
int fd_is_read_only_fs(int fd); int fd_is_read_only_fs(int fd);
int path_is_read_only_fs(const char *path); int path_is_read_only_fs(const char *path);
@ -135,3 +117,6 @@ static inline bool inode_type_can_hardlink(mode_t m) {
* type). */ * type). */
return IN_SET(m & S_IFMT, S_IFSOCK, S_IFLNK, S_IFREG, S_IFBLK, S_IFCHR, S_IFIFO); return IN_SET(m & S_IFMT, S_IFSOCK, S_IFLNK, S_IFREG, S_IFBLK, S_IFCHR, S_IFIFO);
} }
int statx_warn_mount_root(const struct statx *sx, int log_level);
int statx_warn_mount_id(const struct statx *sx, int log_level);

View File

@ -1,14 +0,0 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Generates the C function statx_attribute_to_name(), which maps a statx attribute constant to its
# name as a string. Every input line contributes one "case": the first field of the line is used both
# as the case label and as the returned string literal. Unknown values map to NULL.
# Emit the function header and open the switch statement.
BEGIN{
print "const char* statx_attribute_to_name(uint64_t attr) {"
print " switch (attr) {"
}
# One case per input line; $1 is expected to be the macro name.
{
printf " case %s: return \"%s\";\n", $1, $1
}
# Emit the fallback case and close switch and function.
END{
print " default: return NULL;"
print " }"
print "}"
}

View File

@ -1,14 +0,0 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Generates the C function statx_mask_one_to_name(), which maps a single statx mask constant to its
# name as a string. Every input line contributes one "case": the first field of the line is used both
# as the case label and as the returned string literal. Unknown values map to NULL.
# Emit the function header and open the switch statement.
BEGIN{
print "const char* statx_mask_one_to_name(unsigned mask) {"
print " switch (mask) {"
}
# One case per input line; $1 is expected to be the macro name.
{
printf " case %s: return \"%s\";\n", $1, $1
}
# Emit the fallback case and close switch and function.
END{
print " default: return NULL;"
print " }"
print "}"
}

View File

@ -419,6 +419,9 @@ int getcrtime_at(
assert(fd >= 0 || fd == AT_FDCWD); assert(fd >= 0 || fd == AT_FDCWD);
assert((at_flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0); assert((at_flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
if (isempty(path))
at_flags |= AT_EMPTY_PATH;
/* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept /* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept
* on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was * on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was
* implemented on various file systems on the lower level since a while, but never was accessible). However, we * implemented on various file systems on the lower level since a while, but never was accessible). However, we
@ -429,13 +432,11 @@ int getcrtime_at(
* concept is useful for determining how "old" a file really is, and hence using the older of the two makes * concept is useful for determining how "old" a file really is, and hence using the older of the two makes
* most sense. */ * most sense. */
r = xstatx_full(fd, path, if (statx(fd, strempty(path),
at_flags_normalize_nofollow(at_flags)|AT_STATX_DONT_SYNC, at_flags_normalize_nofollow(at_flags)|AT_STATX_DONT_SYNC,
/* mandatory_mask = */ 0, STATX_BTIME,
STATX_BTIME, &sx) >= 0 &&
/* mandatory_attributes = */ 0, FLAGS_SET(sx.stx_mask, STATX_BTIME) && sx.stx_btime.tv_sec != 0)
&sx);
if (r > 0 && sx.stx_btime.tv_sec != 0) /* > 0: all optional masks are supported */
a = statx_timestamp_load(&sx.stx_btime); a = statx_timestamp_load(&sx.stx_btime);
else else
a = USEC_INFINITY; a = USEC_INFINITY;

View File

@ -1,7 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */ /* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "efi-string.h" #include "efi-string.h"
#include "string-util-fundamental.h"
#if SD_BOOT #if SD_BOOT
# include "proto/simple-text-io.h" # include "proto/simple-text-io.h"
@ -498,7 +497,8 @@ char* line_get_key_value(char *s, const char *sep, size_t *pos, char **ret_key,
value++; value++;
/* unquote */ /* unquote */
if (strchr8(QUOTES, value[0]) && line[linelen - 1] == value[0]) { if ((value[0] == '"' && line[linelen - 1] == '"') ||
(value[0] == '\'' && line[linelen - 1] == '\'')) {
value++; value++;
line[linelen - 1] = '\0'; line[linelen - 1] = '\0';
} }

View File

@ -43,8 +43,3 @@ keyboard_sources = files(
'uapi/linux/input.h', 'uapi/linux/input.h',
'uapi/linux/input-event-codes.h', 'uapi/linux/input-event-codes.h',
) )
# Source files that provides STATX_*
statx_sources = files(
'uapi/linux/stat.h',
)

View File

@ -685,7 +685,7 @@ int json_variant_new_fd_info(sd_json_variant **ret, int fd) {
if (r < 0) if (r < 0)
return r; return r;
r = name_to_handle_at_try_fid(fd, "", &fid, &mntid, /* ret_unique_mnt_id = */ NULL, AT_EMPTY_PATH); r = name_to_handle_at_try_fid(fd, "", &fid, &mntid, AT_EMPTY_PATH);
if (r < 0 && is_name_to_handle_at_fatal_error(r)) if (r < 0 && is_name_to_handle_at_fatal_error(r))
return r; return r;

View File

@ -36,8 +36,6 @@
#include "os-util.h" #include "os-util.h"
#include "path-util.h" #include "path-util.h"
#include "pidref.h" #include "pidref.h"
#include "process-util.h"
#include "socket-util.h"
#include "stat-util.h" #include "stat-util.h"
#include "string-table.h" #include "string-table.h"
#include "string-util.h" #include "string-util.h"
@ -880,99 +878,6 @@ static DirectoryOwnership validate_directory_fd(
return DIRECTORY_IS_OTHERWISE_OWNED; return DIRECTORY_IS_OTHERWISE_OWNED;
} }
/* Acquires a mount fd for the directory referenced by directory_fd via open_tree_try_drop_idmap(), also
 * covering the case where the fd stems from a mount namespace other than our own.
 *
 * The call is first attempted directly. Any outcome other than -EINVAL is final. On -EINVAL the mount
 * namespace of the varlink peer is compared against ours: if both are the same the error is final as
 * well, otherwise the call is repeated from a child process forked into the peer's mount namespace. The
 * child hands the resulting fd back over a socket pair and reports failures over a pipe.
 *
 * directory_path is only used for log messages (passed through strna()).
 *
 * Returns the new fd on success (ownership passes to the caller), or a negative error code. */
static int open_tree_try_drop_idmap_harder(sd_varlink *link, int directory_fd, const char *directory_path) {
        /* Identical flag set for the direct attempt and for the attempt made from the child. */
        const unsigned open_flags = OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH;
        int r;

        _cleanup_close_ int tree_fd = open_tree_try_drop_idmap(directory_fd, "", open_flags);
        if (tree_fd < 0 && tree_fd != -EINVAL)
                return log_debug_errno(tree_fd, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));
        if (tree_fd >= 0)
                return TAKE_FD(tree_fd);

        /* From here on tree_fd == -EINVAL. Find out whether a foreign mount namespace could be the reason. */
        _cleanup_(pidref_done) PidRef peer = PIDREF_NULL;
        r = varlink_get_peer_pidref(link, &peer);
        if (r < 0)
                return r;

        _cleanup_close_ int peer_mntns_fd = pidref_namespace_open_by_type(&peer, NAMESPACE_MOUNT);
        if (peer_mntns_fd < 0)
                return log_debug_errno(peer_mntns_fd, "Failed to open mount namespace of peer: %m");

        r = is_our_namespace(peer_mntns_fd, NAMESPACE_MOUNT);
        if (r < 0)
                return log_debug_errno(r, "Failed to check if peer is in same mount namespace: %m");
        if (r > 0)
                /* Same namespace, hence the namespace cannot be the culprit: report the original error. */
                return log_debug_errno(tree_fd, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));

        /* The peer lives in a different mount namespace. open_tree() fails with EINVAL on directory fds
         * that belong to another mount namespace, hence fork off a child that joins the peer's mount
         * namespace and issues open_tree() from there. */

        _cleanup_close_pair_ int error_pipe[2] = EBADF_PAIR;
        _cleanup_close_pair_ int fd_channel[2] = EBADF_PAIR;

        if (pipe2(error_pipe, O_CLOEXEC) < 0)
                return log_debug_errno(errno, "Failed to create pipe: %m");

        if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, fd_channel) < 0)
                return log_debug_errno(errno, "Failed to create socket pair: %m");

        _cleanup_(pidref_done) PidRef child = PIDREF_NULL;
        r = namespace_fork(
                        "(sd-opentreens)",
                        "(sd-opentree)",
                        FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL,
                        /* pidns_fd= */ -EBADF,
                        peer_mntns_fd,
                        /* netns_fd= */ -EBADF,
                        /* userns_fd= */ -EBADF,
                        /* root_fd= */ -EBADF,
                        &child);
        if (r < 0)
                return log_debug_errno(r, "Failed to fork into peer's mount namespace: %m");
        if (r == 0) {
                /* Child: keep only the write ends, retry open_tree(), then ship the fd to the parent. */
                error_pipe[0] = safe_close(error_pipe[0]);
                fd_channel[0] = safe_close(fd_channel[0]);

                tree_fd = open_tree_try_drop_idmap(directory_fd, "", open_flags);
                if (tree_fd < 0) {
                        log_debug_errno(tree_fd, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));
                        report_errno_and_exit(error_pipe[1], tree_fd);
                }

                r = send_one_fd(fd_channel[1], tree_fd, /* flags= */ 0);
                if (r < 0) {
                        log_debug_errno(r, "Failed to send mount fd: %m");
                        report_errno_and_exit(error_pipe[1], r);
                }

                _exit(EXIT_SUCCESS);
        }

        /* Parent: drop the write ends, so that only the child holds them. */
        error_pipe[1] = safe_close(error_pipe[1]);
        fd_channel[1] = safe_close(fd_channel[1]);

        r = pidref_wait_for_terminate_and_check("(sd-opentreens)", &child, /* flags= */ 0);
        if (r < 0)
                return log_debug_errno(r, "Failed to wait for child: %m");

        /* Pick up whatever error the child reported through the pipe. */
        r = read_errno(error_pipe[0]);
        if (r < 0)
                return r;

        /* The child has already terminated at this point, hence receive without blocking. */
        tree_fd = receive_one_fd(fd_channel[0], MSG_DONTWAIT);
        if (tree_fd < 0)
                return log_debug_errno(tree_fd, "Failed to receive mount fd from child: %m");

        return TAKE_FD(tree_fd);
}
static int vl_method_mount_directory( static int vl_method_mount_directory(
sd_varlink *link, sd_varlink *link,
sd_json_variant *parameters, sd_json_variant *parameters,
@ -1087,9 +992,12 @@ static int vl_method_mount_directory(
if (r < 0) if (r < 0)
return r; return r;
_cleanup_close_ int mount_fd = open_tree_try_drop_idmap_harder(link, directory_fd, directory_path); _cleanup_close_ int mount_fd = open_tree_try_drop_idmap(
directory_fd,
"",
OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
if (mount_fd < 0) if (mount_fd < 0)
return mount_fd; return log_debug_errno(errno, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));
/* MOUNT_ATTR_IDMAP has possibly been cleared. Let's verify that the underlying data matches our expectations. */ /* MOUNT_ATTR_IDMAP has possibly been cleared. Let's verify that the underlying data matches our expectations. */
struct stat unmapped_st; struct stat unmapped_st;

View File

@ -31,7 +31,7 @@ void* bpf_rdonly_cast(const void *, __u32) __ksym;
* by their inode number in nsfs) that restricts creation of inodes (which would inherit the callers UID/GID) * by their inode number in nsfs) that restricts creation of inodes (which would inherit the callers UID/GID)
* or changing of ownership (similar). * or changing of ownership (similar).
* *
* This hooks into the various path-based LSM entrypoints that control inode creation as well as chown(), and * This hooks into the various path-based LSM entrypoints that control inode creation as well as chmod(), and
* then looks up the calling process' user namespace in a global map of namespaces, which points us to * then looks up the calling process' user namespace in a global map of namespaces, which points us to
* another map that is simply a list of allowed mnt_ids. */ * another map that is simply a list of allowed mnt_ids. */
@ -140,8 +140,6 @@ int BPF_PROG(userns_restrict_path_mkdir, struct path *dir, struct dentry *dentry
return validate_path(dir, ret); return validate_path(dir, ret);
} }
/* The mknod hook covers all file creations, including regular files, in case the reader is looking for a
* missing hook for open(). */
SEC("lsm/path_mknod") SEC("lsm/path_mknod")
int BPF_PROG(userns_restrict_path_mknod, const struct path *dir, struct dentry *dentry, umode_t mode, unsigned dev, int ret) { int BPF_PROG(userns_restrict_path_mknod, const struct path *dir, struct dentry *dentry, umode_t mode, unsigned dev, int ret) {
return validate_path(dir, ret); return validate_path(dir, ret);
@ -157,22 +155,25 @@ int BPF_PROG(userns_restrict_path_link, struct dentry *old_dentry, const struct
return validate_path(new_dir, ret); return validate_path(new_dir, ret);
} }
SEC("kprobe/retire_userns_sysctls") SEC("kprobe/free_user_ns")
int BPF_KPROBE(userns_restrict_retire_userns_sysctls, struct user_namespace *userns) { void BPF_KPROBE(userns_restrict_free_user_ns, struct work_struct *work) {
struct user_namespace *userns;
unsigned inode; unsigned inode;
void *mnt_id_map; void *mnt_id_map;
/* Inform userspace that a user namespace just went away. I wish there was a nicer way to hook into /* Inform userspace that a user namespace just went away. I wish there was a nicer way to hook into
* user namespaces being deleted than using kprobes, but couldn't find any. */ * user namespaces being deleted than using kprobes, but couldn't find any. */
userns = bpf_rdonly_cast(userns, bpf_core_type_id_kernel(struct user_namespace));
userns = bpf_rdonly_cast(container_of(work, struct user_namespace, work),
bpf_core_type_id_kernel(struct user_namespace));
inode = userns->ns.inum; inode = userns->ns.inum;
mnt_id_map = bpf_map_lookup_elem(&userns_mnt_id_hash, &inode); mnt_id_map = bpf_map_lookup_elem(&userns_mnt_id_hash, &inode);
if (!mnt_id_map) /* No rules installed for this userns? Then send no notification. */ if (!mnt_id_map) /* No rules installed for this userns? Then send no notification. */
return 0; return;
bpf_ringbuf_output(&userns_ringbuf, &inode, sizeof(inode), 0); bpf_ringbuf_output(&userns_ringbuf, &inode, sizeof(inode), 0);
return 0;
} }
static const char _license[] SEC("license") = "GPL"; static const char _license[] SEC("license") = "GPL";

View File

@ -142,7 +142,7 @@ static int extract_prefix(const char *path, char **ret) {
else { else {
const char *e; const char *e;
e = ENDSWITH_SET(bn, ".raw.v", ".raw", ".v"); e = endswith(bn, ".raw");
if (!e) if (!e)
e = strchr(bn, 0); e = strchr(bn, 0);

View File

@ -451,15 +451,27 @@ static int image_make(
path_startswith(path, "/usr") || path_startswith(path, "/usr") ||
(faccessat(fd, "", W_OK, AT_EACCESS|AT_EMPTY_PATH) < 0 && errno == EROFS); (faccessat(fd, "", W_OK, AT_EACCESS|AT_EMPTY_PATH) < 0 && errno == EROFS);
uint64_t on_mount_id = 0;
_cleanup_free_ struct file_handle *fh = NULL; _cleanup_free_ struct file_handle *fh = NULL;
uint64_t on_mount_id;
int _mnt_id;
r = name_to_handle_at_try_fid(fd, /* path= */ NULL, &fh, &_mnt_id, &on_mount_id, AT_EMPTY_PATH); r = name_to_handle_at_try_unique_mntid_fid(fd, /* path= */ NULL, &fh, &on_mount_id, /* flags= */ 0);
if (r < 0) if (r < 0) {
return r; if (is_name_to_handle_at_fatal_error(r))
if (r == 0) return r;
on_mount_id = _mnt_id;
r = path_get_unique_mnt_id_at(fd, /* path= */ NULL, &on_mount_id);
if (r < 0) {
if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
return r;
int on_mount_id_fallback = -1;
r = path_get_mnt_id_at(fd, /* path= */ NULL, &on_mount_id_fallback);
if (r < 0)
return r;
on_mount_id = on_mount_id_fallback;
}
}
if (S_ISDIR(st->st_mode)) { if (S_ISDIR(st->st_mode)) {
unsigned file_attr = 0; unsigned file_attr = 0;

View File

@ -285,20 +285,20 @@ static int verify_fsroot_dir(
if (r < 0 && r != -EADDRNOTAVAIL) if (r < 0 && r != -EADDRNOTAVAIL)
return log_error_errno(r, "Failed to extract filename of \"%s\": %m", path); return log_error_errno(r, "Failed to extract filename of \"%s\": %m", path);
r = xstatx_full(dir_fd, f, if (statx(dir_fd, strempty(f),
AT_SYMLINK_NOFOLLOW, AT_SYMLINK_NOFOLLOW|(isempty(f) ? AT_EMPTY_PATH : 0),
STATX_TYPE|STATX_INO, STATX_TYPE|STATX_INO|STATX_MNT_ID, &sx) < 0)
/* optional_mask = */ 0, return log_full_errno((searching && errno == ENOENT) ||
STATX_ATTR_MOUNT_ROOT, (unprivileged_mode && ERRNO_IS_PRIVILEGE(errno)) ? LOG_DEBUG : LOG_ERR, errno,
&sx);
if (r < 0)
return log_full_errno((searching && r == -ENOENT) ||
(unprivileged_mode && ERRNO_IS_NEG_PRIVILEGE(r)) ? LOG_DEBUG : LOG_ERR, r,
"Failed to determine block device node of \"%s\": %m", path); "Failed to determine block device node of \"%s\": %m", path);
if (!S_ISDIR(sx.stx_mode)) if (!S_ISDIR(sx.stx_mode))
return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR), "Path \"%s\" is not a directory", path); return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR), "Path \"%s\" is not a directory", path);
r = statx_warn_mount_root(&sx, LOG_ERR);
if (r < 0)
return r;
if (!FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) if (!FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT))
return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),

View File

@ -29,12 +29,13 @@ int portable_profile_dirs(RuntimeScope scope, char ***ret) {
_cleanup_free_ char *d = NULL; _cleanup_free_ char *d = NULL;
r = xdg_user_config_dir("systemd/portable/profile", &d); r = xdg_user_config_dir("systemd/portable/profile", &d);
if (r < 0) if (r < 0 && r != -ENXIO)
return r;
r = strv_consume(&dirs, TAKE_PTR(d));
if (r < 0)
return r; return r;
if (r >= 0) {
r = strv_consume(&dirs, TAKE_PTR(d));
if (r < 0)
return r;
}
r = xdg_user_runtime_dir("systemd/portable/profile", &d); r = xdg_user_runtime_dir("systemd/portable/profile", &d);
if (r < 0 && r != -ENXIO) if (r < 0 && r != -ENXIO)

View File

@ -1015,10 +1015,8 @@ static int make_tmpfs(void) {
struct make_archive_data { struct make_archive_data {
struct archive *archive; struct archive *archive;
TarFlags flags; TarFlags flags;
int hardlink_db_fd; int hardlink_db_fd;
char *hardlink_db_path; char *hardlink_db_path;
int have_unique_mount_id;
}; };
static int hardlink_lookup( static int hardlink_lookup(
@ -1044,29 +1042,16 @@ static int hardlink_lookup(
if (FLAGS_SET(sx->stx_mask, STATX_TYPE) && !inode_type_can_hardlink(sx->stx_mode)) if (FLAGS_SET(sx->stx_mask, STATX_TYPE) && !inode_type_can_hardlink(sx->stx_mode))
goto bypass; goto bypass;
uint64_t unique_mnt_id;
int mnt_id; int mnt_id;
r = name_to_handle_at_try_fid(inode_fd, /* path= */ NULL, r = name_to_handle_at_try_fid(inode_fd, /* path= */ NULL, &handle, &mnt_id, /* flags= */ AT_EMPTY_PATH);
&handle,
d->have_unique_mount_id <= 0 ? &mnt_id : NULL,
d->have_unique_mount_id != 0 ? &unique_mnt_id : NULL,
/* flags= */ AT_EMPTY_PATH);
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to get file handle of file: %m"); return log_error_errno(r, "Failed to get file handle of file: %m");
if (d->have_unique_mount_id < 0)
d->have_unique_mount_id = r > 0;
else
assert(d->have_unique_mount_id == (r > 0));
m = hexmem(SHA256_DIRECT(handle->f_handle, handle->handle_bytes), SHA256_DIGEST_SIZE); m = hexmem(SHA256_DIRECT(handle->f_handle, handle->handle_bytes), SHA256_DIGEST_SIZE);
if (!m) if (!m)
return log_oom(); return log_oom();
if (d->have_unique_mount_id) if (asprintf(&n, "%i:%i:%s", mnt_id, handle->handle_type, m) < 0)
r = asprintf(&n, "%" PRIu64 ":%i:%s", unique_mnt_id, handle->handle_type, m);
else
r = asprintf(&n, "%i:%i:%s", mnt_id, handle->handle_type, m);
if (r < 0)
return log_oom(); return log_oom();
if (d->hardlink_db_fd < 0) { if (d->hardlink_db_fd < 0) {
@ -1482,7 +1467,6 @@ int tar_c(int tree_fd, int output_fd, const char *filename, TarFlags flags) {
.archive = a, .archive = a,
.flags = flags, .flags = flags,
.hardlink_db_fd = -EBADF, .hardlink_db_fd = -EBADF,
.have_unique_mount_id = -1,
}; };
r = recurse_dir(tree_fd, r = recurse_dir(tree_fd,

View File

@ -15,104 +15,104 @@ TEST(uid_range) {
_cleanup_(uid_range_freep) UIDRange *p = NULL; _cleanup_(uid_range_freep) UIDRange *p = NULL;
uid_t search; uid_t search;
ASSERT_TRUE(uid_range_covers(p, 0, 0)); assert_se(uid_range_covers(p, 0, 0));
ASSERT_FALSE(uid_range_covers(p, 0, 1)); assert_se(!uid_range_covers(p, 0, 1));
ASSERT_FALSE(uid_range_covers(p, 100, UINT32_MAX)); assert_se(!uid_range_covers(p, 100, UINT32_MAX));
ASSERT_FALSE(uid_range_covers(p, UINT32_MAX, 1)); assert_se(!uid_range_covers(p, UINT32_MAX, 1));
ASSERT_FALSE(uid_range_covers(p, UINT32_MAX - 10, 11)); assert_se(!uid_range_covers(p, UINT32_MAX - 10, 11));
ASSERT_EQ(uid_range_entries(p), 0U); assert_se(uid_range_entries(p) == 0);
ASSERT_EQ(uid_range_size(p), 0U); assert_se(uid_range_size(p) == 0);
ASSERT_TRUE(uid_range_is_empty(p)); assert_se(uid_range_is_empty(p));
ASSERT_OK(uid_range_add_str(&p, "500-999")); assert_se(uid_range_add_str(&p, "500-999") >= 0);
ASSERT_NOT_NULL(p); assert_se(p);
ASSERT_EQ(uid_range_entries(p), 1U); assert_se(uid_range_entries(p) == 1);
ASSERT_EQ(uid_range_size(p), 500U); assert_se(uid_range_size(p) == 500);
ASSERT_FALSE(uid_range_is_empty(p)); assert_se(!uid_range_is_empty(p));
ASSERT_EQ(p->entries[0].start, 500U); assert_se(p->entries[0].start == 500);
ASSERT_EQ(p->entries[0].nr, 500U); assert_se(p->entries[0].nr == 500);
ASSERT_FALSE(uid_range_contains(p, 499)); assert_se(!uid_range_contains(p, 499));
ASSERT_TRUE(uid_range_contains(p, 500)); assert_se(uid_range_contains(p, 500));
ASSERT_TRUE(uid_range_contains(p, 999)); assert_se(uid_range_contains(p, 999));
ASSERT_FALSE(uid_range_contains(p, 1000)); assert_se(!uid_range_contains(p, 1000));
ASSERT_FALSE(uid_range_covers(p, 100, 150)); assert_se(!uid_range_covers(p, 100, 150));
ASSERT_FALSE(uid_range_covers(p, 400, 200)); assert_se(!uid_range_covers(p, 400, 200));
ASSERT_FALSE(uid_range_covers(p, 499, 1)); assert_se(!uid_range_covers(p, 499, 1));
ASSERT_TRUE(uid_range_covers(p, 500, 1)); assert_se(uid_range_covers(p, 500, 1));
ASSERT_TRUE(uid_range_covers(p, 501, 10)); assert_se(uid_range_covers(p, 501, 10));
ASSERT_TRUE(uid_range_covers(p, 999, 1)); assert_se(uid_range_covers(p, 999, 1));
ASSERT_FALSE(uid_range_covers(p, 999, 2)); assert_se(!uid_range_covers(p, 999, 2));
ASSERT_FALSE(uid_range_covers(p, 1000, 1)); assert_se(!uid_range_covers(p, 1000, 1));
ASSERT_FALSE(uid_range_covers(p, 1000, 100)); assert_se(!uid_range_covers(p, 1000, 100));
ASSERT_FALSE(uid_range_covers(p, 1001, 100)); assert_se(!uid_range_covers(p, 1001, 100));
search = UID_INVALID; search = UID_INVALID;
ASSERT_OK_POSITIVE(uid_range_next_lower(p, &search)); assert_se(uid_range_next_lower(p, &search));
ASSERT_EQ(search, 999U); assert_se(search == 999);
ASSERT_OK_POSITIVE(uid_range_next_lower(p, &search)); assert_se(uid_range_next_lower(p, &search));
ASSERT_EQ(search, 998U); assert_se(search == 998);
search = 501; search = 501;
ASSERT_OK_POSITIVE(uid_range_next_lower(p, &search)); assert_se(uid_range_next_lower(p, &search));
ASSERT_EQ(search, 500U); assert_se(search == 500);
ASSERT_ERROR(uid_range_next_lower(p, &search), EBUSY); assert_se(uid_range_next_lower(p, &search) == -EBUSY);
ASSERT_OK(uid_range_add_str(&p, "1000")); assert_se(uid_range_add_str(&p, "1000") >= 0);
ASSERT_EQ(uid_range_entries(p), 1U); assert_se(uid_range_entries(p) == 1);
ASSERT_EQ(p->entries[0].start, 500U); assert_se(p->entries[0].start == 500);
ASSERT_EQ(p->entries[0].nr, 501U); assert_se(p->entries[0].nr == 501);
ASSERT_OK(uid_range_add_str(&p, "30-40")); assert_se(uid_range_add_str(&p, "30-40") >= 0);
ASSERT_EQ(uid_range_entries(p), 2U); assert_se(uid_range_entries(p) == 2);
ASSERT_EQ(uid_range_size(p), 500U + 1U + 11U); assert_se(uid_range_size(p) == 500 + 1 + 11);
ASSERT_FALSE(uid_range_is_empty(p)); assert_se(!uid_range_is_empty(p));
ASSERT_EQ(p->entries[0].start, 30U); assert_se(p->entries[0].start == 30);
ASSERT_EQ(p->entries[0].nr, 11U); assert_se(p->entries[0].nr == 11);
ASSERT_EQ(p->entries[1].start , 500U); assert_se(p->entries[1].start == 500);
ASSERT_EQ(p->entries[1].nr , 501U); assert_se(p->entries[1].nr == 501);
ASSERT_OK(uid_range_add_str(&p, "60-70")); assert_se(uid_range_add_str(&p, "60-70") >= 0);
ASSERT_EQ(uid_range_entries(p), 3U); assert_se(uid_range_entries(p) == 3);
ASSERT_EQ(uid_range_size(p), 500U + 1U + 11U + 11U); assert_se(uid_range_size(p) == 500 + 1 + 11 + 11);
ASSERT_FALSE(uid_range_is_empty(p)); assert_se(!uid_range_is_empty(p));
ASSERT_EQ(p->entries[0].start, 30U); assert_se(p->entries[0].start == 30);
ASSERT_EQ(p->entries[0].nr, 11U); assert_se(p->entries[0].nr == 11);
ASSERT_EQ(p->entries[1].start, 60U); assert_se(p->entries[1].start == 60);
ASSERT_EQ(p->entries[1].nr, 11U); assert_se(p->entries[1].nr == 11);
ASSERT_EQ(p->entries[2].start, 500U); assert_se(p->entries[2].start == 500);
ASSERT_EQ(p->entries[2].nr, 501U); assert_se(p->entries[2].nr == 501);
ASSERT_OK(uid_range_add_str(&p, "20-2000")); assert_se(uid_range_add_str(&p, "20-2000") >= 0);
ASSERT_EQ(uid_range_entries(p), 1U); assert_se(uid_range_entries(p) == 1);
ASSERT_EQ(uid_range_size(p), 1981U); assert_se(uid_range_size(p) == 1981);
ASSERT_EQ(p->entries[0].start, 20U); assert_se(p->entries[0].start == 20);
ASSERT_EQ(p->entries[0].nr, 1981U); assert_se(p->entries[0].nr == 1981);
ASSERT_OK(uid_range_add_str(&p, "2002")); assert_se(uid_range_add_str(&p, "2002") >= 0);
ASSERT_EQ(uid_range_entries(p), 2U); assert_se(uid_range_entries(p) == 2);
ASSERT_EQ(uid_range_size(p), 1982U); assert_se(uid_range_size(p) == 1982);
ASSERT_EQ(p->entries[0].start, 20U); assert_se(p->entries[0].start == 20);
ASSERT_EQ(p->entries[0].nr, 1981U); assert_se(p->entries[0].nr == 1981);
ASSERT_EQ(p->entries[1].start, 2002U); assert_se(p->entries[1].start == 2002);
ASSERT_EQ(p->entries[1].nr, 1U); assert_se(p->entries[1].nr == 1);
_cleanup_(uid_range_freep) UIDRange *q = NULL; _cleanup_(uid_range_freep) UIDRange *q = NULL;
ASSERT_FALSE(uid_range_equal(p, q)); assert_se(!uid_range_equal(p, q));
ASSERT_OK(uid_range_add_str(&q, "20-2000")); assert_se(uid_range_add_str(&q, "20-2000") >= 0);
ASSERT_FALSE(uid_range_equal(p, q)); assert_se(!uid_range_equal(p, q));
ASSERT_OK(uid_range_add_str(&q, "2002")); assert_se(uid_range_add_str(&q, "2002") >= 0);
ASSERT_TRUE(uid_range_equal(p, q)); assert_se(uid_range_equal(p, q));
ASSERT_OK(uid_range_add_str(&p, "2001")); assert_se(uid_range_add_str(&p, "2001") >= 0);
ASSERT_EQ(uid_range_entries(p), 1U); assert_se(uid_range_entries(p) == 1);
ASSERT_EQ(uid_range_size(p), 1983U); assert_se(uid_range_size(p) == 1983);
ASSERT_EQ(p->entries[0].start, 20U); assert_se(p->entries[0].start == 20);
ASSERT_EQ(p->entries[0].nr, 1983U); assert_se(p->entries[0].nr == 1983);
ASSERT_OK(uid_range_add_str(&q, "2001")); assert_se(uid_range_add_str(&q, "2001") >= 0);
ASSERT_TRUE(uid_range_equal(p, q)); assert_se(uid_range_equal(p, q));
} }
TEST(load_userns) { TEST(load_userns) {
@ -125,74 +125,74 @@ TEST(load_userns) {
if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
return; return;
ASSERT_OK(r); assert_se(r >= 0);
ASSERT_TRUE(uid_range_contains(p, getuid())); assert_se(uid_range_contains(p, getuid()));
r = running_in_userns(); r = running_in_userns();
if (r == 0) { if (r == 0) {
ASSERT_EQ(p->n_entries, 1U); assert_se(p->n_entries == 1);
ASSERT_EQ(p->entries[0].start, 0U); assert_se(p->entries[0].start == 0);
ASSERT_EQ(p->entries[0].nr, UINT32_MAX); assert_se(p->entries[0].nr == UINT32_MAX);
ASSERT_TRUE(uid_range_covers(p, 0, UINT32_MAX)); assert_se(uid_range_covers(p, 0, UINT32_MAX));
} }
ASSERT_OK(fopen_temporary_child(NULL, &f, &fn)); assert_se(fopen_temporary_child(NULL, &f, &fn) >= 0);
fputs("0 0 20\n" fputs("0 0 20\n"
"100 0 20\n", f); "100 0 20\n", f);
ASSERT_OK(fflush_and_check(f)); assert_se(fflush_and_check(f) >= 0);
p = uid_range_free(p); p = uid_range_free(p);
ASSERT_OK(uid_range_load_userns(fn, UID_RANGE_USERNS_INSIDE, &p)); assert_se(uid_range_load_userns(fn, UID_RANGE_USERNS_INSIDE, &p) >= 0);
ASSERT_TRUE(uid_range_contains(p, 0)); assert_se(uid_range_contains(p, 0));
ASSERT_TRUE(uid_range_contains(p, 19)); assert_se(uid_range_contains(p, 19));
ASSERT_FALSE(uid_range_contains(p, 20)); assert_se(!uid_range_contains(p, 20));
ASSERT_FALSE(uid_range_contains(p, 99)); assert_se(!uid_range_contains(p, 99));
ASSERT_TRUE(uid_range_contains(p, 100)); assert_se(uid_range_contains(p, 100));
ASSERT_TRUE(uid_range_contains(p, 119)); assert_se(uid_range_contains(p, 119));
ASSERT_FALSE(uid_range_contains(p, 120)); assert_se(!uid_range_contains(p, 120));
} }
TEST(uid_range_coalesce) { TEST(uid_range_coalesce) {
_cleanup_(uid_range_freep) UIDRange *p = NULL; _cleanup_(uid_range_freep) UIDRange *p = NULL;
for (size_t i = 0; i < 10; i++) { for (size_t i = 0; i < 10; i++) {
ASSERT_OK(uid_range_add_internal(&p, i * 10, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, i * 10, 10, /* coalesce= */ false) >= 0);
ASSERT_OK(uid_range_add_internal(&p, i * 10 + 5, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, i * 10 + 5, 10, /* coalesce= */ false) >= 0);
} }
ASSERT_OK(uid_range_add_internal(&p, 100, 1, /* coalesce= */ true)); assert_se(uid_range_add_internal(&p, 100, 1, /* coalesce= */ true) >= 0);
ASSERT_EQ(p->n_entries, 1U); assert_se(p->n_entries == 1);
ASSERT_EQ(p->entries[0].start, 0U); assert_se(p->entries[0].start == 0);
ASSERT_EQ(p->entries[0].nr, 105U); assert_se(p->entries[0].nr == 105);
p = uid_range_free(p); p = uid_range_free(p);
for (size_t i = 0; i < 10; i++) { for (size_t i = 0; i < 10; i++) {
ASSERT_OK(uid_range_add_internal(&p, (10 - i) * 10, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, (10 - i) * 10, 10, /* coalesce= */ false) >= 0);
ASSERT_OK(uid_range_add_internal(&p, (10 - i) * 10 + 5, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, (10 - i) * 10 + 5, 10, /* coalesce= */ false) >= 0);
} }
ASSERT_OK(uid_range_add_internal(&p, 100, 1, /* coalesce= */ true)); assert_se(uid_range_add_internal(&p, 100, 1, /* coalesce= */ true) >= 0);
ASSERT_EQ(p->n_entries, 1U); assert_se(p->n_entries == 1);
ASSERT_EQ(p->entries[0].start, 10U); assert_se(p->entries[0].start == 10);
ASSERT_EQ(p->entries[0].nr, 105U); assert_se(p->entries[0].nr == 105);
p = uid_range_free(p); p = uid_range_free(p);
for (size_t i = 0; i < 10; i++) { for (size_t i = 0; i < 10; i++) {
ASSERT_OK(uid_range_add_internal(&p, i * 10, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, i * 10, 10, /* coalesce= */ false) >= 0);
ASSERT_OK(uid_range_add_internal(&p, i * 10 + 5, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, i * 10 + 5, 10, /* coalesce= */ false) >= 0);
ASSERT_OK(uid_range_add_internal(&p, (10 - i) * 10, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, (10 - i) * 10, 10, /* coalesce= */ false) >= 0);
ASSERT_OK(uid_range_add_internal(&p, (10 - i) * 10 + 5, 10, /* coalesce= */ false)); assert_se(uid_range_add_internal(&p, (10 - i) * 10 + 5, 10, /* coalesce= */ false) >= 0);
} }
ASSERT_OK(uid_range_add_internal(&p, 100, 1, /* coalesce= */ true)); assert_se(uid_range_add_internal(&p, 100, 1, /* coalesce= */ true) >= 0);
ASSERT_EQ(p->n_entries, 1U); assert_se(p->n_entries == 1);
ASSERT_EQ(p->entries[0].start, 0U); assert_se(p->entries[0].start == 0);
ASSERT_EQ(p->entries[0].nr, 115U); assert_se(p->entries[0].nr == 115);
} }
DEFINE_TEST_MAIN(LOG_DEBUG); DEFINE_TEST_MAIN(LOG_DEBUG);

View File

@ -586,13 +586,12 @@ static int opendir_and_stat(
return 0; return 0;
} }
r = xstatx_full(dirfd(d), /* path = */ NULL, AT_EMPTY_PATH, if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx) < 0)
STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, return log_error_errno(errno, "statx(%s) failed: %m", path);
/* optional_mask = */ 0,
STATX_ATTR_MOUNT_ROOT, r = statx_warn_mount_root(&sx, LOG_ERR);
&sx);
if (r < 0) if (r < 0)
return log_error_errno(r, "statx(%s) failed: %m", path); return r;
*ret_mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); *ret_mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
*ret = TAKE_PTR(d); *ret = TAKE_PTR(d);
@ -684,22 +683,35 @@ static int dir_cleanup(
if (dot_or_dot_dot(de->d_name)) if (dot_or_dot_dot(de->d_name))
continue; continue;
/* If statx() is supported, use it. It's preferable over fstatat() since it tells us
* explicitly whether we are looking at a mount point, for free as side information. Determining
* the same information without statx() is hard, see the complexity of path_is_mount_point(),
* and also much slower as it requires a number of syscalls instead of just one. Hence, when
* we have modern statx() we use it instead of fstatat() and do proper mount point checks,
* while on older kernels we'll do traditional st_dev based detection of mount points.
*
* Using statx() for detecting mount points also has the benefit that we handle weird file
* systems such as overlayfs better where each file is originating from a different
* st_dev. */
struct statx sx; struct statx sx;
r = xstatx_full(dirfd(d), de->d_name, if (statx(dirfd(d), de->d_name,
AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT,
STATX_TYPE|STATX_MODE|STATX_UID, STATX_TYPE|STATX_MODE|STATX_UID|STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_BTIME,
STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_BTIME, &sx) < 0) {
STATX_ATTR_MOUNT_ROOT, if (errno == ENOENT)
&sx); continue;
if (r == -ENOENT)
continue;
if (r < 0) {
/* FUSE, NFS mounts, SELinux might return EACCES */ /* FUSE, NFS mounts, SELinux might return EACCES */
log_full_errno(r == -EACCES ? LOG_DEBUG : LOG_ERR, r, log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
"statx(%s/%s) failed: %m", p, de->d_name); "statx(%s/%s) failed: %m", p, de->d_name);
continue; continue;
} }
r = statx_warn_mount_root(&sx, LOG_ERR);
if (r < 0)
return r;
if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) { if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) {
log_debug("Ignoring \"%s/%s\": different mount points.", p, de->d_name); log_debug("Ignoring \"%s/%s\": different mount points.", p, de->d_name);
continue; continue;