mirror of
https://github.com/systemd/systemd
synced 2025-11-17 15:54:45 +01:00
Compare commits
No commits in common. "404d9bfb62a2ce14939b194e1f9f03e9dc537ae5" and "96d03f8e41dc7b59cc48811da65e2dc88b60e8f5" have entirely different histories.
404d9bfb62
...
96d03f8e41
5
NEWS
5
NEWS
@ -2,11 +2,6 @@ systemd System and Service Manager
|
|||||||
|
|
||||||
CHANGES WITH 259 in spe:
|
CHANGES WITH 259 in spe:
|
||||||
|
|
||||||
* The cgroup2 file system is now mounted with the
|
|
||||||
"memory_hugetlb_accounting" mount option, supported since kernel 6.6.
|
|
||||||
This means that HugeTLB memory usage is now counted towards the
|
|
||||||
cgroup’s overall memory usage for the memory controller.
|
|
||||||
|
|
||||||
* homectl's --recovery-key= option may now be used with the "update"
|
* homectl's --recovery-key= option may now be used with the "update"
|
||||||
command to add recovery keys to existing user accounts. Previously,
|
command to add recovery keys to existing user accounts. Previously,
|
||||||
recovery keys could only be configured during initial user creation.
|
recovery keys could only be configured during initial user creation.
|
||||||
|
|||||||
3
README
3
README
@ -68,8 +68,7 @@ REQUIREMENTS:
|
|||||||
≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
|
≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
|
||||||
≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD,
|
≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD,
|
||||||
and MOVE_MOUNT_BENEATH
|
and MOVE_MOUNT_BENEATH
|
||||||
≥ 6.6 for quota support on tmpfs and cgroup2fs memory_hugetlb_accounting
|
≥ 6.6 for quota support on tmpfs
|
||||||
option
|
|
||||||
≥ 6.9 for pidfs
|
≥ 6.9 for pidfs
|
||||||
≥ 6.10 for fcntl(F_DUPFD_QUERY), unprivileged linkat(AT_EMPTY_PATH),
|
≥ 6.10 for fcntl(F_DUPFD_QUERY), unprivileged linkat(AT_EMPTY_PATH),
|
||||||
and block device 'partscan' sysfs attribute
|
and block device 'partscan' sysfs attribute
|
||||||
|
|||||||
@ -25,10 +25,12 @@
|
|||||||
#include "virt.h"
|
#include "virt.h"
|
||||||
|
|
||||||
typedef enum MountMode {
|
typedef enum MountMode {
|
||||||
|
MNT_NONE = 0,
|
||||||
MNT_FATAL = 1 << 0,
|
MNT_FATAL = 1 << 0,
|
||||||
MNT_IN_CONTAINER = 1 << 1,
|
MNT_IN_CONTAINER = 1 << 1,
|
||||||
MNT_CHECK_WRITABLE = 1 << 2,
|
MNT_CHECK_WRITABLE = 1 << 2,
|
||||||
MNT_FOLLOW_SYMLINK = 1 << 3,
|
MNT_FOLLOW_SYMLINK = 1 << 3,
|
||||||
|
MNT_USRQUOTA_GRACEFUL = 1 << 4,
|
||||||
} MountMode;
|
} MountMode;
|
||||||
|
|
||||||
typedef struct MountPoint {
|
typedef struct MountPoint {
|
||||||
@ -36,198 +38,79 @@ typedef struct MountPoint {
|
|||||||
const char *where;
|
const char *where;
|
||||||
const char *type;
|
const char *type;
|
||||||
const char *options;
|
const char *options;
|
||||||
int (*options_fn)(int priority, const char *type, char **ret);
|
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
MountMode mode;
|
MountMode mode;
|
||||||
bool (*condition_fn)(void);
|
bool (*condition_fn)(void);
|
||||||
} MountPoint;
|
} MountPoint;
|
||||||
|
|
||||||
static int cgroupfs_mount_options(int priority, const char *type, char **ret) {
|
static bool cgroupfs_recursiveprot_supported(void) {
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
assert(type);
|
/* Added in kernel 5.7 */
|
||||||
assert(streq(type, "cgroup2"));
|
|
||||||
assert(ret);
|
|
||||||
|
|
||||||
_cleanup_free_ char *opts = NULL;
|
r = mount_option_supported("cgroup2", "memory_recursiveprot", /* value = */ NULL);
|
||||||
FOREACH_STRING(o, "memory_recursiveprot", "memory_hugetlb_accounting") {
|
|
||||||
r = mount_option_supported("cgroup2", o, /* value = */ NULL);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
log_full_errno(priority, r, "Failed to determine whether cgroupfs supports '%s' mount option, assuming not: %m", o);
|
log_debug_errno(r, "Failed to determine whether cgroupfs supports 'memory_recursiveprot' mount option, assuming not: %m");
|
||||||
else if (r == 0)
|
else if (r == 0)
|
||||||
log_debug("'%s' not supported by cgroupfs, not using mount option.", o);
|
log_debug("'memory_recursiveprot' not supported by cgroupfs, not using mount option.");
|
||||||
else if (!strextend_with_separator(&opts, ",", o))
|
|
||||||
return log_oom_full(priority);
|
|
||||||
}
|
|
||||||
|
|
||||||
*ret = TAKE_PTR(opts);
|
return r > 0;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int mount_cgroupfs(const char *path) {
|
int mount_cgroupfs(const char *path) {
|
||||||
int r;
|
|
||||||
|
|
||||||
assert(path);
|
assert(path);
|
||||||
|
|
||||||
/* Mount a separate cgroupfs instance, taking all options we initially set into account. This is
|
/* Mount a separate cgroupfs instance, taking all options we initially set into account. This is
|
||||||
* especially useful when cgroup namespace is *not* employed, since the kernel overrides all
|
* especially useful when cgroup namespace is *not* employed, since the kernel overrides all
|
||||||
* previous options if a new mount is established in initial cgns (c.f.
|
* previous options if a new mount is established in initial cgns (c.f.
|
||||||
* https://github.com/torvalds/linux/blob/b69bb476dee99d564d65d418e9a20acca6f32c3f/kernel/cgroup/cgroup.c#L1984)
|
* https://github.com/torvalds/linux/blob/b69bb476dee99d564d65d418e9a20acca6f32c3f/kernel/cgroup/cgroup.c#L1984)
|
||||||
*/
|
*
|
||||||
|
* The options shall be kept in sync with those in mount_table below. */
|
||||||
|
|
||||||
_cleanup_free_ char *opts = NULL;
|
return mount_nofollow_verbose(LOG_ERR, "cgroup2", path, "cgroup2",
|
||||||
r = cgroupfs_mount_options(LOG_WARNING, "cgroup2", &opts);
|
MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
if (r < 0)
|
cgroupfs_recursiveprot_supported() ? "nsdelegate,memory_recursiveprot" : "nsdelegate");
|
||||||
return r;
|
|
||||||
|
|
||||||
/* These options shall be kept in sync with those in mount_table below. */
|
|
||||||
if (!strprepend_with_separator(&opts, ",", "nsdelegate"))
|
|
||||||
return log_oom();
|
|
||||||
|
|
||||||
return mount_nofollow_verbose(LOG_ERR, "cgroup2", path, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int usrquota_mount_option(int priority, const char *type, char **ret) {
|
|
||||||
_cleanup_free_ char *o = NULL;
|
|
||||||
int r;
|
|
||||||
|
|
||||||
assert(type);
|
|
||||||
assert(ret);
|
|
||||||
|
|
||||||
r = mount_option_supported(type, "usrquota", /* value= */ NULL);
|
|
||||||
if (r < 0)
|
|
||||||
log_full_errno(priority, r, "Unable to determine whether %s supports 'usrquota' mount option, assuming not: %m", type);
|
|
||||||
else if (r == 0)
|
|
||||||
log_debug("Not enabling 'usrquota' for '%s' as kernel lacks support for it.", type);
|
|
||||||
else {
|
|
||||||
o = strdup("usrquota");
|
|
||||||
if (!o)
|
|
||||||
return log_oom_full(priority);
|
|
||||||
}
|
|
||||||
|
|
||||||
*ret = TAKE_PTR(o);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const MountPoint mount_table[] = {
|
static const MountPoint mount_table[] = {
|
||||||
{
|
{ "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.what = "proc",
|
MNT_FATAL|MNT_IN_CONTAINER|MNT_FOLLOW_SYMLINK },
|
||||||
.where = "/proc",
|
{ "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.type = "proc",
|
MNT_FATAL|MNT_IN_CONTAINER },
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
{ "devtmpfs", "/dev", "devtmpfs", "mode=0755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
|
||||||
.mode = MNT_FATAL|MNT_IN_CONTAINER|MNT_FOLLOW_SYMLINK,
|
MNT_FATAL|MNT_IN_CONTAINER },
|
||||||
},
|
{ "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
{
|
MNT_NONE },
|
||||||
.what = "sysfs",
|
|
||||||
.where = "/sys",
|
|
||||||
.type = "sysfs",
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
|
||||||
.mode = MNT_FATAL|MNT_IN_CONTAINER,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.what = "devtmpfs",
|
|
||||||
.where = "/dev",
|
|
||||||
.type = "devtmpfs",
|
|
||||||
.options = "mode=0755" TMPFS_LIMITS_DEV,
|
|
||||||
.flags = MS_NOSUID|MS_STRICTATIME,
|
|
||||||
.mode = MNT_FATAL|MNT_IN_CONTAINER,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.what = "securityfs",
|
|
||||||
.where = "/sys/kernel/security",
|
|
||||||
.type = "securityfs",
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
|
||||||
},
|
|
||||||
#if ENABLE_SMACK
|
#if ENABLE_SMACK
|
||||||
{
|
{ "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.what = "smackfs",
|
MNT_FATAL, mac_smack_use },
|
||||||
.where = "/sys/fs/smackfs",
|
{ "tmpfs", "/dev/shm", "tmpfs", "mode=01777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
||||||
.type = "smackfs",
|
MNT_FATAL|MNT_USRQUOTA_GRACEFUL, mac_smack_use },
|
||||||
.options = "smackfsdef=*",
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
|
||||||
.mode = MNT_FATAL,
|
|
||||||
.condition_fn = mac_smack_use,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.what = "tmpfs",
|
|
||||||
.where = "/dev/shm",
|
|
||||||
.type = "tmpfs",
|
|
||||||
.options = "mode=01777,smackfsroot=*",
|
|
||||||
.options_fn = usrquota_mount_option,
|
|
||||||
.flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
|
||||||
.mode = MNT_FATAL,
|
|
||||||
.condition_fn = mac_smack_use,
|
|
||||||
},
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{ "tmpfs", "/dev/shm", "tmpfs", "mode=01777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
||||||
.what = "tmpfs",
|
MNT_FATAL|MNT_IN_CONTAINER|MNT_USRQUOTA_GRACEFUL },
|
||||||
.where = "/dev/shm",
|
{ "devpts", "/dev/pts", "devpts", "mode=" STRINGIFY(TTY_MODE) ",gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
|
||||||
.type = "tmpfs",
|
MNT_IN_CONTAINER },
|
||||||
.options = "mode=01777",
|
|
||||||
.options_fn = usrquota_mount_option,
|
|
||||||
.flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
|
||||||
.mode = MNT_FATAL|MNT_IN_CONTAINER,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.what = "devpts",
|
|
||||||
.where = "/dev/pts",
|
|
||||||
.type = "devpts",
|
|
||||||
.options = "mode=" STRINGIFY(TTY_MODE) ",gid=" STRINGIFY(TTY_GID),
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC,
|
|
||||||
.mode = MNT_IN_CONTAINER,
|
|
||||||
},
|
|
||||||
#if ENABLE_SMACK
|
#if ENABLE_SMACK
|
||||||
{
|
{ "tmpfs", "/run", "tmpfs", "mode=0755,smackfsroot=*" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
||||||
.what = "tmpfs",
|
MNT_FATAL, mac_smack_use },
|
||||||
.where = "/run",
|
|
||||||
.type = "tmpfs",
|
|
||||||
.options = "mode=0755,smackfsroot=*" TMPFS_LIMITS_RUN,
|
|
||||||
.flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
|
||||||
.mode = MNT_FATAL,
|
|
||||||
.condition_fn = mac_smack_use,
|
|
||||||
},
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{ "tmpfs", "/run", "tmpfs", "mode=0755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
||||||
.what = "tmpfs",
|
MNT_FATAL|MNT_IN_CONTAINER },
|
||||||
.where = "/run",
|
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate,memory_recursiveprot", MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.type = "tmpfs",
|
MNT_FATAL|MNT_IN_CONTAINER|MNT_CHECK_WRITABLE, cgroupfs_recursiveprot_supported },
|
||||||
.options = "mode=0755" TMPFS_LIMITS_RUN,
|
{ "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME,
|
MNT_FATAL|MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
|
||||||
.mode = MNT_FATAL|MNT_IN_CONTAINER,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.what = "cgroup2",
|
|
||||||
.where = "/sys/fs/cgroup",
|
|
||||||
.type = "cgroup2",
|
|
||||||
.options = "nsdelegate",
|
|
||||||
.options_fn = cgroupfs_mount_options,
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
|
||||||
.mode = MNT_FATAL|MNT_IN_CONTAINER|MNT_CHECK_WRITABLE,
|
|
||||||
},
|
|
||||||
#if ENABLE_PSTORE
|
#if ENABLE_PSTORE
|
||||||
{
|
{ "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.what = "pstore",
|
MNT_NONE },
|
||||||
.where = "/sys/fs/pstore",
|
|
||||||
.type = "pstore",
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
|
||||||
},
|
|
||||||
#endif
|
#endif
|
||||||
#if ENABLE_EFI
|
#if ENABLE_EFI
|
||||||
{
|
{ "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.what = "efivarfs",
|
MNT_NONE, is_efi_boot },
|
||||||
.where = "/sys/firmware/efi/efivars",
|
|
||||||
.type = "efivarfs",
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
|
||||||
.condition_fn = is_efi_boot,
|
|
||||||
},
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{ "bpf", "/sys/fs/bpf", "bpf", "mode=0700", MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||||
.what = "bpf",
|
MNT_NONE },
|
||||||
.where = "/sys/fs/bpf",
|
|
||||||
.type = "bpf",
|
|
||||||
.options = "mode=0700",
|
|
||||||
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The first three entries we might need before SELinux is up. The
|
/* The first three entries we might need before SELinux is up. The
|
||||||
@ -310,18 +193,20 @@ static int mount_one(const MountPoint *p, bool relabel) {
|
|||||||
(void) mkdir_p(p->where, 0755);
|
(void) mkdir_p(p->where, 0755);
|
||||||
|
|
||||||
_cleanup_free_ char *extend_options = NULL;
|
_cleanup_free_ char *extend_options = NULL;
|
||||||
const char *o;
|
const char *o = p->options;
|
||||||
if (p->options_fn) {
|
if (FLAGS_SET(p->mode, MNT_USRQUOTA_GRACEFUL)) {
|
||||||
r = p->options_fn(priority, p->type, &extend_options);
|
r = mount_option_supported(p->type, "usrquota", /* value= */ NULL);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
log_full_errno(priority, r, "Unable to determine whether %s supports 'usrquota' mount option, assuming not: %m", p->type);
|
||||||
|
else if (r == 0)
|
||||||
if (!strprepend_with_separator(&extend_options, ",", p->options))
|
log_debug("Not enabling 'usrquota' on '%s' as kernel lacks support for it.", p->where);
|
||||||
|
else {
|
||||||
|
if (!strextend_with_separator(&extend_options, ",", p->options ?: POINTER_MAX, "usrquota"))
|
||||||
return log_oom();
|
return log_oom();
|
||||||
|
|
||||||
o = extend_options;
|
o = extend_options;
|
||||||
} else
|
}
|
||||||
o = p->options;
|
}
|
||||||
|
|
||||||
r = mount_verbose_full(priority, p->what, p->where, p->type, p->flags, o, FLAGS_SET(p->mode, MNT_FOLLOW_SYMLINK));
|
r = mount_verbose_full(priority, p->what, p->where, p->type, p->flags, o, FLAGS_SET(p->mode, MNT_FOLLOW_SYMLINK));
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user