1
0
mirror of https://github.com/systemd/systemd synced 2025-11-18 08:14:46 +01:00

Compare commits

...

5 Commits

Author SHA1 Message Date
Lennart Poettering
a6146b250e
machined: support image clone/rm operations unpriv, and make hidden images always read-only (#39408) 2025-10-27 17:06:26 +01:00
Ronan Pigott
6f8e6e50a0 zsh: add completion for dbus bus address
The DBUS_SESSION_BUS_ADDRESS and DBUS_SYSTEM_BUS_ADDRESS parameters have
an interesting syntax that's useful to complete. Let's include a
completion definition for these parameters.
2025-10-27 12:41:42 +00:00
Lennart Poettering
9d5fc235f4 discover-image: support clone + rm operation also unpriv 2025-10-24 17:51:00 +02:00
Lennart Poettering
ee327e086e discover-image: imply that hidden images are read-only
Marking a whole directory tree OS image as read-only is difficult
privilege-wise, because so far we rely on the FS_IMMUTABLE_FL which is
not accessible to unpriv clients.

One fundamental place where we currently rely on marking images
read-only is for keeping pristine copies of the originally downloaded
image around, which we place in "hidden" image directories. This is
probably the most relevant usecase for the read-only flag. And moreover,
the only usecase for the hidden images are these read-only pristine
copies.

Hence, let's make this work reasonably in the unpriv case, and simply
imply the read-only flag for hidden images. This is strictly speaking a
change in behaviour, but effectively it shouldn't be, because for nspawn
containers that are executed we insist on names that are hostname
compatible, and hidden names aren't (because they start with a dot).
2025-10-24 17:51:00 +02:00
Lennart Poettering
502f7a2b80 rm-rf: make sure we can safely remove dirs we have no access to via rm_rf_at()
Previously, we'd first empty a dir, and then remove it. This works fine
as long as we have access to a dir. But in some cases (like for example
a foreign owned container tree) we might not have access to the dir, but
are still able to remove it (because it is empty, and in a dir we own).
Hence let's try that first. If it works, we do not need to enter the dir
(and thus fail).
2025-10-24 17:38:29 +02:00
13 changed files with 283 additions and 29 deletions

View File

@ -0,0 +1,50 @@
#compdef -value-,DBUS_SESSION_BUS_ADDRESS,-default- -value-,DBUS_SYSTEM_BUS_ADDRESS,-default-
# Completion for the values of the DBUS_SESSION_BUS_ADDRESS and
# DBUS_SYSTEM_BUS_ADDRESS parameters (see the #compdef tag above, which must
# stay on the first line of the file).
# These are declared local so that anything stored in them by the _values call
# below stays confined to this function.
local context state state_descr line
typeset -A val_args
# First stage: offer the address kind. -S: selects ':' as the separator between
# a kind and what follows it. Each spec uses a "->name" action, i.e. instead of
# running a completer directly the selected kind is reported back to this
# function, which handles it further down.
_values -S: kind \
'unix[a unix domain socket]:unix properties:->unix' \
'tcp[a tcp socket]:tcp properties:->tcp' \
'unixexec[a process]:unixexec properties:->unixexec' \
'x-machine-unix[a container]:machine properties:->x-machine-unix'
# Completion helper that offers a bus GUID as a candidate.
# It runs "busctl call org.freedesktop.DBus /org/freedesktop/DBus
# org.freedesktop.DBus GetId" through _call_program (tag "dbus-guid") and reads
# the output line by line. All arguments given to this function are passed on
# to compadd.
_sd_bus_get_guid() {
local TYPE VALUE
# Destination, object path and interface for the GetId method call.
local -a busname=(org.freedesktop.DBus /org/freedesktop/DBus org.freedesktop.DBus)
_call_program -l dbus-guid busctl call $busname GetId |
while read TYPE VALUE; do
# Only lines whose first word is 's' and that carry a non-empty value are
# offered; (Q) removes one level of quoting from the value before it is added.
[[ $TYPE == 's' && -n $VALUE ]] && compadd "$@" - ${(Q)VALUE};
done
}
# Second stage: complete the comma-separated (-s,) properties that belong to
# the address kind handled by the first _values call. Every kind accepts a
# "guid" property, completed by _sd_bus_get_guid.
local expl ret
case $context in
    unix)
        # "path" and "abstract" exclude each other.
        _values -s, 'unix socket properties' \
            'guid[bus guid]:bus guid:_sd_bus_get_guid -qS,' \
            '(abstract)path[unix domain socket path]:path:_files -r, -g "*(=)"' \
            '(path)abstract[unix domain socket path in the abstract namespace]:abstract path:_files -r, -P@ -g "*(=)"' \
            'uid[unix uid]:uid:_numbers' \
            'gid[unix gid]:gid:_numbers'
        ;;
    tcp)
        _values -s, 'tcp socket properties' \
            'guid[bus guid]:bus guid:_sd_bus_get_guid -qS,' \
            'host[hostname]:hostname:_hosts -r,' \
            'port[port]:port:_numbers' \
            'family[address family]:address family:(ipv4 ipv6)'
        ;;
    unixexec)
        _values -s, 'unixexec properties' \
            'guid[bus guid]:bus guid:_sd_bus_get_guid -qS,' \
            'path[command path]:command path:_absolute_command_paths -r,'
        ;;
    x-machine-unix)
        # "machine" and "pid" exclude each other.
        _values -s, 'machine properties' \
            'guid[bus guid]:bus guid:_sd_bus_get_guid -qS,' \
            '(pid)machine[machine]:machine:_call_function ret _sd_machines' \
            '(machine)pid[pid]:pid:_pids'
        ;;
esac

View File

@ -24,6 +24,7 @@ items = [['_busctl', ''],
['_udevadm', ''],
['_varlinkctl', ''],
['_kernel-install', 'ENABLE_KERNEL_INSTALL'],
['_sd_bus_address', ''],
['_sd_hosts_or_user_at_host', ''],
['_sd_outputmodes', ''],
['_sd_unit_files', ''],

View File

@ -2006,8 +2006,8 @@ static int action_discover(void) {
TABLE_SET_COLOR, startswith(img->name, ".") ? ANSI_GREY : NULL,
TABLE_STRING, image_type_to_string(img->type),
TABLE_STRING, image_class_to_string(img->class),
TABLE_BOOLEAN, img->read_only,
TABLE_SET_COLOR, !img->read_only ? ANSI_HIGHLIGHT_GREEN : ANSI_HIGHLIGHT_RED,
TABLE_BOOLEAN, image_is_read_only(img),
TABLE_SET_COLOR, image_is_read_only(img) ? ANSI_HIGHLIGHT_RED : ANSI_HIGHLIGHT_GREEN,
TABLE_PATH, img->path,
TABLE_TIMESTAMP, img->mtime != 0 ? img->mtime : img->crtime,
TABLE_SIZE, img->usage,

View File

@ -1365,7 +1365,7 @@ static int method_list_images(sd_bus_message *msg, void *userdata, sd_bus_error
i->name,
image_type_to_string(i->type),
i->path,
i->read_only,
image_is_read_only(i),
i->crtime,
i->mtime,
i->usage,

View File

@ -24,6 +24,20 @@
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, image_type, ImageType);
/* D-Bus property getter: appends the boolean returned by image_is_read_only() for the Image passed
 * as userdata to the reply message. Returns whatever sd_bus_message_append() returns. */
static int property_get_read_only(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Image *img = ASSERT_PTR(userdata);
        int ro = image_is_read_only(img);

        return sd_bus_message_append(ASSERT_PTR(reply), "b", ro);
}
int bus_image_method_remove(
sd_bus_message *message,
void *userdata,
@ -448,7 +462,7 @@ const sd_bus_vtable image_vtable[] = {
SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Image, name), 0),
SD_BUS_PROPERTY("Path", "s", NULL, offsetof(Image, path), 0),
SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Image, type), 0),
SD_BUS_PROPERTY("ReadOnly", "b", bus_property_get_bool, offsetof(Image, read_only), 0),
SD_BUS_PROPERTY("ReadOnly", "b", property_get_read_only, 0, 0),
SD_BUS_PROPERTY("CreationTimestamp", "t", NULL, offsetof(Image, crtime), 0),
SD_BUS_PROPERTY("ModificationTimestamp", "t", NULL, offsetof(Image, mtime), 0),
SD_BUS_PROPERTY("Usage", "t", NULL, offsetof(Image, usage), 0),

View File

@ -823,7 +823,7 @@ static int method_list_images(sd_bus_message *message, void *userdata, sd_bus_er
r = sd_bus_message_append(reply, "(ssbttto)",
image->name,
image_type_to_string(image->type),
image->read_only,
image_is_read_only(image),
image->crtime,
image->mtime,
image->usage,

View File

@ -638,7 +638,7 @@ static int list_image_one_and_maybe_read_metadata(Manager *m, sd_varlink *link,
JSON_BUILD_PAIR_STRING_NON_EMPTY("path", image->path),
SD_JSON_BUILD_PAIR_STRING("type", image_type_to_string(image->type)),
SD_JSON_BUILD_PAIR_STRING("class", image_class_to_string(image->class)),
SD_JSON_BUILD_PAIR_BOOLEAN("readOnly", image->read_only),
SD_JSON_BUILD_PAIR_BOOLEAN("readOnly", image_is_read_only(image)),
JSON_BUILD_PAIR_UNSIGNED_NON_ZERO("creationTimestamp", image->crtime),
JSON_BUILD_PAIR_UNSIGNED_NON_ZERO("modificationTimestamp", image->mtime),
JSON_BUILD_PAIR_UNSIGNED_NOT_EQUAL("usage", image->usage, UINT64_MAX),

View File

@ -3077,7 +3077,7 @@ static int determine_names(void) {
return log_oom();
if (!arg_ephemeral)
arg_read_only = arg_read_only || i->read_only;
arg_read_only = arg_read_only || image_is_read_only(i);
} else {
r = safe_getcwd(&arg_directory);
if (r < 0)

View File

@ -177,7 +177,7 @@ static int method_list_images(sd_bus_message *message, void *userdata, sd_bus_er
r = sd_bus_message_append(reply, "(ssbtttso)",
image->name,
image_type_to_string(image->type),
image->read_only,
image_is_read_only(image),
image->crtime,
image->mtime,
image->usage,

View File

@ -30,6 +30,20 @@
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, image_type, ImageType);
/* D-Bus property getter: appends the boolean returned by image_is_read_only() for the Image passed
 * as userdata to the reply message. Returns whatever sd_bus_message_append() returns. */
static int property_get_read_only(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Image *img = ASSERT_PTR(userdata);
        int ro = image_is_read_only(img);

        return sd_bus_message_append(ASSERT_PTR(reply), "b", ro);
}
int bus_image_common_get_os_release(
Manager *m,
sd_bus_message *message,
@ -865,7 +879,7 @@ const sd_bus_vtable image_vtable[] = {
SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Image, name), 0),
SD_BUS_PROPERTY("Path", "s", NULL, offsetof(Image, path), 0),
SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Image, type), 0),
SD_BUS_PROPERTY("ReadOnly", "b", bus_property_get_bool, offsetof(Image, read_only), 0),
SD_BUS_PROPERTY("ReadOnly", "b", property_get_read_only, 0, 0),
SD_BUS_PROPERTY("CreationTimestamp", "t", NULL, offsetof(Image, crtime), 0),
SD_BUS_PROPERTY("ModificationTimestamp", "t", NULL, offsetof(Image, mtime), 0),
SD_BUS_PROPERTY("Usage", "t", NULL, offsetof(Image, usage), 0),

View File

@ -36,10 +36,13 @@
#include "log.h"
#include "loop-util.h"
#include "mkdir.h"
#include "namespace-util.h"
#include "nsresource.h"
#include "nulstr-util.h"
#include "os-util.h"
#include "path-lookup.h"
#include "path-util.h"
#include "process-util.h"
#include "rm-rf.h"
#include "runtime-scope.h"
#include "stat-util.h"
@ -47,6 +50,7 @@
#include "string-util.h"
#include "strv.h"
#include "time-util.h"
#include "uid-classification.h"
#include "vpick.h"
#include "xattr-util.h"
@ -480,6 +484,7 @@ static int image_make(
if (r < 0)
return r;
(*ret)->foreign_uid_owned = uid_is_foreign(st->st_uid);
(void) image_update_quota(*ret, fd);
return 0;
}
@ -505,6 +510,7 @@ static int image_make(
if (r < 0)
return r;
(*ret)->foreign_uid_owned = uid_is_foreign(st->st_uid);
return 0;
} else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
@ -1131,6 +1137,64 @@ int image_discover(
return 0;
}
/* Empties the directory tree of an image without requiring privileges in the calling process: a
 * transient user namespace is allocated, the image directory is mounted for that namespace through
 * mountfsd_mount_directory(), and a forked child joins the namespace and removes everything below
 * the tree's top-level directory. The top-level directory itself is not removed here (only
 * rm_rf_children() is invoked). Returns 0 on success, a negative errno-style value on failure. */
static int unprivileged_remove(Image *i) {
        int r;

        assert(i);

        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K);
        if (userns_fd < 0)
                return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m");

        _cleanup_close_ int tree_fd = -EBADF;
        r = mountfsd_mount_directory(
                        i->path,
                        userns_fd,
                        DISSECT_IMAGE_FOREIGN_UID,
                        &tree_fd);
        if (r < 0)
                return r;

        /* Fork off child that moves into userns and does the removal */
        r = safe_fork_full(
                        "rm-tree",
                        /* stdio_fds= */ NULL,
                        (int[]) { userns_fd, tree_fd, }, 2,
                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
                        /* ret_pid= */ NULL);
        if (r < 0)
                return log_debug_errno(r, "Process that was supposed to remove tree failed: %m");
        if (r == 0) {
                /* child */

                r = namespace_enter(
                                /* pidns_fd= */ -EBADF,
                                /* mntns_fd= */ -EBADF,
                                /* netns_fd= */ -EBADF,
                                userns_fd,
                                /* root_fd= */ -EBADF);
                if (r < 0) {
                        log_debug_errno(r, "Failed to join user namespace: %m");
                        _exit(EXIT_FAILURE);
                }

                _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC);
                if (dfd < 0) {
                        /* The error is carried in dfd; r still holds the (successful) result of
                         * namespace_enter() at this point and must not be used for logging. */
                        log_error_errno(dfd, "Failed to reopen tree fd: %m");
                        _exit(EXIT_FAILURE);
                }

                r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL);
                if (r < 0) {
                        log_error_errno(r, "Failed to empty '%s' directory in foreign UID mode: %m", i->path);
                        _exit(EXIT_FAILURE);
                }

                _exit(EXIT_SUCCESS);
        }

        return 0;
}
int image_remove(Image *i, RuntimeScope scope) {
_cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
_cleanup_strv_free_ char **settings = NULL;
@ -1172,6 +1236,11 @@ int image_remove(Image *i, RuntimeScope scope) {
case IMAGE_DIRECTORY:
/* Allow deletion of read-only directories */
(void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
/* If this is foreign owned, try an unprivileged remove first, but accept if that doesn't work, and do it directly either way, maybe it works */
if (i->foreign_uid_owned)
(void) unprivileged_remove(i);
r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
if (r < 0)
return r;
@ -1398,6 +1467,87 @@ static int get_pool_directory(
return 0;
}
/* Clones the directory tree of image 'i' to 'new_path' without requiring privileges in the calling
 * process. A transient user namespace is allocated, the source tree is mounted for it via
 * mountfsd_mount_directory(), the target directory is created via mountfsd_make_directory() and
 * mounted for the same namespace, and finally a forked child joins the namespace and copies the
 * tree with copy_tree_at(). Returns 0 on success, a negative errno-style value on failure. */
static int unpriviled_clone(Image *i, const char *new_path) {
        int r;

        assert(i);
        assert(new_path);

        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K);
        if (userns_fd < 0)
                return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m");

        /* Map original image */
        _cleanup_close_ int tree_fd = -EBADF;
        r = mountfsd_mount_directory(
                        i->path,
                        userns_fd,
                        DISSECT_IMAGE_FOREIGN_UID,
                        &tree_fd);
        if (r < 0)
                return r;

        /* Make new image */
        _cleanup_close_ int new_fd = -EBADF;
        r = mountfsd_make_directory(
                        new_path,
                        /* flags= */ 0,
                        &new_fd);
        if (r < 0)
                return r; /* propagate the failure; returning 0 here would report a clone that never happened */

        /* Mount new image */
        _cleanup_close_ int target_fd = -EBADF;
        r = mountfsd_mount_directory_fd(
                        new_fd,
                        userns_fd,
                        DISSECT_IMAGE_FOREIGN_UID,
                        &target_fd);
        if (r < 0)
                return r;

        /* Fork off child that moves into userns and does the copying */
        r = safe_fork_full(
                        "clone-tree",
                        /* stdio_fds= */ NULL,
                        (int[]) { userns_fd, tree_fd, target_fd }, 3,
                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
                        /* ret_pid= */ NULL);
        if (r < 0)
                return log_debug_errno(r, "Process that was supposed to clone tree failed: %m");
        if (r == 0) {
                /* child */

                r = namespace_enter(
                                /* pidns_fd= */ -EBADF,
                                /* mntns_fd= */ -EBADF,
                                /* netns_fd= */ -EBADF,
                                userns_fd,
                                /* root_fd= */ -EBADF);
                if (r < 0) {
                        log_debug_errno(r, "Failed to join user namespace: %m");
                        _exit(EXIT_FAILURE);
                }

                r = copy_tree_at(
                                tree_fd, /* from= */ NULL,
                                target_fd, /* to= */ NULL,
                                /* override_uid= */ UID_INVALID,
                                /* override_gid= */ GID_INVALID,
                                COPY_REFLINK|COPY_HARDLINKS|COPY_MERGE_EMPTY|COPY_MERGE_APPLY_STAT|COPY_SAME_MOUNT|COPY_ALL_XATTRS,
                                /* denylist= */ NULL,
                                /* subvolumes= */ NULL);
                if (r < 0) {
                        log_debug_errno(r, "Failed to copy clone tree: %m");
                        _exit(EXIT_FAILURE);
                }

                _exit(EXIT_SUCCESS);
        }

        return 0;
}
int image_clone(Image *i, const char *new_name, bool read_only, RuntimeScope scope) {
_cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
_cleanup_strv_free_ char **settings = NULL;
@ -1442,16 +1592,22 @@ int image_clone(Image *i, const char *new_name, bool read_only, RuntimeScope sco
if (r < 0)
return r;
r = btrfs_subvol_snapshot_at(AT_FDCWD, i->path, AT_FDCWD, new_path,
(read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
BTRFS_SNAPSHOT_FALLBACK_COPY |
BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
BTRFS_SNAPSHOT_RECURSIVE |
BTRFS_SNAPSHOT_QUOTA);
if (r >= 0)
/* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
(void) btrfs_subvol_auto_qgroup(new_path, 0, true);
if (i->foreign_uid_owned)
r = unpriviled_clone(i, new_path);
else {
r = btrfs_subvol_snapshot_at(
AT_FDCWD, i->path,
AT_FDCWD, new_path,
(read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
BTRFS_SNAPSHOT_FALLBACK_COPY |
BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
BTRFS_SNAPSHOT_RECURSIVE |
BTRFS_SNAPSHOT_QUOTA);
if (r >= 0)
/* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
(void) btrfs_subvol_auto_qgroup(new_path, /* subvol_id= */ 0, /* create_intermediary_qgroup= */ true);
}
break;
}
@ -1493,7 +1649,7 @@ int image_read_only(Image *i, bool b, RuntimeScope scope) {
assert(i);
if (image_is_vendor(i) || image_is_host(i))
if (image_is_vendor(i) || image_is_host(i) || image_is_hidden(i))
return -EROFS;
/* Make sure we don't interfere with a running nspawn */
@ -2005,7 +2161,7 @@ int image_to_json(const struct Image *img, sd_json_variant **ret) {
SD_JSON_BUILD_PAIR_STRING("Class", image_class_to_string(img->class)),
SD_JSON_BUILD_PAIR_STRING("Name", img->name),
SD_JSON_BUILD_PAIR_CONDITION(!!img->path, "Path", SD_JSON_BUILD_STRING(img->path)),
SD_JSON_BUILD_PAIR_BOOLEAN("ReadOnly", img->read_only),
SD_JSON_BUILD_PAIR_BOOLEAN("ReadOnly", image_is_read_only(img)),
SD_JSON_BUILD_PAIR_CONDITION(img->crtime != 0, "CreationTimestamp", SD_JSON_BUILD_UNSIGNED(img->crtime)),
SD_JSON_BUILD_PAIR_CONDITION(img->mtime != 0, "ModificationTimestamp", SD_JSON_BUILD_UNSIGNED(img->mtime)),
SD_JSON_BUILD_PAIR_CONDITION(img->usage != UINT64_MAX, "Usage", SD_JSON_BUILD_UNSIGNED(img->usage)),

View File

@ -39,8 +39,9 @@ typedef struct Image {
char **sysext_release;
char **confext_release;
bool metadata_valid:1;
bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */
bool metadata_valid:1; /* true if the above 6 metadata fields have been read from the image */
bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */
bool foreign_uid_owned:1; /* true if this is of type IMAGE_DIRECTORY/IMAGE_SUBVOLUME and owned by foreign UID range */
void *userdata;
} Image;
@ -84,16 +85,28 @@ static inline char** image_extension_release(Image *image, ImageClass class) {
return NULL;
}
static inline bool image_is_hidden(const struct Image *i) {
/* Tells whether an image is "hidden": true if its name is set and starts with a dot. */
static inline bool image_is_hidden(const Image *i) {
        assert(i);

        const char *n = i->name;

        return n ? n[0] == '.' : false;
}
bool image_is_vendor(const struct Image *i);
bool image_is_host(const struct Image *i);
/* Returns the effective read-only state of an image: true for every hidden image (as determined
 * by image_is_hidden()), otherwise the value of the image's own read_only field. */
static inline int image_is_read_only(const Image *i) {
assert(i);
/* NOTE(review): the declaration on the next line looks like a removed diff line captured together
 * with this function rather than part of its body — confirm against the actual header. */
int image_to_json(const struct Image *i, sd_json_variant **ret);
/* We enforce the rule that hidden images are always read-only too. If people want to change hidden
 * images they should make a copy first, and make that one mutable */
if (image_is_hidden(i))
return true;
return i->read_only;
}
bool image_is_vendor(const Image *i);
bool image_is_host(const Image *i);
int image_to_json(const Image *i, sd_json_variant **ret);
int image_root_pick(RuntimeScope scope, ImageClass c, bool runtime, char **ret);

View File

@ -449,12 +449,18 @@ int rm_rf_at(int dir_fd, const char *path, RemoveFlags flags) {
if (FLAGS_SET(flags, REMOVE_MISSING_OK) && r == -ENOENT)
return 0;
if (!IN_SET(r, -ENOTTY, -EINVAL, -ENOTDIR))
if (!IN_SET(r, -ENOTTY, -EINVAL, -ENOTDIR, -EPERM, -EACCES))
return r;
/* Not btrfs or not a subvolume */
/* Not btrfs or not a subvolume, or permissions are not available (but might if we go via unlinkat()) */
}
/* In the next step we'll try to open the directory in order to enumerate its contents. This might
* not work due to perms, but we might still be able to delete it, hence let's try that first. */
if (FLAGS_SET(flags, REMOVE_ROOT | REMOVE_PHYSICAL))
if (unlinkat(dir_fd, path, AT_REMOVEDIR) >= 0)
return 0;
fd = openat_harder(dir_fd, path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME, flags, &old_mode);
if (fd >= 0) {
/* We have a dir */