Compare commits

...

6 Commits

Author SHA1 Message Date
Lennart Poettering c53da7ed02
Merge pull request #16678 from poettering/loop-configure
loop-util: use new LOOP_CONFIGURE ioctl added in kernel 5.8
2020-08-11 22:22:27 +02:00
Lennart Poettering 6b49257f6b man: fix incorrectly placed full stop 2020-08-11 19:36:04 +01:00
Lennart Poettering d8857af4d0 update TODO 2020-08-11 15:24:18 +02:00
Lennart Poettering 86c1c1f345 loop-util: use new LOOP_CONFIGURE ioctl
LOOP_CONFIGURE allows us to configure a loopback device in one ioctl
instead of two, which is not just faster but also removes the race that
udev might start probing the device before we adjusted things properly.

Unfortunately LOOP_CONFIGURE is broken with regard to LO_FLAGS_PARTSCAN
as of kernel 5.8.0. This patch contains a work-around for that: we fall
back to the old behaviour if partition scanning is requested but does
not work. Sucks a bit.

Proposed upstream fix for that issue:

https://lkml.org/lkml/2020/8/6/97
2020-08-11 15:24:18 +02:00
Lennart Poettering 834c15ec38 dissect: use new blockdev_partscan_enabled() API where appropriate 2020-08-11 10:30:19 +02:00
Lennart Poettering e8467cd31c blockdev-util: add correct API for detecting if block device has partition scanning enabled
Instead of checking the loopback ioctls, let's check sysfs, so that we
catch all kinds of block devices, not just loopback block devices.
2020-08-11 10:29:43 +02:00
7 changed files with 134 additions and 38 deletions

3
TODO
View File

@ -20,9 +20,6 @@ Features:
* make use of the new statx mountid and rootmount fields in path_get_mnt_id() * make use of the new statx mountid and rootmount fields in path_get_mnt_id()
and fd_is_mount_point() and fd_is_mount_point()
* make use of new loopback setup ioctl to setup loopback devices in one atomic
ioctl
* nspawn: move "incoming mount" directory to /run/host, move "inaccessible" * nspawn: move "incoming mount" directory to /run/host, move "inaccessible"
nodes to /run/host, move notify socket (for sd_notify() between payload and nodes to /run/host, move notify socket (for sd_notify() between payload and
container manager) container manager)

View File

@ -217,7 +217,7 @@
this notification message has been sent. If this option is used, <varname>NotifyAccess=</varname> (see this notification message has been sent. If this option is used, <varname>NotifyAccess=</varname> (see
below) should be set to open access to the notification socket provided by systemd. If below) should be set to open access to the notification socket provided by systemd. If
<varname>NotifyAccess=</varname> is missing or set to <option>none</option>, it will be forcibly set to <varname>NotifyAccess=</varname> is missing or set to <option>none</option>, it will be forcibly set to
<option>main</option></para></listitem>. <option>main</option>.</para></listitem>
<listitem><para>Behavior of <option>idle</option> is very similar to <option>simple</option>; however, <listitem><para>Behavior of <option>idle</option> is very similar to <option>simple</option>; however,
actual execution of the service program is delayed until all active jobs are dispatched. This may be used actual execution of the service program is delayed until all active jobs are dispatched. This may be used

View File

@ -214,3 +214,40 @@ int lock_whole_block_device(dev_t devt, int operation) {
return TAKE_FD(lock_fd); return TAKE_FD(lock_fd);
} }
int blockdev_partscan_enabled(int fd) {
_cleanup_free_ char *p = NULL, *buf = NULL;
unsigned long long ull;
struct stat st;
int r;
/* Checks if partition scanning is correctly enabled on the block device */
if (fstat(fd, &st) < 0)
return -errno;
if (!S_ISBLK(st.st_mode))
return -ENOTBLK;
if (asprintf(&p, "/sys/dev/block/%u:%u/capability", major(st.st_rdev), minor(st.st_rdev)) < 0)
return -ENOMEM;
r = read_one_line_file(p, &buf);
if (r == -ENOENT) /* If the capability file doesn't exist then we are most likely looking at a
* partition block device, not the whole block device. And that means we have no
* partition scanning on for it (we do for its parent, but not for the partition
* itself). */
return false;
if (r < 0)
return r;
r = safe_atollu_full(buf, 16, &ull);
if (r < 0)
return r;
#ifndef GENHD_FL_NO_PART_SCAN
#define GENHD_FL_NO_PART_SCAN (0x0200)
#endif
return !FLAGS_SET(ull, GENHD_FL_NO_PART_SCAN);
}

View File

@ -20,3 +20,5 @@ int get_block_device(const char *path, dev_t *dev);
int get_block_device_harder(const char *path, dev_t *dev); int get_block_device_harder(const char *path, dev_t *dev);
int lock_whole_block_device(dev_t devt, int operation); int lock_whole_block_device(dev_t devt, int operation);
int blockdev_partscan_enabled(int fd);

15
src/basic/missing_loop.h Normal file
View File

@ -0,0 +1,15 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
#include <linux/loop.h>
/* Fallback definitions for building against kernel headers whose <linux/loop.h> does not provide
 * LOOP_CONFIGURE yet (the ioctl was added in kernel 5.8). Only used if the header does not define it. */
#ifndef LOOP_CONFIGURE
/* Argument passed to the LOOP_CONFIGURE ioctl: it bundles what is otherwise handed over separately via
 * LOOP_SET_FD (the backing fd) and LOOP_SET_STATUS64 (the loop_info64 settings). */
struct loop_config {
/* File descriptor of the backing file/device to attach to the loop device */
__u32 fd;
/* NOTE(review): presumably the block size to configure for the loop device — not set by any caller
 * visible here; confirm against the kernel's definition */
__u32 block_size;
/* Flags, offset and size limit to apply, same structure as used with LOOP_SET_STATUS64 */
struct loop_info64 info;
/* NOTE(review): presumably reserved for future kernel use and expected to be zeroed — confirm */
__u64 __reserved[8];
};
/* ioctl request number of LOOP_CONFIGURE */
#define LOOP_CONFIGURE 0x4C0A
#endif

View File

@ -215,22 +215,18 @@ static int wait_for_partitions_to_appear(
break; break;
r = -errno; r = -errno;
if (r == -EINVAL) { if (r == -EINVAL) {
struct loop_info64 info; /* If we are running on a block device that has partition scanning off, return an
* explicit recognizable error about this, so that callers can generate a proper
* message explaining the situation. */
/* If we are running on a loop device that has partition scanning off, return r = blockdev_partscan_enabled(fd);
* an explicit recognizable error about this, so that callers can generate a if (r < 0)
* proper message explaining the situation. */ return r;
if (r == 0)
if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
#if HAVE_VALGRIND_MEMCHECK_H
/* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif
if ((info.lo_flags & LO_FLAGS_PARTSCAN) == 0)
return log_debug_errno(EPROTONOSUPPORT, return log_debug_errno(EPROTONOSUPPORT,
"Device is a loop device and partition scanning is off!"); "Device is a loop device and partition scanning is off!");
}
return -EINVAL; /* original error */
} }
if (r != -EBUSY) if (r != -EBUSY)
return r; return r;

View File

@ -14,20 +14,68 @@
#include <unistd.h> #include <unistd.h>
#include "alloc-util.h" #include "alloc-util.h"
#include "blockdev-util.h"
#include "errno-util.h" #include "errno-util.h"
#include "fd-util.h" #include "fd-util.h"
#include "fileio.h" #include "fileio.h"
#include "loop-util.h" #include "loop-util.h"
#include "missing_loop.h"
#include "parse-util.h" #include "parse-util.h"
#include "stat-util.h" #include "stat-util.h"
#include "stdio-util.h" #include "stdio-util.h"
#include "string-util.h" #include "string-util.h"
static void cleanup_clear_loop_close(int *fd) { static void cleanup_clear_loop_close(int *fd) {
if (*fd >= 0) { if (*fd < 0)
return;
(void) ioctl(*fd, LOOP_CLR_FD); (void) ioctl(*fd, LOOP_CLR_FD);
(void) safe_close(*fd); (void) safe_close(*fd);
} }
/* Attaches the backing fd described by c to the loop device fd and applies the settings in c->info.
 *
 * The single-step LOOP_CONFIGURE ioctl is tried first. If the kernel does not know it, or if it
 * succeeded but did not actually turn on the requested partition scanning, the classic two-step
 * LOOP_SET_FD + LOOP_SET_STATUS64 sequence is used instead.
 *
 * Returns 0 on success, a negative errno-style value on failure. */
static int loop_configure(int fd, const struct loop_config *c) {
        int r;

        assert(fd >= 0);
        assert(c);

        if (ioctl(fd, LOOP_CONFIGURE, c) >= 0) {

                /* The one-shot setup worked. If no partition scanning was asked for we are done. */
                if (!FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN))
                        return 0;

                /* Vanilla kernel 5.8 does not properly propagate the partition scanning flag into the
                 * block device, hence check whether it really took effect before reporting success. */
                r = blockdev_partscan_enabled(fd);
                if (r < 0)
                        goto fail;
                if (r > 0)
                        return 0; /* Scanning is on, nothing more to do. */

                /* Scanning is off even though we requested it: detach again and redo the setup via the
                 * old ioctls below. */
                (void) ioctl(fd, LOOP_CLR_FD);

        } else if (errno != EINVAL && !ERRNO_IS_NOT_SUPPORTED(errno))
                /* Only fall back if LOOP_CONFIGURE is unsupported, every other error is propagated.
                 * EINVAL is accepted as "unsupported" too, since the kernel currently returns it instead
                 * of ENOTTY for unknown ioctls on loopback block devices. */
                return -errno;

        /* Legacy path: attach the backing fd first, then apply the settings. */
        if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
                return -errno;

        if (ioctl(fd, LOOP_SET_STATUS64, &c->info) >= 0)
                return 0;

        r = -errno;

fail:
        /* Don't leave a half-configured device behind */
        (void) ioctl(fd, LOOP_CLR_FD);
        return r;
}
int loop_device_make( int loop_device_make(
@ -39,7 +87,7 @@ int loop_device_make(
LoopDevice **ret) { LoopDevice **ret) {
_cleanup_free_ char *loopdev = NULL; _cleanup_free_ char *loopdev = NULL;
struct loop_info64 info; struct loop_config config;
LoopDevice *d = NULL; LoopDevice *d = NULL;
struct stat st; struct stat st;
int nr = -1, r; int nr = -1, r;
@ -52,14 +100,14 @@ int loop_device_make(
return -errno; return -errno;
if (S_ISBLK(st.st_mode)) { if (S_ISBLK(st.st_mode)) {
if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) { if (ioctl(fd, LOOP_GET_STATUS64, &config.info) >= 0) {
/* Oh! This is a loopback device? That's interesting! */ /* Oh! This is a loopback device? That's interesting! */
#if HAVE_VALGRIND_MEMCHECK_H #if HAVE_VALGRIND_MEMCHECK_H
/* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */ /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info)); VALGRIND_MAKE_MEM_DEFINED(&config.info, sizeof(config.info));
#endif #endif
nr = info.lo_number; nr = config.info.lo_number;
if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
return -ENOMEM; return -ENOMEM;
@ -100,6 +148,16 @@ int loop_device_make(
if (control < 0) if (control < 0)
return -errno; return -errno;
config = (struct loop_config) {
.fd = fd,
.info = {
/* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
.lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((loop_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
.lo_offset = offset,
.lo_sizelimit = size == UINT64_MAX ? 0 : size,
},
};
/* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
* be gone already, taken by somebody else racing against us. */ * be gone already, taken by somebody else racing against us. */
for (unsigned n_attempts = 0;;) { for (unsigned n_attempts = 0;;) {
@ -119,12 +177,13 @@ int loop_device_make(
if (errno != ENOENT) if (errno != ENOENT)
return -errno; return -errno;
} else { } else {
if (ioctl(loop, LOOP_SET_FD, fd) >= 0) { r = loop_configure(loop, &config);
if (r >= 0) {
loop_with_fd = TAKE_FD(loop); loop_with_fd = TAKE_FD(loop);
break; break;
} }
if (errno != EBUSY) if (r != -EBUSY)
return -errno; return r;
} }
if (++n_attempts >= 64) /* Give up eventually */ if (++n_attempts >= 64) /* Give up eventually */
@ -133,16 +192,6 @@ int loop_device_make(
loopdev = mfree(loopdev); loopdev = mfree(loopdev);
} }
info = (struct loop_info64) {
/* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
.lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((loop_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
.lo_offset = offset,
.lo_sizelimit = size == UINT64_MAX ? 0 : size,
};
if (ioctl(loop_with_fd, LOOP_SET_STATUS64, &info) < 0)
return -errno;
d = new(LoopDevice, 1); d = new(LoopDevice, 1);
if (!d) if (!d)
return -ENOMEM; return -ENOMEM;