Compare commits

...

8 Commits

Author SHA1 Message Date
Lennart Poettering 3094985efa
Merge 4b4af14a98 into 6fd3496cfd 2024-11-24 22:59:22 -08:00
Luca Boccassi 6fd3496cfd test: mask tmpfiles.d file shipped by selinux policy package in containers
This tmpfiles.d wants to write to sysfs, which is read-only in containers,
so systemd-tmpfiles --create fails in TEST-22-TMPFILES when ran in nspawn
if the selinux policy package is instealled. Mask it, as it's not our
config file, we don't need it in the test.
2024-11-25 15:25:55 +09:00
Daan De Meyer bb486fe9df mkosi: Use shared extra tree between initrd and main image
Let's share more between initrd and main system and use a shared
extra tree to achieve that.
2024-11-25 15:09:58 +09:00
Daan De Meyer 0e44a351ea mkosi: Make sure mkosi.clangd always runs on the host
If the editor that invokes mkosi.clangd is a flatpak, let's make sure
that mkosi is run on the host and not in the flatpak sandbox since it
won't be installed there.
2024-11-25 00:21:10 +01:00
Lennart Poettering 4b4af14a98 test-namespace: tweak log message a bit 2024-11-23 00:14:20 +01:00
Lennart Poettering a2429f507c virt: make use of ns inode check in running_in_userns() and running_in_cgroupns() too 2024-11-23 00:14:20 +01:00
Luca Boccassi 193bf42ab0 detect-virt: check the inode number of the pid namespace
The indoe number of root pid namespace is hardcoded in the kernel to
0xEFFFFFFC since 3.8, so check the inode number of our pid namespace
if all else fails. If it's not 0xEFFFFFFC then we are in a pid
namespace, hence a container environment.

Fixes https://github.com/systemd/systemd/issues/35249

[Reworked by Lennart, to make use of namespace_is_init()]
2024-11-23 00:14:20 +01:00
Lennart Poettering 18ead2b03d namespace-util: add generic namespace_is_init() call 2024-11-23 00:14:20 +01:00
16 changed files with 118 additions and 16 deletions

View File

@ -1,12 +1,18 @@
#!/bin/bash
# SPDX-License-Identifier: LGPL-2.1-or-later
MKOSI_CONFIG="$(mkosi --json summary | jq -r .Images[-1])"
if command -v flatpak-spawn >/dev/null; then
SPAWN=(flatpak-spawn --host)
else
SPAWN=()
fi
MKOSI_CONFIG="$("${SPAWN[@]}" --host mkosi --json summary | jq -r .Images[-1])"
DISTRIBUTION="$(jq -r .Distribution <<< "$MKOSI_CONFIG")"
RELEASE="$(jq -r .Release <<< "$MKOSI_CONFIG")"
ARCH="$(jq -r .Architecture <<< "$MKOSI_CONFIG")"
exec mkosi \
exec "${SPAWN[@]}" mkosi \
--incremental=strict \
--build-sources-ephemeral=no \
--format=none \

View File

@ -38,9 +38,8 @@ SignExpectedPcr=yes
[Content]
ExtraTrees=
mkosi.extra.common
mkosi.crt:/usr/lib/verity.d/mkosi.crt # sysext verification key
mkosi.leak-sanitizer-suppressions:/usr/lib/systemd/leak-sanitizer-suppressions
mkosi.coredump-journal-storage.conf:/usr/lib/systemd/coredump.conf.d/10-coredump-journal-storage.conf
%O/minimal-0.root-%a.raw:/usr/share/minimal_0.raw
%O/minimal-0.root-%a-verity.raw:/usr/share/minimal_0.verity
%O/minimal-0.root-%a-verity-sig.raw:/usr/share/minimal_0.verity.sig

View File

@ -6,9 +6,7 @@ Include=
%D/mkosi.sanitizers
[Content]
ExtraTrees=
%D/mkosi.leak-sanitizer-suppressions:/usr/lib/systemd/leak-sanitizer-suppressions
%D/mkosi.coredump-journal-storage.conf:/usr/lib/systemd/coredump.conf.d/10-coredump-journal-storage.conf
ExtraTrees=%D/mkosi.extra.common
Packages=
findutils

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
/* Root namespace inode numbers, as per include/linux/proc_ns.h in the kernel source tree, since v3.8:
* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=98f842e675f96ffac96e6c50315790912b2812be */
#define PROC_IPC_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFF))
#define PROC_UTS_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFE))
#define PROC_USER_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFD))
#define PROC_PID_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFC))
#define PROC_CGROUP_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFB))
#define PROC_TIME_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFA))

View File

@ -12,6 +12,7 @@
#include "fileio.h"
#include "missing_fs.h"
#include "missing_magic.h"
#include "missing_namespace.h"
#include "missing_sched.h"
#include "missing_syscall.h"
#include "mountpoint-util.h"
@ -23,17 +24,17 @@
#include "user-util.h"
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
[NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, },
[NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, },
[NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, },
[NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PROC_CGROUP_INIT_INO },
[NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, PROC_IPC_INIT_INO },
[NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, 0 },
/* So, the mount namespace flag is called CLONE_NEWNS for historical
* reasons. Let's expose it here under a more explanatory name: "mnt".
* This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
[NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, },
[NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, },
[NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, },
[NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, },
[NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, },
[NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, 0 },
[NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, PROC_PID_INIT_INO },
[NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, PROC_USER_INIT_INO },
[NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, PROC_UTS_INIT_INO },
[NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, PROC_TIME_INIT_INO },
{ /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ },
};
@ -479,6 +480,28 @@ int namespace_open_by_type(NamespaceType type) {
return fd;
}
int namespace_is_init(NamespaceType type) {
int r;
assert(type >= 0);
assert(type <= _NAMESPACE_TYPE_MAX);
if (namespace_info[type].root_inode == 0)
return -EBADR; /* Cannot answer this question */
const char *p = pid_namespace_path(0, type);
struct stat st;
r = RET_NERRNO(stat(p, &st));
if (r == -ENOENT)
/* If the /proc/ns/<type> API is not around in /proc/ then ns is off in the kernel and we are in the init ns */
return proc_mounted() == 0 ? -ENOSYS : true;
if (r < 0)
return r;
return st.st_ino == namespace_info[type].root_inode;
}
int is_our_namespace(int fd, NamespaceType request_type) {
int clone_flag;

View File

@ -24,6 +24,7 @@ extern const struct namespace_info {
const char *proc_name;
const char *proc_path;
unsigned int clone_flag;
ino_t root_inode;
} namespace_info[_NAMESPACE_TYPE_MAX + 1];
int pidref_namespace_open(
@ -74,6 +75,8 @@ int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_r
int namespace_open_by_type(NamespaceType type);
int namespace_is_init(NamespaceType type);
int is_our_namespace(int fd, NamespaceType type);
int is_idmapping_supported(const char *path);

View File

@ -585,6 +585,14 @@ static int running_in_cgroupns(void) {
if (!cg_ns_supported())
return false;
r = namespace_is_init(NAMESPACE_CGROUP);
if (r < 0)
log_debug_errno(r, "Failed to test if in root cgroup namespace, ignoring: %m");
else if (r > 0)
return false;
// FIXME: We really should drop the heuristics below.
r = cg_all_unified();
if (r < 0)
return r;
@ -645,6 +653,16 @@ static int running_in_cgroupns(void) {
}
}
static int running_in_pidns(void) {
int r;
r = namespace_is_init(NAMESPACE_PID);
if (r < 0)
return log_debug_errno(r, "Failed to test if in root PID namespace, ignoring: %m");
return !r;
}
static Virtualization detect_container_files(void) {
static const struct {
const char *file_path;
@ -790,12 +808,21 @@ check_files:
r = running_in_cgroupns();
if (r > 0) {
log_debug("Running in a cgroup namespace, assuming unknown container manager.");
v = VIRTUALIZATION_CONTAINER_OTHER;
goto finish;
}
if (r < 0)
log_debug_errno(r, "Failed to detect cgroup namespace: %m");
/* Finally, the root pid namespace has an hardcoded inode number of 0xEFFFFFFC since kernel 3.8, so
* if all else fails we can check the inode number of our pid namespace and compare it. */
if (running_in_pidns() > 0) {
log_debug("Running in a pid namespace, assuming unknown container manager.");
v = VIRTUALIZATION_CONTAINER_OTHER;
goto finish;
}
/* If none of that worked, give up, assume no container manager. */
v = VIRTUALIZATION_NONE;
goto finish;
@ -863,6 +890,14 @@ int running_in_userns(void) {
_cleanup_free_ char *line = NULL;
int r;
r = namespace_is_init(NAMESPACE_USER);
if (r < 0)
log_debug_errno(r, "Failed to test if in root user namespace, ignoring: %m");
else if (r > 0)
return false;
// FIXME: We really should drop the heuristics below.
r = userns_has_mapping("/proc/self/uid_map");
if (r != 0)
return r;

View File

@ -213,9 +213,23 @@ TEST(idmapping_supported) {
assert_se(is_idmapping_supported("/etc") >= 0);
}
TEST(namespace_is_init) {
int r;
for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++) {
r = namespace_is_init(t);
if (r == -EBADR)
log_info_errno(r, "In root namespace of type '%s': don't know", namespace_info[t].proc_name);
else {
ASSERT_OK(r);
log_info("In root namespace of type '%s': %s", namespace_info[t].proc_name, yes_no(r));
}
}
}
static int intro(void) {
if (!have_namespaces())
return log_tests_skipped("Don't have namespace support");
return log_tests_skipped("Don't have namespace support or lacking privileges");
return EXIT_SUCCESS;
}

View File

@ -6,6 +6,14 @@ set -o pipefail
# shellcheck source=test/units/test-control.sh
. "$(dirname "$0")"/test-control.sh
if systemd-detect-virt --quiet --container; then
# This comes from the selinux package and tries to write
# some files under sysfs, which will be read-only in a container,
# so mask it. It's not our tmpfiles.d file anyway.
mkdir -p /run/tmpfiles.d/
ln -s /dev/null /run/tmpfiles.d/selinux-policy.conf
fi
run_subtests
touch /testok

View File

@ -5,3 +5,7 @@ set -o pipefail
SYSTEMD_IN_CHROOT=1 systemd-detect-virt --chroot
(! SYSTEMD_IN_CHROOT=0 systemd-detect-virt --chroot)
if ! systemd-detect-virt -c; then
unshare --mount-proc --fork --user --pid systemd-detect-virt --container
fi