Compare commits

...

4 Commits

Author SHA1 Message Date
Lennart Poettering 4b4af14a98 test-namespace: tweak log message a bit 2024-11-23 00:14:20 +01:00
Lennart Poettering a2429f507c virt: make use of ns inode check in running_in_userns() and running_in_cgroupns() too 2024-11-23 00:14:20 +01:00
Luca Boccassi 193bf42ab0 detect-virt: check the inode number of the pid namespace
The indoe number of root pid namespace is hardcoded in the kernel to
0xEFFFFFFC since 3.8, so check the inode number of our pid namespace
if all else fails. If it's not 0xEFFFFFFC then we are in a pid
namespace, hence a container environment.

Fixes https://github.com/systemd/systemd/issues/35249

[Reworked by Lennart, to make use of namespace_is_init()]
2024-11-23 00:14:20 +01:00
Lennart Poettering 18ead2b03d namespace-util: add generic namespace_is_init() call 2024-11-23 00:14:20 +01:00
6 changed files with 100 additions and 9 deletions

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
/* Root namespace inode numbers, as per include/linux/proc_ns.h in the kernel source tree, since v3.8:
* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=98f842e675f96ffac96e6c50315790912b2812be */
#define PROC_IPC_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFF))
#define PROC_UTS_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFE))
#define PROC_USER_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFD))
#define PROC_PID_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFC))
#define PROC_CGROUP_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFB))
#define PROC_TIME_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFA))

View File

@ -12,6 +12,7 @@
#include "fileio.h"
#include "missing_fs.h"
#include "missing_magic.h"
#include "missing_namespace.h"
#include "missing_sched.h"
#include "missing_syscall.h"
#include "mountpoint-util.h"
@ -23,17 +24,17 @@
#include "user-util.h"
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
[NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, },
[NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, },
[NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, },
[NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PROC_CGROUP_INIT_INO },
[NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, PROC_IPC_INIT_INO },
[NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, 0 },
/* So, the mount namespace flag is called CLONE_NEWNS for historical
* reasons. Let's expose it here under a more explanatory name: "mnt".
* This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
[NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, },
[NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, },
[NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, },
[NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, },
[NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, },
[NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, 0 },
[NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, PROC_PID_INIT_INO },
[NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, PROC_USER_INIT_INO },
[NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, PROC_UTS_INIT_INO },
[NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, PROC_TIME_INIT_INO },
{ /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ },
};
@ -479,6 +480,28 @@ int namespace_open_by_type(NamespaceType type) {
return fd;
}
int namespace_is_init(NamespaceType type) {
int r;
assert(type >= 0);
assert(type <= _NAMESPACE_TYPE_MAX);
if (namespace_info[type].root_inode == 0)
return -EBADR; /* Cannot answer this question */
const char *p = pid_namespace_path(0, type);
struct stat st;
r = RET_NERRNO(stat(p, &st));
if (r == -ENOENT)
/* If the /proc/ns/<type> API is not around in /proc/ then ns is off in the kernel and we are in the init ns */
return proc_mounted() == 0 ? -ENOSYS : true;
if (r < 0)
return r;
return st.st_ino == namespace_info[type].root_inode;
}
int is_our_namespace(int fd, NamespaceType request_type) {
int clone_flag;

View File

@ -24,6 +24,7 @@ extern const struct namespace_info {
const char *proc_name;
const char *proc_path;
unsigned int clone_flag;
ino_t root_inode;
} namespace_info[_NAMESPACE_TYPE_MAX + 1];
int pidref_namespace_open(
@ -74,6 +75,8 @@ int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_r
int namespace_open_by_type(NamespaceType type);
int namespace_is_init(NamespaceType type);
int is_our_namespace(int fd, NamespaceType type);
int is_idmapping_supported(const char *path);

View File

@ -585,6 +585,14 @@ static int running_in_cgroupns(void) {
if (!cg_ns_supported())
return false;
r = namespace_is_init(NAMESPACE_CGROUP);
if (r < 0)
log_debug_errno(r, "Failed to test if in root cgroup namespace, ignoring: %m");
else if (r > 0)
return false;
// FIXME: We really should drop the heuristics below.
r = cg_all_unified();
if (r < 0)
return r;
@ -645,6 +653,16 @@ static int running_in_cgroupns(void) {
}
}
static int running_in_pidns(void) {
int r;
r = namespace_is_init(NAMESPACE_PID);
if (r < 0)
return log_debug_errno(r, "Failed to test if in root PID namespace, ignoring: %m");
return !r;
}
static Virtualization detect_container_files(void) {
static const struct {
const char *file_path;
@ -790,12 +808,21 @@ check_files:
r = running_in_cgroupns();
if (r > 0) {
log_debug("Running in a cgroup namespace, assuming unknown container manager.");
v = VIRTUALIZATION_CONTAINER_OTHER;
goto finish;
}
if (r < 0)
log_debug_errno(r, "Failed to detect cgroup namespace: %m");
/* Finally, the root pid namespace has an hardcoded inode number of 0xEFFFFFFC since kernel 3.8, so
* if all else fails we can check the inode number of our pid namespace and compare it. */
if (running_in_pidns() > 0) {
log_debug("Running in a pid namespace, assuming unknown container manager.");
v = VIRTUALIZATION_CONTAINER_OTHER;
goto finish;
}
/* If none of that worked, give up, assume no container manager. */
v = VIRTUALIZATION_NONE;
goto finish;
@ -863,6 +890,14 @@ int running_in_userns(void) {
_cleanup_free_ char *line = NULL;
int r;
r = namespace_is_init(NAMESPACE_USER);
if (r < 0)
log_debug_errno(r, "Failed to test if in root user namespace, ignoring: %m");
else if (r > 0)
return false;
// FIXME: We really should drop the heuristics below.
r = userns_has_mapping("/proc/self/uid_map");
if (r != 0)
return r;

View File

@ -213,9 +213,23 @@ TEST(idmapping_supported) {
assert_se(is_idmapping_supported("/etc") >= 0);
}
TEST(namespace_is_init) {
int r;
for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++) {
r = namespace_is_init(t);
if (r == -EBADR)
log_info_errno(r, "In root namespace of type '%s': don't know", namespace_info[t].proc_name);
else {
ASSERT_OK(r);
log_info("In root namespace of type '%s': %s", namespace_info[t].proc_name, yes_no(r));
}
}
}
static int intro(void) {
if (!have_namespaces())
return log_tests_skipped("Don't have namespace support");
return log_tests_skipped("Don't have namespace support or lacking privileges");
return EXIT_SUCCESS;
}

View File

@ -5,3 +5,7 @@ set -o pipefail
SYSTEMD_IN_CHROOT=1 systemd-detect-virt --chroot
(! SYSTEMD_IN_CHROOT=0 systemd-detect-virt --chroot)
if ! systemd-detect-virt -c; then
unshare --mount-proc --fork --user --pid systemd-detect-virt --container
fi