mirror of
https://github.com/systemd/systemd
synced 2025-09-30 17:24:46 +02:00
Compare commits
8 Commits
f963f8953d
...
aaf73b2ecf
Author | SHA1 | Date | |
---|---|---|---|
![]() |
aaf73b2ecf | ||
![]() |
372a5002dc | ||
![]() |
ac3bc1b819 | ||
![]() |
baf78f1a51 | ||
![]() |
13ec9f103b | ||
![]() |
b522c4b92a | ||
![]() |
0e13779d37 | ||
![]() |
a4a9a6f7c6 |
125
src/basic/virt.c
125
src/basic/virt.c
@ -9,6 +9,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "alloc-util.h"
|
#include "alloc-util.h"
|
||||||
|
#include "cgroup-util.h"
|
||||||
#include "dirent-util.h"
|
#include "dirent-util.h"
|
||||||
#include "env-util.h"
|
#include "env-util.h"
|
||||||
#include "fd-util.h"
|
#include "fd-util.h"
|
||||||
@ -453,6 +454,100 @@ static const char *const container_table[_VIRTUALIZATION_MAX] = {
|
|||||||
|
|
||||||
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
|
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
|
||||||
|
|
||||||
|
/* Probes well-known cgroup file system paths to guess whether we are running inside a cgroup
 * namespace (i.e. whether /sys/fs/cgroup is a nested cgroup rather than the root one).
 *
 * Returns:
 *   false (0) if cg_ns_supported() reports no support, or if the probes indicate the root cgroup,
 *   true (> 0) if the probes indicate a nested cgroup,
 *   a negative value if cg_all_unified() fails (passed through unchanged) or if access() fails
 *   with anything other than ENOENT (returned as -errno). */
static int running_in_cgroupns(void) {
        int unified;

        if (!cg_ns_supported())
                return false;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        if (unified == 0) {
                /* cgroup v1 */

                /* Without a mounted systemd controller there is nothing to look at. */
                if (access("/sys/fs/cgroup/systemd", F_OK) < 0)
                        return errno == ENOENT ? false : -errno;

                /* release_agent only exists in the root cgroup, hence if it is missing we are
                 * in a nested one. */
                if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) < 0)
                        return errno == ENOENT ? true : -errno;

                return false;
        }

        /* cgroup v2 */

        /* All kernel versions have cgroup.events in nested cgroups, so its absence means we are
         * in the root cgroup. */
        if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0)
                return errno == ENOENT ? false : -errno;

        /* There's no cgroup.type in the root cgroup, and future kernel versions are unlikely to
         * add it there since it makes no sense in the root cgroup. Its presence hence means we
         * are in a nested cgroup. */
        if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                return true;
        if (errno != ENOENT)
                return -errno;

        /* Older kernel versions have no cgroup.type at all. If /sys/kernel/cgroup/features is
         * missing too, this is an old kernel that is known to have cgroup.events only in nested
         * cgroups, and we already found cgroup.events above. */
        if (access("/sys/kernel/cgroup/features", F_OK) < 0)
                return errno == ENOENT ? true : -errno;

        /* Recent kernel, but cgroup.type does not exist: we must be in the root cgroup. */
        return false;
}
|
||||||
|
|
||||||
|
static int detect_container_files(void) {
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
static const struct {
|
||||||
|
const char *file_path;
|
||||||
|
int id;
|
||||||
|
} container_file_table[] = {
|
||||||
|
/* https://github.com/containers/podman/issues/6192 */
|
||||||
|
/* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
|
||||||
|
{ "/run/.containerenv", VIRTUALIZATION_PODMAN },
|
||||||
|
/* https://github.com/moby/moby/issues/18355 */
|
||||||
|
/* Docker must be the last in this table, see below. */
|
||||||
|
{ "/.dockerenv", VIRTUALIZATION_DOCKER },
|
||||||
|
};
|
||||||
|
|
||||||
|
for (i = 0; i < ELEMENTSOF(container_file_table); i++) {
|
||||||
|
if (access(container_file_table[i].file_path, F_OK) >= 0)
|
||||||
|
return container_file_table[i].id;
|
||||||
|
|
||||||
|
if (errno != ENOENT)
|
||||||
|
log_debug_errno(errno,
|
||||||
|
"Checking if %s exists failed, ignoring: %m",
|
||||||
|
container_file_table[i].file_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
return VIRTUALIZATION_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
int detect_container(void) {
|
int detect_container(void) {
|
||||||
static thread_local int cached_found = _VIRTUALIZATION_INVALID;
|
static thread_local int cached_found = _VIRTUALIZATION_INVALID;
|
||||||
_cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
|
_cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
|
||||||
@ -530,7 +625,7 @@ int detect_container(void) {
|
|||||||
*/
|
*/
|
||||||
e = getenv("container");
|
e = getenv("container");
|
||||||
if (!e)
|
if (!e)
|
||||||
goto none;
|
goto check_files;
|
||||||
if (isempty(e)) {
|
if (isempty(e)) {
|
||||||
r = VIRTUALIZATION_NONE;
|
r = VIRTUALIZATION_NONE;
|
||||||
goto finish;
|
goto finish;
|
||||||
@ -558,12 +653,36 @@ int detect_container(void) {
|
|||||||
if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
|
if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
|
||||||
log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
|
log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
|
||||||
|
|
||||||
none:
|
check_files:
|
||||||
/* If that didn't work, give up, assume no container manager. */
|
/* Check for existence of some well-known files. We only do this after checking
|
||||||
|
* for other specific container managers, otherwise we risk mistaking another
|
||||||
|
* container manager for Docker: the /.dockerenv file could inadvertently end up
|
||||||
|
* in a file system image. */
|
||||||
|
r = detect_container_files();
|
||||||
|
if (r)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
r = running_in_cgroupns();
|
||||||
|
if (r > 0) {
|
||||||
|
r = VIRTUALIZATION_CONTAINER_OTHER;
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
if (r < 0)
|
||||||
|
log_debug_errno(r, "Failed to detect cgroup namespace: %m");
|
||||||
|
|
||||||
|
/* If none of that worked, give up, assume no container manager. */
|
||||||
r = VIRTUALIZATION_NONE;
|
r = VIRTUALIZATION_NONE;
|
||||||
goto finish;
|
goto finish;
|
||||||
|
|
||||||
translate_name:
|
translate_name:
|
||||||
|
if (streq(e, "oci")) {
|
||||||
|
/* Some images hardcode container=oci, but OCI is not a specific container manager.
|
||||||
|
* Try to detect one based on well-known files. */
|
||||||
|
r = detect_container_files();
|
||||||
|
if (!r)
|
||||||
|
r = VIRTUALIZATION_CONTAINER_OTHER;
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
r = container_from_string(e);
|
r = container_from_string(e);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
r = VIRTUALIZATION_CONTAINER_OTHER;
|
r = VIRTUALIZATION_CONTAINER_OTHER;
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
struct reply_callback {
|
struct reply_callback {
|
||||||
sd_netlink_message_handler_t callback;
|
sd_netlink_message_handler_t callback;
|
||||||
usec_t timeout;
|
usec_t timeout;
|
||||||
uint64_t serial;
|
uint32_t serial;
|
||||||
unsigned prioq_idx;
|
unsigned prioq_idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -17,6 +17,9 @@
|
|||||||
#include "string-util.h"
|
#include "string-util.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
|
/* Some really high limit, to catch programming errors */
|
||||||
|
#define REPLY_CALLBACKS_MAX UINT16_MAX
|
||||||
|
|
||||||
static int sd_netlink_new(sd_netlink **ret) {
|
static int sd_netlink_new(sd_netlink **ret) {
|
||||||
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
|
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
|
||||||
|
|
||||||
@ -33,11 +36,29 @@ static int sd_netlink_new(sd_netlink **ret) {
|
|||||||
.original_pid = getpid_cached(),
|
.original_pid = getpid_cached(),
|
||||||
.protocol = -1,
|
.protocol = -1,
|
||||||
|
|
||||||
/* Change notification responses have sequence 0, so we must
|
/* Kernel change notification messages have sequence number 0. We want to avoid that with our
|
||||||
* start our request sequence numbers at 1, or we may confuse our
|
* own serials, in order not to get confused when matching up kernel replies to our earlier
|
||||||
* responses with notifications from the kernel */
|
* requests.
|
||||||
.serial = 1,
|
*
|
||||||
|
* Moreover, when using netlink socket activation (i.e. where PID 1 binds an AF_NETLINK
|
||||||
|
* socket for us and passes it to us across execve()) and we get restarted multiple times
|
||||||
|
* while the socket sticks around we might get confused by replies from earlier runs coming
|
||||||
|
* in late — which is pretty likely if we'd start our sequence numbers always from 1. Hence,
|
||||||
|
* let's start with a value based on the system clock. This should make collisions much less
|
||||||
|
* likely (though still theoretically possible). We use a 32 bit µs counter starting at boot
|
||||||
|
* for this (and explicitly exclude the zero, see above). This counter will wrap around after
|
||||||
|
* a bit more than 1h, but that's hopefully OK as the kernel shouldn't take that long to
|
||||||
|
* reply to our requests.
|
||||||
|
*
|
||||||
|
* We only pick the initial start value this way. For each message we simply increase the
|
||||||
|
* sequence number by 1. This means we could enqueue 1 netlink message per µs without risking
|
||||||
|
* collisions, which should be OK.
|
||||||
|
*
|
||||||
|
* Note this means the serials will be in the range 1…UINT32_MAX here.
|
||||||
|
*
|
||||||
|
* (In an ideal world we'd attach the current serial counter to the netlink socket itself
|
||||||
|
* somehow, to avoid all this, but I couldn't come up with a nice way to do this) */
|
||||||
|
.serial = (uint32_t) (now(CLOCK_MONOTONIC) % UINT32_MAX) + 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* We guarantee that the read buffer has at least space for
|
/* We guarantee that the read buffer has at least space for
|
||||||
@ -89,9 +110,7 @@ static bool rtnl_pid_changed(const sd_netlink *rtnl) {
|
|||||||
|
|
||||||
int sd_netlink_open_fd(sd_netlink **ret, int fd) {
|
int sd_netlink_open_fd(sd_netlink **ret, int fd) {
|
||||||
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
|
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
|
||||||
int r;
|
int r, protocol;
|
||||||
int protocol;
|
|
||||||
socklen_t l;
|
|
||||||
|
|
||||||
assert_return(ret, -EINVAL);
|
assert_return(ret, -EINVAL);
|
||||||
assert_return(fd >= 0, -EBADF);
|
assert_return(fd >= 0, -EBADF);
|
||||||
@ -100,8 +119,7 @@ int sd_netlink_open_fd(sd_netlink **ret, int fd) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
l = sizeof(protocol);
|
r = getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, &protocol);
|
||||||
r = getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &l);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
@ -190,18 +208,25 @@ static sd_netlink *netlink_free(sd_netlink *rtnl) {
|
|||||||
DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free);
|
DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free);
|
||||||
|
|
||||||
static void rtnl_seal_message(sd_netlink *rtnl, sd_netlink_message *m) {
|
static void rtnl_seal_message(sd_netlink *rtnl, sd_netlink_message *m) {
|
||||||
|
uint32_t picked;
|
||||||
|
|
||||||
assert(rtnl);
|
assert(rtnl);
|
||||||
assert(!rtnl_pid_changed(rtnl));
|
assert(!rtnl_pid_changed(rtnl));
|
||||||
assert(m);
|
assert(m);
|
||||||
assert(m->hdr);
|
assert(m->hdr);
|
||||||
|
|
||||||
/* don't use seq == 0, as that is used for broadcasts, so we
|
/* Avoid collisions with outstanding requests */
|
||||||
would get confused by replies to such messages */
|
do {
|
||||||
m->hdr->nlmsg_seq = rtnl->serial++ ? : rtnl->serial++;
|
picked = rtnl->serial;
|
||||||
|
|
||||||
|
/* Don't use seq == 0, as that is used for broadcasts, so we would get confused by replies to
|
||||||
|
such messages */
|
||||||
|
rtnl->serial = rtnl->serial == UINT32_MAX ? 1 : rtnl->serial + 1;
|
||||||
|
|
||||||
|
} while (hashmap_contains(rtnl->reply_callbacks, UINT32_TO_PTR(picked)));
|
||||||
|
|
||||||
|
m->hdr->nlmsg_seq = picked;
|
||||||
rtnl_message_seal(m);
|
rtnl_message_seal(m);
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int sd_netlink_send(sd_netlink *nl,
|
int sd_netlink_send(sd_netlink *nl,
|
||||||
@ -339,7 +364,7 @@ static int process_timeout(sd_netlink *rtnl) {
|
|||||||
|
|
||||||
assert_se(prioq_pop(rtnl->reply_callbacks_prioq) == c);
|
assert_se(prioq_pop(rtnl->reply_callbacks_prioq) == c);
|
||||||
c->timeout = 0;
|
c->timeout = 0;
|
||||||
hashmap_remove(rtnl->reply_callbacks, &c->serial);
|
hashmap_remove(rtnl->reply_callbacks, UINT32_TO_PTR(c->serial));
|
||||||
|
|
||||||
slot = container_of(c, sd_netlink_slot, reply_callback);
|
slot = container_of(c, sd_netlink_slot, reply_callback);
|
||||||
|
|
||||||
@ -359,7 +384,7 @@ static int process_timeout(sd_netlink *rtnl) {
|
|||||||
static int process_reply(sd_netlink *rtnl, sd_netlink_message *m) {
|
static int process_reply(sd_netlink *rtnl, sd_netlink_message *m) {
|
||||||
struct reply_callback *c;
|
struct reply_callback *c;
|
||||||
sd_netlink_slot *slot;
|
sd_netlink_slot *slot;
|
||||||
uint64_t serial;
|
uint32_t serial;
|
||||||
uint16_t type;
|
uint16_t type;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
@ -367,7 +392,7 @@ static int process_reply(sd_netlink *rtnl, sd_netlink_message *m) {
|
|||||||
assert(m);
|
assert(m);
|
||||||
|
|
||||||
serial = rtnl_message_get_serial(m);
|
serial = rtnl_message_get_serial(m);
|
||||||
c = hashmap_remove(rtnl->reply_callbacks, &serial);
|
c = hashmap_remove(rtnl->reply_callbacks, UINT32_TO_PTR(serial));
|
||||||
if (!c)
|
if (!c)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -412,20 +437,19 @@ static int process_match(sd_netlink *rtnl, sd_netlink_message *m) {
|
|||||||
return r;
|
return r;
|
||||||
|
|
||||||
LIST_FOREACH(match_callbacks, c, rtnl->match_callbacks) {
|
LIST_FOREACH(match_callbacks, c, rtnl->match_callbacks) {
|
||||||
if (type == c->type) {
|
if (type != c->type)
|
||||||
slot = container_of(c, sd_netlink_slot, match_callback);
|
continue;
|
||||||
|
|
||||||
r = c->callback(rtnl, m, slot->userdata);
|
slot = container_of(c, sd_netlink_slot, match_callback);
|
||||||
if (r != 0) {
|
|
||||||
if (r < 0)
|
|
||||||
log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m",
|
|
||||||
slot->description ? "'" : "",
|
|
||||||
strempty(slot->description),
|
|
||||||
slot->description ? "' " : "");
|
|
||||||
|
|
||||||
break;
|
r = c->callback(rtnl, m, slot->userdata);
|
||||||
}
|
if (r < 0)
|
||||||
}
|
log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m",
|
||||||
|
slot->description ? "'" : "",
|
||||||
|
strempty(slot->description),
|
||||||
|
slot->description ? "' " : "");
|
||||||
|
if (r != 0)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -568,7 +592,6 @@ int sd_netlink_call_async(
|
|||||||
uint64_t usec,
|
uint64_t usec,
|
||||||
const char *description) {
|
const char *description) {
|
||||||
_cleanup_free_ sd_netlink_slot *slot = NULL;
|
_cleanup_free_ sd_netlink_slot *slot = NULL;
|
||||||
uint32_t s;
|
|
||||||
int r, k;
|
int r, k;
|
||||||
|
|
||||||
assert_return(nl, -EINVAL);
|
assert_return(nl, -EINVAL);
|
||||||
@ -576,7 +599,10 @@ int sd_netlink_call_async(
|
|||||||
assert_return(callback, -EINVAL);
|
assert_return(callback, -EINVAL);
|
||||||
assert_return(!rtnl_pid_changed(nl), -ECHILD);
|
assert_return(!rtnl_pid_changed(nl), -ECHILD);
|
||||||
|
|
||||||
r = hashmap_ensure_allocated(&nl->reply_callbacks, &uint64_hash_ops);
|
if (hashmap_size(nl->reply_callbacks) >= REPLY_CALLBACKS_MAX)
|
||||||
|
return -ERANGE;
|
||||||
|
|
||||||
|
r = hashmap_ensure_allocated(&nl->reply_callbacks, &trivial_hash_ops);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
@ -593,20 +619,18 @@ int sd_netlink_call_async(
|
|||||||
slot->reply_callback.callback = callback;
|
slot->reply_callback.callback = callback;
|
||||||
slot->reply_callback.timeout = calc_elapse(usec);
|
slot->reply_callback.timeout = calc_elapse(usec);
|
||||||
|
|
||||||
k = sd_netlink_send(nl, m, &s);
|
k = sd_netlink_send(nl, m, &slot->reply_callback.serial);
|
||||||
if (k < 0)
|
if (k < 0)
|
||||||
return k;
|
return k;
|
||||||
|
|
||||||
slot->reply_callback.serial = s;
|
r = hashmap_put(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial), &slot->reply_callback);
|
||||||
|
|
||||||
r = hashmap_put(nl->reply_callbacks, &slot->reply_callback.serial, &slot->reply_callback);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
if (slot->reply_callback.timeout != 0) {
|
if (slot->reply_callback.timeout != 0) {
|
||||||
r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
|
r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
(void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);
|
(void) hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user