1
0
mirror of https://github.com/systemd/systemd synced 2025-09-30 17:24:46 +02:00

Compare commits

..

No commits in common. "aaf73b2ecf44cf447afe848e36640a003699ceb7" and "f963f8953daeab03b892616ce0c65f7572932187" have entirely different histories.

3 changed files with 41 additions and 184 deletions

View File

@@ -9,7 +9,6 @@
#include <unistd.h>
#include "alloc-util.h"
#include "cgroup-util.h"
#include "dirent-util.h"
#include "env-util.h"
#include "fd-util.h"
@@ -454,100 +453,6 @@ static const char *const container_table[_VIRTUALIZATION_MAX] = {
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
/* Heuristically determine whether we are running inside a cgroup namespace,
 * i.e. whether our view of the cgroup tree is nested rather than the host's
 * root. Returns a boolean on success, or a negative errno-style error. */
static int running_in_cgroupns(void) {
        int unified;

        if (!cg_ns_supported())
                return false;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        if (!unified) {
                /* cgroup v1 */

                /* If the systemd controller hierarchy is not mounted, do not even bother. */
                if (access("/sys/fs/cgroup/systemd", F_OK) < 0) {
                        if (errno != ENOENT)
                                return -errno;
                        return false;
                }

                /* release_agent only exists in the root cgroup, hence its absence
                 * indicates we are inside a nested (namespaced) cgroup. */
                if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) < 0) {
                        if (errno != ENOENT)
                                return -errno;
                        return true;
                }

                return false;
        }

        /* cgroup v2 */

        if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* All kernel versions have cgroup.events in nested cgroups, so if it
                 * is missing we cannot be in one. */
                return false;
        }

        /* There's no cgroup.type in the root cgroup, and future kernel versions are
         * unlikely to add it since cgroup.type is something that makes no sense
         * whatsoever in the root cgroup. Its presence proves we are nested. */
        if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                return true;
        if (errno != ENOENT)
                return -errno;

        /* On older kernel versions, there's no cgroup.type at all. Such kernels are
         * known for sure to expose cgroup.events only in nested cgroups, hence we
         * must be in one. */
        if (access("/sys/kernel/cgroup/features", F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;
                return true;
        }

        /* This is a recent kernel, and cgroup.type doesn't exist, so we must be in
         * the root cgroup. */
        return false;
}
/* Probe for well-known marker files dropped into the root file system by
 * specific container managers. Returns the matching VIRTUALIZATION_* id, or
 * VIRTUALIZATION_NONE if no marker file is found. */
static int detect_container_files(void) {
        static const struct {
                const char *file_path;
                int id;
        } container_file_table[] = {
                /* https://github.com/containers/podman/issues/6192 */
                /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
                { "/run/.containerenv", VIRTUALIZATION_PODMAN },
                /* https://github.com/moby/moby/issues/18355 */
                /* Docker must be the last in this table, see below. */
                { "/.dockerenv", VIRTUALIZATION_DOCKER },
        };

        for (size_t j = 0; j < ELEMENTSOF(container_file_table); j++) {
                if (access(container_file_table[j].file_path, F_OK) >= 0)
                        return container_file_table[j].id;

                /* Any failure other than "file doesn't exist" is logged but otherwise
                 * ignored, so one unreadable path cannot mask the remaining probes. */
                if (errno != ENOENT)
                        log_debug_errno(errno,
                                        "Checking if %s exists failed, ignoring: %m",
                                        container_file_table[j].file_path);
        }

        return VIRTUALIZATION_NONE;
}
int detect_container(void) {
static thread_local int cached_found = _VIRTUALIZATION_INVALID;
_cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
@@ -625,7 +530,7 @@ int detect_container(void) {
*/
e = getenv("container");
if (!e)
goto check_files;
goto none;
if (isempty(e)) {
r = VIRTUALIZATION_NONE;
goto finish;
@ -653,36 +558,12 @@ int detect_container(void) {
if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
check_files:
/* Check for existence of some well-known files. We only do this after checking
* for other specific container managers, otherwise we risk mistaking another
* container manager for Docker: the /.dockerenv file could inadvertently end up
* in a file system image. */
r = detect_container_files();
if (r)
goto finish;
r = running_in_cgroupns();
if (r > 0) {
r = VIRTUALIZATION_CONTAINER_OTHER;
goto finish;
}
if (r < 0)
log_debug_errno(r, "Failed to detect cgroup namespace: %m");
/* If none of that worked, give up, assume no container manager. */
none:
/* If that didn't work, give up, assume no container manager. */
r = VIRTUALIZATION_NONE;
goto finish;
translate_name:
if (streq(e, "oci")) {
/* Some images hardcode container=oci, but OCI is not a specific container manager.
* Try to detect one based on well-known files. */
r = detect_container_files();
if (!r)
r = VIRTUALIZATION_CONTAINER_OTHER;
goto finish;
}
r = container_from_string(e);
if (r < 0)
r = VIRTUALIZATION_CONTAINER_OTHER;

View File

@@ -19,7 +19,7 @@
struct reply_callback {
sd_netlink_message_handler_t callback;
usec_t timeout;
uint32_t serial;
uint64_t serial;
unsigned prioq_idx;
};

View File

@ -17,9 +17,6 @@
#include "string-util.h"
#include "util.h"
/* Some really high limit, to catch programming errors */
#define REPLY_CALLBACKS_MAX UINT16_MAX
static int sd_netlink_new(sd_netlink **ret) {
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
@ -36,29 +33,11 @@ static int sd_netlink_new(sd_netlink **ret) {
.original_pid = getpid_cached(),
.protocol = -1,
/* Kernel change notification messages have sequence number 0. We want to avoid that with our
* own serials, in order not to get confused when matching up kernel replies to our earlier
* requests.
*
* Moreover, when using netlink socket activation (i.e. where PID 1 binds an AF_NETLINK
* socket for us and passes it to us across execve()) and we get restarted multiple times
* while the socket sticks around we might get confused by replies from earlier runs coming
* in late which is pretty likely if we'd start our sequence numbers always from 1. Hence,
* let's start with a value based on the system clock. This should make collisions much less
* likely (though still theoretically possible). We use a 32 bit µs counter starting at boot
* for this (and explicitly exclude the zero, see above). This counter will wrap around after
* a bit more than 1h, but that's hopefully OK as the kernel shouldn't take that long to
* reply to our requests.
*
* We only pick the initial start value this way. For each message we simply increase the
* sequence number by 1. This means we could enqueue 1 netlink message per µs without risking
* collisions, which should be OK.
*
* Note this means the serials will be in the range 1…UINT32_MAX here.
*
* (In an ideal world we'd attach the current serial counter to the netlink socket itself
* somehow, to avoid all this, but I couldn't come up with a nice way to do this) */
.serial = (uint32_t) (now(CLOCK_MONOTONIC) % UINT32_MAX) + 1,
/* Change notification responses have sequence 0, so we must
* start our request sequence numbers at 1, or we may confuse our
* responses with notifications from the kernel */
.serial = 1,
};
/* We guarantee that the read buffer has at least space for
@ -110,7 +89,9 @@ static bool rtnl_pid_changed(const sd_netlink *rtnl) {
int sd_netlink_open_fd(sd_netlink **ret, int fd) {
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
int r, protocol;
int r;
int protocol;
socklen_t l;
assert_return(ret, -EINVAL);
assert_return(fd >= 0, -EBADF);
@ -119,7 +100,8 @@ int sd_netlink_open_fd(sd_netlink **ret, int fd) {
if (r < 0)
return r;
r = getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, &protocol);
l = sizeof(protocol);
r = getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &l);
if (r < 0)
return r;
@ -208,25 +190,18 @@ static sd_netlink *netlink_free(sd_netlink *rtnl) {
DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free);
static void rtnl_seal_message(sd_netlink *rtnl, sd_netlink_message *m) {
uint32_t picked;
assert(rtnl);
assert(!rtnl_pid_changed(rtnl));
assert(m);
assert(m->hdr);
/* Avoid collisions with outstanding requests */
do {
picked = rtnl->serial;
/* don't use seq == 0, as that is used for broadcasts, so we
would get confused by replies to such messages */
m->hdr->nlmsg_seq = rtnl->serial++ ? : rtnl->serial++;
/* Don't use seq == 0, as that is used for broadcasts, so we would get confused by replies to
such messages */
rtnl->serial = rtnl->serial == UINT32_MAX ? 1 : rtnl->serial + 1;
} while (hashmap_contains(rtnl->reply_callbacks, UINT32_TO_PTR(picked)));
m->hdr->nlmsg_seq = picked;
rtnl_message_seal(m);
return;
}
int sd_netlink_send(sd_netlink *nl,
@ -364,7 +339,7 @@ static int process_timeout(sd_netlink *rtnl) {
assert_se(prioq_pop(rtnl->reply_callbacks_prioq) == c);
c->timeout = 0;
hashmap_remove(rtnl->reply_callbacks, UINT32_TO_PTR(c->serial));
hashmap_remove(rtnl->reply_callbacks, &c->serial);
slot = container_of(c, sd_netlink_slot, reply_callback);
@ -384,7 +359,7 @@ static int process_timeout(sd_netlink *rtnl) {
static int process_reply(sd_netlink *rtnl, sd_netlink_message *m) {
struct reply_callback *c;
sd_netlink_slot *slot;
uint32_t serial;
uint64_t serial;
uint16_t type;
int r;
@ -392,7 +367,7 @@ static int process_reply(sd_netlink *rtnl, sd_netlink_message *m) {
assert(m);
serial = rtnl_message_get_serial(m);
c = hashmap_remove(rtnl->reply_callbacks, UINT32_TO_PTR(serial));
c = hashmap_remove(rtnl->reply_callbacks, &serial);
if (!c)
return 0;
@ -437,19 +412,20 @@ static int process_match(sd_netlink *rtnl, sd_netlink_message *m) {
return r;
LIST_FOREACH(match_callbacks, c, rtnl->match_callbacks) {
if (type != c->type)
continue;
if (type == c->type) {
slot = container_of(c, sd_netlink_slot, match_callback);
slot = container_of(c, sd_netlink_slot, match_callback);
r = c->callback(rtnl, m, slot->userdata);
if (r != 0) {
if (r < 0)
log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m",
slot->description ? "'" : "",
strempty(slot->description),
slot->description ? "' " : "");
r = c->callback(rtnl, m, slot->userdata);
if (r < 0)
log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m",
slot->description ? "'" : "",
strempty(slot->description),
slot->description ? "' " : "");
if (r != 0)
break;
break;
}
}
}
return 1;
@ -592,6 +568,7 @@ int sd_netlink_call_async(
uint64_t usec,
const char *description) {
_cleanup_free_ sd_netlink_slot *slot = NULL;
uint32_t s;
int r, k;
assert_return(nl, -EINVAL);
@ -599,10 +576,7 @@ int sd_netlink_call_async(
assert_return(callback, -EINVAL);
assert_return(!rtnl_pid_changed(nl), -ECHILD);
if (hashmap_size(nl->reply_callbacks) >= REPLY_CALLBACKS_MAX)
return -ERANGE;
r = hashmap_ensure_allocated(&nl->reply_callbacks, &trivial_hash_ops);
r = hashmap_ensure_allocated(&nl->reply_callbacks, &uint64_hash_ops);
if (r < 0)
return r;
@ -619,18 +593,20 @@ int sd_netlink_call_async(
slot->reply_callback.callback = callback;
slot->reply_callback.timeout = calc_elapse(usec);
k = sd_netlink_send(nl, m, &slot->reply_callback.serial);
k = sd_netlink_send(nl, m, &s);
if (k < 0)
return k;
r = hashmap_put(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial), &slot->reply_callback);
slot->reply_callback.serial = s;
r = hashmap_put(nl->reply_callbacks, &slot->reply_callback.serial, &slot->reply_callback);
if (r < 0)
return r;
if (slot->reply_callback.timeout != 0) {
r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
if (r < 0) {
(void) hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial));
(void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);
return r;
}
}