1
0
mirror of https://github.com/systemd/systemd synced 2026-03-20 03:54:45 +01:00

Compare commits

...

8 Commits

Author SHA1 Message Date
Daan De Meyer
f76f0f9935
core: several follow-ups for BindNetworkInterface= (#40202) 2025-12-28 12:55:30 +01:00
Mike Yuan
e69ac871a0
bus-unit-util: BindNetworkInterface= is a cgroup property 2025-12-27 23:47:55 +01:00
Mike Yuan
ecd677bcc9
core/cgroup: no need to handle empty bind_network_interface
We always normalize empty setting to NULL, hence per our usual
coding style a simple NULL check would suffice.
2025-12-27 23:47:55 +01:00
Mike Yuan
705d2c527f
core/dbus-cgroup: actually allow empty assignment for BindNetworkInterface= 2025-12-27 23:47:55 +01:00
Mike Yuan
5af2df38df
core/cgroup: fix bpf-bind-iface serialization
Follow-up for c1c787651b34c0a0f1082b9d32cf47ea3abe0af2

The commit added serialization, but deserialization wasn't in place.
Follow the usual practice of pinning the bpf link fd until we install
the new instance.
2025-12-27 23:44:53 +01:00
Mike Yuan
e78f615763
core/cgroup: fix potential bpf link fd leak during deserialization
Also add a comment explaining the deserialization mechanism, as requested
in https://github.com/systemd/systemd/pull/40202#discussion_r2649274628
2025-12-27 20:51:05 +01:00
Mike Yuan
d90cfb7e30
core/bpf-restrict-ifaces: avoid duplicate unit_get_cgroup_runtime() call 2025-12-27 20:14:57 +01:00
Mike Yuan
e4c43ca7d6
core/cgroup: CGContext must be available if Unit has CGRuntime
Follow-up for e03e5056dbffffafc86e46985658e1c9075d3c74
2025-12-27 20:14:56 +01:00
7 changed files with 91 additions and 65 deletions

View File

@ -49,25 +49,18 @@ int bpf_bind_network_interface_supported(void) {
return (supported = bpf_can_link_program(obj->progs.sd_bind_interface)); return (supported = bpf_can_link_program(obj->progs.sd_bind_interface));
} }
int bpf_bind_network_interface_install(Unit *u) { static int bind_network_interface_install_impl(Unit *u, CGroupRuntime *crt) {
_cleanup_(bpf_link_freep) struct bpf_link *link = NULL; _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
_cleanup_(bind_iface_bpf_freep) struct bind_iface_bpf *obj = NULL; _cleanup_(bind_iface_bpf_freep) struct bind_iface_bpf *obj = NULL;
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
_cleanup_free_ char *cgroup_path = NULL; _cleanup_free_ char *cgroup_path = NULL;
_cleanup_close_ int cgroup_fd = -EBADF; _cleanup_close_ int cgroup_fd = -EBADF;
CGroupContext *cc;
CGroupRuntime *crt;
int r, ifindex; int r, ifindex;
assert(u); assert(u);
assert(crt);
cc = unit_get_cgroup_context(u); CGroupContext *cc = ASSERT_PTR(unit_get_cgroup_context(u));
if (!cc)
return 0;
crt = unit_get_cgroup_runtime(u);
if (!crt)
return 0;
if (isempty(cc->bind_network_interface)) if (isempty(cc->bind_network_interface))
return 0; return 0;
@ -118,6 +111,21 @@ int bpf_bind_network_interface_install(Unit *u) {
return 0; return 0;
} }
/* Public entry point: runs bind_network_interface_install_impl() for the unit and afterwards drops
 * the link fd kept in crt->initial_bind_network_interface_link_fd.
 *
 * Returns 0 when the unit has no cgroup runtime; otherwise returns whatever the impl returned. */
int bpf_bind_network_interface_install(Unit *u) {
        assert(u);

        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
        if (!crt)
                return 0;

        int r = bind_network_interface_install_impl(u, crt);

        /* The stashed fd is released regardless of whether the install succeeded; the field is
         * reset to whatever safe_close() hands back so it is not closed a second time later. */
        crt->initial_bind_network_interface_link_fd = safe_close(crt->initial_bind_network_interface_link_fd);

        return r;
}
int bpf_bind_network_interface_serialize(Unit *u, FILE *f, FDSet *fds) { int bpf_bind_network_interface_serialize(Unit *u, FILE *f, FDSet *fds) {
CGroupRuntime *crt; CGroupRuntime *crt;
@ -127,7 +135,7 @@ int bpf_bind_network_interface_serialize(Unit *u, FILE *f, FDSet *fds) {
if (!crt) if (!crt)
return 0; return 0;
return bpf_serialize_link(f, fds, "bind-interface-fd", crt->bpf_bind_network_interface_link); return bpf_serialize_link(f, fds, "bind-iface-bpf-fd", crt->bpf_bind_network_interface_link);
} }
#else /* ! BPF_FRAMEWORK */ #else /* ! BPF_FRAMEWORK */

View File

@ -98,22 +98,17 @@ int bpf_restrict_ifaces_supported(void) {
return (supported = bpf_can_link_program(obj->progs.sd_restrictif_i)); return (supported = bpf_can_link_program(obj->progs.sd_restrictif_i));
} }
static int restrict_ifaces_install_impl(Unit *u) { static int restrict_ifaces_install_impl(Unit *u, CGroupRuntime *crt) {
_cleanup_(bpf_link_freep) struct bpf_link *egress_link = NULL, *ingress_link = NULL; _cleanup_(bpf_link_freep) struct bpf_link *egress_link = NULL, *ingress_link = NULL;
_cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL; _cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
_cleanup_free_ char *cgroup_path = NULL; _cleanup_free_ char *cgroup_path = NULL;
_cleanup_close_ int cgroup_fd = -EBADF; _cleanup_close_ int cgroup_fd = -EBADF;
CGroupContext *cc;
CGroupRuntime *crt;
int r; int r;
cc = unit_get_cgroup_context(u); assert(u);
if (!cc) assert(crt);
return 0;
crt = unit_get_cgroup_runtime(u); CGroupContext *cc = ASSERT_PTR(unit_get_cgroup_context(u));
if (!crt)
return 0;
r = cg_get_path(crt->cgroup_path, /* suffix= */ NULL, &cgroup_path); r = cg_get_path(crt->cgroup_path, /* suffix= */ NULL, &cgroup_path);
if (r < 0) if (r < 0)
@ -159,7 +154,7 @@ int bpf_restrict_ifaces_install(Unit *u) {
if (!crt) if (!crt)
return 0; return 0;
r = restrict_ifaces_install_impl(u); r = restrict_ifaces_install_impl(u, crt);
fdset_close(crt->initial_restrict_ifaces_link_fds, /* async= */ false); fdset_close(crt->initial_restrict_ifaces_link_fds, /* async= */ false);
return r; return r;
} }

View File

@ -571,7 +571,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
fprintf(f, "%sDelegateSubgroup: %s\n", fprintf(f, "%sDelegateSubgroup: %s\n",
prefix, c->delegate_subgroup); prefix, c->delegate_subgroup);
if (!isempty(c->bind_network_interface)) if (c->bind_network_interface)
fprintf(f, "%sBindNetworkInterface: %s\n", fprintf(f, "%sBindNetworkInterface: %s\n",
prefix, c->bind_network_interface); prefix, c->bind_network_interface);
@ -1698,7 +1698,7 @@ static bool unit_get_needs_bind_network_interface(Unit *u) {
if (!c) if (!c)
return false; return false;
return !isempty(c->bind_network_interface); return c->bind_network_interface;
} }
static CGroupMask unit_get_cgroup_mask(Unit *u) { static CGroupMask unit_get_cgroup_mask(Unit *u) {
@ -3050,9 +3050,7 @@ int unit_check_oom(Unit *u) {
if (!crt || !crt->cgroup_path) if (!crt || !crt->cgroup_path)
return 0; return 0;
CGroupContext *ctx = unit_get_cgroup_context(u); CGroupContext *ctx = ASSERT_PTR(unit_get_cgroup_context(u));
if (!ctx)
return 0;
/* If memory.oom.group=1, then look up the oom_group_kill field, which reports how many times the /* If memory.oom.group=1, then look up the oom_group_kill field, which reports how many times the
* kernel killed every process recursively in this cgroup and its descendants, similar to * kernel killed every process recursively in this cgroup and its descendants, similar to
@ -4201,6 +4199,8 @@ CGroupRuntime* cgroup_runtime_new(void) {
.ipv4_deny_map_fd = -EBADF, .ipv4_deny_map_fd = -EBADF,
.ipv6_deny_map_fd = -EBADF, .ipv6_deny_map_fd = -EBADF,
.initial_bind_network_interface_link_fd = -EBADF,
.cgroup_invalidated_mask = _CGROUP_MASK_ALL, .cgroup_invalidated_mask = _CGROUP_MASK_ALL,
.deserialized_cgroup_realized = -1, .deserialized_cgroup_realized = -1,
@ -4235,6 +4235,7 @@ CGroupRuntime* cgroup_runtime_free(CGroupRuntime *crt) {
#endif #endif
fdset_free(crt->initial_restrict_ifaces_link_fds); fdset_free(crt->initial_restrict_ifaces_link_fds);
safe_close(crt->initial_bind_network_interface_link_fd);
bpf_firewall_close(crt); bpf_firewall_close(crt);
@ -4461,34 +4462,24 @@ int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value,
if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-invalidated-mask", key, value, cg_mask_from_string, cgroup_invalidated_mask)) if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-invalidated-mask", key, value, cg_mask_from_string, cgroup_invalidated_mask))
return 1; return 1;
if (STR_IN_SET(key, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
int fd;
fd = deserialize_fd(fds, value);
if (fd >= 0)
(void) bpf_socket_bind_add_initial_link_fd(u, fd);
return 1;
}
if (STR_IN_SET(key, if (STR_IN_SET(key,
"ip-bpf-ingress-installed", "ip-bpf-egress-installed",
"bpf-device-control-installed", "bpf-device-control-installed",
"ip-bpf-ingress-installed", "ip-bpf-egress-installed",
"ip-bpf-custom-ingress-installed", "ip-bpf-custom-egress-installed")) { "ip-bpf-custom-ingress-installed", "ip-bpf-custom-egress-installed")) {
CGroupRuntime *crt = unit_setup_cgroup_runtime(u); CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
if (!crt) if (!crt)
log_oom_debug(); log_oom_debug();
else { else {
if (streq(key, "bpf-device-control-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->bpf_device_control_installed);
if (streq(key, "ip-bpf-ingress-installed")) if (streq(key, "ip-bpf-ingress-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_ingress_installed); (void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_ingress_installed);
if (streq(key, "ip-bpf-egress-installed")) if (streq(key, "ip-bpf-egress-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_egress_installed); (void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_egress_installed);
if (streq(key, "bpf-device-control-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->bpf_device_control_installed);
if (streq(key, "ip-bpf-custom-ingress-installed")) if (streq(key, "ip-bpf-custom-ingress-installed"))
(void) bpf_program_deserialize_attachment_set(value, fds, &crt->ip_bpf_custom_ingress_installed); (void) bpf_program_deserialize_attachment_set(value, fds, &crt->ip_bpf_custom_ingress_installed);
@ -4499,12 +4490,47 @@ int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value,
return 1; return 1;
} }
if (streq(key, "restrict-ifaces-bpf-fd")) { /* We keep the previous bpf link fds stashed until we reattach anew, to close the window where
int fd; * the cgroup restrictions would otherwise be lifted. */
if (STR_IN_SET(key, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
_cleanup_close_ int fd = -EBADF;
fd = deserialize_fd(fds, value); fd = deserialize_fd(fds, value);
if (fd >= 0) if (fd >= 0) {
(void) bpf_restrict_ifaces_add_initial_link_fd(u, fd); r = bpf_socket_bind_add_initial_link_fd(u, fd);
if (r >= 0)
TAKE_FD(fd);
}
return 1;
}
if (streq(key, "restrict-ifaces-bpf-fd")) {
_cleanup_close_ int fd = -EBADF;
fd = deserialize_fd(fds, value);
if (fd >= 0) {
r = bpf_restrict_ifaces_add_initial_link_fd(u, fd);
if (r >= 0)
TAKE_FD(fd);
}
return 1;
}
if (streq(key, "bind-iface-bpf-fd")) {
_cleanup_close_ int fd = -EBADF;
fd = deserialize_fd(fds, value);
if (fd >= 0) {
CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
if (!crt)
log_oom_debug();
else
close_and_replace(crt->initial_bind_network_interface_link_fd, fd);
}
return 1; return 1;
} }

View File

@ -325,6 +325,13 @@ typedef struct CGroupRuntime {
struct bpf_link *restrict_ifaces_egress_bpf_link; struct bpf_link *restrict_ifaces_egress_bpf_link;
#endif #endif
#if BPF_FRAMEWORK
/* BPF link to BPF programs attached to cgroup/sock_create hooks and
* responsible for binding created sockets to a given VRF interface. */
struct bpf_link *bpf_bind_network_interface_link;
#endif
int initial_bind_network_interface_link_fd;
bool cgroup_members_mask_valid:1; bool cgroup_members_mask_valid:1;
/* Reset cgroup accounting next time we fork something off */ /* Reset cgroup accounting next time we fork something off */
@ -334,12 +341,6 @@ typedef struct CGroupRuntime {
bool warned_clamping_cpu_quota_period:1; bool warned_clamping_cpu_quota_period:1;
int deserialized_cgroup_realized; /* tristate, for backwards compat */ int deserialized_cgroup_realized; /* tristate, for backwards compat */
#if BPF_FRAMEWORK
/* BPF link to BPF programs attached to cgroup/sock_create hooks and
* responsible for binding created sockets to a given VRF interface. */
struct bpf_link *bpf_bind_network_interface_link;
#endif
} CGroupRuntime; } CGroupRuntime;
uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state); uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state);

View File

@ -1893,6 +1893,7 @@ int bus_cgroup_set_property(
return 1; return 1;
} }
if (streq(name, "RestrictNetworkInterfaces")) { if (streq(name, "RestrictNetworkInterfaces")) {
int is_allow_list; int is_allow_list;
_cleanup_strv_free_ char **l = NULL; _cleanup_strv_free_ char **l = NULL;
@ -1958,19 +1959,15 @@ int bus_cgroup_set_property(
if (r < 0) if (r < 0)
return r; return r;
if (!ifname_valid_full(s, IFNAME_VALID_ALTERNATIVE)) if (!isempty(s) && !ifname_valid_full(s, IFNAME_VALID_ALTERNATIVE))
return sd_bus_error_setf(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface name: %s", s); return sd_bus_error_setf(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface name: %s", s);
if (!UNIT_WRITE_FLAGS_NOOP(flags)) { if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
if (isempty(s)) r = free_and_strdup_warn(&c->bind_network_interface, empty_to_null(s));
c->bind_network_interface = mfree(c->bind_network_interface); if (r < 0)
else { return r;
r = free_and_strdup_warn(&c->bind_network_interface, s); if (r > 0)
if (r < 0) unit_write_settingf(u, flags, name, "BindNetworkInterface=%s", s);
return r;
}
unit_write_settingf(u, flags, name, "BindNetworkInterface=%s", strempty(s));
} }
return 1; return 1;

View File

@ -5141,12 +5141,11 @@ int unit_setup_exec_runtime(Unit *u) {
return r; return r;
} }
CGroupRuntime *unit_setup_cgroup_runtime(Unit *u) { CGroupRuntime* unit_setup_cgroup_runtime(Unit *u) {
size_t offset;
assert(u); assert(u);
assert(UNIT_HAS_CGROUP_CONTEXT(u));
offset = UNIT_VTABLE(u)->cgroup_runtime_offset; size_t offset = UNIT_VTABLE(u)->cgroup_runtime_offset;
assert(offset > 0); assert(offset > 0);
CGroupRuntime **rt = (CGroupRuntime**) ((uint8_t*) u + offset); CGroupRuntime **rt = (CGroupRuntime**) ((uint8_t*) u + offset);

View File

@ -2370,6 +2370,7 @@ static const BusProperty cgroup_properties[] = {
{ "SocketBindDeny", bus_append_socket_filter }, { "SocketBindDeny", bus_append_socket_filter },
{ "MemoryPressureThresholdSec", bus_append_parse_sec_rename }, { "MemoryPressureThresholdSec", bus_append_parse_sec_rename },
{ "NFTSet", bus_append_nft_set }, { "NFTSet", bus_append_nft_set },
{ "BindNetworkInterface", bus_append_string },
/* While infinity is disallowed in unit file, infinity is allowed in D-Bus API which /* While infinity is disallowed in unit file, infinity is allowed in D-Bus API which
* means use the default memory pressure duration from oomd.conf. */ * means use the default memory pressure duration from oomd.conf. */
@ -2551,7 +2552,6 @@ static const BusProperty execute_properties[] = {
{ "StateDirectoryAccounting", bus_append_parse_boolean }, { "StateDirectoryAccounting", bus_append_parse_boolean },
{ "CacheDirectoryAccounting", bus_append_parse_boolean }, { "CacheDirectoryAccounting", bus_append_parse_boolean },
{ "LogsDirectoryAccounting", bus_append_parse_boolean }, { "LogsDirectoryAccounting", bus_append_parse_boolean },
{ "BindNetworkInterface", bus_append_string },
{ NULL, bus_try_append_resource_limit, dump_resource_limits }, { NULL, bus_try_append_resource_limit, dump_resource_limits },
{} {}