1
0
mirror of https://github.com/systemd/systemd synced 2026-03-19 19:44:48 +01:00

Compare commits

...

8 Commits

Author SHA1 Message Date
Daan De Meyer
f76f0f9935
core: several follow-ups for BindNetworkInterface= (#40202) 2025-12-28 12:55:30 +01:00
Mike Yuan
e69ac871a0
bus-unit-util: BindNetworkInterface= is a cgroup property 2025-12-27 23:47:55 +01:00
Mike Yuan
ecd677bcc9
core/cgroup: no need to handle empty bind_network_interface
We always normalize empty setting to NULL, hence per our usual
coding style a simple NULL check would suffice.
2025-12-27 23:47:55 +01:00
Mike Yuan
705d2c527f
core/dbus-cgroup: actually allow empty assignment for BindNetworkInterface= 2025-12-27 23:47:55 +01:00
Mike Yuan
5af2df38df
core/cgroup: fix bpf-bind-iface serialization
Follow-up for c1c787651b34c0a0f1082b9d32cf47ea3abe0af2

The commit added serialization, but deserialization wasn't in place.
Follow the usual practice of pinning the bpf link fd until we install
the new instance.
2025-12-27 23:44:53 +01:00
Mike Yuan
e78f615763
core/cgroup: fix potential bpf link fd leak during deserialization
Also add a comment explaining the deserialization mechanism, as requested
in https://github.com/systemd/systemd/pull/40202#discussion_r2649274628
2025-12-27 20:51:05 +01:00
Mike Yuan
d90cfb7e30
core/bpf-restrict-ifaces: avoid duplicate unit_get_cgroup_runtime() call 2025-12-27 20:14:57 +01:00
Mike Yuan
e4c43ca7d6
core/cgroup: CGContext must be available if Unit has CGRuntime
Follow-up for e03e5056dbffffafc86e46985658e1c9075d3c74
2025-12-27 20:14:56 +01:00
7 changed files with 91 additions and 65 deletions

View File

@ -49,25 +49,18 @@ int bpf_bind_network_interface_supported(void) {
return (supported = bpf_can_link_program(obj->progs.sd_bind_interface));
}
int bpf_bind_network_interface_install(Unit *u) {
static int bind_network_interface_install_impl(Unit *u, CGroupRuntime *crt) {
_cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
_cleanup_(bind_iface_bpf_freep) struct bind_iface_bpf *obj = NULL;
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
_cleanup_free_ char *cgroup_path = NULL;
_cleanup_close_ int cgroup_fd = -EBADF;
CGroupContext *cc;
CGroupRuntime *crt;
int r, ifindex;
assert(u);
assert(crt);
cc = unit_get_cgroup_context(u);
if (!cc)
return 0;
crt = unit_get_cgroup_runtime(u);
if (!crt)
return 0;
CGroupContext *cc = ASSERT_PTR(unit_get_cgroup_context(u));
if (isempty(cc->bind_network_interface))
return 0;
@ -118,6 +111,21 @@ int bpf_bind_network_interface_install(Unit *u) {
return 0;
}
/* Public entry point: looks up the unit's cgroup runtime and delegates the actual work to
 * bind_network_interface_install_impl(). Returns 0 without doing anything if the unit has no
 * cgroup runtime; otherwise returns whatever the impl returned. */
int bpf_bind_network_interface_install(Unit *u) {
CGroupRuntime *crt;
int r;
assert(u);
crt = unit_get_cgroup_runtime(u);
if (!crt)
return 0;
r = bind_network_interface_install_impl(u, crt);
/* The link fd stashed at deserialization time is released only now, after the install attempt,
 * and regardless of whether that attempt succeeded. The field is reset to safe_close()'s
 * return value so it no longer refers to the closed fd. */
crt->initial_bind_network_interface_link_fd = safe_close(crt->initial_bind_network_interface_link_fd);
return r;
}
int bpf_bind_network_interface_serialize(Unit *u, FILE *f, FDSet *fds) {
CGroupRuntime *crt;
@ -127,7 +135,7 @@ int bpf_bind_network_interface_serialize(Unit *u, FILE *f, FDSet *fds) {
if (!crt)
return 0;
return bpf_serialize_link(f, fds, "bind-interface-fd", crt->bpf_bind_network_interface_link);
return bpf_serialize_link(f, fds, "bind-iface-bpf-fd", crt->bpf_bind_network_interface_link);
}
#else /* ! BPF_FRAMEWORK */

View File

@ -98,22 +98,17 @@ int bpf_restrict_ifaces_supported(void) {
return (supported = bpf_can_link_program(obj->progs.sd_restrictif_i));
}
static int restrict_ifaces_install_impl(Unit *u) {
static int restrict_ifaces_install_impl(Unit *u, CGroupRuntime *crt) {
_cleanup_(bpf_link_freep) struct bpf_link *egress_link = NULL, *ingress_link = NULL;
_cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
_cleanup_free_ char *cgroup_path = NULL;
_cleanup_close_ int cgroup_fd = -EBADF;
CGroupContext *cc;
CGroupRuntime *crt;
int r;
cc = unit_get_cgroup_context(u);
if (!cc)
return 0;
assert(u);
assert(crt);
crt = unit_get_cgroup_runtime(u);
if (!crt)
return 0;
CGroupContext *cc = ASSERT_PTR(unit_get_cgroup_context(u));
r = cg_get_path(crt->cgroup_path, /* suffix= */ NULL, &cgroup_path);
if (r < 0)
@ -159,7 +154,7 @@ int bpf_restrict_ifaces_install(Unit *u) {
if (!crt)
return 0;
r = restrict_ifaces_install_impl(u);
r = restrict_ifaces_install_impl(u, crt);
fdset_close(crt->initial_restrict_ifaces_link_fds, /* async= */ false);
return r;
}

View File

@ -571,7 +571,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
fprintf(f, "%sDelegateSubgroup: %s\n",
prefix, c->delegate_subgroup);
if (!isempty(c->bind_network_interface))
if (c->bind_network_interface)
fprintf(f, "%sBindNetworkInterface: %s\n",
prefix, c->bind_network_interface);
@ -1698,7 +1698,7 @@ static bool unit_get_needs_bind_network_interface(Unit *u) {
if (!c)
return false;
return !isempty(c->bind_network_interface);
return c->bind_network_interface;
}
static CGroupMask unit_get_cgroup_mask(Unit *u) {
@ -3050,9 +3050,7 @@ int unit_check_oom(Unit *u) {
if (!crt || !crt->cgroup_path)
return 0;
CGroupContext *ctx = unit_get_cgroup_context(u);
if (!ctx)
return 0;
CGroupContext *ctx = ASSERT_PTR(unit_get_cgroup_context(u));
/* If memory.oom.group=1, then look up the oom_group_kill field, which reports how many times the
* kernel killed every process recursively in this cgroup and its descendants, similar to
@ -4201,6 +4199,8 @@ CGroupRuntime* cgroup_runtime_new(void) {
.ipv4_deny_map_fd = -EBADF,
.ipv6_deny_map_fd = -EBADF,
.initial_bind_network_interface_link_fd = -EBADF,
.cgroup_invalidated_mask = _CGROUP_MASK_ALL,
.deserialized_cgroup_realized = -1,
@ -4235,6 +4235,7 @@ CGroupRuntime* cgroup_runtime_free(CGroupRuntime *crt) {
#endif
fdset_free(crt->initial_restrict_ifaces_link_fds);
safe_close(crt->initial_bind_network_interface_link_fd);
bpf_firewall_close(crt);
@ -4461,34 +4462,24 @@ int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value,
if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-invalidated-mask", key, value, cg_mask_from_string, cgroup_invalidated_mask))
return 1;
if (STR_IN_SET(key, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
int fd;
fd = deserialize_fd(fds, value);
if (fd >= 0)
(void) bpf_socket_bind_add_initial_link_fd(u, fd);
return 1;
}
if (STR_IN_SET(key,
"ip-bpf-ingress-installed", "ip-bpf-egress-installed",
"bpf-device-control-installed",
"ip-bpf-ingress-installed", "ip-bpf-egress-installed",
"ip-bpf-custom-ingress-installed", "ip-bpf-custom-egress-installed")) {
CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
if (!crt)
log_oom_debug();
else {
if (streq(key, "bpf-device-control-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->bpf_device_control_installed);
if (streq(key, "ip-bpf-ingress-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_ingress_installed);
if (streq(key, "ip-bpf-egress-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_egress_installed);
if (streq(key, "bpf-device-control-installed"))
(void) bpf_program_deserialize_attachment(value, fds, &crt->bpf_device_control_installed);
if (streq(key, "ip-bpf-custom-ingress-installed"))
(void) bpf_program_deserialize_attachment_set(value, fds, &crt->ip_bpf_custom_ingress_installed);
@ -4499,12 +4490,47 @@ int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value,
return 1;
}
if (streq(key, "restrict-ifaces-bpf-fd")) {
int fd;
/* We keep the previous bpf link fds stashed until we reattach anew, to close the window where
* the cgroup restrictions would otherwise be lifted. */
if (STR_IN_SET(key, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
_cleanup_close_ int fd = -EBADF;
fd = deserialize_fd(fds, value);
if (fd >= 0)
(void) bpf_restrict_ifaces_add_initial_link_fd(u, fd);
if (fd >= 0) {
r = bpf_socket_bind_add_initial_link_fd(u, fd);
if (r >= 0)
TAKE_FD(fd);
}
return 1;
}
if (streq(key, "restrict-ifaces-bpf-fd")) {
_cleanup_close_ int fd = -EBADF;
fd = deserialize_fd(fds, value);
if (fd >= 0) {
r = bpf_restrict_ifaces_add_initial_link_fd(u, fd);
if (r >= 0)
TAKE_FD(fd);
}
return 1;
}
if (streq(key, "bind-iface-bpf-fd")) {
_cleanup_close_ int fd = -EBADF;
fd = deserialize_fd(fds, value);
if (fd >= 0) {
CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
if (!crt)
log_oom_debug();
else
close_and_replace(crt->initial_bind_network_interface_link_fd, fd);
}
return 1;
}

View File

@ -325,6 +325,13 @@ typedef struct CGroupRuntime {
struct bpf_link *restrict_ifaces_egress_bpf_link;
#endif
#if BPF_FRAMEWORK
/* BPF link to BPF programs attached to cgroup/sock_create hooks and
* responsible for binding created sockets to a given VRF interface. */
struct bpf_link *bpf_bind_network_interface_link;
#endif
int initial_bind_network_interface_link_fd;
bool cgroup_members_mask_valid:1;
/* Reset cgroup accounting next time we fork something off */
@ -334,12 +341,6 @@ typedef struct CGroupRuntime {
bool warned_clamping_cpu_quota_period:1;
int deserialized_cgroup_realized; /* tristate, for backwards compat */
#if BPF_FRAMEWORK
/* BPF link to BPF programs attached to cgroup/sock_create hooks and
* responsible for binding created sockets to a given VRF interface. */
struct bpf_link *bpf_bind_network_interface_link;
#endif
} CGroupRuntime;
uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state);

View File

@ -1893,6 +1893,7 @@ int bus_cgroup_set_property(
return 1;
}
if (streq(name, "RestrictNetworkInterfaces")) {
int is_allow_list;
_cleanup_strv_free_ char **l = NULL;
@ -1958,19 +1959,15 @@ int bus_cgroup_set_property(
if (r < 0)
return r;
if (!ifname_valid_full(s, IFNAME_VALID_ALTERNATIVE))
if (!isempty(s) && !ifname_valid_full(s, IFNAME_VALID_ALTERNATIVE))
return sd_bus_error_setf(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface name: %s", s);
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
if (isempty(s))
c->bind_network_interface = mfree(c->bind_network_interface);
else {
r = free_and_strdup_warn(&c->bind_network_interface, s);
if (r < 0)
return r;
}
unit_write_settingf(u, flags, name, "BindNetworkInterface=%s", strempty(s));
r = free_and_strdup_warn(&c->bind_network_interface, empty_to_null(s));
if (r < 0)
return r;
if (r > 0)
unit_write_settingf(u, flags, name, "BindNetworkInterface=%s", s);
}
return 1;

View File

@ -5141,12 +5141,11 @@ int unit_setup_exec_runtime(Unit *u) {
return r;
}
CGroupRuntime *unit_setup_cgroup_runtime(Unit *u) {
size_t offset;
CGroupRuntime* unit_setup_cgroup_runtime(Unit *u) {
assert(u);
assert(UNIT_HAS_CGROUP_CONTEXT(u));
offset = UNIT_VTABLE(u)->cgroup_runtime_offset;
size_t offset = UNIT_VTABLE(u)->cgroup_runtime_offset;
assert(offset > 0);
CGroupRuntime **rt = (CGroupRuntime**) ((uint8_t*) u + offset);

View File

@ -2370,6 +2370,7 @@ static const BusProperty cgroup_properties[] = {
{ "SocketBindDeny", bus_append_socket_filter },
{ "MemoryPressureThresholdSec", bus_append_parse_sec_rename },
{ "NFTSet", bus_append_nft_set },
{ "BindNetworkInterface", bus_append_string },
/* While infinity is disallowed in unit file, infinity is allowed in D-Bus API which
* means use the default memory pressure duration from oomd.conf. */
@ -2551,7 +2552,6 @@ static const BusProperty execute_properties[] = {
{ "StateDirectoryAccounting", bus_append_parse_boolean },
{ "CacheDirectoryAccounting", bus_append_parse_boolean },
{ "LogsDirectoryAccounting", bus_append_parse_boolean },
{ "BindNetworkInterface", bus_append_string },
{ NULL, bus_try_append_resource_limit, dump_resource_limits },
{}