mirror of
https://github.com/systemd/systemd
synced 2026-03-13 08:34:47 +01:00
Compare commits
6 Commits
5276ef1548
...
dd8c12e5ec
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd8c12e5ec | ||
|
|
595ea7c2e8 | ||
|
|
6801fdda61 | ||
|
|
c1bf450fa0 | ||
|
|
67cbfbbf4b | ||
|
|
8fba6e2b40 |
2
.mailmap
2
.mailmap
@ -25,6 +25,7 @@ Christophe Varoqui <christophe.varoqui@free.fr>
|
|||||||
Colin Guthrie <ColinGuthrie@web>
|
Colin Guthrie <ColinGuthrie@web>
|
||||||
Cristian Rodríguez <cristian@rodriguez.im> <crodriguez@owncloud.com>
|
Cristian Rodríguez <cristian@rodriguez.im> <crodriguez@owncloud.com>
|
||||||
Daan De Meyer <daan.j.demeyer@gmail.com>
|
Daan De Meyer <daan.j.demeyer@gmail.com>
|
||||||
|
Daan De Meyer <daan.j.demeyer@gmail.com> <daan@amutable.com>
|
||||||
Daniel Elstner <daniel.kitta@gmail.com> <danielk@openismus.com>
|
Daniel Elstner <daniel.kitta@gmail.com> <danielk@openismus.com>
|
||||||
Daniel Gorbea <danielgorbea@hotmail.com>
|
Daniel Gorbea <danielgorbea@hotmail.com>
|
||||||
Daniel J Walsh <dwalsh@redhat.com>
|
Daniel J Walsh <dwalsh@redhat.com>
|
||||||
@ -110,6 +111,7 @@ Larry Bernstone <lbernstone@gmail.com>
|
|||||||
Lennart Poettering <lennart@poettering.net> <LennartPoettering@web>
|
Lennart Poettering <lennart@poettering.net> <LennartPoettering@web>
|
||||||
Lennart Poettering <lennart@poettering.net> <lennart@bf9bc1cc-28ce-0310-abfb-9041aa761afb>
|
Lennart Poettering <lennart@poettering.net> <lennart@bf9bc1cc-28ce-0310-abfb-9041aa761afb>
|
||||||
Lennart Poettering <lennart@poettering.net> <mzninuv@0pointer.de>
|
Lennart Poettering <lennart@poettering.net> <mzninuv@0pointer.de>
|
||||||
|
Lennart Poettering <lennart@poettering.net> <lennart@amutable.com>
|
||||||
Leonard König <leonard.r.koenig@googlemail.com>
|
Leonard König <leonard.r.koenig@googlemail.com>
|
||||||
Luca BRUNO <luca.bruno@coreos.com>
|
Luca BRUNO <luca.bruno@coreos.com>
|
||||||
Luis Felipe Strano Moraes <luis.strano@gmail.com> <lfelipe@profusion.mobi>
|
Luis Felipe Strano Moraes <luis.strano@gmail.com> <lfelipe@profusion.mobi>
|
||||||
|
|||||||
@ -14,7 +14,7 @@ A few interfaces are optionally kept for backward compatibility.
|
|||||||
When systemd is compiled with the `-Dcompat-sysv-interfaces=true` setting,
|
When systemd is compiled with the `-Dcompat-sysv-interfaces=true` setting,
|
||||||
legacy interfaces are provided,
|
legacy interfaces are provided,
|
||||||
e.g. the `runlevelX.target` aliases,
|
e.g. the `runlevelX.target` aliases,
|
||||||
and lock directories in under `/var` and `/run`.
|
and lock directories under `/var` and `/run`.
|
||||||
This option may be extended to cover other deprecated interfaces in the future.
|
This option may be extended to cover other deprecated interfaces in the future.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -48,6 +48,26 @@
|
|||||||
|
|
||||||
<xi:include href="standard-conf.xml" xpointer="main-conf" />
|
<xi:include href="standard-conf.xml" xpointer="main-conf" />
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Prekill event</title>
|
||||||
|
|
||||||
|
<para><command>systemd-oomd</command> supports notifying external components before killing a control
|
||||||
|
group.
|
||||||
|
This is done by sending a notification over varlink to all sockets found in the
|
||||||
|
<filename>/run/systemd/oomd.prekill.hook/</filename> folder. Each socket should implement the
|
||||||
|
<constant>io.systemd.oom.Prekill</constant> interface. The notification contains the control group path
|
||||||
|
to allow the hook to identify which control group is being killed. This allows external components to
|
||||||
|
perform any necessary cleanup or logging before the control group is terminated. The hook is not intended
|
||||||
|
as a way to avoid the kill, but rather as a notification mechanism.
|
||||||
|
Note that this is a privileged option because, even though it has a timeout, it is synchronous and delays the kill,
|
||||||
|
so use with care.
|
||||||
|
The typically preferable mechanism to process memory pressure is to do what
|
||||||
|
<ulink url="https://systemd.io/MEMORY_PRESSURE/">MEMORY_PRESSURE</ulink> describes, which is unprivileged,
|
||||||
|
asynchronous and does not delay the kill.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>[OOM] Section Options</title>
|
<title>[OOM] Section Options</title>
|
||||||
|
|
||||||
@ -98,6 +118,22 @@
|
|||||||
<xi:include href="version-info.xml" xpointer="v248"/></listitem>
|
<xi:include href="version-info.xml" xpointer="v248"/></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><varname>PrekillHookTimeoutSec=</varname></term>
|
||||||
|
|
||||||
|
<listitem><para>Sets the amount of time <command>systemd-oomd</command> will wait for pre-kill hooks
|
||||||
|
to complete, before proceeding with the control group termination. Pre-kill hooks work by placing
|
||||||
|
a varlink socket in the <filename>/run/systemd/oomd.prekill.hook/</filename> folder. Each socket should
|
||||||
|
implement the <constant>io.systemd.oom.Prekill</constant> interface for the notification to work. <command>systemd-oomd</command> sends a notification
|
||||||
|
before killing a control group for each discovered socket. The timeout is intended to be global and
|
||||||
|
not per hook. If all hooks return earlier, the kill is performed as soon as possible. The timeout
|
||||||
|
must be at least 1s.
|
||||||
|
Defaults to 0, which means <command>systemd-oomd</command> will not wait and no notifications
|
||||||
|
will be sent.</para>
|
||||||
|
|
||||||
|
<xi:include href="version-info.xml" xpointer="v260"/></listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|||||||
@ -66,6 +66,8 @@
|
|||||||
#define VARLINK_PATH_MACHINED_USERDB "/run/systemd/userdb/io.systemd.Machine"
|
#define VARLINK_PATH_MACHINED_USERDB "/run/systemd/userdb/io.systemd.Machine"
|
||||||
/* Path where systemd-machined listens to resolve.hook varlink queries */
|
/* Path where systemd-machined listens to resolve.hook varlink queries */
|
||||||
#define VARLINK_PATH_MACHINED_RESOLVE_HOOK "/run/systemd/resolve.hook/io.systemd.Machine"
|
#define VARLINK_PATH_MACHINED_RESOLVE_HOOK "/run/systemd/resolve.hook/io.systemd.Machine"
|
||||||
|
/* Path where to connect to send varlink prekill events */
|
||||||
|
#define VARLINK_DIR_OOMD_PREKILL_HOOK "/run/systemd/oomd.prekill.hook/"
|
||||||
|
|
||||||
/* Recommended baseline - see README for details */
|
/* Recommended baseline - see README for details */
|
||||||
#define KERNEL_BASELINE_VERSION "5.14"
|
#define KERNEL_BASELINE_VERSION "5.14"
|
||||||
|
|||||||
@ -75,6 +75,7 @@ void manager_parse_config_file(Manager *m) {
|
|||||||
{ "OOM", "SwapUsedLimit", config_parse_permyriad, 0, &m->swap_used_limit_permyriad },
|
{ "OOM", "SwapUsedLimit", config_parse_permyriad, 0, &m->swap_used_limit_permyriad },
|
||||||
{ "OOM", "DefaultMemoryPressureLimit", config_parse_loadavg, 0, &m->default_mem_pressure_limit },
|
{ "OOM", "DefaultMemoryPressureLimit", config_parse_loadavg, 0, &m->default_mem_pressure_limit },
|
||||||
{ "OOM", "DefaultMemoryPressureDurationSec", config_parse_duration, 0, &m->default_mem_pressure_duration_usec },
|
{ "OOM", "DefaultMemoryPressureDurationSec", config_parse_duration, 0, &m->default_mem_pressure_duration_usec },
|
||||||
|
{ "OOM", "PrekillHookTimeoutSec", config_parse_sec, 0, &m->prekill_timeout },
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -105,7 +105,7 @@ static int process_managed_oom_message(Manager *m, uid_t uid, sd_json_variant *p
|
|||||||
m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts;
|
m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts;
|
||||||
|
|
||||||
if (message.mode == MANAGED_OOM_AUTO) {
|
if (message.mode == MANAGED_OOM_AUTO) {
|
||||||
(void) oomd_cgroup_context_free(hashmap_remove(monitor_hm, empty_to_root(message.path)));
|
(void) oomd_cgroup_context_unref(hashmap_remove(monitor_hm, empty_to_root(message.path)));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -392,7 +392,7 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
|
|||||||
if (oomd_mem_available_below(&m->system_context, 10000 - m->swap_used_limit_permyriad) &&
|
if (oomd_mem_available_below(&m->system_context, 10000 - m->swap_used_limit_permyriad) &&
|
||||||
oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
|
oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
|
||||||
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
|
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
|
||||||
_cleanup_free_ char *selected = NULL;
|
OomdCGroupContext *selected = NULL;
|
||||||
uint64_t threshold;
|
uint64_t threshold;
|
||||||
|
|
||||||
log_debug("Memory used (%"PRIu64") / total (%"PRIu64") and "
|
log_debug("Memory used (%"PRIu64") / total (%"PRIu64") and "
|
||||||
@ -408,29 +408,28 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
|
|||||||
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
|
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
|
||||||
|
|
||||||
threshold = m->system_context.swap_total * THRESHOLD_SWAP_USED_PERCENT / 100;
|
threshold = m->system_context.swap_total * THRESHOLD_SWAP_USED_PERCENT / 100;
|
||||||
r = oomd_kill_by_swap_usage(candidates, threshold, m->dry_run, &selected);
|
r = oomd_select_by_swap_usage(candidates, threshold, &selected);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to select any cgroups based on swap: %m");
|
||||||
|
if (r == 0) {
|
||||||
|
log_debug("No cgroup candidates found for swap-based OOM action");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = oomd_cgroup_kill_mark(m, selected);
|
||||||
if (r == -ENOMEM)
|
if (r == -ENOMEM)
|
||||||
return log_oom();
|
return log_oom();
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
log_notice_errno(r, "Failed to kill any cgroups based on swap: %m");
|
log_error_errno(r, "Failed to select any cgroups based on swap: %m");
|
||||||
else {
|
else {
|
||||||
if (selected && r > 0) {
|
if (selected && r > 0) {
|
||||||
log_notice("Killed %s due to memory used (%"PRIu64") / total (%"PRIu64") and "
|
log_notice("Marked %s for killing due to memory used (%"PRIu64") / total (%"PRIu64") and "
|
||||||
"swap used (%"PRIu64") / total (%"PRIu64") being more than "
|
"swap used (%"PRIu64") / total (%"PRIu64") being more than "
|
||||||
PERMYRIAD_AS_PERCENT_FORMAT_STR,
|
PERMYRIAD_AS_PERCENT_FORMAT_STR,
|
||||||
selected,
|
selected->path,
|
||||||
m->system_context.mem_used, m->system_context.mem_total,
|
m->system_context.mem_used, m->system_context.mem_total,
|
||||||
m->system_context.swap_used, m->system_context.swap_total,
|
m->system_context.swap_used, m->system_context.swap_total,
|
||||||
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
|
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
|
||||||
|
|
||||||
/* send dbus signal */
|
|
||||||
(void) sd_bus_emit_signal(m->bus,
|
|
||||||
"/org/freedesktop/oom1",
|
|
||||||
"org.freedesktop.oom1.Manager",
|
|
||||||
"Killed",
|
|
||||||
"ss",
|
|
||||||
selected,
|
|
||||||
"memory-used");
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -500,7 +499,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
|
|||||||
else if (r == 1 && !in_post_action_delay) {
|
else if (r == 1 && !in_post_action_delay) {
|
||||||
OomdCGroupContext *t;
|
OomdCGroupContext *t;
|
||||||
SET_FOREACH(t, targets) {
|
SET_FOREACH(t, targets) {
|
||||||
_cleanup_free_ char *selected = NULL;
|
OomdCGroupContext *selected = NULL;
|
||||||
|
|
||||||
/* Check if there was reclaim activity in the given interval. The concern is the following case:
|
/* Check if there was reclaim activity in the given interval. The concern is the following case:
|
||||||
* Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending
|
* Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending
|
||||||
@ -525,14 +524,21 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
|
|||||||
else
|
else
|
||||||
clear_candidates = NULL;
|
clear_candidates = NULL;
|
||||||
|
|
||||||
r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates,
|
r = oomd_select_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates,
|
||||||
/* prefix= */ t->path,
|
/* prefix= */ t->path,
|
||||||
/* dry_run= */ m->dry_run,
|
|
||||||
&selected);
|
&selected);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to select any cgroups based on swap, ignoring: %m");
|
||||||
|
if (r == 0) {
|
||||||
|
log_debug("No cgroup candidates found for memory pressure-based OOM action for %s", t->path);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = oomd_cgroup_kill_mark(m, selected);
|
||||||
if (r == -ENOMEM)
|
if (r == -ENOMEM)
|
||||||
return log_oom();
|
return log_oom();
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
log_notice_errno(r, "Failed to kill any cgroups under %s based on pressure: %m", t->path);
|
log_error_errno(r, "Failed to select any cgroups under %s based on pressure, ignoring: %m", t->path);
|
||||||
else {
|
else {
|
||||||
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one.
|
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one.
|
||||||
* If r == 0 then it means there were not eligible candidates, the candidate cgroup
|
* If r == 0 then it means there were not eligible candidates, the candidate cgroup
|
||||||
@ -541,21 +547,12 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
|
|||||||
* pressure is still high. */
|
* pressure is still high. */
|
||||||
m->mem_pressure_post_action_delay_start = usec_now;
|
m->mem_pressure_post_action_delay_start = usec_now;
|
||||||
if (selected && r > 0) {
|
if (selected && r > 0) {
|
||||||
log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
|
log_notice("Marked %s for killing due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
|
||||||
" for > %s with reclaim activity",
|
" for > %s with reclaim activity",
|
||||||
selected, t->path,
|
selected->path, t->path,
|
||||||
LOADAVG_INT_SIDE(t->memory_pressure.avg10), LOADAVG_DECIMAL_SIDE(t->memory_pressure.avg10),
|
LOADAVG_INT_SIDE(t->memory_pressure.avg10), LOADAVG_DECIMAL_SIDE(t->memory_pressure.avg10),
|
||||||
LOADAVG_INT_SIDE(t->mem_pressure_limit), LOADAVG_DECIMAL_SIDE(t->mem_pressure_limit),
|
LOADAVG_INT_SIDE(t->mem_pressure_limit), LOADAVG_DECIMAL_SIDE(t->mem_pressure_limit),
|
||||||
FORMAT_TIMESPAN(t->mem_pressure_duration_usec, USEC_PER_SEC));
|
FORMAT_TIMESPAN(t->mem_pressure_duration_usec, USEC_PER_SEC));
|
||||||
|
|
||||||
/* send dbus signal */
|
|
||||||
(void) sd_bus_emit_signal(m->bus,
|
|
||||||
"/org/freedesktop/oom1",
|
|
||||||
"org.freedesktop.oom1.Manager",
|
|
||||||
"Killed",
|
|
||||||
"ss",
|
|
||||||
selected,
|
|
||||||
"memory-pressure");
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -653,6 +650,8 @@ Manager* manager_free(Manager *m) {
|
|||||||
hashmap_free(m->monitored_mem_pressure_cgroup_contexts);
|
hashmap_free(m->monitored_mem_pressure_cgroup_contexts);
|
||||||
hashmap_free(m->monitored_mem_pressure_cgroup_contexts_candidates);
|
hashmap_free(m->monitored_mem_pressure_cgroup_contexts_candidates);
|
||||||
|
|
||||||
|
set_free(m->kill_states);
|
||||||
|
|
||||||
return mfree(m);
|
return mfree(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include "conf-parser-forward.h"
|
#include "conf-parser-forward.h"
|
||||||
#include "shared-forward.h"
|
#include "shared-forward.h"
|
||||||
|
#include "oomd-conf.h"
|
||||||
#include "oomd-util.h"
|
#include "oomd-util.h"
|
||||||
|
|
||||||
/* Polling interval for monitoring stats */
|
/* Polling interval for monitoring stats */
|
||||||
@ -54,6 +55,9 @@ typedef struct Manager {
|
|||||||
/* This varlink server object is used to manage systemd-oomd's varlink server which is used by user
|
/* This varlink server object is used to manage systemd-oomd's varlink server which is used by user
|
||||||
* managers to report changes in ManagedOOM settings (oomd server - systemd client). */
|
* managers to report changes in ManagedOOM settings (oomd server - systemd client). */
|
||||||
sd_varlink_server *varlink_server;
|
sd_varlink_server *varlink_server;
|
||||||
|
|
||||||
|
usec_t prekill_timeout;
|
||||||
|
Set *kill_states; /* currently ongoing OomdKillState operations */
|
||||||
} Manager;
|
} Manager;
|
||||||
|
|
||||||
Manager* manager_free(Manager *m);
|
Manager* manager_free(Manager *m);
|
||||||
|
|||||||
@ -1,23 +1,35 @@
|
|||||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||||
|
|
||||||
#include "alloc-util.h"
|
#include "alloc-util.h"
|
||||||
|
#include "constants.h"
|
||||||
|
#include "dirent-util.h"
|
||||||
#include "errno-util.h"
|
#include "errno-util.h"
|
||||||
#include "fd-util.h"
|
#include "fd-util.h"
|
||||||
#include "fileio.h"
|
#include "fileio.h"
|
||||||
#include "format-util.h"
|
#include "format-util.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "memstream-util.h"
|
#include "memstream-util.h"
|
||||||
|
#include "oomd-manager.h"
|
||||||
#include "oomd-util.h"
|
#include "oomd-util.h"
|
||||||
#include "parse-util.h"
|
#include "parse-util.h"
|
||||||
#include "path-util.h"
|
#include "path-util.h"
|
||||||
#include "pidref.h"
|
#include "pidref.h"
|
||||||
#include "procfs-util.h"
|
#include "procfs-util.h"
|
||||||
|
#include "sd-bus.h"
|
||||||
#include "set.h"
|
#include "set.h"
|
||||||
#include "signal-util.h"
|
#include "signal-util.h"
|
||||||
#include "sort-util.h"
|
#include "sort-util.h"
|
||||||
#include "stdio-util.h"
|
#include "stdio-util.h"
|
||||||
#include "string-util.h"
|
#include "string-util.h"
|
||||||
#include "time-util.h"
|
#include "time-util.h"
|
||||||
|
#include "varlink-util.h"
|
||||||
|
|
||||||
|
typedef struct OomdKillState {
|
||||||
|
Manager *manager;
|
||||||
|
OomdCGroupContext *ctx;
|
||||||
|
/* This holds sd_varlink references */
|
||||||
|
Set *links;
|
||||||
|
} OomdKillState;
|
||||||
|
|
||||||
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
|
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
|
||||||
oomd_cgroup_ctx_hash_ops,
|
oomd_cgroup_ctx_hash_ops,
|
||||||
@ -25,7 +37,7 @@ DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
|
|||||||
path_hash_func,
|
path_hash_func,
|
||||||
path_compare,
|
path_compare,
|
||||||
OomdCGroupContext,
|
OomdCGroupContext,
|
||||||
oomd_cgroup_context_free);
|
oomd_cgroup_context_unref);
|
||||||
|
|
||||||
static int log_kill(const PidRef *pid, int sig, void *userdata) {
|
static int log_kill(const PidRef *pid, int sig, void *userdata) {
|
||||||
log_debug("oomd attempting to kill " PID_FMT " with %s", pid->pid, signal_to_string(sig));
|
log_debug("oomd attempting to kill " PID_FMT " with %s", pid->pid, signal_to_string(sig));
|
||||||
@ -62,7 +74,7 @@ static int increment_oomd_xattr(const char *path, const char *xattr, uint64_t nu
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) {
|
static OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) {
|
||||||
if (!ctx)
|
if (!ctx)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@ -70,6 +82,8 @@ OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) {
|
|||||||
return mfree(ctx);
|
return mfree(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEFINE_TRIVIAL_REF_UNREF_FUNC(OomdCGroupContext, oomd_cgroup_context, oomd_cgroup_context_free);
|
||||||
|
|
||||||
int oomd_pressure_above(Hashmap *h, Set **ret) {
|
int oomd_pressure_above(Hashmap *h, Set **ret) {
|
||||||
_cleanup_set_free_ Set *targets = NULL;
|
_cleanup_set_free_ Set *targets = NULL;
|
||||||
OomdCGroupContext *ctx;
|
OomdCGroupContext *ctx;
|
||||||
@ -231,65 +245,263 @@ int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const cha
|
|||||||
return (int) k;
|
return (int) k;
|
||||||
}
|
}
|
||||||
|
|
||||||
int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
|
int oomd_cgroup_kill(Manager *m, OomdCGroupContext *ctx, bool recurse) {
|
||||||
_cleanup_set_free_ Set *pids_killed = NULL;
|
_cleanup_set_free_ Set *pids_killed = NULL;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
assert(path);
|
assert(ctx);
|
||||||
|
|
||||||
/* First try to send SIG0 recursively to ensure all child cgroups can be killed. */
|
|
||||||
if (recurse)
|
|
||||||
r = cg_kill_recursive(path, /* sig= */ 0, CGROUP_IGNORE_SELF,
|
|
||||||
/* killed_pids= */ NULL, /* log_kill= */ NULL, /* userdata= */ NULL);
|
|
||||||
else
|
|
||||||
r = cg_kill(path, /* sig= */ 0, CGROUP_IGNORE_SELF,
|
|
||||||
/* killed_pids= */ NULL, /* log_kill= */ NULL, /* userdata= */ NULL);
|
|
||||||
if (r < 0)
|
|
||||||
return log_debug_errno(r, "Failed to send SIG0 to processes in cgroup '%s': %m", path);
|
|
||||||
|
|
||||||
if (dry_run) {
|
|
||||||
_cleanup_free_ char *cg_path = NULL;
|
|
||||||
|
|
||||||
r = cg_get_path(path, /* suffix= */ NULL, &cg_path);
|
|
||||||
if (r < 0)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
log_info("oomd dry-run: Would have tried to kill %s with recurse=%s", cg_path, true_false(recurse));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
pids_killed = set_new(NULL);
|
pids_killed = set_new(NULL);
|
||||||
if (!pids_killed)
|
if (!pids_killed)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
r = increment_oomd_xattr(path, "user.oomd_ooms", 1);
|
r = increment_oomd_xattr(ctx->path, "user.oomd_ooms", 1);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m");
|
log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m");
|
||||||
|
|
||||||
if (recurse)
|
if (recurse)
|
||||||
r = cg_kill_recursive(path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
|
r = cg_kill_recursive(ctx->path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
|
||||||
else
|
else
|
||||||
r = cg_kill(path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
|
r = cg_kill(ctx->path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
|
||||||
|
|
||||||
/* The cgroup could have been cleaned up after we have sent SIGKILL to all of the processes, but before
|
/* The cgroup could have been cleaned up after we have sent SIGKILL to all of the processes, but before
|
||||||
* we could do one last iteration of cgroup.procs to check. Or the service unit could have exited and
|
* we could do one last iteration of cgroup.procs to check. Or the service unit could have exited and
|
||||||
* was removed between picking candidates and coming into this function. In either case, let's log
|
* was removed between picking candidates and coming into this function. In either case, let's log
|
||||||
* about it let the caller decide what to do once they know how many PIDs were killed. */
|
* about it let the caller decide what to do once they know how many PIDs were killed. */
|
||||||
if (IN_SET(r, -ENOENT, -ENODEV))
|
if (IN_SET(r, -ENOENT, -ENODEV))
|
||||||
log_debug_errno(r, "Error when sending SIGKILL to processes in cgroup path %s, ignoring: %m", path);
|
log_debug_errno(r, "Error when sending SIGKILL to processes in cgroup path %s, ignoring: %m", ctx->path);
|
||||||
else if (r < 0)
|
else if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
if (set_isempty(pids_killed))
|
if (set_isempty(pids_killed))
|
||||||
log_debug("Nothing killed when attempting to kill %s", path);
|
log_debug("Nothing killed when attempting to kill %s", ctx->path);
|
||||||
|
|
||||||
r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed));
|
r = increment_oomd_xattr(ctx->path, "user.oomd_kill", set_size(pids_killed));
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m");
|
log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m");
|
||||||
|
|
||||||
|
/* send dbus signal */
|
||||||
|
if (m)
|
||||||
|
(void) sd_bus_emit_signal(m->bus,
|
||||||
|
"/org/freedesktop/oom1",
|
||||||
|
"org.freedesktop.oom1.Manager",
|
||||||
|
"Killed",
|
||||||
|
"ss",
|
||||||
|
ctx,
|
||||||
|
"oom");
|
||||||
|
|
||||||
return !set_isempty(pids_killed);
|
return !set_isempty(pids_killed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Destroys a kill state: frees the set of pending hook links, unregisters the state from its manager's
 * kill_states set, drops the reference on the cgroup context and releases the object itself.
 * Passing NULL is a no-op. */
static void oomd_kill_state_free(OomdKillState *ks) {
        if (ks) {
                assert(ks->manager);

                set_free(ks->links);

                set_remove(ks->manager->kill_states, ks);
                oomd_cgroup_context_unref(ks->ctx);
                free(ks);
        }
}
|
||||||
|
|
||||||
|
static int oomd_kill_state_compare(const OomdKillState *a, const OomdKillState *b) {
|
||||||
|
return path_compare(a->ctx->path, b->ctx->path);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void oomd_kill_state_hash_func(const OomdKillState *ks, struct siphash *state) {
|
||||||
|
path_hash_func(ks->ctx->path, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hash operations for sets of OomdKillState objects: entries are keyed by cgroup path and destroyed
 * with oomd_kill_state_free(). */
DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
                oomd_kill_state_hash_ops, OomdKillState,
                oomd_kill_state_hash_func, oomd_kill_state_compare,
                oomd_kill_state_free);
|
||||||
|
|
||||||
|
/* oomd_kill_state_remove() is called N+1 times where N is the number of prekill hooks found.
|
||||||
|
* The extra call is just after creating the kill state, so to have at least a call if no
|
||||||
|
* prekill hooks are found. Each call removes one link from the kill state, and when the set
|
||||||
|
* is empty, it performs the actual cgroup kill. */
|
||||||
|
static void oomd_kill_state_remove(OomdKillState *ks) {
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(ks);
|
||||||
|
assert(ks->ctx);
|
||||||
|
|
||||||
|
if (!set_isempty(ks->links))
|
||||||
|
return;
|
||||||
|
|
||||||
|
r = oomd_cgroup_kill(ks->manager, ks->ctx, /* recurse= */ true);
|
||||||
|
if (r < 0)
|
||||||
|
log_debug_errno(r, "Failed to kill cgroup '%s', ignoring: %m", ks->ctx->path);
|
||||||
|
oomd_kill_state_free(ks);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int prekill_callback(
|
||||||
|
sd_varlink *link,
|
||||||
|
sd_json_variant *parameters,
|
||||||
|
const char *error_id,
|
||||||
|
sd_varlink_reply_flags_t flags,
|
||||||
|
void *userdata) {
|
||||||
|
|
||||||
|
OomdKillState *ks = ASSERT_PTR(userdata);
|
||||||
|
|
||||||
|
assert(ks);
|
||||||
|
assert(ks->ctx);
|
||||||
|
|
||||||
|
if (error_id)
|
||||||
|
log_warning("oomd prekill hook for %s returned error: %s", ks->ctx->path, error_id);
|
||||||
|
else
|
||||||
|
log_info("oomd prekill hook finished for cgroup %s", ks->ctx->path);
|
||||||
|
|
||||||
|
assert_se(set_remove(ks->links, link) == link);
|
||||||
|
oomd_kill_state_remove(ks);
|
||||||
|
sd_varlink_unref(link);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Generates oomd_kill_state_removep(), the _cleanup_() handler that hands a kill state over to
 * oomd_kill_state_remove() when it goes out of scope (presumably skipped for NULL, the value given as
 * third argument — confirm against the macro definition). */
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(OomdKillState *, oomd_kill_state_remove, NULL);
|
||||||
|
|
||||||
|
static int send_prekill_message(
|
||||||
|
const char *basename,
|
||||||
|
sd_json_variant *cparams,
|
||||||
|
OomdKillState *ks,
|
||||||
|
sd_event *e) {
|
||||||
|
|
||||||
|
_cleanup_(sd_varlink_close_unrefp) sd_varlink *link = NULL;
|
||||||
|
_cleanup_free_ char *hook_path = NULL;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(basename);
|
||||||
|
assert(cparams);
|
||||||
|
assert(e);
|
||||||
|
assert(ks);
|
||||||
|
assert(ks->ctx);
|
||||||
|
assert(ks->manager);
|
||||||
|
|
||||||
|
log_info("Invoking oomd prekill hook %s for cgroup %s", basename, ks->ctx->path);
|
||||||
|
|
||||||
|
hook_path = path_join(VARLINK_DIR_OOMD_PREKILL_HOOK, basename);
|
||||||
|
if (!hook_path)
|
||||||
|
return log_oom_debug();
|
||||||
|
|
||||||
|
r = sd_varlink_connect_address(&link, hook_path);
|
||||||
|
if (r < 0) {
|
||||||
|
log_debug_errno(r, "Socket '%s' is not connectible, probably stale, ignoring: %m", hook_path);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
(void) sd_varlink_set_userdata(link, ks);
|
||||||
|
r = sd_varlink_set_description(link, "oomd prekill hook");
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to set varlink description: %m");
|
||||||
|
(void) sd_varlink_set_relative_timeout(link, ks->manager->prekill_timeout);
|
||||||
|
|
||||||
|
r = sd_varlink_attach_event(link, e, SD_EVENT_PRIORITY_NORMAL);
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to attach varlink to event loop: %m");
|
||||||
|
|
||||||
|
r = sd_varlink_bind_reply(link, prekill_callback);
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to bind reply callback: %m");
|
||||||
|
|
||||||
|
r = sd_varlink_invoke(link, "io.systemd.oom.Prekill.Notify", cparams);
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to call varlink method io.systemd.oom.Prekill.Notify: %m");
|
||||||
|
|
||||||
|
r = set_ensure_consume(&ks->links, &varlink_hash_ops, TAKE_PTR(link));
|
||||||
|
if (r < 0)
|
||||||
|
return log_oom_debug();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* oomd_prekill_hook() sets the prekill hooks up by sending varlink messages to all sockets found
|
||||||
|
* in VARLINK_DIR_OOMD_PREKILL_HOOK directory. It returns immediately if no prekill hooks are configured
|
||||||
|
* or PrekillHookTimeoutSec= is not set. In that case, the actual killing is done immediately by
|
||||||
|
* the callback set up by the cleanup handler in oomd_cgroup_kill_mark(). */
|
||||||
|
static int oomd_prekill_hook(Manager *m, OomdKillState *ks) {
|
||||||
|
_cleanup_closedir_ DIR *d = NULL;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(m);
|
||||||
|
assert(ks);
|
||||||
|
assert(ks->ctx);
|
||||||
|
|
||||||
|
if (m->prekill_timeout == 0) {
|
||||||
|
log_debug("Zero oomd prekill timeout configured, skipping prekill hooks.");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
d = opendir(VARLINK_DIR_OOMD_PREKILL_HOOK);
|
||||||
|
if (!d) {
|
||||||
|
if (errno == ENOENT) {
|
||||||
|
log_debug("No prekill varlink socket directory %s, ignoring.", VARLINK_DIR_OOMD_PREKILL_HOOK);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return log_debug_errno(errno, "Failed to open prekill varlink socket directory %s: %m",
|
||||||
|
VARLINK_DIR_OOMD_PREKILL_HOOK);
|
||||||
|
}
|
||||||
|
|
||||||
|
_cleanup_(sd_json_variant_unrefp) sd_json_variant *cparams = NULL;
|
||||||
|
r = sd_json_buildo(&cparams, SD_JSON_BUILD_PAIR_STRING("cgroup", ks->ctx->path));
|
||||||
|
if (r < 0)
|
||||||
|
return log_oom_debug();
|
||||||
|
|
||||||
|
FOREACH_DIRENT(de, d, return -errno) {
|
||||||
|
if (!IN_SET(de->d_type, DT_SOCK, DT_UNKNOWN))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
r = send_prekill_message(de->d_name, cparams, ks, m->event);
|
||||||
|
if (r < 0)
|
||||||
|
log_warning_errno(r, "Failed to send oomd prekill message to %s for cgroup %s, ignoring: %m",
|
||||||
|
de->d_name, ks->ctx->path);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Marks the cgroup described by ctx for killing. In dry-run mode this only logs what would have been
 * killed. Otherwise a kill state is registered in m->kill_states and the prekill hooks are notified;
 * the actual kill is done by oomd_kill_state_remove(), either right when this function returns (no
 * hook pending) or once the last hook has replied.
 *
 * If a kill for the same cgroup is already pending, nothing is done.
 *
 * Returns 0 on success and a negative errno-style error on failure. */
int oomd_cgroup_kill_mark(Manager *m, OomdCGroupContext *ctx) {
        int r;

        assert(ctx);
        assert(m);

        if (m->dry_run) {
                _cleanup_free_ char *cg_path = NULL;

                r = cg_get_path(ctx->path, /* suffix= */ NULL, &cg_path);
                if (r < 0)
                        return r;

                log_info("oomd dry-run: Would have tried to kill %s and all its descendants", cg_path);
                return 0;
        }

        /* Kill states are keyed by cgroup path. If one is already registered for this cgroup,
         * set_ensure_put() below would return 0 without inserting the new state. The cleanup handler
         * would then kill the cgroup right away, bypassing the hooks that are still pending, and
         * oomd_kill_state_free() would drop the other, still active state from the set. Hence bail out
         * early and let the pending kill run its course. */
        if (set_contains(m->kill_states, &(const OomdKillState) { .ctx = ctx })) {
                log_debug("Kill of cgroup %s is already pending, not marking it again.", ctx->path);
                return 0;
        }

        /* When ks goes out of scope the kill is executed, unless prekill hooks are still pending. */
        _cleanup_(oomd_kill_state_removep) OomdKillState *ks = new(OomdKillState, 1);
        if (!ks)
                return log_oom_debug();

        *ks = (OomdKillState) {
                .manager = m,
                .ctx = oomd_cgroup_context_ref(ctx),
        };

        r = set_ensure_put(&m->kill_states, &oomd_kill_state_hash_ops, ks);
        if (r < 0)
                return log_oom_debug();

        r = oomd_prekill_hook(m, ks);
        if (r < 0)
                log_warning_errno(r, "oomd prekill hook failed for %s, ignoring: %m", ctx->path);

        return 0;
}
|
||||||
|
|
||||||
typedef void (*dump_candidate_func)(const OomdCGroupContext *ctx, FILE *f, const char *prefix);
|
typedef void (*dump_candidate_func)(const OomdCGroupContext *ctx, FILE *f, const char *prefix);
|
||||||
|
|
||||||
static int dump_kill_candidates(
|
static int dump_kill_candidates(
|
||||||
@ -327,10 +539,9 @@ static int dump_kill_candidates(
|
|||||||
return memstream_dump(LOG_INFO, &m);
|
return memstream_dump(LOG_INFO, &m);
|
||||||
}
|
}
|
||||||
|
|
||||||
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) {
|
int oomd_select_by_pgscan_rate(Hashmap *h, const char *prefix, OomdCGroupContext **ret_selected) {
|
||||||
_cleanup_free_ OomdCGroupContext **sorted = NULL;
|
_cleanup_free_ OomdCGroupContext **sorted = NULL;
|
||||||
const OomdCGroupContext *killed = NULL;
|
int r, n, ret = 0;
|
||||||
int n, r, ret = 0;
|
|
||||||
|
|
||||||
assert(h);
|
assert(h);
|
||||||
assert(ret_selected);
|
assert(ret_selected);
|
||||||
@ -340,38 +551,31 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
|
|||||||
return n;
|
return n;
|
||||||
|
|
||||||
FOREACH_ARRAY(i, sorted, n) {
|
FOREACH_ARRAY(i, sorted, n) {
|
||||||
const OomdCGroupContext *c = *i;
|
OomdCGroupContext *c = *i;
|
||||||
|
|
||||||
/* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure.
|
/* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure.
|
||||||
* Continue since there might be "avoid" cgroups at the end. */
|
* Continue since there might be "avoid" cgroups at the end. */
|
||||||
if (c->pgscan == 0 && c->current_memory_usage == 0)
|
if (c->pgscan == 0 && c->current_memory_usage == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
r = oomd_cgroup_kill(c->path, /* recurse= */ true, /* dry_run= */ dry_run);
|
/* First try killing recursively to ensure all child cgroups can be killed. */
|
||||||
if (r == -ENOMEM)
|
r = cg_kill_recursive(c->path, /* sig= */ 0, CGROUP_IGNORE_SELF, /* killed_pids= */ NULL,
|
||||||
return r; /* Treat oom as a hard error */
|
/* log_kill= */ NULL, /* userdata= */ NULL);
|
||||||
if (r < 0) {
|
|
||||||
RET_GATHER(ret, r);
|
|
||||||
continue; /* Try to find something else to kill */
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = r;
|
|
||||||
r = strdup_to(ret_selected, c->path);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
continue;
|
||||||
|
|
||||||
killed = c;
|
ret = 1;
|
||||||
|
*ret_selected = c;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) dump_kill_candidates(sorted, n, killed, oomd_dump_memory_pressure_cgroup_context);
|
(void) dump_kill_candidates(sorted, n, *ret_selected, oomd_dump_memory_pressure_cgroup_context);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected) {
|
int oomd_select_by_swap_usage(Hashmap *h, uint64_t threshold_usage, OomdCGroupContext **ret_selected) {
|
||||||
_cleanup_free_ OomdCGroupContext **sorted = NULL;
|
_cleanup_free_ OomdCGroupContext **sorted = NULL;
|
||||||
const OomdCGroupContext *killed = NULL;
|
int r, n, ret = 0;
|
||||||
int n, r, ret = 0;
|
|
||||||
|
|
||||||
assert(h);
|
assert(h);
|
||||||
assert(ret_selected);
|
assert(ret_selected);
|
||||||
@ -384,36 +588,30 @@ int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run,
|
|||||||
* no swap usage. Threshold killing only cgroups with more than threshold swap usage. */
|
* no swap usage. Threshold killing only cgroups with more than threshold swap usage. */
|
||||||
|
|
||||||
FOREACH_ARRAY(i, sorted, n) {
|
FOREACH_ARRAY(i, sorted, n) {
|
||||||
const OomdCGroupContext *c = *i;
|
OomdCGroupContext *c = *i;
|
||||||
|
|
||||||
/* Skip over cgroups with not enough swap usage. Don't break since there might be "avoid"
|
/* Skip over cgroups with not enough swap usage. Don't break since there might be "avoid"
|
||||||
* cgroups at the end. */
|
* cgroups at the end. */
|
||||||
if (c->swap_usage <= threshold_usage)
|
if (c->swap_usage <= threshold_usage)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
r = oomd_cgroup_kill(c->path, /* recurse= */ true, /* dry_run= */ dry_run);
|
/* First try killing recursively to ensure all child cgroups can be killed. */
|
||||||
if (r == -ENOMEM)
|
r = cg_kill_recursive(c->path, /* sig= */ 0, CGROUP_IGNORE_SELF, /* killed_pids= */ NULL,
|
||||||
return r; /* Treat oom as a hard error */
|
/* log_kill= */ NULL, /* userdata= */ NULL);
|
||||||
if (r < 0) {
|
|
||||||
RET_GATHER(ret, r);
|
|
||||||
continue; /* Try to find something else to kill */
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = r;
|
|
||||||
r = strdup_to(ret_selected, c->path);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
continue;
|
||||||
|
|
||||||
killed = c;
|
ret = 1;
|
||||||
|
*ret_selected = c;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) dump_kill_candidates(sorted, n, killed, oomd_dump_swap_cgroup_context);
|
(void) dump_kill_candidates(sorted, n, *ret_selected, oomd_dump_swap_cgroup_context);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
|
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
|
||||||
_cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL;
|
_cleanup_(oomd_cgroup_context_unrefp) OomdCGroupContext *ctx = NULL;
|
||||||
_cleanup_free_ char *p = NULL, *val = NULL;
|
_cleanup_free_ char *p = NULL, *val = NULL;
|
||||||
bool is_root;
|
bool is_root;
|
||||||
int r;
|
int r;
|
||||||
@ -425,8 +623,15 @@ int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
|
|||||||
if (!ctx)
|
if (!ctx)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
*ctx = (OomdCGroupContext) {
|
||||||
|
.n_ref = 1,
|
||||||
|
.preference = MANAGED_OOM_PREFERENCE_NONE,
|
||||||
|
.path = strdup(empty_to_root(path)),
|
||||||
|
};
|
||||||
|
if (!ctx->path)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
is_root = empty_or_root(path);
|
is_root = empty_or_root(path);
|
||||||
ctx->preference = MANAGED_OOM_PREFERENCE_NONE;
|
|
||||||
|
|
||||||
r = cg_get_path(path, "memory.pressure", &p);
|
r = cg_get_path(path, "memory.pressure", &p);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
@ -470,10 +675,6 @@ int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
|
|||||||
return log_debug_errno(r, "Error converting pgscan value to uint64_t: %m");
|
return log_debug_errno(r, "Error converting pgscan value to uint64_t: %m");
|
||||||
}
|
}
|
||||||
|
|
||||||
r = strdup_to(&ctx->path, empty_to_root(path));
|
|
||||||
if (r < 0)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
*ret = TAKE_PTR(ctx);
|
*ret = TAKE_PTR(ctx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -555,7 +756,7 @@ int oomd_system_context_acquire(const char *proc_meminfo_path, OomdSystemContext
|
|||||||
}
|
}
|
||||||
|
|
||||||
int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path) {
|
int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path) {
|
||||||
_cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *curr_ctx = NULL;
|
_cleanup_(oomd_cgroup_context_unrefp) OomdCGroupContext *curr_ctx = NULL;
|
||||||
OomdCGroupContext *old_ctx;
|
OomdCGroupContext *old_ctx;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
|
|||||||
@ -10,12 +10,16 @@
|
|||||||
|
|
||||||
extern const struct hash_ops oomd_cgroup_ctx_hash_ops;
|
extern const struct hash_ops oomd_cgroup_ctx_hash_ops;
|
||||||
|
|
||||||
|
struct Manager;
|
||||||
|
|
||||||
typedef struct OomdCGroupContext OomdCGroupContext;
|
typedef struct OomdCGroupContext OomdCGroupContext;
|
||||||
typedef struct OomdSystemContext OomdSystemContext;
|
typedef struct OomdSystemContext OomdSystemContext;
|
||||||
|
typedef struct Manager Manager;
|
||||||
|
|
||||||
typedef int (oomd_compare_t)(OomdCGroupContext * const *, OomdCGroupContext * const *);
|
typedef int (oomd_compare_t)(OomdCGroupContext * const *, OomdCGroupContext * const *);
|
||||||
|
|
||||||
struct OomdCGroupContext {
|
struct OomdCGroupContext {
|
||||||
|
unsigned n_ref;
|
||||||
char *path;
|
char *path;
|
||||||
|
|
||||||
ResourcePressure memory_pressure;
|
ResourcePressure memory_pressure;
|
||||||
@ -45,8 +49,9 @@ struct OomdSystemContext {
|
|||||||
uint64_t swap_used;
|
uint64_t swap_used;
|
||||||
};
|
};
|
||||||
|
|
||||||
OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx);
|
OomdCGroupContext *oomd_cgroup_context_ref(OomdCGroupContext *p);
|
||||||
DEFINE_TRIVIAL_CLEANUP_FUNC(OomdCGroupContext*, oomd_cgroup_context_free);
|
OomdCGroupContext *oomd_cgroup_context_unref(OomdCGroupContext *p);
|
||||||
|
DEFINE_TRIVIAL_CLEANUP_FUNC(OomdCGroupContext*, oomd_cgroup_context_unref);
|
||||||
|
|
||||||
/* All hashmaps used with these functions are expected to be of the form
|
/* All hashmaps used with these functions are expected to be of the form
|
||||||
* key: cgroup paths -> value: OomdCGroupContext. */
|
* key: cgroup paths -> value: OomdCGroupContext. */
|
||||||
@ -119,14 +124,15 @@ int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const cha
|
|||||||
int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix);
|
int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix);
|
||||||
|
|
||||||
/* Returns a negative value on error, 0 if no processes were killed, or 1 if processes were killed. */
|
/* Returns a negative value on error, 0 if no processes were killed, or 1 if processes were killed. */
|
||||||
int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
|
int oomd_cgroup_kill(Manager *m, OomdCGroupContext *ctx, bool recurse);
|
||||||
|
int oomd_cgroup_kill_mark(Manager *m, OomdCGroupContext *ctx);
|
||||||
|
|
||||||
/* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */
|
/* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */
|
||||||
/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
|
/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
|
||||||
* everything in `h` is a candidate.
|
* everything in `h` is a candidate.
|
||||||
* Returns the killed cgroup in ret_selected. */
|
* Returns the killed cgroup in ret_selected. */
|
||||||
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected);
|
int oomd_select_by_pgscan_rate(Hashmap *h, const char *prefix, OomdCGroupContext **ret_selected);
|
||||||
int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected);
|
int oomd_select_by_swap_usage(Hashmap *h, uint64_t threshold_usage, OomdCGroupContext **ret_selected);
|
||||||
|
|
||||||
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
|
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
|
||||||
int oomd_system_context_acquire(const char *proc_meminfo_path, OomdSystemContext *ret);
|
int oomd_system_context_acquire(const char *proc_meminfo_path, OomdSystemContext *ret);
|
||||||
|
|||||||
@ -88,7 +88,7 @@ TEST(oomd_cgroup_kill) {
|
|||||||
ASSERT_OK(fork_and_sleep(5, &two));
|
ASSERT_OK(fork_and_sleep(5, &two));
|
||||||
ASSERT_OK(cg_attach(subcgroup, two.pid));
|
ASSERT_OK(cg_attach(subcgroup, two.pid));
|
||||||
|
|
||||||
ASSERT_OK_POSITIVE(oomd_cgroup_kill(subcgroup, false /* recurse */, false /* dry run */));
|
ASSERT_OK_POSITIVE(oomd_cgroup_kill(NULL /* manager */, &(OomdCGroupContext){ .path = subcgroup }, false /* recurse */));
|
||||||
|
|
||||||
ASSERT_OK(cg_get_xattr(subcgroup, "user.oomd_ooms", &v, /* ret_size= */ NULL));
|
ASSERT_OK(cg_get_xattr(subcgroup, "user.oomd_ooms", &v, /* ret_size= */ NULL));
|
||||||
ASSERT_STREQ(v, i == 0 ? "1" : "2");
|
ASSERT_STREQ(v, i == 0 ? "1" : "2");
|
||||||
@ -115,7 +115,7 @@ TEST(oomd_cgroup_kill) {
|
|||||||
|
|
||||||
TEST(oomd_cgroup_context_acquire_and_insert) {
|
TEST(oomd_cgroup_context_acquire_and_insert) {
|
||||||
_cleanup_hashmap_free_ Hashmap *h1 = NULL, *h2 = NULL;
|
_cleanup_hashmap_free_ Hashmap *h1 = NULL, *h2 = NULL;
|
||||||
_cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL;
|
_cleanup_(oomd_cgroup_context_unrefp) OomdCGroupContext *ctx = NULL;
|
||||||
OomdCGroupContext *c1, *c2;
|
OomdCGroupContext *c1, *c2;
|
||||||
CGroupMask mask;
|
CGroupMask mask;
|
||||||
|
|
||||||
@ -138,7 +138,7 @@ TEST(oomd_cgroup_context_acquire_and_insert) {
|
|||||||
ASSERT_EQ(ctx->swap_usage, 0u);
|
ASSERT_EQ(ctx->swap_usage, 0u);
|
||||||
ASSERT_EQ(ctx->last_pgscan, 0u);
|
ASSERT_EQ(ctx->last_pgscan, 0u);
|
||||||
ASSERT_EQ(ctx->pgscan, 0u);
|
ASSERT_EQ(ctx->pgscan, 0u);
|
||||||
ASSERT_NULL(ctx = oomd_cgroup_context_free(ctx));
|
ASSERT_NULL(ctx = oomd_cgroup_context_unref(ctx));
|
||||||
|
|
||||||
ASSERT_OK(oomd_cgroup_context_acquire("", &ctx));
|
ASSERT_OK(oomd_cgroup_context_acquire("", &ctx));
|
||||||
ASSERT_STREQ(ctx->path, "/");
|
ASSERT_STREQ(ctx->path, "/");
|
||||||
@ -429,7 +429,7 @@ TEST(oomd_sort_cgroups) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(oomd_fetch_cgroup_oom_preference) {
|
TEST(oomd_fetch_cgroup_oom_preference) {
|
||||||
_cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL;
|
_cleanup_(oomd_cgroup_context_unrefp) OomdCGroupContext *ctx = NULL;
|
||||||
ManagedOOMPreference root_pref;
|
ManagedOOMPreference root_pref;
|
||||||
CGroupMask mask;
|
CGroupMask mask;
|
||||||
bool test_xattrs;
|
bool test_xattrs;
|
||||||
@ -464,7 +464,7 @@ TEST(oomd_fetch_cgroup_oom_preference) {
|
|||||||
ASSERT_FAIL(oomd_fetch_cgroup_oom_preference(ctx, NULL));
|
ASSERT_FAIL(oomd_fetch_cgroup_oom_preference(ctx, NULL));
|
||||||
ASSERT_EQ(ctx->preference, MANAGED_OOM_PREFERENCE_NONE);
|
ASSERT_EQ(ctx->preference, MANAGED_OOM_PREFERENCE_NONE);
|
||||||
}
|
}
|
||||||
ctx = oomd_cgroup_context_free(ctx);
|
ctx = oomd_cgroup_context_unref(ctx);
|
||||||
|
|
||||||
/* also check when only avoid is set to true */
|
/* also check when only avoid is set to true */
|
||||||
if (test_xattrs) {
|
if (test_xattrs) {
|
||||||
@ -473,7 +473,7 @@ TEST(oomd_fetch_cgroup_oom_preference) {
|
|||||||
ASSERT_OK(oomd_cgroup_context_acquire(cgroup, &ctx));
|
ASSERT_OK(oomd_cgroup_context_acquire(cgroup, &ctx));
|
||||||
ASSERT_OK(oomd_fetch_cgroup_oom_preference(ctx, NULL));
|
ASSERT_OK(oomd_fetch_cgroup_oom_preference(ctx, NULL));
|
||||||
ASSERT_EQ(ctx->preference, geteuid() == 0 ? MANAGED_OOM_PREFERENCE_AVOID : MANAGED_OOM_PREFERENCE_NONE);
|
ASSERT_EQ(ctx->preference, geteuid() == 0 ? MANAGED_OOM_PREFERENCE_AVOID : MANAGED_OOM_PREFERENCE_NONE);
|
||||||
ctx = oomd_cgroup_context_free(ctx);
|
ctx = oomd_cgroup_context_unref(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Test the root cgroup */
|
/* Test the root cgroup */
|
||||||
@ -493,7 +493,7 @@ TEST(oomd_fetch_cgroup_oom_preference) {
|
|||||||
/* Assert that avoid/omit are not set if the cgroup and prefix are not
|
/* Assert that avoid/omit are not set if the cgroup and prefix are not
|
||||||
* owned by the same user. */
|
* owned by the same user. */
|
||||||
if (test_xattrs && !empty_or_root(cgroup) && geteuid() == 0) {
|
if (test_xattrs && !empty_or_root(cgroup) && geteuid() == 0) {
|
||||||
ctx = oomd_cgroup_context_free(ctx);
|
ctx = oomd_cgroup_context_unref(ctx);
|
||||||
ASSERT_OK(cg_set_access(cgroup, 61183, 0));
|
ASSERT_OK(cg_set_access(cgroup, 61183, 0));
|
||||||
ASSERT_OK(oomd_cgroup_context_acquire(cgroup, &ctx));
|
ASSERT_OK(oomd_cgroup_context_acquire(cgroup, &ctx));
|
||||||
|
|
||||||
|
|||||||
@ -224,6 +224,7 @@ shared_sources = files(
|
|||||||
'varlink-io.systemd.Unit.c',
|
'varlink-io.systemd.Unit.c',
|
||||||
'varlink-io.systemd.UserDatabase.c',
|
'varlink-io.systemd.UserDatabase.c',
|
||||||
'varlink-io.systemd.oom.c',
|
'varlink-io.systemd.oom.c',
|
||||||
|
'varlink-io.systemd.oom.Prekill.c',
|
||||||
'varlink-io.systemd.service.c',
|
'varlink-io.systemd.service.c',
|
||||||
'varlink-io.systemd.sysext.c',
|
'varlink-io.systemd.sysext.c',
|
||||||
'varlink-serialize.c',
|
'varlink-serialize.c',
|
||||||
|
|||||||
18
src/shared/varlink-io.systemd.oom.Prekill.c
Normal file
18
src/shared/varlink-io.systemd.oom.Prekill.c
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||||
|
|
||||||
|
#include "varlink-io.systemd.oom.Prekill.h"
|
||||||
|
|
||||||
|
/* This is a new Varlink interface for pre-kill notifications from oomd.
|
||||||
|
* It will be available through /run/systemd/oomd.prekill.hook/ */
|
||||||
|
|
||||||
|
static SD_VARLINK_DEFINE_METHOD(
|
||||||
|
Notify,
|
||||||
|
SD_VARLINK_FIELD_COMMENT("The cgroup which is going to be killed"),
|
||||||
|
SD_VARLINK_DEFINE_INPUT(cgroup, SD_VARLINK_STRING, 0));
|
||||||
|
|
||||||
|
SD_VARLINK_DEFINE_INTERFACE(
|
||||||
|
io_systemd_oom_Prekill,
|
||||||
|
"io.systemd.oom.Prekill",
|
||||||
|
SD_VARLINK_INTERFACE_COMMENT("Prekill notifications from oomd"),
|
||||||
|
SD_VARLINK_SYMBOL_COMMENT("Notify about an imminent OOM kill"),
|
||||||
|
&vl_method_Notify);
|
||||||
6
src/shared/varlink-io.systemd.oom.Prekill.h
Normal file
6
src/shared/varlink-io.systemd.oom.Prekill.h
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "sd-varlink-idl.h"
|
||||||
|
|
||||||
|
extern const sd_varlink_interface vl_interface_io_systemd_oom_Prekill;
|
||||||
@ -43,6 +43,7 @@
|
|||||||
#include "varlink-io.systemd.Unit.h"
|
#include "varlink-io.systemd.Unit.h"
|
||||||
#include "varlink-io.systemd.UserDatabase.h"
|
#include "varlink-io.systemd.UserDatabase.h"
|
||||||
#include "varlink-io.systemd.oom.h"
|
#include "varlink-io.systemd.oom.h"
|
||||||
|
#include "varlink-io.systemd.oom.Prekill.h"
|
||||||
#include "varlink-io.systemd.service.h"
|
#include "varlink-io.systemd.service.h"
|
||||||
#include "varlink-io.systemd.sysext.h"
|
#include "varlink-io.systemd.sysext.h"
|
||||||
#include "varlink-org.varlink.service.h"
|
#include "varlink-org.varlink.service.h"
|
||||||
@ -206,6 +207,7 @@ TEST(parse_format) {
|
|||||||
&vl_interface_io_systemd_Unit,
|
&vl_interface_io_systemd_Unit,
|
||||||
&vl_interface_io_systemd_UserDatabase,
|
&vl_interface_io_systemd_UserDatabase,
|
||||||
&vl_interface_io_systemd_oom,
|
&vl_interface_io_systemd_oom,
|
||||||
|
&vl_interface_io_systemd_oom_Prekill,
|
||||||
&vl_interface_io_systemd_service,
|
&vl_interface_io_systemd_service,
|
||||||
&vl_interface_io_systemd_sysext,
|
&vl_interface_io_systemd_sysext,
|
||||||
&vl_interface_org_varlink_service,
|
&vl_interface_org_varlink_service,
|
||||||
|
|||||||
@ -353,6 +353,35 @@ EOF
|
|||||||
systemctl reset-failed
|
systemctl reset-failed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
testcase_prekill_hook() {
|
||||||
|
cat >/run/systemd/oomd.conf.d/99-oomd-prekill-test.conf <<'EOF'
|
||||||
|
[OOM]
|
||||||
|
PrekillHookTimeoutSec=3s
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# no hooks
|
||||||
|
systemctl reload systemd-oomd.service
|
||||||
|
! systemctl start --wait TEST-55-OOMD-testbloat.service || exit 1
|
||||||
|
|
||||||
|
# one hook
|
||||||
|
mkdir -p /run/systemd/oomd.prekill.hook/
|
||||||
|
ncat --recv-only -kUl /run/systemd/oomd.prekill.hook/althook >/tmp/oomd_event.json &
|
||||||
|
! systemctl start --wait TEST-55-OOMD-testbloat.service || exit 1
|
||||||
|
[[ $(jq -r .method </tmp/oomd_event.json) = 'io.systemd.oom.Prekill.Notify' ]]
|
||||||
|
|
||||||
|
rm -f /run/systemd/oomd.prekill.hook/* /tmp/oomd_event.json
|
||||||
|
|
||||||
|
# many hooks
|
||||||
|
for i in {1..4}; do
|
||||||
|
ncat --recv-only -kUl "/run/systemd/oomd.prekill.hook/althook$i" >"/tmp/oomd_event$i.json" &
|
||||||
|
done
|
||||||
|
|
||||||
|
! systemctl start --wait TEST-55-OOMD-testbloat.service || exit 1
|
||||||
|
for j in /tmp/oomd_event*.json; do
|
||||||
|
[[ $(jq -r .method <"$j") = 'io.systemd.oom.Prekill.Notify' ]]
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
run_testcases
|
run_testcases
|
||||||
|
|
||||||
touch /testok
|
touch /testok
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user