1
0
mirror of https://github.com/systemd/systemd synced 2026-03-29 19:24:50 +02:00

Compare commits

..

6 Commits

Author SHA1 Message Date
Luca Boccassi
e66ee1d7e6
Merge pull request #20893 from poettering/per-user-oom-score
default to higher oom scores for user sessions
2021-10-05 09:54:24 +01:00
Lennart Poettering
643006f62b man: document the new DefaultOOMScoreAdjust= setting 2021-10-04 16:27:11 +02:00
Lennart Poettering
ce7de0ba8e units: run user service managers at OOM score adjustment 100
Let's make it slightly more likely that a per-user service manager is
killed than any system service. We use a conservative 100 (from a range
that goes all the way to 1000).

Replaces: #17426

Together with the previous commit this means: the system manager and system
services are placed at OOM score adjustment 0 (specifically: they
inherit the kernel default of 0). User service managers (both for root and
non-root) are placed at 100. User services for non-root are placed at
200, those for root inherit 100.

Note that processes forked off the user *sessions* (i.e. not forked off
the per-user service manager) remain at 0 (e.g. the shell process
created by a tty or ssh login). This probably should be
addressed too one day (maybe in pam_systemd?), but is not covered here.
2021-10-04 16:27:10 +02:00
Lennart Poettering
d4a402e4f6 core: add a new setting DefaultOOMScoreAdjust= and set it to 100 above service manager's by default
Let's make our service managers slightly less likely to be killed by the
OOM killer by adjusting our services' OOM score adjustment to 100 above
ours. Do this conservatively, i.e. only for regular user sessions.
2021-10-04 16:27:10 +02:00
Lennart Poettering
bb2d1d8ea4 test: add test case for {get,set}_oom_score_adjust() 2021-10-04 16:27:10 +02:00
Lennart Poettering
2c37c613a7 process-util: add helper for querying oom score adjustment value 2021-10-04 16:27:10 +02:00
11 changed files with 169 additions and 11 deletions

View File

@ -493,6 +493,8 @@ node /org/freedesktop/systemd1 {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s DefaultOOMPolicy = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly i DefaultOOMScoreAdjust = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s CtrlAltDelBurstAction = '...';
};
interface org.freedesktop.DBus.Peer { ... };
@ -735,6 +737,8 @@ node /org/freedesktop/systemd1 {
<!--property DefaultOOMPolicy is not documented!-->
<!--property DefaultOOMScoreAdjust is not documented!-->
<!--property CtrlAltDelBurstAction is not documented!-->
<!--Autogenerated cross-references for systemd.directives, do not edit-->
@ -1131,6 +1135,8 @@ node /org/freedesktop/systemd1 {
<variablelist class="dbus-property" generated="True" extra-ref="DefaultOOMPolicy"/>
<variablelist class="dbus-property" generated="True" extra-ref="DefaultOOMScoreAdjust"/>
<variablelist class="dbus-property" generated="True" extra-ref="CtrlAltDelBurstAction"/>
<!--End of Autogenerated section-->

View File

@ -450,6 +450,20 @@
for details. Note that this default is not used for services that have <varname>Delegate=</varname>
turned on.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>DefaultOOMScoreAdjust=</varname></term>
<listitem><para>Configures the default OOM score adjustments of processes run by the service
manager. This defaults to unset (meaning the forked off processes inherit the service manager's OOM
score adjustment value), except if the service manager is run for an unprivileged user, in which case
this defaults to the service manager's OOM adjustment value plus 100 (this makes service processes
slightly more likely to be killed under memory pressure than the manager itself). This may be used to
pick a global default for the per-unit <varname>OOMScoreAdjust=</varname> setting. See
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
details. Note that this setting has no effect on the OOM score adjustment value of the service
manager process itself; it retains the original value set during its invocation.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -1502,6 +1502,24 @@ int set_oom_score_adjust(int value) {
WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
}
/* Reads the calling process' OOM score adjustment value from /proc/self/oom_score_adj.
 *
 * On success returns 0 and, if 'ret' is non-NULL, stores the parsed value there. If the file cannot be
 * read, the negative error returned by read_virtual_file() is passed through and 'ret' is left
 * untouched. */
int get_oom_score_adjust(int *ret) {
        /* Must start out as NULL: the cleanup handler runs on every exit path, including the early
         * return taken when reading fails and nothing was ever stored in the pointer. */
        _cleanup_free_ char *t = NULL;
        int r, a;

        r = read_virtual_file("/proc/self/oom_score_adj", SIZE_MAX, &t, NULL);
        if (r < 0)
                return r;

        /* Strip trailing whitespace so that the integer parser sees nothing but the number. */
        delete_trailing_chars(t, WHITESPACE);

        /* The contents are expected to always be a well-formed, in-range integer; anything else is
         * treated as a fatal programming/environment error rather than reported to the caller. */
        assert_se(safe_atoi(t, &a) >= 0);
        assert_se(oom_score_adjust_is_valid(a));

        if (ret)
                *ret = a;

        return 0;
}
int pidfd_get_pid(int fd, pid_t *ret) {
char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
_cleanup_free_ char *fdinfo = NULL;

View File

@ -175,6 +175,7 @@ static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) {
int namespace_fork(const char *outer_name, const char *inner_name, int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid);
int set_oom_score_adjust(int value);
int get_oom_score_adjust(int *ret);
/* The highest possible (theoretic) pid_t value on this architecture. */
#define PID_T_MAX ((pid_t) INT32_MAX)

View File

@ -104,8 +104,7 @@ static int property_get_oom_score_adjust(
sd_bus_error *error) {
ExecContext *c = userdata;
int32_t n;
int r;
int r, n;
assert(bus);
assert(reply);
@ -114,17 +113,10 @@ static int property_get_oom_score_adjust(
if (c->oom_score_adjust_set)
n = c->oom_score_adjust;
else {
_cleanup_free_ char *t = NULL;
n = 0;
r = read_one_line_file("/proc/self/oom_score_adj", &t);
r = get_oom_score_adjust(&n);
if (r < 0)
log_debug_errno(r, "Failed to read /proc/self/oom_score_adj, ignoring: %m");
else {
r = safe_atoi32(t, &n);
if (r < 0)
log_debug_errno(r, "Failed to parse \"%s\" from /proc/self/oom_score_adj, ignoring: %m", t);
}
}
return sd_bus_message_append(reply, "i", n);

View File

@ -31,6 +31,7 @@
#include "os-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "selinux-access.h"
#include "stat-util.h"
#include "string-util.h"
@ -358,6 +359,34 @@ static int property_set_kexec_watchdog(
return property_set_watchdog(userdata, WATCHDOG_KEXEC, value);
}
/* D-Bus property getter for the manager's "DefaultOOMScoreAdjust" property.
 *
 * Reports the explicitly configured default if one is set. Otherwise it reports the OOM score
 * adjustment value of the current process, falling back to 0 if that cannot be read. */
static int property_get_oom_score_adjust(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        Manager *m = userdata;
        int value = 0, q;

        assert(m);
        assert(bus);
        assert(reply);

        /* A configured default takes precedence over whatever this process currently runs with. */
        if (m->default_oom_score_adjust_set)
                return sd_bus_message_append(reply, "i", m->default_oom_score_adjust);

        /* Best effort only: on failure we log at debug level and report the initial value of 0. */
        q = get_oom_score_adjust(&value);
        if (q < 0)
                log_debug_errno(q, "Failed to read current OOM score adjustment value, ignoring: %m");

        return sd_bus_message_append(reply, "i", value);
}
static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) {
Unit *u;
int r;
@ -2722,6 +2751,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0),
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultOOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("CtrlAltDelBurstAction", "s", bus_property_get_emergency_action, offsetof(Manager, cad_burst_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_METHOD_WITH_NAMES("GetUnit",

View File

@ -3,6 +3,7 @@
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <linux/oom.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/reboot.h>
@ -160,6 +161,8 @@ static NUMAPolicy arg_numa_policy;
static usec_t arg_clock_usec;
static void *arg_random_seed;
static size_t arg_random_seed_size;
static int arg_default_oom_score_adjust;
static bool arg_default_oom_score_adjust_set;
/* A copy of the original environment block */
static char **saved_env = NULL;
@ -633,6 +636,37 @@ static int config_parse_default_timeout_abort(
return 0;
}
/* Config parser callback for the DefaultOOMScoreAdjust= setting.
 *
 * An empty value resets the setting to "unset". A value that fails to parse is logged as a warning
 * and otherwise ignored. The result is stored in arg_default_oom_score_adjust and
 * arg_default_oom_score_adjust_set; 'data' and 'userdata' are not used. Always returns 0. */
static int config_parse_oom_score_adjust(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        int parsed, q;

        /* Assigning the empty string turns the setting off again. */
        if (isempty(rvalue)) {
                arg_default_oom_score_adjust_set = false;
                return 0;
        }

        q = parse_oom_score_adjust(rvalue, &parsed);
        if (q >= 0) {
                arg_default_oom_score_adjust = parsed;
                arg_default_oom_score_adjust_set = true;
        } else
                log_syntax(unit, LOG_WARNING, filename, line, q, "Failed to parse the OOM score adjust value '%s', ignoring: %m", rvalue);

        return 0;
}
static int parse_config_file(void) {
const ConfigTableItem items[] = {
{ "Manager", "LogLevel", config_parse_level2, 0, NULL },
@ -667,7 +701,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error },
{ "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
{ "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
{ "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
{ "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
{ "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
{ "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
{ "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval },
@ -699,6 +733,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
{ "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
{ "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
{ "Manager", "DefaultOOMScoreAdjust", config_parse_oom_score_adjust, 0, NULL },
{}
};
@ -769,6 +804,8 @@ static void set_manager_defaults(Manager *m) {
m->default_tasks_accounting = arg_default_tasks_accounting;
m->default_tasks_max = arg_default_tasks_max;
m->default_oom_policy = arg_default_oom_policy;
m->default_oom_score_adjust_set = arg_default_oom_score_adjust_set;
m->default_oom_score_adjust = arg_default_oom_score_adjust;
(void) manager_set_default_rlimits(m, arg_default_rlimit);
@ -2426,6 +2463,35 @@ static void reset_arguments(void) {
arg_random_seed = mfree(arg_random_seed);
arg_random_seed_size = 0;
arg_clock_usec = 0;
arg_default_oom_score_adjust_set = false;
}
/* Picks an implicit default OOM score adjustment for the processes we fork off, unless one was
 * configured explicitly. */
static void determine_default_oom_score_adjust(void) {
        int q, current, raised;

        /* Our services should run at a slightly higher OOM score than we do. We are conservative about
         * it though: nothing is changed if a default was already configured, or if we run as root (i.e.
         * the bump is applied only when we run in user mode, for an unprivileged user). */
        if (arg_default_oom_score_adjust_set || getuid() == 0)
                return;

        q = get_oom_score_adjust(&current);
        if (q < 0) {
                log_warning_errno(q, "Failed to determine current OOM score adjustment value, ignoring: %m");
                return;
        }

        /* Add 100 to our own value, but clamp at the maximum instead of going past it. */
        assert_cc(100 <= OOM_SCORE_ADJ_MAX);
        if (current >= OOM_SCORE_ADJ_MAX - 100)
                raised = OOM_SCORE_ADJ_MAX;
        else
                raised = current + 100;

        /* Already at the maximum, so there is nothing to raise: leave the default unset. */
        if (raised == current)
                return;

        arg_default_oom_score_adjust = raised;
        arg_default_oom_score_adjust_set = true;
}
static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
@ -2459,6 +2525,9 @@ static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
if (arg_show_status == _SHOW_STATUS_INVALID)
arg_show_status = SHOW_STATUS_YES;
/* Slightly raise the OOM score for our services if we are running for unprivileged users. */
determine_default_oom_score_adjust();
/* Push variables into the manager environment block */
setenv_manager_environment();

View File

@ -370,6 +370,8 @@ struct Manager {
usec_t default_timer_accuracy_usec;
OOMPolicy default_oom_policy;
int default_oom_score_adjust;
bool default_oom_score_adjust_set;
int original_log_level;
LogTarget original_log_target;

View File

@ -187,6 +187,11 @@ static void unit_init(Unit *u) {
if (ec) {
exec_context_init(ec);
if (u->manager->default_oom_score_adjust_set) {
ec->oom_score_adjust = u->manager->default_oom_score_adjust;
ec->oom_score_adjust_set = true;
}
if (MANAGER_IS_SYSTEM(u->manager))
ec->keyring_mode = EXEC_KEYRING_SHARED;
else {

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <fcntl.h>
#include <linux/oom.h>
#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
@ -874,6 +875,24 @@ static void test_get_process_ppid(void) {
}
}
/* Round-trip test for get_oom_score_adjust() and set_oom_score_adjust(): reads the current value,
 * tries to switch to OOM_SCORE_ADJ_MIN, then writes the initial value back and verifies it. */
static void test_set_oom_score_adjust(void) {
int a, b, r;
/* Remember the value we started with, so that it can be written back at the end. */
assert_se(get_oom_score_adjust(&a) >= 0);
/* Setting the minimum is allowed to fail, but only with a privilege error. */
r = set_oom_score_adjust(OOM_SCORE_ADJ_MIN);
assert_se(r >= 0 || ERRNO_IS_PRIVILEGE(r));
if (r >= 0) {
/* The write went through, hence reading back must yield what was just written. */
assert_se(get_oom_score_adjust(&b) >= 0);
assert_se(b == OOM_SCORE_ADJ_MIN);
}
/* Write the initial value back (this must succeed in either case) and check it took effect. */
assert_se(set_oom_score_adjust(a) >= 0);
assert_se(get_oom_score_adjust(&b) >= 0);
assert_se(b == a);
}
int main(int argc, char *argv[]) {
log_show_color(true);
test_setup_logging(LOG_INFO);
@ -904,6 +923,7 @@ int main(int argc, char *argv[]) {
test_ioprio_class_from_to_string();
test_setpriority_closest();
test_get_process_ppid();
test_set_oom_score_adjust();
return 0;
}

View File

@ -25,3 +25,4 @@ Delegate=pids memory
TasksMax=infinity
TimeoutStopSec=120s
KeyringMode=inherit
OOMScoreAdjust=100