1
0
mirror of https://github.com/systemd/systemd synced 2026-03-30 19:54:51 +02:00

Compare commits

..

No commits in common. "b6e44cd934873659f233bfd962e4a0dcb3026e59" and "df618f259a48cc70fb8fd7308b5a53d04fee31ba" have entirely different histories.

7 changed files with 115 additions and 235 deletions

View File

@ -133,46 +133,33 @@
<term><varname>RebootWatchdogSec=</varname></term> <term><varname>RebootWatchdogSec=</varname></term>
<term><varname>KExecWatchdogSec=</varname></term> <term><varname>KExecWatchdogSec=</varname></term>
<listitem><para>Configure the hardware watchdog at runtime and at reboot. Takes a timeout value in <listitem><para>Configure the hardware watchdog at runtime and at reboot. Takes a timeout value in seconds (or
seconds (or in other time units if suffixed with <literal>ms</literal>, <literal>min</literal>, in other time units if suffixed with <literal>ms</literal>, <literal>min</literal>, <literal>h</literal>,
<literal>h</literal>, <literal>d</literal>, <literal>w</literal>). If set to zero the watchdog logic <literal>d</literal>, <literal>w</literal>). If <varname>RuntimeWatchdogSec=</varname> is set to a non-zero
is disabled: no watchdog device is opened, configured, or pinged. If set to the special string value, the watchdog hardware (<filename>/dev/watchdog</filename> or the path specified with
<literal>default</literal> the watchdog is opened and pinged in regular intervals, but the timeout <varname>WatchdogDevice=</varname> or the kernel option <varname>systemd.watchdog-device=</varname>) will be
is not changed from the default. If set to any other time value the watchdog timeout is configured to programmed to automatically reboot the system if it is not contacted within the specified timeout interval. The
the specified value (or a value close to it, depending on hardware capabilities).</para> system manager will ensure to contact it at least once in half the specified timeout interval. This feature
requires a hardware watchdog device to be present, as it is commonly the case in embedded and server
<para>If <varname>RuntimeWatchdogSec=</varname> is set to a non-zero value, the watchdog hardware systems. Not all hardware watchdogs allow configuration of all possible reboot timeout values, in which case
(<filename>/dev/watchdog</filename> or the path specified with <varname>WatchdogDevice=</varname> or the closest available timeout is picked. <varname>RebootWatchdogSec=</varname> may be used to configure the
the kernel option <varname>systemd.watchdog-device=</varname>) will be programmed to automatically hardware watchdog when the system is asked to reboot. It works as a safety net to ensure that the reboot takes
reboot the system if it is not contacted within the specified timeout interval. The system manager place even if a clean reboot attempt times out. Note that the <varname>RebootWatchdogSec=</varname> timeout
will ensure to contact it at least once in half the specified timeout interval. This feature requires applies only to the second phase of the reboot, i.e. after all regular services are already terminated, and
a hardware watchdog device to be present, as it is commonly the case in embedded and server after the system and service manager process (PID 1) got replaced by the <filename>systemd-shutdown</filename>
systems. Not all hardware watchdogs allow configuration of all possible reboot timeout values, in binary, see system <citerefentry><refentrytitle>bootup</refentrytitle><manvolnum>7</manvolnum></citerefentry>
which case the closest available timeout is picked.</para> for details. During the first phase of the shutdown operation the system and service manager remains running
and hence <varname>RuntimeWatchdogSec=</varname> is still honoured. In order to define a timeout on this first
<para><varname>RebootWatchdogSec=</varname> may be used to configure the hardware watchdog when the phase of system shutdown, configure <varname>JobTimeoutSec=</varname> and <varname>JobTimeoutAction=</varname>
system is asked to reboot. It works as a safety net to ensure that the reboot takes place even if a in the [Unit] section of the <filename>shutdown.target</filename> unit. By default
clean reboot attempt times out. Note that the <varname>RebootWatchdogSec=</varname> timeout applies <varname>RuntimeWatchdogSec=</varname> defaults to 0 (off), and <varname>RebootWatchdogSec=</varname> to
only to the second phase of the reboot, i.e. after all regular services are already terminated, and 10min. <varname>KExecWatchdogSec=</varname> may be used to additionally enable the watchdog when kexec
after the system and service manager process (PID 1) got replaced by the is being executed rather than when rebooting. Note that if the kernel does not reset the watchdog on kexec (depending
<filename>systemd-shutdown</filename> binary, see system on the specific hardware and/or driver), in this case the watchdog might not get disabled after kexec succeeds
<citerefentry><refentrytitle>bootup</refentrytitle><manvolnum>7</manvolnum></citerefentry> for and thus the system might get rebooted, unless <varname>RuntimeWatchdogSec=</varname> is also enabled at the same time.
details. During the first phase of the shutdown operation the system and service manager remains For this reason it is recommended to enable <varname>KExecWatchdogSec=</varname> only if
running and hence <varname>RuntimeWatchdogSec=</varname> is still honoured. In order to define a <varname>RuntimeWatchdogSec=</varname> is also enabled.
timeout on this first phase of system shutdown, configure <varname>JobTimeoutSec=</varname> and These settings have no effect if a hardware watchdog is not available.</para></listitem>
<varname>JobTimeoutAction=</varname> in the [Unit] section of the
<filename>shutdown.target</filename> unit. <varname>RuntimeWatchdogSec=</varname> defaults
to 0 (off), and <varname>RebootWatchdogSec=</varname> to 10min.</para>
<para><varname>KExecWatchdogSec=</varname> may be used to additionally enable the watchdog when kexec
is being executed rather than when rebooting. Note that if the kernel does not reset the watchdog on
kexec (depending on the specific hardware and/or driver), the watchdog might not get
disabled after kexec succeeds and thus the system might get rebooted, unless
<varname>RuntimeWatchdogSec=</varname> is also enabled at the same time. For this reason it is
recommended to enable <varname>KExecWatchdogSec=</varname> only if
<varname>RuntimeWatchdogSec=</varname> is also enabled.</para>
<para>These settings have no effect if a hardware watchdog is not available.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>

View File

@ -657,9 +657,7 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S
assert(u); assert(u);
set_clear(*set_installed); set_clear(*set_installed);
r = set_ensure_allocated(set_installed, &bpf_program_hash_ops); set_ensure_allocated(set_installed, &bpf_program_hash_ops);
if (r < 0)
return log_oom();
SET_FOREACH_MOVE(prog, *set_installed, *set) { SET_FOREACH_MOVE(prog, *set_installed, *set) {
r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI); r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);

View File

@ -6334,32 +6334,3 @@ int config_parse_swap_priority(
s->parameters_fragment.priority_set = true; s->parameters_fragment.priority_set = true;
return 0; return 0;
} }
/* Config parser for {Runtime,Reboot,KExec}WatchdogSec=.
 *
 * The literal string "default" is stored as USEC_INFINITY in the usec_t that 'data' points
 * to, and 0 is returned. Any other value is handed to config_parse_sec() unchanged, and
 * that function's return value is passed through. */
int config_parse_watchdog_sec(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        usec_t *timeout = data;

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        /* Everything but the special keyword is an ordinary time span. */
        if (!streq(rvalue, "default"))
                return config_parse_sec(unit, filename, line, section, section_line, lvalue, ltype, rvalue, data, userdata);

        *timeout = USEC_INFINITY;
        return 0;
}

View File

@ -143,7 +143,6 @@ CONFIG_PARSER_PROTOTYPE(config_parse_extension_images);
CONFIG_PARSER_PROTOTYPE(config_parse_bpf_foreign_program); CONFIG_PARSER_PROTOTYPE(config_parse_bpf_foreign_program);
CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_socket_bind); CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_socket_bind);
CONFIG_PARSER_PROTOTYPE(config_parse_restrict_network_interfaces); CONFIG_PARSER_PROTOTYPE(config_parse_restrict_network_interfaces);
CONFIG_PARSER_PROTOTYPE(config_parse_watchdog_sec);
/* gperf prototypes */ /* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length); const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);

View File

@ -537,23 +537,6 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
(void) parse_path_argument(value, false, &arg_watchdog_device); (void) parse_path_argument(value, false, &arg_watchdog_device);
} else if (proc_cmdline_key_streq(key, "systemd.watchdog_sec")) {
if (proc_cmdline_value_missing(key, value))
return 0;
if (streq(value, "default"))
arg_runtime_watchdog = USEC_INFINITY;
else {
r = parse_sec(value, &arg_runtime_watchdog);
if (r < 0) {
log_warning_errno(r, "Failed to parse systemd.watchdog_sec= argument '%s', ignoring: %m", value);
return 0;
}
}
arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
} else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) { } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
if (proc_cmdline_value_missing(key, value)) if (proc_cmdline_value_missing(key, value))
@ -705,10 +688,10 @@ static int parse_config_file(void) {
{ "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type }, { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
{ "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy }, { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
{ "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL }, { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
{ "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog }, { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
{ "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */ { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
{ "Manager", "KExecWatchdogSec", config_parse_watchdog_sec, 0, &arg_kexec_watchdog }, { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
{ "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device }, { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
{ "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set }, { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
{ "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs }, { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
@ -1540,9 +1523,9 @@ static int become_shutdown(
}; };
_cleanup_strv_free_ char **env_block = NULL; _cleanup_strv_free_ char **env_block = NULL;
usec_t watchdog_timer = 0;
size_t pos = 7; size_t pos = 7;
int r; int r;
usec_t watchdog_timer = 0;
assert(shutdown_verb); assert(shutdown_verb);
assert(!command_line[pos]); assert(!command_line[pos]);
@ -1591,16 +1574,19 @@ static int become_shutdown(
else if (streq(shutdown_verb, "kexec")) else if (streq(shutdown_verb, "kexec"))
watchdog_timer = arg_kexec_watchdog; watchdog_timer = arg_kexec_watchdog;
/* If we reboot or kexec let's set the shutdown watchdog and tell the if (timestamp_is_set(watchdog_timer)) {
* shutdown binary to repeatedly ping it */ /* If we reboot or kexec let's set the shutdown watchdog and tell the shutdown binary to
r = watchdog_setup(watchdog_timer); * repeatedly ping it */
watchdog_close(r < 0); r = watchdog_setup(watchdog_timer);
watchdog_close(r < 0);
/* Tell the binary how often to ping, ignore failure */ /* Tell the binary how often to ping, ignore failure */
(void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer); (void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer);
if (arg_watchdog_device) if (arg_watchdog_device)
(void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device); (void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device);
} else
watchdog_close(true);
/* Avoid the creation of new processes forked by the kernel; at this /* Avoid the creation of new processes forked by the kernel; at this
* point, we will not listen to the signals anyway */ * point, we will not listen to the signals anyway */

View File

@ -3215,8 +3215,12 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
return; return;
if (t == WATCHDOG_RUNTIME) if (t == WATCHDOG_RUNTIME)
if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) {
(void) watchdog_setup(timeout); if (timestamp_is_set(timeout))
(void) watchdog_setup(timeout);
else
watchdog_close(true);
}
m->watchdog[t] = timeout; m->watchdog[t] = timeout;
} }
@ -3234,7 +3238,10 @@ int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
if (t == WATCHDOG_RUNTIME) { if (t == WATCHDOG_RUNTIME) {
usec_t usec = timestamp_is_set(timeout) ? timeout : m->watchdog[t]; usec_t usec = timestamp_is_set(timeout) ? timeout : m->watchdog[t];
(void) watchdog_setup(usec); if (timestamp_is_set(usec))
(void) watchdog_setup(usec);
else
watchdog_close(true);
} }
m->watchdog_overridden[t] = timeout; m->watchdog_overridden[t] = timeout;

View File

@ -15,114 +15,59 @@
#include "watchdog.h" #include "watchdog.h"
static int watchdog_fd = -1; static int watchdog_fd = -1;
static char *watchdog_device; static char *watchdog_device = NULL;
static usec_t watchdog_timeout; /* 0 → close device and USEC_INFINITY → don't change timeout */ static usec_t watchdog_timeout = USEC_INFINITY;
static usec_t watchdog_last_ping = USEC_INFINITY; static usec_t watchdog_last_ping = USEC_INFINITY;
/* Enables or disables the watchdog card through WDIOC_SETOPTIONS on the already opened
 * watchdog_fd.
 *
 * Returns 0 on success, and also when enabling fails with an error classified by
 * ERRNO_IS_NOT_SUPPORTED() (the driver then keeps the watchdog always enabled). Returns a
 * negative errno-style value when disabling fails or when enabling fails for any other
 * reason. */
static int watchdog_set_enable(bool enable) {
        int flags = enable ? WDIOS_ENABLECARD : WDIOS_DISABLECARD;
        int saved_errno;

        assert(watchdog_fd >= 0);

        if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) >= 0)
                return 0;

        /* Capture errno right away: it is consulted again after logging, and nothing in
         * between should be able to change the value we base the return code on. */
        saved_errno = errno;

        if (!enable)
                return log_warning_errno(saved_errno, "Failed to disable hardware watchdog, ignoring: %m");

        /* ENOTTY means the watchdog is always enabled so we're fine */
        log_full_errno(ERRNO_IS_NOT_SUPPORTED(saved_errno) ? LOG_DEBUG : LOG_WARNING, saved_errno,
                       "Failed to enable hardware watchdog, ignoring: %m");
        if (!ERRNO_IS_NOT_SUPPORTED(saved_errno))
                return -saved_errno;

        return 0;
}
/* Queries the timeout currently programmed into the hardware via WDIOC_GETTIMEOUT and
 * stores it, converted from seconds to microseconds, in watchdog_timeout.
 *
 * Returns 0 on success or -errno if the ioctl fails. */
static int watchdog_get_timeout(void) {
        int sec = 0;

        /* 0 is a valid file descriptor; use the same ">= 0" check as the other helpers
         * operating on watchdog_fd. */
        assert(watchdog_fd >= 0);

        if (ioctl(watchdog_fd, WDIOC_GETTIMEOUT, &sec) < 0)
                return -errno;

        /* A non-positive timeout would indicate a misbehaving driver. */
        assert(sec > 0);
        watchdog_timeout = sec * USEC_PER_SEC;

        return 0;
}
/* Requests the timeout held in watchdog_timeout from the hardware via WDIOC_SETTIMEOUT.
 *
 * The value is rounded up to whole seconds and capped at INT_MAX before being passed to
 * the ioctl. On success watchdog_timeout is overwritten with whatever 'sec' holds after
 * the ioctl (presumably the timeout the driver actually applied), converted back to
 * microseconds.
 *
 * Returns 0 on success or -errno if the ioctl fails. */
static int watchdog_set_timeout(void) {
        usec_t whole_seconds;
        int sec;

        assert(watchdog_fd >= 0);
        assert(timestamp_is_set(watchdog_timeout));

        whole_seconds = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);

        /* Saturate: the ioctl takes an int number of seconds */
        if (whole_seconds > (usec_t) INT_MAX)
                sec = INT_MAX;
        else
                sec = whole_seconds;

        if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0)
                return -errno;

        assert(sec > 0); /* buggy driver ? */
        watchdog_timeout = sec * USEC_PER_SEC;

        return 0;
}
/* Sends a keep-alive (WDIOC_KEEPALIVE) to the opened watchdog device right away.
 *
 * On success the current time is recorded in watchdog_last_ping and 0 is returned. On
 * failure a warning is logged and the logger's return value is passed back. */
static int watchdog_ping_now(void) {
        int rc;

        assert(watchdog_fd >= 0);

        rc = ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0);
        if (rc >= 0) {
                watchdog_last_ping = now(clock_boottime_or_monotonic());
                return 0;
        }

        return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
}
static int update_timeout(void) { static int update_timeout(void) {
int r;
assert(watchdog_timeout > 0);
if (watchdog_fd < 0) if (watchdog_fd < 0)
return 0; return 0;
if (watchdog_timeout == USEC_INFINITY)
return 0;
if (watchdog_timeout != USEC_INFINITY) { if (watchdog_timeout == 0) {
r = watchdog_set_timeout(); int flags;
if (r < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(r))
return log_error_errno(r, "Failed to set timeout to %s: %m",
FORMAT_TIMESPAN(watchdog_timeout, 0));
log_info("Modifying watchdog timeout is not supported, reusing the programmed timeout."); flags = WDIOS_DISABLECARD;
watchdog_timeout = USEC_INFINITY; if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
return log_warning_errno(errno, "Failed to disable hardware watchdog, ignoring: %m");
} else {
int sec, flags;
usec_t t;
t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
sec = MIN(t, (usec_t) INT_MAX); /* Saturate */
if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0)
return log_warning_errno(errno, "Failed to set timeout to %is, ignoring: %m", sec);
/* Just in case the driver is buggy */
assert(sec > 0);
/* watchdog_timeout stores the actual timeout used by the HW */
watchdog_timeout = sec * USEC_PER_SEC;
log_info("Set hardware watchdog to %s.", FORMAT_TIMESPAN(watchdog_timeout, 0));
flags = WDIOS_ENABLECARD;
if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) {
/* ENOTTY means the watchdog is always enabled so we're fine */
log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno,
"Failed to enable hardware watchdog, ignoring: %m");
if (!ERRNO_IS_NOT_SUPPORTED(errno))
return -errno;
} }
if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
watchdog_last_ping = now(clock_boottime_or_monotonic());
} }
if (watchdog_timeout == USEC_INFINITY) { return 0;
r = watchdog_get_timeout();
if (r < 0)
return log_error_errno(errno, "Failed to query watchdog HW timeout: %m");
}
r = watchdog_set_enable(true);
if (r < 0)
return r;
log_info("Watchdog running with a timeout of %s.", FORMAT_TIMESPAN(watchdog_timeout, 0));
return watchdog_ping_now();
} }
static int open_watchdog(void) { static int open_watchdog(void) {
struct watchdog_info ident; struct watchdog_info ident;
const char *fn; const char *fn;
int r;
if (watchdog_fd >= 0) if (watchdog_fd >= 0)
return 0; return 0;
@ -140,11 +85,7 @@ static int open_watchdog(void) {
ident.firmware_version, ident.firmware_version,
fn); fn);
r = update_timeout(); return update_timeout();
if (r < 0)
watchdog_close(true);
return r;
} }
int watchdog_set_device(const char *path) { int watchdog_set_device(const char *path) {
@ -161,38 +102,21 @@ int watchdog_set_device(const char *path) {
} }
int watchdog_setup(usec_t timeout) { int watchdog_setup(usec_t timeout) {
usec_t previous_timeout;
int r;
/* timeout=0 closes the device whereas passing timeout=USEC_INFINITY
* opens it (if needed) without configuring any particular timeout and
* thus reuses the programmed value (therefore it's a nop if the device
* is already opened).
*/
if (timeout == 0) {
watchdog_close(true);
return 0;
}
/* Let's shortcut duplicated requests */
if (watchdog_fd >= 0 && (timeout == watchdog_timeout || timeout == USEC_INFINITY))
return 0;
/* Initialize the watchdog timeout with the caller value. This value is /* Initialize the watchdog timeout with the caller value. This value is
* going to be updated by update_timeout() with the closest value * going to be updated by update_timeout() with the closest value
* supported by the driver */ * supported by the driver */
previous_timeout = watchdog_timeout;
watchdog_timeout = timeout; watchdog_timeout = timeout;
/* If we didn't open the watchdog yet and didn't get any explicit
* timeout value set, don't do anything */
if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY)
return 0;
if (watchdog_fd < 0) if (watchdog_fd < 0)
return open_watchdog(); return open_watchdog();
r = update_timeout(); return update_timeout();
if (r < 0)
watchdog_timeout = previous_timeout;
return r;
} }
usec_t watchdog_runtime_wait(void) { usec_t watchdog_runtime_wait(void) {
@ -214,7 +138,7 @@ usec_t watchdog_runtime_wait(void) {
int watchdog_ping(void) { int watchdog_ping(void) {
usec_t ntime; usec_t ntime;
if (watchdog_timeout == 0) if (!timestamp_is_set(watchdog_timeout))
return 0; return 0;
if (watchdog_fd < 0) if (watchdog_fd < 0)
@ -231,20 +155,24 @@ int watchdog_ping(void) {
return 0; return 0;
} }
return watchdog_ping_now(); if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
watchdog_last_ping = ntime;
return 0;
} }
void watchdog_close(bool disarm) { void watchdog_close(bool disarm) {
/* Once closed, pinging the device becomes a NOP and we request a new
* call to watchdog_setup() to open the device again. */
watchdog_timeout = 0;
if (watchdog_fd < 0) if (watchdog_fd < 0)
return; return;
if (disarm) { if (disarm) {
(void) watchdog_set_enable(false); int flags;
/* Explicitly disarm it */
flags = WDIOS_DISABLECARD;
if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
log_warning_errno(errno, "Failed to disable hardware watchdog, ignoring: %m");
/* To be sure, use magic close logic, too */ /* To be sure, use magic close logic, too */
for (;;) { for (;;) {
@ -261,4 +189,8 @@ void watchdog_close(bool disarm) {
} }
watchdog_fd = safe_close(watchdog_fd); watchdog_fd = safe_close(watchdog_fd);
/* Once closed, pinging the device becomes a NOP and we request a new
* call to watchdog_setup() to open the device again. */
watchdog_timeout = USEC_INFINITY;
} }