1
0
mirror of https://github.com/systemd/systemd synced 2026-03-30 19:54:51 +02:00

Compare commits

..

No commits in common. "b6e44cd934873659f233bfd962e4a0dcb3026e59" and "df618f259a48cc70fb8fd7308b5a53d04fee31ba" have entirely different histories.

7 changed files with 115 additions and 235 deletions

View File

@ -133,46 +133,33 @@
<term><varname>RebootWatchdogSec=</varname></term>
<term><varname>KExecWatchdogSec=</varname></term>
<listitem><para>Configure the hardware watchdog at runtime and at reboot. Takes a timeout value in
seconds (or in other time units if suffixed with <literal>ms</literal>, <literal>min</literal>,
<literal>h</literal>, <literal>d</literal>, <literal>w</literal>). If set to zero the watchdog logic
is disabled: no watchdog device is opened, configured, or pinged. If set to the special string
<literal>default</literal> the watchdog is opened and pinged in regular intervals, but the timeout
is not changed from the default. If set to any other time value the watchdog timeout is configured to
the specified value (or a value close to it, depending on hardware capabilities).</para>
<para>If <varname>RuntimeWatchdogSec=</varname> is set to a non-zero value, the watchdog hardware
(<filename>/dev/watchdog</filename> or the path specified with <varname>WatchdogDevice=</varname> or
the kernel option <varname>systemd.watchdog-device=</varname>) will be programmed to automatically
reboot the system if it is not contacted within the specified timeout interval. The system manager
will make sure to contact it at least once in half the specified timeout interval. This feature requires
a hardware watchdog device to be present, as is commonly the case in embedded and server
systems. Not all hardware watchdogs allow configuration of all possible reboot timeout values, in
which case the closest available timeout is picked.</para>
<para><varname>RebootWatchdogSec=</varname> may be used to configure the hardware watchdog when the
system is asked to reboot. It works as a safety net to ensure that the reboot takes place even if a
clean reboot attempt times out. Note that the <varname>RebootWatchdogSec=</varname> timeout applies
only to the second phase of the reboot, i.e. after all regular services are already terminated, and
after the system and service manager process (PID 1) got replaced by the
<filename>systemd-shutdown</filename> binary, see system
<citerefentry><refentrytitle>bootup</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
details. During the first phase of the shutdown operation the system and service manager remains
running and hence <varname>RuntimeWatchdogSec=</varname> is still honoured. In order to define a
timeout on this first phase of system shutdown, configure <varname>JobTimeoutSec=</varname> and
<varname>JobTimeoutAction=</varname> in the [Unit] section of the
<filename>shutdown.target</filename> unit. By default <varname>RuntimeWatchdogSec=</varname> is set
to 0 (off), and <varname>RebootWatchdogSec=</varname> to 10min.</para>
<para><varname>KExecWatchdogSec=</varname> may be used to additionally enable the watchdog when kexec
is being executed rather than when rebooting. Note that if the kernel does not reset the watchdog on
kexec (depending on the specific hardware and/or driver), the watchdog might not get
disabled after kexec succeeds and thus the system might get rebooted, unless
<varname>RuntimeWatchdogSec=</varname> is also enabled at the same time. For this reason it is
recommended to enable <varname>KExecWatchdogSec=</varname> only if
<varname>RuntimeWatchdogSec=</varname> is also enabled.</para>
<para>These settings have no effect if a hardware watchdog is not available.</para></listitem>
<listitem><para>Configure the hardware watchdog at runtime and at reboot. Takes a timeout value in seconds (or
in other time units if suffixed with <literal>ms</literal>, <literal>min</literal>, <literal>h</literal>,
<literal>d</literal>, <literal>w</literal>). If <varname>RuntimeWatchdogSec=</varname> is set to a non-zero
value, the watchdog hardware (<filename>/dev/watchdog</filename> or the path specified with
<varname>WatchdogDevice=</varname> or the kernel option <varname>systemd.watchdog-device=</varname>) will be
programmed to automatically reboot the system if it is not contacted within the specified timeout interval. The
system manager will make sure to contact it at least once in half the specified timeout interval. This feature
requires a hardware watchdog device to be present, as is commonly the case in embedded and server
systems. Not all hardware watchdogs allow configuration of all possible reboot timeout values, in which case
the closest available timeout is picked. <varname>RebootWatchdogSec=</varname> may be used to configure the
hardware watchdog when the system is asked to reboot. It works as a safety net to ensure that the reboot takes
place even if a clean reboot attempt times out. Note that the <varname>RebootWatchdogSec=</varname> timeout
applies only to the second phase of the reboot, i.e. after all regular services are already terminated, and
after the system and service manager process (PID 1) got replaced by the <filename>systemd-shutdown</filename>
binary, see system <citerefentry><refentrytitle>bootup</refentrytitle><manvolnum>7</manvolnum></citerefentry>
for details. During the first phase of the shutdown operation the system and service manager remains running
and hence <varname>RuntimeWatchdogSec=</varname> is still honoured. In order to define a timeout on this first
phase of system shutdown, configure <varname>JobTimeoutSec=</varname> and <varname>JobTimeoutAction=</varname>
in the [Unit] section of the <filename>shutdown.target</filename> unit. By default
<varname>RuntimeWatchdogSec=</varname> is set to 0 (off), and <varname>RebootWatchdogSec=</varname> to
10min. <varname>KExecWatchdogSec=</varname> may be used to additionally enable the watchdog when kexec
is being executed rather than when rebooting. Note that if the kernel does not reset the watchdog on kexec (depending
on the specific hardware and/or driver), the watchdog might not get disabled after kexec succeeds
and thus the system might get rebooted, unless <varname>RuntimeWatchdogSec=</varname> is also enabled at the same time.
For this reason it is recommended to enable <varname>KExecWatchdogSec=</varname> only if
<varname>RuntimeWatchdogSec=</varname> is also enabled.
These settings have no effect if a hardware watchdog is not available.</para></listitem>
</varlistentry>
<varlistentry>

View File

@ -657,9 +657,7 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S
assert(u);
set_clear(*set_installed);
r = set_ensure_allocated(set_installed, &bpf_program_hash_ops);
if (r < 0)
return log_oom();
set_ensure_allocated(set_installed, &bpf_program_hash_ops);
SET_FOREACH_MOVE(prog, *set_installed, *set) {
r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);

View File

@ -6334,32 +6334,3 @@ int config_parse_swap_priority(
s->parameters_fragment.priority_set = true;
return 0;
}
int config_parse_watchdog_sec(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
assert(filename);
assert(lvalue);
assert(rvalue);
/* This is called for {Runtime,Reboot,KExec}WatchdogSec= where "default" maps to
* USEC_INFINITY internally. */
if (streq(rvalue, "default")) {
usec_t *usec = data;
*usec = USEC_INFINITY;
return 0;
}
return config_parse_sec(unit, filename, line, section, section_line, lvalue, ltype, rvalue, data, userdata);
}

View File

@ -143,7 +143,6 @@ CONFIG_PARSER_PROTOTYPE(config_parse_extension_images);
CONFIG_PARSER_PROTOTYPE(config_parse_bpf_foreign_program);
CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_socket_bind);
CONFIG_PARSER_PROTOTYPE(config_parse_restrict_network_interfaces);
CONFIG_PARSER_PROTOTYPE(config_parse_watchdog_sec);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);

View File

@ -537,23 +537,6 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
(void) parse_path_argument(value, false, &arg_watchdog_device);
} else if (proc_cmdline_key_streq(key, "systemd.watchdog_sec")) {
if (proc_cmdline_value_missing(key, value))
return 0;
if (streq(value, "default"))
arg_runtime_watchdog = USEC_INFINITY;
else {
r = parse_sec(value, &arg_runtime_watchdog);
if (r < 0) {
log_warning_errno(r, "Failed to parse systemd.watchdog_sec= argument '%s', ignoring: %m", value);
return 0;
}
}
arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
} else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
if (proc_cmdline_value_missing(key, value))
@ -705,10 +688,10 @@ static int parse_config_file(void) {
{ "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
{ "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
{ "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
{ "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog },
{ "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
{ "Manager", "KExecWatchdogSec", config_parse_watchdog_sec, 0, &arg_kexec_watchdog },
{ "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
{ "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
{ "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
{ "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
{ "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
{ "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
@ -1540,9 +1523,9 @@ static int become_shutdown(
};
_cleanup_strv_free_ char **env_block = NULL;
usec_t watchdog_timer = 0;
size_t pos = 7;
int r;
usec_t watchdog_timer = 0;
assert(shutdown_verb);
assert(!command_line[pos]);
@ -1591,16 +1574,19 @@ static int become_shutdown(
else if (streq(shutdown_verb, "kexec"))
watchdog_timer = arg_kexec_watchdog;
/* If we reboot or kexec let's set the shutdown watchdog and tell the
* shutdown binary to repeatedly ping it */
r = watchdog_setup(watchdog_timer);
watchdog_close(r < 0);
if (timestamp_is_set(watchdog_timer)) {
/* If we reboot or kexec let's set the shutdown watchdog and tell the shutdown binary to
* repeatedly ping it */
r = watchdog_setup(watchdog_timer);
watchdog_close(r < 0);
/* Tell the binary how often to ping, ignore failure */
(void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer);
/* Tell the binary how often to ping, ignore failure */
(void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer);
if (arg_watchdog_device)
(void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device);
if (arg_watchdog_device)
(void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device);
} else
watchdog_close(true);
/* Avoid the creation of new processes forked by the kernel; at this
* point, we will not listen to the signals anyway */

View File

@ -3215,8 +3215,12 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
return;
if (t == WATCHDOG_RUNTIME)
if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]))
(void) watchdog_setup(timeout);
if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) {
if (timestamp_is_set(timeout))
(void) watchdog_setup(timeout);
else
watchdog_close(true);
}
m->watchdog[t] = timeout;
}
@ -3234,7 +3238,10 @@ int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
if (t == WATCHDOG_RUNTIME) {
usec_t usec = timestamp_is_set(timeout) ? timeout : m->watchdog[t];
(void) watchdog_setup(usec);
if (timestamp_is_set(usec))
(void) watchdog_setup(usec);
else
watchdog_close(true);
}
m->watchdog_overridden[t] = timeout;

View File

@ -15,114 +15,59 @@
#include "watchdog.h"
static int watchdog_fd = -1;
static char *watchdog_device;
static usec_t watchdog_timeout; /* 0 → close device and USEC_INFINITY → don't change timeout */
static char *watchdog_device = NULL;
static usec_t watchdog_timeout = USEC_INFINITY;
static usec_t watchdog_last_ping = USEC_INFINITY;
/* Arms (enable=true) or disarms (enable=false) the already opened watchdog device through
 * WDIOC_SETOPTIONS.
 *
 * Returns 0 on success, and also when enabling fails with an error that ERRNO_IS_NOT_SUPPORTED()
 * accepts. Otherwise the failure is logged and a negative errno-style value is returned. */
static int watchdog_set_enable(bool enable) {
        int flags;

        assert(watchdog_fd >= 0);

        flags = enable ? WDIOS_ENABLECARD : WDIOS_DISABLECARD;

        if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) >= 0)
                return 0;

        if (!enable)
                return log_warning_errno(errno, "Failed to disable hardware watchdog, ignoring: %m");

        /* ENOTTY means the watchdog is always enabled so we're fine */
        log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno,
                       "Failed to enable hardware watchdog, ignoring: %m");

        return ERRNO_IS_NOT_SUPPORTED(errno) ? 0 : -errno;
}
/* Reads the timeout currently programmed into the watchdog device via WDIOC_GETTIMEOUT and stores it,
 * converted from seconds to microseconds, in watchdog_timeout.
 *
 * The device must already be open. Returns 0 on success or -errno if the ioctl fails, in which case
 * watchdog_timeout is left untouched. */
static int watchdog_get_timeout(void) {
        int sec = 0;

        /* 0 is a valid file descriptor, hence check for >= 0 like the other helpers do */
        assert(watchdog_fd >= 0);

        if (ioctl(watchdog_fd, WDIOC_GETTIMEOUT, &sec) < 0)
                return -errno;

        /* A non-positive timeout reported by the driver is treated as a programming error */
        assert(sec > 0);
        watchdog_timeout = sec * USEC_PER_SEC;

        return 0;
}
/* Programs the watchdog device with the timeout held in watchdog_timeout.
 *
 * The value is rounded up to whole seconds and capped at INT_MAX before being passed to
 * WDIOC_SETTIMEOUT. On success watchdog_timeout is rewritten from the seconds value left in the ioctl
 * argument (presumably the timeout the driver actually applied) and 0 is returned; on failure -errno
 * is returned and watchdog_timeout is not modified. */
static int watchdog_set_timeout(void) {
        usec_t rounded;
        int seconds;

        assert(watchdog_fd >= 0);
        assert(timestamp_is_set(watchdog_timeout));

        rounded = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
        seconds = MIN(rounded, (usec_t) INT_MAX); /* Saturate */

        if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &seconds) < 0)
                return -errno;

        /* buggy driver ? */
        assert(seconds > 0);

        watchdog_timeout = seconds * USEC_PER_SEC;
        return 0;
}
/* Sends a keep-alive to the opened watchdog device right away.
 *
 * On success the current time of clock_boottime_or_monotonic() is recorded in watchdog_last_ping and 0
 * is returned. On failure a warning is logged and its negative errno-style result is returned;
 * watchdog_last_ping is not updated in that case. */
static int watchdog_ping_now(void) {
        assert(watchdog_fd >= 0);

        if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) >= 0) {
                watchdog_last_ping = now(clock_boottime_or_monotonic());
                return 0;
        }

        return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
}
static int update_timeout(void) {
int r;
assert(watchdog_timeout > 0);
if (watchdog_fd < 0)
return 0;
if (watchdog_timeout == USEC_INFINITY)
return 0;
if (watchdog_timeout != USEC_INFINITY) {
r = watchdog_set_timeout();
if (r < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(r))
return log_error_errno(r, "Failed to set timeout to %s: %m",
FORMAT_TIMESPAN(watchdog_timeout, 0));
if (watchdog_timeout == 0) {
int flags;
log_info("Modifying watchdog timeout is not supported, reusing the programmed timeout.");
watchdog_timeout = USEC_INFINITY;
flags = WDIOS_DISABLECARD;
if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
return log_warning_errno(errno, "Failed to disable hardware watchdog, ignoring: %m");
} else {
int sec, flags;
usec_t t;
t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
sec = MIN(t, (usec_t) INT_MAX); /* Saturate */
if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0)
return log_warning_errno(errno, "Failed to set timeout to %is, ignoring: %m", sec);
/* Just in case the driver is buggy */
assert(sec > 0);
/* watchdog_timeout stores the actual timeout used by the HW */
watchdog_timeout = sec * USEC_PER_SEC;
log_info("Set hardware watchdog to %s.", FORMAT_TIMESPAN(watchdog_timeout, 0));
flags = WDIOS_ENABLECARD;
if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) {
/* ENOTTY means the watchdog is always enabled so we're fine */
log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno,
"Failed to enable hardware watchdog, ignoring: %m");
if (!ERRNO_IS_NOT_SUPPORTED(errno))
return -errno;
}
if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
watchdog_last_ping = now(clock_boottime_or_monotonic());
}
if (watchdog_timeout == USEC_INFINITY) {
r = watchdog_get_timeout();
if (r < 0)
return log_error_errno(errno, "Failed to query watchdog HW timeout: %m");
}
r = watchdog_set_enable(true);
if (r < 0)
return r;
log_info("Watchdog running with a timeout of %s.", FORMAT_TIMESPAN(watchdog_timeout, 0));
return watchdog_ping_now();
return 0;
}
static int open_watchdog(void) {
struct watchdog_info ident;
const char *fn;
int r;
if (watchdog_fd >= 0)
return 0;
@ -140,11 +85,7 @@ static int open_watchdog(void) {
ident.firmware_version,
fn);
r = update_timeout();
if (r < 0)
watchdog_close(true);
return r;
return update_timeout();
}
int watchdog_set_device(const char *path) {
@ -161,38 +102,21 @@ int watchdog_set_device(const char *path) {
}
int watchdog_setup(usec_t timeout) {
usec_t previous_timeout;
int r;
/* timeout=0 closes the device whereas passing timeout=USEC_INFINITY
* opens it (if needed) without configuring any particular timeout and
* thus reuses the programmed value (therefore it's a nop if the device
* is already opened).
*/
if (timeout == 0) {
watchdog_close(true);
return 0;
}
/* Let's shortcut duplicated requests */
if (watchdog_fd >= 0 && (timeout == watchdog_timeout || timeout == USEC_INFINITY))
return 0;
/* Initialize the watchdog timeout with the caller value. This value is
* going to be updated by update_timeout() with the closest value
* supported by the driver */
previous_timeout = watchdog_timeout;
watchdog_timeout = timeout;
/* If we didn't open the watchdog yet and didn't get any explicit
* timeout value set, don't do anything */
if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY)
return 0;
if (watchdog_fd < 0)
return open_watchdog();
r = update_timeout();
if (r < 0)
watchdog_timeout = previous_timeout;
return r;
return update_timeout();
}
usec_t watchdog_runtime_wait(void) {
@ -214,7 +138,7 @@ usec_t watchdog_runtime_wait(void) {
int watchdog_ping(void) {
usec_t ntime;
if (watchdog_timeout == 0)
if (!timestamp_is_set(watchdog_timeout))
return 0;
if (watchdog_fd < 0)
@ -231,20 +155,24 @@ int watchdog_ping(void) {
return 0;
}
return watchdog_ping_now();
if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
watchdog_last_ping = ntime;
return 0;
}
void watchdog_close(bool disarm) {
/* Once closed, pinging the device becomes a NOP and we request a new
* call to watchdog_setup() to open the device again. */
watchdog_timeout = 0;
if (watchdog_fd < 0)
return;
if (disarm) {
(void) watchdog_set_enable(false);
int flags;
/* Explicitly disarm it */
flags = WDIOS_DISABLECARD;
if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
log_warning_errno(errno, "Failed to disable hardware watchdog, ignoring: %m");
/* To be sure, use magic close logic, too */
for (;;) {
@ -261,4 +189,8 @@ void watchdog_close(bool disarm) {
}
watchdog_fd = safe_close(watchdog_fd);
/* Once closed, pinging the device becomes a NOP and we request a new
* call to watchdog_setup() to open the device again. */
watchdog_timeout = USEC_INFINITY;
}