mirror of https://github.com/systemd/systemd synced 2025-11-19 08:44:44 +01:00

Compare commits

...

2 Commits

Author SHA1 Message Date
Yu Watanabe
8eefd0f4de core: increment start limit counter only when we can start the unit
Otherwise, e.g. requesting to start a unit that is currently stopping may
push it into the failed state.

This change:
- renames .can_start() -> .test_startable(), and allows it to return a
  boolean, refusing to start units when it returns false,
- refuses earlier to start units that are in the deactivating state, so
  several redundant conditions in .start() can be dropped,
- moves the checks for unit states mapped to UNIT_ACTIVATING from
  .start() to .test_startable().

Fixes #39247.
2025-10-23 15:51:28 +02:00
Frantisek Sumsal
c05758663b test: properly wait for the forked process
The process forked off by `systemd-notify --fork` is not a child of the
current shell, so using `wait` doesn't work. This later causes a race:
the test occasionally fails because it attempts to start a new
systemd-socket-activate instance before the old one is completely
gone:

[ 1488.947744] TEST-74-AUX-UTILS.sh[1938]: Child 1947 died with code 0
[ 1488.947952] TEST-74-AUX-UTILS.sh[1933]: + assert_eq hello hello
[ 1488.949716] TEST-74-AUX-UTILS.sh[1948]: + set +ex
[ 1488.950112] TEST-74-AUX-UTILS.sh[1950]: ++ cat /proc/1938/comm
[ 1488.945555] systemd[1]: Started systemd-networkd.service - Network Management.
[ 1488.950365] TEST-74-AUX-UTILS.sh[1933]: + assert_in systemd-socket systemd-socket-
[ 1488.950563] TEST-74-AUX-UTILS.sh[1951]: + set +ex
[ 1488.950766] TEST-74-AUX-UTILS.sh[1933]: + kill 1938
[ 1488.950766] TEST-74-AUX-UTILS.sh[1933]: + wait 1938
[ 1488.950766] TEST-74-AUX-UTILS.sh[1933]: .//usr/lib/systemd/tests/testdata/units/TEST-74-AUX-UTILS.socket-activate.sh: line 14: wait: pid 1938 is not a child of this shell
[ 1488.950766] TEST-74-AUX-UTILS.sh[1933]: + :
[ 1488.951486] TEST-74-AUX-UTILS.sh[1952]: ++ systemd-notify --fork -- systemd-socket-activate -l 1234 --now socat ACCEPT-FD:3 PIPE
[ 1488.952222] TEST-74-AUX-UTILS.sh[1953]: Failed to listen on [::]:1234: Address already in use
[ 1488.952222] TEST-74-AUX-UTILS.sh[1953]: Failed to open '1234': Address already in use
[ 1488.956831] TEST-74-AUX-UTILS.sh[1933]: + PID=1953
[ 1488.957078] TEST-74-AUX-UTILS.sh[102]: + echo 'Subtest /usr/lib/systemd/tests/testdata/units/TEST-74-AUX-UTILS.socket-activate.sh failed'
[ 1488.957078] TEST-74-AUX-UTILS.sh[102]: Subtest /usr/lib/systemd/tests/testdata/units/TEST-74-AUX-UTILS.socket-activate.sh failed
2025-10-23 11:56:26 +01:00
11 changed files with 96 additions and 87 deletions

View File

@@ -1039,7 +1039,7 @@ static bool automount_supported(void) {
         return supported;
 }
 
-static int automount_can_start(Unit *u) {
+static int automount_test_startable(Unit *u) {
         Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         int r;
 
@@ -1049,7 +1049,7 @@ static int automount_can_start(Unit *u) {
                 return r;
         }
 
-        return 1;
+        return true;
 }
 
 static const char* const automount_result_table[_AUTOMOUNT_RESULT_MAX] = {
@@ -1115,5 +1115,5 @@ const UnitVTable automount_vtable = {
                 },
         },
 
-        .can_start = automount_can_start,
+        .test_startable = automount_test_startable,
 };

View File

@@ -1363,19 +1363,6 @@ static int mount_start(Unit *u) {
         Mount *m = ASSERT_PTR(MOUNT(u));
         int r;
 
-        /* We cannot fulfill this request right now, try again later
-         * please! */
-        if (IN_SET(m->state,
-                   MOUNT_UNMOUNTING,
-                   MOUNT_UNMOUNTING_SIGTERM,
-                   MOUNT_UNMOUNTING_SIGKILL,
-                   MOUNT_CLEANING))
-                return -EAGAIN;
-
-        /* Already on it! */
-        if (IN_SET(m->state, MOUNT_MOUNTING, MOUNT_MOUNTING_DONE))
-                return 0;
-
         assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
 
         r = unit_acquire_invocation_id(u);
@@ -2382,10 +2369,14 @@ static int mount_can_clean(Unit *u, ExecCleanMask *ret) {
         return exec_context_get_clean_mask(&m->exec_context, ret);
 }
 
-static int mount_can_start(Unit *u) {
+static int mount_test_startable(Unit *u) {
         Mount *m = ASSERT_PTR(MOUNT(u));
         int r;
 
+        /* It is already being started. */
+        if (IN_SET(m->state, MOUNT_MOUNTING, MOUNT_MOUNTING_DONE))
+                return false;
+
         r = unit_test_start_limit(u);
         if (r < 0) {
                 mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT, /* flush_result = */ false);
@@ -2395,7 +2386,7 @@ static int mount_can_start(Unit *u) {
         if (!get_mount_parameters_fragment(m))
                 return -ENOENT;
 
-        return 1;
+        return true;
 }
 
 static int mount_subsystem_ratelimited(Manager *m) {
@@ -2561,7 +2552,7 @@ const UnitVTable mount_vtable = {
                 },
         },
 
-        .can_start = mount_can_start,
+        .test_startable = mount_test_startable,
 
         .notify_plymouth = true,
 };

View File

@@ -898,7 +898,7 @@ static void path_reset_failed(Unit *u) {
         p->result = PATH_SUCCESS;
 }
 
-static int path_can_start(Unit *u) {
+static int path_test_startable(Unit *u) {
         Path *p = ASSERT_PTR(PATH(u));
         int r;
 
@@ -908,7 +908,7 @@ static int path_can_start(Unit *u) {
                 return r;
         }
 
-        return 1;
+        return true;
 }
 
 static void activation_details_path_done(ActivationDetails *details) {
@@ -1042,7 +1042,7 @@ const UnitVTable path_vtable = {
         .bus_set_property = bus_path_set_property,
 
-        .can_start = path_can_start,
+        .test_startable = path_test_startable,
 };
 
 const ActivationDetailsVTable activation_details_path_vtable = {

View File

@@ -2951,17 +2951,6 @@ static int service_start(Unit *u) {
         Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
-        /* We cannot fulfill this request right now, try again later
-         * please! */
-        if (IN_SET(s->state,
-                   SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
-                   SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING))
-                return -EAGAIN;
-
-        /* Already on it! */
-        if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
-                return 0;
-
         if (s->state == SERVICE_AUTO_RESTART) {
                 /* As mentioned in unit_start(), we allow manual starts to act as "hurry up" signals
                  * for auto restart. We need to re-enqueue the job though, as the job type has changed
@@ -5558,10 +5547,17 @@ static const char* service_finished_job(Unit *u, JobType t, JobResult result) {
         return NULL;
 }
 
-static int service_can_start(Unit *u) {
+static int service_test_startable(Unit *u) {
         Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
+        /* First check the state, and do not increment start limit counter if the service cannot start due to
+         * that e.g. it is already being started. Note, the service states mapped to UNIT_ACTIVE,
+         * UNIT_RELOADING, UNIT_DEACTIVATING, UNIT_MAINTENANCE, and UNIT_REFRESHING are already filtered in
+         * unit_start(). Hence, here we only need to check states that mapped to UNIT_ACTIVATING. */
+        if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
+                return false;
+
         /* Make sure we don't enter a busy loop of some kind. */
         r = unit_test_start_limit(u);
         if (r < 0) {
@@ -5569,7 +5565,7 @@ static int service_can_start(Unit *u) {
                 return r;
         }
 
-        return 1;
+        return true;
 }
 
 static void service_release_resources(Unit *u) {
@@ -5851,7 +5847,7 @@ const UnitVTable service_vtable = {
                 .finished_job = service_finished_job,
         },
 
-        .can_start = service_can_start,
+        .test_startable = service_test_startable,
 
         .notify_plymouth = true,

View File

@@ -2623,26 +2623,6 @@ static int socket_start(Unit *u) {
         Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
-        /* We cannot fulfill this request right now, try again later
-         * please! */
-        if (IN_SET(s->state,
-                   SOCKET_STOP_PRE,
-                   SOCKET_STOP_PRE_SIGKILL,
-                   SOCKET_STOP_PRE_SIGTERM,
-                   SOCKET_STOP_POST,
-                   SOCKET_FINAL_SIGTERM,
-                   SOCKET_FINAL_SIGKILL,
-                   SOCKET_CLEANING))
-                return -EAGAIN;
-
-        /* Already on it! */
-        if (IN_SET(s->state,
-                   SOCKET_START_PRE,
-                   SOCKET_START_OPEN,
-                   SOCKET_START_CHOWN,
-                   SOCKET_START_POST))
-                return 0;
-
         /* Cannot run this without the service being around */
         if (UNIT_ISSET(s->service)) {
                 Service *service = ASSERT_PTR(SERVICE(UNIT_DEREF(s->service)));
@@ -3650,17 +3630,25 @@ static int socket_can_clean(Unit *u, ExecCleanMask *ret) {
         return exec_context_get_clean_mask(&s->exec_context, ret);
 }
 
-static int socket_can_start(Unit *u) {
+static int socket_test_startable(Unit *u) {
         Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
+        /* It is already being started. */
+        if (IN_SET(s->state,
+                   SOCKET_START_PRE,
+                   SOCKET_START_OPEN,
+                   SOCKET_START_CHOWN,
+                   SOCKET_START_POST))
+                return false;
+
         r = unit_test_start_limit(u);
         if (r < 0) {
                 socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
                 return r;
         }
 
-        return 1;
+        return true;
 }
 
 static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = {
@@ -3801,5 +3789,5 @@ const UnitVTable socket_vtable = {
                 },
         },
 
-        .can_start = socket_can_start,
+        .test_startable = socket_test_startable,
 };

View File

@@ -864,19 +864,6 @@ static int swap_start(Unit *u) {
         int r;
 
         assert(s);
 
-        /* We cannot fulfill this request right now, try again later please! */
-        if (IN_SET(s->state,
-                   SWAP_DEACTIVATING,
-                   SWAP_DEACTIVATING_SIGTERM,
-                   SWAP_DEACTIVATING_SIGKILL,
-                   SWAP_CLEANING))
-                return -EAGAIN;
-
-        /* Already on it! */
-        if (s->state == SWAP_ACTIVATING)
-                return 0;
-
         assert(IN_SET(s->state, SWAP_DEAD, SWAP_FAILED));
 
         if (detect_container() > 0)
@@ -1521,17 +1508,21 @@ static int swap_can_clean(Unit *u, ExecCleanMask *ret) {
         return exec_context_get_clean_mask(&s->exec_context, ret);
 }
 
-static int swap_can_start(Unit *u) {
+static int swap_test_startable(Unit *u) {
         Swap *s = ASSERT_PTR(SWAP(u));
         int r;
 
+        /* It is already being started. */
+        if (s->state == SWAP_ACTIVATING)
+                return false;
+
         r = unit_test_start_limit(u);
         if (r < 0) {
                 swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
                 return r;
         }
 
-        return 1;
+        return true;
 }
 
 int swap_get_priority(const Swap *s) {
@@ -1652,7 +1643,7 @@ const UnitVTable swap_vtable = {
                 },
         },
 
-        .can_start = swap_can_start,
+        .test_startable = swap_test_startable,
 
         .notify_plymouth = true,
 };

View File

@@ -906,7 +906,7 @@ static int timer_can_clean(Unit *u, ExecCleanMask *ret) {
         return 0;
 }
 
-static int timer_can_start(Unit *u) {
+static int timer_test_startable(Unit *u) {
         Timer *t = ASSERT_PTR(TIMER(u));
         int r;
 
@@ -916,7 +916,7 @@ static int timer_can_start(Unit *u) {
                 return r;
         }
 
-        return 1;
+        return true;
 }
 
 static void activation_details_timer_serialize(const ActivationDetails *details, FILE *f) {
@@ -1093,7 +1093,7 @@ const UnitVTable timer_vtable = {
         .bus_set_property = bus_timer_set_property,
 
-        .can_start = timer_can_start,
+        .test_startable = timer_test_startable,
 };
 
 const ActivationDetailsVTable activation_details_timer_vtable = {

View File

@@ -1936,7 +1936,7 @@ int unit_start(Unit *u, ActivationDetails *details) {
         state = unit_active_state(u);
         if (UNIT_IS_ACTIVE_OR_RELOADING(state))
                 return -EALREADY;
-        if (state == UNIT_MAINTENANCE)
+        if (IN_SET(state, UNIT_DEACTIVATING, UNIT_MAINTENANCE))
                 return -EAGAIN;
 
         /* Units that aren't loaded cannot be started */
@@ -1983,10 +1983,11 @@ int unit_start(Unit *u, ActivationDetails *details) {
         if (u->freezer_state != FREEZER_RUNNING)
                 return -EDEADLK;
 
-        /* Check our ability to start early so that failure conditions don't cause us to enter a busy loop. */
-        if (UNIT_VTABLE(u)->can_start) {
-                r = UNIT_VTABLE(u)->can_start(u);
-                if (r < 0)
+        /* Check our ability to start early so that ratelimited or already starting/started units don't
+         * cause us to enter a busy loop. */
+        if (UNIT_VTABLE(u)->test_startable) {
+                r = UNIT_VTABLE(u)->test_startable(u);
+                if (r <= 0)
                         return r;
         }

View File

@@ -726,8 +726,8 @@ typedef struct UnitVTable {
         bool (*supported)(void);
 
         /* If this function is set, it's invoked first as part of starting a unit to allow start rate
-         * limiting checks to occur before we do anything else. */
-        int (*can_start)(Unit *u);
+         * limiting checks and unit state checks to occur before we do anything else. */
+        int (*test_startable)(Unit *u);
 
         /* Returns > 0 if the whole subsystem is ratelimited, and new start operations should not be started
          * for this unit type right now. */
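
As a rough illustration of the contract described in the comment above: a hypothetical unit type (the EXAMPLE/Example names and the example_enter_dead() helper are invented for this sketch, not part of the patch) would implement the callback along the same lines as the real implementations in the hunks above:

static int example_test_startable(Unit *u) {
        Example *e = ASSERT_PTR(EXAMPLE(u)); /* hypothetical unit type */
        int r;

        /* Already being started: return false, so that unit_start() bails
         * out early with 0 and, crucially, the start limit counter below
         * is never touched by the redundant request. */
        if (e->state == EXAMPLE_STARTING)
                return false;

        /* Only a request that can actually proceed counts against the
         * start limit; on a hit the unit enters the failed state and the
         * negative errno is propagated back through unit_start(). */
        r = unit_test_start_limit(u);
        if (r < 0) {
                example_enter_dead(e, EXAMPLE_FAILURE_START_LIMIT_HIT);
                return r;
        }

        return true;
}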

View File

@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+# For issue #39247.
+
+at_exit() {
+    set +e
+
+    rm -rf /run/systemd/system/systemd-resolved.service.d/
+    systemctl daemon-reload
+    systemctl restart systemd-resolved.service
+}
+
+trap at_exit EXIT
+
+mkdir -p /run/systemd/system/systemd-resolved.service.d/
+cat >/run/systemd/system/systemd-resolved.service.d/99-start-limit.conf <<EOF
+[Unit]
+StartLimitBurst=5
+StartLimitInterval=30
+
+[Service]
+ExecStopPost=sleep 10
+EOF
+
+systemctl daemon-reload
+systemctl restart systemd-resolved.service
+systemctl reset-failed systemd-resolved.service
+systemctl status --no-pager systemd-resolved.service
+systemctl show systemd-resolved.service | grep StartLimit
+
+for i in {1..5}; do
+    echo "Start #$i"
+    systemctl stop --no-block systemd-resolved.service
+    if ! resolvectl; then
+        journalctl -o short-monotonic --no-hostname --no-pager -u systemd-resolved.service -n 15
+        exit 1
+    fi
+done

View File

@@ -11,7 +11,7 @@ assert_in systemd-socket "$(cat /proc/"$PID"/comm)"
 assert_eq "$(echo -n hello | socat - 'TCP:localhost:1234')" hello
 assert_in systemd-socket "$(cat /proc/"$PID"/comm)"
 kill "$PID"
-wait "$PID" || :
+tail --pid="$PID" -f /dev/null
 
 PID=$(systemd-notify --fork -- systemd-socket-activate -l 1234 --now socat ACCEPT-FD:3 PIPE)
 for _ in {1..100}; do
@@ -24,7 +24,7 @@ for _ in {1..100}; do
 
     if [[ "$(cat /proc/"$PID"/comm || :)" =~ socat ]]; then
         assert_eq "$(echo -n bye | socat - 'TCP:localhost:1234')" bye
-        wait "$PID" || :
+        tail --pid="$PID" -f /dev/null
         break
     fi
 done
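
For background on the wait -> tail --pid swap above: the shell's wait builtin, like waitpid() in C, only works for direct children of the calling process, whereas tail --pid="$PID" -f /dev/null simply polls until the PID disappears and therefore works for any process. A minimal C sketch of the same distinction, Linux-specific and assuming kernel >= 5.3 for pidfd_open(); this program is illustrative only, not part of the patch:

#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Block until an arbitrary PID exits. waitpid() would fail with ECHILD
 * unless the PID is our direct child -- the same limitation that made
 * `wait "$PID"` fail in the test above. A pidfd polls as readable once
 * the process it refers to has exited. */
static int wait_for_any_pid(pid_t pid) {
        int pidfd = (int) syscall(SYS_pidfd_open, pid, 0);
        if (pidfd < 0)
                return -errno;

        struct pollfd p = { .fd = pidfd, .events = POLLIN };
        int r = poll(&p, 1, /* timeout= */ -1);
        int saved_errno = errno;
        close(pidfd);

        return r < 0 ? -saved_errno : 0;
}

int main(int argc, char *argv[]) {
        if (argc != 2) {
                fprintf(stderr, "Usage: %s PID\n", argv[0]);
                return EXIT_FAILURE;
        }

        int r = wait_for_any_pid((pid_t) strtol(argv[1], NULL, 10));
        if (r < 0) {
                fprintf(stderr, "Failed to wait for PID: %s\n", strerror(-r));
                return EXIT_FAILURE;
        }

        return EXIT_SUCCESS;
}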