1
0
mirror of https://github.com/systemd/systemd synced 2026-04-25 16:34:50 +02:00

Compare commits

...

8 Commits

Author SHA1 Message Date
Zbigniew Jędrzejewski-Szmek
87d44b6e1a
Merge pull request #23002 from yuwata/udev-use-child-event
udev: use child event source
2022-04-13 08:32:16 +02:00
Yu Watanabe
82e0b63183 udev: use child event source to manage workers 2022-04-13 01:08:42 +09:00
Yu Watanabe
a79cba3326 udev: use EventResult type
This also adds EVENT_RESULT_SUCCESS for readability.
2022-04-13 01:00:08 +09:00
Yu Watanabe
fbae50904f sd-event: make inotify event work after the process is forked 2022-04-13 01:00:08 +09:00
Yu Watanabe
86587c93b0 sd-event: do not kill a child process from another child 2022-04-13 01:00:08 +09:00
Yu Watanabe
01e6af7374 sd-event: do not update signal fd after PID is changed
Otherwise, child event source will not work after the process is forked
and the event source is unref()ed on the child process.
2022-04-13 01:00:08 +09:00
Yu Watanabe
54988a27b9 sd-event: set pid to event source after all setup processes finished
Otherwise, the assertion in source_disconnect() may be triggered,
2022-04-13 01:00:08 +09:00
Yu Watanabe
91c700713f sd-event: rebreak comments 2022-04-13 01:00:08 +09:00
2 changed files with 89 additions and 97 deletions

View File

@ -706,6 +706,9 @@ static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig
return;
}
if (event_pid_changed(e))
return;
assert(d->fd >= 0);
if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
@ -851,6 +854,9 @@ static void source_disconnect(sd_event_source *s) {
break;
case SOURCE_CHILD:
if (event_pid_changed(s->event))
s->child.process_owned = false;
if (s->child.pid > 0) {
if (event_source_is_online(s)) {
assert(s->event->n_online_child_sources > 0);
@ -1426,7 +1432,6 @@ _public_ int sd_event_add_child(
return -ENOMEM;
s->wakeup = WAKEUP_EVENT_SOURCE;
s->child.pid = pid;
s->child.options = options;
s->child.callback = callback;
s->userdata = userdata;
@ -1436,7 +1441,7 @@ _public_ int sd_event_add_child(
* pin the PID, and make regular waitid() handling race-free. */
if (shall_use_pidfd()) {
s->child.pidfd = pidfd_open(s->child.pid, 0);
s->child.pidfd = pidfd_open(pid, 0);
if (s->child.pidfd < 0) {
/* Propagate errors unless the syscall is not supported or blocked */
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
@ -1446,10 +1451,6 @@ _public_ int sd_event_add_child(
} else
s->child.pidfd = -1;
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
if (r < 0)
return r;
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
/* We have a pidfd and we only want to watch for exit */
r = source_child_pidfd_register(s, s->enabled);
@ -1465,6 +1466,12 @@ _public_ int sd_event_add_child(
e->need_process_child = true;
}
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
if (r < 0)
return r;
/* These must be done after everything succeeds. */
s->child.pid = pid;
e->n_online_child_sources++;
if (ret)
@ -1691,7 +1698,8 @@ static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
if (d->fd >= 0) {
if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
if (!event_pid_changed(e) &&
epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
safe_close(d->fd);
@ -1801,7 +1809,7 @@ static void event_free_inode_data(
if (d->inotify_data) {
if (d->wd >= 0) {
if (d->inotify_data->fd >= 0) {
if (d->inotify_data->fd >= 0 && !event_pid_changed(e)) {
/* So here's a problem. At the time this runs the watch descriptor might already be
* invalidated, because an IN_IGNORED event might be queued right the moment we enter
* the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
@ -3218,23 +3226,16 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
e->need_process_child = false;
/*
So, this is ugly. We iteratively invoke waitid() with P_PID
+ WNOHANG for each PID we wait for, instead of using
P_ALL. This is because we only want to get child
information of very specific child processes, and not all
of them. We might not have processed the SIGCHLD even of a
previous invocation and we don't want to maintain a
unbounded *per-child* event queue, hence we really don't
want anything flushed out of the kernel's queue that we
don't care about. Since this is O(n) this means that if you
have a lot of processes you probably want to handle SIGCHLD
yourself.
We do not reap the children here (by using WNOWAIT), this
is only done after the event source is dispatched so that
the callback still sees the process as a zombie.
*/
/* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
* for, instead of using P_ALL. This is because we only want to get child information of very
* specific child processes, and not all of them. We might not have processed the SIGCHLD event
* of a previous invocation and we don't want to maintain a unbounded *per-child* event queue,
* hence we really don't want anything flushed out of the kernel's queue that we don't care
* about. Since this is O(n) this means that if you have a lot of processes you probably want
* to handle SIGCHLD yourself.
*
* We do not reap the children here (by using WNOWAIT), this is only done after the event
* source is dispatched so that the callback still sees the process as a zombie. */
HASHMAP_FOREACH(s, e->child_sources) {
assert(s->type == SOURCE_CHILD);
@ -3251,7 +3252,9 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
if (s->child.exited)
continue;
if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
if (EVENT_SOURCE_WATCH_PIDFD(s))
/* There's a usable pidfd known for this event source? Then don't waitid() for
* it here */
continue;
zero(s->child.siginfo);
@ -3266,10 +3269,9 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
s->child.exited = true;
if (!zombie && (s->child.options & WEXITED)) {
/* If the child isn't dead then let's
* immediately remove the state change
* from the queue, since there's no
* benefit in leaving it queued */
/* If the child isn't dead then let's immediately remove the state
* change from the queue, since there's no benefit in leaving it
* queued. */
assert(s->child.options & (WSTOPPED|WCONTINUED));
(void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
@ -3324,19 +3326,16 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, i
assert_return(events == EPOLLIN, -EIO);
assert(min_priority);
/* If there's a signal queued on this priority and SIGCHLD is
on this priority too, then make sure to recheck the
children we watch. This is because we only ever dequeue
the first signal per priority, and if we dequeue one, and
SIGCHLD might be enqueued later we wouldn't know, but we
might have higher priority children we care about hence we
need to check that explicitly. */
/* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
* sure to recheck the children we watch. This is because we only ever dequeue the first signal
* per priority, and if we dequeue one, and SIGCHLD might be enqueued later we wouldn't know,
* but we might have higher priority children we care about hence we need to check that
* explicitly. */
if (sigismember(&d->sigset, SIGCHLD))
e->need_process_child = true;
/* If there's already an event source pending for this
* priority we don't read another */
/* If there's already an event source pending for this priority we don't read another */
if (d->current)
return 0;

View File

@ -151,6 +151,7 @@ typedef enum WorkerState {
typedef struct Worker {
Manager *manager;
pid_t pid;
sd_event_source *child_event_source;
sd_device_monitor *monitor;
WorkerState state;
Event *event;
@ -160,6 +161,7 @@ typedef struct Worker {
typedef enum EventResult {
EVENT_RESULT_NERRNO_MIN = -ERRNO_MAX,
EVENT_RESULT_NERRNO_MAX = -1,
EVENT_RESULT_SUCCESS = 0,
EVENT_RESULT_EXIT_STATUS_BASE = 0,
EVENT_RESULT_EXIT_STATUS_MAX = 255,
EVENT_RESULT_TRY_AGAIN = 256, /* when the block device is locked by another process. */
@ -202,9 +204,10 @@ static Worker *worker_free(Worker *worker) {
if (!worker)
return NULL;
assert(worker->manager);
if (worker->manager)
hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
sd_event_source_unref(worker->child_event_source);
sd_device_monitor_unref(worker->monitor);
event_free(worker->event);
@ -255,6 +258,8 @@ static Manager* manager_free(Manager *manager) {
DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata);
static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) {
_cleanup_(worker_freep) Worker *worker = NULL;
int r;
@ -272,17 +277,21 @@ static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_
return -ENOMEM;
*worker = (Worker) {
.manager = manager,
.monitor = sd_device_monitor_ref(worker_monitor),
.pid = pid,
};
r = sd_event_add_child(manager->event, &worker->child_event_source, pid, WEXITED, on_sigchld, worker);
if (r < 0)
return r;
r = hashmap_ensure_put(&manager->workers, &worker_hash_op, PID_TO_PTR(pid), worker);
if (r < 0)
return r;
*ret = TAKE_PTR(worker);
worker->manager = manager;
*ret = TAKE_PTR(worker);
return 0;
}
@ -363,7 +372,7 @@ static int on_kill_workers_event(sd_event_source *s, uint64_t usec, void *userda
return 1;
}
static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, int result) {
static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventResult result) {
int r;
assert(dev);
@ -372,7 +381,7 @@ static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, int res
if (!monitor)
return;
if (result != 0) {
if (result != EVENT_RESULT_SUCCESS) {
(void) device_add_property(dev, "UDEV_WORKER_FAILED", "1");
switch (result) {
@ -415,7 +424,7 @@ static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, int res
"Failed to broadcast event to libudev listeners, ignoring: %m");
}
static int worker_send_result(Manager *manager, int result) {
static int worker_send_result(Manager *manager, EventResult result) {
assert(manager);
assert(manager->worker_watch[WRITE_END] >= 0);
@ -1200,7 +1209,7 @@ static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdat
assert(manager);
for (;;) {
int result;
EventResult result;
struct iovec iovec = IOVEC_MAKE(&result, sizeof(result));
CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
struct msghdr msghdr = {
@ -1543,59 +1552,47 @@ static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void
return 1;
}
static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
Manager *manager = userdata;
static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata) {
Worker *worker = ASSERT_PTR(userdata);
Manager *manager = ASSERT_PTR(worker->manager);
sd_device *dev = worker->event ? ASSERT_PTR(worker->event->dev) : NULL;
EventResult result;
int r;
assert(manager);
assert(si);
for (;;) {
pid_t pid;
int status;
Worker *worker;
switch (si->si_code) {
case CLD_EXITED:
if (si->si_status == 0)
log_device_debug(dev, "Worker ["PID_FMT"] exited.", si->si_pid);
else
log_device_warning(dev, "Worker ["PID_FMT"] exited with return code %i.",
si->si_pid, si->si_status);
result = EVENT_RESULT_EXIT_STATUS_BASE + si->si_status;
break;
pid = waitpid(-1, &status, WNOHANG);
if (pid <= 0)
break;
case CLD_KILLED:
case CLD_DUMPED:
log_device_warning(dev, "Worker ["PID_FMT"] terminated by signal %i (%s).",
si->si_pid, si->si_status, signal_to_string(si->si_status));
result = EVENT_RESULT_SIGNAL_BASE + si->si_status;
break;
worker = hashmap_get(manager->workers, PID_TO_PTR(pid));
if (!worker) {
log_warning("Worker ["PID_FMT"] is unknown, ignoring", pid);
continue;
}
if (WIFEXITED(status)) {
if (WEXITSTATUS(status) == 0)
log_debug("Worker ["PID_FMT"] exited", pid);
else
log_warning("Worker ["PID_FMT"] exited with return code %i", pid, WEXITSTATUS(status));
} else if (WIFSIGNALED(status))
log_warning("Worker ["PID_FMT"] terminated by signal %i (%s)", pid, WTERMSIG(status), signal_to_string(WTERMSIG(status)));
else if (WIFSTOPPED(status)) {
log_info("Worker ["PID_FMT"] stopped", pid);
continue;
} else if (WIFCONTINUED(status)) {
log_info("Worker ["PID_FMT"] continued", pid);
continue;
} else
log_warning("Worker ["PID_FMT"] exit with status 0x%04x", pid, status);
if ((!WIFEXITED(status) || WEXITSTATUS(status) != 0) && worker->event) {
log_device_error(worker->event->dev, "Worker ["PID_FMT"] failed", pid);
/* delete state from disk */
device_delete_db(worker->event->dev);
device_tag_index(worker->event->dev, NULL, false);
/* Forward kernel event to libudev listeners */
device_broadcast(manager->monitor, worker->event->dev,
WIFEXITED(status) ? EVENT_RESULT_EXIT_STATUS_BASE + WEXITSTATUS(status):
WIFSIGNALED(status) ? EVENT_RESULT_SIGNAL_BASE + WTERMSIG(status) : 0);
}
worker_free(worker);
default:
assert_not_reached();
}
if (result != EVENT_RESULT_SUCCESS && dev) {
/* delete state from disk */
device_delete_db(dev);
device_tag_index(dev, NULL, false);
/* Forward kernel event to libudev listeners */
device_broadcast(manager->monitor, dev, result);
}
worker_free(worker);
/* we can start new workers, try to schedule events */
event_queue_start(manager);
@ -2017,10 +2014,6 @@ static int main_loop(Manager *manager) {
if (r < 0)
return log_error_errno(r, "Failed to create SIGHUP event source: %m");
r = sd_event_add_signal(manager->event, NULL, SIGCHLD, on_sigchld, manager);
if (r < 0)
return log_error_errno(r, "Failed to create SIGCHLD event source: %m");
r = sd_event_set_watchdog(manager->event, true);
if (r < 0)
return log_error_errno(r, "Failed to create watchdog event source: %m");