2026-04-26 00:45:09 +02:00
2 changed files with 97 additions and 89 deletions
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@ -706,9 +706,6 @@ static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig
                return;
        }

-        if (event_pid_changed(e))
-                return;
-
        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
@ -854,9 +851,6 @@ static void source_disconnect(sd_event_source *s) {
                break;

        case SOURCE_CHILD:
-                if (event_pid_changed(s->event))
-                        s->child.process_owned = false;
-
                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
@ -1432,6 +1426,7 @@ _public_ int sd_event_add_child(
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
+        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
@ -1441,7 +1436,7 @@ _public_ int sd_event_add_child(
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
-                s->child.pidfd = pidfd_open(pid, 0);
+                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
@ -1451,6 +1446,10 @@ _public_ int sd_event_add_child(
        } else
                s->child.pidfd = -1;

+        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
+        if (r < 0)
+                return r;
+
        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
@ -1466,12 +1465,6 @@ _public_ int sd_event_add_child(
                e->need_process_child = true;
        }

-        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
-        if (r < 0)
-                return r;
-
-        /* These must be done after everything succeeds. */
-        s->child.pid = pid;
        e->n_online_child_sources++;

        if (ret)
@ -1698,8 +1691,7 @@ static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
-                if (!event_pid_changed(e) &&
-                    epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
+                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
@ -1809,7 +1801,7 @@ static void event_free_inode_data(
        if (d->inotify_data) {

                if (d->wd >= 0) {
-                        if (d->inotify_data->fd >= 0 && !event_pid_changed(e)) {
+                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
@ -3226,16 +3218,23 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori

        e->need_process_child = false;

-        /* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
-         * for, instead of using P_ALL. This is because we only want to get child information of very
-         * specific child processes, and not all of them. We might not have processed the SIGCHLD event
-         * of a previous invocation and we don't want to maintain a unbounded *per-child* event queue,
-         * hence we really don't want anything flushed out of the kernel's queue that we don't care
-         * about. Since this is O(n) this means that if you have a lot of processes you probably want
-         * to handle SIGCHLD yourself.
-         *
-         * We do not reap the children here (by using WNOWAIT), this is only done after the event
-         * source is dispatched so that the callback still sees the process as a zombie. */
+        /*
+           So, this is ugly. We iteratively invoke waitid() with P_PID
+           + WNOHANG for each PID we wait for, instead of using
+           P_ALL. This is because we only want to get child
+           information of very specific child processes, and not all
+           of them. We might not have processed the SIGCHLD even of a
+           previous invocation and we don't want to maintain a
+           unbounded *per-child* event queue, hence we really don't
+           want anything flushed out of the kernel's queue that we
+           don't care about. Since this is O(n) this means that if you
+           have a lot of processes you probably want to handle SIGCHLD
+           yourself.
+
+           We do not reap the children here (by using WNOWAIT), this
+           is only done after the event source is dispatched so that
+           the callback still sees the process as a zombie.
+        */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);
@ -3252,9 +3251,7 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
                if (s->child.exited)
                        continue;

-                if (EVENT_SOURCE_WATCH_PIDFD(s))
-                        /* There's a usable pidfd known for this event source? Then don't waitid() for
-                         * it here */
+                if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
                        continue;

                zero(s->child.siginfo);
@ -3269,9 +3266,10 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
-                                /* If the child isn't dead then let's immediately remove the state
-                                 * change from the queue, since there's no benefit in leaving it
-                                 * queued. */
+                                /* If the child isn't dead then let's
+                                 * immediately remove the state change
+                                 * from the queue, since there's no
+                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
@ -3326,16 +3324,19 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, i
        assert_return(events == EPOLLIN, -EIO);
        assert(min_priority);

-        /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
-         * sure to recheck the children we watch. This is because we only ever dequeue the first signal
-         * per priority, and if we dequeue one, and SIGCHLD might be enqueued later we wouldn't know,
-         * but we might have higher priority children we care about hence we need to check that
-         * explicitly. */
+        /* If there's a signal queued on this priority and SIGCHLD is
+           on this priority too, then make sure to recheck the
+           children we watch. This is because we only ever dequeue
+           the first signal per priority, and if we dequeue one, and
+           SIGCHLD might be enqueued later we wouldn't know, but we
+           might have higher priority children we care about hence we
+           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

-        /* If there's already an event source pending for this priority we don't read another */
+        /* If there's already an event source pending for this
+         * priority we don't read another */
        if (d->current)
                return 0;

--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@ -151,7 +151,6 @@ typedef enum WorkerState {
 typedef struct Worker {
        Manager *manager;
        pid_t pid;
-        sd_event_source *child_event_source;
        sd_device_monitor *monitor;
        WorkerState state;
        Event *event;
@ -161,7 +160,6 @@ typedef struct Worker {
 typedef enum EventResult {
        EVENT_RESULT_NERRNO_MIN       = -ERRNO_MAX,
        EVENT_RESULT_NERRNO_MAX       = -1,
-        EVENT_RESULT_SUCCESS          = 0,
        EVENT_RESULT_EXIT_STATUS_BASE = 0,
        EVENT_RESULT_EXIT_STATUS_MAX  = 255,
        EVENT_RESULT_TRY_AGAIN        = 256, /* when the block device is locked by another process. */
@ -204,10 +202,9 @@ static Worker *worker_free(Worker *worker) {
        if (!worker)
                return NULL;

-        if (worker->manager)
-                hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
+        assert(worker->manager);

-        sd_event_source_unref(worker->child_event_source);
+        hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
        sd_device_monitor_unref(worker->monitor);
        event_free(worker->event);

@ -258,8 +255,6 @@ static Manager* manager_free(Manager *manager) {

 DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);

-static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata);
-
 static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) {
        _cleanup_(worker_freep) Worker *worker = NULL;
        int r;
@ -277,21 +272,17 @@ static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_
                return -ENOMEM;

        *worker = (Worker) {
+                .manager = manager,
                .monitor = sd_device_monitor_ref(worker_monitor),
                .pid = pid,
        };

-        r = sd_event_add_child(manager->event, &worker->child_event_source, pid, WEXITED, on_sigchld, worker);
-        if (r < 0)
-                return r;
-
        r = hashmap_ensure_put(&manager->workers, &worker_hash_op, PID_TO_PTR(pid), worker);
        if (r < 0)
                return r;

-        worker->manager = manager;
-
        *ret = TAKE_PTR(worker);
+
        return 0;
 }

@ -372,7 +363,7 @@ static int on_kill_workers_event(sd_event_source *s, uint64_t usec, void *userda
        return 1;
 }

-static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventResult result) {
+static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, int result) {
        int r;

        assert(dev);
@ -381,7 +372,7 @@ static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventRe
        if (!monitor)
                return;

-        if (result != EVENT_RESULT_SUCCESS) {
+        if (result != 0) {
                (void) device_add_property(dev, "UDEV_WORKER_FAILED", "1");

                switch (result) {
@ -424,7 +415,7 @@ static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventRe
                                         "Failed to broadcast event to libudev listeners, ignoring: %m");
 }

-static int worker_send_result(Manager *manager, EventResult result) {
+static int worker_send_result(Manager *manager, int result) {
        assert(manager);
        assert(manager->worker_watch[WRITE_END] >= 0);

@ -1209,7 +1200,7 @@ static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdat
        assert(manager);

        for (;;) {
-                EventResult result;
+                int result;
                struct iovec iovec = IOVEC_MAKE(&result, sizeof(result));
                CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
                struct msghdr msghdr = {
@ -1552,47 +1543,59 @@ static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void
        return 1;
 }

-static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata) {
-        Worker *worker = ASSERT_PTR(userdata);
-        Manager *manager = ASSERT_PTR(worker->manager);
-        sd_device *dev = worker->event ? ASSERT_PTR(worker->event->dev) : NULL;
-        EventResult result;
+static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+        Manager *manager = userdata;
        int r;

-        assert(si);
+        assert(manager);

-        switch (si->si_code) {
-        case CLD_EXITED:
-                if (si->si_status == 0)
-                        log_device_debug(dev, "Worker ["PID_FMT"] exited.", si->si_pid);
-                else
-                        log_device_warning(dev, "Worker ["PID_FMT"] exited with return code %i.",
-                                           si->si_pid, si->si_status);
-                result = EVENT_RESULT_EXIT_STATUS_BASE + si->si_status;
-                break;
+        for (;;) {
+                pid_t pid;
+                int status;
+                Worker *worker;

-        case CLD_KILLED:
-        case CLD_DUMPED:
-                log_device_warning(dev, "Worker ["PID_FMT"] terminated by signal %i (%s).",
-                                   si->si_pid, si->si_status, signal_to_string(si->si_status));
-                result = EVENT_RESULT_SIGNAL_BASE + si->si_status;
-                break;
+                pid = waitpid(-1, &status, WNOHANG);
+                if (pid <= 0)
+                        break;

-        default:
-                assert_not_reached();
+                worker = hashmap_get(manager->workers, PID_TO_PTR(pid));
+                if (!worker) {
+                        log_warning("Worker ["PID_FMT"] is unknown, ignoring", pid);
+                        continue;
+                }
+
+                if (WIFEXITED(status)) {
+                        if (WEXITSTATUS(status) == 0)
+                                log_debug("Worker ["PID_FMT"] exited", pid);
+                        else
+                                log_warning("Worker ["PID_FMT"] exited with return code %i", pid, WEXITSTATUS(status));
+                } else if (WIFSIGNALED(status))
+                        log_warning("Worker ["PID_FMT"] terminated by signal %i (%s)", pid, WTERMSIG(status), signal_to_string(WTERMSIG(status)));
+                else if (WIFSTOPPED(status)) {
+                        log_info("Worker ["PID_FMT"] stopped", pid);
+                        continue;
+                } else if (WIFCONTINUED(status)) {
+                        log_info("Worker ["PID_FMT"] continued", pid);
+                        continue;
+                } else
+                        log_warning("Worker ["PID_FMT"] exit with status 0x%04x", pid, status);
+
+                if ((!WIFEXITED(status) || WEXITSTATUS(status) != 0) && worker->event) {
+                        log_device_error(worker->event->dev, "Worker ["PID_FMT"] failed", pid);
+
+                        /* delete state from disk */
+                        device_delete_db(worker->event->dev);
+                        device_tag_index(worker->event->dev, NULL, false);
+
+                        /* Forward kernel event to libudev listeners */
+                        device_broadcast(manager->monitor, worker->event->dev,
+                                         WIFEXITED(status) ? EVENT_RESULT_EXIT_STATUS_BASE + WEXITSTATUS(status):
+                                         WIFSIGNALED(status) ? EVENT_RESULT_SIGNAL_BASE + WTERMSIG(status) : 0);
+                }
+
+                worker_free(worker);
        }

-        if (result != EVENT_RESULT_SUCCESS && dev) {
-                /* delete state from disk */
-                device_delete_db(dev);
-                device_tag_index(dev, NULL, false);
-
-                /* Forward kernel event to libudev listeners */
-                device_broadcast(manager->monitor, dev, result);
-        }
-
-        worker_free(worker);
-
        /* we can start new workers, try to schedule events */
        event_queue_start(manager);

@ -2014,6 +2017,10 @@ static int main_loop(Manager *manager) {
        if (r < 0)
                return log_error_errno(r, "Failed to create SIGHUP event source: %m");

+        r = sd_event_add_signal(manager->event, NULL, SIGCHLD, on_sigchld, manager);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create SIGCHLD event source: %m");
+
        r = sd_event_set_watchdog(manager->event, true);
        if (r < 0)
                return log_error_errno(r, "Failed to create watchdog event source: %m");