1
0
mirror of https://github.com/systemd/systemd synced 2026-04-26 00:45:09 +02:00

Compare commits

..

No commits in common. "87d44b6e1a190a82c53df1982754f04cb6a00f0b" and "5e9f594038cfac43771256af47b459855fc02485" have entirely different histories.

2 changed files with 97 additions and 89 deletions

View File

@ -706,9 +706,6 @@ static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig
return; return;
} }
if (event_pid_changed(e))
return;
assert(d->fd >= 0); assert(d->fd >= 0);
if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0) if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
@ -854,9 +851,6 @@ static void source_disconnect(sd_event_source *s) {
break; break;
case SOURCE_CHILD: case SOURCE_CHILD:
if (event_pid_changed(s->event))
s->child.process_owned = false;
if (s->child.pid > 0) { if (s->child.pid > 0) {
if (event_source_is_online(s)) { if (event_source_is_online(s)) {
assert(s->event->n_online_child_sources > 0); assert(s->event->n_online_child_sources > 0);
@ -1432,6 +1426,7 @@ _public_ int sd_event_add_child(
return -ENOMEM; return -ENOMEM;
s->wakeup = WAKEUP_EVENT_SOURCE; s->wakeup = WAKEUP_EVENT_SOURCE;
s->child.pid = pid;
s->child.options = options; s->child.options = options;
s->child.callback = callback; s->child.callback = callback;
s->userdata = userdata; s->userdata = userdata;
@ -1441,7 +1436,7 @@ _public_ int sd_event_add_child(
* pin the PID, and make regular waitid() handling race-free. */ * pin the PID, and make regular waitid() handling race-free. */
if (shall_use_pidfd()) { if (shall_use_pidfd()) {
s->child.pidfd = pidfd_open(pid, 0); s->child.pidfd = pidfd_open(s->child.pid, 0);
if (s->child.pidfd < 0) { if (s->child.pidfd < 0) {
/* Propagate errors unless the syscall is not supported or blocked */ /* Propagate errors unless the syscall is not supported or blocked */
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
@ -1451,6 +1446,10 @@ _public_ int sd_event_add_child(
} else } else
s->child.pidfd = -1; s->child.pidfd = -1;
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
if (r < 0)
return r;
if (EVENT_SOURCE_WATCH_PIDFD(s)) { if (EVENT_SOURCE_WATCH_PIDFD(s)) {
/* We have a pidfd and we only want to watch for exit */ /* We have a pidfd and we only want to watch for exit */
r = source_child_pidfd_register(s, s->enabled); r = source_child_pidfd_register(s, s->enabled);
@ -1466,12 +1465,6 @@ _public_ int sd_event_add_child(
e->need_process_child = true; e->need_process_child = true;
} }
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
if (r < 0)
return r;
/* These must be done after everything succeeds. */
s->child.pid = pid;
e->n_online_child_sources++; e->n_online_child_sources++;
if (ret) if (ret)
@ -1698,8 +1691,7 @@ static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
assert_se(hashmap_remove(e->inotify_data, &d->priority) == d); assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
if (d->fd >= 0) { if (d->fd >= 0) {
if (!event_pid_changed(e) && if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m"); log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
safe_close(d->fd); safe_close(d->fd);
@ -1809,7 +1801,7 @@ static void event_free_inode_data(
if (d->inotify_data) { if (d->inotify_data) {
if (d->wd >= 0) { if (d->wd >= 0) {
if (d->inotify_data->fd >= 0 && !event_pid_changed(e)) { if (d->inotify_data->fd >= 0) {
/* So here's a problem. At the time this runs the watch descriptor might already be /* So here's a problem. At the time this runs the watch descriptor might already be
* invalidated, because an IN_IGNORED event might be queued right the moment we enter * invalidated, because an IN_IGNORED event might be queued right the moment we enter
* the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
@ -3226,16 +3218,23 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
e->need_process_child = false; e->need_process_child = false;
/* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait /*
* for, instead of using P_ALL. This is because we only want to get child information of very So, this is ugly. We iteratively invoke waitid() with P_PID
* specific child processes, and not all of them. We might not have processed the SIGCHLD event + WNOHANG for each PID we wait for, instead of using
* of a previous invocation and we don't want to maintain a unbounded *per-child* event queue, P_ALL. This is because we only want to get child
* hence we really don't want anything flushed out of the kernel's queue that we don't care information of very specific child processes, and not all
* about. Since this is O(n) this means that if you have a lot of processes you probably want of them. We might not have processed the SIGCHLD even of a
* to handle SIGCHLD yourself. previous invocation and we don't want to maintain a
* unbounded *per-child* event queue, hence we really don't
* We do not reap the children here (by using WNOWAIT), this is only done after the event want anything flushed out of the kernel's queue that we
* source is dispatched so that the callback still sees the process as a zombie. */ don't care about. Since this is O(n) this means that if you
have a lot of processes you probably want to handle SIGCHLD
yourself.
We do not reap the children here (by using WNOWAIT), this
is only done after the event source is dispatched so that
the callback still sees the process as a zombie.
*/
HASHMAP_FOREACH(s, e->child_sources) { HASHMAP_FOREACH(s, e->child_sources) {
assert(s->type == SOURCE_CHILD); assert(s->type == SOURCE_CHILD);
@ -3252,9 +3251,7 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
if (s->child.exited) if (s->child.exited)
continue; continue;
if (EVENT_SOURCE_WATCH_PIDFD(s)) if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
/* There's a usable pidfd known for this event source? Then don't waitid() for
* it here */
continue; continue;
zero(s->child.siginfo); zero(s->child.siginfo);
@ -3269,9 +3266,10 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
s->child.exited = true; s->child.exited = true;
if (!zombie && (s->child.options & WEXITED)) { if (!zombie && (s->child.options & WEXITED)) {
/* If the child isn't dead then let's immediately remove the state /* If the child isn't dead then let's
* change from the queue, since there's no benefit in leaving it * immediately remove the state change
* queued. */ * from the queue, since there's no
* benefit in leaving it queued */
assert(s->child.options & (WSTOPPED|WCONTINUED)); assert(s->child.options & (WSTOPPED|WCONTINUED));
(void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED))); (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
@ -3326,16 +3324,19 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, i
assert_return(events == EPOLLIN, -EIO); assert_return(events == EPOLLIN, -EIO);
assert(min_priority); assert(min_priority);
/* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make /* If there's a signal queued on this priority and SIGCHLD is
* sure to recheck the children we watch. This is because we only ever dequeue the first signal on this priority too, then make sure to recheck the
* per priority, and if we dequeue one, and SIGCHLD might be enqueued later we wouldn't know, children we watch. This is because we only ever dequeue
* but we might have higher priority children we care about hence we need to check that the first signal per priority, and if we dequeue one, and
* explicitly. */ SIGCHLD might be enqueued later we wouldn't know, but we
might have higher priority children we care about hence we
need to check that explicitly. */
if (sigismember(&d->sigset, SIGCHLD)) if (sigismember(&d->sigset, SIGCHLD))
e->need_process_child = true; e->need_process_child = true;
/* If there's already an event source pending for this priority we don't read another */ /* If there's already an event source pending for this
* priority we don't read another */
if (d->current) if (d->current)
return 0; return 0;

View File

@ -151,7 +151,6 @@ typedef enum WorkerState {
typedef struct Worker { typedef struct Worker {
Manager *manager; Manager *manager;
pid_t pid; pid_t pid;
sd_event_source *child_event_source;
sd_device_monitor *monitor; sd_device_monitor *monitor;
WorkerState state; WorkerState state;
Event *event; Event *event;
@ -161,7 +160,6 @@ typedef struct Worker {
typedef enum EventResult { typedef enum EventResult {
EVENT_RESULT_NERRNO_MIN = -ERRNO_MAX, EVENT_RESULT_NERRNO_MIN = -ERRNO_MAX,
EVENT_RESULT_NERRNO_MAX = -1, EVENT_RESULT_NERRNO_MAX = -1,
EVENT_RESULT_SUCCESS = 0,
EVENT_RESULT_EXIT_STATUS_BASE = 0, EVENT_RESULT_EXIT_STATUS_BASE = 0,
EVENT_RESULT_EXIT_STATUS_MAX = 255, EVENT_RESULT_EXIT_STATUS_MAX = 255,
EVENT_RESULT_TRY_AGAIN = 256, /* when the block device is locked by another process. */ EVENT_RESULT_TRY_AGAIN = 256, /* when the block device is locked by another process. */
@ -204,10 +202,9 @@ static Worker *worker_free(Worker *worker) {
if (!worker) if (!worker)
return NULL; return NULL;
if (worker->manager) assert(worker->manager);
hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
sd_event_source_unref(worker->child_event_source); hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
sd_device_monitor_unref(worker->monitor); sd_device_monitor_unref(worker->monitor);
event_free(worker->event); event_free(worker->event);
@ -258,8 +255,6 @@ static Manager* manager_free(Manager *manager) {
DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free); DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata);
static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) { static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) {
_cleanup_(worker_freep) Worker *worker = NULL; _cleanup_(worker_freep) Worker *worker = NULL;
int r; int r;
@ -277,21 +272,17 @@ static int worker_new(Worker **ret, Manager *manager, sd_device_monitor *worker_
return -ENOMEM; return -ENOMEM;
*worker = (Worker) { *worker = (Worker) {
.manager = manager,
.monitor = sd_device_monitor_ref(worker_monitor), .monitor = sd_device_monitor_ref(worker_monitor),
.pid = pid, .pid = pid,
}; };
r = sd_event_add_child(manager->event, &worker->child_event_source, pid, WEXITED, on_sigchld, worker);
if (r < 0)
return r;
r = hashmap_ensure_put(&manager->workers, &worker_hash_op, PID_TO_PTR(pid), worker); r = hashmap_ensure_put(&manager->workers, &worker_hash_op, PID_TO_PTR(pid), worker);
if (r < 0) if (r < 0)
return r; return r;
worker->manager = manager;
*ret = TAKE_PTR(worker); *ret = TAKE_PTR(worker);
return 0; return 0;
} }
@ -372,7 +363,7 @@ static int on_kill_workers_event(sd_event_source *s, uint64_t usec, void *userda
return 1; return 1;
} }
static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventResult result) { static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, int result) {
int r; int r;
assert(dev); assert(dev);
@ -381,7 +372,7 @@ static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventRe
if (!monitor) if (!monitor)
return; return;
if (result != EVENT_RESULT_SUCCESS) { if (result != 0) {
(void) device_add_property(dev, "UDEV_WORKER_FAILED", "1"); (void) device_add_property(dev, "UDEV_WORKER_FAILED", "1");
switch (result) { switch (result) {
@ -424,7 +415,7 @@ static void device_broadcast(sd_device_monitor *monitor, sd_device *dev, EventRe
"Failed to broadcast event to libudev listeners, ignoring: %m"); "Failed to broadcast event to libudev listeners, ignoring: %m");
} }
static int worker_send_result(Manager *manager, EventResult result) { static int worker_send_result(Manager *manager, int result) {
assert(manager); assert(manager);
assert(manager->worker_watch[WRITE_END] >= 0); assert(manager->worker_watch[WRITE_END] >= 0);
@ -1209,7 +1200,7 @@ static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdat
assert(manager); assert(manager);
for (;;) { for (;;) {
EventResult result; int result;
struct iovec iovec = IOVEC_MAKE(&result, sizeof(result)); struct iovec iovec = IOVEC_MAKE(&result, sizeof(result));
CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control; CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
struct msghdr msghdr = { struct msghdr msghdr = {
@ -1552,47 +1543,59 @@ static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void
return 1; return 1;
} }
static int on_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata) { static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
Worker *worker = ASSERT_PTR(userdata); Manager *manager = userdata;
Manager *manager = ASSERT_PTR(worker->manager);
sd_device *dev = worker->event ? ASSERT_PTR(worker->event->dev) : NULL;
EventResult result;
int r; int r;
assert(si); assert(manager);
switch (si->si_code) { for (;;) {
case CLD_EXITED: pid_t pid;
if (si->si_status == 0) int status;
log_device_debug(dev, "Worker ["PID_FMT"] exited.", si->si_pid); Worker *worker;
else
log_device_warning(dev, "Worker ["PID_FMT"] exited with return code %i.",
si->si_pid, si->si_status);
result = EVENT_RESULT_EXIT_STATUS_BASE + si->si_status;
break;
case CLD_KILLED: pid = waitpid(-1, &status, WNOHANG);
case CLD_DUMPED: if (pid <= 0)
log_device_warning(dev, "Worker ["PID_FMT"] terminated by signal %i (%s).", break;
si->si_pid, si->si_status, signal_to_string(si->si_status));
result = EVENT_RESULT_SIGNAL_BASE + si->si_status;
break;
default: worker = hashmap_get(manager->workers, PID_TO_PTR(pid));
assert_not_reached(); if (!worker) {
log_warning("Worker ["PID_FMT"] is unknown, ignoring", pid);
continue;
}
if (WIFEXITED(status)) {
if (WEXITSTATUS(status) == 0)
log_debug("Worker ["PID_FMT"] exited", pid);
else
log_warning("Worker ["PID_FMT"] exited with return code %i", pid, WEXITSTATUS(status));
} else if (WIFSIGNALED(status))
log_warning("Worker ["PID_FMT"] terminated by signal %i (%s)", pid, WTERMSIG(status), signal_to_string(WTERMSIG(status)));
else if (WIFSTOPPED(status)) {
log_info("Worker ["PID_FMT"] stopped", pid);
continue;
} else if (WIFCONTINUED(status)) {
log_info("Worker ["PID_FMT"] continued", pid);
continue;
} else
log_warning("Worker ["PID_FMT"] exit with status 0x%04x", pid, status);
if ((!WIFEXITED(status) || WEXITSTATUS(status) != 0) && worker->event) {
log_device_error(worker->event->dev, "Worker ["PID_FMT"] failed", pid);
/* delete state from disk */
device_delete_db(worker->event->dev);
device_tag_index(worker->event->dev, NULL, false);
/* Forward kernel event to libudev listeners */
device_broadcast(manager->monitor, worker->event->dev,
WIFEXITED(status) ? EVENT_RESULT_EXIT_STATUS_BASE + WEXITSTATUS(status):
WIFSIGNALED(status) ? EVENT_RESULT_SIGNAL_BASE + WTERMSIG(status) : 0);
}
worker_free(worker);
} }
if (result != EVENT_RESULT_SUCCESS && dev) {
/* delete state from disk */
device_delete_db(dev);
device_tag_index(dev, NULL, false);
/* Forward kernel event to libudev listeners */
device_broadcast(manager->monitor, dev, result);
}
worker_free(worker);
/* we can start new workers, try to schedule events */ /* we can start new workers, try to schedule events */
event_queue_start(manager); event_queue_start(manager);
@ -2014,6 +2017,10 @@ static int main_loop(Manager *manager) {
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to create SIGHUP event source: %m"); return log_error_errno(r, "Failed to create SIGHUP event source: %m");
r = sd_event_add_signal(manager->event, NULL, SIGCHLD, on_sigchld, manager);
if (r < 0)
return log_error_errno(r, "Failed to create SIGCHLD event source: %m");
r = sd_event_set_watchdog(manager->event, true); r = sd_event_set_watchdog(manager->event, true);
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to create watchdog event source: %m"); return log_error_errno(r, "Failed to create watchdog event source: %m");