ccfeeef59d
Backport the patches for this issue: https://bugzilla.redhat.com/show_bug.cgi?id=1819868 We encountered the following issue: when testing a large number of pods (> 230), we occasionally observed a number of problems related to the systemd process: systemd continually ran at 90-100% CPU usage; systemd memory usage started increasing rapidly (20GB/hour); systemctl commands would always time out (Failed to get properties: Connection timed out); sm services failed and could not recover: open-ldap, registry-token-server, docker-distribution, etcd; new pods could not start and got stuck in state ContainerCreating. These patches prevent excessive /proc/1/mountinfo reparsing. It has been verified that they greatly improve performance in this scenario. The 16 commits for the issue are listed in sequence (from [1] to [16]) at the link below: https://github.com/systemd-rhel/rhel-8/pull/154/commits [16](10)core: prevent excessive /proc/self/mountinfo parsing [15][Dropped-6]test: add ratelimiting test [14](9)sd-event: add ability to ratelimit event sources [13](8)sd-event: increase n_enabled_child_sources just once [12](7)sd-event: update state at the end in event_source_enable [11](6)sd-event: remove earliest_index/latest_index into common part of event source objects [10][Dropped-5]sd-event: follow coding style with naming return parameter [9] [Dropped-4]sd-event: ref event loop while in sd_event_prepare() ot sd_event_run() [8] (5)sd-event: refuse running default event loops in any other thread than the one they are default for [7] [Dropped-3]sd-event: let's suffix last_run/last_log with "_usec" [6] [Dropped-2]sd-event: fix delays assert brain-o (#17790) [5] (4)sd-event: split out code to add/remove timer event sources to earliest/latest prioq [4] (3)sd-event: split clock data allocation out of sd_event_add_time() [3] [Dropped-1]sd-event: mention that two debug logged events are ignored [2] (2)sd-event: split out enable and disable codepaths from sd_event_source_set_enabled() [1] (1)sd-event: split out helper 
functions for reshuffling prioqs I ported 10 of them back (from (1) to (10)) to fix this issue and dropped the other 6 (from [Dropped-1] to [Dropped-6]) for the following reasons: [Dropped-1] Only changes an error log. [Dropped-2] Fixes a bug introduced in a commit which doesn't exist in this version. [Dropped-3] Only changes variable names; there is no functional change. [Dropped-4] More commits would be needed to merge it, and it does not appear to help with adding the rate-limiting ability. [Dropped-5] Changes the coding style of a function which isn't really used by anyone. [Dropped-6] Adds test cases. Closes-Bug: #1924686 Signed-off-by: Li Zhou <li.zhou@windriver.com> Change-Id: Ia4c8f162cb1a47b40d1b26cf4d604976b97e92d6
816 lines
29 KiB
Diff
816 lines
29 KiB
Diff
From 2976f3b959bef0e6f0a1f4d55d998c5d60e56b0d Mon Sep 17 00:00:00 2001
|
|
From: Lennart Poettering <lennart@poettering.net>
|
|
Date: Thu, 3 Sep 2015 20:13:09 +0200
|
|
Subject: [PATCH 02/20] sd-event: make sure RT signals are not dropped
|
|
|
|
RT signals operate in a queue, and we should be careful to never merge
|
|
two queued signals into one. Hence, make sure we only ever dequeue a
|
|
single signal at a time and leave the remaining ones queued in the
|
|
signalfd. In order to implement correct priorities for the signals
|
|
introduce one signalfd per priority, so that we only process the highest
|
|
priority signal at a time.
|
|
|
|
[commit 9da4cb2be260ed123f2676cb85cb350c527b1492 from
|
|
https://github.com/systemd-rhel/rhel-8/]
|
|
|
|
Signed-off-by: Li Zhou <li.zhou@windriver.com>
|
|
---
|
|
src/libsystemd/sd-event/sd-event.c | 430 ++++++++++++++++++---------
|
|
src/libsystemd/sd-event/test-event.c | 66 +++-
|
|
2 files changed, 357 insertions(+), 139 deletions(-)
|
|
|
|
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
|
|
index a84bfbb..26ef3ea 100644
|
|
--- a/src/libsystemd/sd-event/sd-event.c
|
|
+++ b/src/libsystemd/sd-event/sd-event.c
|
|
@@ -56,9 +56,22 @@ typedef enum EventSourceType {
|
|
_SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
|
|
} EventSourceType;
|
|
|
|
+/* All objects we use in epoll events start with this value, so that
|
|
+ * we know how to dispatch it */
|
|
+typedef enum WakeupType {
|
|
+ WAKEUP_NONE,
|
|
+ WAKEUP_EVENT_SOURCE,
|
|
+ WAKEUP_CLOCK_DATA,
|
|
+ WAKEUP_SIGNAL_DATA,
|
|
+ _WAKEUP_TYPE_MAX,
|
|
+ _WAKEUP_TYPE_INVALID = -1,
|
|
+} WakeupType;
|
|
+
|
|
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
|
|
|
|
struct sd_event_source {
|
|
+ WakeupType wakeup;
|
|
+
|
|
unsigned n_ref;
|
|
|
|
sd_event *event;
|
|
@@ -120,6 +133,7 @@ struct sd_event_source {
|
|
};
|
|
|
|
struct clock_data {
|
|
+ WakeupType wakeup;
|
|
int fd;
|
|
|
|
/* For all clocks we maintain two priority queues each, one
|
|
@@ -136,11 +150,23 @@ struct clock_data {
|
|
bool needs_rearm:1;
|
|
};
|
|
|
|
+struct signal_data {
|
|
+ WakeupType wakeup;
|
|
+
|
|
+ /* For each priority we maintain one signal fd, so that we
|
|
+ * only have to dequeue a single event per priority at a
|
|
+ * time. */
|
|
+
|
|
+ int fd;
|
|
+ int64_t priority;
|
|
+ sigset_t sigset;
|
|
+ sd_event_source *current;
|
|
+};
|
|
+
|
|
struct sd_event {
|
|
unsigned n_ref;
|
|
|
|
int epoll_fd;
|
|
- int signal_fd;
|
|
int watchdog_fd;
|
|
|
|
Prioq *pending;
|
|
@@ -157,8 +183,8 @@ struct sd_event {
|
|
|
|
usec_t perturb;
|
|
|
|
- sigset_t sigset;
|
|
- sd_event_source **signal_sources;
|
|
+ sd_event_source **signal_sources; /* indexed by signal number */
|
|
+ Hashmap *signal_data; /* indexed by priority */
|
|
|
|
Hashmap *child_sources;
|
|
unsigned n_enabled_child_sources;
|
|
@@ -355,6 +381,7 @@ static int exit_prioq_compare(const void *a, const void *b) {
|
|
|
|
static void free_clock_data(struct clock_data *d) {
|
|
assert(d);
|
|
+ assert(d->wakeup == WAKEUP_CLOCK_DATA);
|
|
|
|
safe_close(d->fd);
|
|
prioq_free(d->earliest);
|
|
@@ -378,7 +405,6 @@ static void event_free(sd_event *e) {
|
|
*(e->default_event_ptr) = NULL;
|
|
|
|
safe_close(e->epoll_fd);
|
|
- safe_close(e->signal_fd);
|
|
safe_close(e->watchdog_fd);
|
|
|
|
free_clock_data(&e->realtime);
|
|
@@ -392,6 +418,7 @@ static void event_free(sd_event *e) {
|
|
prioq_free(e->exit);
|
|
|
|
free(e->signal_sources);
|
|
+ hashmap_free(e->signal_data);
|
|
|
|
hashmap_free(e->child_sources);
|
|
set_free(e->post_sources);
|
|
@@ -409,13 +436,12 @@ _public_ int sd_event_new(sd_event** ret) {
|
|
return -ENOMEM;
|
|
|
|
e->n_ref = 1;
|
|
- e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
|
|
+ e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
|
|
e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
|
|
+ e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
|
|
e->original_pid = getpid();
|
|
e->perturb = USEC_INFINITY;
|
|
|
|
- assert_se(sigemptyset(&e->sigset) == 0);
|
|
-
|
|
e->pending = prioq_new(pending_prioq_compare);
|
|
if (!e->pending) {
|
|
r = -ENOMEM;
|
|
@@ -510,7 +536,6 @@ static int source_io_register(
|
|
r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
|
|
else
|
|
r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
|
|
-
|
|
if (r < 0)
|
|
return -errno;
|
|
|
|
@@ -592,45 +617,171 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
|
|
}
|
|
}
|
|
|
|
-static bool need_signal(sd_event *e, int signal) {
|
|
- return (e->signal_sources && e->signal_sources[signal] &&
|
|
- e->signal_sources[signal]->enabled != SD_EVENT_OFF)
|
|
- ||
|
|
- (signal == SIGCHLD &&
|
|
- e->n_enabled_child_sources > 0);
|
|
-}
|
|
+static int event_make_signal_data(
|
|
+ sd_event *e,
|
|
+ int sig,
|
|
+ struct signal_data **ret) {
|
|
|
|
-static int event_update_signal_fd(sd_event *e) {
|
|
struct epoll_event ev = {};
|
|
- bool add_to_epoll;
|
|
+ struct signal_data *d;
|
|
+ bool added = false;
|
|
+ sigset_t ss_copy;
|
|
+ int64_t priority;
|
|
int r;
|
|
|
|
assert(e);
|
|
|
|
if (event_pid_changed(e))
|
|
- return 0;
|
|
+ return -ECHILD;
|
|
|
|
- add_to_epoll = e->signal_fd < 0;
|
|
+ if (e->signal_sources && e->signal_sources[sig])
|
|
+ priority = e->signal_sources[sig]->priority;
|
|
+ else
|
|
+ priority = 0;
|
|
|
|
- r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
|
|
- if (r < 0)
|
|
- return -errno;
|
|
+ d = hashmap_get(e->signal_data, &priority);
|
|
+ if (d) {
|
|
+ if (sigismember(&d->sigset, sig) > 0) {
|
|
+ if (ret)
|
|
+ *ret = d;
|
|
+ return 0;
|
|
+ }
|
|
+ } else {
|
|
+ r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
|
|
+ if (r < 0)
|
|
+ return r;
|
|
+
|
|
+ d = new0(struct signal_data, 1);
|
|
+ if (!d)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ d->wakeup = WAKEUP_SIGNAL_DATA;
|
|
+ d->fd = -1;
|
|
+ d->priority = priority;
|
|
+
|
|
+ r = hashmap_put(e->signal_data, &d->priority, d);
|
|
+ if (r < 0)
|
|
+ return r;
|
|
|
|
- e->signal_fd = r;
|
|
+ added = true;
|
|
+ }
|
|
+
|
|
+ ss_copy = d->sigset;
|
|
+ assert_se(sigaddset(&ss_copy, sig) >= 0);
|
|
+
|
|
+ r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
|
|
+ if (r < 0) {
|
|
+ r = -errno;
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ d->sigset = ss_copy;
|
|
|
|
- if (!add_to_epoll)
|
|
+ if (d->fd >= 0) {
|
|
+ if (ret)
|
|
+ *ret = d;
|
|
return 0;
|
|
+ }
|
|
+
|
|
+ d->fd = r;
|
|
|
|
ev.events = EPOLLIN;
|
|
- ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
|
|
+ ev.data.ptr = d;
|
|
|
|
- r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
|
|
- if (r < 0) {
|
|
- e->signal_fd = safe_close(e->signal_fd);
|
|
- return -errno;
|
|
+ r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
|
|
+ if (r < 0) {
|
|
+ r = -errno;
|
|
+ goto fail;
|
|
}
|
|
|
|
+ if (ret)
|
|
+ *ret = d;
|
|
+
|
|
return 0;
|
|
+
|
|
+fail:
|
|
+ if (added) {
|
|
+ d->fd = safe_close(d->fd);
|
|
+ hashmap_remove(e->signal_data, &d->priority);
|
|
+ free(d);
|
|
+ }
|
|
+
|
|
+ return r;
|
|
+}
|
|
+
|
|
+static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
|
|
+ assert(e);
|
|
+ assert(d);
|
|
+
|
|
+ /* Turns off the specified signal in the signal data
|
|
+ * object. If the signal mask of the object becomes empty that
|
|
+ * way removes it. */
|
|
+
|
|
+ if (sigismember(&d->sigset, sig) == 0)
|
|
+ return;
|
|
+
|
|
+ assert_se(sigdelset(&d->sigset, sig) >= 0);
|
|
+
|
|
+ if (sigisemptyset(&d->sigset)) {
|
|
+
|
|
+ /* If all the mask is all-zero we can get rid of the structure */
|
|
+ hashmap_remove(e->signal_data, &d->priority);
|
|
+ assert(!d->current);
|
|
+ safe_close(d->fd);
|
|
+ free(d);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ assert(d->fd >= 0);
|
|
+
|
|
+ if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
|
|
+ log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
|
|
+}
|
|
+
|
|
+static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
|
|
+ struct signal_data *d;
|
|
+ static const int64_t zero_priority = 0;
|
|
+
|
|
+ assert(e);
|
|
+
|
|
+ /* Rechecks if the specified signal is still something we are
|
|
+ * interested in. If not, we'll unmask it, and possibly drop
|
|
+ * the signalfd for it. */
|
|
+
|
|
+ if (sig == SIGCHLD &&
|
|
+ e->n_enabled_child_sources > 0)
|
|
+ return;
|
|
+
|
|
+ if (e->signal_sources &&
|
|
+ e->signal_sources[sig] &&
|
|
+ e->signal_sources[sig]->enabled != SD_EVENT_OFF)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * The specified signal might be enabled in three different queues:
|
|
+ *
|
|
+ * 1) the one that belongs to the priority passed (if it is non-NULL)
|
|
+ * 2) the one that belongs to the priority of the event source of the signal (if there is one)
|
|
+ * 3) the 0 priority (to cover the SIGCHLD case)
|
|
+ *
|
|
+ * Hence, let's remove it from all three here.
|
|
+ */
|
|
+
|
|
+ if (priority) {
|
|
+ d = hashmap_get(e->signal_data, priority);
|
|
+ if (d)
|
|
+ event_unmask_signal_data(e, d, sig);
|
|
+ }
|
|
+
|
|
+ if (e->signal_sources && e->signal_sources[sig]) {
|
|
+ d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
|
|
+ if (d)
|
|
+ event_unmask_signal_data(e, d, sig);
|
|
+ }
|
|
+
|
|
+ d = hashmap_get(e->signal_data, &zero_priority);
|
|
+ if (d)
|
|
+ event_unmask_signal_data(e, d, sig);
|
|
}
|
|
|
|
static void source_disconnect(sd_event_source *s) {
|
|
@@ -669,17 +820,11 @@ static void source_disconnect(sd_event_source *s) {
|
|
|
|
case SOURCE_SIGNAL:
|
|
if (s->signal.sig > 0) {
|
|
+
|
|
if (s->event->signal_sources)
|
|
s->event->signal_sources[s->signal.sig] = NULL;
|
|
|
|
- /* If the signal was on and now it is off... */
|
|
- if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
|
|
- assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
|
|
-
|
|
- (void) event_update_signal_fd(s->event);
|
|
- /* If disabling failed, we might get a spurious event,
|
|
- * but otherwise nothing bad should happen. */
|
|
- }
|
|
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
|
|
}
|
|
|
|
break;
|
|
@@ -689,18 +834,10 @@ static void source_disconnect(sd_event_source *s) {
|
|
if (s->enabled != SD_EVENT_OFF) {
|
|
assert(s->event->n_enabled_child_sources > 0);
|
|
s->event->n_enabled_child_sources--;
|
|
-
|
|
- /* We know the signal was on, if it is off now... */
|
|
- if (!need_signal(s->event, SIGCHLD)) {
|
|
- assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
|
|
-
|
|
- (void) event_update_signal_fd(s->event);
|
|
- /* If disabling failed, we might get a spurious event,
|
|
- * but otherwise nothing bad should happen. */
|
|
- }
|
|
}
|
|
|
|
- hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
|
|
+ (void) hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
|
|
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
|
}
|
|
|
|
break;
|
|
@@ -779,6 +916,14 @@ static int source_set_pending(sd_event_source *s, bool b) {
|
|
d->needs_rearm = true;
|
|
}
|
|
|
|
+ if (s->type == SOURCE_SIGNAL && !b) {
|
|
+ struct signal_data *d;
|
|
+
|
|
+ d = hashmap_get(s->event->signal_data, &s->priority);
|
|
+ if (d && d->current == s)
|
|
+ d->current = NULL;
|
|
+ }
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -828,6 +973,7 @@ _public_ int sd_event_add_io(
|
|
if (!s)
|
|
return -ENOMEM;
|
|
|
|
+ s->wakeup = WAKEUP_EVENT_SOURCE;
|
|
s->io.fd = fd;
|
|
s->io.events = events;
|
|
s->io.callback = callback;
|
|
@@ -884,7 +1030,7 @@ static int event_setup_timer_fd(
|
|
return -errno;
|
|
|
|
ev.events = EPOLLIN;
|
|
- ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
|
|
+ ev.data.ptr = d;
|
|
|
|
r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
|
|
if (r < 0) {
|
|
@@ -994,9 +1140,9 @@ _public_ int sd_event_add_signal(
|
|
void *userdata) {
|
|
|
|
sd_event_source *s;
|
|
+ struct signal_data *d;
|
|
sigset_t ss;
|
|
int r;
|
|
- bool previous;
|
|
|
|
assert_return(e, -EINVAL);
|
|
assert_return(sig > 0, -EINVAL);
|
|
@@ -1021,8 +1167,6 @@ _public_ int sd_event_add_signal(
|
|
} else if (e->signal_sources[sig])
|
|
return -EBUSY;
|
|
|
|
- previous = need_signal(e, sig);
|
|
-
|
|
s = source_new(e, !ret, SOURCE_SIGNAL);
|
|
if (!s)
|
|
return -ENOMEM;
|
|
@@ -1034,14 +1178,10 @@ _public_ int sd_event_add_signal(
|
|
|
|
e->signal_sources[sig] = s;
|
|
|
|
- if (!previous) {
|
|
- assert_se(sigaddset(&e->sigset, sig) == 0);
|
|
-
|
|
- r = event_update_signal_fd(e);
|
|
- if (r < 0) {
|
|
- source_free(s);
|
|
- return r;
|
|
- }
|
|
+ r = event_make_signal_data(e, sig, &d);
|
|
+ if (r < 0) {
|
|
+ source_free(s);
|
|
+ return r;
|
|
}
|
|
|
|
/* Use the signal name as description for the event source by default */
|
|
@@ -1063,7 +1203,6 @@ _public_ int sd_event_add_child(
|
|
|
|
sd_event_source *s;
|
|
int r;
|
|
- bool previous;
|
|
|
|
assert_return(e, -EINVAL);
|
|
assert_return(pid > 1, -EINVAL);
|
|
@@ -1080,8 +1219,6 @@ _public_ int sd_event_add_child(
|
|
if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
|
|
return -EBUSY;
|
|
|
|
- previous = need_signal(e, SIGCHLD);
|
|
-
|
|
s = source_new(e, !ret, SOURCE_CHILD);
|
|
if (!s)
|
|
return -ENOMEM;
|
|
@@ -1100,14 +1237,11 @@ _public_ int sd_event_add_child(
|
|
|
|
e->n_enabled_child_sources ++;
|
|
|
|
- if (!previous) {
|
|
- assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
|
|
-
|
|
- r = event_update_signal_fd(e);
|
|
- if (r < 0) {
|
|
- source_free(s);
|
|
- return r;
|
|
- }
|
|
+ r = event_make_signal_data(e, SIGCHLD, NULL);
|
|
+ if (r < 0) {
|
|
+ e->n_enabled_child_sources--;
|
|
+ source_free(s);
|
|
+ return r;
|
|
}
|
|
|
|
e->need_process_child = true;
|
|
@@ -1407,6 +1541,8 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority)
|
|
}
|
|
|
|
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
|
|
+ int r;
|
|
+
|
|
assert_return(s, -EINVAL);
|
|
assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
|
|
assert_return(!event_pid_changed(s->event), -ECHILD);
|
|
@@ -1414,7 +1550,25 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
|
|
if (s->priority == priority)
|
|
return 0;
|
|
|
|
- s->priority = priority;
|
|
+ if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
|
|
+ struct signal_data *old, *d;
|
|
+
|
|
+ /* Move us from the signalfd belonging to the old
|
|
+ * priority to the signalfd of the new priority */
|
|
+
|
|
+ assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
|
|
+
|
|
+ s->priority = priority;
|
|
+
|
|
+ r = event_make_signal_data(s->event, s->signal.sig, &d);
|
|
+ if (r < 0) {
|
|
+ s->priority = old->priority;
|
|
+ return r;
|
|
+ }
|
|
+
|
|
+ event_unmask_signal_data(s->event, old, s->signal.sig);
|
|
+ } else
|
|
+ s->priority = priority;
|
|
|
|
if (s->pending)
|
|
prioq_reshuffle(s->event->pending, s, &s->pending_index);
|
|
@@ -1482,34 +1636,18 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
|
}
|
|
|
|
case SOURCE_SIGNAL:
|
|
- assert(need_signal(s->event, s->signal.sig));
|
|
-
|
|
s->enabled = m;
|
|
|
|
- if (!need_signal(s->event, s->signal.sig)) {
|
|
- assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
|
|
-
|
|
- (void) event_update_signal_fd(s->event);
|
|
- /* If disabling failed, we might get a spurious event,
|
|
- * but otherwise nothing bad should happen. */
|
|
- }
|
|
-
|
|
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
|
|
break;
|
|
|
|
case SOURCE_CHILD:
|
|
- assert(need_signal(s->event, SIGCHLD));
|
|
-
|
|
s->enabled = m;
|
|
|
|
assert(s->event->n_enabled_child_sources > 0);
|
|
s->event->n_enabled_child_sources--;
|
|
|
|
- if (!need_signal(s->event, SIGCHLD)) {
|
|
- assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
|
|
-
|
|
- (void) event_update_signal_fd(s->event);
|
|
- }
|
|
-
|
|
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
|
break;
|
|
|
|
case SOURCE_EXIT:
|
|
@@ -1555,37 +1693,33 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
|
}
|
|
|
|
case SOURCE_SIGNAL:
|
|
- /* Check status before enabling. */
|
|
- if (!need_signal(s->event, s->signal.sig)) {
|
|
- assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
|
|
-
|
|
- r = event_update_signal_fd(s->event);
|
|
- if (r < 0) {
|
|
- s->enabled = SD_EVENT_OFF;
|
|
- return r;
|
|
- }
|
|
- }
|
|
|
|
s->enabled = m;
|
|
+
|
|
+ r = event_make_signal_data(s->event, s->signal.sig, NULL);
|
|
+ if (r < 0) {
|
|
+ s->enabled = SD_EVENT_OFF;
|
|
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
|
|
+ return r;
|
|
+ }
|
|
+
|
|
break;
|
|
|
|
case SOURCE_CHILD:
|
|
- /* Check status before enabling. */
|
|
- if (s->enabled == SD_EVENT_OFF) {
|
|
- if (!need_signal(s->event, SIGCHLD)) {
|
|
- assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
|
|
-
|
|
- r = event_update_signal_fd(s->event);
|
|
- if (r < 0) {
|
|
- s->enabled = SD_EVENT_OFF;
|
|
- return r;
|
|
- }
|
|
- }
|
|
|
|
+ if (s->enabled == SD_EVENT_OFF)
|
|
s->event->n_enabled_child_sources++;
|
|
- }
|
|
|
|
s->enabled = m;
|
|
+
|
|
+ r = event_make_signal_data(s->event, SIGCHLD, NULL);
|
|
+ if (r < 0) {
|
|
+ s->enabled = SD_EVENT_OFF;
|
|
+ s->event->n_enabled_child_sources--;
|
|
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
|
+ return r;
|
|
+ }
|
|
+
|
|
break;
|
|
|
|
case SOURCE_EXIT:
|
|
@@ -2029,20 +2163,35 @@ static int process_child(sd_event *e) {
|
|
return 0;
|
|
}
|
|
|
|
-static int process_signal(sd_event *e, uint32_t events) {
|
|
+static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
|
|
bool read_one = false;
|
|
int r;
|
|
|
|
assert(e);
|
|
-
|
|
assert_return(events == EPOLLIN, -EIO);
|
|
|
|
+ /* If there's a signal queued on this priority and SIGCHLD is
|
|
+ on this priority too, then make sure to recheck the
|
|
+ children we watch. This is because we only ever dequeue
|
|
+ the first signal per priority, and if we dequeue one, and
|
|
+ SIGCHLD might be enqueued later we wouldn't know, but we
|
|
+ might have higher priority children we care about hence we
|
|
+ need to check that explicitly. */
|
|
+
|
|
+ if (sigismember(&d->sigset, SIGCHLD))
|
|
+ e->need_process_child = true;
|
|
+
|
|
+ /* If there's already an event source pending for this
|
|
+ * priority we don't read another */
|
|
+ if (d->current)
|
|
+ return 0;
|
|
+
|
|
for (;;) {
|
|
struct signalfd_siginfo si;
|
|
ssize_t n;
|
|
sd_event_source *s = NULL;
|
|
|
|
- n = read(e->signal_fd, &si, sizeof(si));
|
|
+ n = read(d->fd, &si, sizeof(si));
|
|
if (n < 0) {
|
|
if (errno == EAGAIN || errno == EINTR)
|
|
return read_one;
|
|
@@ -2057,24 +2206,21 @@ static int process_signal(sd_event *e, uint32_t events) {
|
|
|
|
read_one = true;
|
|
|
|
- if (si.ssi_signo == SIGCHLD) {
|
|
- r = process_child(e);
|
|
- if (r < 0)
|
|
- return r;
|
|
- if (r > 0)
|
|
- continue;
|
|
- }
|
|
-
|
|
if (e->signal_sources)
|
|
s = e->signal_sources[si.ssi_signo];
|
|
-
|
|
if (!s)
|
|
continue;
|
|
+ if (s->pending)
|
|
+ continue;
|
|
|
|
s->signal.siginfo = si;
|
|
+ d->current = s;
|
|
+
|
|
r = source_set_pending(s, true);
|
|
if (r < 0)
|
|
return r;
|
|
+
|
|
+ return 1;
|
|
}
|
|
}
|
|
|
|
@@ -2393,23 +2539,31 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
|
|
|
|
for (i = 0; i < m; i++) {
|
|
|
|
- if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
|
|
- r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
|
|
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
|
|
- r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
|
|
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
|
|
- r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
|
|
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
|
|
- r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
|
|
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
|
|
- r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
|
|
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
|
|
- r = process_signal(e, ev_queue[i].events);
|
|
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
|
|
+ if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
|
|
r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
|
|
- else
|
|
- r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
|
|
+ else {
|
|
+ WakeupType *t = ev_queue[i].data.ptr;
|
|
+
|
|
+ switch (*t) {
|
|
+
|
|
+ case WAKEUP_EVENT_SOURCE:
|
|
+ r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
|
|
+ break;
|
|
|
|
+ case WAKEUP_CLOCK_DATA: {
|
|
+ struct clock_data *d = ev_queue[i].data.ptr;
|
|
+ r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ case WAKEUP_SIGNAL_DATA:
|
|
+ r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ assert_not_reached("Invalid wake-up pointer");
|
|
+ }
|
|
+ }
|
|
if (r < 0)
|
|
goto finish;
|
|
}
|
|
diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c
|
|
index 721700b..6bb1420 100644
|
|
--- a/src/libsystemd/sd-event/test-event.c
|
|
+++ b/src/libsystemd/sd-event/test-event.c
|
|
@@ -160,7 +160,7 @@ static int exit_handler(sd_event_source *s, void *userdata) {
|
|
return 3;
|
|
}
|
|
|
|
-int main(int argc, char *argv[]) {
|
|
+static void test_basic(void) {
|
|
sd_event *e = NULL;
|
|
sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL;
|
|
static const char ch = 'x';
|
|
@@ -248,6 +248,70 @@ int main(int argc, char *argv[]) {
|
|
safe_close_pair(b);
|
|
safe_close_pair(d);
|
|
safe_close_pair(k);
|
|
+}
|
|
+
|
|
+static int last_rtqueue_sigval = 0;
|
|
+static int n_rtqueue = 0;
|
|
+
|
|
+static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
|
|
+ last_rtqueue_sigval = si->ssi_int;
|
|
+ n_rtqueue ++;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void test_rtqueue(void) {
|
|
+ sd_event_source *u = NULL, *v = NULL, *s = NULL;
|
|
+ sd_event *e = NULL;
|
|
+
|
|
+ assert_se(sd_event_default(&e) >= 0);
|
|
+
|
|
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0);
|
|
+ assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0);
|
|
+ assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0);
|
|
+ assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0);
|
|
+
|
|
+ assert_se(sd_event_source_set_priority(v, -10) >= 0);
|
|
+
|
|
+ assert_se(sigqueue(getpid(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0);
|
|
+ assert_se(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0);
|
|
+ assert_se(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0);
|
|
+ assert_se(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0);
|
|
+ assert_se(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0);
|
|
+
|
|
+ assert_se(n_rtqueue == 0);
|
|
+ assert_se(last_rtqueue_sigval == 0);
|
|
+
|
|
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
|
|
+ assert_se(n_rtqueue == 1);
|
|
+ assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */
|
|
+
|
|
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
|
|
+ assert_se(n_rtqueue == 2);
|
|
+ assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */
|
|
+
|
|
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
|
|
+ assert_se(n_rtqueue == 3);
|
|
+ assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */
|
|
+
|
|
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
|
|
+ assert_se(n_rtqueue == 4);
|
|
+ assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */
|
|
+
|
|
+ assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */
|
|
+ assert_se(n_rtqueue == 4);
|
|
+ assert_se(last_rtqueue_sigval == 1);
|
|
+
|
|
+ sd_event_source_unref(u);
|
|
+ sd_event_source_unref(v);
|
|
+ sd_event_source_unref(s);
|
|
+
|
|
+ sd_event_unref(e);
|
|
+}
|
|
+
|
|
+int main(int argc, char *argv[]) {
|
|
+
|
|
+ test_basic();
|
|
+ test_rtqueue();
|
|
|
|
return 0;
|
|
}
|
|
--
|
|
2.17.1
|
|
|