kevent(2): make EVFILT_TIMER mp-safe
On Thu, May 08, 2025 at 02:25:04PM +0000, Visa Hankala wrote:
> On Thu, May 08, 2025 at 03:43:13AM +0300, Vitaliy Makkoveev wrote:
> > On Thu, May 08, 2025 at 03:32:22AM +0300, Vitaliy Makkoveev wrote:
> > > This is the last kernel-locked filter in kern/kern_event.c.
> > >
> > > Introduce a 'filt_timer' opaque structure to hold the timeout(9),
> > > the `ft_reschedule' flag and the protecting mutex(9). The
> > > timeout(9) handler could reschedule itself, so `ft_reschedule'
> > > prevents this while we are canceling the timeout. The `ft_mtx'
> > > mutex(9) also protects the re-initialization, because it could be
> > > accessed concurrently from another filt_timermodify() thread.
>
> Concurrent filt_timermodify() calls cannot happen, though, because
> the kqueue subsystem serializes the same knote's f_attach, f_detach,
> f_modify, and f_process calls automatically.
>
Nice to hear. So I only need serialization with the running timeout
handler. I removed the recursion from filt_timeradd()/filt_timerexpire()
to reduce `ft_mtx' usage.
The type casts are also removed.
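
To make the intended serialization explicit: the handler may only re-arm
itself while `ft_reschedule' is set, and detach clears the flag under
`ft_mtx' before calling the barrier. Below is a minimal standalone sketch
of that scheme (illustrative only; the *_sketch names and the fixed
one-tick period are mine, not part of the diff):

/* Sketch of the `ft_reschedule'/`ft_mtx' scheme used in the diff. */
struct filt_timer_sketch {
	struct mutex	fts_mtx;	/* serializes with the handler */
	struct timeout	fts_to;
	int		fts_reschedule;	/* [t] may the handler re-arm? */
};

/* Timeout handler: re-arms itself only while the flag is set. */
void
expire_sketch(void *arg)
{
	struct filt_timer_sketch *fts = arg;

	mtx_enter(&fts->fts_mtx);
	/* deliver the event here */
	if (fts->fts_reschedule)
		timeout_add(&fts->fts_to, 1);	/* real period elided */
	mtx_leave(&fts->fts_mtx);
}

/*
 * Detach: clear the flag under the mutex so a concurrently running
 * handler cannot re-arm, then wait out any running handler.  After
 * timeout_del_barrier() the timeout is guaranteed dead and the
 * structure can be freed.
 */
void
detach_sketch(struct filt_timer_sketch *fts)
{
	mtx_enter(&fts->fts_mtx);
	fts->fts_reschedule = 0;
	mtx_leave(&fts->fts_mtx);
	timeout_del_barrier(&fts->fts_to);
}
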
Index: sys/kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.201
diff -u -p -r1.201 kern_event.c
--- sys/kern/kern_event.c 10 Feb 2025 16:45:46 -0000 1.201
+++ sys/kern/kern_event.c 8 May 2025 15:59:50 -0000
@@ -172,7 +172,7 @@ const struct filterops file_filtops = {
};
const struct filterops timer_filtops = {
- .f_flags = 0,
+ .f_flags = FILTEROP_MPSAFE,
.f_attach = filt_timerattach,
.f_detach = filt_timerdetach,
.f_event = NULL,
@@ -180,12 +180,19 @@ const struct filterops timer_filtops = {
.f_process = filt_timerprocess,
};
+/*
+ * Locking:
+ * I immutable after creation
+ * a atomic operations
+ * t ft_mtx
+ */
+
struct pool knote_pool;
struct pool kqueue_pool;
struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
struct rwlock kqueue_ps_list_lock = RWLOCK_INITIALIZER("kqpsl");
-int kq_ntimeouts = 0;
-int kq_timeoutmax = (4 * 1024);
+int kq_ntimeouts = 0; /* [a] */
+int kq_timeoutmax = (4 * 1024); /* [I] */
#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
@@ -598,50 +605,81 @@ filt_timervalidate(int sfflags, int64_t
return (0);
}
+struct filt_timer {
+ struct mutex ft_mtx;
+ struct timeout ft_to;
+ int ft_reschedule; /* [t] */
+};
+
static void
filt_timeradd(struct knote *kn, struct timespec *ts)
{
struct timespec expiry, now;
- struct timeout *to = kn->kn_hook;
+ struct filt_timer *ft = kn->kn_hook;
int tticks;
+ /*
+ * Unlocked `ft_reschedule' and `kn_data' access is fine,
+ * we have no concurrent threads at this point.
+ */
+
+ ft->ft_reschedule = ((kn->kn_flags & EV_ONESHOT) == 0 &&
+ (kn->kn_sfflags & NOTE_ABSTIME) == 0);
+
if (kn->kn_sfflags & NOTE_ABSTIME) {
nanotime(&now);
if (timespeccmp(ts, &now, >)) {
timespecsub(ts, &now, &expiry);
/* XXX timeout_abs_ts with CLOCK_REALTIME */
- timeout_add(to, tstohz(&expiry));
+ timeout_add(&ft->ft_to, tstohz(&expiry));
} else {
+ struct kqueue *kq = kn->kn_kq;
+
/* Expire immediately. */
- filt_timerexpire(kn);
+ kn->kn_data++;
+ mtx_enter(&kq->kq_lock);
+ knote_activate(kn);
+ mtx_leave(&kq->kq_lock);
}
return;
}
tticks = tstohz(ts);
/* Remove extra tick from tstohz() if timeout has fired before. */
- if (timeout_triggered(to))
+ if (timeout_triggered(&ft->ft_to))
tticks--;
- timeout_add(to, (tticks > 0) ? tticks : 1);
+ timeout_add(&ft->ft_to, (tticks > 0) ? tticks : 1);
}
void
filt_timerexpire(void *knx)
{
- struct timespec ts;
struct knote *kn = knx;
struct kqueue *kq = kn->kn_kq;
+ struct filt_timer *ft = kn->kn_hook;
+ mtx_enter(&ft->ft_mtx);
kn->kn_data++;
+
mtx_enter(&kq->kq_lock);
knote_activate(kn);
mtx_leave(&kq->kq_lock);
- if ((kn->kn_flags & EV_ONESHOT) == 0 &&
- (kn->kn_sfflags & NOTE_ABSTIME) == 0) {
+ if (ft->ft_reschedule) {
+ struct timespec ts;
+ int tticks;
+
(void)filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts);
- filt_timeradd(kn, &ts);
+ tticks = tstohz(&ts);
+ /*
+ * Remove extra tick from tstohz() if timeout has
+ * fired before.
+ */
+ if (timeout_triggered(&ft->ft_to))
+ tticks--;
+ timeout_add(&ft->ft_to, (tticks > 0) ? tticks : 1);
}
+ mtx_leave(&ft->ft_mtx);
}
/*
@@ -651,22 +689,25 @@ int
filt_timerattach(struct knote *kn)
{
struct timespec ts;
- struct timeout *to;
+ struct filt_timer *ft;
int error;
error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts);
if (error != 0)
return (error);
- if (kq_ntimeouts > kq_timeoutmax)
+ if (atomic_inc_int_nv(&kq_ntimeouts) > kq_timeoutmax) {
+ atomic_dec_int(&kq_ntimeouts);
return (ENOMEM);
- kq_ntimeouts++;
+ }
if ((kn->kn_sfflags & NOTE_ABSTIME) == 0)
kn->kn_flags |= EV_CLEAR; /* automatically set */
- to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
- timeout_set(to, filt_timerexpire, kn);
- kn->kn_hook = to;
+
+ ft = malloc(sizeof(*ft), M_KEVENT, M_WAITOK);
+ mtx_init(&ft->ft_mtx, IPL_MPSAFE);
+ timeout_set(&ft->ft_to, filt_timerexpire, kn);
+ kn->kn_hook = ft;
filt_timeradd(kn, &ts);
return (0);
@@ -675,12 +716,15 @@ filt_timerattach(struct knote *kn)
void
filt_timerdetach(struct knote *kn)
{
- struct timeout *to;
+ struct filt_timer *ft = kn->kn_hook;
- to = (struct timeout *)kn->kn_hook;
- timeout_del_barrier(to);
- free(to, M_KEVENT, sizeof(*to));
- kq_ntimeouts--;
+ mtx_enter(&ft->ft_mtx);
+ ft->ft_reschedule = 0;
+ mtx_leave(&ft->ft_mtx);
+
+ timeout_del_barrier(&ft->ft_to);
+ free(ft, M_KEVENT, sizeof(*ft));
+ atomic_dec_int(&kq_ntimeouts);
}
int
@@ -688,7 +732,7 @@ filt_timermodify(struct kevent *kev, str
{
struct timespec ts;
struct kqueue *kq = kn->kn_kq;
- struct timeout *to = kn->kn_hook;
+ struct filt_timer *ft = kn->kn_hook;
int error;
error = filt_timervalidate(kev->fflags, kev->data, &ts);
@@ -699,8 +743,11 @@ filt_timermodify(struct kevent *kev, str
}
/* Reset the timer. Any pending events are discarded. */
+ mtx_enter(&ft->ft_mtx);
+ ft->ft_reschedule = 0;
+ mtx_leave(&ft->ft_mtx);
- timeout_del_barrier(to);
+ timeout_del_barrier(&ft->ft_to);
mtx_enter(&kq->kq_lock);
if (kn->kn_status & KN_QUEUED)
@@ -711,7 +758,7 @@ filt_timermodify(struct kevent *kev, str
kn->kn_data = 0;
knote_assign(kev, kn);
/* Reinit timeout to invoke tick adjustment again. */
- timeout_set(to, filt_timerexpire, kn);
+ timeout_set(&ft->ft_to, filt_timerexpire, kn);
filt_timeradd(kn, &ts);
return (0);
@@ -720,13 +767,14 @@ filt_timermodify(struct kevent *kev, str
int
filt_timerprocess(struct knote *kn, struct kevent *kev)
{
- int active, s;
+ struct filt_timer *ft = kn->kn_hook;
+ int active;
- s = splsoftclock();
+ mtx_enter(&ft->ft_mtx);
active = (kn->kn_data != 0);
if (active)
knote_submit(kn, kev);
- splx(s);
+ mtx_leave(&ft->ft_mtx);
return (active);
}
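
For anyone who wants to poke at the filter from userland, here is a
small exerciser (my untested sketch, not part of the diff): it arms a
periodic 100 ms EVFILT_TIMER and prints a few expirations; on return,
kev.data reports how many times the timer fired since the last read.

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct kevent kev;
	int i, kq, n;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* ident 1, default unit is milliseconds, so 100 ms period. */
	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, 0, 100, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent: add");

	for (i = 0; i < 3; i++) {
		n = kevent(kq, NULL, 0, &kev, 1, NULL);
		if (n == -1)
			err(1, "kevent: wait");
		printf("timer fired, %lld expirations\n",
		    (long long)kev.data);
	}
	return 0;
}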