From: Vitaliy Makkoveev Subject: Re: kevent(2): make EVFILT_TIMER mp-safe To: Visa Hankala Cc: tech@openbsd.org Date: Thu, 8 May 2025 19:39:12 +0300 On Thu, May 08, 2025 at 02:25:04PM +0000, Visa Hankala wrote: > On Thu, May 08, 2025 at 03:43:13AM +0300, Vitaliy Makkoveev wrote: > > On Thu, May 08, 2025 at 03:32:22AM +0300, Vitaliy Makkoveev wrote: > > > The last one kernel locked from kern/kern_event.c. > > > > > > Introduce 'filt_timer' opaque structure to keep the timeout(9), the > > > `ft_reschedule' flag and protecting mutex(9). The timeout(9) handler > > > could reschedule itself, so `ft_reschedule' to prevent this while we > > > canceling timeout. The `ft_mtx' mutex(9) also protects the > > > re-initialization, because it could be concurrently accessed from > > > another filt_timermodify() thread. > > Concurrent filt_timermodify() calls cannot happen, though, because > the kqueue subsystem serializes the same knote's f_attach, f_detach, > f_modify, and f_process calls automatically. > Nice to hear. So I need serialization only with running timeout handler. I removed recursion from filt_timeradd()/filt_timerexpire() to reduce `ft_mtx' usage. The type casts also removed. Index: sys/kern/kern_event.c =================================================================== RCS file: /cvs/src/sys/kern/kern_event.c,v retrieving revision 1.201 diff -u -p -r1.201 kern_event.c --- sys/kern/kern_event.c 10 Feb 2025 16:45:46 -0000 1.201 +++ sys/kern/kern_event.c 8 May 2025 15:59:50 -0000 @@ -172,7 +172,7 @@ const struct filterops file_filtops = { }; const struct filterops timer_filtops = { - .f_flags = 0, + .f_flags = FILTEROP_MPSAFE, .f_attach = filt_timerattach, .f_detach = filt_timerdetach, .f_event = NULL, @@ -180,12 +180,19 @@ const struct filterops timer_filtops = { .f_process = filt_timerprocess, }; +/* + * Locking: + * I immutable after creation + * a atomic operations + * t ft_mtx + */ + struct pool knote_pool; struct pool kqueue_pool; struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); struct rwlock kqueue_ps_list_lock = RWLOCK_INITIALIZER("kqpsl"); -int kq_ntimeouts = 0; -int kq_timeoutmax = (4 * 1024); +int kq_ntimeouts = 0; /* [a] */ +int kq_timeoutmax = (4 * 1024); /* [I] */ #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) @@ -598,50 +605,81 @@ filt_timervalidate(int sfflags, int64_t return (0); } +struct filt_timer { + struct mutex ft_mtx; + struct timeout ft_to; + int ft_reschedule; /* [t] */ +}; + static void filt_timeradd(struct knote *kn, struct timespec *ts) { struct timespec expiry, now; - struct timeout *to = kn->kn_hook; + struct filt_timer *ft = kn->kn_hook; int tticks; + /* + * Unlocked `ft_reschedule' and `kn_data' access is fine, + * we have no concurrent threads at this point. + */ + + ft->ft_reschedule = ((kn->kn_flags & EV_ONESHOT) == 0 && + (kn->kn_sfflags & NOTE_ABSTIME) == 0); + if (kn->kn_sfflags & NOTE_ABSTIME) { nanotime(&now); if (timespeccmp(ts, &now, >)) { timespecsub(ts, &now, &expiry); /* XXX timeout_abs_ts with CLOCK_REALTIME */ - timeout_add(to, tstohz(&expiry)); + timeout_add(&ft->ft_to, tstohz(&expiry)); } else { + struct kqueue *kq = kn->kn_kq; + /* Expire immediately. */ - filt_timerexpire(kn); + kn->kn_data++; + mtx_enter(&kq->kq_lock); + knote_activate(kn); + mtx_leave(&kq->kq_lock); } return; } tticks = tstohz(ts); /* Remove extra tick from tstohz() if timeout has fired before. */ - if (timeout_triggered(to)) + if (timeout_triggered(&ft->ft_to)) tticks--; - timeout_add(to, (tticks > 0) ? tticks : 1); + timeout_add(&ft->ft_to, (tticks > 0) ? tticks : 1); } void filt_timerexpire(void *knx) { - struct timespec ts; struct knote *kn = knx; struct kqueue *kq = kn->kn_kq; + struct filt_timer *ft = kn->kn_hook; + mtx_enter(&ft->ft_mtx); kn->kn_data++; + mtx_enter(&kq->kq_lock); knote_activate(kn); mtx_leave(&kq->kq_lock); - if ((kn->kn_flags & EV_ONESHOT) == 0 && - (kn->kn_sfflags & NOTE_ABSTIME) == 0) { + if (ft->ft_reschedule) { + struct timespec ts; + int tticks; + (void)filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); - filt_timeradd(kn, &ts); + tticks = tstohz(&ts); + /* + * Remove extra tick from tstohz() if timeout has + * fired before. + */ + if (timeout_triggered(&ft->ft_to)) + tticks--; + timeout_add(&ft->ft_to, (tticks > 0) ? tticks : 1); } + mtx_leave(&ft->ft_mtx); } /* @@ -651,22 +689,25 @@ int filt_timerattach(struct knote *kn) { struct timespec ts; - struct timeout *to; + struct filt_timer *ft; int error; error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); if (error != 0) return (error); - if (kq_ntimeouts > kq_timeoutmax) + if (atomic_inc_int_nv(&kq_ntimeouts) > kq_timeoutmax) { + atomic_dec_int(&kq_ntimeouts); return (ENOMEM); - kq_ntimeouts++; + } if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) kn->kn_flags |= EV_CLEAR; /* automatically set */ - to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); - timeout_set(to, filt_timerexpire, kn); - kn->kn_hook = to; + + ft = malloc(sizeof(*ft), M_KEVENT, M_WAITOK); + mtx_init(&ft->ft_mtx, IPL_MPSAFE); + timeout_set(&ft->ft_to, filt_timerexpire, kn); + kn->kn_hook = ft; filt_timeradd(kn, &ts); return (0); @@ -675,12 +716,15 @@ filt_timerattach(struct knote *kn) void filt_timerdetach(struct knote *kn) { - struct timeout *to; + struct filt_timer *ft = kn->kn_hook; - to = (struct timeout *)kn->kn_hook; - timeout_del_barrier(to); - free(to, M_KEVENT, sizeof(*to)); - kq_ntimeouts--; + mtx_enter(&ft->ft_mtx); + ft->ft_reschedule = 0; + mtx_leave(&ft->ft_mtx); + + timeout_del_barrier(&ft->ft_to); + free(ft, M_KEVENT, sizeof(*ft)); + atomic_dec_int(&kq_ntimeouts); } int @@ -688,7 +732,7 @@ filt_timermodify(struct kevent *kev, str { struct timespec ts; struct kqueue *kq = kn->kn_kq; - struct timeout *to = kn->kn_hook; + struct filt_timer *ft = kn->kn_hook; int error; error = filt_timervalidate(kev->fflags, kev->data, &ts); @@ -699,8 +743,11 @@ filt_timermodify(struct kevent *kev, str } /* Reset the timer. Any pending events are discarded. */ + mtx_enter(&ft->ft_mtx); + ft->ft_reschedule = 0; + mtx_leave(&ft->ft_mtx); - timeout_del_barrier(to); + timeout_del_barrier(&ft->ft_to); mtx_enter(&kq->kq_lock); if (kn->kn_status & KN_QUEUED) @@ -711,7 +758,7 @@ filt_timermodify(struct kevent *kev, str kn->kn_data = 0; knote_assign(kev, kn); /* Reinit timeout to invoke tick adjustment again. */ - timeout_set(to, filt_timerexpire, kn); + timeout_set(&ft->ft_to, filt_timerexpire, kn); filt_timeradd(kn, &ts); return (0); @@ -720,13 +767,14 @@ filt_timermodify(struct kevent *kev, str int filt_timerprocess(struct knote *kn, struct kevent *kev) { - int active, s; + struct filt_timer *ft = kn->kn_hook; + int active; - s = splsoftclock(); + mtx_enter(&ft->ft_mtx); active = (kn->kn_data != 0); if (active) knote_submit(kn, kev); - splx(s); + mtx_leave(&ft->ft_mtx); return (active); }