From: Vitaliy Makkoveev Subject: wskbd(4): mp-safe filterops To: tech@openbsd.org Date: Wed, 15 Jan 2025 18:46:39 +0300 I'd like to resurrect my old wscons(4) diff and start pushing it forward, to avoid X getting stuck in the kernel lock during polling. I'd like to start with wskbd(4) to keep the diff as small as possible. Note, all ws*(4) drivers except wskbd(4) still rely on the kernel lock even with mp-safe filterops. So, the `ws_mtx' mutex(9) protects wseventvar structure fields. I used the dedicated `wsevent_klistops' to take the kernel lock before the mutex for the non-mp-safe drivers. It will be removed after complete wsevent unlocking. Note, pgsigio() takes the kernel lock deep within and can't be called with a mutex(9) held; that's why I call it lockless. This is the compromise to allow a step forward. ok? Index: sys/dev/wscons/wsevent.c =================================================================== RCS file: /cvs/src/sys/dev/wscons/wsevent.c,v diff -u -p -r1.28 wsevent.c --- sys/dev/wscons/wsevent.c 25 Mar 2024 13:01:49 -0000 1.28 +++ sys/dev/wscons/wsevent.c 15 Jan 2025 15:45:01 -0000 @@ -85,19 +85,61 @@ void filt_wseventdetach(struct knote *); int filt_wseventread(struct knote *, long); +int filt_wseventmodify(struct kevent *, struct knote *); +int filt_wseventprocess(struct knote *, struct kevent *); + +static void +wsevent_klist_assertlk(void *arg) +{ + struct wseventvar *ev = arg; + + if((ev->ws_flags & WSEVENT_MPSAFE) == 0) + KERNEL_ASSERT_LOCKED(); + MUTEX_ASSERT_LOCKED(&ev->ws_mtx); +} + +static int +wsevent_klist_lock(void *arg) +{ + struct wseventvar *ev = arg; + + if((ev->ws_flags & WSEVENT_MPSAFE) == 0) + KERNEL_LOCK(); + mtx_enter(&ev->ws_mtx); + + return (0); +} + +static void +wsevent_klist_unlock(void *arg, int s) +{ + struct wseventvar *ev = arg; + + mtx_leave(&ev->ws_mtx); + if ((ev->ws_flags & WSEVENT_MPSAFE) == 0) + KERNEL_UNLOCK(); +} + +static const struct klistops wsevent_klistops = { + .klo_assertlk = wsevent_klist_assertlk, + .klo_lock = wsevent_klist_lock, + 
.klo_unlock = wsevent_klist_unlock, +}; const struct filterops wsevent_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_wseventdetach, .f_event = filt_wseventread, + .f_modify = filt_wseventmodify, + .f_process = filt_wseventprocess, }; /* * Initialize a wscons_event queue. */ int -wsevent_init(struct wseventvar *ev) +wsevent_init_flags(struct wseventvar *ev, int flags) { struct wscons_event *queue; @@ -112,6 +154,10 @@ wsevent_init(struct wseventvar *ev) return (1); } + mtx_init_flags(&ev->ws_mtx, IPL_MPFLOOR, "wsmtx", 0); + klist_init(&ev->ws_klist, &wsevent_klistops, ev); + + ev->ws_flags = flags; ev->ws_q = queue; ev->ws_get = ev->ws_put = 0; @@ -135,7 +181,7 @@ wsevent_fini(struct wseventvar *ev) free(ev->ws_q, M_DEVBUF, WSEVENT_QSIZE * sizeof(struct wscons_event)); ev->ws_q = NULL; - klist_invalidate(&ev->ws_sel.si_note); + klist_invalidate(&ev->ws_klist); sigio_free(&ev->ws_sigio); } @@ -147,8 +193,8 @@ wsevent_fini(struct wseventvar *ev) int wsevent_read(struct wseventvar *ev, struct uio *uio, int flags) { - int s, error; - u_int cnt; + int error, notwrap = 0; + u_int cnt, tcnt, get; size_t n; /* @@ -156,17 +202,20 @@ wsevent_read(struct wseventvar *ev, stru */ if (uio->uio_resid < sizeof(struct wscons_event)) return (EMSGSIZE); /* ??? 
*/ - s = splwsevent(); + n = howmany(uio->uio_resid, sizeof(struct wscons_event)); + + mtx_enter(&ev->ws_mtx); + while (ev->ws_get == ev->ws_put) { if (flags & IO_NDELAY) { - splx(s); + mtx_leave(&ev->ws_mtx); return (EWOULDBLOCK); } ev->ws_wanted = 1; error = tsleep_nsec(ev, PWSEVENT | PCATCH, "wsevent_read", INFSLP); if (error) { - splx(s); + mtx_leave(&ev->ws_mtx); return (error); } } @@ -178,37 +227,45 @@ wsevent_read(struct wseventvar *ev, stru cnt = WSEVENT_QSIZE - ev->ws_get; /* events in [get..QSIZE) */ else cnt = ev->ws_put - ev->ws_get; /* events in [get..put) */ - splx(s); - n = howmany(uio->uio_resid, sizeof(struct wscons_event)); + if (cnt > n) cnt = n; - error = uiomove((caddr_t)&ev->ws_q[ev->ws_get], - cnt * sizeof(struct wscons_event), uio); + + get = ev->ws_get; + tcnt = ev->ws_put; n -= cnt; + + if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 || + n == 0 || error || tcnt == 0) + notwrap = 1; + else { + if (tcnt > n) + tcnt = n; + ev->ws_get = tcnt; + } + + mtx_leave(&ev->ws_mtx); + + error = uiomove((caddr_t)&ev->ws_q[get], + cnt * sizeof(struct wscons_event), uio); + /* * If we do not wrap to 0, used up all our space, or had an error, * stop. Otherwise move from front of queue to put index, if there * is anything there to move. 
*/ - if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 || - n == 0 || error || (cnt = ev->ws_put) == 0) + if (notwrap || error) return (error); - if (cnt > n) - cnt = n; + error = uiomove((caddr_t)&ev->ws_q[0], - cnt * sizeof(struct wscons_event), uio); - ev->ws_get = cnt; + tcnt * sizeof(struct wscons_event), uio); + return (error); } int wsevent_kqfilter(struct wseventvar *ev, struct knote *kn) { - struct klist *klist; - int s; - - klist = &ev->ws_sel.si_note; - switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &wsevent_filtops; @@ -218,10 +275,7 @@ wsevent_kqfilter(struct wseventvar *ev, } kn->kn_hook = ev; - - s = splwsevent(); - klist_insert_locked(klist, kn); - splx(s); + klist_insert(&ev->ws_klist, kn); return (0); } @@ -230,12 +284,8 @@ void filt_wseventdetach(struct knote *kn) { struct wseventvar *ev = kn->kn_hook; - struct klist *klist = &ev->ws_sel.si_note; - int s; - s = splwsevent(); - klist_remove_locked(klist, kn); - splx(s); + klist_remove(&ev->ws_klist, kn); } int @@ -243,13 +293,51 @@ filt_wseventread(struct knote *kn, long { struct wseventvar *ev = kn->kn_hook; + if((ev->ws_flags & WSEVENT_MPSAFE) == 0) + KERNEL_ASSERT_LOCKED(); + MUTEX_ASSERT_LOCKED(&ev->ws_mtx); + if (ev->ws_get == ev->ws_put) return (0); - if (ev->ws_get < ev->ws_put) kn->kn_data = ev->ws_put - ev->ws_get; else kn->kn_data = (WSEVENT_QSIZE - ev->ws_get) + ev->ws_put; return (1); +} + +int +filt_wseventmodify(struct kevent *kev, struct knote *kn) +{ + struct wseventvar *ev = kn->kn_hook; + int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0); + + if (dolock) + KERNEL_LOCK(); + mtx_enter(&ev->ws_mtx); + active = knote_modify(kev, kn); + mtx_leave(&ev->ws_mtx); + if (dolock) + KERNEL_UNLOCK(); + + return (active); +} + +int +filt_wseventprocess(struct knote *kn, struct kevent *kev) +{ + struct wseventvar *ev = kn->kn_hook; + int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0); + + if (dolock) + KERNEL_LOCK(); + mtx_enter(&ev->ws_mtx); + active = 
knote_process(kn, kev); + mtx_leave(&ev->ws_mtx); + if (dolock) + KERNEL_UNLOCK(); + + return (active); + } Index: sys/dev/wscons/wseventvar.h =================================================================== RCS file: /cvs/src/sys/dev/wscons/wseventvar.h,v diff -u -p -r1.13 wseventvar.h --- sys/dev/wscons/wseventvar.h 25 Mar 2024 13:01:49 -0000 1.13 +++ sys/dev/wscons/wseventvar.h 15 Jan 2025 15:45:01 -0000 @@ -71,8 +71,9 @@ * @(#)event_var.h 8.1 (Berkeley) 6/11/93 */ -#include +#include #include +#include /* * Internal "wscons_event" queue interface for the keyboard and mouse drivers. @@ -80,37 +81,68 @@ * i.e., are expected to run off serial ports or similar devices. */ +/* + * Locks used to protect data + * i immutable + * m ws_mtx + */ + +/* + * XXXSMP: Non WSEVENT_MPSAFE wseventvar structures rely on kernel lock + */ + /* WSEVENT_QSIZE should be a power of two so that `%' is fast */ #define WSEVENT_QSIZE 256 /* may need tuning; this uses 2k */ struct wseventvar { - u_int ws_get; /* get (read) index (modified + u_int ws_get; /* [m] get (read) index (modified synchronously) */ - volatile u_int ws_put; /* put (write) index (modified by + volatile u_int ws_put; /* [m] put (write) index (modified by interrupt) */ - struct selinfo ws_sel; /* process selecting */ + int ws_flags; /* [i] flags, see below*/ + struct mutex ws_mtx; + struct klist ws_klist; /* [k] list of knotes */ struct sigio_ref ws_sigio; /* async I/O registration */ - int ws_wanted; /* wake up on input ready */ - int ws_async; /* send SIGIO on input ready */ - struct wscons_event *ws_q; /* circular buffer (queue) of events */ + int ws_wanted; /* [m] wake up on input ready */ + int ws_async; /* [m] send SIGIO on input ready */ + struct wscons_event *ws_q; /* [m] circular buffer (queue) of + events */ }; -#define splwsevent() spltty() +#define WSEVENT_MPSAFE 0x0001 + +static inline void +wsevent_wakeup(struct wseventvar *ev) +{ + int dosigio = 0; + + knote(&ev->ws_klist, 0); + + 
mtx_enter(&ev->ws_mtx); + if (ev->ws_wanted) { + ev->ws_wanted = 0; + wakeup(ev); + } + if (ev->ws_async) + dosigio = 1; + mtx_leave(&ev->ws_mtx); -#define WSEVENT_WAKEUP(ev) { \ - selwakeup(&(ev)->ws_sel); \ - if ((ev)->ws_wanted) { \ - (ev)->ws_wanted = 0; \ - wakeup((caddr_t)(ev)); \ - } \ - if ((ev)->ws_async) \ - pgsigio(&(ev)->ws_sigio, SIGIO, 0); \ + if (dosigio) + pgsigio(&ev->ws_sigio, SIGIO, 0); } -int wsevent_init(struct wseventvar *); +#define WSEVENT_WAKEUP(ev) do { wsevent_wakeup(ev); } while (0) + +int wsevent_init_flags(struct wseventvar *, int); void wsevent_fini(struct wseventvar *); int wsevent_read(struct wseventvar *, struct uio *, int); int wsevent_kqfilter(struct wseventvar *, struct knote *); + +static inline int +wsevent_init(struct wseventvar *ev) +{ + return wsevent_init_flags(ev, 0); +} /* * PWSEVENT is set just above PSOCK, which is just above TTIPRI, on the Index: sys/dev/wscons/wskbd.c =================================================================== RCS file: /cvs/src/sys/dev/wscons/wskbd.c,v diff -u -p -r1.120 wskbd.c --- sys/dev/wscons/wskbd.c 30 Dec 2024 02:46:00 -0000 1.120 +++ sys/dev/wscons/wskbd.c 15 Jan 2025 15:45:01 -0000 @@ -625,7 +625,6 @@ wskbd_detach(struct device *self, int f struct wskbd_softc *sc = (struct wskbd_softc *)self; struct wseventvar *evar; int maj, mn; - int s; #if NWSMUX > 0 /* Tell parent mux we're leaving. */ @@ -647,18 +646,19 @@ wskbd_detach(struct device *self, int f evar = sc->sc_base.me_evp; if (evar != NULL) { - s = spltty(); if (--sc->sc_refcnt >= 0) { /* Wake everyone by generating a dummy event. */ + + mtx_enter(&evar->ws_mtx); if (++evar->ws_put >= WSEVENT_QSIZE) evar->ws_put = 0; - WSEVENT_WAKEUP(evar); + mtx_leave(&evar->ws_mtx); + wsevent_wakeup(evar); /* Wait for processes to go away. 
*/ if (tsleep_nsec(sc, PZERO, "wskdet", SEC_TO_NSEC(60))) printf("wskbd_detach: %s didn't detach\n", sc->sc_base.me_dv.dv_xname); } - splx(s); } free(sc->sc_map, M_DEVBUF, @@ -763,6 +763,7 @@ wskbd_deliver_event(struct wskbd_softc * } #endif + mtx_enter(&evar->ws_mtx); put = evar->ws_put; ev = &evar->ws_q[put]; put = (put + 1) % WSEVENT_QSIZE; @@ -775,7 +776,8 @@ wskbd_deliver_event(struct wskbd_softc * ev->value = value; nanotime(&ev->time); evar->ws_put = put; - WSEVENT_WAKEUP(evar); + mtx_leave(&evar->ws_mtx); + wsevent_wakeup(evar); } #ifdef WSDISPLAY_COMPAT_RAWKBD @@ -862,7 +864,7 @@ wskbdopen(dev_t dev, int flags, int mode return (EBUSY); evar = &sc->sc_base.me_evar; - if (wsevent_init(evar)) + if (wsevent_init_flags(evar, WSEVENT_MPSAFE)) return (EBUSY); error = wskbd_do_open(sc, evar); @@ -1005,7 +1007,9 @@ wskbd_do_ioctl_sc(struct wskbd_softc *sc case FIOASYNC: if (sc->sc_base.me_evp == NULL) return (EINVAL); + mtx_enter(&sc->sc_base.me_evp->ws_mtx); sc->sc_base.me_evp->ws_async = *(int *)data != 0; + mtx_leave(&sc->sc_base.me_evp->ws_mtx); return (0); case FIOGETOWN: