Download raw body.
wskbd(4): mp-safe filterops
I'd like to resurrect my old wscons(4) diff and start pushing it forward
to avoid X getting stuck on the kernel lock during polling. I'd like to start
with wskbd(4) to keep the diff as small as possible. Note that all ws*(4)
drivers except wskbd(4) still rely on the kernel lock even with mp-safe filterops.
So, the `ws_mtx' mutex(9) protects the wseventvar structure fields. I used
the dedicated `wsevent_klistops' to take the kernel lock before the mutex
for the non-mp-safe drivers. It will be removed once wsevent is completely
unlocked.
Note, pgsigio() takes the kernel lock deep within and can't be called with
a mutex(9) held; that's why I call it with no locks held. This is the
compromise that allows a step forward.
ok?
Index: sys/dev/wscons/wsevent.c
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wsevent.c,v
diff -u -p -r1.28 wsevent.c
--- sys/dev/wscons/wsevent.c 25 Mar 2024 13:01:49 -0000 1.28
+++ sys/dev/wscons/wsevent.c 15 Jan 2025 15:45:01 -0000
@@ -85,19 +85,61 @@
void filt_wseventdetach(struct knote *);
int filt_wseventread(struct knote *, long);
+int filt_wseventmodify(struct kevent *, struct knote *);
+int filt_wseventprocess(struct knote *, struct kevent *);
+
+static void
+wsevent_klist_assertlk(void *arg)
+{
+ struct wseventvar *ev = arg;
+
+ if ((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_ASSERT_LOCKED();
+ MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
+}
+
+static int
+wsevent_klist_lock(void *arg)
+{
+ struct wseventvar *ev = arg;
+
+ if ((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_LOCK();
+ mtx_enter(&ev->ws_mtx);
+
+ return (0);
+}
+
+static void
+wsevent_klist_unlock(void *arg, int s)
+{
+ struct wseventvar *ev = arg;
+
+ mtx_leave(&ev->ws_mtx);
+ if ((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_UNLOCK();
+}
+
+static const struct klistops wsevent_klistops = {
+ .klo_assertlk = wsevent_klist_assertlk,
+ .klo_lock = wsevent_klist_lock,
+ .klo_unlock = wsevent_klist_unlock,
+};
const struct filterops wsevent_filtops = {
- .f_flags = FILTEROP_ISFD,
+ .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
.f_attach = NULL,
.f_detach = filt_wseventdetach,
.f_event = filt_wseventread,
+ .f_modify = filt_wseventmodify,
+ .f_process = filt_wseventprocess,
};
/*
* Initialize a wscons_event queue.
*/
int
-wsevent_init(struct wseventvar *ev)
+wsevent_init_flags(struct wseventvar *ev, int flags)
{
struct wscons_event *queue;
@@ -112,6 +154,10 @@ wsevent_init(struct wseventvar *ev)
return (1);
}
+ mtx_init_flags(&ev->ws_mtx, IPL_MPFLOOR, "wsmtx", 0);
+ klist_init(&ev->ws_klist, &wsevent_klistops, ev);
+
+ ev->ws_flags = flags;
ev->ws_q = queue;
ev->ws_get = ev->ws_put = 0;
@@ -135,7 +181,7 @@ wsevent_fini(struct wseventvar *ev)
free(ev->ws_q, M_DEVBUF, WSEVENT_QSIZE * sizeof(struct wscons_event));
ev->ws_q = NULL;
- klist_invalidate(&ev->ws_sel.si_note);
+ klist_invalidate(&ev->ws_klist);
sigio_free(&ev->ws_sigio);
}
@@ -147,8 +193,8 @@ wsevent_fini(struct wseventvar *ev)
int
wsevent_read(struct wseventvar *ev, struct uio *uio, int flags)
{
- int s, error;
- u_int cnt;
+ int error = 0, notwrap = 0; /* error is read in the wrap test before the first uiomove() */
+ u_int cnt, tcnt, get;
size_t n;
/*
@@ -156,17 +202,20 @@ wsevent_read(struct wseventvar *ev, stru
*/
if (uio->uio_resid < sizeof(struct wscons_event))
return (EMSGSIZE); /* ??? */
- s = splwsevent();
+ n = howmany(uio->uio_resid, sizeof(struct wscons_event));
+
+ mtx_enter(&ev->ws_mtx);
+
while (ev->ws_get == ev->ws_put) {
if (flags & IO_NDELAY) {
- splx(s);
+ mtx_leave(&ev->ws_mtx);
return (EWOULDBLOCK);
}
ev->ws_wanted = 1;
error = tsleep_nsec(ev, PWSEVENT | PCATCH,
"wsevent_read", INFSLP);
if (error) {
- splx(s);
+ mtx_leave(&ev->ws_mtx);
return (error);
}
}
@@ -178,37 +227,45 @@ wsevent_read(struct wseventvar *ev, stru
cnt = WSEVENT_QSIZE - ev->ws_get; /* events in [get..QSIZE) */
else
cnt = ev->ws_put - ev->ws_get; /* events in [get..put) */
- splx(s);
- n = howmany(uio->uio_resid, sizeof(struct wscons_event));
+
if (cnt > n)
cnt = n;
- error = uiomove((caddr_t)&ev->ws_q[ev->ws_get],
- cnt * sizeof(struct wscons_event), uio);
+
+ get = ev->ws_get;
+ tcnt = ev->ws_put;
n -= cnt;
+
+ if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 ||
+ n == 0 || error || tcnt == 0)
+ notwrap = 1;
+ else {
+ if (tcnt > n)
+ tcnt = n;
+ ev->ws_get = tcnt;
+ }
+
+ mtx_leave(&ev->ws_mtx);
+
+ error = uiomove((caddr_t)&ev->ws_q[get],
+ cnt * sizeof(struct wscons_event), uio);
+
/*
* If we do not wrap to 0, used up all our space, or had an error,
* stop. Otherwise move from front of queue to put index, if there
* is anything there to move.
*/
- if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 ||
- n == 0 || error || (cnt = ev->ws_put) == 0)
+ if (notwrap || error)
return (error);
- if (cnt > n)
- cnt = n;
+
error = uiomove((caddr_t)&ev->ws_q[0],
- cnt * sizeof(struct wscons_event), uio);
- ev->ws_get = cnt;
+ tcnt * sizeof(struct wscons_event), uio);
+
return (error);
}
int
wsevent_kqfilter(struct wseventvar *ev, struct knote *kn)
{
- struct klist *klist;
- int s;
-
- klist = &ev->ws_sel.si_note;
-
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_fop = &wsevent_filtops;
@@ -218,10 +275,7 @@ wsevent_kqfilter(struct wseventvar *ev,
}
kn->kn_hook = ev;
-
- s = splwsevent();
- klist_insert_locked(klist, kn);
- splx(s);
+ klist_insert(&ev->ws_klist, kn);
return (0);
}
@@ -230,12 +284,8 @@ void
filt_wseventdetach(struct knote *kn)
{
struct wseventvar *ev = kn->kn_hook;
- struct klist *klist = &ev->ws_sel.si_note;
- int s;
- s = splwsevent();
- klist_remove_locked(klist, kn);
- splx(s);
+ klist_remove(&ev->ws_klist, kn);
}
int
@@ -243,13 +293,50 @@ filt_wseventread(struct knote *kn, long
{
struct wseventvar *ev = kn->kn_hook;
+ if ((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_ASSERT_LOCKED();
+ MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
+
if (ev->ws_get == ev->ws_put)
return (0);
-
if (ev->ws_get < ev->ws_put)
kn->kn_data = ev->ws_put - ev->ws_get;
else
kn->kn_data = (WSEVENT_QSIZE - ev->ws_get) + ev->ws_put;
return (1);
+}
+
+int
+filt_wseventmodify(struct kevent *kev, struct knote *kn)
+{
+ struct wseventvar *ev = kn->kn_hook;
+ int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
+
+ if (dolock)
+ KERNEL_LOCK();
+ mtx_enter(&ev->ws_mtx);
+ active = knote_modify(kev, kn);
+ mtx_leave(&ev->ws_mtx);
+ if (dolock)
+ KERNEL_UNLOCK();
+
+ return (active);
+}
+
+int
+filt_wseventprocess(struct knote *kn, struct kevent *kev)
+{
+ struct wseventvar *ev = kn->kn_hook;
+ int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
+
+ if (dolock)
+ KERNEL_LOCK();
+ mtx_enter(&ev->ws_mtx);
+ active = knote_process(kn, kev);
+ mtx_leave(&ev->ws_mtx);
+ if (dolock)
+ KERNEL_UNLOCK();
+
+ return (active);
}
Index: sys/dev/wscons/wseventvar.h
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wseventvar.h,v
diff -u -p -r1.13 wseventvar.h
--- sys/dev/wscons/wseventvar.h 25 Mar 2024 13:01:49 -0000 1.13
+++ sys/dev/wscons/wseventvar.h 15 Jan 2025 15:45:01 -0000
@@ -71,8 +71,9 @@
* @(#)event_var.h 8.1 (Berkeley) 6/11/93
*/
-#include <sys/selinfo.h>
+#include <sys/event.h>
#include <sys/sigio.h>
+#include <sys/signalvar.h>
/*
* Internal "wscons_event" queue interface for the keyboard and mouse drivers.
@@ -80,37 +81,68 @@
* i.e., are expected to run off serial ports or similar devices.
*/
+/*
+ * Locks used to protect data
+ * i immutable
+ * m ws_mtx
+ */
+
+/*
+ * XXXSMP: Non WSEVENT_MPSAFE wseventvar structures rely on kernel lock
+ */
+
/* WSEVENT_QSIZE should be a power of two so that `%' is fast */
#define WSEVENT_QSIZE 256 /* may need tuning; this uses 2k */
struct wseventvar {
- u_int ws_get; /* get (read) index (modified
+ u_int ws_get; /* [m] get (read) index (modified
synchronously) */
- volatile u_int ws_put; /* put (write) index (modified by
+ volatile u_int ws_put; /* [m] put (write) index (modified by
interrupt) */
- struct selinfo ws_sel; /* process selecting */
+ int ws_flags; /* [i] flags, see below*/
+ struct mutex ws_mtx;
+ struct klist ws_klist; /* [k] list of knotes */
struct sigio_ref ws_sigio; /* async I/O registration */
- int ws_wanted; /* wake up on input ready */
- int ws_async; /* send SIGIO on input ready */
- struct wscons_event *ws_q; /* circular buffer (queue) of events */
+ int ws_wanted; /* [m] wake up on input ready */
+ int ws_async; /* [m] send SIGIO on input ready */
+ struct wscons_event *ws_q; /* [m] circular buffer (queue) of
+ events */
};
-#define splwsevent() spltty()
+#define WSEVENT_MPSAFE 0x0001
+
+static inline void
+wsevent_wakeup(struct wseventvar *ev)
+{
+ int dosigio = 0;
+
+ knote(&ev->ws_klist, 0);
+
+ mtx_enter(&ev->ws_mtx);
+ if (ev->ws_wanted) {
+ ev->ws_wanted = 0;
+ wakeup(ev);
+ }
+ if (ev->ws_async)
+ dosigio = 1;
+ mtx_leave(&ev->ws_mtx);
-#define WSEVENT_WAKEUP(ev) { \
- selwakeup(&(ev)->ws_sel); \
- if ((ev)->ws_wanted) { \
- (ev)->ws_wanted = 0; \
- wakeup((caddr_t)(ev)); \
- } \
- if ((ev)->ws_async) \
- pgsigio(&(ev)->ws_sigio, SIGIO, 0); \
+ if (dosigio)
+ pgsigio(&ev->ws_sigio, SIGIO, 0);
}
-int wsevent_init(struct wseventvar *);
+#define WSEVENT_WAKEUP(ev) do { wsevent_wakeup(ev); } while (0)
+
+int wsevent_init_flags(struct wseventvar *, int);
void wsevent_fini(struct wseventvar *);
int wsevent_read(struct wseventvar *, struct uio *, int);
int wsevent_kqfilter(struct wseventvar *, struct knote *);
+
+static inline int
+wsevent_init(struct wseventvar *ev)
+{
+ return wsevent_init_flags(ev, 0);
+}
/*
* PWSEVENT is set just above PSOCK, which is just above TTIPRI, on the
Index: sys/dev/wscons/wskbd.c
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wskbd.c,v
diff -u -p -r1.120 wskbd.c
--- sys/dev/wscons/wskbd.c 30 Dec 2024 02:46:00 -0000 1.120
+++ sys/dev/wscons/wskbd.c 15 Jan 2025 15:45:01 -0000
@@ -625,7 +625,6 @@ wskbd_detach(struct device *self, int f
struct wskbd_softc *sc = (struct wskbd_softc *)self;
struct wseventvar *evar;
int maj, mn;
- int s;
#if NWSMUX > 0
/* Tell parent mux we're leaving. */
@@ -647,18 +646,19 @@ wskbd_detach(struct device *self, int f
evar = sc->sc_base.me_evp;
if (evar != NULL) {
- s = spltty();
if (--sc->sc_refcnt >= 0) {
/* Wake everyone by generating a dummy event. */
+
+ mtx_enter(&evar->ws_mtx);
if (++evar->ws_put >= WSEVENT_QSIZE)
evar->ws_put = 0;
- WSEVENT_WAKEUP(evar);
+ mtx_leave(&evar->ws_mtx);
+ wsevent_wakeup(evar);
/* Wait for processes to go away. */
if (tsleep_nsec(sc, PZERO, "wskdet", SEC_TO_NSEC(60)))
printf("wskbd_detach: %s didn't detach\n",
sc->sc_base.me_dv.dv_xname);
}
- splx(s);
}
free(sc->sc_map, M_DEVBUF,
@@ -763,6 +763,7 @@ wskbd_deliver_event(struct wskbd_softc *
}
#endif
+ mtx_enter(&evar->ws_mtx);
put = evar->ws_put;
ev = &evar->ws_q[put];
put = (put + 1) % WSEVENT_QSIZE;
@@ -775,7 +776,8 @@ wskbd_deliver_event(struct wskbd_softc *
ev->value = value;
nanotime(&ev->time);
evar->ws_put = put;
- WSEVENT_WAKEUP(evar);
+ mtx_leave(&evar->ws_mtx);
+ wsevent_wakeup(evar);
}
#ifdef WSDISPLAY_COMPAT_RAWKBD
@@ -862,7 +864,7 @@ wskbdopen(dev_t dev, int flags, int mode
return (EBUSY);
evar = &sc->sc_base.me_evar;
- if (wsevent_init(evar))
+ if (wsevent_init_flags(evar, WSEVENT_MPSAFE))
return (EBUSY);
error = wskbd_do_open(sc, evar);
@@ -1005,7 +1007,9 @@ wskbd_do_ioctl_sc(struct wskbd_softc *sc
case FIOASYNC:
if (sc->sc_base.me_evp == NULL)
return (EINVAL);
+ mtx_enter(&sc->sc_base.me_evp->ws_mtx);
sc->sc_base.me_evp->ws_async = *(int *)data != 0;
+ mtx_leave(&sc->sc_base.me_evp->ws_mtx);
return (0);
case FIOGETOWN:
wskbd(4): mp-safe filterops