Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
wskbd(4): mp-safe filterops
To:
tech@openbsd.org
Date:
Wed, 15 Jan 2025 18:46:39 +0300

Download raw body.

Thread
I like to resurrect my old wsconsi(4) diff and start pushing it forward
to avoid X stuck in kernel lock during polling. I like to start with
wskbd(4) to make the diff as small as possible. Note all except wskbd(4)
ws*(4) drivers still rely on kernel lock even with mp-safe filterops.

So, the `ws_mtx' mutex(9) protects wseventvar structure fields. I used
dedicated `wsevent_klistops' to take kernel lock before mutex for
the non mp-safe drivers. It will be removed after complete wsevent
unlocking.

Note, pgsigio() takes kernel lock deep within and can't be called with
mutex(9) held, that's why I call it lockless. This the compromise to
allow step forward.

ok?

Index: sys/dev/wscons/wsevent.c
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wsevent.c,v
diff -u -p -r1.28 wsevent.c
--- sys/dev/wscons/wsevent.c	25 Mar 2024 13:01:49 -0000	1.28
+++ sys/dev/wscons/wsevent.c	15 Jan 2025 15:45:01 -0000
@@ -85,19 +85,61 @@
 
 void	filt_wseventdetach(struct knote *);
 int	filt_wseventread(struct knote *, long);
+int	filt_wseventmodify(struct kevent *, struct knote *);
+int	filt_wseventprocess(struct knote *, struct kevent *);
+
+static void
+wsevent_klist_assertlk(void *arg)
+{
+	struct wseventvar *ev = arg;
+
+	if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+		KERNEL_ASSERT_LOCKED();
+	MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
+}
+
+static int
+wsevent_klist_lock(void *arg)
+{
+	struct wseventvar *ev = arg;
+	
+	if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+		KERNEL_LOCK();
+	mtx_enter(&ev->ws_mtx);
+
+	return (0);
+}
+
+static void
+wsevent_klist_unlock(void *arg, int s)
+{
+	struct wseventvar *ev = arg;
+
+	mtx_leave(&ev->ws_mtx);
+	if ((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+		KERNEL_UNLOCK();
+}
+
+static const struct klistops wsevent_klistops = {
+	.klo_assertlk	= wsevent_klist_assertlk,
+	.klo_lock	= wsevent_klist_lock,
+	.klo_unlock	= wsevent_klist_unlock,
+};
 
 const struct filterops wsevent_filtops = {
-	.f_flags	= FILTEROP_ISFD,
+	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
 	.f_attach	= NULL,
 	.f_detach	= filt_wseventdetach,
 	.f_event	= filt_wseventread,
+	.f_modify	= filt_wseventmodify,
+	.f_process	= filt_wseventprocess,
 };
 
 /*
  * Initialize a wscons_event queue.
  */
 int
-wsevent_init(struct wseventvar *ev)
+wsevent_init_flags(struct wseventvar *ev, int flags)
 {
 	struct wscons_event *queue;
 
@@ -112,6 +154,10 @@ wsevent_init(struct wseventvar *ev)
 		return (1);
 	}
 
+	mtx_init_flags(&ev->ws_mtx, IPL_MPFLOOR, "wsmtx", 0);
+	klist_init(&ev->ws_klist, &wsevent_klistops, ev);
+
+	ev->ws_flags = flags;
 	ev->ws_q = queue;
 	ev->ws_get = ev->ws_put = 0;
 
@@ -135,7 +181,7 @@ wsevent_fini(struct wseventvar *ev)
 	free(ev->ws_q, M_DEVBUF, WSEVENT_QSIZE * sizeof(struct wscons_event));
 	ev->ws_q = NULL;
 
-	klist_invalidate(&ev->ws_sel.si_note);
+	klist_invalidate(&ev->ws_klist);
 
 	sigio_free(&ev->ws_sigio);
 }
@@ -147,8 +193,8 @@ wsevent_fini(struct wseventvar *ev)
 int
 wsevent_read(struct wseventvar *ev, struct uio *uio, int flags)
 {
-	int s, error;
-	u_int cnt;
+	int error, notwrap = 0;
+	u_int cnt, tcnt, get;
 	size_t n;
 
 	/*
@@ -156,17 +202,20 @@ wsevent_read(struct wseventvar *ev, stru
 	 */
 	if (uio->uio_resid < sizeof(struct wscons_event))
 		return (EMSGSIZE);	/* ??? */
-	s = splwsevent();
+	n = howmany(uio->uio_resid, sizeof(struct wscons_event));
+
+	mtx_enter(&ev->ws_mtx);
+
 	while (ev->ws_get == ev->ws_put) {
 		if (flags & IO_NDELAY) {
-			splx(s);
+			mtx_leave(&ev->ws_mtx);
 			return (EWOULDBLOCK);
 		}
 		ev->ws_wanted = 1;
 		error = tsleep_nsec(ev, PWSEVENT | PCATCH,
 		    "wsevent_read", INFSLP);
 		if (error) {
-			splx(s);
+			mtx_leave(&ev->ws_mtx);
 			return (error);
 		}
 	}
@@ -178,37 +227,45 @@ wsevent_read(struct wseventvar *ev, stru
 		cnt = WSEVENT_QSIZE - ev->ws_get; /* events in [get..QSIZE) */
 	else
 		cnt = ev->ws_put - ev->ws_get;    /* events in [get..put) */
-	splx(s);
-	n = howmany(uio->uio_resid, sizeof(struct wscons_event));
+
 	if (cnt > n)
 		cnt = n;
-	error = uiomove((caddr_t)&ev->ws_q[ev->ws_get],
-	    cnt * sizeof(struct wscons_event), uio);
+
+	get = ev->ws_get;
+	tcnt = ev->ws_put;
 	n -= cnt;
+
+	if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 ||
+	    n == 0 || error || tcnt == 0)
+		notwrap = 1;
+	else {
+		if (tcnt > n)
+			tcnt = n;
+		ev->ws_get = tcnt;
+	}
+
+	mtx_leave(&ev->ws_mtx);
+
+	error = uiomove((caddr_t)&ev->ws_q[get],
+	    cnt * sizeof(struct wscons_event), uio);
+
 	/*
 	 * If we do not wrap to 0, used up all our space, or had an error,
 	 * stop.  Otherwise move from front of queue to put index, if there
 	 * is anything there to move.
 	 */
-	if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 ||
-	    n == 0 || error || (cnt = ev->ws_put) == 0)
+	if (notwrap || error)
 		return (error);
-	if (cnt > n)
-		cnt = n;
+
 	error = uiomove((caddr_t)&ev->ws_q[0],
-	    cnt * sizeof(struct wscons_event), uio);
-	ev->ws_get = cnt;
+	    tcnt * sizeof(struct wscons_event), uio);
+
 	return (error);
 }
 
 int
 wsevent_kqfilter(struct wseventvar *ev, struct knote *kn)
 {
-	struct klist *klist;
-	int s;
-
-	klist = &ev->ws_sel.si_note;
-
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		kn->kn_fop = &wsevent_filtops;
@@ -218,10 +275,7 @@ wsevent_kqfilter(struct wseventvar *ev, 
 	}
 
 	kn->kn_hook = ev;
-
-	s = splwsevent();
-	klist_insert_locked(klist, kn);
-	splx(s);
+	klist_insert(&ev->ws_klist, kn);
 
 	return (0);
 }
@@ -230,12 +284,8 @@ void
 filt_wseventdetach(struct knote *kn)
 {
 	struct wseventvar *ev = kn->kn_hook;
-	struct klist *klist = &ev->ws_sel.si_note;
-	int s;
 
-	s = splwsevent();
-	klist_remove_locked(klist, kn);
-	splx(s);
+	klist_remove(&ev->ws_klist, kn);
 }
 
 int
@@ -243,13 +293,51 @@ filt_wseventread(struct knote *kn, long 
 {
 	struct wseventvar *ev = kn->kn_hook;
 
+	if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+		KERNEL_ASSERT_LOCKED();
+	MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
+
 	if (ev->ws_get == ev->ws_put)
 		return (0);
-
 	if (ev->ws_get < ev->ws_put)
 		kn->kn_data = ev->ws_put - ev->ws_get;
 	else
 		kn->kn_data = (WSEVENT_QSIZE - ev->ws_get) + ev->ws_put;
 
 	return (1);
+}
+
+int
+filt_wseventmodify(struct kevent *kev, struct knote *kn)
+{
+	struct wseventvar *ev = kn->kn_hook;
+	int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
+
+	if (dolock)
+		KERNEL_LOCK();
+	mtx_enter(&ev->ws_mtx);
+	active = knote_modify(kev, kn);
+	mtx_leave(&ev->ws_mtx);
+	if (dolock)
+		KERNEL_UNLOCK();
+	
+	return (active);
+}
+
+int
+filt_wseventprocess(struct knote *kn, struct kevent *kev)
+{
+	struct wseventvar *ev = kn->kn_hook;
+	int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
+
+	if (dolock)
+		KERNEL_LOCK();
+	mtx_enter(&ev->ws_mtx);
+	active = knote_process(kn, kev);
+	mtx_leave(&ev->ws_mtx);
+	if (dolock)
+		KERNEL_UNLOCK();
+	
+	return (active);
+	
 }
Index: sys/dev/wscons/wseventvar.h
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wseventvar.h,v
diff -u -p -r1.13 wseventvar.h
--- sys/dev/wscons/wseventvar.h	25 Mar 2024 13:01:49 -0000	1.13
+++ sys/dev/wscons/wseventvar.h	15 Jan 2025 15:45:01 -0000
@@ -71,8 +71,9 @@
  *	@(#)event_var.h	8.1 (Berkeley) 6/11/93
  */
 
-#include <sys/selinfo.h>
+#include <sys/event.h>
 #include <sys/sigio.h>
+#include <sys/signalvar.h>
 
 /*
  * Internal "wscons_event" queue interface for the keyboard and mouse drivers.
@@ -80,37 +81,68 @@
  * i.e., are expected to run off serial ports or similar devices.
  */
 
+/*
+ * Locks used to protect data
+ *	i	immutable
+ *	m	ws_mtx
+ */
+
+/*
+ * XXXSMP: Non WSEVENT_MPSAFE wseventvar structures rely on kernel lock
+ */
+
 /* WSEVENT_QSIZE should be a power of two so that `%' is fast */
 #define	WSEVENT_QSIZE	256	/* may need tuning; this uses 2k */
 
 struct wseventvar {
-	u_int	ws_get;			/* get (read) index (modified
+	u_int	ws_get;			/* [m] get (read) index (modified
 					    synchronously) */
-	volatile u_int ws_put;		/* put (write) index (modified by
+	volatile u_int ws_put;		/* [m] put (write) index (modified by
 					    interrupt) */
-	struct selinfo ws_sel;		/* process selecting */
+	int	ws_flags;		/* [i] flags, see below*/
+	struct mutex ws_mtx;
+	struct klist ws_klist;		/* [k] list of knotes */
 	struct sigio_ref ws_sigio;	/* async I/O registration */
-	int	ws_wanted;		/* wake up on input ready */
-	int	ws_async;		/* send SIGIO on input ready */
-	struct wscons_event *ws_q;	/* circular buffer (queue) of events */
+	int	ws_wanted;		/* [m] wake up on input ready */
+	int	ws_async;		/* [m] send SIGIO on input ready */
+	struct wscons_event *ws_q;	/* [m] circular buffer (queue) of
+					    events */
 };
 
-#define	splwsevent()	spltty()
+#define WSEVENT_MPSAFE		0x0001
+
+static inline void
+wsevent_wakeup(struct wseventvar *ev)
+{
+	int dosigio = 0;
+
+	knote(&ev->ws_klist, 0);
+
+	mtx_enter(&ev->ws_mtx);
+	if (ev->ws_wanted) {
+		ev->ws_wanted = 0;
+		wakeup(ev);
+	}
+	if (ev->ws_async)
+		dosigio = 1;
+	mtx_leave(&ev->ws_mtx);
 
-#define	WSEVENT_WAKEUP(ev) { \
-	selwakeup(&(ev)->ws_sel); \
-	if ((ev)->ws_wanted) { \
-		(ev)->ws_wanted = 0; \
-		wakeup((caddr_t)(ev)); \
-	} \
-	if ((ev)->ws_async) \
-		pgsigio(&(ev)->ws_sigio, SIGIO, 0); \
+	if (dosigio)
+		pgsigio(&ev->ws_sigio, SIGIO, 0);
 }
 
-int	wsevent_init(struct wseventvar *);
+#define	WSEVENT_WAKEUP(ev) do { wsevent_wakeup(ev); } while (0)
+
+int	wsevent_init_flags(struct wseventvar *, int);
 void	wsevent_fini(struct wseventvar *);
 int	wsevent_read(struct wseventvar *, struct uio *, int);
 int	wsevent_kqfilter(struct wseventvar *, struct knote *);
+
+static inline int
+wsevent_init(struct wseventvar *ev)
+{
+	return wsevent_init_flags(ev, 0);
+}
 
 /*
  * PWSEVENT is set just above PSOCK, which is just above TTIPRI, on the
Index: sys/dev/wscons/wskbd.c
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wskbd.c,v
diff -u -p -r1.120 wskbd.c
--- sys/dev/wscons/wskbd.c	30 Dec 2024 02:46:00 -0000	1.120
+++ sys/dev/wscons/wskbd.c	15 Jan 2025 15:45:01 -0000
@@ -625,7 +625,6 @@ wskbd_detach(struct device  *self, int f
 	struct wskbd_softc *sc = (struct wskbd_softc *)self;
 	struct wseventvar *evar;
 	int maj, mn;
-	int s;
 
 #if NWSMUX > 0
 	/* Tell parent mux we're leaving. */
@@ -647,18 +646,19 @@ wskbd_detach(struct device  *self, int f
 
 	evar = sc->sc_base.me_evp;
 	if (evar != NULL) {
-		s = spltty();
 		if (--sc->sc_refcnt >= 0) {
 			/* Wake everyone by generating a dummy event. */
+
+			mtx_enter(&evar->ws_mtx);
 			if (++evar->ws_put >= WSEVENT_QSIZE)
 				evar->ws_put = 0;
-			WSEVENT_WAKEUP(evar);
+			mtx_leave(&evar->ws_mtx);
+			wsevent_wakeup(evar);
 			/* Wait for processes to go away. */
 			if (tsleep_nsec(sc, PZERO, "wskdet", SEC_TO_NSEC(60)))
 				printf("wskbd_detach: %s didn't detach\n",
 				       sc->sc_base.me_dv.dv_xname);
 		}
-		splx(s);
 	}
 
 	free(sc->sc_map, M_DEVBUF,
@@ -763,6 +763,7 @@ wskbd_deliver_event(struct wskbd_softc *
 	}
 #endif
 
+	mtx_enter(&evar->ws_mtx);
 	put = evar->ws_put;
 	ev = &evar->ws_q[put];
 	put = (put + 1) % WSEVENT_QSIZE;
@@ -775,7 +776,8 @@ wskbd_deliver_event(struct wskbd_softc *
 	ev->value = value;
 	nanotime(&ev->time);
 	evar->ws_put = put;
-	WSEVENT_WAKEUP(evar);
+	mtx_leave(&evar->ws_mtx);
+	wsevent_wakeup(evar);
 }
 
 #ifdef WSDISPLAY_COMPAT_RAWKBD
@@ -862,7 +864,7 @@ wskbdopen(dev_t dev, int flags, int mode
 		return (EBUSY);
 
 	evar = &sc->sc_base.me_evar;
-	if (wsevent_init(evar))
+	if (wsevent_init_flags(evar, WSEVENT_MPSAFE))
 		return (EBUSY);
 
 	error = wskbd_do_open(sc, evar);
@@ -1005,7 +1007,9 @@ wskbd_do_ioctl_sc(struct wskbd_softc *sc
 	case FIOASYNC:
 		if (sc->sc_base.me_evp == NULL)
 			return (EINVAL);
+		mtx_enter(&sc->sc_base.me_evp->ws_mtx);
 		sc->sc_base.me_evp->ws_async = *(int *)data != 0;
+		mtx_leave(&sc->sc_base.me_evp->ws_mtx);
 		return (0);
 
 	case FIOGETOWN: