wskbd(4): mp-safe filterops
On Mon, Jan 20, 2025 at 04:27:39PM +0100, Alexander Bluhm wrote:
> On Wed, Jan 15, 2025 at 06:46:39PM +0300, Vitaliy Makkoveev wrote:
> > I'd like to resurrect my old wscons(4) diff and start pushing it forward
> > to avoid X getting stuck on the kernel lock during polling. I'd like to
> > start with wskbd(4) to keep the diff as small as possible. Note that all
> > ws*(4) drivers except wskbd(4) still rely on the kernel lock even with
> > mp-safe filterops.
> >
> > So, the `ws_mtx' mutex(9) protects the wseventvar structure fields. I used
> > a dedicated `wsevent_klistops' to take the kernel lock before the mutex
> > for the non-mp-safe drivers. It will be removed once wsevent is completely
> > unlocked.
> >
> > Note, pgsigio() takes the kernel lock deep within and can't be called
> > with a mutex(9) held; that's why I call it with no locks held. This is
> > the compromise that allows a step forward.
> >
> > ok?
>
> Comments inline
>
> > + mtx_init_flags(&ev->ws_mtx, IPL_MPFLOOR, "wsmtx", 0);
>
> I would prefer an IPL_TTY here. Currently they are the same, but
> we want to lock tty interrupts in wscons. For me IPL_MPFLOOR is a
> hack that you have to use for lower levels when going MP.
>
We have a little mess here: IPL_MPFLOOR, IPL_MPSAFE. I agree, IPL_TTY is
better for consistency with the rest of wscons.
> > error = tsleep_nsec(ev, PWSEVENT | PCATCH,
>
> You sleep with mutex ws_mtx held. Should be msleep_nsec().
>
Ooops. Fixed.
> You have already dereferenced ev->ws_get. So you could use get.
> if ((ev->ws_get = (get + cnt) % WSEVENT_QSIZE) != 0 ||
>
I left `get' alone because it was introduced to avoid unlocked access, but
I could use it instead of ev->ws_get. It saves some space and keeps the
if() check on one line.
> > + n == 0 || error || tcnt == 0)
> > + notwrap = 1;
>
> Not wrapped is true. So wrap is false. Could you call the variable
> wrap and exchange 0, 1, and !
>
I followed the preceding comment, so the "(notwrap || error)" check is
stated in positive form.
> > /*
> > * If we do not wrap to 0, used up all our space, or had an error,
> > * stop. Otherwise move from front of queue to put index, if there
> > * is anything there to move.
> > */
> > + if (notwrap || error)
What about this logic? I moved '!' to the first "if()" block:
+ ev->ws_get = (get + cnt) % WSEVENT_QSIZE;
+ if (!(ev->ws_get != 0 || n == 0 || tcnt == 0)) {
+ wrap = 1;
+
+ if (tcnt > n)
+ tcnt = n;
+ ev->ws_get = tcnt;
+ }
+ /*
+ * If we do wrap to 0, move from front of queue to put index, if
+ * there is anything there to move.
+ */
+ if (wrap && error == 0) {
+ error = uiomove((caddr_t)&ev->ws_q[0],
+ tcnt * sizeof(struct wscons_event), uio);
+ }
> > +/*
> > + * Locks used to protect data
> > + * i immutable
> > + * m ws_mtx
> > + */
>
> Can we have I for immutable and K for kernel lock? This is more
> consistent.
>
Fixed.
> I would unroll this macro and directly call the function.
>
I intentionally used these wrappers to leave the rest of the wscons devices
as is. I will replace them in follow-up diffs anyway, but do you need these
hunks right now?
> You remove spl protection from sc_refcnt. Is that MP safe? I think
> the mutex should be around sc_refcnt or sc_refcnt changed to struct
> refcnt.
>
We don't share `sc_refcnt' with interrupts; all increments happen in
thread context. It seems interrupts were blocked mostly to put a dummy
event and protect `ws_put'.
Actually, we don't need any reference counting here. What does it buy us?
We don't destroy the wsevent, so concurrent read() or ioctl() threads are
safe. The detach thread will wait for opened devices in the following
vdevgone().
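For reference, converting `sc_refcnt' to the refcnt(9) API bluhm mentions
would look roughly like the sketch below; this is illustrative only (the
field would become a struct refcnt, call sites are named in comments) and
is not part of the diff:

#include <sys/refcnt.h>

/* wskbd_attach(): the driver holds the initial reference */
refcnt_init(&sc->sc_refcnt);

/* wherever sc_refcnt is currently incremented */
refcnt_take(&sc->sc_refcnt);

/* wherever it is currently decremented, waking a waiting detach */
refcnt_rele_wake(&sc->sc_refcnt);

/* wskbd_detach(): drop our reference and sleep until all are released */
refcnt_finalize(&sc->sc_refcnt, "wskdet");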
What I mostly dislike is wskbd_activate(), which leaves a sleeping read()
thread as is. It seems there should be a wakeup here.
wskbd_activate(struct device *self, int act)
{
        struct wskbd_softc *sc = (struct wskbd_softc *)self;

        if (act == DVACT_DEACTIVATE)
                sc->sc_dying = 1;
        return (0);
}
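For illustration, such a wakeup could reuse the dummy-event pattern from
wskbd_detach() in the diff below; this is only a rough sketch, not part of
the posted diff:

int
wskbd_activate(struct device *self, int act)
{
        struct wskbd_softc *sc = (struct wskbd_softc *)self;
        struct wseventvar *evar = sc->sc_base.me_evp;

        if (act == DVACT_DEACTIVATE) {
                sc->sc_dying = 1;
                if (evar != NULL) {
                        /* Wake a sleeping read() with a dummy event. */
                        mtx_enter(&evar->ws_mtx);
                        if (++evar->ws_put >= WSEVENT_QSIZE)
                                evar->ws_put = 0;
                        mtx_leave(&evar->ws_mtx);
                        wsevent_wakeup(evar);
                }
        }
        return (0);
}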
Updated diff. It also fixes the whitespace issues pointed out by kirill@.
Index: sys/dev/wscons/wsevent.c
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wsevent.c,v
retrieving revision 1.28
diff -u -p -r1.28 wsevent.c
--- sys/dev/wscons/wsevent.c 25 Mar 2024 13:01:49 -0000 1.28
+++ sys/dev/wscons/wsevent.c 20 Jan 2025 18:57:37 -0000
@@ -85,19 +85,61 @@
void filt_wseventdetach(struct knote *);
int filt_wseventread(struct knote *, long);
+int filt_wseventmodify(struct kevent *, struct knote *);
+int filt_wseventprocess(struct knote *, struct kevent *);
+
+static void
+wsevent_klist_assertlk(void *arg)
+{
+ struct wseventvar *ev = arg;
+
+ if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_ASSERT_LOCKED();
+ MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
+}
+
+static int
+wsevent_klist_lock(void *arg)
+{
+ struct wseventvar *ev = arg;
+
+ if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_LOCK();
+ mtx_enter(&ev->ws_mtx);
+
+ return (0);
+}
+
+static void
+wsevent_klist_unlock(void *arg, int s)
+{
+ struct wseventvar *ev = arg;
+
+ mtx_leave(&ev->ws_mtx);
+ if ((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_UNLOCK();
+}
+
+static const struct klistops wsevent_klistops = {
+ .klo_assertlk = wsevent_klist_assertlk,
+ .klo_lock = wsevent_klist_lock,
+ .klo_unlock = wsevent_klist_unlock,
+};
const struct filterops wsevent_filtops = {
- .f_flags = FILTEROP_ISFD,
+ .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
.f_attach = NULL,
.f_detach = filt_wseventdetach,
.f_event = filt_wseventread,
+ .f_modify = filt_wseventmodify,
+ .f_process = filt_wseventprocess,
};
/*
* Initialize a wscons_event queue.
*/
int
-wsevent_init(struct wseventvar *ev)
+wsevent_init_flags(struct wseventvar *ev, int flags)
{
struct wscons_event *queue;
@@ -112,6 +154,10 @@ wsevent_init(struct wseventvar *ev)
return (1);
}
+ mtx_init_flags(&ev->ws_mtx, IPL_TTY, "wsmtx", 0);
+ klist_init(&ev->ws_klist, &wsevent_klistops, ev);
+
+ ev->ws_flags = flags;
ev->ws_q = queue;
ev->ws_get = ev->ws_put = 0;
@@ -135,7 +181,7 @@ wsevent_fini(struct wseventvar *ev)
free(ev->ws_q, M_DEVBUF, WSEVENT_QSIZE * sizeof(struct wscons_event));
ev->ws_q = NULL;
- klist_invalidate(&ev->ws_sel.si_note);
+ klist_invalidate(&ev->ws_klist);
sigio_free(&ev->ws_sigio);
}
@@ -147,8 +193,8 @@ wsevent_fini(struct wseventvar *ev)
int
wsevent_read(struct wseventvar *ev, struct uio *uio, int flags)
{
- int s, error;
- u_int cnt;
+ int error, wrap = 0;
+ u_int cnt, tcnt, get;
size_t n;
/*
@@ -156,17 +202,20 @@ wsevent_read(struct wseventvar *ev, stru
*/
if (uio->uio_resid < sizeof(struct wscons_event))
return (EMSGSIZE); /* ??? */
- s = splwsevent();
+ n = howmany(uio->uio_resid, sizeof(struct wscons_event));
+
+ mtx_enter(&ev->ws_mtx);
+
while (ev->ws_get == ev->ws_put) {
if (flags & IO_NDELAY) {
- splx(s);
+ mtx_leave(&ev->ws_mtx);
return (EWOULDBLOCK);
}
ev->ws_wanted = 1;
- error = tsleep_nsec(ev, PWSEVENT | PCATCH,
+ error = msleep_nsec(ev, &ev->ws_mtx, PWSEVENT | PCATCH,
"wsevent_read", INFSLP);
if (error) {
- splx(s);
+ mtx_leave(&ev->ws_mtx);
return (error);
}
}
@@ -178,37 +227,43 @@ wsevent_read(struct wseventvar *ev, stru
cnt = WSEVENT_QSIZE - ev->ws_get; /* events in [get..QSIZE) */
else
cnt = ev->ws_put - ev->ws_get; /* events in [get..put) */
- splx(s);
- n = howmany(uio->uio_resid, sizeof(struct wscons_event));
+
if (cnt > n)
cnt = n;
- error = uiomove((caddr_t)&ev->ws_q[ev->ws_get],
- cnt * sizeof(struct wscons_event), uio);
+
+ get = ev->ws_get;
+ tcnt = ev->ws_put;
n -= cnt;
+
+ ev->ws_get = (get + cnt) % WSEVENT_QSIZE;
+ if (!(ev->ws_get != 0 || n == 0 || tcnt == 0)) {
+ wrap = 1;
+
+ if (tcnt > n)
+ tcnt = n;
+ ev->ws_get = tcnt;
+ }
+
+ mtx_leave(&ev->ws_mtx);
+
+ error = uiomove((caddr_t)&ev->ws_q[get],
+ cnt * sizeof(struct wscons_event), uio);
+
/*
- * If we do not wrap to 0, used up all our space, or had an error,
- * stop. Otherwise move from front of queue to put index, if there
- * is anything there to move.
+ * If we do wrap to 0, move from front of queue to put index, if
+ * there is anything there to move.
*/
- if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 ||
- n == 0 || error || (cnt = ev->ws_put) == 0)
- return (error);
- if (cnt > n)
- cnt = n;
- error = uiomove((caddr_t)&ev->ws_q[0],
- cnt * sizeof(struct wscons_event), uio);
- ev->ws_get = cnt;
+ if (wrap && error == 0) {
+ error = uiomove((caddr_t)&ev->ws_q[0],
+ tcnt * sizeof(struct wscons_event), uio);
+ }
+
return (error);
}
int
wsevent_kqfilter(struct wseventvar *ev, struct knote *kn)
{
- struct klist *klist;
- int s;
-
- klist = &ev->ws_sel.si_note;
-
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_fop = &wsevent_filtops;
@@ -218,10 +273,7 @@ wsevent_kqfilter(struct wseventvar *ev,
}
kn->kn_hook = ev;
-
- s = splwsevent();
- klist_insert_locked(klist, kn);
- splx(s);
+ klist_insert(&ev->ws_klist, kn);
return (0);
}
@@ -230,12 +282,8 @@ void
filt_wseventdetach(struct knote *kn)
{
struct wseventvar *ev = kn->kn_hook;
- struct klist *klist = &ev->ws_sel.si_note;
- int s;
- s = splwsevent();
- klist_remove_locked(klist, kn);
- splx(s);
+ klist_remove(&ev->ws_klist, kn);
}
int
@@ -243,6 +291,10 @@ filt_wseventread(struct knote *kn, long
{
struct wseventvar *ev = kn->kn_hook;
+ if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
+ KERNEL_ASSERT_LOCKED();
+ MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
+
if (ev->ws_get == ev->ws_put)
return (0);
@@ -252,4 +304,39 @@ filt_wseventread(struct knote *kn, long
kn->kn_data = (WSEVENT_QSIZE - ev->ws_get) + ev->ws_put;
return (1);
+}
+
+int
+filt_wseventmodify(struct kevent *kev, struct knote *kn)
+{
+ struct wseventvar *ev = kn->kn_hook;
+ int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
+
+ if (dolock)
+ KERNEL_LOCK();
+ mtx_enter(&ev->ws_mtx);
+ active = knote_modify(kev, kn);
+ mtx_leave(&ev->ws_mtx);
+ if (dolock)
+ KERNEL_UNLOCK();
+
+ return (active);
+}
+
+int
+filt_wseventprocess(struct knote *kn, struct kevent *kev)
+{
+ struct wseventvar *ev = kn->kn_hook;
+ int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
+
+ if (dolock)
+ KERNEL_LOCK();
+ mtx_enter(&ev->ws_mtx);
+ active = knote_process(kn, kev);
+ mtx_leave(&ev->ws_mtx);
+ if (dolock)
+ KERNEL_UNLOCK();
+
+ return (active);
+
}
Index: sys/dev/wscons/wseventvar.h
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wseventvar.h,v
retrieving revision 1.13
diff -u -p -r1.13 wseventvar.h
--- sys/dev/wscons/wseventvar.h 25 Mar 2024 13:01:49 -0000 1.13
+++ sys/dev/wscons/wseventvar.h 20 Jan 2025 18:57:37 -0000
@@ -71,8 +71,9 @@
* @(#)event_var.h 8.1 (Berkeley) 6/11/93
*/
-#include <sys/selinfo.h>
+#include <sys/event.h>
#include <sys/sigio.h>
+#include <sys/signalvar.h>
/*
* Internal "wscons_event" queue interface for the keyboard and mouse drivers.
@@ -80,37 +81,68 @@
* i.e., are expected to run off serial ports or similar devices.
*/
+/*
+ * Locks used to protect data
+ * I immutable
+ * m ws_mtx
+ */
+
+/*
+ * XXXSMP: Non WSEVENT_MPSAFE wseventvar structures rely on kernel lock
+ */
+
/* WSEVENT_QSIZE should be a power of two so that `%' is fast */
#define WSEVENT_QSIZE 256 /* may need tuning; this uses 2k */
struct wseventvar {
- u_int ws_get; /* get (read) index (modified
+ u_int ws_get; /* [m] get (read) index (modified
synchronously) */
- volatile u_int ws_put; /* put (write) index (modified by
+ volatile u_int ws_put; /* [m] put (write) index (modified by
interrupt) */
- struct selinfo ws_sel; /* process selecting */
+ int ws_flags; /* [i] flags, see below*/
+ struct mutex ws_mtx;
+ struct klist ws_klist; /* [k] list of knotes */
struct sigio_ref ws_sigio; /* async I/O registration */
- int ws_wanted; /* wake up on input ready */
- int ws_async; /* send SIGIO on input ready */
- struct wscons_event *ws_q; /* circular buffer (queue) of events */
+ int ws_wanted; /* [m] wake up on input ready */
+ int ws_async; /* [m] send SIGIO on input ready */
+ struct wscons_event *ws_q; /* [m] circular buffer (queue) of
+ events */
};
-#define splwsevent() spltty()
+#define WSEVENT_MPSAFE 0x0001
+
+static inline void
+wsevent_wakeup(struct wseventvar *ev)
+{
+ int dosigio = 0;
+
+ knote(&ev->ws_klist, 0);
+
+ mtx_enter(&ev->ws_mtx);
+ if (ev->ws_wanted) {
+ ev->ws_wanted = 0;
+ wakeup(ev);
+ }
+ if (ev->ws_async)
+ dosigio = 1;
+ mtx_leave(&ev->ws_mtx);
-#define WSEVENT_WAKEUP(ev) { \
- selwakeup(&(ev)->ws_sel); \
- if ((ev)->ws_wanted) { \
- (ev)->ws_wanted = 0; \
- wakeup((caddr_t)(ev)); \
- } \
- if ((ev)->ws_async) \
- pgsigio(&(ev)->ws_sigio, SIGIO, 0); \
+ if (dosigio)
+ pgsigio(&ev->ws_sigio, SIGIO, 0);
}
-int wsevent_init(struct wseventvar *);
+#define WSEVENT_WAKEUP(ev) do { wsevent_wakeup(ev); } while (0)
+
+int wsevent_init_flags(struct wseventvar *, int);
void wsevent_fini(struct wseventvar *);
int wsevent_read(struct wseventvar *, struct uio *, int);
int wsevent_kqfilter(struct wseventvar *, struct knote *);
+
+static inline int
+wsevent_init(struct wseventvar *ev)
+{
+ return wsevent_init_flags(ev, 0);
+}
/*
* PWSEVENT is set just above PSOCK, which is just above TTIPRI, on the
Index: sys/dev/wscons/wskbd.c
===================================================================
RCS file: /cvs/src/sys/dev/wscons/wskbd.c,v
retrieving revision 1.120
diff -u -p -r1.120 wskbd.c
--- sys/dev/wscons/wskbd.c 30 Dec 2024 02:46:00 -0000 1.120
+++ sys/dev/wscons/wskbd.c 20 Jan 2025 18:57:37 -0000
@@ -625,7 +625,6 @@ wskbd_detach(struct device *self, int f
struct wskbd_softc *sc = (struct wskbd_softc *)self;
struct wseventvar *evar;
int maj, mn;
- int s;
#if NWSMUX > 0
/* Tell parent mux we're leaving. */
@@ -647,18 +646,19 @@ wskbd_detach(struct device *self, int f
evar = sc->sc_base.me_evp;
if (evar != NULL) {
- s = spltty();
if (--sc->sc_refcnt >= 0) {
/* Wake everyone by generating a dummy event. */
+
+ mtx_enter(&evar->ws_mtx);
if (++evar->ws_put >= WSEVENT_QSIZE)
evar->ws_put = 0;
- WSEVENT_WAKEUP(evar);
+ mtx_leave(&evar->ws_mtx);
+ wsevent_wakeup(evar);
/* Wait for processes to go away. */
if (tsleep_nsec(sc, PZERO, "wskdet", SEC_TO_NSEC(60)))
printf("wskbd_detach: %s didn't detach\n",
sc->sc_base.me_dv.dv_xname);
}
- splx(s);
}
free(sc->sc_map, M_DEVBUF,
@@ -763,6 +763,7 @@ wskbd_deliver_event(struct wskbd_softc *
}
#endif
+ mtx_enter(&evar->ws_mtx);
put = evar->ws_put;
ev = &evar->ws_q[put];
put = (put + 1) % WSEVENT_QSIZE;
@@ -775,7 +776,8 @@ wskbd_deliver_event(struct wskbd_softc *
ev->value = value;
nanotime(&ev->time);
evar->ws_put = put;
- WSEVENT_WAKEUP(evar);
+ mtx_leave(&evar->ws_mtx);
+ wsevent_wakeup(evar);
}
#ifdef WSDISPLAY_COMPAT_RAWKBD
@@ -862,7 +864,7 @@ wskbdopen(dev_t dev, int flags, int mode
return (EBUSY);
evar = &sc->sc_base.me_evar;
- if (wsevent_init(evar))
+ if (wsevent_init_flags(evar, WSEVENT_MPSAFE))
return (EBUSY);
error = wskbd_do_open(sc, evar);
@@ -1005,7 +1007,9 @@ wskbd_do_ioctl_sc(struct wskbd_softc *sc
case FIOASYNC:
if (sc->sc_base.me_evp == NULL)
return (EINVAL);
+ mtx_enter(&sc->sc_base.me_evp->ws_mtx);
sc->sc_base.me_evp->ws_async = *(int *)data != 0;
+ mtx_leave(&sc->sc_base.me_evp->ws_mtx);
return (0);
case FIOGETOWN: