Index | Thread | Search

From:
Kirill A. Korinsky <kirill@korins.ky>
Subject:
Re: wskbd(4): mp-safe filterops
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
tech@openbsd.org
Date:
Fri, 17 Jan 2025 17:12:25 +0100

Download raw body.

Thread
On Wed, 15 Jan 2025 16:46:39 +0100,
Vitaliy Makkoveev <mvs@openbsd.org> wrote:
>
> I like to resurrect my old wsconsi(4) diff and start pushing it forward
> to avoid X stuck in kernel lock during polling. I like to start with
> wskbd(4) to make the diff as small as possible. Note all except wskbd(4)
> ws*(4) drivers still rely on kernel lock even with mp-safe filterops.
>
> So, the `ws_mtx' mutex(9) protects wseventvar structure fields. I used
> dedicated `wsevent_klistops' to take kernel lock before mutex for
> the non mp-safe drivers. It will be removed after complete wsevent
> unlocking.
>
> Note, pgsigio() takes kernel lock deep within and can't be called with
> mutex(9) held, that's why I call it lockless. This the compromise to
> allow step forward.
>
> ok?
>

FWIW this changes make sense and I've tested on HONOR MagicBook Art 14, no
regression found. So, ok kirill@ with inlined nitpicks.

> Index: sys/dev/wscons/wsevent.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/wscons/wsevent.c,v
> diff -u -p -r1.28 wsevent.c
> --- sys/dev/wscons/wsevent.c	25 Mar 2024 13:01:49 -0000	1.28
> +++ sys/dev/wscons/wsevent.c	15 Jan 2025 15:45:01 -0000
> @@ -85,19 +85,61 @@
>
>  void	filt_wseventdetach(struct knote *);
>  int	filt_wseventread(struct knote *, long);
> +int	filt_wseventmodify(struct kevent *, struct knote *);
> +int	filt_wseventprocess(struct knote *, struct kevent *);
> +
> +static void
> +wsevent_klist_assertlk(void *arg)
> +{
> +	struct wseventvar *ev = arg;
> +
> +	if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
> +		KERNEL_ASSERT_LOCKED();
> +	MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
> +}
> +
> +static int
> +wsevent_klist_lock(void *arg)
> +{
> +	struct wseventvar *ev = arg;
> +

^ here a whitespaced tab

> +	if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
> +		KERNEL_LOCK();
> +	mtx_enter(&ev->ws_mtx);
> +
> +	return (0);
> +}
> +
> +static void
> +wsevent_klist_unlock(void *arg, int s)
> +{
> +	struct wseventvar *ev = arg;
> +
> +	mtx_leave(&ev->ws_mtx);
> +	if ((ev->ws_flags & WSEVENT_MPSAFE) == 0)
> +		KERNEL_UNLOCK();
> +}
> +
> +static const struct klistops wsevent_klistops = {
> +	.klo_assertlk	= wsevent_klist_assertlk,
> +	.klo_lock	= wsevent_klist_lock,
> +	.klo_unlock	= wsevent_klist_unlock,
> +};
>
>  const struct filterops wsevent_filtops = {
> -	.f_flags	= FILTEROP_ISFD,
> +	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
>  	.f_attach	= NULL,
>  	.f_detach	= filt_wseventdetach,
>  	.f_event	= filt_wseventread,
> +	.f_modify	= filt_wseventmodify,
> +	.f_process	= filt_wseventprocess,
>  };
>
>  /*
>   * Initialize a wscons_event queue.
>   */
>  int
> -wsevent_init(struct wseventvar *ev)
> +wsevent_init_flags(struct wseventvar *ev, int flags)
>  {
>  	struct wscons_event *queue;
>
> @@ -112,6 +154,10 @@ wsevent_init(struct wseventvar *ev)
>  		return (1);
>  	}
>
> +	mtx_init_flags(&ev->ws_mtx, IPL_MPFLOOR, "wsmtx", 0);
> +	klist_init(&ev->ws_klist, &wsevent_klistops, ev);
> +
> +	ev->ws_flags = flags;
>  	ev->ws_q = queue;
>  	ev->ws_get = ev->ws_put = 0;
>
> @@ -135,7 +181,7 @@ wsevent_fini(struct wseventvar *ev)
>  	free(ev->ws_q, M_DEVBUF, WSEVENT_QSIZE * sizeof(struct wscons_event));
>  	ev->ws_q = NULL;
>
> -	klist_invalidate(&ev->ws_sel.si_note);
> +	klist_invalidate(&ev->ws_klist);
>
>  	sigio_free(&ev->ws_sigio);
>  }
> @@ -147,8 +193,8 @@ wsevent_fini(struct wseventvar *ev)
>  int
>  wsevent_read(struct wseventvar *ev, struct uio *uio, int flags)
>  {
> -	int s, error;
> -	u_int cnt;
> +	int error, notwrap = 0;
> +	u_int cnt, tcnt, get;
>  	size_t n;
>
>  	/*
> @@ -156,17 +202,20 @@ wsevent_read(struct wseventvar *ev, stru
>  	 */
>  	if (uio->uio_resid < sizeof(struct wscons_event))
>  		return (EMSGSIZE);	/* ??? */
> -	s = splwsevent();
> +	n = howmany(uio->uio_resid, sizeof(struct wscons_event));
> +
> +	mtx_enter(&ev->ws_mtx);
> +
>  	while (ev->ws_get == ev->ws_put) {
>  		if (flags & IO_NDELAY) {
> -			splx(s);
> +			mtx_leave(&ev->ws_mtx);
>  			return (EWOULDBLOCK);
>  		}
>  		ev->ws_wanted = 1;
>  		error = tsleep_nsec(ev, PWSEVENT | PCATCH,
>  		    "wsevent_read", INFSLP);
>  		if (error) {
> -			splx(s);
> +			mtx_leave(&ev->ws_mtx);
>  			return (error);
>  		}
>  	}
> @@ -178,37 +227,45 @@ wsevent_read(struct wseventvar *ev, stru
>  		cnt = WSEVENT_QSIZE - ev->ws_get; /* events in [get..QSIZE) */
>  	else
>  		cnt = ev->ws_put - ev->ws_get;    /* events in [get..put) */
> -	splx(s);
> -	n = howmany(uio->uio_resid, sizeof(struct wscons_event));
> +
>  	if (cnt > n)
>  		cnt = n;
> -	error = uiomove((caddr_t)&ev->ws_q[ev->ws_get],
> -	    cnt * sizeof(struct wscons_event), uio);
> +
> +	get = ev->ws_get;
> +	tcnt = ev->ws_put;
>  	n -= cnt;
> +
> +	if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 ||
> +	    n == 0 || error || tcnt == 0)
> +		notwrap = 1;
> +	else {
> +		if (tcnt > n)
> +			tcnt = n;
> +		ev->ws_get = tcnt;
> +	}
> +
> +	mtx_leave(&ev->ws_mtx);
> +
> +	error = uiomove((caddr_t)&ev->ws_q[get],
> +	    cnt * sizeof(struct wscons_event), uio);
> +
>  	/*
>  	 * If we do not wrap to 0, used up all our space, or had an error,
>  	 * stop.  Otherwise move from front of queue to put index, if there
>  	 * is anything there to move.
>  	 */
> -	if ((ev->ws_get = (ev->ws_get + cnt) % WSEVENT_QSIZE) != 0 ||
> -	    n == 0 || error || (cnt = ev->ws_put) == 0)
> +	if (notwrap || error)
>  		return (error);
> -	if (cnt > n)
> -		cnt = n;
> +
>  	error = uiomove((caddr_t)&ev->ws_q[0],
> -	    cnt * sizeof(struct wscons_event), uio);
> -	ev->ws_get = cnt;
> +	    tcnt * sizeof(struct wscons_event), uio);
> +
>  	return (error);
>  }
>
>  int
>  wsevent_kqfilter(struct wseventvar *ev, struct knote *kn)
>  {
> -	struct klist *klist;
> -	int s;
> -
> -	klist = &ev->ws_sel.si_note;
> -
>  	switch (kn->kn_filter) {
>  	case EVFILT_READ:
>  		kn->kn_fop = &wsevent_filtops;
> @@ -218,10 +275,7 @@ wsevent_kqfilter(struct wseventvar *ev,
>  	}
>
>  	kn->kn_hook = ev;
> -
> -	s = splwsevent();
> -	klist_insert_locked(klist, kn);
> -	splx(s);
> +	klist_insert(&ev->ws_klist, kn);
>
>  	return (0);
>  }
> @@ -230,12 +284,8 @@ void
>  filt_wseventdetach(struct knote *kn)
>  {
>  	struct wseventvar *ev = kn->kn_hook;
> -	struct klist *klist = &ev->ws_sel.si_note;
> -	int s;
>
> -	s = splwsevent();
> -	klist_remove_locked(klist, kn);
> -	splx(s);
> +	klist_remove(&ev->ws_klist, kn);
>  }
>
>  int
> @@ -243,13 +293,51 @@ filt_wseventread(struct knote *kn, long
>  {
>  	struct wseventvar *ev = kn->kn_hook;
>
> +	if((ev->ws_flags & WSEVENT_MPSAFE) == 0)
> +		KERNEL_ASSERT_LOCKED();
> +	MUTEX_ASSERT_LOCKED(&ev->ws_mtx);
> +
>  	if (ev->ws_get == ev->ws_put)
>  		return (0);
> -
>  	if (ev->ws_get < ev->ws_put)
>  		kn->kn_data = ev->ws_put - ev->ws_get;
>  	else
>  		kn->kn_data = (WSEVENT_QSIZE - ev->ws_get) + ev->ws_put;
>
>  	return (1);
> +}
> +
> +int
> +filt_wseventmodify(struct kevent *kev, struct knote *kn)
> +{
> +	struct wseventvar *ev = kn->kn_hook;
> +	int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
> +
> +	if (dolock)
> +		KERNEL_LOCK();
> +	mtx_enter(&ev->ws_mtx);
> +	active = knote_modify(kev, kn);
> +	mtx_leave(&ev->ws_mtx);
> +	if (dolock)
> +		KERNEL_UNLOCK();
> +

^ here another whitespaced tab

> +	return (active);
> +}
> +
> +int
> +filt_wseventprocess(struct knote *kn, struct kevent *kev)
> +{
> +	struct wseventvar *ev = kn->kn_hook;
> +	int active, dolock = ((ev->ws_flags & WSEVENT_MPSAFE) == 0);
> +
> +	if (dolock)
> +		KERNEL_LOCK();
> +	mtx_enter(&ev->ws_mtx);
> +	active = knote_process(kn, kev);
> +	mtx_leave(&ev->ws_mtx);
> +	if (dolock)
> +		KERNEL_UNLOCK();
> +

^ and one more  whitespaced tab

> +	return (active);
> +
>  }
> Index: sys/dev/wscons/wseventvar.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/wscons/wseventvar.h,v
> diff -u -p -r1.13 wseventvar.h
> --- sys/dev/wscons/wseventvar.h	25 Mar 2024 13:01:49 -0000	1.13
> +++ sys/dev/wscons/wseventvar.h	15 Jan 2025 15:45:01 -0000
> @@ -71,8 +71,9 @@
>   *	@(#)event_var.h	8.1 (Berkeley) 6/11/93
>   */
>
> -#include <sys/selinfo.h>
> +#include <sys/event.h>
>  #include <sys/sigio.h>
> +#include <sys/signalvar.h>
>
>  /*
>   * Internal "wscons_event" queue interface for the keyboard and mouse drivers.
> @@ -80,37 +81,68 @@
>   * i.e., are expected to run off serial ports or similar devices.
>   */
>
> +/*
> + * Locks used to protect data
> + *	i	immutable
> + *	m	ws_mtx
> + */
> +
> +/*
> + * XXXSMP: Non WSEVENT_MPSAFE wseventvar structures rely on kernel lock
> + */
> +
>  /* WSEVENT_QSIZE should be a power of two so that `%' is fast */
>  #define	WSEVENT_QSIZE	256	/* may need tuning; this uses 2k */
>
>  struct wseventvar {
> -	u_int	ws_get;			/* get (read) index (modified
> +	u_int	ws_get;			/* [m] get (read) index (modified
>  					    synchronously) */
> -	volatile u_int ws_put;		/* put (write) index (modified by
> +	volatile u_int ws_put;		/* [m] put (write) index (modified by
>  					    interrupt) */
> -	struct selinfo ws_sel;		/* process selecting */
> +	int	ws_flags;		/* [i] flags, see below*/
> +	struct mutex ws_mtx;
> +	struct klist ws_klist;		/* [k] list of knotes */
>  	struct sigio_ref ws_sigio;	/* async I/O registration */
> -	int	ws_wanted;		/* wake up on input ready */
> -	int	ws_async;		/* send SIGIO on input ready */
> -	struct wscons_event *ws_q;	/* circular buffer (queue) of events */
> +	int	ws_wanted;		/* [m] wake up on input ready */
> +	int	ws_async;		/* [m] send SIGIO on input ready */
> +	struct wscons_event *ws_q;	/* [m] circular buffer (queue) of
> +					    events */
>  };
>
> -#define	splwsevent()	spltty()
> +#define WSEVENT_MPSAFE		0x0001
> +
> +static inline void
> +wsevent_wakeup(struct wseventvar *ev)
> +{
> +	int dosigio = 0;
> +
> +	knote(&ev->ws_klist, 0);
> +
> +	mtx_enter(&ev->ws_mtx);
> +	if (ev->ws_wanted) {
> +		ev->ws_wanted = 0;
> +		wakeup(ev);
> +	}
> +	if (ev->ws_async)
> +		dosigio = 1;
> +	mtx_leave(&ev->ws_mtx);
>
> -#define	WSEVENT_WAKEUP(ev) { \
> -	selwakeup(&(ev)->ws_sel); \
> -	if ((ev)->ws_wanted) { \
> -		(ev)->ws_wanted = 0; \
> -		wakeup((caddr_t)(ev)); \
> -	} \
> -	if ((ev)->ws_async) \
> -		pgsigio(&(ev)->ws_sigio, SIGIO, 0); \
> +	if (dosigio)
> +		pgsigio(&ev->ws_sigio, SIGIO, 0);
>  }
>
> -int	wsevent_init(struct wseventvar *);
> +#define	WSEVENT_WAKEUP(ev) do { wsevent_wakeup(ev); } while (0)
> +
> +int	wsevent_init_flags(struct wseventvar *, int);
>  void	wsevent_fini(struct wseventvar *);
>  int	wsevent_read(struct wseventvar *, struct uio *, int);
>  int	wsevent_kqfilter(struct wseventvar *, struct knote *);
> +
> +static inline int
> +wsevent_init(struct wseventvar *ev)
> +{
> +	return wsevent_init_flags(ev, 0);
> +}
>
>  /*
>   * PWSEVENT is set just above PSOCK, which is just above TTIPRI, on the
> Index: sys/dev/wscons/wskbd.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/wscons/wskbd.c,v
> diff -u -p -r1.120 wskbd.c
> --- sys/dev/wscons/wskbd.c	30 Dec 2024 02:46:00 -0000	1.120
> +++ sys/dev/wscons/wskbd.c	15 Jan 2025 15:45:01 -0000
> @@ -625,7 +625,6 @@ wskbd_detach(struct device  *self, int f
>  	struct wskbd_softc *sc = (struct wskbd_softc *)self;
>  	struct wseventvar *evar;
>  	int maj, mn;
> -	int s;
>
>  #if NWSMUX > 0
>  	/* Tell parent mux we're leaving. */
> @@ -647,18 +646,19 @@ wskbd_detach(struct device  *self, int f
>
>  	evar = sc->sc_base.me_evp;
>  	if (evar != NULL) {
> -		s = spltty();
>  		if (--sc->sc_refcnt >= 0) {
>  			/* Wake everyone by generating a dummy event. */
> +
> +			mtx_enter(&evar->ws_mtx);
>  			if (++evar->ws_put >= WSEVENT_QSIZE)
>  				evar->ws_put = 0;
> -			WSEVENT_WAKEUP(evar);
> +			mtx_leave(&evar->ws_mtx);
> +			wsevent_wakeup(evar);
>  			/* Wait for processes to go away. */
>  			if (tsleep_nsec(sc, PZERO, "wskdet", SEC_TO_NSEC(60)))
>  				printf("wskbd_detach: %s didn't detach\n",
>  				       sc->sc_base.me_dv.dv_xname);
>  		}
> -		splx(s);
>  	}
>
>  	free(sc->sc_map, M_DEVBUF,
> @@ -763,6 +763,7 @@ wskbd_deliver_event(struct wskbd_softc *
>  	}
>  #endif
>
> +	mtx_enter(&evar->ws_mtx);
>  	put = evar->ws_put;
>  	ev = &evar->ws_q[put];
>  	put = (put + 1) % WSEVENT_QSIZE;
> @@ -775,7 +776,8 @@ wskbd_deliver_event(struct wskbd_softc *
>  	ev->value = value;
>  	nanotime(&ev->time);
>  	evar->ws_put = put;
> -	WSEVENT_WAKEUP(evar);
> +	mtx_leave(&evar->ws_mtx);
> +	wsevent_wakeup(evar);
>  }
>
>  #ifdef WSDISPLAY_COMPAT_RAWKBD
> @@ -862,7 +864,7 @@ wskbdopen(dev_t dev, int flags, int mode
>  		return (EBUSY);
>
>  	evar = &sc->sc_base.me_evar;
> -	if (wsevent_init(evar))
> +	if (wsevent_init_flags(evar, WSEVENT_MPSAFE))
>  		return (EBUSY);
>
>  	error = wskbd_do_open(sc, evar);
> @@ -1005,7 +1007,9 @@ wskbd_do_ioctl_sc(struct wskbd_softc *sc
>  	case FIOASYNC:
>  		if (sc->sc_base.me_evp == NULL)
>  			return (EINVAL);
> +		mtx_enter(&sc->sc_base.me_evp->ws_mtx);
>  		sc->sc_base.me_evp->ws_async = *(int *)data != 0;
> +		mtx_leave(&sc->sc_base.me_evp->ws_mtx);
>  		return (0);
>
>  	case FIOGETOWN:
>

--
wbr, Kirill