From: Claudio Jeker Subject: Re: Towards biodone() w/o KERNEL_LOCK() To: dlg@openbsd.org, tech@openbsd.org Date: Sat, 17 May 2025 14:15:35 +0200 On Fri, May 16, 2025 at 05:03:02PM +0200, Martin Pieuchot wrote: > Running the fault handler in parallel shifts the contention to the > KERNEL_LOCK(). That degrades i386's port bulk time but not amd64's. > The huge difference between the two is the use of swap which is under > KERNEL_LOCK(). > > I sent the diff below a couple of years ago to start a discussion to > take biodone() outside of the KERNEL_LOCK(). This might not be critical > for "common" use of I/O but when the system is OOM and swaps a lot it > might become a bottleneck. > > This diff pushes the KERNEL_LOCK() inside the various "xs->done()" > handlers. This lock is currently necessary to serialized updates to > the buffer descriptor `bp'. > > I'd like us to push the KERNEL_LOCK() into biodone() and only take it > conditionally. > > What are your thoughts? Does that sound like a interesting first step? I very much agree that we want to unlock biodone. Since this is currently one major bottle neck in the IO path. I tried to unlock mfs and while I managed to run most of mfs without lock the problem was that biodone would spin on the kernel lock competing with the other side trying to push IO out. On top of this unlocking biodone would allow to start unlocking the buffer cache since then we have at least one path that can run concurrently. > Index: scsi/cd.c > =================================================================== > RCS file: /cvs/src/sys/scsi/cd.c,v > diff -u -p -r1.266 cd.c > --- scsi/cd.c 1 Sep 2022 13:45:27 -0000 1.266 > +++ scsi/cd.c 13 May 2023 11:30:55 -0000 > @@ -609,6 +609,7 @@ cd_buf_done(struct scsi_xfer *xs) > struct buf *bp = xs->cookie; > int error, s; > > + KERNEL_LOCK(); > switch (xs->error) { > case XS_NOERROR: > bp->b_error = 0; > @@ -641,6 +642,7 @@ cd_buf_done(struct scsi_xfer *xs) > retry: > if (xs->retries--) { > scsi_xs_exec(xs); > + KERNEL_UNLOCK(); > return; > } > /* FALLTHROUGH */ > @@ -659,6 +661,7 @@ retry: > biodone(bp); > splx(s); > scsi_xs_put(xs); > + KERNEL_UNLOCK(); > } > > void > Index: scsi/mpath.c > =================================================================== > RCS file: /cvs/src/sys/scsi/mpath.c,v > diff -u -p -r1.58 mpath.c > --- scsi/mpath.c 13 May 2024 01:15:53 -0000 1.58 > +++ scsi/mpath.c 12 Jun 2024 09:38:44 -0000 > @@ -284,6 +284,7 @@ mpath_done(struct scsi_xfer *mxs) > struct mpath_dev *d = sc->sc_devs[link->target]; > struct mpath_path *p; > > + KERNEL_LOCK(); > switch (mxs->error) { > case XS_SELTIMEOUT: /* physical path is gone, try the next */ > case XS_RESET: > @@ -296,6 +297,7 @@ mpath_done(struct scsi_xfer *mxs) > > if (p != NULL) > scsi_xsh_add(&p->p_xsh); > + KERNEL_UNLOCK(); > return; > case XS_SENSE: > switch (d->d_ops->op_checksense(mxs)) { > @@ -308,6 +310,7 @@ mpath_done(struct scsi_xfer *mxs) > scsi_xs_put(mxs); > > mpath_failover(d); > + KERNEL_UNLOCK(); > return; > case MPATH_SENSE_DECLINED: > break; > @@ -328,6 +331,7 @@ mpath_done(struct scsi_xfer *mxs) > scsi_xs_put(mxs); > > scsi_done(xs); > + KERNEL_UNLOCK(); > } > > void > Index: scsi/mpath_emc.c > =================================================================== > RCS file: /cvs/src/sys/scsi/mpath_emc.c,v > diff -u -p -r1.25 mpath_emc.c > --- scsi/mpath_emc.c 2 Jul 2022 08:50:42 -0000 1.25 > +++ scsi/mpath_emc.c 13 May 2023 11:30:55 -0000 > @@ -241,6 +241,7 @@ emc_status_done(struct scsi_xfer *xs) > struct emc_vpd_sp_info *pg = sc->sc_pg; > int status = MPATH_S_UNKNOWN; > > + KERNEL_LOCK(); > if (xs->error == XS_NOERROR) { > status = (pg->lun_state == EMC_SP_INFO_LUN_STATE_OWNED) ? > MPATH_S_ACTIVE : MPATH_S_PASSIVE; > @@ -249,6 +250,7 @@ emc_status_done(struct scsi_xfer *xs) > scsi_xs_put(xs); > > mpath_path_status(&sc->sc_path, status); > + KERNEL_UNLOCK(); > } > > int > Index: scsi/mpath_hds.c > =================================================================== > RCS file: /cvs/src/sys/scsi/mpath_hds.c,v > diff -u -p -r1.26 mpath_hds.c > --- scsi/mpath_hds.c 2 Jul 2022 08:50:42 -0000 1.26 > +++ scsi/mpath_hds.c 13 May 2023 11:30:55 -0000 > @@ -242,6 +242,7 @@ hds_status_done(struct scsi_xfer *xs) > struct hds_vpd *vpd = sc->sc_vpd; > int status = MPATH_S_UNKNOWN; > > + KERNEL_LOCK(); > if (xs->error == XS_NOERROR && > _2btol(vpd->hdr.page_length) >= sizeof(vpd->state) && > ISSET(vpd->state, HDS_VPD_VALID)) { > @@ -252,6 +253,7 @@ hds_status_done(struct scsi_xfer *xs) > scsi_xs_put(xs); > > mpath_path_status(&sc->sc_path, status); > + KERNEL_UNLOCK(); > } > > int > Index: scsi/mpath_rdac.c > =================================================================== > RCS file: /cvs/src/sys/scsi/mpath_rdac.c,v > diff -u -p -r1.27 mpath_rdac.c > --- scsi/mpath_rdac.c 2 Jul 2022 08:50:42 -0000 1.27 > +++ scsi/mpath_rdac.c 13 May 2023 11:30:55 -0000 > @@ -316,6 +316,7 @@ rdac_status_done(struct scsi_xfer *xs) > struct rdac_vpd_volaccessctl *pg = sc->sc_pg; > int status = MPATH_S_UNKNOWN; > > + KERNEL_LOCK(); > if (xs->error == XS_NOERROR && > _4btol(pg->pg_id) == RDAC_VPD_ID_VOLACCESSCTL) { > status = (ISSET(pg->avtcvp, RDAC_VOLACCESSCTL_AVT) || > @@ -325,6 +326,7 @@ rdac_status_done(struct scsi_xfer *xs) > > scsi_xs_put(xs); > mpath_path_status(&sc->sc_path, status); > + KERNEL_UNLOCK(); > } > > int > Index: scsi/scsi_base.c > =================================================================== > RCS file: /cvs/src/sys/scsi/scsi_base.c,v > diff -u -p -r1.284 scsi_base.c > --- scsi/scsi_base.c 4 Sep 2024 07:54:53 -0000 1.284 > +++ scsi/scsi_base.c 28 Sep 2024 12:55:08 -0000 > @@ -1491,9 +1491,7 @@ scsi_done(struct scsi_xfer *xs) > #endif /* SCSIDEBUG */ > > SET(xs->flags, ITSDONE); > - KERNEL_LOCK(); > xs->done(xs); > - KERNEL_UNLOCK(); > } > > int > @@ -1544,11 +1542,13 @@ scsi_xs_sync_done(struct scsi_xfer *xs) > if (cookie == NULL) > panic("scsi_done called twice on xs(%p)", xs); > > + KERNEL_LOCK(); > mtx_enter(cookie); > xs->cookie = NULL; > if (!ISSET(xs->flags, SCSI_NOSLEEP)) > wakeup_one(xs); > mtx_leave(cookie); > + KERNEL_UNLOCK(); > } > > int > Index: scsi/sd.c > =================================================================== > RCS file: /cvs/src/sys/scsi/sd.c,v > diff -u -p -r1.337 sd.c > --- scsi/sd.c 4 Sep 2024 07:54:53 -0000 1.337 > +++ scsi/sd.c 28 Sep 2024 12:55:08 -0000 > @@ -717,6 +717,7 @@ sd_buf_done(struct scsi_xfer *xs) > struct buf *bp = xs->cookie; > int error, s; > > + KERNEL_LOCK(); > switch (xs->error) { > case XS_NOERROR: > bp->b_error = 0; > @@ -752,6 +753,7 @@ sd_buf_done(struct scsi_xfer *xs) > retry: > if (xs->retries--) { > scsi_xs_exec(xs); > + KERNEL_UNLOCK(); > return; > } > /* FALLTHROUGH */ > @@ -771,6 +773,7 @@ retry: > biodone(bp); > splx(s); > scsi_xs_put(xs); > + KERNEL_UNLOCK(); > } > > void > Index: scsi/st.c > ================================================================== > RCS file: /cvs/src/sys/scsi/st.c,v > diff -u -p -r1.191 st.c > --- scsi/st.c 4 Sep 2024 07:54:53 -0000 1.191 > +++ scsi/st.c 28 Sep 2024 12:55:08 -0000 > @@ -965,6 +965,7 @@ st_buf_done(struct scsi_xfer *xs) > struct buf *bp = xs->cookie; > int error, s; > > + KERNEL_LOCK(); > switch (xs->error) { > case XS_NOERROR: > bp->b_error = 0; > @@ -997,6 +998,7 @@ st_buf_done(struct scsi_xfer *xs) > retry: > if (xs->retries--) { > scsi_xs_exec(xs); > + KERNEL_UNLOCK(); > return; > } > /* FALLTHROUGH */ > @@ -1015,6 +1017,7 @@ st_buf_done(struct scsi_xfer *xs) > biodone(bp); > splx(s); > scsi_xs_put(xs); > + KERNEL_UNLOCK(); > } > > void > > -- :wq Claudio