From: Martin Pieuchot
Subject: Towards biodone() w/o KERNEL_LOCK()
To: dlg@openbsd.org
Cc: tech@openbsd.org
Date: Fri, 16 May 2025 17:03:02 +0200

Running the fault handler in parallel shifts the contention to the
KERNEL_LOCK().  That degrades i386's ports bulk build time but not
amd64's.  The huge difference between the two is the use of swap,
which is under KERNEL_LOCK().

I sent the diff below a couple of years ago to start a discussion
about taking biodone() outside of the KERNEL_LOCK().  This might not
be critical for "common" I/O, but when the system is OOM and swaps a
lot it might become a bottleneck.

This diff pushes the KERNEL_LOCK() inside the various "xs->done()"
handlers.  This lock is currently necessary to serialize updates to
the buffer descriptor `bp'.

I'd like us to push the KERNEL_LOCK() into biodone() and only take it
conditionally.

What are your thoughts?  Does that sound like an interesting first
step?

Index: scsi/cd.c
===================================================================
RCS file: /cvs/src/sys/scsi/cd.c,v
diff -u -p -r1.266 cd.c
--- scsi/cd.c	1 Sep 2022 13:45:27 -0000	1.266
+++ scsi/cd.c	13 May 2023 11:30:55 -0000
@@ -609,6 +609,7 @@ cd_buf_done(struct scsi_xfer *xs)
 	struct buf *bp = xs->cookie;
 	int error, s;
 
+	KERNEL_LOCK();
 	switch (xs->error) {
 	case XS_NOERROR:
 		bp->b_error = 0;
@@ -641,6 +642,7 @@ cd_buf_done(struct scsi_xfer *xs)
 retry:
 		if (xs->retries--) {
 			scsi_xs_exec(xs);
+			KERNEL_UNLOCK();
 			return;
 		}
 		/* FALLTHROUGH */
@@ -659,6 +661,7 @@ retry:
 	biodone(bp);
 	splx(s);
 	scsi_xs_put(xs);
+	KERNEL_UNLOCK();
 }
 
 void
Index: scsi/mpath.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath.c,v
diff -u -p -r1.58 mpath.c
--- scsi/mpath.c	13 May 2024 01:15:53 -0000	1.58
+++ scsi/mpath.c	12 Jun 2024 09:38:44 -0000
@@ -284,6 +284,7 @@ mpath_done(struct scsi_xfer *mxs)
 	struct mpath_dev *d = sc->sc_devs[link->target];
 	struct mpath_path *p;
 
+	KERNEL_LOCK();
 	switch (mxs->error) {
 	case XS_SELTIMEOUT: /* physical path is gone, try the next */
 	case XS_RESET:
@@ -296,6 +297,7 @@ mpath_done(struct scsi_xfer *mxs)
 
 		if (p != NULL)
 			scsi_xsh_add(&p->p_xsh);
+		KERNEL_UNLOCK();
 		return;
 	case XS_SENSE:
 		switch (d->d_ops->op_checksense(mxs)) {
@@ -308,6 +310,7 @@ mpath_done(struct scsi_xfer *mxs)
 
 			scsi_xs_put(mxs);
 			mpath_failover(d);
+			KERNEL_UNLOCK();
 			return;
 		case MPATH_SENSE_DECLINED:
 			break;
@@ -328,6 +331,7 @@ mpath_done(struct scsi_xfer *mxs)
 
 	scsi_xs_put(mxs);
 	scsi_done(xs);
+	KERNEL_UNLOCK();
 }
 
 void
Index: scsi/mpath_emc.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath_emc.c,v
diff -u -p -r1.25 mpath_emc.c
--- scsi/mpath_emc.c	2 Jul 2022 08:50:42 -0000	1.25
+++ scsi/mpath_emc.c	13 May 2023 11:30:55 -0000
@@ -241,6 +241,7 @@ emc_status_done(struct scsi_xfer *xs)
 	struct emc_vpd_sp_info *pg = sc->sc_pg;
 	int status = MPATH_S_UNKNOWN;
 
+	KERNEL_LOCK();
 	if (xs->error == XS_NOERROR) {
 		status = (pg->lun_state == EMC_SP_INFO_LUN_STATE_OWNED) ?
 		    MPATH_S_ACTIVE : MPATH_S_PASSIVE;
@@ -249,6 +250,7 @@ emc_status_done(struct scsi_xfer *xs)
 
 	scsi_xs_put(xs);
 	mpath_path_status(&sc->sc_path, status);
+	KERNEL_UNLOCK();
 }
 
 int
Index: scsi/mpath_hds.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath_hds.c,v
diff -u -p -r1.26 mpath_hds.c
--- scsi/mpath_hds.c	2 Jul 2022 08:50:42 -0000	1.26
+++ scsi/mpath_hds.c	13 May 2023 11:30:55 -0000
@@ -242,6 +242,7 @@ hds_status_done(struct scsi_xfer *xs)
 	struct hds_vpd *vpd = sc->sc_vpd;
 	int status = MPATH_S_UNKNOWN;
 
+	KERNEL_LOCK();
 	if (xs->error == XS_NOERROR &&
 	    _2btol(vpd->hdr.page_length) >= sizeof(vpd->state) &&
 	    ISSET(vpd->state, HDS_VPD_VALID)) {
@@ -252,6 +253,7 @@ hds_status_done(struct scsi_xfer *xs)
 
 	scsi_xs_put(xs);
 	mpath_path_status(&sc->sc_path, status);
+	KERNEL_UNLOCK();
 }
 
 int
Index: scsi/mpath_rdac.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath_rdac.c,v
diff -u -p -r1.27 mpath_rdac.c
--- scsi/mpath_rdac.c	2 Jul 2022 08:50:42 -0000	1.27
+++ scsi/mpath_rdac.c	13 May 2023 11:30:55 -0000
@@ -316,6 +316,7 @@ rdac_status_done(struct scsi_xfer *xs)
 	struct rdac_vpd_volaccessctl *pg = sc->sc_pg;
 	int status = MPATH_S_UNKNOWN;
 
+	KERNEL_LOCK();
 	if (xs->error == XS_NOERROR &&
 	    _4btol(pg->pg_id) == RDAC_VPD_ID_VOLACCESSCTL) {
 		status = (ISSET(pg->avtcvp, RDAC_VOLACCESSCTL_AVT) ||
@@ -325,6 +326,7 @@ rdac_status_done(struct scsi_xfer *xs)
 
 	scsi_xs_put(xs);
 	mpath_path_status(&sc->sc_path, status);
+	KERNEL_UNLOCK();
 }
 
 int
Index: scsi/scsi_base.c
===================================================================
RCS file: /cvs/src/sys/scsi/scsi_base.c,v
diff -u -p -r1.284 scsi_base.c
--- scsi/scsi_base.c	4 Sep 2024 07:54:53 -0000	1.284
+++ scsi/scsi_base.c	28 Sep 2024 12:55:08 -0000
@@ -1491,9 +1491,7 @@ scsi_done(struct scsi_xfer *xs)
 #endif /* SCSIDEBUG */
 
 	SET(xs->flags, ITSDONE);
-	KERNEL_LOCK();
 	xs->done(xs);
-	KERNEL_UNLOCK();
 }
 
 int
@@ -1544,11 +1542,13 @@ scsi_xs_sync_done(struct scsi_xfer *xs)
 	if (cookie == NULL)
 		panic("scsi_done called twice on xs(%p)", xs);
 
+	KERNEL_LOCK();
 	mtx_enter(cookie);
 	xs->cookie = NULL;
 	if (!ISSET(xs->flags, SCSI_NOSLEEP))
 		wakeup_one(xs);
 	mtx_leave(cookie);
+	KERNEL_UNLOCK();
 }
 
 int
Index: scsi/sd.c
===================================================================
RCS file: /cvs/src/sys/scsi/sd.c,v
diff -u -p -r1.337 sd.c
--- scsi/sd.c	4 Sep 2024 07:54:53 -0000	1.337
+++ scsi/sd.c	28 Sep 2024 12:55:08 -0000
@@ -717,6 +717,7 @@ sd_buf_done(struct scsi_xfer *xs)
 	struct buf *bp = xs->cookie;
 	int error, s;
 
+	KERNEL_LOCK();
 	switch (xs->error) {
 	case XS_NOERROR:
 		bp->b_error = 0;
@@ -752,6 +753,7 @@ sd_buf_done(struct scsi_xfer *xs)
 retry:
 		if (xs->retries--) {
 			scsi_xs_exec(xs);
+			KERNEL_UNLOCK();
 			return;
 		}
 		/* FALLTHROUGH */
@@ -771,6 +773,7 @@ retry:
 	biodone(bp);
 	splx(s);
 	scsi_xs_put(xs);
+	KERNEL_UNLOCK();
 }
 
 void
Index: scsi/st.c
===================================================================
RCS file: /cvs/src/sys/scsi/st.c,v
diff -u -p -r1.191 st.c
--- scsi/st.c	4 Sep 2024 07:54:53 -0000	1.191
+++ scsi/st.c	28 Sep 2024 12:55:08 -0000
@@ -965,6 +965,7 @@ st_buf_done(struct scsi_xfer *xs)
 	struct buf *bp = xs->cookie;
 	int error, s;
 
+	KERNEL_LOCK();
 	switch (xs->error) {
 	case XS_NOERROR:
 		bp->b_error = 0;
@@ -997,6 +998,7 @@ st_buf_done(struct scsi_xfer *xs)
 retry:
 		if (xs->retries--) {
 			scsi_xs_exec(xs);
+			KERNEL_UNLOCK();
 			return;
 		}
 		/* FALLTHROUGH */
@@ -1015,6 +1017,7 @@ retry:
 	biodone(bp);
 	splx(s);
 	scsi_xs_put(xs);
+	KERNEL_UNLOCK();
 }
 
 void
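
To make the "take the lock conditionally inside biodone()" idea a bit
more concrete, here is a minimal sketch, for discussion only and not
part of the diff above.  The B_MPSAFE_DONE flag and the
biodone_locked() helper are made-up names: issuers whose completion
path no longer needs the kernel lock would mark their buffers, and
biodone() would grab the lock for everybody else.

/*
 * Sketch only, NOT part of the diff above.  B_MPSAFE_DONE and
 * biodone_locked() are hypothetical names used for illustration.
 */
void
biodone(struct buf *bp)
{
	int dolock = !ISSET(bp->b_flags, B_MPSAFE_DONE);

	if (dolock)
		KERNEL_LOCK();

	/*
	 * Existing completion steps: run the b_iodone callback,
	 * release async buffers or wake up the issuer.
	 */
	biodone_locked(bp);

	if (dolock)
		KERNEL_UNLOCK();
}

Whether the bp updates done before biodone() in the sd/cd/st handlers
above still need the kernel lock is a separate question; the sketch
only covers the final completion call.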