Download raw body.
Towards biodone() w/o KERNEL_LOCK()
Running the fault handler in parallel shifts the contention to the
KERNEL_LOCK(). That degrades i386's port bulk time but not amd64's.
The huge difference between the two is the use of swap which is under
KERNEL_LOCK().
I sent the diff below a couple of years ago to start a discussion to
take biodone() outside of the KERNEL_LOCK(). This might not be critical
for "common" use of I/O but when the system is OOM and swaps a lot it
might become a bottleneck.
This diff pushes the KERNEL_LOCK() inside the various "xs->done()"
handlers. This lock is currently necessary to serialized updates to
the buffer descriptor `bp'.
I'd like us to push the KERNEL_LOCK() into biodone() and only take it
conditionally.
What are your thoughts? Does that sound like a interesting first step?
Index: scsi/cd.c
===================================================================
RCS file: /cvs/src/sys/scsi/cd.c,v
diff -u -p -r1.266 cd.c
--- scsi/cd.c 1 Sep 2022 13:45:27 -0000 1.266
+++ scsi/cd.c 13 May 2023 11:30:55 -0000
@@ -609,6 +609,7 @@ cd_buf_done(struct scsi_xfer *xs)
struct buf *bp = xs->cookie;
int error, s;
+ KERNEL_LOCK();
switch (xs->error) {
case XS_NOERROR:
bp->b_error = 0;
@@ -641,6 +642,7 @@ cd_buf_done(struct scsi_xfer *xs)
retry:
if (xs->retries--) {
scsi_xs_exec(xs);
+ KERNEL_UNLOCK();
return;
}
/* FALLTHROUGH */
@@ -659,6 +661,7 @@ retry:
biodone(bp);
splx(s);
scsi_xs_put(xs);
+ KERNEL_UNLOCK();
}
void
Index: scsi/mpath.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath.c,v
diff -u -p -r1.58 mpath.c
--- scsi/mpath.c 13 May 2024 01:15:53 -0000 1.58
+++ scsi/mpath.c 12 Jun 2024 09:38:44 -0000
@@ -284,6 +284,7 @@ mpath_done(struct scsi_xfer *mxs)
struct mpath_dev *d = sc->sc_devs[link->target];
struct mpath_path *p;
+ KERNEL_LOCK();
switch (mxs->error) {
case XS_SELTIMEOUT: /* physical path is gone, try the next */
case XS_RESET:
@@ -296,6 +297,7 @@ mpath_done(struct scsi_xfer *mxs)
if (p != NULL)
scsi_xsh_add(&p->p_xsh);
+ KERNEL_UNLOCK();
return;
case XS_SENSE:
switch (d->d_ops->op_checksense(mxs)) {
@@ -308,6 +310,7 @@ mpath_done(struct scsi_xfer *mxs)
scsi_xs_put(mxs);
mpath_failover(d);
+ KERNEL_UNLOCK();
return;
case MPATH_SENSE_DECLINED:
break;
@@ -328,6 +331,7 @@ mpath_done(struct scsi_xfer *mxs)
scsi_xs_put(mxs);
scsi_done(xs);
+ KERNEL_UNLOCK();
}
void
Index: scsi/mpath_emc.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath_emc.c,v
diff -u -p -r1.25 mpath_emc.c
--- scsi/mpath_emc.c 2 Jul 2022 08:50:42 -0000 1.25
+++ scsi/mpath_emc.c 13 May 2023 11:30:55 -0000
@@ -241,6 +241,7 @@ emc_status_done(struct scsi_xfer *xs)
struct emc_vpd_sp_info *pg = sc->sc_pg;
int status = MPATH_S_UNKNOWN;
+ KERNEL_LOCK();
if (xs->error == XS_NOERROR) {
status = (pg->lun_state == EMC_SP_INFO_LUN_STATE_OWNED) ?
MPATH_S_ACTIVE : MPATH_S_PASSIVE;
@@ -249,6 +250,7 @@ emc_status_done(struct scsi_xfer *xs)
scsi_xs_put(xs);
mpath_path_status(&sc->sc_path, status);
+ KERNEL_UNLOCK();
}
int
Index: scsi/mpath_hds.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath_hds.c,v
diff -u -p -r1.26 mpath_hds.c
--- scsi/mpath_hds.c 2 Jul 2022 08:50:42 -0000 1.26
+++ scsi/mpath_hds.c 13 May 2023 11:30:55 -0000
@@ -242,6 +242,7 @@ hds_status_done(struct scsi_xfer *xs)
struct hds_vpd *vpd = sc->sc_vpd;
int status = MPATH_S_UNKNOWN;
+ KERNEL_LOCK();
if (xs->error == XS_NOERROR &&
_2btol(vpd->hdr.page_length) >= sizeof(vpd->state) &&
ISSET(vpd->state, HDS_VPD_VALID)) {
@@ -252,6 +253,7 @@ hds_status_done(struct scsi_xfer *xs)
scsi_xs_put(xs);
mpath_path_status(&sc->sc_path, status);
+ KERNEL_UNLOCK();
}
int
Index: scsi/mpath_rdac.c
===================================================================
RCS file: /cvs/src/sys/scsi/mpath_rdac.c,v
diff -u -p -r1.27 mpath_rdac.c
--- scsi/mpath_rdac.c 2 Jul 2022 08:50:42 -0000 1.27
+++ scsi/mpath_rdac.c 13 May 2023 11:30:55 -0000
@@ -316,6 +316,7 @@ rdac_status_done(struct scsi_xfer *xs)
struct rdac_vpd_volaccessctl *pg = sc->sc_pg;
int status = MPATH_S_UNKNOWN;
+ KERNEL_LOCK();
if (xs->error == XS_NOERROR &&
_4btol(pg->pg_id) == RDAC_VPD_ID_VOLACCESSCTL) {
status = (ISSET(pg->avtcvp, RDAC_VOLACCESSCTL_AVT) ||
@@ -325,6 +326,7 @@ rdac_status_done(struct scsi_xfer *xs)
scsi_xs_put(xs);
mpath_path_status(&sc->sc_path, status);
+ KERNEL_UNLOCK();
}
int
Index: scsi/scsi_base.c
===================================================================
RCS file: /cvs/src/sys/scsi/scsi_base.c,v
diff -u -p -r1.284 scsi_base.c
--- scsi/scsi_base.c 4 Sep 2024 07:54:53 -0000 1.284
+++ scsi/scsi_base.c 28 Sep 2024 12:55:08 -0000
@@ -1491,9 +1491,7 @@ scsi_done(struct scsi_xfer *xs)
#endif /* SCSIDEBUG */
SET(xs->flags, ITSDONE);
- KERNEL_LOCK();
xs->done(xs);
- KERNEL_UNLOCK();
}
int
@@ -1544,11 +1542,13 @@ scsi_xs_sync_done(struct scsi_xfer *xs)
if (cookie == NULL)
panic("scsi_done called twice on xs(%p)", xs);
+ KERNEL_LOCK();
mtx_enter(cookie);
xs->cookie = NULL;
if (!ISSET(xs->flags, SCSI_NOSLEEP))
wakeup_one(xs);
mtx_leave(cookie);
+ KERNEL_UNLOCK();
}
int
Index: scsi/sd.c
===================================================================
RCS file: /cvs/src/sys/scsi/sd.c,v
diff -u -p -r1.337 sd.c
--- scsi/sd.c 4 Sep 2024 07:54:53 -0000 1.337
+++ scsi/sd.c 28 Sep 2024 12:55:08 -0000
@@ -717,6 +717,7 @@ sd_buf_done(struct scsi_xfer *xs)
struct buf *bp = xs->cookie;
int error, s;
+ KERNEL_LOCK();
switch (xs->error) {
case XS_NOERROR:
bp->b_error = 0;
@@ -752,6 +753,7 @@ sd_buf_done(struct scsi_xfer *xs)
retry:
if (xs->retries--) {
scsi_xs_exec(xs);
+ KERNEL_UNLOCK();
return;
}
/* FALLTHROUGH */
@@ -771,6 +773,7 @@ retry:
biodone(bp);
splx(s);
scsi_xs_put(xs);
+ KERNEL_UNLOCK();
}
void
Index: scsi/st.c
==================================================================
RCS file: /cvs/src/sys/scsi/st.c,v
diff -u -p -r1.191 st.c
--- scsi/st.c 4 Sep 2024 07:54:53 -0000 1.191
+++ scsi/st.c 28 Sep 2024 12:55:08 -0000
@@ -965,6 +965,7 @@ st_buf_done(struct scsi_xfer *xs)
struct buf *bp = xs->cookie;
int error, s;
+ KERNEL_LOCK();
switch (xs->error) {
case XS_NOERROR:
bp->b_error = 0;
@@ -997,6 +998,7 @@ st_buf_done(struct scsi_xfer *xs)
retry:
if (xs->retries--) {
scsi_xs_exec(xs);
+ KERNEL_UNLOCK();
return;
}
/* FALLTHROUGH */
@@ -1015,6 +1017,7 @@ st_buf_done(struct scsi_xfer *xs)
biodone(bp);
splx(s);
scsi_xs_put(xs);
+ KERNEL_UNLOCK();
}
void
Towards biodone() w/o KERNEL_LOCK()