Download raw body.
Turn sblock() to `sb_lock' rwlock(9) wrapper for all sockets
Turn sblock() to `sb_lock' rwlock(9) wrapper for all sockets
On Sat, May 04, 2024 at 02:57:00AM +0300, Vitaliy Makkoveev wrote:
> sblock() is fine-grained lock which serializes socket send and receive
> routines on `so_rcv' or `so_snd' buffers. There is no big problem to
> wait netlock while holding sblock().
>
> So unify sblock() behaviour to all sockets. Now it should be always
> taken before solock() in all involved paths as sosend(), soreceive(),
> sorflush() and sosplice() as already done for "unlocked" socket buffers.
>
> This unification removes a lot of temporary "sb_flags & SB_MTXLOCK" code
> from sockets layer. This unification makes straight "solock()" and
> "sblock()" lock order, no more solock() -> sblock() -> sounlock() ->
> solock() -> sbunlock() -> sounlock() chains in sosend() and soreceive()
> paths. This unification brings witness(4) support for sblock(), include
> NFS involved sockets, which is useful.
>
> Since the witness(4) support was introduced to sblock() with this diff,
> some new witness reports appeared. That's normal, it's the first time
> they were exposed. In addition to [1] "lock order reversal in soreceive
> and NFS" thread, I could say that `so_snd' also produces witness
> reports during regress/sys/ffs/nfs run.
>
Updated against last tree changes. In addition, I removed unused `so'
argument from sblock()/sbunlock().
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.333 uipc_socket.c
--- sys/kern/uipc_socket.c 3 May 2024 17:43:09 -0000 1.333
+++ sys/kern/uipc_socket.c 7 May 2024 19:27:18 -0000
@@ -66,7 +66,6 @@ void soreaper(void *);
void soput(void *);
int somove(struct socket *, int);
void sorflush(struct socket *);
-void sorflush_locked(struct socket *);
void filt_sordetach(struct knote *kn);
int filt_soread(struct knote *kn, long hint);
@@ -606,11 +605,11 @@ sosend(struct socket *so, struct mbuf *a
#define snderr(errno) { error = errno; goto release; }
- if (dosolock)
- solock_shared(so);
restart:
- if ((error = sblock(so, &so->so_snd, SBLOCKWAIT(flags))) != 0)
+ if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
goto out;
+ if (dosolock)
+ solock_shared(so);
sb_mtx_lock(&so->so_snd);
so->so_snd.sb_state |= SS_ISSENDING;
do {
@@ -643,15 +642,12 @@ restart:
(atomic || space < so->so_snd.sb_lowat))) {
if (flags & MSG_DONTWAIT)
snderr(EWOULDBLOCK);
- sbunlock(so, &so->so_snd);
-
- if (so->so_snd.sb_flags & SB_MTXLOCK)
- error = sbwait_locked(so, &so->so_snd);
- else
- error = sbwait(so, &so->so_snd);
-
+ sbunlock(&so->so_snd);
+ error = sbwait(so, &so->so_snd);
so->so_snd.sb_state &= ~SS_ISSENDING;
sb_mtx_unlock(&so->so_snd);
+ if (dosolock)
+ sounlock_shared(so);
if (error)
goto out;
goto restart;
@@ -705,10 +701,10 @@ restart:
release:
so->so_snd.sb_state &= ~SS_ISSENDING;
sb_mtx_unlock(&so->so_snd);
- sbunlock(so, &so->so_snd);
-out:
if (dosolock)
sounlock_shared(so);
+ sbunlock(&so->so_snd);
+out:
m_freem(top);
m_freem(control);
return (error);
@@ -875,11 +871,11 @@ bad:
if (mp)
*mp = NULL;
+restart:
+ if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
+ return (error);
if (dosolock)
solock_shared(so);
-restart:
- if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0)
- goto out;
sb_mtx_lock(&so->so_rcv);
m = so->so_rcv.sb_mb;
@@ -944,25 +940,13 @@ restart:
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
- if (so->so_rcv.sb_flags & SB_MTXLOCK) {
- sbunlock_locked(so, &so->so_rcv);
- if (dosolock)
- sounlock_shared(so);
- error = sbwait_locked(so, &so->so_rcv);
- sb_mtx_unlock(&so->so_rcv);
- if (error)
- return (error);
- if (dosolock)
- solock_shared(so);
- } else {
- sb_mtx_unlock(&so->so_rcv);
- sbunlock(so, &so->so_rcv);
- error = sbwait(so, &so->so_rcv);
- if (error) {
- sounlock_shared(so);
- return (error);
- }
- }
+ sbunlock(&so->so_rcv);
+ error = sbwait(so, &so->so_rcv);
+ sb_mtx_unlock(&so->so_rcv);
+ if (dosolock)
+ sounlock_shared(so);
+ if (error)
+ return (error);
goto restart;
}
dontblock:
@@ -1202,21 +1186,12 @@ dontblock:
break;
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
- if (dosolock) {
+ if (sbwait(so, &so->so_rcv)) {
sb_mtx_unlock(&so->so_rcv);
- error = sbwait(so, &so->so_rcv);
- if (error) {
- sbunlock(so, &so->so_rcv);
+ if (dosolock)
sounlock_shared(so);
- return (0);
- }
- sb_mtx_lock(&so->so_rcv);
- } else {
- if (sbwait_locked(so, &so->so_rcv)) {
- sb_mtx_unlock(&so->so_rcv);
- sbunlock(so, &so->so_rcv);
- return (0);
- }
+ sbunlock(&so->so_rcv);
+ return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
nextrecord = m->m_nextpkt;
@@ -1258,7 +1233,7 @@ dontblock:
(flags & MSG_EOR) == 0 &&
(so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) {
sb_mtx_unlock(&so->so_rcv);
- sbunlock(so, &so->so_rcv);
+ sbunlock(&so->so_rcv);
goto restart;
}
@@ -1269,10 +1244,9 @@ dontblock:
*flagsp |= flags;
release:
sb_mtx_unlock(&so->so_rcv);
- sbunlock(so, &so->so_rcv);
-out:
if (dosolock)
sounlock_shared(so);
+ sbunlock(&so->so_rcv);
return (error);
}
@@ -1302,48 +1276,33 @@ soshutdown(struct socket *so, int how)
}
void
-sorflush_locked(struct socket *so)
+sorflush(struct socket *so)
{
struct sockbuf *sb = &so->so_rcv;
struct mbuf *m;
const struct protosw *pr = so->so_proto;
int error;
- if ((sb->sb_flags & SB_MTXLOCK) == 0)
- soassertlocked(so);
-
- error = sblock(so, sb, SBL_WAIT | SBL_NOINTR);
+ error = sblock(sb, SBL_WAIT | SBL_NOINTR);
/* with SBL_WAIT and SLB_NOINTR sblock() must not fail */
KASSERT(error == 0);
- if (sb->sb_flags & SB_MTXLOCK)
- solock(so);
+ solock_shared(so);
socantrcvmore(so);
- if (sb->sb_flags & SB_MTXLOCK)
- sounlock(so);
-
mtx_enter(&sb->sb_mtx);
m = sb->sb_mb;
memset(&sb->sb_startzero, 0,
(caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
sb->sb_timeo_nsecs = INFSLP;
mtx_leave(&sb->sb_mtx);
- sbunlock(so, sb);
+ sounlock_shared(so);
+ sbunlock(sb);
+
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
(*pr->pr_domain->dom_dispose)(m);
m_purge(m);
}
-void
-sorflush(struct socket *so)
-{
- if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
- solock_shared(so);
- sorflush_locked(so);
- if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
- sounlock_shared(so);
-}
-
#ifdef SOCKET_SPLICE
#define so_splicelen so_sp->ssp_len
@@ -1355,7 +1314,7 @@ sorflush(struct socket *so)
int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
- struct file *fp = NULL;
+ struct file *fp;
struct socket *sosp;
struct taskq *tq;
int error = 0;
@@ -1367,6 +1326,29 @@ sosplice(struct socket *so, int fd, off_
if (tv && (tv->tv_sec < 0 || !timerisvalid(tv)))
return (EINVAL);
+ /* If no fd is given, unsplice by removing existing link. */
+ if (fd < 0) {
+ if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
+ return (error);
+ solock(so);
+ if (so->so_options & SO_ACCEPTCONN) {
+ error = EOPNOTSUPP;
+ goto out;
+ }
+ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+ (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+ error = ENOTCONN;
+ goto out;
+ }
+
+ if (so->so_sp && so->so_sp->ssp_socket)
+ sounsplice(so, so->so_sp->ssp_socket, 0);
+ out:
+ sounlock(so);
+ sbunlock(&so->so_rcv);
+ return (error);
+ }
+
if (sosplice_taskq == NULL) {
rw_enter_write(&sosplice_lock);
if (sosplice_taskq == NULL) {
@@ -1386,65 +1368,47 @@ sosplice(struct socket *so, int fd, off_
membar_consumer();
}
- if (so->so_rcv.sb_flags & SB_MTXLOCK) {
- if ((error = sblock(so, &so->so_rcv, SBL_WAIT)) != 0)
- return (error);
- solock(so);
- } else {
- solock(so);
- if ((error = sblock(so, &so->so_rcv, SBL_WAIT)) != 0) {
- sounlock(so);
- return (error);
- }
- }
-
- if (so->so_options & SO_ACCEPTCONN) {
- error = EOPNOTSUPP;
- goto out;
- }
- if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
- (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
- error = ENOTCONN;
- goto out;
- }
- if (so->so_sp == NULL)
- so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
-
- /* If no fd is given, unsplice by removing existing link. */
- if (fd < 0) {
- if (so->so_sp->ssp_socket)
- sounsplice(so, so->so_sp->ssp_socket, 0);
- goto out;
- }
-
/* Find sosp, the drain socket where data will be spliced into. */
if ((error = getsock(curproc, fd, &fp)) != 0)
- goto out;
+ return (error);
sosp = fp->f_data;
+
if (sosp->so_proto->pr_usrreqs->pru_send !=
so->so_proto->pr_usrreqs->pru_send) {
error = EPROTONOSUPPORT;
- goto out;
+ goto frele;
}
- if (sosp->so_sp == NULL)
- sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
- if ((error = sblock(sosp, &sosp->so_snd, SBL_WAIT)) != 0) {
- goto out;
+ if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
+ goto frele;
+ if ((error = sblock(&sosp->so_snd, SBL_WAIT)) != 0) {
+ sbunlock(&so->so_rcv);
+ goto frele;
}
+ solock(so);
- if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
- error = EBUSY;
+ if ((so->so_options & SO_ACCEPTCONN) ||
+ (sosp->so_options & SO_ACCEPTCONN)) {
+ error = EOPNOTSUPP;
goto release;
}
- if (sosp->so_options & SO_ACCEPTCONN) {
- error = EOPNOTSUPP;
+ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+ (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+ error = ENOTCONN;
goto release;
}
if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
error = ENOTCONN;
goto release;
}
+ if (so->so_sp == NULL)
+ so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
+ if (sosp->so_sp == NULL)
+ sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
+ if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
+ error = EBUSY;
+ goto release;
+ }
/* Splice so and sosp together. */
mtx_enter(&so->so_rcv.sb_mtx);
@@ -1472,18 +1436,11 @@ sosplice(struct socket *so, int fd, off_
}
release:
- sbunlock(sosp, &sosp->so_snd);
- out:
- if (so->so_rcv.sb_flags & SB_MTXLOCK) {
- sounlock(so);
- sbunlock(so, &so->so_rcv);
- } else {
- sbunlock(so, &so->so_rcv);
- sounlock(so);
- }
-
- if (fp)
- FRELE(fp, curproc);
+ sounlock(so);
+ sbunlock(&sosp->so_snd);
+ sbunlock(&so->so_rcv);
+ frele:
+ FRELE(fp, curproc);
return (error);
}
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
diff -u -p -r1.154 uipc_socket2.c
--- sys/kern/uipc_socket2.c 7 May 2024 15:54:23 -0000 1.154
+++ sys/kern/uipc_socket2.c 7 May 2024 19:27:19 -0000
@@ -512,23 +512,19 @@ sbmtxassertlocked(struct socket *so, str
* Wait for data to arrive at/drain from a socket buffer.
*/
int
-sbwait_locked(struct socket *so, struct sockbuf *sb)
-{
- int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
-
- MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
-
- sb->sb_flags |= SB_WAIT;
- return msleep_nsec(&sb->sb_cc, &sb->sb_mtx, prio, "sbwait",
- sb->sb_timeo_nsecs);
-}
-
-int
sbwait(struct socket *so, struct sockbuf *sb)
{
uint64_t timeo_nsecs;
int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
+ if (sb->sb_flags & SB_MTXLOCK) {
+ MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
+
+ sb->sb_flags |= SB_WAIT;
+ return msleep_nsec(&sb->sb_cc, &sb->sb_mtx, prio, "sbwait",
+ sb->sb_timeo_nsecs);
+ }
+
soassertlocked(so);
mtx_enter(&sb->sb_mtx);
@@ -540,81 +536,26 @@ sbwait(struct socket *so, struct sockbuf
}
int
-sblock(struct socket *so, struct sockbuf *sb, int flags)
+sblock(struct sockbuf *sb, int flags)
{
- int error = 0, prio = PSOCK;
-
- if (sb->sb_flags & SB_MTXLOCK) {
- int rwflags = RW_WRITE;
-
- if (!(flags & SBL_NOINTR || sb->sb_flags & SB_NOINTR))
- rwflags |= RW_INTR;
- if (!(flags & SBL_WAIT))
- rwflags |= RW_NOSLEEP;
-
- error = rw_enter(&sb->sb_lock, rwflags);
- if (error == EBUSY)
- error = EWOULDBLOCK;
- return error;
- }
+ int rwflags = RW_WRITE, error;
- soassertlocked(so);
-
- mtx_enter(&sb->sb_mtx);
- if ((sb->sb_flags & SB_LOCK) == 0) {
- sb->sb_flags |= SB_LOCK;
- goto out;
- }
- if ((flags & SBL_WAIT) == 0) {
- error = EWOULDBLOCK;
- goto out;
- }
if (!(flags & SBL_NOINTR || sb->sb_flags & SB_NOINTR))
- prio |= PCATCH;
-
- while (sb->sb_flags & SB_LOCK) {
- sb->sb_flags |= SB_WANT;
- mtx_leave(&sb->sb_mtx);
- error = sosleep_nsec(so, &sb->sb_flags, prio, "sblock", INFSLP);
- if (error)
- return (error);
- mtx_enter(&sb->sb_mtx);
- }
- sb->sb_flags |= SB_LOCK;
-out:
- mtx_leave(&sb->sb_mtx);
-
- return (error);
-}
-
-void
-sbunlock_locked(struct socket *so, struct sockbuf *sb)
-{
- if (sb->sb_flags & SB_MTXLOCK) {
- rw_exit(&sb->sb_lock);
- return;
- }
+ rwflags |= RW_INTR;
+ if (!(flags & SBL_WAIT))
+ rwflags |= RW_NOSLEEP;
- MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
+ error = rw_enter(&sb->sb_lock, rwflags);
+ if (error == EBUSY)
+ error = EWOULDBLOCK;
- sb->sb_flags &= ~SB_LOCK;
- if (sb->sb_flags & SB_WANT) {
- sb->sb_flags &= ~SB_WANT;
- wakeup(&sb->sb_flags);
- }
+ return error;
}
void
-sbunlock(struct socket *so, struct sockbuf *sb)
+sbunlock(struct sockbuf *sb)
{
- if (sb->sb_flags & SB_MTXLOCK) {
- rw_exit(&sb->sb_lock);
- return;
- }
-
- mtx_enter(&sb->sb_mtx);
- sbunlock_locked(so, sb);
- mtx_leave(&sb->sb_mtx);
+ rw_exit(&sb->sb_lock);
}
/*
@@ -1128,7 +1069,7 @@ void
sbflush(struct socket *so, struct sockbuf *sb)
{
KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
- KASSERT((sb->sb_flags & SB_LOCK) == 0);
+ rw_assert_unlocked(&sb->sb_lock);
while (sb->sb_mbcnt)
sbdrop(so, sb, (int)sb->sb_cc);
Index: sys/sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
diff -u -p -r1.130 socketvar.h
--- sys/sys/socketvar.h 3 May 2024 17:43:09 -0000 1.130
+++ sys/sys/socketvar.h 7 May 2024 19:27:20 -0000
@@ -128,13 +128,11 @@ struct socket {
struct klist sb_klist; /* process selecting read/write */
} so_rcv, so_snd;
#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */
-#define SB_LOCK 0x0001 /* lock on data queue */
-#define SB_WANT 0x0002 /* someone is waiting to lock */
-#define SB_WAIT 0x0004 /* someone is waiting for data/space */
-#define SB_ASYNC 0x0010 /* ASYNC I/O, need signals */
-#define SB_SPLICE 0x0020 /* buffer is splice source or drain */
-#define SB_NOINTR 0x0040 /* operations not interruptible */
-#define SB_MTXLOCK 0x0080 /* sblock() doesn't need solock() */
+#define SB_WAIT 0x0001 /* someone is waiting for data/space */
+#define SB_ASYNC 0x0002 /* ASYNC I/O, need signals */
+#define SB_SPLICE 0x0004 /* buffer is splice source or drain */
+#define SB_NOINTR 0x0008 /* operations not interruptible */
+#define SB_MTXLOCK 0x0010 /* sblock() doesn't need solock() */
void (*so_upcall)(struct socket *so, caddr_t arg, int waitf);
caddr_t so_upcallarg; /* Arg for above */
@@ -315,11 +313,10 @@ sbfree(struct socket *so, struct sockbuf
* sleep is interruptible. Returns error without lock if
* sleep is interrupted.
*/
-int sblock(struct socket *, struct sockbuf *, int);
+int sblock(struct sockbuf *, int);
/* release lock on sockbuf sb */
-void sbunlock(struct socket *, struct sockbuf *);
-void sbunlock_locked(struct socket *, struct sockbuf *);
+void sbunlock(struct sockbuf *);
#define SB_EMPTY_FIXUP(sb) do { \
if ((sb)->sb_mb == NULL) { \
@@ -367,7 +364,6 @@ int sbcheckreserve(u_long, u_long);
int sbchecklowmem(void);
int sbreserve(struct socket *, struct sockbuf *, u_long);
int sbwait(struct socket *, struct sockbuf *);
-int sbwait_locked(struct socket *, struct sockbuf *);
void soinit(void);
void soabort(struct socket *);
int soaccept(struct socket *, struct mbuf *);
Turn sblock() to `sb_lock' rwlock(9) wrapper for all sockets
Turn sblock() to `sb_lock' rwlock(9) wrapper for all sockets