Download raw body.
Switch `so_snd' of udp(4) sockets to the new locking scheme
udp_send() and the following udp{,6}_output() do not append packets to
the `so_snd' socket buffer. This means the sosend() and sosplice() sending
paths reduce to a bare pru_send() call, so there is no problem with running
them simultaneously on the same socket.
This diff leaves udp(4) somove() under the exclusive solock() and pushes
the shared solock() deep down into sosend(), so that it is taken only around
pru_send(). Since sosend() doesn't modify `so_snd', the unlocked `so_snd'
space checks within somove() are safe. The corresponding `sb_state' and
`sb_flags' modifications are protected by the `sb_mtx' mutex(9).
As a non-obvious bonus, this diff removes the last place where sbwait() is
called with the shared netlock held. tcp(4) sockets call it with the
exclusive netlock; the rest rely on the `sb_mtx' mutex(9). This allows us to
remove the shared-netlock handling from sosleep_nsec() and to rework
solock_shared() to take `so_lock' before the netlock.
ok? No new witness asserts during sosplice and ffs/nfs regress runs.
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.336 uipc_socket.c
--- sys/kern/uipc_socket.c 14 Jun 2024 08:32:22 -0000 1.336
+++ sys/kern/uipc_socket.c 10 Jul 2024 18:48:25 -0000
@@ -158,9 +158,8 @@ soalloc(const struct protosw *prp, int w
case AF_INET6:
switch (prp->pr_type) {
case SOCK_RAW:
- so->so_snd.sb_flags |= SB_MTXLOCK;
- /* FALLTHROUGH */
case SOCK_DGRAM:
+ so->so_snd.sb_flags |= SB_MTXLOCK;
so->so_rcv.sb_flags |= SB_MTXLOCK;
break;
}
@@ -628,7 +627,7 @@ restart:
} else if (addr == NULL)
snderr(EDESTADDRREQ);
}
- space = sbspace(so, &so->so_snd);
+ space = sbspace_locked(so, &so->so_snd);
if (flags & MSG_OOB)
space += 1024;
if (so->so_proto->pr_domain->dom_family == AF_UNIX) {
@@ -1414,9 +1413,12 @@ sosplice(struct socket *so, int fd, off_
/* Splice so and sosp together. */
mtx_enter(&so->so_rcv.sb_mtx);
+ mtx_enter(&sosp->so_snd.sb_mtx);
so->so_sp->ssp_socket = sosp;
sosp->so_sp->ssp_soback = so;
+ mtx_leave(&sosp->so_snd.sb_mtx);
mtx_leave(&so->so_rcv.sb_mtx);
+
so->so_splicelen = 0;
so->so_splicemax = max;
if (tv)
@@ -1432,9 +1434,11 @@ sosplice(struct socket *so, int fd, off_
*/
if (somove(so, M_WAIT)) {
mtx_enter(&so->so_rcv.sb_mtx);
+ mtx_enter(&sosp->so_snd.sb_mtx);
so->so_rcv.sb_flags |= SB_SPLICE;
- mtx_leave(&so->so_rcv.sb_mtx);
sosp->so_snd.sb_flags |= SB_SPLICE;
+ mtx_leave(&sosp->so_snd.sb_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
}
release:
@@ -1454,11 +1458,13 @@ sounsplice(struct socket *so, struct soc
task_del(sosplice_taskq, &so->so_splicetask);
timeout_del(&so->so_idleto);
- sosp->so_snd.sb_flags &= ~SB_SPLICE;
mtx_enter(&so->so_rcv.sb_mtx);
+ mtx_enter(&sosp->so_snd.sb_mtx);
so->so_rcv.sb_flags &= ~SB_SPLICE;
+ sosp->so_snd.sb_flags &= ~SB_SPLICE;
so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
+ mtx_leave(&sosp->so_snd.sb_mtx);
mtx_leave(&so->so_rcv.sb_mtx);
/* Do not wakeup a socket that is about to be freed. */
@@ -1571,21 +1577,26 @@ somove(struct socket *so, int wait)
maxreached = 1;
}
}
- space = sbspace(sosp, &sosp->so_snd);
+ mtx_enter(&sosp->so_snd.sb_mtx);
+ space = sbspace_locked(sosp, &sosp->so_snd);
if (so->so_oobmark && so->so_oobmark < len &&
so->so_oobmark < space + 1024)
space += 1024;
if (space <= 0) {
+ mtx_leave(&sosp->so_snd.sb_mtx);
maxreached = 0;
goto release;
}
if (space < len) {
maxreached = 0;
- if (space < sosp->so_snd.sb_lowat)
+ if (space < sosp->so_snd.sb_lowat) {
+ mtx_leave(&sosp->so_snd.sb_mtx);
goto release;
+ }
len = space;
}
sosp->so_snd.sb_state |= SS_ISSENDING;
+ mtx_leave(&sosp->so_snd.sb_mtx);
SBLASTRECORDCHK(&so->so_rcv, "somove 1");
SBLASTMBUFCHK(&so->so_rcv, "somove 1");
@@ -1780,9 +1791,12 @@ somove(struct socket *so, int wait)
}
}
+ mtx_enter(&sosp->so_snd.sb_mtx);
/* Append all remaining data to drain socket. */
if (so->so_rcv.sb_cc == 0 || maxreached)
sosp->so_snd.sb_state &= ~SS_ISSENDING;
+ mtx_leave(&sosp->so_snd.sb_mtx);
+
error = pru_send(sosp, m, NULL, NULL);
if (error) {
if (sosp->so_snd.sb_state & SS_CANTSENDMORE)
@@ -1796,7 +1810,10 @@ somove(struct socket *so, int wait)
goto nextpkt;
release:
+ mtx_enter(&sosp->so_snd.sb_mtx);
sosp->so_snd.sb_state &= ~SS_ISSENDING;
+ mtx_leave(&sosp->so_snd.sb_mtx);
+
if (!error && maxreached && so->so_splicemax == so->so_splicelen)
error = EFBIG;
if (error)
@@ -2346,7 +2363,7 @@ filt_sowrite(struct knote *kn, long hint
if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
soassertlocked_readonly(so);
- kn->kn_data = sbspace(so, &so->so_snd);
+ kn->kn_data = sbspace_locked(so, &so->so_snd);
if (so->so_snd.sb_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
if (kn->kn_flags & __EV_POLL) {
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
diff -u -p -r1.156 uipc_socket2.c
--- sys/kern/uipc_socket2.c 28 Jun 2024 21:30:24 -0000 1.156
+++ sys/kern/uipc_socket2.c 10 Jul 2024 18:48:26 -0000
@@ -926,7 +926,7 @@ sbappendaddr(struct socket *so, struct s
if (n->m_next == NULL) /* keep pointer to last control buf */
break;
}
- if (space > sbspace(so, sb))
+ if (space > sbspace_locked(so, sb))
return (0);
if (asa->sa_len > MLEN)
return (0);
@@ -984,7 +984,7 @@ sbappendcontrol(struct socket *so, struc
m->m_flags &= ~M_EOR;
}
}
- if (space > sbspace(so, sb))
+ if (space > sbspace_locked(so, sb))
return (0);
n->m_next = m0; /* concatenate data to control */
Index: sys/miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v
diff -u -p -r1.106 fifo_vnops.c
--- sys/miscfs/fifofs/fifo_vnops.c 28 Jun 2024 21:30:24 -0000 1.106
+++ sys/miscfs/fifofs/fifo_vnops.c 10 Jul 2024 18:48:26 -0000
@@ -564,7 +564,7 @@ filt_fifowrite(struct knote *kn, long hi
MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
- kn->kn_data = sbspace(so, &so->so_snd);
+ kn->kn_data = sbspace_locked(so, &so->so_snd);
if (so->so_snd.sb_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
rv = 1;
Index: sys/net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
diff -u -p -r1.374 rtsock.c
--- sys/net/rtsock.c 14 Jun 2024 08:32:22 -0000 1.374
+++ sys/net/rtsock.c 10 Jul 2024 18:48:26 -0000
@@ -316,7 +316,7 @@ route_rcvd(struct socket *so)
mtx_enter(&so->so_rcv.sb_mtx);
if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
- ((sbspace(so, &so->so_rcv) == so->so_rcv.sb_hiwat)))
+ ((sbspace_locked(so, &so->so_rcv) == so->so_rcv.sb_hiwat)))
rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
mtx_leave(&so->so_rcv.sb_mtx);
}
@@ -603,7 +603,7 @@ rtm_sendup(struct socket *so, struct mbu
return (ENOMEM);
mtx_enter(&so->so_rcv.sb_mtx);
- if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
+ if (sbspace_locked(so, &so->so_rcv) < (2 * MSIZE) ||
sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0)
send_desync = 1;
mtx_leave(&so->so_rcv.sb_mtx);
Index: sys/nfs/nfs_socket.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_socket.c,v
diff -u -p -r1.150 nfs_socket.c
--- sys/nfs/nfs_socket.c 30 Apr 2024 17:05:20 -0000 1.150
+++ sys/nfs/nfs_socket.c 10 Jul 2024 18:48:26 -0000
@@ -374,7 +374,9 @@ nfs_connect(struct nfsmount *nmp, struct
mtx_enter(&so->so_rcv.sb_mtx);
so->so_rcv.sb_flags |= SB_NOINTR;
mtx_leave(&so->so_rcv.sb_mtx);
+ mtx_enter(&so->so_snd.sb_mtx);
so->so_snd.sb_flags |= SB_NOINTR;
+ mtx_leave(&so->so_snd.sb_mtx);
sounlock(so);
m_freem(mopt);
Index: sys/sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
diff -u -p -r1.131 socketvar.h
--- sys/sys/socketvar.h 17 May 2024 19:11:14 -0000 1.131
+++ sys/sys/socketvar.h 10 Jul 2024 18:48:26 -0000
@@ -237,7 +237,7 @@ sb_notify(struct socket *so, struct sock
*/
static inline long
-sbspace(struct socket *so, struct sockbuf *sb)
+sbspace_locked(struct socket *so, struct sockbuf *sb)
{
if (sb->sb_flags & SB_MTXLOCK)
sbmtxassertlocked(so, sb);
@@ -246,6 +246,19 @@ sbspace(struct socket *so, struct sockbu
return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
}
+
+static inline long
+sbspace(struct socket *so, struct sockbuf *sb)
+{
+ long ret;
+
+ sb_mtx_lock(sb);
+ ret = sbspace_locked(so, sb);
+ sb_mtx_unlock(sb);
+
+ return ret;
+}
+
/* do we have to send all at once on a socket? */
#define sosendallatonce(so) \
Switch `so_snd' of udp(4) sockets to the new locking scheme