Download raw body.
Use `sb_mtx' instead of `inp_mtx' in receive path for inet sockets
In soreceve(), we only touch `so_rcv' socket buffer, which has it's own
`sb_mtx' mutex(9) for protection. So, we can avoid solock() in this
path - it's enough to hold `sb_mtx' in soreceive() and around
corresponding sbappend*(). But not right now :)
This time we use shared netlock for some inet sockets in the soreceive()
path. To protect `so_rcv' buffer we use `inp_mtx' mutex(9) and the
prul_lock() to acquire this mutex(9) at socket layer. But the `inp_mtx'
mutex belongs to the PCB. We initialize socket before PCB, tcp(4)
sockets could exist without PCB, so I want to use different locks for
PCB and socket layers of socket.
This diff mechanically replaces `inp_mtx' by `sb_mtx' in the receive
path. Only for sockets which already use `inp_mtx'. All other sockets
left as is. They will be converted later.
Since the `sb_mtx' is optional, the new SB_MTXLOCK flag introduced. If
this flag is set on `sb_flags', the `sb_mtx' mutex(9) should be taken.
New sb_mtx_lock() and sb_mtx_unlock() was introduced to hide this check.
They are temporary and will be replaced by mtx_enter() when all this
area will be converted to `sb_mtx' mutex(9).
Also, the new sbmtxassertlocked() function introduced to throw
corresponding assertion for SB_MTXLOCK marked buffers. This time only
sbappendaddr() calls it. This function is also temporary and will be
replaced by MTX_ASSERT_LOCKED() later.
This naming could be strange, bu now we sblock() for different purpose,
so I want to keep it as is for a while.
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.316
diff -u -p -r1.316 uipc_socket.c
--- sys/kern/uipc_socket.c 3 Feb 2024 22:50:08 -0000 1.316
+++ sys/kern/uipc_socket.c 4 Feb 2024 17:08:47 -0000
@@ -150,6 +150,18 @@ soalloc(const struct domain *dp, int wai
TAILQ_INIT(&so->so_q0);
TAILQ_INIT(&so->so_q);
+ switch (dp->dom_family) {
+ case AF_INET:
+ case AF_INET6:
+ switch (dp->dom_protosw->pr_type) {
+ case SOCK_DGRAM:
+ case SOCK_RAW:
+ so->so_rcv.sb_flags |= SB_MTXLOCK;
+ break;
+ }
+ break;
+ }
+
return (so);
}
@@ -825,7 +837,7 @@ restart:
sounlock_shared(so);
return (error);
}
- pru_lock(so);
+ sb_mtx_lock(&so->so_rcv);
m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
@@ -889,7 +901,7 @@ restart:
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
sbunlock(so, &so->so_rcv);
- pru_unlock(so);
+ sb_mtx_unlock(&so->so_rcv);
error = sbwait(so, &so->so_rcv);
if (error) {
sounlock_shared(so);
@@ -961,13 +973,13 @@ dontblock:
sbsync(&so->so_rcv, nextrecord);
if (controlp) {
if (pr->pr_domain->dom_externalize) {
- pru_unlock(so);
+ sb_mtx_unlock(&so->so_rcv);
sounlock_shared(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
solock_shared(so);
- pru_lock(so);
+ sb_mtx_lock(&so->so_rcv);
}
*controlp = cm;
} else {
@@ -1041,11 +1053,11 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
- pru_unlock(so);
+ sb_mtx_unlock(&so->so_rcv);
sounlock_shared(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
solock_shared(so);
- pru_lock(so);
+ sb_mtx_lock(&so->so_rcv);
if (uio_error)
uio->uio_resid = resid - len;
} else
@@ -1127,14 +1139,14 @@ dontblock:
break;
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
- pru_unlock(so);
+ sb_mtx_unlock(&so->so_rcv);
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
sounlock_shared(so);
return (0);
}
- pru_lock(so);
+ sb_mtx_lock(&so->so_rcv);
if ((m = so->so_rcv.sb_mb) != NULL)
nextrecord = m->m_nextpkt;
}
@@ -1168,7 +1180,7 @@ dontblock:
(flags & MSG_EOR) == 0 &&
(so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) {
sbunlock(so, &so->so_rcv);
- pru_unlock(so);
+ sb_mtx_unlock(&so->so_rcv);
goto restart;
}
@@ -1179,7 +1191,7 @@ dontblock:
*flagsp |= flags;
release:
sbunlock(so, &so->so_rcv);
- pru_unlock(so);
+ sb_mtx_unlock(&so->so_rcv);
sounlock_shared(so);
return (error);
}
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.141
diff -u -p -r1.141 uipc_socket2.c
--- sys/kern/uipc_socket2.c 3 Feb 2024 22:50:08 -0000 1.141
+++ sys/kern/uipc_socket2.c 4 Feb 2024 17:08:47 -0000
@@ -500,6 +500,16 @@ sosleep_nsec(struct socket *so, void *id
return ret;
}
+void
+sbmtxassertlocked(struct socket *so, struct sockbuf *sb)
+{
+ if (sb->sb_flags & SB_MTXLOCK) {
+ if (splassert_ctl > 0 && mtx_owned(&sb->sb_mtx) == 0)
+ splassert_fail(0, RW_WRITE, __func__);
+ } else
+ soassertlocked(so);
+}
+
/*
* Wait for data to arrive at/drain from a socket buffer.
*/
@@ -919,7 +929,7 @@ sbappendaddr(struct socket *so, struct s
struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
- soassertlocked(so);
+ sbmtxassertlocked(so, sb);
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
panic("sbappendaddr");
Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.93
diff -u -p -r1.93 ip_divert.c
--- sys/netinet/ip_divert.c 3 Feb 2024 22:50:09 -0000 1.93
+++ sys/netinet/ip_divert.c 4 Feb 2024 17:08:47 -0000
@@ -241,14 +241,14 @@ divert_packet(struct mbuf *m, int dir, u
in_proto_cksum_out(m, NULL);
}
- mtx_enter(&inp->inp_mtx);
so = inp->inp_socket;
+ mtx_enter(&so->so_rcv.sb_mtx);
if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) {
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
divstat_inc(divs_fullsock);
goto bad;
}
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
sorwakeup(so);
in_pcbunref(inp);
Index: sys/netinet/ip_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.140
diff -u -p -r1.140 ip_mroute.c
--- sys/netinet/ip_mroute.c 6 Dec 2023 09:27:17 -0000 1.140
+++ sys/netinet/ip_mroute.c 4 Feb 2024 17:08:47 -0000
@@ -1051,12 +1051,11 @@ int
socket_send(struct socket *so, struct mbuf *mm, struct sockaddr_in *src)
{
if (so != NULL) {
- struct inpcb *inp = sotoinpcb(so);
int ret;
- mtx_enter(&inp->inp_mtx);
+ mtx_enter(&so->so_rcv.sb_mtx);
ret = sbappendaddr(so, &so->so_rcv, sintosa(src), mm, NULL);
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
if (ret != 0) {
sorwakeup(so);
Index: sys/netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.155
diff -u -p -r1.155 raw_ip.c
--- sys/netinet/raw_ip.c 3 Feb 2024 22:50:09 -0000 1.155
+++ sys/netinet/raw_ip.c 4 Feb 2024 17:08:47 -0000
@@ -220,24 +220,24 @@ rip_input(struct mbuf **mp, int *offp, i
else
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n != NULL) {
+ struct socket *so = inp->inp_socket;
int ret;
if (inp->inp_flags & INP_CONTROLOPTS ||
inp->inp_socket->so_options & SO_TIMESTAMP)
ip_savecontrol(inp, &opts, ip, n);
- mtx_enter(&inp->inp_mtx);
- ret = sbappendaddr(inp->inp_socket,
- &inp->inp_socket->so_rcv,
+ mtx_enter(&so->so_rcv.sb_mtx);
+ ret = sbappendaddr(so, &so->so_rcv,
sintosa(&ripsrc), n, opts);
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
if (ret == 0) {
/* should notify about lost packet */
m_freem(n);
m_freem(opts);
} else
- sorwakeup(inp->inp_socket);
+ sorwakeup(so);
}
in_pcbunref(inp);
}
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.317
diff -u -p -r1.317 udp_usrreq.c
--- sys/netinet/udp_usrreq.c 3 Feb 2024 22:50:09 -0000 1.317
+++ sys/netinet/udp_usrreq.c 4 Feb 2024 17:08:47 -0000
@@ -697,15 +697,15 @@ udp_sbappend(struct inpcb *inp, struct m
#endif
m_adj(m, hlen);
- mtx_enter(&inp->inp_mtx);
+ mtx_enter(&so->so_rcv.sb_mtx);
if (sbappendaddr(so, &so->so_rcv, srcaddr, m, opts) == 0) {
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
udpstat_inc(udps_fullsock);
m_freem(m);
m_freem(opts);
return;
}
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
sorwakeup(so);
}
Index: sys/netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.92
diff -u -p -r1.92 ip6_divert.c
--- sys/netinet6/ip6_divert.c 3 Feb 2024 22:50:09 -0000 1.92
+++ sys/netinet6/ip6_divert.c 4 Feb 2024 17:08:47 -0000
@@ -248,14 +248,14 @@ divert6_packet(struct mbuf *m, int dir,
in6_proto_cksum_out(m, NULL);
}
- mtx_enter(&inp->inp_mtx);
so = inp->inp_socket;
+ mtx_enter(&so->so_rcv.sb_mtx);
if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) {
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
div6stat_inc(div6s_fullsock);
goto bad;
}
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
sorwakeup(so);
in_pcbunref(inp);
Index: sys/netinet6/ip6_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_mroute.c,v
retrieving revision 1.139
diff -u -p -r1.139 ip6_mroute.c
--- sys/netinet6/ip6_mroute.c 3 Feb 2024 22:50:09 -0000 1.139
+++ sys/netinet6/ip6_mroute.c 4 Feb 2024 17:08:47 -0000
@@ -856,17 +856,16 @@ int
socket6_send(struct socket *so, struct mbuf *mm, struct sockaddr_in6 *src)
{
if (so != NULL) {
- struct inpcb *inp = sotoinpcb(so);
int ret;
- mtx_enter(&inp->inp_mtx);
+ mtx_enter(&so->so_rcv.sb_mtx);
ret = sbappendaddr(so, &so->so_rcv, sin6tosa(src), mm, NULL);
- if (ret != 0)
- sorwakeup(so);
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
- if (ret != 0)
+ if (ret != 0) {
+ sorwakeup(so);
return 0;
+ }
}
m_freem(mm);
return -1;
Index: sys/netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.180
diff -u -p -r1.180 raw_ip6.c
--- sys/netinet6/raw_ip6.c 3 Feb 2024 22:50:09 -0000 1.180
+++ sys/netinet6/raw_ip6.c 4 Feb 2024 17:08:47 -0000
@@ -263,6 +263,7 @@ rip6_input(struct mbuf **mp, int *offp,
else
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n != NULL) {
+ struct socket *so = inp->inp_socket;
int ret;
if (inp->inp_flags & IN6P_CONTROLOPTS)
@@ -270,11 +271,10 @@ rip6_input(struct mbuf **mp, int *offp,
/* strip intermediate headers */
m_adj(n, *offp);
- mtx_enter(&inp->inp_mtx);
- ret = sbappendaddr(inp->inp_socket,
- &inp->inp_socket->so_rcv,
+ mtx_enter(&so->so_rcv.sb_mtx);
+ ret = sbappendaddr(so, &so->so_rcv,
sin6tosa(&rip6src), n, opts);
- mtx_leave(&inp->inp_mtx);
+ mtx_leave(&so->so_rcv.sb_mtx);
if (ret == 0) {
/* should notify about lost packet */
@@ -282,7 +282,7 @@ rip6_input(struct mbuf **mp, int *offp,
m_freem(opts);
rip6stat_inc(rip6s_fullsock);
} else
- sorwakeup(inp->inp_socket);
+ sorwakeup(so);
}
in_pcbunref(inp);
}
Index: sys/sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.122
diff -u -p -r1.122 socketvar.h
--- sys/sys/socketvar.h 3 Feb 2024 22:50:09 -0000 1.122
+++ sys/sys/socketvar.h 4 Feb 2024 17:08:47 -0000
@@ -134,6 +134,7 @@ struct socket {
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
#define SB_SPLICE 0x20 /* buffer is splice source or drain */
#define SB_NOINTR 0x40 /* operations not interruptible */
+#define SB_MTXLOCK 0x80 /* use sb_mtx for sockbuf protection */
void (*so_upcall)(struct socket *so, caddr_t arg, int waitf);
caddr_t so_upcallarg; /* Arg for above */
@@ -196,6 +197,22 @@ sorele(struct socket *so)
#define isspliced(so) ((so)->so_sp && (so)->so_sp->ssp_socket)
#define issplicedback(so) ((so)->so_sp && (so)->so_sp->ssp_soback)
+
+static inline void
+sb_mtx_lock(struct sockbuf *sb)
+{
+ if (sb->sb_flags & SB_MTXLOCK)
+ mtx_enter(&sb->sb_mtx);
+}
+
+static inline void
+sb_mtx_unlock(struct sockbuf *sb)
+{
+ if (sb->sb_flags & SB_MTXLOCK)
+ mtx_leave(&sb->sb_mtx);
+}
+
+void sbmtxassertlocked(struct socket *so, struct sockbuf *);
/*
* Do we need to notify the other side when I/O is possible?
Use `sb_mtx' instead of `inp_mtx' in receive path for inet sockets