Download raw body.
tcp(4): soreceive() with shared netlock
We only need to protect `so_rcv' with `sb_mtx' mutex(9) within
soreceive() and corresponding sbappendstream() within PCB layer.
This time the SS_CANTRCVMORE check could be left unlocked at PCB layer,
but I want do it locked from now.
I propose this diff mostly for tests. At least I don't like the
relocking around pru_rcvd(), but if the benefits will be significant,
we could keep it for a while.
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.319
diff -u -p -r1.319 uipc_socket.c
--- sys/kern/uipc_socket.c 11 Feb 2024 21:36:49 -0000 1.319
+++ sys/kern/uipc_socket.c 11 Feb 2024 23:38:28 -0000
@@ -153,12 +153,7 @@ soalloc(const struct domain *dp, int wai
switch (dp->dom_family) {
case AF_INET:
case AF_INET6:
- switch (dp->dom_protosw->pr_type) {
- case SOCK_DGRAM:
- case SOCK_RAW:
- so->so_rcv.sb_flags |= SB_MTXLOCK;
- break;
- }
+ so->so_rcv.sb_flags |= SB_MTXLOCK;
break;
}
@@ -831,10 +826,10 @@ bad:
if (mp)
*mp = NULL;
- solock_shared(so);
+ solock_recv(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
- sounlock_shared(so);
+ sounlock_recv(so);
return (error);
}
sb_mtx_lock(&so->so_rcv);
@@ -974,11 +969,11 @@ dontblock:
if (controlp) {
if (pr->pr_domain->dom_externalize) {
sb_mtx_unlock(&so->so_rcv);
- sounlock_shared(so);
+ sounlock_recv(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
- solock_shared(so);
+ solock_recv(so);
sb_mtx_lock(&so->so_rcv);
}
*controlp = cm;
@@ -1054,9 +1049,9 @@ dontblock:
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
sb_mtx_unlock(&so->so_rcv);
- sounlock_shared(so);
+ sounlock_recv(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
- solock_shared(so);
+ solock_recv(so);
sb_mtx_lock(&so->so_rcv);
if (uio_error)
uio->uio_resid = resid - len;
@@ -1173,8 +1168,19 @@ dontblock:
}
SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
- if (pr->pr_flags & PR_WANTRCVD)
+ if (pr->pr_flags & PR_WANTRCVD) {
+ sb_mtx_unlock(&so->so_rcv);
+ if (so->so_proto->pr_protocol != AF_UNIX) {
+ sounlock_recv(so);
+ solock(so);
+ }
pru_rcvd(so);
+ if (so->so_proto->pr_protocol != AF_UNIX) {
+ sounlock(so);
+ solock_recv(so);
+ }
+ sb_mtx_lock(&so->so_rcv);
+ }
}
if (orig_resid == uio->uio_resid && orig_resid &&
(flags & MSG_EOR) == 0 &&
@@ -1192,7 +1198,7 @@ dontblock:
release:
sb_mtx_unlock(&so->so_rcv);
sbunlock(so, &so->so_rcv);
- sounlock_shared(so);
+ sounlock_recv(so);
return (error);
}
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.143
diff -u -p -r1.143 uipc_socket2.c
--- sys/kern/uipc_socket2.c 11 Feb 2024 18:14:26 -0000 1.143
+++ sys/kern/uipc_socket2.c 11 Feb 2024 23:38:28 -0000
@@ -382,6 +382,21 @@ solock_shared(struct socket *so)
}
}
+void
+solock_recv(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ NET_LOCK_SHARED();
+ rw_enter_write(&so->so_lock);
+ break;
+ default:
+ rw_enter_write(&so->so_lock);
+ break;
+ }
+}
+
int
solock_persocket(struct socket *so)
{
@@ -443,6 +458,21 @@ sounlock_shared(struct socket *so)
}
void
+sounlock_recv(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ rw_exit_write(&so->so_lock);
+ NET_UNLOCK_SHARED();
+ break;
+ default:
+ rw_exit_write(&so->so_lock);
+ break;
+ }
+}
+
+void
soassertlocked_readonly(struct socket *so)
{
switch (so->so_proto->pr_domain->dom_family) {
@@ -486,15 +516,11 @@ sosleep_nsec(struct socket *so, void *id
switch (so->so_proto->pr_domain->dom_family) {
case PF_INET:
case PF_INET6:
- if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
- rw_status(&netlock) == RW_READ) {
+ if (rw_status(&netlock) == RW_READ)
rw_exit_write(&so->so_lock);
- }
ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
- if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
- rw_status(&netlock) == RW_READ) {
+ if (rw_status(&netlock) == RW_READ)
rw_enter_write(&so->so_lock);
- }
break;
default:
ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
@@ -852,8 +878,8 @@ sbappend(struct socket *so, struct sockb
void
sbappendstream(struct socket *so, struct sockbuf *sb, struct mbuf *m)
{
- KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
- soassertlocked(so);
+ sbmtxassertlocked(so, sb);
+
KDASSERT(m->m_nextpkt == NULL);
KASSERT(sb->sb_mb == sb->sb_lastrecord);
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.400
diff -u -p -r1.400 tcp_input.c
--- sys/netinet/tcp_input.c 11 Feb 2024 01:27:45 -0000 1.400
+++ sys/netinet/tcp_input.c 11 Feb 2024 23:38:28 -0000
@@ -340,10 +340,12 @@ tcp_flush_queue(struct tcpcb *tp)
nq = TAILQ_NEXT(q, tcpqe_q);
TAILQ_REMOVE(&tp->t_segq, q, tcpqe_q);
ND6_HINT(tp);
+ mtx_enter(&so->so_rcv.sb_mtx);
if (so->so_rcv.sb_state & SS_CANTRCVMORE)
m_freem(q->tcpqe_m);
else
sbappendstream(so, &so->so_rcv, q->tcpqe_m);
+ mtx_leave(&so->so_rcv.sb_mtx);
pool_put(&tcpqe_pool, q);
q = nq;
} while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt);
@@ -1041,6 +1043,7 @@ findpcb:
* Drop TCP, IP headers and TCP options then add data
* to socket buffer.
*/
+ mtx_enter(&so->so_rcv.sb_mtx);
if (so->so_rcv.sb_state & SS_CANTRCVMORE)
m_freem(m);
else {
@@ -1056,6 +1059,7 @@ findpcb:
m_adj(m, iphlen + off);
sbappendstream(so, &so->so_rcv, m);
}
+ mtx_leave(&so->so_rcv.sb_mtx);
tp->t_flags |= TF_BLOCKOUTPUT;
sorwakeup(so);
tp->t_flags &= ~TF_BLOCKOUTPUT;
@@ -1944,12 +1948,14 @@ dodata: /* XXX */
tiflags = th->th_flags & TH_FIN;
tcpstat_pkt(tcps_rcvpack, tcps_rcvbyte, tlen);
ND6_HINT(tp);
+ mtx_enter(&so->so_rcv.sb_mtx);
if (so->so_rcv.sb_state & SS_CANTRCVMORE)
m_freem(m);
else {
m_adj(m, hdroptlen);
sbappendstream(so, &so->so_rcv, m);
}
+ mtx_leave(&so->so_rcv.sb_mtx);
tp->t_flags |= TF_BLOCKOUTPUT;
sorwakeup(so);
tp->t_flags &= ~TF_BLOCKOUTPUT;
Index: sys/sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.123
diff -u -p -r1.123 socketvar.h
--- sys/sys/socketvar.h 11 Feb 2024 18:14:27 -0000 1.123
+++ sys/sys/socketvar.h 11 Feb 2024 23:38:28 -0000
@@ -402,10 +402,12 @@ int sockargs(struct mbuf **, const void
int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
void solock(struct socket *);
void solock_shared(struct socket *);
+void solock_recv(struct socket *);
int solock_persocket(struct socket *);
void solock_pair(struct socket *, struct socket *);
void sounlock(struct socket *);
void sounlock_shared(struct socket *);
+void sounlock_recv(struct socket *);
int sendit(struct proc *, int, struct msghdr *, int, register_t *);
int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);
tcp(4): soreceive() with shared netlock