tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
On Fri, Dec 27, 2024 at 02:44:46PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Dec 26, 2024 at 06:43:29PM +0300, Vitaliy Makkoveev wrote:
> > > On 26 Dec 2024, at 15:47, Alexander Bluhm <bluhm@openbsd.org> wrote:
> > >
> > > On Tue, Dec 24, 2024 at 10:21:55PM +0300, Vitaliy Makkoveev wrote:
> > >> This is the updated diff. It uses barriers instead of `ssp_idleto'
> > >> timeout and `ssp_task' task redefinition.
> > >
> > > I have tested this diff with my regression, performance, and network
> > > setup. Works fine.
> > >
> > > OK bluhm@
> > >
> > > Could you wait one day before committing this? Then we have a
> > > snapshot between my and your unlocking diff. If something goes
> > > wrong, it will be easier to bisect.
> >
> > Thanks for testing! Will commit this weekend or Monday.
> >
>
> The updated diff. It doesn't contain the already committed tcp_mss_update()
> and unsplicing hunks.
>
> Note, in sofree() I intentionally left the "so->so_sp" check within the
> "if (!keep_lock)" block.
>
Updated against latest sources.
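
Every hunk below applies the same pattern: take the per-sockbuf mutex around
the `so_snd' access instead of relying on the exclusive socket lock. A minimal
sketch of that pattern (the helper name is made up for illustration only; the
calls mirror the tcp_input.c hunks below):

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/socketvar.h>

/*
 * Illustration only, not part of the diff: consume acknowledged data
 * from the send buffer while holding the per-sockbuf mutex `sb_mtx'.
 */
static void
example_snd_drop(struct socket *so, int acked)
{
	mtx_enter(&so->so_snd.sb_mtx);
	sbdrop(so, &so->so_snd, acked);
	mtx_leave(&so->so_snd.sb_mtx);
}

Since soalloc() now sets SB_MTXLOCK on both buffers unconditionally, the same
enter/leave shape also guards sbappendstream(), sbreserve() and the
`sb_hiwat'/`sb_lowat' reads in tcp_usrreq.c.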
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.351 uipc_socket.c
--- sys/kern/uipc_socket.c 30 Dec 2024 12:12:35 -0000 1.351
+++ sys/kern/uipc_socket.c 30 Dec 2024 13:08:30 -0000
@@ -151,27 +151,8 @@ soalloc(const struct protosw *prp, int w
TAILQ_INIT(&so->so_q0);
TAILQ_INIT(&so->so_q);
- switch (dp->dom_family) {
- case AF_INET:
- case AF_INET6:
- switch (prp->pr_type) {
- case SOCK_RAW:
- case SOCK_DGRAM:
- so->so_snd.sb_flags |= SB_MTXLOCK;
- /* FALLTHROUGH */
- case SOCK_STREAM:
- so->so_rcv.sb_flags |= SB_MTXLOCK;
- break;
- }
- break;
- case AF_KEY:
- case AF_ROUTE:
- case AF_UNIX:
- case AF_FRAME:
- so->so_snd.sb_flags |= SB_MTXLOCK;
- so->so_rcv.sb_flags |= SB_MTXLOCK;
- break;
- }
+ so->so_snd.sb_flags |= SB_MTXLOCK;
+ so->so_rcv.sb_flags |= SB_MTXLOCK;
return (so);
}
@@ -271,9 +252,6 @@ solisten(struct socket *so, int backlog)
void
sorele(struct socket *so, int keep_lock)
{
- int need_lock = (((so->so_snd.sb_flags & SB_MTXLOCK) == 0) &&
- keep_lock == 0);
-
if (keep_lock == 0)
sounlock(so);
@@ -284,13 +262,9 @@ sorele(struct socket *so, int keep_lock)
klist_free(&so->so_rcv.sb_klist);
klist_free(&so->so_snd.sb_klist);
- if (need_lock)
- solock(so);
mtx_enter(&so->so_snd.sb_mtx);
sbrelease(so, &so->so_snd);
mtx_leave(&so->so_snd.sb_mtx);
- if (need_lock)
- sounlock(so);
if (so->so_proto->pr_flags & PR_RIGHTS &&
so->so_proto->pr_domain->dom_dispose)
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.415 tcp_input.c
--- sys/netinet/tcp_input.c 30 Dec 2024 12:20:39 -0000 1.415
+++ sys/netinet/tcp_input.c 30 Dec 2024 13:08:30 -0000
@@ -957,7 +957,10 @@ findpcb:
acked);
tp->t_rcvacktime = now;
ND6_HINT(tp);
+
+ mtx_enter(&so->so_snd.sb_mtx);
sbdrop(so, &so->so_snd, acked);
+ mtx_leave(&so->so_snd.sb_mtx);
/*
* If we had a pending ICMP message that
@@ -1738,10 +1741,14 @@ trimthenstep6:
tp->snd_wnd -= so->so_snd.sb_cc;
else
tp->snd_wnd = 0;
+ mtx_enter(&so->so_snd.sb_mtx);
sbdrop(so, &so->so_snd, (int)so->so_snd.sb_cc);
+ mtx_leave(&so->so_snd.sb_mtx);
ourfinisacked = 1;
} else {
+ mtx_enter(&so->so_snd.sb_mtx);
sbdrop(so, &so->so_snd, acked);
+ mtx_leave(&so->so_snd.sb_mtx);
if (tp->snd_wnd > acked)
tp->snd_wnd -= acked;
else
Index: sys/netinet/tcp_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
diff -u -p -r1.148 tcp_output.c
--- sys/netinet/tcp_output.c 28 Dec 2024 22:17:09 -0000 1.148
+++ sys/netinet/tcp_output.c 30 Dec 2024 13:08:30 -0000
@@ -200,7 +200,7 @@ tcp_output(struct tcpcb *tp)
u_int32_t optbuf[howmany(MAX_TCPOPTLEN, sizeof(u_int32_t))];
u_char *opt = (u_char *)optbuf;
unsigned int optlen, hdrlen, packetlen;
- int idle, sendalot = 0;
+ int doing_sosend, idle, sendalot = 0;
int i, sack_rxmit = 0;
struct sackhole *p;
uint64_t now;
@@ -225,6 +225,10 @@ tcp_output(struct tcpcb *tp)
now = tcp_now();
+ mtx_enter(&so->so_snd.sb_mtx);
+ doing_sosend = soissending(so);
+ mtx_leave(&so->so_snd.sb_mtx);
+
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
@@ -241,7 +245,7 @@ tcp_output(struct tcpcb *tp)
tp->snd_cwnd = 2 * tp->t_maxseg;
/* remember 'idle' for next invocation of tcp_output */
- if (idle && soissending(so)) {
+ if (idle && doing_sosend) {
tp->t_flags |= TF_LASTIDLE;
idle = 0;
} else
@@ -390,7 +394,7 @@ again:
if (len >= txmaxseg)
goto send;
if ((idle || (tp->t_flags & TF_NODELAY)) &&
- len + off >= so->so_snd.sb_cc && !soissending(so) &&
+ len + off >= so->so_snd.sb_cc && !doing_sosend &&
(tp->t_flags & TF_NOPUSH) == 0)
goto send;
if (tp->t_force)
@@ -723,7 +727,7 @@ send:
* give data to the user when a buffer fills or
* a PUSH comes in.)
*/
- if (off + len == so->so_snd.sb_cc && !soissending(so))
+ if (off + len == so->so_snd.sb_cc && !doing_sosend)
flags |= TH_PUSH;
tp->t_sndtime = now;
} else {
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
diff -u -p -r1.234 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c 30 Dec 2024 12:20:39 -0000 1.234
+++ sys/netinet/tcp_usrreq.c 30 Dec 2024 13:08:30 -0000
@@ -302,10 +302,12 @@ tcp_fill_info(struct tcpcb *tp, struct s
ti->tcpi_so_rcv_sb_lowat = so->so_rcv.sb_lowat;
ti->tcpi_so_rcv_sb_wat = so->so_rcv.sb_wat;
mtx_leave(&so->so_rcv.sb_mtx);
+ mtx_enter(&so->so_snd.sb_mtx);
ti->tcpi_so_snd_sb_cc = so->so_snd.sb_cc;
ti->tcpi_so_snd_sb_hiwat = so->so_snd.sb_hiwat;
ti->tcpi_so_snd_sb_lowat = so->so_snd.sb_lowat;
ti->tcpi_so_snd_sb_wat = so->so_snd.sb_wat;
+ mtx_leave(&so->so_snd.sb_mtx);
return 0;
}
@@ -842,7 +844,9 @@ tcp_send(struct socket *so, struct mbuf
if (so->so_options & SO_DEBUG)
ostate = tp->t_state;
+ mtx_enter(&so->so_snd.sb_mtx);
sbappendstream(so, &so->so_snd, m);
+ mtx_leave(&so->so_snd.sb_mtx);
m = NULL;
error = tcp_output(tp);
@@ -895,7 +899,9 @@ tcp_sense(struct socket *so, struct stat
if ((error = tcp_sogetpcb(so, &inp, &tp)))
return (error);
+ mtx_enter(&so->so_snd.sb_mtx);
ub->st_blksize = so->so_snd.sb_hiwat;
+ mtx_leave(&so->so_snd.sb_mtx);
if (so->so_options & SO_DEBUG)
tcp_trace(TA_USER, tp->t_state, tp, tp, NULL, PRU_SENSE, 0);
@@ -970,7 +976,9 @@ tcp_sendoob(struct socket *so, struct mb
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
+ mtx_enter(&so->so_snd.sb_mtx);
sbappendstream(so, &so->so_snd, m);
+ mtx_leave(&so->so_snd.sb_mtx);
m = NULL;
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_force = 1;
@@ -1517,7 +1525,11 @@ void
tcp_update_sndspace(struct tcpcb *tp)
{
struct socket *so = tp->t_inpcb->inp_socket;
- u_long nmax = so->so_snd.sb_hiwat;
+ u_long nmax;
+
+ mtx_enter(&so->so_snd.sb_mtx);
+
+ nmax = so->so_snd.sb_hiwat;
if (sbchecklowmem()) {
/* low on memory try to get rid of some */
@@ -1533,7 +1545,7 @@ tcp_update_sndspace(struct tcpcb *tp)
}
/* a writable socket must be preserved because of poll(2) semantics */
- if (sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat) {
+ if (sbspace_locked(so, &so->so_snd) >= so->so_snd.sb_lowat) {
if (nmax < so->so_snd.sb_cc + so->so_snd.sb_lowat)
nmax = so->so_snd.sb_cc + so->so_snd.sb_lowat;
/* keep in sync with sbreserve() calculation */
@@ -1546,6 +1558,8 @@ tcp_update_sndspace(struct tcpcb *tp)
if (nmax != so->so_snd.sb_hiwat)
sbreserve(so, &so->so_snd, nmax);
+
+ mtx_leave(&so->so_snd.sb_mtx);
}
/*
@@ -1579,9 +1593,11 @@ tcp_update_rcvspace(struct tcpcb *tp)
}
/* a readable socket must be preserved because of poll(2) semantics */
+ mtx_enter(&so->so_snd.sb_mtx);
if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat &&
nmax < so->so_snd.sb_lowat)
nmax = so->so_snd.sb_lowat;
+ mtx_leave(&so->so_snd.sb_mtx);
if (nmax != so->so_rcv.sb_hiwat) {
/* round to MSS boundary */