Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
OpenBSD tech <tech@openbsd.org>
Date:
Tue, 31 Dec 2024 12:55:15 +0100

Download raw body.

Thread
  • Alexander Bluhm:

    tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer

  • On Mon, Dec 30, 2024 at 11:37:12PM +0300, Vitaliy Makkoveev wrote:
    > Updated against latest sources.
    
    Passed all my tests.
    
    > @@ -225,6 +225,10 @@ tcp_output(struct tcpcb *tp)
    >  
    >  	now = tcp_now();
    >  
    > +	mtx_enter(&so->so_snd.sb_mtx);
    > +	doing_sosend=soissending(so);
    > +	mtx_leave(&so->so_snd.sb_mtx);
    > +
    >  	/*
    >  	 * Determine length of data that should be transmitted,
    >  	 * and flags that will be used.
    
    Could you put spaces around =
    
    OK bluhm@
    
    > Index: sys/kern/uipc_socket.c
    > ===================================================================
    > RCS file: /cvs/src/sys/kern/uipc_socket.c,v
    > diff -u -p -r1.351 uipc_socket.c
    > --- sys/kern/uipc_socket.c	30 Dec 2024 12:12:35 -0000	1.351
    > +++ sys/kern/uipc_socket.c	30 Dec 2024 13:08:30 -0000
    > @@ -151,27 +151,8 @@ soalloc(const struct protosw *prp, int w
    >  	TAILQ_INIT(&so->so_q0);
    >  	TAILQ_INIT(&so->so_q);
    >  
    > -	switch (dp->dom_family) {
    > -	case AF_INET:
    > -	case AF_INET6:
    > -		switch (prp->pr_type) {
    > -		case SOCK_RAW:
    > -		case SOCK_DGRAM:
    > -			so->so_snd.sb_flags |= SB_MTXLOCK;
    > -			/* FALLTHROUGH */
    > -		case SOCK_STREAM:
    > -			so->so_rcv.sb_flags |= SB_MTXLOCK;
    > -			break;
    > -		}
    > -		break;
    > -	case AF_KEY:
    > -	case AF_ROUTE:
    > -	case AF_UNIX:
    > -	case AF_FRAME:
    > -		so->so_snd.sb_flags |= SB_MTXLOCK;
    > -		so->so_rcv.sb_flags |= SB_MTXLOCK;
    > -		break;
    > -	}
    > +	so->so_snd.sb_flags |= SB_MTXLOCK;
    > +	so->so_rcv.sb_flags |= SB_MTXLOCK;
    >  
    >  	return (so);
    >  }
    > @@ -271,9 +252,6 @@ solisten(struct socket *so, int backlog)
    >  void
    >  sorele(struct socket *so, int keep_lock)
    >  {
    > -	int need_lock = (((so->so_snd.sb_flags & SB_MTXLOCK) == 0) &&
    > -	    keep_lock == 0);
    > -
    >  	if (keep_lock == 0)
    >  		sounlock(so);
    >  
    > @@ -284,13 +262,9 @@ sorele(struct socket *so, int keep_lock)
    >  	klist_free(&so->so_rcv.sb_klist);
    >  	klist_free(&so->so_snd.sb_klist);
    >  
    > -	if (need_lock)
    > -		solock(so);
    >  	mtx_enter(&so->so_snd.sb_mtx);
    >  	sbrelease(so, &so->so_snd);
    >  	mtx_leave(&so->so_snd.sb_mtx);
    > -	if (need_lock)
    > -		sounlock(so);
    >  
    >  	if (so->so_proto->pr_flags & PR_RIGHTS &&
    >  	    so->so_proto->pr_domain->dom_dispose)
    > Index: sys/netinet/tcp_input.c
    > ===================================================================
    > RCS file: /cvs/src/sys/netinet/tcp_input.c,v
    > diff -u -p -r1.415 tcp_input.c
    > --- sys/netinet/tcp_input.c	30 Dec 2024 12:20:39 -0000	1.415
    > +++ sys/netinet/tcp_input.c	30 Dec 2024 13:08:30 -0000
    > @@ -957,7 +957,10 @@ findpcb:
    >  				    acked);
    >  				tp->t_rcvacktime = now;
    >  				ND6_HINT(tp);
    > +
    > +				mtx_enter(&so->so_snd.sb_mtx);
    >  				sbdrop(so, &so->so_snd, acked);
    > +				mtx_leave(&so->so_snd.sb_mtx);
    >  
    >  				/*
    >  				 * If we had a pending ICMP message that
    > @@ -1738,10 +1741,14 @@ trimthenstep6:
    >  				tp->snd_wnd -= so->so_snd.sb_cc;
    >  			else
    >  				tp->snd_wnd = 0;
    > +			mtx_enter(&so->so_snd.sb_mtx);
    >  			sbdrop(so, &so->so_snd, (int)so->so_snd.sb_cc);
    > +			mtx_leave(&so->so_snd.sb_mtx);
    >  			ourfinisacked = 1;
    >  		} else {
    > +			mtx_enter(&so->so_snd.sb_mtx);
    >  			sbdrop(so, &so->so_snd, acked);
    > +			mtx_leave(&so->so_snd.sb_mtx);
    >  			if (tp->snd_wnd > acked)
    >  				tp->snd_wnd -= acked;
    >  			else
    > Index: sys/netinet/tcp_output.c
    > ===================================================================
    > RCS file: /cvs/src/sys/netinet/tcp_output.c,v
    > diff -u -p -r1.148 tcp_output.c
    > --- sys/netinet/tcp_output.c	28 Dec 2024 22:17:09 -0000	1.148
    > +++ sys/netinet/tcp_output.c	30 Dec 2024 13:08:30 -0000
    > @@ -200,7 +200,7 @@ tcp_output(struct tcpcb *tp)
    >  	u_int32_t optbuf[howmany(MAX_TCPOPTLEN, sizeof(u_int32_t))];
    >  	u_char *opt = (u_char *)optbuf;
    >  	unsigned int optlen, hdrlen, packetlen;
    > -	int idle, sendalot = 0;
    > +	int doing_sosend, idle, sendalot = 0;
    >  	int i, sack_rxmit = 0;
    >  	struct sackhole *p;
    >  	uint64_t now;
    > @@ -225,6 +225,10 @@ tcp_output(struct tcpcb *tp)
    >  
    >  	now = tcp_now();
    >  
    > +	mtx_enter(&so->so_snd.sb_mtx);
    > +	doing_sosend=soissending(so);
    > +	mtx_leave(&so->so_snd.sb_mtx);
    > +
    >  	/*
    >  	 * Determine length of data that should be transmitted,
    >  	 * and flags that will be used.
    > @@ -241,7 +245,7 @@ tcp_output(struct tcpcb *tp)
    >  		tp->snd_cwnd = 2 * tp->t_maxseg;
    >  
    >  	/* remember 'idle' for next invocation of tcp_output */
    > -	if (idle && soissending(so)) {
    > +	if (idle && doing_sosend) {
    >  		tp->t_flags |= TF_LASTIDLE;
    >  		idle = 0;
    >  	} else
    > @@ -390,7 +394,7 @@ again:
    >  		if (len >= txmaxseg)
    >  			goto send;
    >  		if ((idle || (tp->t_flags & TF_NODELAY)) &&
    > -		    len + off >= so->so_snd.sb_cc && !soissending(so) &&
    > +		    len + off >= so->so_snd.sb_cc && !doing_sosend &&
    >  		    (tp->t_flags & TF_NOPUSH) == 0)
    >  			goto send;
    >  		if (tp->t_force)
    > @@ -723,7 +727,7 @@ send:
    >  		 * give data to the user when a buffer fills or
    >  		 * a PUSH comes in.)
    >  		 */
    > -		if (off + len == so->so_snd.sb_cc && !soissending(so))
    > +		if (off + len == so->so_snd.sb_cc && !doing_sosend)
    >  			flags |= TH_PUSH;
    >  		tp->t_sndtime = now;
    >  	} else {
    > Index: sys/netinet/tcp_usrreq.c
    > ===================================================================
    > RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
    > diff -u -p -r1.234 tcp_usrreq.c
    > --- sys/netinet/tcp_usrreq.c	30 Dec 2024 12:20:39 -0000	1.234
    > +++ sys/netinet/tcp_usrreq.c	30 Dec 2024 13:08:30 -0000
    > @@ -302,10 +302,12 @@ tcp_fill_info(struct tcpcb *tp, struct s
    >  	ti->tcpi_so_rcv_sb_lowat = so->so_rcv.sb_lowat;
    >  	ti->tcpi_so_rcv_sb_wat = so->so_rcv.sb_wat;
    >  	mtx_leave(&so->so_rcv.sb_mtx);
    > +	mtx_enter(&so->so_snd.sb_mtx);
    >  	ti->tcpi_so_snd_sb_cc = so->so_snd.sb_cc;
    >  	ti->tcpi_so_snd_sb_hiwat = so->so_snd.sb_hiwat;
    >  	ti->tcpi_so_snd_sb_lowat = so->so_snd.sb_lowat;
    >  	ti->tcpi_so_snd_sb_wat = so->so_snd.sb_wat;
    > +	mtx_leave(&so->so_snd.sb_mtx);
    >  
    >  	return 0;
    >  }
    > @@ -842,7 +844,9 @@ tcp_send(struct socket *so, struct mbuf 
    >  	if (so->so_options & SO_DEBUG)
    >  		ostate = tp->t_state;
    >  
    > +	mtx_enter(&so->so_snd.sb_mtx);
    >  	sbappendstream(so, &so->so_snd, m);
    > +	mtx_leave(&so->so_snd.sb_mtx);
    >  	m = NULL;
    >  
    >  	error = tcp_output(tp);
    > @@ -895,7 +899,9 @@ tcp_sense(struct socket *so, struct stat
    >  	if ((error = tcp_sogetpcb(so, &inp, &tp)))
    >  		return (error);
    >  
    > +	mtx_enter(&so->so_snd.sb_mtx);
    >  	ub->st_blksize = so->so_snd.sb_hiwat;
    > +	mtx_leave(&so->so_snd.sb_mtx);
    >  
    >  	if (so->so_options & SO_DEBUG)
    >  		tcp_trace(TA_USER, tp->t_state, tp, tp, NULL, PRU_SENSE, 0);
    > @@ -970,7 +976,9 @@ tcp_sendoob(struct socket *so, struct mb
    >  	 * of data past the urgent section.
    >  	 * Otherwise, snd_up should be one lower.
    >  	 */
    > +	mtx_enter(&so->so_snd.sb_mtx);
    >  	sbappendstream(so, &so->so_snd, m);
    > +	mtx_leave(&so->so_snd.sb_mtx);
    >  	m = NULL;
    >  	tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
    >  	tp->t_force = 1;
    > @@ -1517,7 +1525,11 @@ void
    >  tcp_update_sndspace(struct tcpcb *tp)
    >  {
    >  	struct socket *so = tp->t_inpcb->inp_socket;
    > -	u_long nmax = so->so_snd.sb_hiwat;
    > +	u_long nmax;
    > +
    > +	mtx_enter(&so->so_snd.sb_mtx);
    > +
    > +	nmax = so->so_snd.sb_hiwat;
    >  
    >  	if (sbchecklowmem()) {
    >  		/* low on memory try to get rid of some */
    > @@ -1533,7 +1545,7 @@ tcp_update_sndspace(struct tcpcb *tp)
    >  	}
    >  
    >  	/* a writable socket must be preserved because of poll(2) semantics */
    > -	if (sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat) {
    > +	if (sbspace_locked(so, &so->so_snd) >= so->so_snd.sb_lowat) {
    >  		if (nmax < so->so_snd.sb_cc + so->so_snd.sb_lowat)
    >  			nmax = so->so_snd.sb_cc + so->so_snd.sb_lowat;
    >  		/* keep in sync with sbreserve() calculation */
    > @@ -1546,6 +1558,8 @@ tcp_update_sndspace(struct tcpcb *tp)
    >  
    >  	if (nmax != so->so_snd.sb_hiwat)
    >  		sbreserve(so, &so->so_snd, nmax);
    > +
    > +	mtx_leave(&so->so_snd.sb_mtx);
    >  }
    >  
    >  /*
    > @@ -1579,9 +1593,11 @@ tcp_update_rcvspace(struct tcpcb *tp)
    >  	}
    >  
    >  	/* a readable socket must be preserved because of poll(2) semantics */
    > +	mtx_enter(&so->so_snd.sb_mtx);
    >  	if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat &&
    >  	    nmax < so->so_snd.sb_lowat)
    >  		nmax = so->so_snd.sb_lowat;
    > +	mtx_leave(&so->so_snd.sb_mtx);
    >  
    >  	if (nmax != so->so_rcv.sb_hiwat) {
    >  		/* round to MSS boundary */
    
    
    
  • Alexander Bluhm:

    tcp(4): use per-sockbuf mutex to protect `so_snd' socket buffer