Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: tcp input socket lock
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Wed, 16 Apr 2025 12:27:33 +0300

Download raw body.

Thread
  • Alexander Bluhm:

    tcp input socket lock

    • Vitaliy Makkoveev:

      tcp input socket lock

On Sun, Apr 13, 2025 at 03:55:18PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> In preparation to run TCP input in parallel, we have to lock the
> socket.  After inpcb lookup, also take the socket lock and increase
> the socket reference count.  syn_cache_get() unlocks the listen
> socket and returns a locked socket.  syn_cache_add() relies on
> socket lock.
> 
> With this diff, exclusive net lock is still held.  It might get a
> little slower due to the additional mutex and refcount.  But I want
> to see if locking and refcount works before switching tcp_input()
> to shared net lock.
> 
> ok?
> 

ok mvs

> bluhm
> 
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.434 tcp_input.c
> --- netinet/tcp_input.c	10 Mar 2025 15:11:46 -0000	1.434
> +++ netinet/tcp_input.c	13 Apr 2025 13:28:28 -0000
> @@ -603,6 +603,11 @@ findpcb:
>  		tcpstat_inc(tcps_noport);
>  		goto dropwithreset_ratelim;
>  	}
> +	so = in_pcbsolock_ref(inp);
> +	if (so == NULL) {
> +		tcpstat_inc(tcps_noport);
> +		goto dropwithreset_ratelim;
> +	}
>  
>  	KASSERT(sotoinpcb(inp->inp_socket) == inp);
>  	KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp);
> @@ -635,7 +640,6 @@ findpcb:
>  	else
>  		tiwin = th->th_win;
>  
> -	so = inp->inp_socket;
>  	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
>  		union syn_cache_sa src;
>  		union syn_cache_sa dst;
> @@ -724,6 +728,7 @@ findpcb:
>  					 * in use for the reply,
>  					 * do not free it.
>  					 */
> +					so = NULL;
>  					m = *mp = NULL;
>  					goto drop;
>  				} else {
> @@ -731,13 +736,11 @@ findpcb:
>  					 * We have created a
>  					 * full-blown connection.
>  					 */
> -					tp = NULL;
>  					in_pcbunref(inp);
>  					inp = in_pcbref(sotoinpcb(so));
>  					tp = intotcpcb(inp);
>  					if (tp == NULL)
>  						goto badsyn;	/*XXX*/
> -
>  				}
>  				break;
>  
> @@ -843,6 +846,7 @@ findpcb:
>  					tcpstat_inc(tcps_dropsyn);
>  					goto drop;
>  				}
> +				in_pcbsounlock_rele(inp, so);
>  				in_pcbunref(inp);
>  				return IPPROTO_DONE;
>  			}
> @@ -1018,6 +1022,7 @@ findpcb:
>  				if (so->so_snd.sb_cc ||
>  				    tp->t_flags & TF_NEEDOUTPUT)
>  					(void) tcp_output(tp);
> +				in_pcbsounlock_rele(inp, so);
>  				in_pcbunref(inp);
>  				return IPPROTO_DONE;
>  			}
> @@ -1068,6 +1073,7 @@ findpcb:
>  			tp->t_flags &= ~TF_BLOCKOUTPUT;
>  			if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
>  				(void) tcp_output(tp);
> +			in_pcbsounlock_rele(inp, so);
>  			in_pcbunref(inp);
>  			return IPPROTO_DONE;
>  		}
> @@ -1261,6 +1267,8 @@ trimthenstep6:
>  			    ((arc4random() & 0x7fffffff) | 0x8000);
>  			reuse = &iss;
>  			tp = tcp_close(tp);
> +			in_pcbsounlock_rele(inp, so);
> +			so = NULL;
>  			in_pcbunref(inp);
>  			inp = NULL;
>  			goto findpcb;
> @@ -2065,6 +2073,7 @@ dodata:							/* XXX */
>  	 */
>  	if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
>  		(void) tcp_output(tp);
> +	in_pcbsounlock_rele(inp, so);
>  	in_pcbunref(inp);
>  	return IPPROTO_DONE;
>  
> @@ -2094,6 +2103,7 @@ dropafterack:
>  	m_freem(m);
>  	tp->t_flags |= TF_ACKNOW;
>  	(void) tcp_output(tp);
> +	in_pcbsounlock_rele(inp, so);
>  	in_pcbunref(inp);
>  	return IPPROTO_DONE;
>  
> @@ -2129,6 +2139,7 @@ dropwithreset:
>  		    (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now);
>  	}
>  	m_freem(m);
> +	in_pcbsounlock_rele(inp, so);
>  	in_pcbunref(inp);
>  	return IPPROTO_DONE;
>  
> @@ -2140,6 +2151,7 @@ drop:
>  		tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen);
>  
>  	m_freem(m);
> +	in_pcbsounlock_rele(inp, so);
>  	in_pcbunref(inp);
>  	return IPPROTO_DONE;
>  }
> @@ -3543,6 +3555,7 @@ syn_cache_get(struct sockaddr *src, stru
>  	sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid);
>  	if (sc == NULL) {
>  		mtx_leave(&syn_cache_mtx);
> +		in_pcbsounlock_rele(inp, so);
>  		return (NULL);
>  	}
>  
> @@ -3556,6 +3569,7 @@ syn_cache_get(struct sockaddr *src, stru
>  		refcnt_take(&sc->sc_refcnt);
>  		mtx_leave(&syn_cache_mtx);
>  		(void) syn_cache_respond(sc, m, now, do_ecn);
> +		in_pcbsounlock_rele(inp, so);
>  		syn_cache_put(sc);
>  		return ((struct socket *)(-1));
>  	}
> @@ -3696,7 +3710,7 @@ syn_cache_get(struct sockaddr *src, stru
>  		tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
>  	tp->last_ack_sent = tp->rcv_nxt;
>  
> -	in_pcbsounlock_rele(inp, so);
> +	in_pcbsounlock_rele(listeninp, listenso);
>  	tcpstat_inc(tcps_sc_completed);
>  	syn_cache_put(sc);
>  	return (so);
> @@ -3709,6 +3723,7 @@ abort:
>  		tp = tcp_drop(tp, ECONNABORTED);	/* destroys socket */
>  	m_freem(m);
>  	in_pcbsounlock_rele(inp, so);
> +	in_pcbsounlock_rele(listeninp, listenso);
>  	syn_cache_put(sc);
>  	tcpstat_inc(tcps_sc_aborted);
>  	return ((struct socket *)(-1));
> @@ -3813,7 +3828,7 @@ syn_cache_add(struct sockaddr *src, stru
>  	struct mbuf *ipopts;
>  	struct rtentry *rt = NULL;
>  
> -	NET_ASSERT_LOCKED();
> +	soassertlocked(so);
>  
>  	tp = sototcpcb(so);
>  
> @@ -3989,9 +4004,8 @@ syn_cache_add(struct sockaddr *src, stru
>  	if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
>  		mtx_enter(&syn_cache_mtx);
>  		/*
> -		 * XXXSMP Currently exclusive netlock prevents another insert
> -		 * after our syn_cache_lookup() and before syn_cache_insert().
> -		 * Double insert should be handled and not rely on netlock.
> +		 * Socket lock prevents another insert after our
> +		 * syn_cache_lookup() and before syn_cache_insert().
>  		 */
>  		syn_cache_insert(sc, tp);
>  		mtx_leave(&syn_cache_mtx);
>