From: Vitaliy Makkoveev Subject: Re: tcp input socket lock To: Alexander Bluhm Cc: tech@openbsd.org Date: Wed, 16 Apr 2025 12:27:33 +0300 On Sun, Apr 13, 2025 at 03:55:18PM +0200, Alexander Bluhm wrote: > Hi, > > In preparation to run TCP input in parallel, we have to lock the > socket. After inpcb lookup, also take the socket lock and increase > the socket reference count. syn_cache_get() unlocks the listen > socket and returns a locked socket. syn_cache_add() relies on > socket lock. > > With this diff, exclusive net lock is still held. It might get a > little slower due to the additional mutex and refcount. But I want > to see if locking and refcount works before switching tcp_input() > to shared net lock. > > ok? > ok mvs > bluhm > > Index: netinet/tcp_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v > diff -u -p -r1.434 tcp_input.c > --- netinet/tcp_input.c 10 Mar 2025 15:11:46 -0000 1.434 > +++ netinet/tcp_input.c 13 Apr 2025 13:28:28 -0000 > @@ -603,6 +603,11 @@ findpcb: > tcpstat_inc(tcps_noport); > goto dropwithreset_ratelim; > } > + so = in_pcbsolock_ref(inp); > + if (so == NULL) { > + tcpstat_inc(tcps_noport); > + goto dropwithreset_ratelim; > + } > > KASSERT(sotoinpcb(inp->inp_socket) == inp); > KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp); > @@ -635,7 +640,6 @@ findpcb: > else > tiwin = th->th_win; > > - so = inp->inp_socket; > if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { > union syn_cache_sa src; > union syn_cache_sa dst; > @@ -724,6 +728,7 @@ findpcb: > * in use for the reply, > * do not free it. > */ > + so = NULL; > m = *mp = NULL; > goto drop; > } else { > @@ -731,13 +736,11 @@ findpcb: > * We have created a > * full-blown connection. > */ > - tp = NULL; > in_pcbunref(inp); > inp = in_pcbref(sotoinpcb(so)); > tp = intotcpcb(inp); > if (tp == NULL) > goto badsyn; /*XXX*/ > - > } > break; > > @@ -843,6 +846,7 @@ findpcb: > tcpstat_inc(tcps_dropsyn); > goto drop; > } > + in_pcbsounlock_rele(inp, so); > in_pcbunref(inp); > return IPPROTO_DONE; > } > @@ -1018,6 +1022,7 @@ findpcb: > if (so->so_snd.sb_cc || > tp->t_flags & TF_NEEDOUTPUT) > (void) tcp_output(tp); > + in_pcbsounlock_rele(inp, so); > in_pcbunref(inp); > return IPPROTO_DONE; > } > @@ -1068,6 +1073,7 @@ findpcb: > tp->t_flags &= ~TF_BLOCKOUTPUT; > if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) > (void) tcp_output(tp); > + in_pcbsounlock_rele(inp, so); > in_pcbunref(inp); > return IPPROTO_DONE; > } > @@ -1261,6 +1267,8 @@ trimthenstep6: > ((arc4random() & 0x7fffffff) | 0x8000); > reuse = &iss; > tp = tcp_close(tp); > + in_pcbsounlock_rele(inp, so); > + so = NULL; > in_pcbunref(inp); > inp = NULL; > goto findpcb; > @@ -2065,6 +2073,7 @@ dodata: /* XXX */ > */ > if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) > (void) tcp_output(tp); > + in_pcbsounlock_rele(inp, so); > in_pcbunref(inp); > return IPPROTO_DONE; > > @@ -2094,6 +2103,7 @@ dropafterack: > m_freem(m); > tp->t_flags |= TF_ACKNOW; > (void) tcp_output(tp); > + in_pcbsounlock_rele(inp, so); > in_pcbunref(inp); > return IPPROTO_DONE; > > @@ -2129,6 +2139,7 @@ dropwithreset: > (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now); > } > m_freem(m); > + in_pcbsounlock_rele(inp, so); > in_pcbunref(inp); > return IPPROTO_DONE; > > @@ -2140,6 +2151,7 @@ drop: > tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen); > > m_freem(m); > + in_pcbsounlock_rele(inp, so); > in_pcbunref(inp); > return IPPROTO_DONE; > } > @@ -3543,6 +3555,7 @@ syn_cache_get(struct sockaddr *src, stru > sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid); > if (sc == NULL) { > mtx_leave(&syn_cache_mtx); > + in_pcbsounlock_rele(inp, so); > return (NULL); > } > > @@ -3556,6 +3569,7 @@ syn_cache_get(struct sockaddr *src, stru > refcnt_take(&sc->sc_refcnt); > mtx_leave(&syn_cache_mtx); > (void) syn_cache_respond(sc, m, now, do_ecn); > + in_pcbsounlock_rele(inp, so); > syn_cache_put(sc); > return ((struct socket *)(-1)); > } > @@ -3696,7 +3710,7 @@ syn_cache_get(struct sockaddr *src, stru > tp->rcv_adv = tp->rcv_nxt + sc->sc_win; > tp->last_ack_sent = tp->rcv_nxt; > > - in_pcbsounlock_rele(inp, so); > + in_pcbsounlock_rele(listeninp, listenso); > tcpstat_inc(tcps_sc_completed); > syn_cache_put(sc); > return (so); > @@ -3709,6 +3723,7 @@ abort: > tp = tcp_drop(tp, ECONNABORTED); /* destroys socket */ > m_freem(m); > in_pcbsounlock_rele(inp, so); > + in_pcbsounlock_rele(listeninp, listenso); > syn_cache_put(sc); > tcpstat_inc(tcps_sc_aborted); > return ((struct socket *)(-1)); > @@ -3813,7 +3828,7 @@ syn_cache_add(struct sockaddr *src, stru > struct mbuf *ipopts; > struct rtentry *rt = NULL; > > - NET_ASSERT_LOCKED(); > + soassertlocked(so); > > tp = sototcpcb(so); > > @@ -3989,9 +4004,8 @@ syn_cache_add(struct sockaddr *src, stru > if (syn_cache_respond(sc, m, now, do_ecn) == 0) { > mtx_enter(&syn_cache_mtx); > /* > - * XXXSMP Currently exclusive netlock prevents another insert > - * after our syn_cache_lookup() and before syn_cache_insert(). > - * Double insert should be handled and not rely on netlock. > + * Socket lock prevents another insert after our > + * syn_cache_lookup() and before syn_cache_insert(). > */ > syn_cache_insert(sc, tp); > mtx_leave(&syn_cache_mtx); >