From: Alexander Bluhm Subject: tcp input socket lock To: tech@openbsd.org Date: Sun, 13 Apr 2025 15:55:18 +0200 Hi, In preparation to run TCP input in parallel, we have to lock the socket. After inpcb lookup, also take the socket lock and increase the socket reference count. syn_cache_get() unlocks the listen socket and returns a locked socket. syn_cache_add() relies on socket lock. With this diff, exclusive net lock is still held. It might get a little slower due to the additional mutex and refcount. But I want to see if locking and refcount works before switching tcp_input() to shared net lock. ok? bluhm Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.434 tcp_input.c --- netinet/tcp_input.c 10 Mar 2025 15:11:46 -0000 1.434 +++ netinet/tcp_input.c 13 Apr 2025 13:28:28 -0000 @@ -603,6 +603,11 @@ findpcb: tcpstat_inc(tcps_noport); goto dropwithreset_ratelim; } + so = in_pcbsolock_ref(inp); + if (so == NULL) { + tcpstat_inc(tcps_noport); + goto dropwithreset_ratelim; + } KASSERT(sotoinpcb(inp->inp_socket) == inp); KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp); @@ -635,7 +640,6 @@ findpcb: else tiwin = th->th_win; - so = inp->inp_socket; if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { union syn_cache_sa src; union syn_cache_sa dst; @@ -724,6 +728,7 @@ findpcb: * in use for the reply, * do not free it. */ + so = NULL; m = *mp = NULL; goto drop; } else { @@ -731,13 +736,11 @@ findpcb: * We have created a * full-blown connection. */ - tp = NULL; in_pcbunref(inp); inp = in_pcbref(sotoinpcb(so)); tp = intotcpcb(inp); if (tp == NULL) goto badsyn; /*XXX*/ - } break; @@ -843,6 +846,7 @@ findpcb: tcpstat_inc(tcps_dropsyn); goto drop; } + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1018,6 +1022,7 @@ findpcb: if (so->so_snd.sb_cc || tp->t_flags & TF_NEEDOUTPUT) (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1068,6 +1073,7 @@ findpcb: tp->t_flags &= ~TF_BLOCKOUTPUT; if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1261,6 +1267,8 @@ trimthenstep6: ((arc4random() & 0x7fffffff) | 0x8000); reuse = &iss; tp = tcp_close(tp); + in_pcbsounlock_rele(inp, so); + so = NULL; in_pcbunref(inp); inp = NULL; goto findpcb; @@ -2065,6 +2073,7 @@ dodata: /* XXX */ */ if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2094,6 +2103,7 @@ dropafterack: m_freem(m); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2129,6 +2139,7 @@ dropwithreset: (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now); } m_freem(m); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2140,6 +2151,7 @@ drop: tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen); m_freem(m); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -3543,6 +3555,7 @@ syn_cache_get(struct sockaddr *src, stru sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid); if (sc == NULL) { mtx_leave(&syn_cache_mtx); + in_pcbsounlock_rele(inp, so); return (NULL); } @@ -3556,6 +3569,7 @@ syn_cache_get(struct sockaddr *src, stru refcnt_take(&sc->sc_refcnt); mtx_leave(&syn_cache_mtx); (void) syn_cache_respond(sc, m, now, do_ecn); + in_pcbsounlock_rele(inp, so); syn_cache_put(sc); return ((struct socket *)(-1)); } @@ -3696,7 +3710,7 @@ syn_cache_get(struct sockaddr *src, stru tp->rcv_adv = tp->rcv_nxt + sc->sc_win; tp->last_ack_sent = tp->rcv_nxt; - in_pcbsounlock_rele(inp, so); + in_pcbsounlock_rele(listeninp, listenso); tcpstat_inc(tcps_sc_completed); syn_cache_put(sc); return (so); @@ -3709,6 +3723,7 @@ abort: tp = tcp_drop(tp, ECONNABORTED); /* destroys socket */ m_freem(m); in_pcbsounlock_rele(inp, so); + in_pcbsounlock_rele(listeninp, listenso); syn_cache_put(sc); tcpstat_inc(tcps_sc_aborted); return ((struct socket *)(-1)); @@ -3813,7 +3828,7 @@ syn_cache_add(struct sockaddr *src, stru struct mbuf *ipopts; struct rtentry *rt = NULL; - NET_ASSERT_LOCKED(); + soassertlocked(so); tp = sototcpcb(so); @@ -3989,9 +4004,8 @@ syn_cache_add(struct sockaddr *src, stru if (syn_cache_respond(sc, m, now, do_ecn) == 0) { mtx_enter(&syn_cache_mtx); /* - * XXXSMP Currently exclusive netlock prevents another insert - * after our syn_cache_lookup() and before syn_cache_insert(). - * Double insert should be handled and not rely on netlock. + * Socket lock prevents another insert after our + * syn_cache_lookup() and before syn_cache_insert(). */ syn_cache_insert(sc, tp); mtx_leave(&syn_cache_mtx);