Download raw body.
tcp input socket lock
On Sun, Apr 13, 2025 at 03:55:18PM +0200, Alexander Bluhm wrote:
> Hi,
>
> In preparation to run TCP input in parallel, we have to lock the
> socket. After inpcb lookup, also take the socket lock and increase
> the socket reference count. syn_cache_get() unlocks the listen
> socket and returns a locked socket. syn_cache_add() relies on
> socket lock.
>
> With this diff, exclusive net lock is still held. It might get a
> little slower due to the additional mutex and refcount. But I want
> to see if locking and refcount works before switching tcp_input()
> to shared net lock.
>
> ok?
>
ok mvs
> bluhm
>
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.434 tcp_input.c
> --- netinet/tcp_input.c 10 Mar 2025 15:11:46 -0000 1.434
> +++ netinet/tcp_input.c 13 Apr 2025 13:28:28 -0000
> @@ -603,6 +603,11 @@ findpcb:
> tcpstat_inc(tcps_noport);
> goto dropwithreset_ratelim;
> }
> + so = in_pcbsolock_ref(inp);
> + if (so == NULL) {
> + tcpstat_inc(tcps_noport);
> + goto dropwithreset_ratelim;
> + }
>
> KASSERT(sotoinpcb(inp->inp_socket) == inp);
> KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp);
> @@ -635,7 +640,6 @@ findpcb:
> else
> tiwin = th->th_win;
>
> - so = inp->inp_socket;
> if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
> union syn_cache_sa src;
> union syn_cache_sa dst;
> @@ -724,6 +728,7 @@ findpcb:
> * in use for the reply,
> * do not free it.
> */
> + so = NULL;
> m = *mp = NULL;
> goto drop;
> } else {
> @@ -731,13 +736,11 @@ findpcb:
> * We have created a
> * full-blown connection.
> */
> - tp = NULL;
> in_pcbunref(inp);
> inp = in_pcbref(sotoinpcb(so));
> tp = intotcpcb(inp);
> if (tp == NULL)
> goto badsyn; /*XXX*/
> -
> }
> break;
>
> @@ -843,6 +846,7 @@ findpcb:
> tcpstat_inc(tcps_dropsyn);
> goto drop;
> }
> + in_pcbsounlock_rele(inp, so);
> in_pcbunref(inp);
> return IPPROTO_DONE;
> }
> @@ -1018,6 +1022,7 @@ findpcb:
> if (so->so_snd.sb_cc ||
> tp->t_flags & TF_NEEDOUTPUT)
> (void) tcp_output(tp);
> + in_pcbsounlock_rele(inp, so);
> in_pcbunref(inp);
> return IPPROTO_DONE;
> }
> @@ -1068,6 +1073,7 @@ findpcb:
> tp->t_flags &= ~TF_BLOCKOUTPUT;
> if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
> (void) tcp_output(tp);
> + in_pcbsounlock_rele(inp, so);
> in_pcbunref(inp);
> return IPPROTO_DONE;
> }
> @@ -1261,6 +1267,8 @@ trimthenstep6:
> ((arc4random() & 0x7fffffff) | 0x8000);
> reuse = &iss;
> tp = tcp_close(tp);
> + in_pcbsounlock_rele(inp, so);
> + so = NULL;
> in_pcbunref(inp);
> inp = NULL;
> goto findpcb;
> @@ -2065,6 +2073,7 @@ dodata: /* XXX */
> */
> if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
> (void) tcp_output(tp);
> + in_pcbsounlock_rele(inp, so);
> in_pcbunref(inp);
> return IPPROTO_DONE;
>
> @@ -2094,6 +2103,7 @@ dropafterack:
> m_freem(m);
> tp->t_flags |= TF_ACKNOW;
> (void) tcp_output(tp);
> + in_pcbsounlock_rele(inp, so);
> in_pcbunref(inp);
> return IPPROTO_DONE;
>
> @@ -2129,6 +2139,7 @@ dropwithreset:
> (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now);
> }
> m_freem(m);
> + in_pcbsounlock_rele(inp, so);
> in_pcbunref(inp);
> return IPPROTO_DONE;
>
> @@ -2140,6 +2151,7 @@ drop:
> tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen);
>
> m_freem(m);
> + in_pcbsounlock_rele(inp, so);
> in_pcbunref(inp);
> return IPPROTO_DONE;
> }
> @@ -3543,6 +3555,7 @@ syn_cache_get(struct sockaddr *src, stru
> sc = syn_cache_lookup(src, dst, &scp, inp->inp_rtableid);
> if (sc == NULL) {
> mtx_leave(&syn_cache_mtx);
> + in_pcbsounlock_rele(inp, so);
> return (NULL);
> }
>
> @@ -3556,6 +3569,7 @@ syn_cache_get(struct sockaddr *src, stru
> refcnt_take(&sc->sc_refcnt);
> mtx_leave(&syn_cache_mtx);
> (void) syn_cache_respond(sc, m, now, do_ecn);
> + in_pcbsounlock_rele(inp, so);
> syn_cache_put(sc);
> return ((struct socket *)(-1));
> }
> @@ -3696,7 +3710,7 @@ syn_cache_get(struct sockaddr *src, stru
> tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
> tp->last_ack_sent = tp->rcv_nxt;
>
> - in_pcbsounlock_rele(inp, so);
> + in_pcbsounlock_rele(listeninp, listenso);
> tcpstat_inc(tcps_sc_completed);
> syn_cache_put(sc);
> return (so);
> @@ -3709,6 +3723,7 @@ abort:
> tp = tcp_drop(tp, ECONNABORTED); /* destroys socket */
> m_freem(m);
> in_pcbsounlock_rele(inp, so);
> + in_pcbsounlock_rele(listeninp, listenso);
> syn_cache_put(sc);
> tcpstat_inc(tcps_sc_aborted);
> return ((struct socket *)(-1));
> @@ -3813,7 +3828,7 @@ syn_cache_add(struct sockaddr *src, stru
> struct mbuf *ipopts;
> struct rtentry *rt = NULL;
>
> - NET_ASSERT_LOCKED();
> + soassertlocked(so);
>
> tp = sototcpcb(so);
>
> @@ -3989,9 +4004,8 @@ syn_cache_add(struct sockaddr *src, stru
> if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
> mtx_enter(&syn_cache_mtx);
> /*
> - * XXXSMP Currently exclusive netlock prevents another insert
> - * after our syn_cache_lookup() and before syn_cache_insert().
> - * Double insert should be handled and not rely on netlock.
> + * Socket lock prevents another insert after our
> + * syn_cache_lookup() and before syn_cache_insert().
> */
> syn_cache_insert(sc, tp);
> mtx_leave(&syn_cache_mtx);
>
tcp input socket lock