From: Alexander Bluhm Subject: TCP syn cache ref count incpb To: tech@openbsd.org Date: Sun, 29 Dec 2024 17:43:41 +0100 Hi, To make progress in unlocking TCP input path we need more ref counting. The syn cache has a reference to the listen socket. Instead of adding a refcount to tcpcb, I think is easier to use the inpcb which already has a ref count. This diff is part of a larger diff that switches syn cache timeout to shared net lock. ok? bluhm Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.413 tcp_input.c --- netinet/tcp_input.c 26 Dec 2024 12:16:17 -0000 1.413 +++ netinet/tcp_input.c 28 Dec 2024 15:22:44 -0000 @@ -3165,7 +3165,8 @@ syn_cache_rm(struct syn_cache *sc) KASSERT(!ISSET(sc->sc_dynflags, SCF_DEAD)); SET(sc->sc_dynflags, SCF_DEAD); TAILQ_REMOVE(&sc->sc_buckethead->sch_bucket, sc, sc_bucketq); - sc->sc_tp = NULL; + in_pcbunref(sc->sc_inplisten); + sc->sc_inplisten = NULL; LIST_REMOVE(sc, sc_tpq); refcnt_rele(&sc->sc_refcnt); sc->sc_buckethead->sch_length--; @@ -3359,6 +3360,7 @@ void syn_cache_timer(void *arg) { struct syn_cache *sc = arg; + struct inpcb *inp; uint64_t now; int lastref; @@ -3387,6 +3389,9 @@ syn_cache_timer(void *arg) TCPTV_REXMTMAX); if (timeout_add_msec(&sc->sc_timer, sc->sc_rxtcur)) refcnt_take(&sc->sc_refcnt); + inp = in_pcbref(sc->sc_inplisten); + if (inp == NULL) + goto freeit; mtx_leave(&syn_cache_mtx); NET_LOCK(); @@ -3395,6 +3400,7 @@ syn_cache_timer(void *arg) tcpstat_inc(tcps_sc_retransmitted); NET_UNLOCK(); + in_pcbunref(inp); syn_cache_put(sc); return; @@ -3424,10 +3430,7 @@ syn_cache_cleanup(struct tcpcb *tp) mtx_enter(&syn_cache_mtx); LIST_FOREACH_SAFE(sc, &tp->t_sc, sc_tpq, nsc) { -#ifdef DIAGNOSTIC - if (sc->sc_tp != tp) - panic("invalid sc_tp in syn_cache_cleanup"); -#endif + KASSERT(sc->sc_inplisten == tp->t_inpcb); syn_cache_rm(sc); syn_cache_put(sc); } @@ -3938,7 +3941,7 @@ syn_cache_add(struct sockaddr *src, stru if (tb.t_flags & TF_SIGNATURE) SET(sc->sc_fixflags, SCF_SIGNATURE); #endif - sc->sc_tp = tp; + sc->sc_inplisten = in_pcbref(tp->t_inpcb); if (syn_cache_respond(sc, m, now) == 0) { mtx_enter(&syn_cache_mtx); /* @@ -3951,6 +3954,7 @@ syn_cache_add(struct sockaddr *src, stru tcpstat_inc(tcps_sndacks); tcpstat_inc(tcps_sndtotal); } else { + in_pcbunref(sc->sc_inplisten); syn_cache_put(sc); tcpstat_inc(tcps_sc_dropped); } @@ -4147,7 +4151,7 @@ syn_cache_respond(struct syn_cache *sc, /* use IPsec policy and ttl from listening socket, on SYN ACK */ mtx_enter(&syn_cache_mtx); - inp = sc->sc_tp ? sc->sc_tp->t_inpcb : NULL; + inp = in_pcbref(sc->sc_inplisten); mtx_leave(&syn_cache_mtx); /* @@ -4178,5 +4182,6 @@ syn_cache_respond(struct syn_cache *sc, break; #endif } + in_pcbunref(inp); return (error); } Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v diff -u -p -r1.180 tcp_var.h --- netinet/tcp_var.h 26 Dec 2024 12:16:17 -0000 1.180 +++ netinet/tcp_var.h 28 Dec 2024 15:19:16 -0000 @@ -278,7 +278,7 @@ struct syn_cache { u_int sc_request_r_scale : 4, /* [I] */ sc_requested_s_scale : 4; /* [I] */ - struct tcpcb *sc_tp; /* [S] tcb for listening socket */ + struct inpcb *sc_inplisten; /* [S] inpcb for listening socket */ LIST_ENTRY(syn_cache) sc_tpq; /* [S] list of entries by same tp */ };