Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
run TCP syn cache timer in parallel
To:
tech@openbsd.org
Date:
Wed, 1 Jan 2025 14:01:50 +0100

Download raw body.

Thread
Hi,

With mvs@'s socket ref counting, I think it is possible to run TCP
SYN cache in parallel.

Idea is to reset inp_socket pointer when socket is about to be
freed.  By protecting this with a mutex, we reliable retrieve the
socket from an inpcb reference.  By using the socket refcount we
switch from mutex to socket lock.

The syn cache timer is an easy place to use this.  Goal it to use
in_pcbsolock() also for TCP timer and input.  Note that switching
sc_tp to sc_inplisten and refcounting it, is a diff I sent to tech@
earlier.

ok?

bluhm

Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.307 in_pcb.c
--- netinet/in_pcb.c	24 Dec 2024 16:27:07 -0000	1.307
+++ netinet/in_pcb.c	1 Jan 2025 12:55:13 -0000
@@ -584,6 +584,9 @@ in_pcbdetach(struct inpcb *inp)
 	struct inpcbtable *table = inp->inp_table;
 
 	so->so_pcb = NULL;
+	mtx_enter(&inp->inp_sofree_mtx);
+	inp->inp_socket = NULL;
+	mtx_leave(&inp->inp_sofree_mtx);
 	/*
 	 * As long as the NET_LOCK() is the default lock for Internet
 	 * sockets, do not release it to not introduce new sleeping
@@ -616,6 +619,32 @@ in_pcbdetach(struct inpcb *inp)
 	mtx_leave(&table->inpt_mtx);
 
 	in_pcbunref(inp);
+}
+
+struct socket *
+in_pcbsolock(struct inpcb *inp)
+{
+	struct socket *so;
+
+	NET_ASSERT_LOCKED();
+
+	mtx_enter(&inp->inp_sofree_mtx);
+	so = soref(inp->inp_socket);
+	mtx_leave(&inp->inp_sofree_mtx);
+	if (so == NULL)
+		return NULL;
+
+	rw_enter_write(&so->so_lock);
+	sorele(so, 1);
+
+	return so;
+}
+
+void
+in_pcbsounlock(struct inpcb *inp, struct socket *so)
+{
+	KASSERT(inp->inp_socket == so);
+	rw_exit_write(&so->so_lock);
 }
 
 struct inpcb *
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.161 in_pcb.h
--- netinet/in_pcb.h	24 Dec 2024 16:27:07 -0000	1.161
+++ netinet/in_pcb.h	1 Jan 2025 10:53:17 -0000
@@ -81,6 +81,7 @@
  *	t	inpt_mtx		pcb table mutex
  *	L	pf_inp_mtx		link pf to inp mutex
  *	s	so_lock			socket rwlock
+ *	f	inp_sofree_mtx		socket detach and lock
  */
 
 /*
@@ -136,7 +137,8 @@ struct inpcb {
 #define	inp_laddr6	inp_laddru.iau_addr6
 	u_int16_t inp_fport;		/* [t] foreign port */
 	u_int16_t inp_lport;		/* [t] local port */
-	struct	  socket *inp_socket;	/* [I] back pointer to socket */
+	struct	  socket *inp_socket;	/* [f] back pointer to socket */
+	struct	  mutex inp_sofree_mtx;	/* protect socket free */
 	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
 	struct    route inp_route;	/* [s] cached route */
 	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
@@ -309,6 +311,9 @@ int	 in_pcbaddrisavail(const struct inpc
 	    struct proc *);
 int	 in_pcbconnect(struct inpcb *, struct mbuf *);
 void	 in_pcbdetach(struct inpcb *);
+struct socket *
+	 in_pcbsolock(struct inpcb *);
+void	 in_pcbsounlock(struct inpcb *, struct socket *);
 struct inpcb *
 	 in_pcbref(struct inpcb *);
 void	 in_pcbunref(struct inpcb *);
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.416 tcp_input.c
--- netinet/tcp_input.c	31 Dec 2024 12:19:46 -0000	1.416
+++ netinet/tcp_input.c	1 Jan 2025 10:49:40 -0000
@@ -3173,7 +3173,8 @@ syn_cache_rm(struct syn_cache *sc)
 	KASSERT(!ISSET(sc->sc_dynflags, SCF_DEAD));
 	SET(sc->sc_dynflags, SCF_DEAD);
 	TAILQ_REMOVE(&sc->sc_buckethead->sch_bucket, sc, sc_bucketq);
-	sc->sc_tp = NULL;
+	in_pcbunref(sc->sc_inplisten);
+	sc->sc_inplisten = NULL;
 	LIST_REMOVE(sc, sc_tpq);
 	refcnt_rele(&sc->sc_refcnt);
 	sc->sc_buckethead->sch_length--;
@@ -3369,6 +3370,8 @@ void
 syn_cache_timer(void *arg)
 {
 	struct syn_cache *sc = arg;
+	struct inpcb *inp;
+	struct socket *so;
 	uint64_t now;
 	int lastref;
 
@@ -3397,14 +3400,22 @@ syn_cache_timer(void *arg)
 	    TCPTV_REXMTMAX);
 	if (timeout_add_msec(&sc->sc_timer, sc->sc_rxtcur))
 		refcnt_take(&sc->sc_refcnt);
+	inp = in_pcbref(sc->sc_inplisten);
+	if (inp == NULL)
+		goto freeit;
 	mtx_leave(&syn_cache_mtx);
 
-	NET_LOCK();
-	now = tcp_now();
-	(void) syn_cache_respond(sc, NULL, now);
-	tcpstat_inc(tcps_sc_retransmitted);
-	NET_UNLOCK();
+	NET_LOCK_SHARED();
+	so = in_pcbsolock(inp);
+	if (so != NULL) {
+		now = tcp_now();
+		(void) syn_cache_respond(sc, NULL, now);
+		tcpstat_inc(tcps_sc_retransmitted);
+		in_pcbsounlock(inp, so);
+	}
+	NET_UNLOCK_SHARED();
 
+	in_pcbunref(inp);
 	syn_cache_put(sc);
 	return;
 
@@ -3434,10 +3445,7 @@ syn_cache_cleanup(struct tcpcb *tp)
 
 	mtx_enter(&syn_cache_mtx);
 	LIST_FOREACH_SAFE(sc, &tp->t_sc, sc_tpq, nsc) {
-#ifdef DIAGNOSTIC
-		if (sc->sc_tp != tp)
-			panic("invalid sc_tp in syn_cache_cleanup");
-#endif
+		KASSERT(sc->sc_inplisten == tp->t_inpcb);
 		syn_cache_rm(sc);
 		syn_cache_put(sc);
 	}
@@ -3949,7 +3957,7 @@ syn_cache_add(struct sockaddr *src, stru
 	if (tb.t_flags & TF_SIGNATURE)
 		SET(sc->sc_fixflags, SCF_SIGNATURE);
 #endif
-	sc->sc_tp = tp;
+	sc->sc_inplisten = in_pcbref(tp->t_inpcb);
 	if (syn_cache_respond(sc, m, now) == 0) {
 		mtx_enter(&syn_cache_mtx);
 		/*
@@ -3962,6 +3970,7 @@ syn_cache_add(struct sockaddr *src, stru
 		tcpstat_inc(tcps_sndacks);
 		tcpstat_inc(tcps_sndtotal);
 	} else {
+		in_pcbunref(sc->sc_inplisten);
 		syn_cache_put(sc);
 		tcpstat_inc(tcps_sc_dropped);
 	}
@@ -4158,7 +4167,7 @@ syn_cache_respond(struct syn_cache *sc, 
 
 	/* use IPsec policy and ttl from listening socket, on SYN ACK */
 	mtx_enter(&syn_cache_mtx);
-	inp = sc->sc_tp ? sc->sc_tp->t_inpcb : NULL;
+	inp = in_pcbref(sc->sc_inplisten);
 	mtx_leave(&syn_cache_mtx);
 
 	/*
@@ -4189,5 +4198,6 @@ syn_cache_respond(struct syn_cache *sc, 
 		break;
 #endif
 	}
+	in_pcbunref(inp);
 	return (error);
 }
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.181 tcp_var.h
--- netinet/tcp_var.h	28 Dec 2024 22:17:09 -0000	1.181
+++ netinet/tcp_var.h	1 Jan 2025 10:26:36 -0000
@@ -278,7 +278,7 @@ struct syn_cache {
 	u_int     sc_request_r_scale	: 4,	/* [I] */
 		  sc_requested_s_scale	: 4;	/* [I] */
 
-	struct tcpcb *sc_tp;		/* [S] tcb for listening socket */
+	struct inpcb *sc_inplisten;	/* [S] inpcb for listening socket */
 	LIST_ENTRY(syn_cache) sc_tpq;	/* [S] list of entries by same tp */
 };
 
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
diff -u -p -r1.135 socketvar.h
--- sys/socketvar.h	30 Dec 2024 12:12:35 -0000	1.135
+++ sys/socketvar.h	1 Jan 2025 10:56:10 -0000
@@ -209,10 +209,13 @@ struct socket {
 void	soassertlocked(struct socket *);
 void	soassertlocked_readonly(struct socket *);
 
-static inline void
+static inline struct socket *
 soref(struct socket *so)
 {
+	if (so == NULL)
+		return NULL;
 	refcnt_take(&so->so_refcnt);
+	return so;
 }
 
 /*