Download raw body.
run TCP syn cache timer in parallel
Hi,
With mvs@'s socket ref counting, I think it is possible to run TCP
SYN cache in parallel.
Idea is to reset inp_socket pointer when socket is about to be
freed. By protecting this with a mutex, we reliable retrieve the
socket from an inpcb reference. By using the socket refcount we
switch from mutex to socket lock.
The syn cache timer is an easy place to use this. Goal it to use
in_pcbsolock() also for TCP timer and input. Note that switching
sc_tp to sc_inplisten and refcounting it, is a diff I sent to tech@
earlier.
ok?
bluhm
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.307 in_pcb.c
--- netinet/in_pcb.c 24 Dec 2024 16:27:07 -0000 1.307
+++ netinet/in_pcb.c 1 Jan 2025 12:55:13 -0000
@@ -584,6 +584,9 @@ in_pcbdetach(struct inpcb *inp)
struct inpcbtable *table = inp->inp_table;
so->so_pcb = NULL;
+ mtx_enter(&inp->inp_sofree_mtx);
+ inp->inp_socket = NULL;
+ mtx_leave(&inp->inp_sofree_mtx);
/*
* As long as the NET_LOCK() is the default lock for Internet
* sockets, do not release it to not introduce new sleeping
@@ -616,6 +619,32 @@ in_pcbdetach(struct inpcb *inp)
mtx_leave(&table->inpt_mtx);
in_pcbunref(inp);
+}
+
+struct socket *
+in_pcbsolock(struct inpcb *inp)
+{
+ struct socket *so;
+
+ NET_ASSERT_LOCKED();
+
+ mtx_enter(&inp->inp_sofree_mtx);
+ so = soref(inp->inp_socket);
+ mtx_leave(&inp->inp_sofree_mtx);
+ if (so == NULL)
+ return NULL;
+
+ rw_enter_write(&so->so_lock);
+ sorele(so, 1);
+
+ return so;
+}
+
+void
+in_pcbsounlock(struct inpcb *inp, struct socket *so)
+{
+ KASSERT(inp->inp_socket == so);
+ rw_exit_write(&so->so_lock);
}
struct inpcb *
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.161 in_pcb.h
--- netinet/in_pcb.h 24 Dec 2024 16:27:07 -0000 1.161
+++ netinet/in_pcb.h 1 Jan 2025 10:53:17 -0000
@@ -81,6 +81,7 @@
* t inpt_mtx pcb table mutex
* L pf_inp_mtx link pf to inp mutex
* s so_lock socket rwlock
+ * f inp_sofree_mtx socket detach and lock
*/
/*
@@ -136,7 +137,8 @@ struct inpcb {
#define inp_laddr6 inp_laddru.iau_addr6
u_int16_t inp_fport; /* [t] foreign port */
u_int16_t inp_lport; /* [t] local port */
- struct socket *inp_socket; /* [I] back pointer to socket */
+ struct socket *inp_socket; /* [f] back pointer to socket */
+ struct mutex inp_sofree_mtx; /* protect socket free */
caddr_t inp_ppcb; /* pointer to per-protocol pcb */
struct route inp_route; /* [s] cached route */
struct refcnt inp_refcnt; /* refcount PCB, delay memory free */
@@ -309,6 +311,9 @@ int in_pcbaddrisavail(const struct inpc
struct proc *);
int in_pcbconnect(struct inpcb *, struct mbuf *);
void in_pcbdetach(struct inpcb *);
+struct socket *
+ in_pcbsolock(struct inpcb *);
+void in_pcbsounlock(struct inpcb *, struct socket *);
struct inpcb *
in_pcbref(struct inpcb *);
void in_pcbunref(struct inpcb *);
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.416 tcp_input.c
--- netinet/tcp_input.c 31 Dec 2024 12:19:46 -0000 1.416
+++ netinet/tcp_input.c 1 Jan 2025 10:49:40 -0000
@@ -3173,7 +3173,8 @@ syn_cache_rm(struct syn_cache *sc)
KASSERT(!ISSET(sc->sc_dynflags, SCF_DEAD));
SET(sc->sc_dynflags, SCF_DEAD);
TAILQ_REMOVE(&sc->sc_buckethead->sch_bucket, sc, sc_bucketq);
- sc->sc_tp = NULL;
+ in_pcbunref(sc->sc_inplisten);
+ sc->sc_inplisten = NULL;
LIST_REMOVE(sc, sc_tpq);
refcnt_rele(&sc->sc_refcnt);
sc->sc_buckethead->sch_length--;
@@ -3369,6 +3370,8 @@ void
syn_cache_timer(void *arg)
{
struct syn_cache *sc = arg;
+ struct inpcb *inp;
+ struct socket *so;
uint64_t now;
int lastref;
@@ -3397,14 +3400,22 @@ syn_cache_timer(void *arg)
TCPTV_REXMTMAX);
if (timeout_add_msec(&sc->sc_timer, sc->sc_rxtcur))
refcnt_take(&sc->sc_refcnt);
+ inp = in_pcbref(sc->sc_inplisten);
+ if (inp == NULL)
+ goto freeit;
mtx_leave(&syn_cache_mtx);
- NET_LOCK();
- now = tcp_now();
- (void) syn_cache_respond(sc, NULL, now);
- tcpstat_inc(tcps_sc_retransmitted);
- NET_UNLOCK();
+ NET_LOCK_SHARED();
+ so = in_pcbsolock(inp);
+ if (so != NULL) {
+ now = tcp_now();
+ (void) syn_cache_respond(sc, NULL, now);
+ tcpstat_inc(tcps_sc_retransmitted);
+ in_pcbsounlock(inp, so);
+ }
+ NET_UNLOCK_SHARED();
+ in_pcbunref(inp);
syn_cache_put(sc);
return;
@@ -3434,10 +3445,7 @@ syn_cache_cleanup(struct tcpcb *tp)
mtx_enter(&syn_cache_mtx);
LIST_FOREACH_SAFE(sc, &tp->t_sc, sc_tpq, nsc) {
-#ifdef DIAGNOSTIC
- if (sc->sc_tp != tp)
- panic("invalid sc_tp in syn_cache_cleanup");
-#endif
+ KASSERT(sc->sc_inplisten == tp->t_inpcb);
syn_cache_rm(sc);
syn_cache_put(sc);
}
@@ -3949,7 +3957,7 @@ syn_cache_add(struct sockaddr *src, stru
if (tb.t_flags & TF_SIGNATURE)
SET(sc->sc_fixflags, SCF_SIGNATURE);
#endif
- sc->sc_tp = tp;
+ sc->sc_inplisten = in_pcbref(tp->t_inpcb);
if (syn_cache_respond(sc, m, now) == 0) {
mtx_enter(&syn_cache_mtx);
/*
@@ -3962,6 +3970,7 @@ syn_cache_add(struct sockaddr *src, stru
tcpstat_inc(tcps_sndacks);
tcpstat_inc(tcps_sndtotal);
} else {
+ in_pcbunref(sc->sc_inplisten);
syn_cache_put(sc);
tcpstat_inc(tcps_sc_dropped);
}
@@ -4158,7 +4167,7 @@ syn_cache_respond(struct syn_cache *sc,
/* use IPsec policy and ttl from listening socket, on SYN ACK */
mtx_enter(&syn_cache_mtx);
- inp = sc->sc_tp ? sc->sc_tp->t_inpcb : NULL;
+ inp = in_pcbref(sc->sc_inplisten);
mtx_leave(&syn_cache_mtx);
/*
@@ -4189,5 +4198,6 @@ syn_cache_respond(struct syn_cache *sc,
break;
#endif
}
+ in_pcbunref(inp);
return (error);
}
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.181 tcp_var.h
--- netinet/tcp_var.h 28 Dec 2024 22:17:09 -0000 1.181
+++ netinet/tcp_var.h 1 Jan 2025 10:26:36 -0000
@@ -278,7 +278,7 @@ struct syn_cache {
u_int sc_request_r_scale : 4, /* [I] */
sc_requested_s_scale : 4; /* [I] */
- struct tcpcb *sc_tp; /* [S] tcb for listening socket */
+ struct inpcb *sc_inplisten; /* [S] inpcb for listening socket */
LIST_ENTRY(syn_cache) sc_tpq; /* [S] list of entries by same tp */
};
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
diff -u -p -r1.135 socketvar.h
--- sys/socketvar.h 30 Dec 2024 12:12:35 -0000 1.135
+++ sys/socketvar.h 1 Jan 2025 10:56:10 -0000
@@ -209,10 +209,13 @@ struct socket {
void soassertlocked(struct socket *);
void soassertlocked_readonly(struct socket *);
-static inline void
+static inline struct socket *
soref(struct socket *so)
{
+ if (so == NULL)
+ return NULL;
refcnt_take(&so->so_refcnt);
+ return so;
}
/*
run TCP syn cache timer in parallel