Index | Thread | Search

From:
Vitaliy Makkoveev <otto@bsdbox.dev>
Subject:
Re: run TCP syn cache timer in parallel
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Wed, 1 Jan 2025 18:02:54 +0300

Download raw body.

Thread
> On 1 Jan 2025, at 16:01, Alexander Bluhm <bluhm@openbsd.org> wrote:
> 
> Hi,
> 
> With mvs@'s socket ref counting, I think it is possible to run TCP
> SYN cache in parallel.
> 
> Idea is to reset inp_socket pointer when socket is about to be
> freed.  By protecting this with a mutex, we reliable retrieve the
> socket from an inpcb reference.  By using the socket refcount we
> switch from mutex to socket lock.
> 
> The syn cache timer is an easy place to use this.  Goal it to use
> in_pcbsolock() also for TCP timer and input.  Note that switching
> sc_tp to sc_inplisten and refcounting it, is a diff I sent to tech@
> earlier.
> 
> ok?
> 

Hi Alexander,

This diff lokks good to me. However, I don’t like the sorele()
after so_lock rwlock(9) release. I made this with previous
sorele() and this worked perfectly because exclusive solock()
should be taken before continue teardown, but now I prefer to keep
the reference until we leave this socket alone. This doesn’t bring
any extra impact.

So I like to rename in_pcbsolock() to in_pcb_solock_ref() and
in_pcbsounlock() to in_pcb_sounlock_rele() and rework them as
something like below.

in_pcb_solock_ref()
{
	mtx_enter(&inp->inp_sofree_mtx);
	so = soref(inp->inp_socket);
	mtx_leave(&inp->inp_sofree_mtx);

	if (so)
		rw_enter_write(&so->so_lock);
	return so;
}

in_pcb_sounlock_rele()
{
	rw_exit_write(&so->so_lock);
	sorele(so, 1);
}

> bluhm
> 
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.307 in_pcb.c
> --- netinet/in_pcb.c	24 Dec 2024 16:27:07 -0000	1.307
> +++ netinet/in_pcb.c	1 Jan 2025 12:55:13 -0000
> @@ -584,6 +584,9 @@ in_pcbdetach(struct inpcb *inp)
> 	struct inpcbtable *table = inp->inp_table;
> 
> 	so->so_pcb = NULL;
> +	mtx_enter(&inp->inp_sofree_mtx);
> +	inp->inp_socket = NULL;
> +	mtx_leave(&inp->inp_sofree_mtx);
> 	/*
> 	 * As long as the NET_LOCK() is the default lock for Internet
> 	 * sockets, do not release it to not introduce new sleeping
> @@ -616,6 +619,32 @@ in_pcbdetach(struct inpcb *inp)
> 	mtx_leave(&table->inpt_mtx);
> 
> 	in_pcbunref(inp);
> +}
> +
> +struct socket *
> +in_pcbsolock(struct inpcb *inp)
> +{
> +	struct socket *so;
> +
> +	NET_ASSERT_LOCKED();
> +
> +	mtx_enter(&inp->inp_sofree_mtx);
> +	so = soref(inp->inp_socket);
> +	mtx_leave(&inp->inp_sofree_mtx);
> +	if (so == NULL)
> +		return NULL;
> +
> +	rw_enter_write(&so->so_lock);
> +	sorele(so, 1);
> +
> +	return so;
> +}
> +
> +void
> +in_pcbsounlock(struct inpcb *inp, struct socket *so)
> +{
> +	KASSERT(inp->inp_socket == so);
> +	rw_exit_write(&so->so_lock);
> }
> 
> struct inpcb *
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> diff -u -p -r1.161 in_pcb.h
> --- netinet/in_pcb.h	24 Dec 2024 16:27:07 -0000	1.161
> +++ netinet/in_pcb.h	1 Jan 2025 10:53:17 -0000
> @@ -81,6 +81,7 @@
>  *	t	inpt_mtx		pcb table mutex
>  *	L	pf_inp_mtx		link pf to inp mutex
>  *	s	so_lock			socket rwlock
> + *	f	inp_sofree_mtx		socket detach and lock
>  */
> 
> /*
> @@ -136,7 +137,8 @@ struct inpcb {
> #define	inp_laddr6	inp_laddru.iau_addr6
> 	u_int16_t inp_fport;		/* [t] foreign port */
> 	u_int16_t inp_lport;		/* [t] local port */
> -	struct	  socket *inp_socket;	/* [I] back pointer to socket */
> +	struct	  socket *inp_socket;	/* [f] back pointer to socket */
> +	struct	  mutex inp_sofree_mtx;	/* protect socket free */
> 	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
> 	struct    route inp_route;	/* [s] cached route */
> 	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
> @@ -309,6 +311,9 @@ int	 in_pcbaddrisavail(const struct inpc
> 	    struct proc *);
> int	 in_pcbconnect(struct inpcb *, struct mbuf *);
> void	 in_pcbdetach(struct inpcb *);
> +struct socket *
> +	 in_pcbsolock(struct inpcb *);
> +void	 in_pcbsounlock(struct inpcb *, struct socket *);
> struct inpcb *
> 	 in_pcbref(struct inpcb *);
> void	 in_pcbunref(struct inpcb *);
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.416 tcp_input.c
> --- netinet/tcp_input.c	31 Dec 2024 12:19:46 -0000	1.416
> +++ netinet/tcp_input.c	1 Jan 2025 10:49:40 -0000
> @@ -3173,7 +3173,8 @@ syn_cache_rm(struct syn_cache *sc)
> 	KASSERT(!ISSET(sc->sc_dynflags, SCF_DEAD));
> 	SET(sc->sc_dynflags, SCF_DEAD);
> 	TAILQ_REMOVE(&sc->sc_buckethead->sch_bucket, sc, sc_bucketq);
> -	sc->sc_tp = NULL;
> +	in_pcbunref(sc->sc_inplisten);
> +	sc->sc_inplisten = NULL;
> 	LIST_REMOVE(sc, sc_tpq);
> 	refcnt_rele(&sc->sc_refcnt);
> 	sc->sc_buckethead->sch_length--;
> @@ -3369,6 +3370,8 @@ void
> syn_cache_timer(void *arg)
> {
> 	struct syn_cache *sc = arg;
> +	struct inpcb *inp;
> +	struct socket *so;
> 	uint64_t now;
> 	int lastref;
> 
> @@ -3397,14 +3400,22 @@ syn_cache_timer(void *arg)
> 	    TCPTV_REXMTMAX);
> 	if (timeout_add_msec(&sc->sc_timer, sc->sc_rxtcur))
> 		refcnt_take(&sc->sc_refcnt);
> +	inp = in_pcbref(sc->sc_inplisten);
> +	if (inp == NULL)
> +		goto freeit;
> 	mtx_leave(&syn_cache_mtx);
> 
> -	NET_LOCK();
> -	now = tcp_now();
> -	(void) syn_cache_respond(sc, NULL, now);
> -	tcpstat_inc(tcps_sc_retransmitted);
> -	NET_UNLOCK();
> +	NET_LOCK_SHARED();
> +	so = in_pcbsolock(inp);
> +	if (so != NULL) {
> +		now = tcp_now();
> +		(void) syn_cache_respond(sc, NULL, now);
> +		tcpstat_inc(tcps_sc_retransmitted);
> +		in_pcbsounlock(inp, so);
> +	}
> +	NET_UNLOCK_SHARED();
> 
> +	in_pcbunref(inp);
> 	syn_cache_put(sc);
> 	return;
> 
> @@ -3434,10 +3445,7 @@ syn_cache_cleanup(struct tcpcb *tp)
> 
> 	mtx_enter(&syn_cache_mtx);
> 	LIST_FOREACH_SAFE(sc, &tp->t_sc, sc_tpq, nsc) {
> -#ifdef DIAGNOSTIC
> -		if (sc->sc_tp != tp)
> -			panic("invalid sc_tp in syn_cache_cleanup");
> -#endif
> +		KASSERT(sc->sc_inplisten == tp->t_inpcb);
> 		syn_cache_rm(sc);
> 		syn_cache_put(sc);
> 	}
> @@ -3949,7 +3957,7 @@ syn_cache_add(struct sockaddr *src, stru
> 	if (tb.t_flags & TF_SIGNATURE)
> 		SET(sc->sc_fixflags, SCF_SIGNATURE);
> #endif
> -	sc->sc_tp = tp;
> +	sc->sc_inplisten = in_pcbref(tp->t_inpcb);
> 	if (syn_cache_respond(sc, m, now) == 0) {
> 		mtx_enter(&syn_cache_mtx);
> 		/*
> @@ -3962,6 +3970,7 @@ syn_cache_add(struct sockaddr *src, stru
> 		tcpstat_inc(tcps_sndacks);
> 		tcpstat_inc(tcps_sndtotal);
> 	} else {
> +		in_pcbunref(sc->sc_inplisten);
> 		syn_cache_put(sc);
> 		tcpstat_inc(tcps_sc_dropped);
> 	}
> @@ -4158,7 +4167,7 @@ syn_cache_respond(struct syn_cache *sc, 
> 
> 	/* use IPsec policy and ttl from listening socket, on SYN ACK */
> 	mtx_enter(&syn_cache_mtx);
> -	inp = sc->sc_tp ? sc->sc_tp->t_inpcb : NULL;
> +	inp = in_pcbref(sc->sc_inplisten);
> 	mtx_leave(&syn_cache_mtx);
> 
> 	/*
> @@ -4189,5 +4198,6 @@ syn_cache_respond(struct syn_cache *sc, 
> 		break;
> #endif
> 	}
> +	in_pcbunref(inp);
> 	return (error);
> }
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.181 tcp_var.h
> --- netinet/tcp_var.h	28 Dec 2024 22:17:09 -0000	1.181
> +++ netinet/tcp_var.h	1 Jan 2025 10:26:36 -0000
> @@ -278,7 +278,7 @@ struct syn_cache {
> 	u_int     sc_request_r_scale	: 4,	/* [I] */
> 		  sc_requested_s_scale	: 4;	/* [I] */
> 
> -	struct tcpcb *sc_tp;		/* [S] tcb for listening socket */
> +	struct inpcb *sc_inplisten;	/* [S] inpcb for listening socket */
> 	LIST_ENTRY(syn_cache) sc_tpq;	/* [S] list of entries by same tp */
> };
> 
> Index: sys/socketvar.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
> diff -u -p -r1.135 socketvar.h
> --- sys/socketvar.h	30 Dec 2024 12:12:35 -0000	1.135
> +++ sys/socketvar.h	1 Jan 2025 10:56:10 -0000
> @@ -209,10 +209,13 @@ struct socket {
> void	soassertlocked(struct socket *);
> void	soassertlocked_readonly(struct socket *);
> 
> -static inline void
> +static inline struct socket *
> soref(struct socket *so)
> {
> +	if (so == NULL)
> +		return NULL;
> 	refcnt_take(&so->so_refcnt);
> +	return so;
> }
> 
> /*
>