Index | Thread | Search

From:
Vitaliy Makkoveev <otto@bsdbox.dev>
Subject:
Re: tcp timer refcount
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Fri, 3 Jan 2025 19:02:38 +0300

Download raw body.

Thread
  • Alexander Bluhm:

    tcp timer refcount

    • Vitaliy Makkoveev:

      tcp timer refcount

> On 3 Jan 2025, at 14:54, Alexander Bluhm <bluhm@openbsd.org> wrote:
> 
> Hi,
> 
> I would like to reference count the inpcb used in TCP timer.  The
> diff switches from struct tcpcb to inpcb in the timer argument.
> This allows to ref count it.  Reaper is special as it does not use
> the inp.  So I removed it from the generic timeout functions and
> added a special field.
> 
> For now I just want to commit the ref count part to see that there
> are no leaks.
> 
> Later we can run timer with shared net lock.  Also the reaper may
> be removed, but that needs more thoughts.
> 
> ok?
> 

ok mvs

> bluhm
> 
> Index: netinet/tcp_subr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> diff -u -p -r1.203 tcp_subr.c
> --- netinet/tcp_subr.c	28 Dec 2024 22:17:09 -0000	1.203
> +++ netinet/tcp_subr.c	3 Jan 2025 10:45:48 -0000
> @@ -440,13 +440,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait
> 	tp->t_maxseg = atomic_load_int(&tcp_mssdflt);
> 	tp->t_maxopd = 0;
> 
> +	tp->t_inpcb = inp;
> 	for (i = 0; i < TCPT_NTIMERS; i++)
> 		TCP_TIMER_INIT(tp, i);
> +	timeout_set_flags(&tp->t_timer_reaper, tcp_timer_reaper, tp,
> +	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
> 
> 	tp->sack_enable = atomic_load_int(&tcp_do_sack);
> 	tp->t_flags = atomic_load_int(&tcp_do_rfc1323) ?
> 	    (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
> -	tp->t_inpcb = inp;
> 	/*
> 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
> 	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
> @@ -530,11 +532,11 @@ tcp_close(struct tcpcb *tp)
> 
> 	m_free(tp->t_template);
> 	/* Free tcpcb after all pending timers have been run. */
> -	TCP_TIMER_ARM(tp, TCPT_REAPER, 1);
> -
> +	timeout_add(&tp->t_timer_reaper, 0);
> 	inp->inp_ppcb = NULL;
> 	soisdisconnected(so);
> 	in_pcbdetach(inp);
> +	tcpstat_inc(tcps_closed);
> 	return (NULL);
> }
> 
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> diff -u -p -r1.78 tcp_timer.c
> --- netinet/tcp_timer.c	28 Dec 2024 22:17:09 -0000	1.78
> +++ netinet/tcp_timer.c	3 Jan 2025 11:09:13 -0000
> @@ -76,7 +76,6 @@ void	tcp_timer_rexmt(void *);
> void	tcp_timer_persist(void *);
> void	tcp_timer_keep(void *);
> void	tcp_timer_2msl(void *);
> -void	tcp_timer_reaper(void *);
> void	tcp_timer_delack(void *);
> 
> const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
> @@ -84,7 +83,6 @@ const tcp_timer_func_t tcp_timer_funcs[T
> 	tcp_timer_persist,
> 	tcp_timer_keep,
> 	tcp_timer_2msl,
> -	tcp_timer_reaper,
> 	tcp_timer_delack,
> };
> 
> @@ -114,7 +112,8 @@ tcp_timer_init(void)
> void
> tcp_timer_delack(void *arg)
> {
> -	struct tcpcb *otp = NULL, *tp = arg;
> +	struct inpcb *inp = arg;
> +	struct tcpcb *otp = NULL, *tp;
> 	short ostate;
> 
> 	/*
> @@ -123,13 +122,14 @@ tcp_timer_delack(void *arg)
> 	 * ACK callout.
> 	 */
> 	NET_LOCK();
> +	tp = intotcpcb(inp);
> 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
> -	if (!ISSET((tp)->t_flags, TF_TMR_DELACK) ||
> +	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) ||
> 	    timeout_pending(&tp->t_timer[TCPT_DELACK]))
> 		goto out;
> -	CLR((tp)->t_flags, TF_TMR_DELACK);
> +	CLR(tp->t_flags, TF_TMR_DELACK);
> 
> -	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> +	if (inp->inp_socket->so_options & SO_DEBUG) {
> 		otp = tp;
> 		ostate = tp->t_state;
> 	}
> @@ -139,6 +139,7 @@ tcp_timer_delack(void *arg)
> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0);
>  out:
> 	NET_UNLOCK();
> +	in_pcbunref(inp);
> }
> 
> /*
> @@ -197,19 +198,18 @@ tcp_timer_freesack(struct tcpcb *tp)
> void
> tcp_timer_rexmt(void *arg)
> {
> -	struct tcpcb *otp = NULL, *tp = arg;
> -	struct inpcb *inp;
> +	struct inpcb *inp = arg;
> +	struct tcpcb *otp = NULL, *tp;
> 	uint32_t rto;
> 	short ostate;
> 
> 	NET_LOCK();
> -	inp = tp->t_inpcb;
> -
> +	tp = intotcpcb(inp);
> 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
> -	if (!ISSET((tp)->t_flags, TF_TMR_REXMT) ||
> +	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) ||
> 	    timeout_pending(&tp->t_timer[TCPT_REXMT]))
> 		goto out;
> -	CLR((tp)->t_flags, TF_TMR_REXMT);
> +	CLR(tp->t_flags, TF_TMR_REXMT);
> 
> 	if ((tp->t_flags & TF_PMTUD_PEND) && inp &&
> 	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
> @@ -392,27 +392,30 @@ tcp_timer_rexmt(void *arg)
> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0);
>  out:
> 	NET_UNLOCK();
> +	in_pcbunref(inp);
> }
> 
> void
> tcp_timer_persist(void *arg)
> {
> -	struct tcpcb *otp = NULL, *tp = arg;
> +	struct inpcb *inp = arg;
> +	struct tcpcb *otp = NULL, *tp;
> 	uint32_t rto;
> 	short ostate;
> 	uint64_t now;
> 
> 	NET_LOCK();
> +	tp = intotcpcb(inp);
> 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
> -	if (!ISSET((tp)->t_flags, TF_TMR_PERSIST) ||
> +	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) ||
> 	    timeout_pending(&tp->t_timer[TCPT_PERSIST]))
> 		goto out;
> -	CLR((tp)->t_flags, TF_TMR_PERSIST);
> +	CLR(tp->t_flags, TF_TMR_PERSIST);
> 
> 	if (TCP_TIMER_ISARMED(tp, TCPT_REXMT))
> 		goto out;
> 
> -	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> +	if (inp->inp_socket->so_options & SO_DEBUG) {
> 		otp = tp;
> 		ostate = tp->t_state;
> 	}
> @@ -443,30 +446,36 @@ tcp_timer_persist(void *arg)
> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0);
>  out:
> 	NET_UNLOCK();
> +	in_pcbunref(inp);
> }
> 
> void
> tcp_timer_keep(void *arg)
> {
> -	struct tcpcb *otp = NULL, *tp = arg;
> +	struct inpcb *inp = arg;
> +	struct tcpcb *otp = NULL, *tp;
> 	short ostate;
> 
> 	NET_LOCK();
> +	tp = intotcpcb(inp);
> 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
> -	if (!ISSET((tp)->t_flags, TF_TMR_KEEP) ||
> +	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) ||
> 	    timeout_pending(&tp->t_timer[TCPT_KEEP]))
> 		goto out;
> -	CLR((tp)->t_flags, TF_TMR_KEEP);
> +	CLR(tp->t_flags, TF_TMR_KEEP);
> 
> -	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> +	if (inp->inp_socket->so_options & SO_DEBUG) {
> 		otp = tp;
> 		ostate = tp->t_state;
> 	}
> 	tcpstat_inc(tcps_keeptimeo);
> -	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
> -		goto dropit;
> +	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) {
> +		tcpstat_inc(tcps_keepdrops);
> +		tp = tcp_drop(tp, ETIMEDOUT);
> +		goto out;
> +	}
> 	if ((atomic_load_int(&tcp_always_keepalive) ||
> -	    tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
> +	    inp->inp_socket->so_options & SO_KEEPALIVE) &&
> 	    tp->t_state <= TCPS_CLOSING) {
> 		int maxidle;
> 		uint64_t now;
> @@ -474,8 +483,11 @@ tcp_timer_keep(void *arg)
> 		maxidle = READ_ONCE(tcp_maxidle);
> 		now = tcp_now();
> 		if ((maxidle > 0) &&
> -		    ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle))
> -			goto dropit;
> +		    ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) {
> +			tcpstat_inc(tcps_keepdrops);
> +			tp = tcp_drop(tp, ETIMEDOUT);
> +			goto out;
> +		}
> 		/*
> 		 * Send a packet designed to force a response
> 		 * if the peer is up and reachable:
> @@ -498,30 +510,27 @@ tcp_timer_keep(void *arg)
> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0);
>  out:
> 	NET_UNLOCK();
> -	return;
> -
> - dropit:
> -	tcpstat_inc(tcps_keepdrops);
> -	tp = tcp_drop(tp, ETIMEDOUT);
> -	NET_UNLOCK();
> +	in_pcbunref(inp);
> }
> 
> void
> tcp_timer_2msl(void *arg)
> {
> -	struct tcpcb *otp = NULL, *tp = arg;
> +	struct inpcb *inp = arg;
> +	struct tcpcb *otp = NULL, *tp;
> 	short ostate;
> 	int maxidle;
> 	uint64_t now;
> 
> 	NET_LOCK();
> +	tp = intotcpcb(inp);
> 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
> -	if (!ISSET((tp)->t_flags, TF_TMR_2MSL) ||
> +	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) ||
> 	    timeout_pending(&tp->t_timer[TCPT_2MSL]))
> 		goto out;
> -	CLR((tp)->t_flags, TF_TMR_2MSL);
> +	CLR(tp->t_flags, TF_TMR_2MSL);
> 
> -	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> +	if (inp->inp_socket->so_options & SO_DEBUG) {
> 		otp = tp;
> 		ostate = tp->t_state;
> 	}
> @@ -538,6 +547,7 @@ tcp_timer_2msl(void *arg)
> 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0);
>  out:
> 	NET_UNLOCK();
> +	in_pcbunref(inp);
> }
> 
> void
> @@ -554,5 +564,4 @@ tcp_timer_reaper(void *arg)
> 	 * Freeing may run in parallel as it does not grab the net lock.
> 	 */
> 	pool_put(&tcpcb_pool, tp);
> -	tcpstat_inc(tcps_closed);
> }
> Index: netinet/tcp_timer.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v
> diff -u -p -r1.24 tcp_timer.h
> --- netinet/tcp_timer.h	1 Jan 2025 13:44:22 -0000	1.24
> +++ netinet/tcp_timer.h	3 Jan 2025 10:45:48 -0000
> @@ -42,10 +42,9 @@
> #define	TCPT_PERSIST	1		/* retransmit persistence */
> #define	TCPT_KEEP	2		/* keep alive */
> #define	TCPT_2MSL	3		/* 2*msl quiet time timer */
> -#define	TCPT_REAPER	4		/* delayed cleanup timeout */
> -#define	TCPT_DELACK	5		/* delayed ack timeout */
> +#define	TCPT_DELACK	4		/* delayed ack timeout */
> 
> -#define	TCPT_NTIMERS	6
> +#define	TCPT_NTIMERS	5
> 
> /*
>  * The TCPT_REXMT timer is used to force retransmissions.
> @@ -110,7 +109,7 @@
> 
> #ifdef	TCPTIMERS
> const char *tcptimers[TCPT_NTIMERS] =
> -    { "REXMT", "PERSIST", "KEEP", "2MSL", "REAPER", "DELACK" };
> +    { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
> #endif /* TCPTIMERS */
> 
> /*
> @@ -118,19 +117,21 @@ const char *tcptimers[TCPT_NTIMERS] =
>  */
> #define	TCP_TIMER_INIT(tp, timer)					\
> 	timeout_set_flags(&(tp)->t_timer[(timer)],			\
> -	    tcp_timer_funcs[(timer)], tp, KCLOCK_NONE,			\
> -	    TIMEOUT_PROC | TIMEOUT_MPSAFE)
> +	    tcp_timer_funcs[(timer)], (tp)->t_inpcb,			\
> +	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE)
> 
> #define	TCP_TIMER_ARM(tp, timer, msecs)					\
> do {									\
> 	SET((tp)->t_flags, TF_TIMER << (timer));			\
> -	timeout_add_msec(&(tp)->t_timer[(timer)], (msecs));		\
> +	if (timeout_add_msec(&(tp)->t_timer[(timer)], (msecs)))		\
> +		in_pcbref((tp)->t_inpcb);				\
> } while (0)
> 
> #define	TCP_TIMER_DISARM(tp, timer)					\
> do {									\
> 	CLR((tp)->t_flags, TF_TIMER << (timer));			\
> -	timeout_del(&(tp)->t_timer[(timer)]);				\
> +	if (timeout_del(&(tp)->t_timer[(timer)]))			\
> +		in_pcbunref((tp)->t_inpcb);				\
> } while (0)
> 
> #define	TCP_TIMER_ISARMED(tp, timer)					\
> @@ -163,5 +164,7 @@ extern int tcp_ttl;			/* time to live fo
> extern const int tcp_backoff[];
> 
> void	tcp_timer_init(void);
> +void	tcp_timer_reaper(void *);
> +
> #endif /* _KERNEL */
> #endif /* _NETINET_TCP_TIMER_H_ */
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.182 tcp_var.h
> --- netinet/tcp_var.h	2 Jan 2025 10:55:18 -0000	1.182
> +++ netinet/tcp_var.h	3 Jan 2025 10:45:48 -0000
> @@ -70,6 +70,7 @@ struct tcpqent {
> struct tcpcb {
> 	struct tcpqehead t_segq;		/* sequencing queue */
> 	struct timeout t_timer[TCPT_NTIMERS];	/* tcp timers */
> +	struct timeout t_timer_reaper;	/* reaper is special, no refcnt */
> 	short	t_state;		/* state of this connection */
> 	short	t_rxtshift;		/* log(2) of rexmt exp. backoff */
> 	int	t_rxtcur;		/* current retransmit value */
> @@ -102,8 +103,7 @@ struct tcpcb {
> #define TF_TMR_PERSIST	0x08000000U	/* retransmit persistence timer armed */
> #define TF_TMR_KEEP	0x10000000U	/* keep alive timer armed */
> #define TF_TMR_2MSL	0x20000000U	/* 2*msl quiet time timer armed */
> -#define TF_TMR_REAPER	0x40000000U	/* delayed cleanup timer armed, dead */
> -#define TF_TMR_DELACK	0x80000000U	/* delayed ack timer armed */
> +#define TF_TMR_DELACK	0x40000000U	/* delayed ack timer armed */
> #define TF_TIMER	TF_TMR_REXMT	/* used to shift with TCPT values */
> 
> 	struct	mbuf *t_template;	/* skeletal packet for transmit */
>