Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
tcp timer refcount
To:
tech@openbsd.org
Date:
Fri, 3 Jan 2025 12:54:23 +0100

Download raw body.

Thread
  • Alexander Bluhm:

    tcp timer refcount

Hi,

I would like to reference count the inpcb used in TCP timer.  The
diff switches from struct tcpcb to inpcb in the timer argument.
This allows to ref count it.  Reaper is special as it does not use
the inp.  So I removed it from the generic timeout functions and
added a special field.

For now I just want to commit the ref count part to see that there
are no leaks.

Later we can run timer with shared net lock.  Also the reaper may
be removed, but that needs more thoughts.

ok?

bluhm

Index: netinet/tcp_subr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
diff -u -p -r1.203 tcp_subr.c
--- netinet/tcp_subr.c	28 Dec 2024 22:17:09 -0000	1.203
+++ netinet/tcp_subr.c	3 Jan 2025 10:45:48 -0000
@@ -440,13 +440,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait
 	tp->t_maxseg = atomic_load_int(&tcp_mssdflt);
 	tp->t_maxopd = 0;
 
+	tp->t_inpcb = inp;
 	for (i = 0; i < TCPT_NTIMERS; i++)
 		TCP_TIMER_INIT(tp, i);
+	timeout_set_flags(&tp->t_timer_reaper, tcp_timer_reaper, tp,
+	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
 
 	tp->sack_enable = atomic_load_int(&tcp_do_sack);
 	tp->t_flags = atomic_load_int(&tcp_do_rfc1323) ?
 	    (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
-	tp->t_inpcb = inp;
 	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
 	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
@@ -530,11 +532,11 @@ tcp_close(struct tcpcb *tp)
 
 	m_free(tp->t_template);
 	/* Free tcpcb after all pending timers have been run. */
-	TCP_TIMER_ARM(tp, TCPT_REAPER, 1);
-
+	timeout_add(&tp->t_timer_reaper, 0);
 	inp->inp_ppcb = NULL;
 	soisdisconnected(so);
 	in_pcbdetach(inp);
+	tcpstat_inc(tcps_closed);
 	return (NULL);
 }
 
Index: netinet/tcp_timer.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
diff -u -p -r1.78 tcp_timer.c
--- netinet/tcp_timer.c	28 Dec 2024 22:17:09 -0000	1.78
+++ netinet/tcp_timer.c	3 Jan 2025 11:09:13 -0000
@@ -76,7 +76,6 @@ void	tcp_timer_rexmt(void *);
 void	tcp_timer_persist(void *);
 void	tcp_timer_keep(void *);
 void	tcp_timer_2msl(void *);
-void	tcp_timer_reaper(void *);
 void	tcp_timer_delack(void *);
 
 const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
@@ -84,7 +83,6 @@ const tcp_timer_func_t tcp_timer_funcs[T
 	tcp_timer_persist,
 	tcp_timer_keep,
 	tcp_timer_2msl,
-	tcp_timer_reaper,
 	tcp_timer_delack,
 };
 
@@ -114,7 +112,8 @@ tcp_timer_init(void)
 void
 tcp_timer_delack(void *arg)
 {
-	struct tcpcb *otp = NULL, *tp = arg;
+	struct inpcb *inp = arg;
+	struct tcpcb *otp = NULL, *tp;
 	short ostate;
 
 	/*
@@ -123,13 +122,14 @@ tcp_timer_delack(void *arg)
 	 * ACK callout.
 	 */
 	NET_LOCK();
+	tp = intotcpcb(inp);
 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
-	if (!ISSET((tp)->t_flags, TF_TMR_DELACK) ||
+	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) ||
 	    timeout_pending(&tp->t_timer[TCPT_DELACK]))
 		goto out;
-	CLR((tp)->t_flags, TF_TMR_DELACK);
+	CLR(tp->t_flags, TF_TMR_DELACK);
 
-	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+	if (inp->inp_socket->so_options & SO_DEBUG) {
 		otp = tp;
 		ostate = tp->t_state;
 	}
@@ -139,6 +139,7 @@ tcp_timer_delack(void *arg)
 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0);
  out:
 	NET_UNLOCK();
+	in_pcbunref(inp);
 }
 
 /*
@@ -197,19 +198,18 @@ tcp_timer_freesack(struct tcpcb *tp)
 void
 tcp_timer_rexmt(void *arg)
 {
-	struct tcpcb *otp = NULL, *tp = arg;
-	struct inpcb *inp;
+	struct inpcb *inp = arg;
+	struct tcpcb *otp = NULL, *tp;
 	uint32_t rto;
 	short ostate;
 
 	NET_LOCK();
-	inp = tp->t_inpcb;
-
+	tp = intotcpcb(inp);
 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
-	if (!ISSET((tp)->t_flags, TF_TMR_REXMT) ||
+	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) ||
 	    timeout_pending(&tp->t_timer[TCPT_REXMT]))
 		goto out;
-	CLR((tp)->t_flags, TF_TMR_REXMT);
+	CLR(tp->t_flags, TF_TMR_REXMT);
 
 	if ((tp->t_flags & TF_PMTUD_PEND) && inp &&
 	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
@@ -392,27 +392,30 @@ tcp_timer_rexmt(void *arg)
 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0);
  out:
 	NET_UNLOCK();
+	in_pcbunref(inp);
 }
 
 void
 tcp_timer_persist(void *arg)
 {
-	struct tcpcb *otp = NULL, *tp = arg;
+	struct inpcb *inp = arg;
+	struct tcpcb *otp = NULL, *tp;
 	uint32_t rto;
 	short ostate;
 	uint64_t now;
 
 	NET_LOCK();
+	tp = intotcpcb(inp);
 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
-	if (!ISSET((tp)->t_flags, TF_TMR_PERSIST) ||
+	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) ||
 	    timeout_pending(&tp->t_timer[TCPT_PERSIST]))
 		goto out;
-	CLR((tp)->t_flags, TF_TMR_PERSIST);
+	CLR(tp->t_flags, TF_TMR_PERSIST);
 
 	if (TCP_TIMER_ISARMED(tp, TCPT_REXMT))
 		goto out;
 
-	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+	if (inp->inp_socket->so_options & SO_DEBUG) {
 		otp = tp;
 		ostate = tp->t_state;
 	}
@@ -443,30 +446,36 @@ tcp_timer_persist(void *arg)
 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0);
  out:
 	NET_UNLOCK();
+	in_pcbunref(inp);
 }
 
 void
 tcp_timer_keep(void *arg)
 {
-	struct tcpcb *otp = NULL, *tp = arg;
+	struct inpcb *inp = arg;
+	struct tcpcb *otp = NULL, *tp;
 	short ostate;
 
 	NET_LOCK();
+	tp = intotcpcb(inp);
 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
-	if (!ISSET((tp)->t_flags, TF_TMR_KEEP) ||
+	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) ||
 	    timeout_pending(&tp->t_timer[TCPT_KEEP]))
 		goto out;
-	CLR((tp)->t_flags, TF_TMR_KEEP);
+	CLR(tp->t_flags, TF_TMR_KEEP);
 
-	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+	if (inp->inp_socket->so_options & SO_DEBUG) {
 		otp = tp;
 		ostate = tp->t_state;
 	}
 	tcpstat_inc(tcps_keeptimeo);
-	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
-		goto dropit;
+	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) {
+		tcpstat_inc(tcps_keepdrops);
+		tp = tcp_drop(tp, ETIMEDOUT);
+		goto out;
+	}
 	if ((atomic_load_int(&tcp_always_keepalive) ||
-	    tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
+	    inp->inp_socket->so_options & SO_KEEPALIVE) &&
 	    tp->t_state <= TCPS_CLOSING) {
 		int maxidle;
 		uint64_t now;
@@ -474,8 +483,11 @@ tcp_timer_keep(void *arg)
 		maxidle = READ_ONCE(tcp_maxidle);
 		now = tcp_now();
 		if ((maxidle > 0) &&
-		    ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle))
-			goto dropit;
+		    ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) {
+			tcpstat_inc(tcps_keepdrops);
+			tp = tcp_drop(tp, ETIMEDOUT);
+			goto out;
+		}
 		/*
 		 * Send a packet designed to force a response
 		 * if the peer is up and reachable:
@@ -498,30 +510,27 @@ tcp_timer_keep(void *arg)
 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0);
  out:
 	NET_UNLOCK();
-	return;
-
- dropit:
-	tcpstat_inc(tcps_keepdrops);
-	tp = tcp_drop(tp, ETIMEDOUT);
-	NET_UNLOCK();
+	in_pcbunref(inp);
 }
 
 void
 tcp_timer_2msl(void *arg)
 {
-	struct tcpcb *otp = NULL, *tp = arg;
+	struct inpcb *inp = arg;
+	struct tcpcb *otp = NULL, *tp;
 	short ostate;
 	int maxidle;
 	uint64_t now;
 
 	NET_LOCK();
+	tp = intotcpcb(inp);
 	/* Ignore canceled timeouts or timeouts that have been rescheduled. */
-	if (!ISSET((tp)->t_flags, TF_TMR_2MSL) ||
+	if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) ||
 	    timeout_pending(&tp->t_timer[TCPT_2MSL]))
 		goto out;
-	CLR((tp)->t_flags, TF_TMR_2MSL);
+	CLR(tp->t_flags, TF_TMR_2MSL);
 
-	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+	if (inp->inp_socket->so_options & SO_DEBUG) {
 		otp = tp;
 		ostate = tp->t_state;
 	}
@@ -538,6 +547,7 @@ tcp_timer_2msl(void *arg)
 		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0);
  out:
 	NET_UNLOCK();
+	in_pcbunref(inp);
 }
 
 void
@@ -554,5 +564,4 @@ tcp_timer_reaper(void *arg)
 	 * Freeing may run in parallel as it does not grab the net lock.
 	 */
 	pool_put(&tcpcb_pool, tp);
-	tcpstat_inc(tcps_closed);
 }
Index: netinet/tcp_timer.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v
diff -u -p -r1.24 tcp_timer.h
--- netinet/tcp_timer.h	1 Jan 2025 13:44:22 -0000	1.24
+++ netinet/tcp_timer.h	3 Jan 2025 10:45:48 -0000
@@ -42,10 +42,9 @@
 #define	TCPT_PERSIST	1		/* retransmit persistence */
 #define	TCPT_KEEP	2		/* keep alive */
 #define	TCPT_2MSL	3		/* 2*msl quiet time timer */
-#define	TCPT_REAPER	4		/* delayed cleanup timeout */
-#define	TCPT_DELACK	5		/* delayed ack timeout */
+#define	TCPT_DELACK	4		/* delayed ack timeout */
 
-#define	TCPT_NTIMERS	6
+#define	TCPT_NTIMERS	5
 
 /*
  * The TCPT_REXMT timer is used to force retransmissions.
@@ -110,7 +109,7 @@
 
 #ifdef	TCPTIMERS
 const char *tcptimers[TCPT_NTIMERS] =
-    { "REXMT", "PERSIST", "KEEP", "2MSL", "REAPER", "DELACK" };
+    { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
 #endif /* TCPTIMERS */
 
 /*
@@ -118,19 +117,21 @@ const char *tcptimers[TCPT_NTIMERS] =
  */
 #define	TCP_TIMER_INIT(tp, timer)					\
 	timeout_set_flags(&(tp)->t_timer[(timer)],			\
-	    tcp_timer_funcs[(timer)], tp, KCLOCK_NONE,			\
-	    TIMEOUT_PROC | TIMEOUT_MPSAFE)
+	    tcp_timer_funcs[(timer)], (tp)->t_inpcb,			\
+	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE)
 
 #define	TCP_TIMER_ARM(tp, timer, msecs)					\
 do {									\
 	SET((tp)->t_flags, TF_TIMER << (timer));			\
-	timeout_add_msec(&(tp)->t_timer[(timer)], (msecs));		\
+	if (timeout_add_msec(&(tp)->t_timer[(timer)], (msecs)))		\
+		in_pcbref((tp)->t_inpcb);				\
 } while (0)
 
 #define	TCP_TIMER_DISARM(tp, timer)					\
 do {									\
 	CLR((tp)->t_flags, TF_TIMER << (timer));			\
-	timeout_del(&(tp)->t_timer[(timer)]);				\
+	if (timeout_del(&(tp)->t_timer[(timer)]))			\
+		in_pcbunref((tp)->t_inpcb);				\
 } while (0)
 
 #define	TCP_TIMER_ISARMED(tp, timer)					\
@@ -163,5 +164,7 @@ extern int tcp_ttl;			/* time to live fo
 extern const int tcp_backoff[];
 
 void	tcp_timer_init(void);
+void	tcp_timer_reaper(void *);
+
 #endif /* _KERNEL */
 #endif /* _NETINET_TCP_TIMER_H_ */
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.182 tcp_var.h
--- netinet/tcp_var.h	2 Jan 2025 10:55:18 -0000	1.182
+++ netinet/tcp_var.h	3 Jan 2025 10:45:48 -0000
@@ -70,6 +70,7 @@ struct tcpqent {
 struct tcpcb {
 	struct tcpqehead t_segq;		/* sequencing queue */
 	struct timeout t_timer[TCPT_NTIMERS];	/* tcp timers */
+	struct timeout t_timer_reaper;	/* reaper is special, no refcnt */
 	short	t_state;		/* state of this connection */
 	short	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	int	t_rxtcur;		/* current retransmit value */
@@ -102,8 +103,7 @@ struct tcpcb {
 #define TF_TMR_PERSIST	0x08000000U	/* retransmit persistence timer armed */
 #define TF_TMR_KEEP	0x10000000U	/* keep alive timer armed */
 #define TF_TMR_2MSL	0x20000000U	/* 2*msl quiet time timer armed */
-#define TF_TMR_REAPER	0x40000000U	/* delayed cleanup timer armed, dead */
-#define TF_TMR_DELACK	0x80000000U	/* delayed ack timer armed */
+#define TF_TMR_DELACK	0x40000000U	/* delayed ack timer armed */
 #define TF_TIMER	TF_TMR_REXMT	/* used to shift with TCPT values */
 
 	struct	mbuf *t_template;	/* skeletal packet for transmit */