From: Alexander Bluhm Subject: tcp timer refcount To: tech@openbsd.org Date: Fri, 3 Jan 2025 12:54:23 +0100 Hi, I would like to reference count the inpcb used in TCP timer. The diff switches from struct tcpcb to inpcb in the timer argument. This allows to ref count it. Reaper is special as it does not use the inp. So I removed it from the generic timeout functions and added a special field. For now I just want to commit the ref count part to see that there are no leaks. Later we can run timer with shared net lock. Also the reaper may be removed, but that needs more thoughts. ok? bluhm Index: netinet/tcp_subr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v diff -u -p -r1.203 tcp_subr.c --- netinet/tcp_subr.c 28 Dec 2024 22:17:09 -0000 1.203 +++ netinet/tcp_subr.c 3 Jan 2025 10:45:48 -0000 @@ -440,13 +440,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait tp->t_maxseg = atomic_load_int(&tcp_mssdflt); tp->t_maxopd = 0; + tp->t_inpcb = inp; for (i = 0; i < TCPT_NTIMERS; i++) TCP_TIMER_INIT(tp, i); + timeout_set_flags(&tp->t_timer_reaper, tcp_timer_reaper, tp, + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); tp->sack_enable = atomic_load_int(&tcp_do_sack); tp->t_flags = atomic_load_int(&tcp_do_rfc1323) ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; - tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives @@ -530,11 +532,11 @@ tcp_close(struct tcpcb *tp) m_free(tp->t_template); /* Free tcpcb after all pending timers have been run. */ - TCP_TIMER_ARM(tp, TCPT_REAPER, 1); - + timeout_add(&tp->t_timer_reaper, 0); inp->inp_ppcb = NULL; soisdisconnected(so); in_pcbdetach(inp); + tcpstat_inc(tcps_closed); return (NULL); } Index: netinet/tcp_timer.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v diff -u -p -r1.78 tcp_timer.c --- netinet/tcp_timer.c 28 Dec 2024 22:17:09 -0000 1.78 +++ netinet/tcp_timer.c 3 Jan 2025 11:09:13 -0000 @@ -76,7 +76,6 @@ void tcp_timer_rexmt(void *); void tcp_timer_persist(void *); void tcp_timer_keep(void *); void tcp_timer_2msl(void *); -void tcp_timer_reaper(void *); void tcp_timer_delack(void *); const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = { @@ -84,7 +83,6 @@ const tcp_timer_func_t tcp_timer_funcs[T tcp_timer_persist, tcp_timer_keep, tcp_timer_2msl, - tcp_timer_reaper, tcp_timer_delack, }; @@ -114,7 +112,8 @@ tcp_timer_init(void) void tcp_timer_delack(void *arg) { - struct tcpcb *otp = NULL, *tp = arg; + struct inpcb *inp = arg; + struct tcpcb *otp = NULL, *tp; short ostate; /* @@ -123,13 +122,14 @@ tcp_timer_delack(void *arg) * ACK callout. */ NET_LOCK(); + tp = intotcpcb(inp); /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (!ISSET((tp)->t_flags, TF_TMR_DELACK) || + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) || timeout_pending(&tp->t_timer[TCPT_DELACK])) goto out; - CLR((tp)->t_flags, TF_TMR_DELACK); + CLR(tp->t_flags, TF_TMR_DELACK); - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { + if (inp->inp_socket->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -139,6 +139,7 @@ tcp_timer_delack(void *arg) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0); out: NET_UNLOCK(); + in_pcbunref(inp); } /* @@ -197,19 +198,18 @@ tcp_timer_freesack(struct tcpcb *tp) void tcp_timer_rexmt(void *arg) { - struct tcpcb *otp = NULL, *tp = arg; - struct inpcb *inp; + struct inpcb *inp = arg; + struct tcpcb *otp = NULL, *tp; uint32_t rto; short ostate; NET_LOCK(); - inp = tp->t_inpcb; - + tp = intotcpcb(inp); /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (!ISSET((tp)->t_flags, TF_TMR_REXMT) || + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) || timeout_pending(&tp->t_timer[TCPT_REXMT])) goto out; - CLR((tp)->t_flags, TF_TMR_REXMT); + CLR(tp->t_flags, TF_TMR_REXMT); if ((tp->t_flags & TF_PMTUD_PEND) && inp && SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && @@ -392,27 +392,30 @@ tcp_timer_rexmt(void *arg) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0); out: NET_UNLOCK(); + in_pcbunref(inp); } void tcp_timer_persist(void *arg) { - struct tcpcb *otp = NULL, *tp = arg; + struct inpcb *inp = arg; + struct tcpcb *otp = NULL, *tp; uint32_t rto; short ostate; uint64_t now; NET_LOCK(); + tp = intotcpcb(inp); /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (!ISSET((tp)->t_flags, TF_TMR_PERSIST) || + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) || timeout_pending(&tp->t_timer[TCPT_PERSIST])) goto out; - CLR((tp)->t_flags, TF_TMR_PERSIST); + CLR(tp->t_flags, TF_TMR_PERSIST); if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) goto out; - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { + if (inp->inp_socket->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -443,30 +446,36 @@ tcp_timer_persist(void *arg) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0); out: NET_UNLOCK(); + in_pcbunref(inp); } void tcp_timer_keep(void *arg) { - struct tcpcb *otp = NULL, *tp = arg; + struct inpcb *inp = arg; + struct tcpcb *otp = NULL, *tp; short ostate; NET_LOCK(); + tp = intotcpcb(inp); /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (!ISSET((tp)->t_flags, TF_TMR_KEEP) || + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) || timeout_pending(&tp->t_timer[TCPT_KEEP])) goto out; - CLR((tp)->t_flags, TF_TMR_KEEP); + CLR(tp->t_flags, TF_TMR_KEEP); - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { + if (inp->inp_socket->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } tcpstat_inc(tcps_keeptimeo); - if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) - goto dropit; + if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) { + tcpstat_inc(tcps_keepdrops); + tp = tcp_drop(tp, ETIMEDOUT); + goto out; + } if ((atomic_load_int(&tcp_always_keepalive) || - tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && + inp->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { int maxidle; uint64_t now; @@ -474,8 +483,11 @@ tcp_timer_keep(void *arg) maxidle = READ_ONCE(tcp_maxidle); now = tcp_now(); if ((maxidle > 0) && - ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) - goto dropit; + ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) { + tcpstat_inc(tcps_keepdrops); + tp = tcp_drop(tp, ETIMEDOUT); + goto out; + } /* * Send a packet designed to force a response * if the peer is up and reachable: @@ -498,30 +510,27 @@ tcp_timer_keep(void *arg) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0); out: NET_UNLOCK(); - return; - - dropit: - tcpstat_inc(tcps_keepdrops); - tp = tcp_drop(tp, ETIMEDOUT); - NET_UNLOCK(); + in_pcbunref(inp); } void tcp_timer_2msl(void *arg) { - struct tcpcb *otp = NULL, *tp = arg; + struct inpcb *inp = arg; + struct tcpcb *otp = NULL, *tp; short ostate; int maxidle; uint64_t now; NET_LOCK(); + tp = intotcpcb(inp); /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (!ISSET((tp)->t_flags, TF_TMR_2MSL) || + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) || timeout_pending(&tp->t_timer[TCPT_2MSL])) goto out; - CLR((tp)->t_flags, TF_TMR_2MSL); + CLR(tp->t_flags, TF_TMR_2MSL); - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { + if (inp->inp_socket->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -538,6 +547,7 @@ tcp_timer_2msl(void *arg) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0); out: NET_UNLOCK(); + in_pcbunref(inp); } void @@ -554,5 +564,4 @@ tcp_timer_reaper(void *arg) * Freeing may run in parallel as it does not grab the net lock. */ pool_put(&tcpcb_pool, tp); - tcpstat_inc(tcps_closed); } Index: netinet/tcp_timer.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v diff -u -p -r1.24 tcp_timer.h --- netinet/tcp_timer.h 1 Jan 2025 13:44:22 -0000 1.24 +++ netinet/tcp_timer.h 3 Jan 2025 10:45:48 -0000 @@ -42,10 +42,9 @@ #define TCPT_PERSIST 1 /* retransmit persistence */ #define TCPT_KEEP 2 /* keep alive */ #define TCPT_2MSL 3 /* 2*msl quiet time timer */ -#define TCPT_REAPER 4 /* delayed cleanup timeout */ -#define TCPT_DELACK 5 /* delayed ack timeout */ +#define TCPT_DELACK 4 /* delayed ack timeout */ -#define TCPT_NTIMERS 6 +#define TCPT_NTIMERS 5 /* * The TCPT_REXMT timer is used to force retransmissions. @@ -110,7 +109,7 @@ #ifdef TCPTIMERS const char *tcptimers[TCPT_NTIMERS] = - { "REXMT", "PERSIST", "KEEP", "2MSL", "REAPER", "DELACK" }; + { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" }; #endif /* TCPTIMERS */ /* @@ -118,19 +117,21 @@ const char *tcptimers[TCPT_NTIMERS] = */ #define TCP_TIMER_INIT(tp, timer) \ timeout_set_flags(&(tp)->t_timer[(timer)], \ - tcp_timer_funcs[(timer)], tp, KCLOCK_NONE, \ - TIMEOUT_PROC | TIMEOUT_MPSAFE) + tcp_timer_funcs[(timer)], (tp)->t_inpcb, \ + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE) #define TCP_TIMER_ARM(tp, timer, msecs) \ do { \ SET((tp)->t_flags, TF_TIMER << (timer)); \ - timeout_add_msec(&(tp)->t_timer[(timer)], (msecs)); \ + if (timeout_add_msec(&(tp)->t_timer[(timer)], (msecs))) \ + in_pcbref((tp)->t_inpcb); \ } while (0) #define TCP_TIMER_DISARM(tp, timer) \ do { \ CLR((tp)->t_flags, TF_TIMER << (timer)); \ - timeout_del(&(tp)->t_timer[(timer)]); \ + if (timeout_del(&(tp)->t_timer[(timer)])) \ + in_pcbunref((tp)->t_inpcb); \ } while (0) #define TCP_TIMER_ISARMED(tp, timer) \ @@ -163,5 +164,7 @@ extern int tcp_ttl; /* time to live fo extern const int tcp_backoff[]; void tcp_timer_init(void); +void tcp_timer_reaper(void *); + #endif /* _KERNEL */ #endif /* _NETINET_TCP_TIMER_H_ */ Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v diff -u -p -r1.182 tcp_var.h --- netinet/tcp_var.h 2 Jan 2025 10:55:18 -0000 1.182 +++ netinet/tcp_var.h 3 Jan 2025 10:45:48 -0000 @@ -70,6 +70,7 @@ struct tcpqent { struct tcpcb { struct tcpqehead t_segq; /* sequencing queue */ struct timeout t_timer[TCPT_NTIMERS]; /* tcp timers */ + struct timeout t_timer_reaper; /* reaper is special, no refcnt */ short t_state; /* state of this connection */ short t_rxtshift; /* log(2) of rexmt exp. backoff */ int t_rxtcur; /* current retransmit value */ @@ -102,8 +103,7 @@ struct tcpcb { #define TF_TMR_PERSIST 0x08000000U /* retransmit persistence timer armed */ #define TF_TMR_KEEP 0x10000000U /* keep alive timer armed */ #define TF_TMR_2MSL 0x20000000U /* 2*msl quiet time timer armed */ -#define TF_TMR_REAPER 0x40000000U /* delayed cleanup timer armed, dead */ -#define TF_TMR_DELACK 0x80000000U /* delayed ack timer armed */ +#define TF_TMR_DELACK 0x40000000U /* delayed ack timer armed */ #define TF_TIMER TF_TMR_REXMT /* used to shift with TCPT values */ struct mbuf *t_template; /* skeletal packet for transmit */