From: Vitaliy Makkoveev Subject: Re: tcp timer refcount To: Alexander Bluhm Cc: tech@openbsd.org Date: Fri, 3 Jan 2025 19:02:38 +0300 > On 3 Jan 2025, at 14:54, Alexander Bluhm wrote: > > Hi, > > I would like to reference count the inpcb used in TCP timer. The > diff switches from struct tcpcb to inpcb in the timer argument. > This allows to ref count it. Reaper is special as it does not use > the inp. So I removed it from the generic timeout functions and > added a special field. > > For now I just want to commit the ref count part to see that there > are no leaks. > > Later we can run timer with shared net lock. Also the reaper may > be removed, but that needs more thoughts. > > ok? > ok mvs > bluhm > > Index: netinet/tcp_subr.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v > diff -u -p -r1.203 tcp_subr.c > --- netinet/tcp_subr.c 28 Dec 2024 22:17:09 -0000 1.203 > +++ netinet/tcp_subr.c 3 Jan 2025 10:45:48 -0000 > @@ -440,13 +440,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait > tp->t_maxseg = atomic_load_int(&tcp_mssdflt); > tp->t_maxopd = 0; > > + tp->t_inpcb = inp; > for (i = 0; i < TCPT_NTIMERS; i++) > TCP_TIMER_INIT(tp, i); > + timeout_set_flags(&tp->t_timer_reaper, tcp_timer_reaper, tp, > + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); > > tp->sack_enable = atomic_load_int(&tcp_do_sack); > tp->t_flags = atomic_load_int(&tcp_do_rfc1323) ? > (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; > - tp->t_inpcb = inp; > /* > * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no > * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives > @@ -530,11 +532,11 @@ tcp_close(struct tcpcb *tp) > > m_free(tp->t_template); > /* Free tcpcb after all pending timers have been run. */ > - TCP_TIMER_ARM(tp, TCPT_REAPER, 1); > - > + timeout_add(&tp->t_timer_reaper, 0); > inp->inp_ppcb = NULL; > soisdisconnected(so); > in_pcbdetach(inp); > + tcpstat_inc(tcps_closed); > return (NULL); > } > > Index: netinet/tcp_timer.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v > diff -u -p -r1.78 tcp_timer.c > --- netinet/tcp_timer.c 28 Dec 2024 22:17:09 -0000 1.78 > +++ netinet/tcp_timer.c 3 Jan 2025 11:09:13 -0000 > @@ -76,7 +76,6 @@ void tcp_timer_rexmt(void *); > void tcp_timer_persist(void *); > void tcp_timer_keep(void *); > void tcp_timer_2msl(void *); > -void tcp_timer_reaper(void *); > void tcp_timer_delack(void *); > > const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = { > @@ -84,7 +83,6 @@ const tcp_timer_func_t tcp_timer_funcs[T > tcp_timer_persist, > tcp_timer_keep, > tcp_timer_2msl, > - tcp_timer_reaper, > tcp_timer_delack, > }; > > @@ -114,7 +112,8 @@ tcp_timer_init(void) > void > tcp_timer_delack(void *arg) > { > - struct tcpcb *otp = NULL, *tp = arg; > + struct inpcb *inp = arg; > + struct tcpcb *otp = NULL, *tp; > short ostate; > > /* > @@ -123,13 +122,14 @@ tcp_timer_delack(void *arg) > * ACK callout. > */ > NET_LOCK(); > + tp = intotcpcb(inp); > /* Ignore canceled timeouts or timeouts that have been rescheduled. */ > - if (!ISSET((tp)->t_flags, TF_TMR_DELACK) || > + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) || > timeout_pending(&tp->t_timer[TCPT_DELACK])) > goto out; > - CLR((tp)->t_flags, TF_TMR_DELACK); > + CLR(tp->t_flags, TF_TMR_DELACK); > > - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { > + if (inp->inp_socket->so_options & SO_DEBUG) { > otp = tp; > ostate = tp->t_state; > } > @@ -139,6 +139,7 @@ tcp_timer_delack(void *arg) > tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0); > out: > NET_UNLOCK(); > + in_pcbunref(inp); > } > > /* > @@ -197,19 +198,18 @@ tcp_timer_freesack(struct tcpcb *tp) > void > tcp_timer_rexmt(void *arg) > { > - struct tcpcb *otp = NULL, *tp = arg; > - struct inpcb *inp; > + struct inpcb *inp = arg; > + struct tcpcb *otp = NULL, *tp; > uint32_t rto; > short ostate; > > NET_LOCK(); > - inp = tp->t_inpcb; > - > + tp = intotcpcb(inp); > /* Ignore canceled timeouts or timeouts that have been rescheduled. */ > - if (!ISSET((tp)->t_flags, TF_TMR_REXMT) || > + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) || > timeout_pending(&tp->t_timer[TCPT_REXMT])) > goto out; > - CLR((tp)->t_flags, TF_TMR_REXMT); > + CLR(tp->t_flags, TF_TMR_REXMT); > > if ((tp->t_flags & TF_PMTUD_PEND) && inp && > SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && > @@ -392,27 +392,30 @@ tcp_timer_rexmt(void *arg) > tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0); > out: > NET_UNLOCK(); > + in_pcbunref(inp); > } > > void > tcp_timer_persist(void *arg) > { > - struct tcpcb *otp = NULL, *tp = arg; > + struct inpcb *inp = arg; > + struct tcpcb *otp = NULL, *tp; > uint32_t rto; > short ostate; > uint64_t now; > > NET_LOCK(); > + tp = intotcpcb(inp); > /* Ignore canceled timeouts or timeouts that have been rescheduled. */ > - if (!ISSET((tp)->t_flags, TF_TMR_PERSIST) || > + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) || > timeout_pending(&tp->t_timer[TCPT_PERSIST])) > goto out; > - CLR((tp)->t_flags, TF_TMR_PERSIST); > + CLR(tp->t_flags, TF_TMR_PERSIST); > > if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) > goto out; > > - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { > + if (inp->inp_socket->so_options & SO_DEBUG) { > otp = tp; > ostate = tp->t_state; > } > @@ -443,30 +446,36 @@ tcp_timer_persist(void *arg) > tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0); > out: > NET_UNLOCK(); > + in_pcbunref(inp); > } > > void > tcp_timer_keep(void *arg) > { > - struct tcpcb *otp = NULL, *tp = arg; > + struct inpcb *inp = arg; > + struct tcpcb *otp = NULL, *tp; > short ostate; > > NET_LOCK(); > + tp = intotcpcb(inp); > /* Ignore canceled timeouts or timeouts that have been rescheduled. */ > - if (!ISSET((tp)->t_flags, TF_TMR_KEEP) || > + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) || > timeout_pending(&tp->t_timer[TCPT_KEEP])) > goto out; > - CLR((tp)->t_flags, TF_TMR_KEEP); > + CLR(tp->t_flags, TF_TMR_KEEP); > > - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { > + if (inp->inp_socket->so_options & SO_DEBUG) { > otp = tp; > ostate = tp->t_state; > } > tcpstat_inc(tcps_keeptimeo); > - if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) > - goto dropit; > + if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) { > + tcpstat_inc(tcps_keepdrops); > + tp = tcp_drop(tp, ETIMEDOUT); > + goto out; > + } > if ((atomic_load_int(&tcp_always_keepalive) || > - tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && > + inp->inp_socket->so_options & SO_KEEPALIVE) && > tp->t_state <= TCPS_CLOSING) { > int maxidle; > uint64_t now; > @@ -474,8 +483,11 @@ tcp_timer_keep(void *arg) > maxidle = READ_ONCE(tcp_maxidle); > now = tcp_now(); > if ((maxidle > 0) && > - ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) > - goto dropit; > + ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) { > + tcpstat_inc(tcps_keepdrops); > + tp = tcp_drop(tp, ETIMEDOUT); > + goto out; > + } > /* > * Send a packet designed to force a response > * if the peer is up and reachable: > @@ -498,30 +510,27 @@ tcp_timer_keep(void *arg) > tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0); > out: > NET_UNLOCK(); > - return; > - > - dropit: > - tcpstat_inc(tcps_keepdrops); > - tp = tcp_drop(tp, ETIMEDOUT); > - NET_UNLOCK(); > + in_pcbunref(inp); > } > > void > tcp_timer_2msl(void *arg) > { > - struct tcpcb *otp = NULL, *tp = arg; > + struct inpcb *inp = arg; > + struct tcpcb *otp = NULL, *tp; > short ostate; > int maxidle; > uint64_t now; > > NET_LOCK(); > + tp = intotcpcb(inp); > /* Ignore canceled timeouts or timeouts that have been rescheduled. */ > - if (!ISSET((tp)->t_flags, TF_TMR_2MSL) || > + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) || > timeout_pending(&tp->t_timer[TCPT_2MSL])) > goto out; > - CLR((tp)->t_flags, TF_TMR_2MSL); > + CLR(tp->t_flags, TF_TMR_2MSL); > > - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { > + if (inp->inp_socket->so_options & SO_DEBUG) { > otp = tp; > ostate = tp->t_state; > } > @@ -538,6 +547,7 @@ tcp_timer_2msl(void *arg) > tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0); > out: > NET_UNLOCK(); > + in_pcbunref(inp); > } > > void > @@ -554,5 +564,4 @@ tcp_timer_reaper(void *arg) > * Freeing may run in parallel as it does not grab the net lock. > */ > pool_put(&tcpcb_pool, tp); > - tcpstat_inc(tcps_closed); > } > Index: netinet/tcp_timer.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v > diff -u -p -r1.24 tcp_timer.h > --- netinet/tcp_timer.h 1 Jan 2025 13:44:22 -0000 1.24 > +++ netinet/tcp_timer.h 3 Jan 2025 10:45:48 -0000 > @@ -42,10 +42,9 @@ > #define TCPT_PERSIST 1 /* retransmit persistence */ > #define TCPT_KEEP 2 /* keep alive */ > #define TCPT_2MSL 3 /* 2*msl quiet time timer */ > -#define TCPT_REAPER 4 /* delayed cleanup timeout */ > -#define TCPT_DELACK 5 /* delayed ack timeout */ > +#define TCPT_DELACK 4 /* delayed ack timeout */ > > -#define TCPT_NTIMERS 6 > +#define TCPT_NTIMERS 5 > > /* > * The TCPT_REXMT timer is used to force retransmissions. > @@ -110,7 +109,7 @@ > > #ifdef TCPTIMERS > const char *tcptimers[TCPT_NTIMERS] = > - { "REXMT", "PERSIST", "KEEP", "2MSL", "REAPER", "DELACK" }; > + { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" }; > #endif /* TCPTIMERS */ > > /* > @@ -118,19 +117,21 @@ const char *tcptimers[TCPT_NTIMERS] = > */ > #define TCP_TIMER_INIT(tp, timer) \ > timeout_set_flags(&(tp)->t_timer[(timer)], \ > - tcp_timer_funcs[(timer)], tp, KCLOCK_NONE, \ > - TIMEOUT_PROC | TIMEOUT_MPSAFE) > + tcp_timer_funcs[(timer)], (tp)->t_inpcb, \ > + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE) > > #define TCP_TIMER_ARM(tp, timer, msecs) \ > do { \ > SET((tp)->t_flags, TF_TIMER << (timer)); \ > - timeout_add_msec(&(tp)->t_timer[(timer)], (msecs)); \ > + if (timeout_add_msec(&(tp)->t_timer[(timer)], (msecs))) \ > + in_pcbref((tp)->t_inpcb); \ > } while (0) > > #define TCP_TIMER_DISARM(tp, timer) \ > do { \ > CLR((tp)->t_flags, TF_TIMER << (timer)); \ > - timeout_del(&(tp)->t_timer[(timer)]); \ > + if (timeout_del(&(tp)->t_timer[(timer)])) \ > + in_pcbunref((tp)->t_inpcb); \ > } while (0) > > #define TCP_TIMER_ISARMED(tp, timer) \ > @@ -163,5 +164,7 @@ extern int tcp_ttl; /* time to live fo > extern const int tcp_backoff[]; > > void tcp_timer_init(void); > +void tcp_timer_reaper(void *); > + > #endif /* _KERNEL */ > #endif /* _NETINET_TCP_TIMER_H_ */ > Index: netinet/tcp_var.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v > diff -u -p -r1.182 tcp_var.h > --- netinet/tcp_var.h 2 Jan 2025 10:55:18 -0000 1.182 > +++ netinet/tcp_var.h 3 Jan 2025 10:45:48 -0000 > @@ -70,6 +70,7 @@ struct tcpqent { > struct tcpcb { > struct tcpqehead t_segq; /* sequencing queue */ > struct timeout t_timer[TCPT_NTIMERS]; /* tcp timers */ > + struct timeout t_timer_reaper; /* reaper is special, no refcnt */ > short t_state; /* state of this connection */ > short t_rxtshift; /* log(2) of rexmt exp. backoff */ > int t_rxtcur; /* current retransmit value */ > @@ -102,8 +103,7 @@ struct tcpcb { > #define TF_TMR_PERSIST 0x08000000U /* retransmit persistence timer armed */ > #define TF_TMR_KEEP 0x10000000U /* keep alive timer armed */ > #define TF_TMR_2MSL 0x20000000U /* 2*msl quiet time timer armed */ > -#define TF_TMR_REAPER 0x40000000U /* delayed cleanup timer armed, dead */ > -#define TF_TMR_DELACK 0x80000000U /* delayed ack timer armed */ > +#define TF_TMR_DELACK 0x40000000U /* delayed ack timer armed */ > #define TF_TIMER TF_TMR_REXMT /* used to shift with TCPT values */ > > struct mbuf *t_template; /* skeletal packet for transmit */ >