Download raw body.
tcp timer refcount
> On 3 Jan 2025, at 14:54, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> Hi,
>
> I would like to reference count the inpcb used in TCP timer. The
> diff switches from struct tcpcb to inpcb in the timer argument.
> This allows to ref count it. Reaper is special as it does not use
> the inp. So I removed it from the generic timeout functions and
> added a special field.
>
> For now I just want to commit the ref count part to see that there
> are no leaks.
>
> Later we can run timer with shared net lock. Also the reaper may
> be removed, but that needs more thoughts.
>
> ok?
>
ok mvs
> bluhm
>
> Index: netinet/tcp_subr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> diff -u -p -r1.203 tcp_subr.c
> --- netinet/tcp_subr.c 28 Dec 2024 22:17:09 -0000 1.203
> +++ netinet/tcp_subr.c 3 Jan 2025 10:45:48 -0000
> @@ -440,13 +440,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait
> tp->t_maxseg = atomic_load_int(&tcp_mssdflt);
> tp->t_maxopd = 0;
>
> + tp->t_inpcb = inp;
> for (i = 0; i < TCPT_NTIMERS; i++)
> TCP_TIMER_INIT(tp, i);
> + timeout_set_flags(&tp->t_timer_reaper, tcp_timer_reaper, tp,
> + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
>
> tp->sack_enable = atomic_load_int(&tcp_do_sack);
> tp->t_flags = atomic_load_int(&tcp_do_rfc1323) ?
> (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
> - tp->t_inpcb = inp;
> /*
> * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
> * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives
> @@ -530,11 +532,11 @@ tcp_close(struct tcpcb *tp)
>
> m_free(tp->t_template);
> /* Free tcpcb after all pending timers have been run. */
> - TCP_TIMER_ARM(tp, TCPT_REAPER, 1);
> -
> + timeout_add(&tp->t_timer_reaper, 0);
> inp->inp_ppcb = NULL;
> soisdisconnected(so);
> in_pcbdetach(inp);
> + tcpstat_inc(tcps_closed);
> return (NULL);
> }
>
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> diff -u -p -r1.78 tcp_timer.c
> --- netinet/tcp_timer.c 28 Dec 2024 22:17:09 -0000 1.78
> +++ netinet/tcp_timer.c 3 Jan 2025 11:09:13 -0000
> @@ -76,7 +76,6 @@ void tcp_timer_rexmt(void *);
> void tcp_timer_persist(void *);
> void tcp_timer_keep(void *);
> void tcp_timer_2msl(void *);
> -void tcp_timer_reaper(void *);
> void tcp_timer_delack(void *);
>
> const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
> @@ -84,7 +83,6 @@ const tcp_timer_func_t tcp_timer_funcs[T
> tcp_timer_persist,
> tcp_timer_keep,
> tcp_timer_2msl,
> - tcp_timer_reaper,
> tcp_timer_delack,
> };
>
> @@ -114,7 +112,8 @@ tcp_timer_init(void)
> void
> tcp_timer_delack(void *arg)
> {
> - struct tcpcb *otp = NULL, *tp = arg;
> + struct inpcb *inp = arg;
> + struct tcpcb *otp = NULL, *tp;
> short ostate;
>
> /*
> @@ -123,13 +122,14 @@ tcp_timer_delack(void *arg)
> * ACK callout.
> */
> NET_LOCK();
> + tp = intotcpcb(inp);
> /* Ignore canceled timeouts or timeouts that have been rescheduled. */
> - if (!ISSET((tp)->t_flags, TF_TMR_DELACK) ||
> + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) ||
> timeout_pending(&tp->t_timer[TCPT_DELACK]))
> goto out;
> - CLR((tp)->t_flags, TF_TMR_DELACK);
> + CLR(tp->t_flags, TF_TMR_DELACK);
>
> - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> + if (inp->inp_socket->so_options & SO_DEBUG) {
> otp = tp;
> ostate = tp->t_state;
> }
> @@ -139,6 +139,7 @@ tcp_timer_delack(void *arg)
> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0);
> out:
> NET_UNLOCK();
> + in_pcbunref(inp);
> }
>
> /*
> @@ -197,19 +198,18 @@ tcp_timer_freesack(struct tcpcb *tp)
> void
> tcp_timer_rexmt(void *arg)
> {
> - struct tcpcb *otp = NULL, *tp = arg;
> - struct inpcb *inp;
> + struct inpcb *inp = arg;
> + struct tcpcb *otp = NULL, *tp;
> uint32_t rto;
> short ostate;
>
> NET_LOCK();
> - inp = tp->t_inpcb;
> -
> + tp = intotcpcb(inp);
> /* Ignore canceled timeouts or timeouts that have been rescheduled. */
> - if (!ISSET((tp)->t_flags, TF_TMR_REXMT) ||
> + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) ||
> timeout_pending(&tp->t_timer[TCPT_REXMT]))
> goto out;
> - CLR((tp)->t_flags, TF_TMR_REXMT);
> + CLR(tp->t_flags, TF_TMR_REXMT);
>
> if ((tp->t_flags & TF_PMTUD_PEND) && inp &&
> SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
> @@ -392,27 +392,30 @@ tcp_timer_rexmt(void *arg)
> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0);
> out:
> NET_UNLOCK();
> + in_pcbunref(inp);
> }
>
> void
> tcp_timer_persist(void *arg)
> {
> - struct tcpcb *otp = NULL, *tp = arg;
> + struct inpcb *inp = arg;
> + struct tcpcb *otp = NULL, *tp;
> uint32_t rto;
> short ostate;
> uint64_t now;
>
> NET_LOCK();
> + tp = intotcpcb(inp);
> /* Ignore canceled timeouts or timeouts that have been rescheduled. */
> - if (!ISSET((tp)->t_flags, TF_TMR_PERSIST) ||
> + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) ||
> timeout_pending(&tp->t_timer[TCPT_PERSIST]))
> goto out;
> - CLR((tp)->t_flags, TF_TMR_PERSIST);
> + CLR(tp->t_flags, TF_TMR_PERSIST);
>
> if (TCP_TIMER_ISARMED(tp, TCPT_REXMT))
> goto out;
>
> - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> + if (inp->inp_socket->so_options & SO_DEBUG) {
> otp = tp;
> ostate = tp->t_state;
> }
> @@ -443,30 +446,36 @@ tcp_timer_persist(void *arg)
> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0);
> out:
> NET_UNLOCK();
> + in_pcbunref(inp);
> }
>
> void
> tcp_timer_keep(void *arg)
> {
> - struct tcpcb *otp = NULL, *tp = arg;
> + struct inpcb *inp = arg;
> + struct tcpcb *otp = NULL, *tp;
> short ostate;
>
> NET_LOCK();
> + tp = intotcpcb(inp);
> /* Ignore canceled timeouts or timeouts that have been rescheduled. */
> - if (!ISSET((tp)->t_flags, TF_TMR_KEEP) ||
> + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) ||
> timeout_pending(&tp->t_timer[TCPT_KEEP]))
> goto out;
> - CLR((tp)->t_flags, TF_TMR_KEEP);
> + CLR(tp->t_flags, TF_TMR_KEEP);
>
> - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> + if (inp->inp_socket->so_options & SO_DEBUG) {
> otp = tp;
> ostate = tp->t_state;
> }
> tcpstat_inc(tcps_keeptimeo);
> - if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
> - goto dropit;
> + if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) {
> + tcpstat_inc(tcps_keepdrops);
> + tp = tcp_drop(tp, ETIMEDOUT);
> + goto out;
> + }
> if ((atomic_load_int(&tcp_always_keepalive) ||
> - tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
> + inp->inp_socket->so_options & SO_KEEPALIVE) &&
> tp->t_state <= TCPS_CLOSING) {
> int maxidle;
> uint64_t now;
> @@ -474,8 +483,11 @@ tcp_timer_keep(void *arg)
> maxidle = READ_ONCE(tcp_maxidle);
> now = tcp_now();
> if ((maxidle > 0) &&
> - ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle))
> - goto dropit;
> + ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) {
> + tcpstat_inc(tcps_keepdrops);
> + tp = tcp_drop(tp, ETIMEDOUT);
> + goto out;
> + }
> /*
> * Send a packet designed to force a response
> * if the peer is up and reachable:
> @@ -498,30 +510,27 @@ tcp_timer_keep(void *arg)
> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0);
> out:
> NET_UNLOCK();
> - return;
> -
> - dropit:
> - tcpstat_inc(tcps_keepdrops);
> - tp = tcp_drop(tp, ETIMEDOUT);
> - NET_UNLOCK();
> + in_pcbunref(inp);
> }
>
> void
> tcp_timer_2msl(void *arg)
> {
> - struct tcpcb *otp = NULL, *tp = arg;
> + struct inpcb *inp = arg;
> + struct tcpcb *otp = NULL, *tp;
> short ostate;
> int maxidle;
> uint64_t now;
>
> NET_LOCK();
> + tp = intotcpcb(inp);
> /* Ignore canceled timeouts or timeouts that have been rescheduled. */
> - if (!ISSET((tp)->t_flags, TF_TMR_2MSL) ||
> + if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) ||
> timeout_pending(&tp->t_timer[TCPT_2MSL]))
> goto out;
> - CLR((tp)->t_flags, TF_TMR_2MSL);
> + CLR(tp->t_flags, TF_TMR_2MSL);
>
> - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
> + if (inp->inp_socket->so_options & SO_DEBUG) {
> otp = tp;
> ostate = tp->t_state;
> }
> @@ -538,6 +547,7 @@ tcp_timer_2msl(void *arg)
> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0);
> out:
> NET_UNLOCK();
> + in_pcbunref(inp);
> }
>
> void
> @@ -554,5 +564,4 @@ tcp_timer_reaper(void *arg)
> * Freeing may run in parallel as it does not grab the net lock.
> */
> pool_put(&tcpcb_pool, tp);
> - tcpstat_inc(tcps_closed);
> }
> Index: netinet/tcp_timer.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v
> diff -u -p -r1.24 tcp_timer.h
> --- netinet/tcp_timer.h 1 Jan 2025 13:44:22 -0000 1.24
> +++ netinet/tcp_timer.h 3 Jan 2025 10:45:48 -0000
> @@ -42,10 +42,9 @@
> #define TCPT_PERSIST 1 /* retransmit persistence */
> #define TCPT_KEEP 2 /* keep alive */
> #define TCPT_2MSL 3 /* 2*msl quiet time timer */
> -#define TCPT_REAPER 4 /* delayed cleanup timeout */
> -#define TCPT_DELACK 5 /* delayed ack timeout */
> +#define TCPT_DELACK 4 /* delayed ack timeout */
>
> -#define TCPT_NTIMERS 6
> +#define TCPT_NTIMERS 5
>
> /*
> * The TCPT_REXMT timer is used to force retransmissions.
> @@ -110,7 +109,7 @@
>
> #ifdef TCPTIMERS
> const char *tcptimers[TCPT_NTIMERS] =
> - { "REXMT", "PERSIST", "KEEP", "2MSL", "REAPER", "DELACK" };
> + { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
> #endif /* TCPTIMERS */
>
> /*
> @@ -118,19 +117,21 @@ const char *tcptimers[TCPT_NTIMERS] =
> */
> #define TCP_TIMER_INIT(tp, timer) \
> timeout_set_flags(&(tp)->t_timer[(timer)], \
> - tcp_timer_funcs[(timer)], tp, KCLOCK_NONE, \
> - TIMEOUT_PROC | TIMEOUT_MPSAFE)
> + tcp_timer_funcs[(timer)], (tp)->t_inpcb, \
> + KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE)
>
> #define TCP_TIMER_ARM(tp, timer, msecs) \
> do { \
> SET((tp)->t_flags, TF_TIMER << (timer)); \
> - timeout_add_msec(&(tp)->t_timer[(timer)], (msecs)); \
> + if (timeout_add_msec(&(tp)->t_timer[(timer)], (msecs))) \
> + in_pcbref((tp)->t_inpcb); \
> } while (0)
>
> #define TCP_TIMER_DISARM(tp, timer) \
> do { \
> CLR((tp)->t_flags, TF_TIMER << (timer)); \
> - timeout_del(&(tp)->t_timer[(timer)]); \
> + if (timeout_del(&(tp)->t_timer[(timer)])) \
> + in_pcbunref((tp)->t_inpcb); \
> } while (0)
>
> #define TCP_TIMER_ISARMED(tp, timer) \
> @@ -163,5 +164,7 @@ extern int tcp_ttl; /* time to live fo
> extern const int tcp_backoff[];
>
> void tcp_timer_init(void);
> +void tcp_timer_reaper(void *);
> +
> #endif /* _KERNEL */
> #endif /* _NETINET_TCP_TIMER_H_ */
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.182 tcp_var.h
> --- netinet/tcp_var.h 2 Jan 2025 10:55:18 -0000 1.182
> +++ netinet/tcp_var.h 3 Jan 2025 10:45:48 -0000
> @@ -70,6 +70,7 @@ struct tcpqent {
> struct tcpcb {
> struct tcpqehead t_segq; /* sequencing queue */
> struct timeout t_timer[TCPT_NTIMERS]; /* tcp timers */
> + struct timeout t_timer_reaper; /* reaper is special, no refcnt */
> short t_state; /* state of this connection */
> short t_rxtshift; /* log(2) of rexmt exp. backoff */
> int t_rxtcur; /* current retransmit value */
> @@ -102,8 +103,7 @@ struct tcpcb {
> #define TF_TMR_PERSIST 0x08000000U /* retransmit persistence timer armed */
> #define TF_TMR_KEEP 0x10000000U /* keep alive timer armed */
> #define TF_TMR_2MSL 0x20000000U /* 2*msl quiet time timer armed */
> -#define TF_TMR_REAPER 0x40000000U /* delayed cleanup timer armed, dead */
> -#define TF_TMR_DELACK 0x80000000U /* delayed ack timer armed */
> +#define TF_TMR_DELACK 0x40000000U /* delayed ack timer armed */
> #define TF_TIMER TF_TMR_REXMT /* used to shift with TCPT values */
>
> struct mbuf *t_template; /* skeletal packet for transmit */
>
tcp timer refcount