From: Alexander Bluhm Subject: Re: TCP sysctl keep alive unlock To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Wed, 15 Jan 2025 23:40:26 +0100 On Wed, Jan 15, 2025 at 09:13:17PM +0300, Vitaliy Makkoveev wrote: > On Wed, Jan 15, 2025 at 04:16:12PM +0100, Alexander Bluhm wrote: > > Hi, > > > > By converting the TCP keepalive timer variables to seconds, sysctl > > for those variables can be made atomic easily. Then we have to > > multiply with TCP_TIME(1) when using them. > > > > An alternative would be to switch the sysctl values from seconds > > to milliseconds, but that would mean an API break for people with > > sysctl.conf modifications. > > > > ok? > > I prefer to use timeout_add_sec(9) within TCP_TIMER_ARM() macro. sysctl > variables have seconds resolution, so you can just convert them to > ticks and avoid one extra multiplication and one division. This would require a TCP_TIMER_ARM_SEC() macro. And we would mix units for different timers. I tried this, it did not look nice. New idea: We keep copies in seconds for the sysctl and update timer variables when they change. While there make the names consistent. tcp_maxidle was historically calculated in tcp_slowtimo() as the timers were called from there. Much easier to calculate maxidle when needed. tcp_timer_init() looks pretty useless, just initialize data. ok? bluhm Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.422 tcp_input.c --- netinet/tcp_input.c 10 Jan 2025 20:19:03 -0000 1.422 +++ netinet/tcp_input.c 15 Jan 2025 22:06:50 -0000 @@ -109,7 +109,6 @@ int tcp_flush_queue(struct tcpcb *); #endif /* INET6 */ const int tcprexmtthresh = 3; -int tcptv_keep_init = TCPTV_KEEP_INIT; int tcp_rst_ppslim = 100; /* 100pps */ int tcp_rst_ppslim_count = 0; @@ -870,7 +869,7 @@ findpcb: */ tp->t_rcvtime = now; if (TCPS_HAVEESTABLISHED(tp->t_state)) - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle)); if (tp->sack_enable) tcp_del_sackholes(tp, th); /* Delete stale SACK holes */ @@ -1187,7 +1186,8 @@ findpcb: soisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; tp->t_state = TCPS_ESTABLISHED; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, + atomic_load_int(&tcp_keepidle)); /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { @@ -1473,7 +1473,7 @@ trimthenstep6: soisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; tp->t_state = TCPS_ESTABLISHED; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle)); /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { @@ -1809,10 +1809,14 @@ trimthenstep6: * we'll hang forever. */ if (so->so_rcv.sb_state & SS_CANTRCVMORE) { + int maxidle; + tp->t_flags |= TF_BLOCKOUTPUT; soisdisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; - TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); + maxidle = TCPTV_KEEPCNT * + atomic_load_int(&tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_2MSL, maxidle); } tp->t_state = TCPS_FIN_WAIT_2; } @@ -3394,7 +3398,7 @@ syn_cache_timer(void *arg) * than the keep alive timer would allow, expire it. */ sc->sc_rxttot += sc->sc_rxtcur; - if (sc->sc_rxttot >= READ_ONCE(tcptv_keep_init)) + if (sc->sc_rxttot >= atomic_load_int(&tcp_keepinit)) goto dropit; /* Advance the timer back-off. */ @@ -3673,7 +3677,7 @@ syn_cache_get(struct sockaddr *src, stru tp->t_sndtime = now; tp->t_rcvacktime = now; tp->t_sndacktime = now; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); + TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepinit)); tcpstat_inc(tcps_accepts); tcp_mss(tp, sc->sc_peermaxseg); /* sets t_maxseg */ Index: netinet/tcp_subr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v diff -u -p -r1.204 tcp_subr.c --- netinet/tcp_subr.c 3 Jan 2025 17:23:51 -0000 1.204 +++ netinet/tcp_subr.c 15 Jan 2025 21:24:52 -0000 @@ -184,9 +184,6 @@ tcp_init(void) /* Initialize the compressed state engine. */ syn_cache_init(); - - /* Initialize timer state. */ - tcp_timer_init(); } /* Index: netinet/tcp_timer.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v diff -u -p -r1.81 tcp_timer.c --- netinet/tcp_timer.c 14 Jan 2025 13:49:44 -0000 1.81 +++ netinet/tcp_timer.c 15 Jan 2025 22:04:22 -0000 @@ -61,16 +61,14 @@ */ int tcp_always_keepalive; -int tcp_keepidle; -int tcp_keepintvl; -int tcp_maxpersistidle; /* max idle time in persist */ -int tcp_maxidle; /* [T] max idle time for keep alive */ - -/* - * Time to delay the ACK. This is initialized in tcp_init(), unless - * its patched. - */ -int tcp_delack_msecs; +int tcp_keepinit = TCPTV_KEEPINIT; +int tcp_keepidle = TCPTV_KEEPIDLE; +int tcp_keepintvl = TCPTV_KEEPINTVL; +int tcp_keepinit_sec = TCPTV_KEEPINIT / TCP_TIME(1); +int tcp_keepidle_sec = TCPTV_KEEPIDLE / TCP_TIME(1); +int tcp_keepintvl_sec = TCPTV_KEEPINTVL / TCP_TIME(1); +int tcp_maxpersistidle = TCPTV_KEEPIDLE; /* max idle time in persist */ +int tcp_delack_msecs = TCP_DELACK_MSECS; /* time to delay the ACK */ void tcp_timer_rexmt(void *); void tcp_timer_persist(void *); @@ -86,26 +84,6 @@ const tcp_timer_func_t tcp_timer_funcs[T tcp_timer_delack, }; -/* - * Timer state initialization, called from tcp_init(). - */ -void -tcp_timer_init(void) -{ - - if (tcp_keepidle == 0) - tcp_keepidle = TCPTV_KEEP_IDLE; - - if (tcp_keepintvl == 0) - tcp_keepintvl = TCPTV_KEEPINTVL; - - if (tcp_maxpersistidle == 0) - tcp_maxpersistidle = TCPTV_KEEP_IDLE; - - if (tcp_delack_msecs == 0) - tcp_delack_msecs = TCP_DELACK_MSECS; -} - static inline int tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp, u_int timer) @@ -176,7 +154,6 @@ void tcp_slowtimo(void) { mtx_enter(&tcp_timer_mtx); - tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl; tcp_iss += TCP_ISSINCR2/PR_SLOWHZ; /* increment iss */ mtx_leave(&tcp_timer_mtx); } @@ -488,13 +465,14 @@ tcp_timer_keep(void *arg) if ((atomic_load_int(&tcp_always_keepalive) || so->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { - int maxidle; + int keepidle, maxidle; uint64_t now; - maxidle = READ_ONCE(tcp_maxidle); + keepidle = atomic_load_int(&tcp_keepidle); + maxidle = TCPTV_KEEPCNT * keepidle; now = tcp_now(); if ((maxidle > 0) && - ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) { + ((now - tp->t_rcvtime) >= keepidle + maxidle)) { tcpstat_inc(tcps_keepdrops); tp = tcp_drop(tp, ETIMEDOUT); goto out; @@ -514,9 +492,9 @@ tcp_timer_keep(void *arg) tcpstat_inc(tcps_keepprobe); tcp_respond(tp, mtod(tp->t_template, caddr_t), NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0, now); - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); + TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepintvl)); } else - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle)); if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0); out: @@ -542,11 +520,11 @@ tcp_timer_2msl(void *arg) } tcp_timer_freesack(tp); - maxidle = READ_ONCE(tcp_maxidle); + maxidle = TCPTV_KEEPCNT * atomic_load_int(&tcp_keepidle); now = tcp_now(); if (tp->t_state != TCPS_TIME_WAIT && ((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle))) - TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); + TCP_TIMER_ARM(tp, TCPT_2MSL, atomic_load_int(&tcp_keepintvl)); else tp = tcp_close(tp); if (otp) Index: netinet/tcp_timer.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v diff -u -p -r1.25 tcp_timer.h --- netinet/tcp_timer.h 3 Jan 2025 17:23:51 -0000 1.25 +++ netinet/tcp_timer.h 15 Jan 2025 22:04:57 -0000 @@ -93,8 +93,8 @@ #define TCPTV_PERSMIN TCP_TIME(5) /* retransmit persistence */ #define TCPTV_PERSMAX TCP_TIME(60) /* maximum persist interval */ -#define TCPTV_KEEP_INIT TCP_TIME(75) /* initial connect keep alive */ -#define TCPTV_KEEP_IDLE TCP_TIME(120*60) /* dflt time before probing */ +#define TCPTV_KEEPINIT TCP_TIME(75) /* initial connect keep alive */ +#define TCPTV_KEEPIDLE TCP_TIME(120*60) /* dflt time before probing */ #define TCPTV_KEEPINTVL TCP_TIME(75) /* default probe interval */ #define TCPTV_KEEPCNT 8 /* max probes before drop */ @@ -154,16 +154,17 @@ typedef void (*tcp_timer_func_t)(void *) extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS]; -extern int tcp_delack_msecs; /* delayed ACK timeout in millisecs */ -extern int tcptv_keep_init; -extern int tcp_always_keepalive; /* [a] assume SO_KEEPALIVE always set */ -extern int tcp_keepidle; /* time before keepalive probes begin */ -extern int tcp_keepintvl; /* time between keepalive probes */ -extern int tcp_maxidle; /* time to drop after starting probes */ -extern int tcp_ttl; /* time to live for TCP segs */ +extern int tcp_delack_msecs; /* [I] delayed ACK timeout in millisecs */ +extern int tcp_always_keepalive;/* [a] assume SO_KEEPALIVE always set */ +extern int tcp_keepinit; /* [a] time to keep alive initial SYN packet */ +extern int tcp_keepidle; /* [a] time before keepalive probes begin */ +extern int tcp_keepintvl; /* [a] time between keepalive probes */ +extern int tcp_keepinit_sec; /* [a] copy of above in seconds for sysctl */ +extern int tcp_keepidle_sec; /* [a] copy of above in seconds for sysctl */ +extern int tcp_keepintvl_sec; /* [a] copy of above in seconds for sysctl */ +extern int tcp_ttl; /* time to live for TCP segs */ extern const int tcp_backoff[]; -void tcp_timer_init(void); void tcp_timer_reaper(void *); #endif /* _KERNEL */ Index: netinet/tcp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v diff -u -p -r1.239 tcp_usrreq.c --- netinet/tcp_usrreq.c 9 Jan 2025 16:47:24 -0000 1.239 +++ netinet/tcp_usrreq.c 15 Jan 2025 22:13:17 -0000 @@ -160,6 +160,12 @@ const struct pr_usrreqs tcp6_usrreqs = { #endif const struct sysctl_bounded_args tcpctl_vars[] = { + { TCPCTL_KEEPINITTIME, &tcp_keepinit_sec, 1, + 3 * TCPTV_KEEPINIT / TCP_TIME(1) }, + { TCPCTL_KEEPIDLE, &tcp_keepidle_sec, 1, + 5 * TCPTV_KEEPIDLE / TCP_TIME(1) }, + { TCPCTL_KEEPINTVL, &tcp_keepintvl_sec, 1, + 3 * TCPTV_KEEPINTVL / TCP_TIME(1) }, { TCPCTL_RFC1323, &tcp_do_rfc1323, 0, 1 }, { TCPCTL_SACK, &tcp_do_sack, 0, 1 }, { TCPCTL_MSSDFLT, &tcp_mssdflt, TCP_MSS, 65535 }, @@ -682,7 +688,7 @@ tcp_connect(struct socket *so, struct mb soisconnecting(so); tcpstat_inc(tcps_connattempt); tp->t_state = TCPS_SYN_SENT; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); + TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepinit)); tcp_set_iss_tsm(tp); tcp_sendseqinit(tp); tp->snd_last = tp->snd_una; @@ -1107,8 +1113,13 @@ tcp_usrclosed(struct tcpcb *tp) * a full close, we start a timer to make sure sockets are * not left in FIN_WAIT_2 forever. */ - if (tp->t_state == TCPS_FIN_WAIT_2) - TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); + if (tp->t_state == TCPS_FIN_WAIT_2) { + int maxidle; + + maxidle = TCPTV_KEEPCNT * + atomic_load_int(&tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_2MSL, maxidle); + } } return (tp); } @@ -1409,36 +1420,6 @@ tcp_sysctl(int *name, u_int namelen, voi return (ENOTDIR); switch (name[0]) { - case TCPCTL_KEEPINITTIME: - NET_LOCK(); - nval = tcptv_keep_init / TCP_TIME(1); - error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval, - 1, 3 * (TCPTV_KEEP_INIT / TCP_TIME(1))); - if (!error) - tcptv_keep_init = TCP_TIME(nval); - NET_UNLOCK(); - return (error); - - case TCPCTL_KEEPIDLE: - NET_LOCK(); - nval = tcp_keepidle / TCP_TIME(1); - error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval, - 1, 5 * (TCPTV_KEEP_IDLE / TCP_TIME(1))); - if (!error) - tcp_keepidle = TCP_TIME(nval); - NET_UNLOCK(); - return (error); - - case TCPCTL_KEEPINTVL: - NET_LOCK(); - nval = tcp_keepintvl / TCP_TIME(1); - error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval, - 1, 3 * (TCPTV_KEEPINTVL / TCP_TIME(1))); - if (!error) - tcp_keepintvl = TCP_TIME(nval); - NET_UNLOCK(); - return (error); - case TCPCTL_BADDYNAMIC: NET_LOCK(); error = sysctl_struct(oldp, oldlenp, newp, newlen, @@ -1528,8 +1509,23 @@ tcp_sysctl(int *name, u_int namelen, voi return (error); default: - return sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars), + error = sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars), name, namelen, oldp, oldlenp, newp, newlen); + switch (name[0]) { + case TCPCTL_KEEPINITTIME: + atomic_store_int(&tcp_keepinit, + atomic_load_int(&tcp_keepinit_sec) * TCP_TIME(1)); + break; + case TCPCTL_KEEPIDLE: + atomic_store_int(&tcp_keepidle, + atomic_load_int(&tcp_keepidle_sec) * TCP_TIME(1)); + break; + case TCPCTL_KEEPINTVL: + atomic_store_int(&tcp_keepintvl, + atomic_load_int(&tcp_keepintvl_sec) * TCP_TIME(1)); + break; + } + return (error); } /* NOTREACHED */ } Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v diff -u -p -r1.184 tcp_var.h --- netinet/tcp_var.h 5 Jan 2025 12:18:48 -0000 1.184 +++ netinet/tcp_var.h 15 Jan 2025 20:22:23 -0000 @@ -679,7 +679,6 @@ extern struct pool tcpcb_pool; extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */ extern int tcp_do_rfc1323; /* [a] enabled/disabled? */ extern const int tcprexmtthresh; -extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */ extern int tcp_mssdflt; /* [a] default maximum segment size */ extern int tcp_rst_ppslim; /* [a] maximum outgoing RST packet per second */ extern int tcp_ack_on_push; /* [a] ACK immediately on PUSH */