Download raw body.
TCP sysctl keep alive unlock
On Wed, Jan 15, 2025 at 04:16:12PM +0100, Alexander Bluhm wrote:
> Hi,
>
> By converting the TCP keepalive timer variables to seconds, sysctl
> for those variables can be made atomic easily. Then we have to
> multiply with TCP_TIME(1) when using them.
>
> An alternative would be to switch the sysctl values from seconds
> to milliseconds, but that would mean an API break for people with
> sysctl.conf modifications.
>
> ok?
>
I prefer to use timeout_add_sec(9) within TCP_TIMER_ARM() macro. sysctl
variables have seconds resolution, so you can just convert them to
ticks and avoid one extra multiplication and one division.
timeout_add_sec(struct timeout *to, int secs)
{
uint64_t to_ticks;
to_ticks = (uint64_t)hz * secs;
return timeout_add_ticks(to, to_ticks, 1);
}
timeout_add_msec(struct timeout *to, uint64_t msecs)
{
uint64_t to_ticks;
to_ticks = msecs * 1000 / tick;
return timeout_add_ticks(to, to_ticks, msecs > 0);
}
> bluhm
>
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.422 tcp_input.c
> --- netinet/tcp_input.c 10 Jan 2025 20:19:03 -0000 1.422
> +++ netinet/tcp_input.c 15 Jan 2025 10:44:31 -0000
> @@ -869,8 +869,10 @@ findpcb:
> * Reset idle time and keep-alive timer.
> */
> tp->t_rcvtime = now;
> - if (TCPS_HAVEESTABLISHED(tp->t_state))
> - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> + if (TCPS_HAVEESTABLISHED(tp->t_state)) {
> + TCP_TIMER_ARM(tp, TCPT_KEEP,
> + atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
> + }
>
> if (tp->sack_enable)
> tcp_del_sackholes(tp, th); /* Delete stale SACK holes */
> @@ -1187,7 +1189,8 @@ findpcb:
> soisconnected(so);
> tp->t_flags &= ~TF_BLOCKOUTPUT;
> tp->t_state = TCPS_ESTABLISHED;
> - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> + TCP_TIMER_ARM(tp, TCPT_KEEP,
> + atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
> /* Do window scaling on this connection? */
> if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
> (TF_RCVD_SCALE|TF_REQ_SCALE)) {
> @@ -1473,7 +1476,8 @@ trimthenstep6:
> soisconnected(so);
> tp->t_flags &= ~TF_BLOCKOUTPUT;
> tp->t_state = TCPS_ESTABLISHED;
> - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> + TCP_TIMER_ARM(tp, TCPT_KEEP,
> + atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
> /* Do window scaling? */
> if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
> (TF_RCVD_SCALE|TF_REQ_SCALE)) {
> @@ -3394,7 +3398,7 @@ syn_cache_timer(void *arg)
> * than the keep alive timer would allow, expire it.
> */
> sc->sc_rxttot += sc->sc_rxtcur;
> - if (sc->sc_rxttot >= READ_ONCE(tcptv_keep_init))
> + if (sc->sc_rxttot >= atomic_load_int(&tcptv_keep_init) * TCP_TIME(1))
> goto dropit;
>
> /* Advance the timer back-off. */
> @@ -3673,7 +3677,8 @@ syn_cache_get(struct sockaddr *src, stru
> tp->t_sndtime = now;
> tp->t_rcvacktime = now;
> tp->t_sndacktime = now;
> - TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
> + TCP_TIMER_ARM(tp, TCPT_KEEP,
> + atomic_load_int(&tcptv_keep_init) * TCP_TIME(1));
> tcpstat_inc(tcps_accepts);
>
> tcp_mss(tp, sc->sc_peermaxseg); /* sets t_maxseg */
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> diff -u -p -r1.81 tcp_timer.c
> --- netinet/tcp_timer.c 14 Jan 2025 13:49:44 -0000 1.81
> +++ netinet/tcp_timer.c 15 Jan 2025 14:23:49 -0000
> @@ -100,7 +100,7 @@ tcp_timer_init(void)
> tcp_keepintvl = TCPTV_KEEPINTVL;
>
> if (tcp_maxpersistidle == 0)
> - tcp_maxpersistidle = TCPTV_KEEP_IDLE;
> + tcp_maxpersistidle = TCPTV_KEEP_IDLE * TCP_TIME(1);
>
> if (tcp_delack_msecs == 0)
> tcp_delack_msecs = TCP_DELACK_MSECS;
> @@ -176,7 +176,8 @@ void
> tcp_slowtimo(void)
> {
> mtx_enter(&tcp_timer_mtx);
> - tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
> + tcp_maxidle = TCPTV_KEEPCNT *
> + atomic_load_int(&tcp_keepintvl) * TCP_TIME(1);
> tcp_iss += TCP_ISSINCR2/PR_SLOWHZ; /* increment iss */
> mtx_leave(&tcp_timer_mtx);
> }
> @@ -493,8 +494,8 @@ tcp_timer_keep(void *arg)
>
> maxidle = READ_ONCE(tcp_maxidle);
> now = tcp_now();
> - if ((maxidle > 0) &&
> - ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) {
> + if ((maxidle > 0) && ((now - tp->t_rcvtime) >=
> + atomic_load_int(&tcp_keepidle) * TCP_TIME(1) + maxidle)) {
> tcpstat_inc(tcps_keepdrops);
> tp = tcp_drop(tp, ETIMEDOUT);
> goto out;
> @@ -514,9 +515,12 @@ tcp_timer_keep(void *arg)
> tcpstat_inc(tcps_keepprobe);
> tcp_respond(tp, mtod(tp->t_template, caddr_t),
> NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0, now);
> - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
> - } else
> - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> + TCP_TIMER_ARM(tp, TCPT_KEEP,
> + atomic_load_int(&tcp_keepintvl) * TCP_TIME(1));
> + } else {
> + TCP_TIMER_ARM(tp, TCPT_KEEP,
> + atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
> + }
> if (otp)
> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0);
> out:
> @@ -545,9 +549,10 @@ tcp_timer_2msl(void *arg)
> maxidle = READ_ONCE(tcp_maxidle);
> now = tcp_now();
> if (tp->t_state != TCPS_TIME_WAIT &&
> - ((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle)))
> - TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
> - else
> + ((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle))) {
> + TCP_TIMER_ARM(tp, TCPT_2MSL,
> + atomic_load_int(&tcp_keepintvl) * TCP_TIME(1));
> + } else
> tp = tcp_close(tp);
> if (otp)
> tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0);
> Index: netinet/tcp_timer.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v
> diff -u -p -r1.25 tcp_timer.h
> --- netinet/tcp_timer.h 3 Jan 2025 17:23:51 -0000 1.25
> +++ netinet/tcp_timer.h 15 Jan 2025 14:37:03 -0000
> @@ -93,9 +93,9 @@
> #define TCPTV_PERSMIN TCP_TIME(5) /* retransmit persistence */
> #define TCPTV_PERSMAX TCP_TIME(60) /* maximum persist interval */
>
> -#define TCPTV_KEEP_INIT TCP_TIME(75) /* initial connect keep alive */
> -#define TCPTV_KEEP_IDLE TCP_TIME(120*60) /* dflt time before probing */
> -#define TCPTV_KEEPINTVL TCP_TIME(75) /* default probe interval */
> +#define TCPTV_KEEP_INIT 75 /* initial connect keep alive */
> +#define TCPTV_KEEP_IDLE (120*60) /* dflt time before probing */
> +#define TCPTV_KEEPINTVL 75 /* default probe interval */
> #define TCPTV_KEEPCNT 8 /* max probes before drop */
>
> #define TCPTV_MIN TCP_TIME(1) /* minimum allowable value */
> @@ -154,13 +154,13 @@ typedef void (*tcp_timer_func_t)(void *)
>
> extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS];
>
> -extern int tcp_delack_msecs; /* delayed ACK timeout in millisecs */
> -extern int tcptv_keep_init;
> -extern int tcp_always_keepalive; /* [a] assume SO_KEEPALIVE always set */
> -extern int tcp_keepidle; /* time before keepalive probes begin */
> -extern int tcp_keepintvl; /* time between keepalive probes */
> -extern int tcp_maxidle; /* time to drop after starting probes */
> -extern int tcp_ttl; /* time to live for TCP segs */
> +extern int tcp_delack_msecs; /* delayed ACK timeout in millisecs */
> +extern int tcp_always_keepalive;/* [a] assume SO_KEEPALIVE always set */
> +extern int tcptv_keep_init; /* [a] time to keep alive initial SYN packet */
> +extern int tcp_keepidle; /* [a] time before keepalive probes begin */
> +extern int tcp_keepintvl; /* [a] time between keepalive probes */
> +extern int tcp_maxidle; /* time to drop after starting probes */
> +extern int tcp_ttl; /* time to live for TCP segs */
> extern const int tcp_backoff[];
>
> void tcp_timer_init(void);
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> diff -u -p -r1.239 tcp_usrreq.c
> --- netinet/tcp_usrreq.c 9 Jan 2025 16:47:24 -0000 1.239
> +++ netinet/tcp_usrreq.c 15 Jan 2025 14:20:54 -0000
> @@ -160,6 +160,9 @@ const struct pr_usrreqs tcp6_usrreqs = {
> #endif
>
> const struct sysctl_bounded_args tcpctl_vars[] = {
> + { TCPCTL_KEEPINITTIME, &tcptv_keep_init, 1, 3 * TCPTV_KEEP_INIT },
> + { TCPCTL_KEEPIDLE, &tcp_keepidle, 1, 5 * TCPTV_KEEP_IDLE },
> + { TCPCTL_KEEPINTVL, &tcp_keepintvl, 1, 3 * TCPTV_KEEPINTVL },
> { TCPCTL_RFC1323, &tcp_do_rfc1323, 0, 1 },
> { TCPCTL_SACK, &tcp_do_sack, 0, 1 },
> { TCPCTL_MSSDFLT, &tcp_mssdflt, TCP_MSS, 65535 },
> @@ -682,7 +685,8 @@ tcp_connect(struct socket *so, struct mb
> soisconnecting(so);
> tcpstat_inc(tcps_connattempt);
> tp->t_state = TCPS_SYN_SENT;
> - TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
> + TCP_TIMER_ARM(tp, TCPT_KEEP,
> + atomic_load_int(&tcptv_keep_init) * TCP_TIME(1));
> tcp_set_iss_tsm(tp);
> tcp_sendseqinit(tp);
> tp->snd_last = tp->snd_una;
> @@ -1409,36 +1413,6 @@ tcp_sysctl(int *name, u_int namelen, voi
> return (ENOTDIR);
>
> switch (name[0]) {
> - case TCPCTL_KEEPINITTIME:
> - NET_LOCK();
> - nval = tcptv_keep_init / TCP_TIME(1);
> - error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
> - 1, 3 * (TCPTV_KEEP_INIT / TCP_TIME(1)));
> - if (!error)
> - tcptv_keep_init = TCP_TIME(nval);
> - NET_UNLOCK();
> - return (error);
> -
> - case TCPCTL_KEEPIDLE:
> - NET_LOCK();
> - nval = tcp_keepidle / TCP_TIME(1);
> - error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
> - 1, 5 * (TCPTV_KEEP_IDLE / TCP_TIME(1)));
> - if (!error)
> - tcp_keepidle = TCP_TIME(nval);
> - NET_UNLOCK();
> - return (error);
> -
> - case TCPCTL_KEEPINTVL:
> - NET_LOCK();
> - nval = tcp_keepintvl / TCP_TIME(1);
> - error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
> - 1, 3 * (TCPTV_KEEPINTVL / TCP_TIME(1)));
> - if (!error)
> - tcp_keepintvl = TCP_TIME(nval);
> - NET_UNLOCK();
> - return (error);
> -
> case TCPCTL_BADDYNAMIC:
> NET_LOCK();
> error = sysctl_struct(oldp, oldlenp, newp, newlen,
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.184 tcp_var.h
> --- netinet/tcp_var.h 5 Jan 2025 12:18:48 -0000 1.184
> +++ netinet/tcp_var.h 14 Jan 2025 18:51:31 -0000
> @@ -679,7 +679,6 @@ extern struct pool tcpcb_pool;
> extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */
> extern int tcp_do_rfc1323; /* [a] enabled/disabled? */
> extern const int tcprexmtthresh;
> -extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */
> extern int tcp_mssdflt; /* [a] default maximum segment size */
> extern int tcp_rst_ppslim; /* [a] maximum outgoing RST packet per second */
> extern int tcp_ack_on_push; /* [a] ACK immediately on PUSH */
>
TCP sysctl keep alive unlock