Index | Thread | Search

From:
Stuart Henderson <stu@spacehopper.org>
Subject:
Re: TCP sysctl keep alive unlock
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Wed, 15 Jan 2025 15:50:16 +0000

Download raw body.

Thread
On 2025/01/15 16:16, Alexander Bluhm wrote:
> Hi,
> 
> By converting the TCP keepalive timer variables to seconds, sysctl
> for those variables can be made atomic easily.  Then we have to
> multiply with TCP_TIME(1) when using them.
> 
> An alternative would be to switch the sysctl values from seconds
> to milliseconds, but that would mean an API break for people with
> sysctl.conf modifications.
> 
> ok?

I prefer it this way with seconds in sysctl.

ok.


> bluhm
> 
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.422 tcp_input.c
> --- netinet/tcp_input.c	10 Jan 2025 20:19:03 -0000	1.422
> +++ netinet/tcp_input.c	15 Jan 2025 10:44:31 -0000
> @@ -869,8 +869,10 @@ findpcb:
>  	 * Reset idle time and keep-alive timer.
>  	 */
>  	tp->t_rcvtime = now;
> -	if (TCPS_HAVEESTABLISHED(tp->t_state))
> -		TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> +	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
> +		TCP_TIMER_ARM(tp, TCPT_KEEP,
> +		    atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
> +	}
>  
>  	if (tp->sack_enable)
>  		tcp_del_sackholes(tp, th); /* Delete stale SACK holes */
> @@ -1187,7 +1189,8 @@ findpcb:
>  			soisconnected(so);
>  			tp->t_flags &= ~TF_BLOCKOUTPUT;
>  			tp->t_state = TCPS_ESTABLISHED;
> -			TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> +			TCP_TIMER_ARM(tp, TCPT_KEEP,
> +			    atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
>  			/* Do window scaling on this connection? */
>  			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
>  				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
> @@ -1473,7 +1476,8 @@ trimthenstep6:
>  		soisconnected(so);
>  		tp->t_flags &= ~TF_BLOCKOUTPUT;
>  		tp->t_state = TCPS_ESTABLISHED;
> -		TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> +		TCP_TIMER_ARM(tp, TCPT_KEEP,
> +		    atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
>  		/* Do window scaling? */
>  		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
>  			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
> @@ -3394,7 +3398,7 @@ syn_cache_timer(void *arg)
>  	 * than the keep alive timer would allow, expire it.
>  	 */
>  	sc->sc_rxttot += sc->sc_rxtcur;
> -	if (sc->sc_rxttot >= READ_ONCE(tcptv_keep_init))
> +	if (sc->sc_rxttot >= atomic_load_int(&tcptv_keep_init) * TCP_TIME(1))
>  		goto dropit;
>  
>  	/* Advance the timer back-off. */
> @@ -3673,7 +3677,8 @@ syn_cache_get(struct sockaddr *src, stru
>  	tp->t_sndtime = now;
>  	tp->t_rcvacktime = now;
>  	tp->t_sndacktime = now;
> -	TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
> +	TCP_TIMER_ARM(tp, TCPT_KEEP,
> +	    atomic_load_int(&tcptv_keep_init) * TCP_TIME(1));
>  	tcpstat_inc(tcps_accepts);
>  
>  	tcp_mss(tp, sc->sc_peermaxseg);	 /* sets t_maxseg */
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> diff -u -p -r1.81 tcp_timer.c
> --- netinet/tcp_timer.c	14 Jan 2025 13:49:44 -0000	1.81
> +++ netinet/tcp_timer.c	15 Jan 2025 14:23:49 -0000
> @@ -100,7 +100,7 @@ tcp_timer_init(void)
>  		tcp_keepintvl = TCPTV_KEEPINTVL;
>  
>  	if (tcp_maxpersistidle == 0)
> -		tcp_maxpersistidle = TCPTV_KEEP_IDLE;
> +		tcp_maxpersistidle = TCPTV_KEEP_IDLE * TCP_TIME(1);
>  
>  	if (tcp_delack_msecs == 0)
>  		tcp_delack_msecs = TCP_DELACK_MSECS;
> @@ -176,7 +176,8 @@ void
>  tcp_slowtimo(void)
>  {
>  	mtx_enter(&tcp_timer_mtx);
> -	tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
> +	tcp_maxidle = TCPTV_KEEPCNT *
> +	    atomic_load_int(&tcp_keepintvl) * TCP_TIME(1);
>  	tcp_iss += TCP_ISSINCR2/PR_SLOWHZ;		/* increment iss */
>  	mtx_leave(&tcp_timer_mtx);
>  }
> @@ -493,8 +494,8 @@ tcp_timer_keep(void *arg)
>  
>  		maxidle = READ_ONCE(tcp_maxidle);
>  		now = tcp_now();
> -		if ((maxidle > 0) &&
> -		    ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) {
> +		if ((maxidle > 0) && ((now - tp->t_rcvtime) >=
> +		    atomic_load_int(&tcp_keepidle) * TCP_TIME(1) + maxidle)) {
>  			tcpstat_inc(tcps_keepdrops);
>  			tp = tcp_drop(tp, ETIMEDOUT);
>  			goto out;
> @@ -514,9 +515,12 @@ tcp_timer_keep(void *arg)
>  		tcpstat_inc(tcps_keepprobe);
>  		tcp_respond(tp, mtod(tp->t_template, caddr_t),
>  		    NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0, now);
> -		TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
> -	} else
> -		TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> +		TCP_TIMER_ARM(tp, TCPT_KEEP,
> +		    atomic_load_int(&tcp_keepintvl) * TCP_TIME(1));
> +	} else {
> +		TCP_TIMER_ARM(tp, TCPT_KEEP,
> +		    atomic_load_int(&tcp_keepidle) * TCP_TIME(1));
> +	}
>  	if (otp)
>  		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0);
>   out:
> @@ -545,9 +549,10 @@ tcp_timer_2msl(void *arg)
>  	maxidle = READ_ONCE(tcp_maxidle);
>  	now = tcp_now();
>  	if (tp->t_state != TCPS_TIME_WAIT &&
> -	    ((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle)))
> -		TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
> -	else
> +	    ((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle))) {
> +		TCP_TIMER_ARM(tp, TCPT_2MSL,
> +		    atomic_load_int(&tcp_keepintvl) * TCP_TIME(1));
> +	} else
>  		tp = tcp_close(tp);
>  	if (otp)
>  		tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0);
> Index: netinet/tcp_timer.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v
> diff -u -p -r1.25 tcp_timer.h
> --- netinet/tcp_timer.h	3 Jan 2025 17:23:51 -0000	1.25
> +++ netinet/tcp_timer.h	15 Jan 2025 14:37:03 -0000
> @@ -93,9 +93,9 @@
>  #define	TCPTV_PERSMIN	TCP_TIME(5)	/* retransmit persistence */
>  #define	TCPTV_PERSMAX	TCP_TIME(60)	/* maximum persist interval */
>  
> -#define	TCPTV_KEEP_INIT	TCP_TIME(75)	/* initial connect keep alive */
> -#define	TCPTV_KEEP_IDLE	TCP_TIME(120*60) /* dflt time before probing */
> -#define	TCPTV_KEEPINTVL	TCP_TIME(75)	/* default probe interval */
> +#define	TCPTV_KEEP_INIT	75		/* initial connect keep alive */
> +#define	TCPTV_KEEP_IDLE	(120*60)	/* dflt time before probing */
> +#define	TCPTV_KEEPINTVL	75		/* default probe interval */
>  #define	TCPTV_KEEPCNT	8		/* max probes before drop */
>  
>  #define	TCPTV_MIN	TCP_TIME(1)	/* minimum allowable value */
> @@ -154,13 +154,13 @@ typedef void (*tcp_timer_func_t)(void *)
>  
>  extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS];
>  
> -extern int tcp_delack_msecs;		/* delayed ACK timeout in millisecs */
> -extern int tcptv_keep_init;
> -extern int tcp_always_keepalive;	/* [a] assume SO_KEEPALIVE always set */
> -extern int tcp_keepidle;		/* time before keepalive probes begin */
> -extern int tcp_keepintvl;		/* time between keepalive probes */
> -extern int tcp_maxidle;			/* time to drop after starting probes */
> -extern int tcp_ttl;			/* time to live for TCP segs */
> +extern int tcp_delack_msecs;	/* delayed ACK timeout in millisecs */
> +extern int tcp_always_keepalive;/* [a] assume SO_KEEPALIVE always set */
> +extern int tcptv_keep_init;	/* [a] time to keep alive initial SYN packet */
> +extern int tcp_keepidle;	/* [a] time before keepalive probes begin */
> +extern int tcp_keepintvl;	/* [a] time between keepalive probes */
> +extern int tcp_maxidle;		/* time to drop after starting probes */
> +extern int tcp_ttl;		/* time to live for TCP segs */
>  extern const int tcp_backoff[];
>  
>  void	tcp_timer_init(void);
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> diff -u -p -r1.239 tcp_usrreq.c
> --- netinet/tcp_usrreq.c	9 Jan 2025 16:47:24 -0000	1.239
> +++ netinet/tcp_usrreq.c	15 Jan 2025 14:20:54 -0000
> @@ -160,6 +160,9 @@ const struct pr_usrreqs tcp6_usrreqs = {
>  #endif
>  
>  const struct sysctl_bounded_args tcpctl_vars[] = {
> +	{ TCPCTL_KEEPINITTIME, &tcptv_keep_init, 1, 3 * TCPTV_KEEP_INIT },
> +	{ TCPCTL_KEEPIDLE, &tcp_keepidle, 1, 5 * TCPTV_KEEP_IDLE },
> +	{ TCPCTL_KEEPINTVL, &tcp_keepintvl, 1, 3 * TCPTV_KEEPINTVL },
>  	{ TCPCTL_RFC1323, &tcp_do_rfc1323, 0, 1 },
>  	{ TCPCTL_SACK, &tcp_do_sack, 0, 1 },
>  	{ TCPCTL_MSSDFLT, &tcp_mssdflt, TCP_MSS, 65535 },
> @@ -682,7 +685,8 @@ tcp_connect(struct socket *so, struct mb
>  	soisconnecting(so);
>  	tcpstat_inc(tcps_connattempt);
>  	tp->t_state = TCPS_SYN_SENT;
> -	TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
> +	TCP_TIMER_ARM(tp, TCPT_KEEP,
> +	    atomic_load_int(&tcptv_keep_init) * TCP_TIME(1));
>  	tcp_set_iss_tsm(tp);
>  	tcp_sendseqinit(tp);
>  	tp->snd_last = tp->snd_una;
> @@ -1409,36 +1413,6 @@ tcp_sysctl(int *name, u_int namelen, voi
>  		return (ENOTDIR);
>  
>  	switch (name[0]) {
> -	case TCPCTL_KEEPINITTIME:
> -		NET_LOCK();
> -		nval = tcptv_keep_init / TCP_TIME(1);
> -		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
> -		    1, 3 * (TCPTV_KEEP_INIT / TCP_TIME(1)));
> -		if (!error)
> -			tcptv_keep_init = TCP_TIME(nval);
> -		NET_UNLOCK();
> -		return (error);
> -
> -	case TCPCTL_KEEPIDLE:
> -		NET_LOCK();
> -		nval = tcp_keepidle / TCP_TIME(1);
> -		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
> -		    1, 5 * (TCPTV_KEEP_IDLE / TCP_TIME(1)));
> -		if (!error)
> -			tcp_keepidle = TCP_TIME(nval);
> -		NET_UNLOCK();
> -		return (error);
> -
> -	case TCPCTL_KEEPINTVL:
> -		NET_LOCK();
> -		nval = tcp_keepintvl / TCP_TIME(1);
> -		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
> -		    1, 3 * (TCPTV_KEEPINTVL / TCP_TIME(1)));
> -		if (!error)
> -			tcp_keepintvl = TCP_TIME(nval);
> -		NET_UNLOCK();
> -		return (error);
> -
>  	case TCPCTL_BADDYNAMIC:
>  		NET_LOCK();
>  		error = sysctl_struct(oldp, oldlenp, newp, newlen,
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.184 tcp_var.h
> --- netinet/tcp_var.h	5 Jan 2025 12:18:48 -0000	1.184
> +++ netinet/tcp_var.h	14 Jan 2025 18:51:31 -0000
> @@ -679,7 +679,6 @@ extern	struct pool tcpcb_pool;
>  extern	struct inpcbtable tcbtable, tcb6table;	/* queue of active tcpcb's */
>  extern	int tcp_do_rfc1323;	/* [a] enabled/disabled? */
>  extern	const int tcprexmtthresh;
> -extern	int tcptv_keep_init;	/* [N] time to keep alive initial SYN packet */
>  extern	int tcp_mssdflt;	/* [a] default maximum segment size */
>  extern	int tcp_rst_ppslim;	/* [a] maximum outgoing RST packet per second */
>  extern	int tcp_ack_on_push;	/* [a] ACK immediately on PUSH */
>