Download raw body.
TCP sysctl keep alive unlock
On Wed, Jan 15, 2025 at 09:13:17PM +0300, Vitaliy Makkoveev wrote:
> On Wed, Jan 15, 2025 at 04:16:12PM +0100, Alexander Bluhm wrote:
> > Hi,
> >
> > By converting the TCP keepalive timer variables to seconds, sysctl
> > for those variables can be made atomic easily. Then we have to
> > multiply with TCP_TIME(1) when using them.
> >
> > An alternative would be to switch the sysctl values from seconds
> > to milliseconds, but that would mean an API break for people with
> > sysctl.conf modifications.
> >
> > ok?
>
> I prefer to use timeout_add_sec(9) within TCP_TIMER_ARM() macro. sysctl
> variables have seconds resolution, so you can just convert them to
> ticks and avoid one extra multiplication and one division.
This would require a TCP_TIMER_ARM_SEC() macro. And we would mix
units for different timers. I tried this, it did not look nice.
New idea: We keep copies in seconds for the sysctl and update
timer variables when they change.
While there make the names consistent. tcp_maxidle was historically
calculated in tcp_slowtimo() as the timers were called from there.
Much easier to calculate maxidle when needed. tcp_timer_init() looks
pretty useless, just initialize data.
ok?
bluhm
Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.422 tcp_input.c
--- netinet/tcp_input.c 10 Jan 2025 20:19:03 -0000 1.422
+++ netinet/tcp_input.c 15 Jan 2025 22:06:50 -0000
@@ -109,7 +109,6 @@ int tcp_flush_queue(struct tcpcb *);
#endif /* INET6 */
const int tcprexmtthresh = 3;
-int tcptv_keep_init = TCPTV_KEEP_INIT;
int tcp_rst_ppslim = 100; /* 100pps */
int tcp_rst_ppslim_count = 0;
@@ -870,7 +869,7 @@ findpcb:
*/
tp->t_rcvtime = now;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle));
if (tp->sack_enable)
tcp_del_sackholes(tp, th); /* Delete stale SACK holes */
@@ -1187,7 +1186,8 @@ findpcb:
soisconnected(so);
tp->t_flags &= ~TF_BLOCKOUTPUT;
tp->t_state = TCPS_ESTABLISHED;
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_KEEP,
+ atomic_load_int(&tcp_keepidle));
/* Do window scaling on this connection? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
@@ -1473,7 +1473,7 @@ trimthenstep6:
soisconnected(so);
tp->t_flags &= ~TF_BLOCKOUTPUT;
tp->t_state = TCPS_ESTABLISHED;
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle));
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
@@ -1809,10 +1809,14 @@ trimthenstep6:
* we'll hang forever.
*/
if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
+ int maxidle;
+
tp->t_flags |= TF_BLOCKOUTPUT;
soisdisconnected(so);
tp->t_flags &= ~TF_BLOCKOUTPUT;
- TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
+ maxidle = TCPTV_KEEPCNT *
+ atomic_load_int(&tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_2MSL, maxidle);
}
tp->t_state = TCPS_FIN_WAIT_2;
}
@@ -3394,7 +3398,7 @@ syn_cache_timer(void *arg)
* than the keep alive timer would allow, expire it.
*/
sc->sc_rxttot += sc->sc_rxtcur;
- if (sc->sc_rxttot >= READ_ONCE(tcptv_keep_init))
+ if (sc->sc_rxttot >= atomic_load_int(&tcp_keepinit))
goto dropit;
/* Advance the timer back-off. */
@@ -3673,7 +3677,7 @@ syn_cache_get(struct sockaddr *src, stru
tp->t_sndtime = now;
tp->t_rcvacktime = now;
tp->t_sndacktime = now;
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepinit));
tcpstat_inc(tcps_accepts);
tcp_mss(tp, sc->sc_peermaxseg); /* sets t_maxseg */
Index: netinet/tcp_subr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
diff -u -p -r1.204 tcp_subr.c
--- netinet/tcp_subr.c 3 Jan 2025 17:23:51 -0000 1.204
+++ netinet/tcp_subr.c 15 Jan 2025 21:24:52 -0000
@@ -184,9 +184,6 @@ tcp_init(void)
/* Initialize the compressed state engine. */
syn_cache_init();
-
- /* Initialize timer state. */
- tcp_timer_init();
}
/*
Index: netinet/tcp_timer.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
diff -u -p -r1.81 tcp_timer.c
--- netinet/tcp_timer.c 14 Jan 2025 13:49:44 -0000 1.81
+++ netinet/tcp_timer.c 15 Jan 2025 22:04:22 -0000
@@ -61,16 +61,14 @@
*/
int tcp_always_keepalive;
-int tcp_keepidle;
-int tcp_keepintvl;
-int tcp_maxpersistidle; /* max idle time in persist */
-int tcp_maxidle; /* [T] max idle time for keep alive */
-
-/*
- * Time to delay the ACK. This is initialized in tcp_init(), unless
- * its patched.
- */
-int tcp_delack_msecs;
+int tcp_keepinit = TCPTV_KEEPINIT;
+int tcp_keepidle = TCPTV_KEEPIDLE;
+int tcp_keepintvl = TCPTV_KEEPINTVL;
+int tcp_keepinit_sec = TCPTV_KEEPINIT / TCP_TIME(1);
+int tcp_keepidle_sec = TCPTV_KEEPIDLE / TCP_TIME(1);
+int tcp_keepintvl_sec = TCPTV_KEEPINTVL / TCP_TIME(1);
+int tcp_maxpersistidle = TCPTV_KEEPIDLE; /* max idle time in persist */
+int tcp_delack_msecs = TCP_DELACK_MSECS; /* time to delay the ACK */
void tcp_timer_rexmt(void *);
void tcp_timer_persist(void *);
@@ -86,26 +84,6 @@ const tcp_timer_func_t tcp_timer_funcs[T
tcp_timer_delack,
};
-/*
- * Timer state initialization, called from tcp_init().
- */
-void
-tcp_timer_init(void)
-{
-
- if (tcp_keepidle == 0)
- tcp_keepidle = TCPTV_KEEP_IDLE;
-
- if (tcp_keepintvl == 0)
- tcp_keepintvl = TCPTV_KEEPINTVL;
-
- if (tcp_maxpersistidle == 0)
- tcp_maxpersistidle = TCPTV_KEEP_IDLE;
-
- if (tcp_delack_msecs == 0)
- tcp_delack_msecs = TCP_DELACK_MSECS;
-}
-
static inline int
tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp,
u_int timer)
@@ -176,7 +154,6 @@ void
tcp_slowtimo(void)
{
mtx_enter(&tcp_timer_mtx);
- tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
tcp_iss += TCP_ISSINCR2/PR_SLOWHZ; /* increment iss */
mtx_leave(&tcp_timer_mtx);
}
@@ -488,13 +465,14 @@ tcp_timer_keep(void *arg)
if ((atomic_load_int(&tcp_always_keepalive) ||
so->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
- int maxidle;
+ int keepidle, maxidle;
uint64_t now;
- maxidle = READ_ONCE(tcp_maxidle);
+ keepidle = atomic_load_int(&tcp_keepidle);
+ maxidle = TCPTV_KEEPCNT * keepidle;
now = tcp_now();
if ((maxidle > 0) &&
- ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) {
+ ((now - tp->t_rcvtime) >= keepidle + maxidle)) {
tcpstat_inc(tcps_keepdrops);
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
@@ -514,9 +492,9 @@ tcp_timer_keep(void *arg)
tcpstat_inc(tcps_keepprobe);
tcp_respond(tp, mtod(tp->t_template, caddr_t),
NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0, now);
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepintvl));
} else
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepidle));
if (otp)
tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0);
out:
@@ -542,11 +520,11 @@ tcp_timer_2msl(void *arg)
}
tcp_timer_freesack(tp);
- maxidle = READ_ONCE(tcp_maxidle);
+ maxidle = TCPTV_KEEPCNT * atomic_load_int(&tcp_keepidle);
now = tcp_now();
if (tp->t_state != TCPS_TIME_WAIT &&
((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle)))
- TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
+ TCP_TIMER_ARM(tp, TCPT_2MSL, atomic_load_int(&tcp_keepintvl));
else
tp = tcp_close(tp);
if (otp)
Index: netinet/tcp_timer.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v
diff -u -p -r1.25 tcp_timer.h
--- netinet/tcp_timer.h 3 Jan 2025 17:23:51 -0000 1.25
+++ netinet/tcp_timer.h 15 Jan 2025 22:04:57 -0000
@@ -93,8 +93,8 @@
#define TCPTV_PERSMIN TCP_TIME(5) /* retransmit persistence */
#define TCPTV_PERSMAX TCP_TIME(60) /* maximum persist interval */
-#define TCPTV_KEEP_INIT TCP_TIME(75) /* initial connect keep alive */
-#define TCPTV_KEEP_IDLE TCP_TIME(120*60) /* dflt time before probing */
+#define TCPTV_KEEPINIT TCP_TIME(75) /* initial connect keep alive */
+#define TCPTV_KEEPIDLE TCP_TIME(120*60) /* dflt time before probing */
#define TCPTV_KEEPINTVL TCP_TIME(75) /* default probe interval */
#define TCPTV_KEEPCNT 8 /* max probes before drop */
@@ -154,16 +154,17 @@ typedef void (*tcp_timer_func_t)(void *)
extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS];
-extern int tcp_delack_msecs; /* delayed ACK timeout in millisecs */
-extern int tcptv_keep_init;
-extern int tcp_always_keepalive; /* [a] assume SO_KEEPALIVE always set */
-extern int tcp_keepidle; /* time before keepalive probes begin */
-extern int tcp_keepintvl; /* time between keepalive probes */
-extern int tcp_maxidle; /* time to drop after starting probes */
-extern int tcp_ttl; /* time to live for TCP segs */
+extern int tcp_delack_msecs; /* [I] delayed ACK timeout in millisecs */
+extern int tcp_always_keepalive;/* [a] assume SO_KEEPALIVE always set */
+extern int tcp_keepinit; /* [a] time to keep alive initial SYN packet */
+extern int tcp_keepidle; /* [a] time before keepalive probes begin */
+extern int tcp_keepintvl; /* [a] time between keepalive probes */
+extern int tcp_keepinit_sec; /* [a] copy of above in seconds for sysctl */
+extern int tcp_keepidle_sec; /* [a] copy of above in seconds for sysctl */
+extern int tcp_keepintvl_sec; /* [a] copy of above in seconds for sysctl */
+extern int tcp_ttl; /* time to live for TCP segs */
extern const int tcp_backoff[];
-void tcp_timer_init(void);
void tcp_timer_reaper(void *);
#endif /* _KERNEL */
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
diff -u -p -r1.239 tcp_usrreq.c
--- netinet/tcp_usrreq.c 9 Jan 2025 16:47:24 -0000 1.239
+++ netinet/tcp_usrreq.c 15 Jan 2025 22:13:17 -0000
@@ -160,6 +160,12 @@ const struct pr_usrreqs tcp6_usrreqs = {
#endif
const struct sysctl_bounded_args tcpctl_vars[] = {
+ { TCPCTL_KEEPINITTIME, &tcp_keepinit_sec, 1,
+ 3 * TCPTV_KEEPINIT / TCP_TIME(1) },
+ { TCPCTL_KEEPIDLE, &tcp_keepidle_sec, 1,
+ 5 * TCPTV_KEEPIDLE / TCP_TIME(1) },
+ { TCPCTL_KEEPINTVL, &tcp_keepintvl_sec, 1,
+ 3 * TCPTV_KEEPINTVL / TCP_TIME(1) },
{ TCPCTL_RFC1323, &tcp_do_rfc1323, 0, 1 },
{ TCPCTL_SACK, &tcp_do_sack, 0, 1 },
{ TCPCTL_MSSDFLT, &tcp_mssdflt, TCP_MSS, 65535 },
@@ -682,7 +688,7 @@ tcp_connect(struct socket *so, struct mb
soisconnecting(so);
tcpstat_inc(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, atomic_load_int(&tcp_keepinit));
tcp_set_iss_tsm(tp);
tcp_sendseqinit(tp);
tp->snd_last = tp->snd_una;
@@ -1107,8 +1113,13 @@ tcp_usrclosed(struct tcpcb *tp)
* a full close, we start a timer to make sure sockets are
* not left in FIN_WAIT_2 forever.
*/
- if (tp->t_state == TCPS_FIN_WAIT_2)
- TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
+ if (tp->t_state == TCPS_FIN_WAIT_2) {
+ int maxidle;
+
+ maxidle = TCPTV_KEEPCNT *
+ atomic_load_int(&tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_2MSL, maxidle);
+ }
}
return (tp);
}
@@ -1409,36 +1420,6 @@ tcp_sysctl(int *name, u_int namelen, voi
return (ENOTDIR);
switch (name[0]) {
- case TCPCTL_KEEPINITTIME:
- NET_LOCK();
- nval = tcptv_keep_init / TCP_TIME(1);
- error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
- 1, 3 * (TCPTV_KEEP_INIT / TCP_TIME(1)));
- if (!error)
- tcptv_keep_init = TCP_TIME(nval);
- NET_UNLOCK();
- return (error);
-
- case TCPCTL_KEEPIDLE:
- NET_LOCK();
- nval = tcp_keepidle / TCP_TIME(1);
- error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
- 1, 5 * (TCPTV_KEEP_IDLE / TCP_TIME(1)));
- if (!error)
- tcp_keepidle = TCP_TIME(nval);
- NET_UNLOCK();
- return (error);
-
- case TCPCTL_KEEPINTVL:
- NET_LOCK();
- nval = tcp_keepintvl / TCP_TIME(1);
- error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &nval,
- 1, 3 * (TCPTV_KEEPINTVL / TCP_TIME(1)));
- if (!error)
- tcp_keepintvl = TCP_TIME(nval);
- NET_UNLOCK();
- return (error);
-
case TCPCTL_BADDYNAMIC:
NET_LOCK();
error = sysctl_struct(oldp, oldlenp, newp, newlen,
@@ -1528,8 +1509,23 @@ tcp_sysctl(int *name, u_int namelen, voi
return (error);
default:
- return sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars),
+ error = sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars),
name, namelen, oldp, oldlenp, newp, newlen);
+ switch (name[0]) {
+ case TCPCTL_KEEPINITTIME:
+ atomic_store_int(&tcp_keepinit,
+ atomic_load_int(&tcp_keepinit_sec) * TCP_TIME(1));
+ break;
+ case TCPCTL_KEEPIDLE:
+ atomic_store_int(&tcp_keepidle,
+ atomic_load_int(&tcp_keepidle_sec) * TCP_TIME(1));
+ break;
+ case TCPCTL_KEEPINTVL:
+ atomic_store_int(&tcp_keepintvl,
+ atomic_load_int(&tcp_keepintvl_sec) * TCP_TIME(1));
+ break;
+ }
+ return (error);
}
/* NOTREACHED */
}
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.184 tcp_var.h
--- netinet/tcp_var.h 5 Jan 2025 12:18:48 -0000 1.184
+++ netinet/tcp_var.h 15 Jan 2025 20:22:23 -0000
@@ -679,7 +679,6 @@ extern struct pool tcpcb_pool;
extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */
extern int tcp_do_rfc1323; /* [a] enabled/disabled? */
extern const int tcprexmtthresh;
-extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */
extern int tcp_mssdflt; /* [a] default maximum segment size */
extern int tcp_rst_ppslim; /* [a] maximum outgoing RST packet per second */
extern int tcp_ack_on_push; /* [a] ACK immediately on PUSH */
TCP sysctl keep alive unlock