Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: tcp sysctl tcp_do_ecn atomic
To:
tech@openbsd.org
Date:
Sun, 5 Jan 2025 12:24:09 +0100

Download raw body.

Thread
On Thu, Jan 02, 2025 at 06:20:49PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> This diff makes tcp_do_ecn atomic by reading it only once during
> packet processing.
> 
> As was the final non-atomic variable, we can remove the net lock
> for integers in tcp_sysctl().
> 
> ok?
> 
> bluhm

rebased to -current

Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.419 tcp_input.c
--- netinet/tcp_input.c	3 Jan 2025 21:27:40 -0000	1.419
+++ netinet/tcp_input.c	5 Jan 2025 11:16:54 -0000
@@ -183,17 +183,17 @@ void	 tcp_newreno_partialack(struct tcpc
 
 void	 syn_cache_put(struct syn_cache *);
 void	 syn_cache_rm(struct syn_cache *);
-int	 syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t);
+int	 syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t, int);
 void	 syn_cache_timer(void *);
 void	 syn_cache_insert(struct syn_cache *, struct tcpcb *);
 void	 syn_cache_reset(struct sockaddr *, struct sockaddr *,
 		struct tcphdr *, u_int);
 int	 syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *,
 		unsigned int, struct socket *, struct mbuf *, u_char *, int,
-		struct tcp_opt_info *, tcp_seq *, uint64_t);
+		struct tcp_opt_info *, tcp_seq *, uint64_t, int);
 struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
 		struct tcphdr *, unsigned int, unsigned int, struct socket *,
-		struct mbuf *, uint64_t);
+		struct mbuf *, uint64_t, int);
 struct syn_cache *syn_cache_lookup(const struct sockaddr *,
 		const struct sockaddr *, struct syn_cache_head **, u_int);
 
@@ -383,6 +383,7 @@ tcp_input(struct mbuf **mp, int *offp, i
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 #endif /* INET6 */
+	int do_ecn = 0;
 #ifdef TCP_ECN
 	u_char iptos;
 #endif
@@ -392,6 +393,9 @@ tcp_input(struct mbuf **mp, int *offp, i
 	opti.ts_present = 0;
 	opti.maxseg = 0;
 	now = tcp_now();
+#ifdef TCP_ECN
+	do_ecn = atomic_load_int(&tcp_do_ecn);
+#endif
 
 	/*
 	 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
@@ -704,7 +708,7 @@ findpcb:
 
 			case TH_ACK:
 				so = syn_cache_get(&src.sa, &dst.sa,
-				    th, iphlen, tlen, so, m, now);
+				    th, iphlen, tlen, so, m, now, do_ecn);
 				if (so == NULL) {
 					/*
 					 * We don't have a SYN for
@@ -836,8 +840,8 @@ findpcb:
 				 */
 				if (so->so_qlen > so->so_qlimit ||
 				    syn_cache_add(&src.sa, &dst.sa, th, iphlen,
-				    so, m, optp, optlen, &opti, reuse, now)
-				    == -1) {
+				    so, m, optp, optlen, &opti, reuse, now,
+				    do_ecn) == -1) {
 					tcpstat_inc(tcps_dropsyn);
 					goto drop;
 				}
@@ -1131,7 +1135,7 @@ findpcb:
 		if (tiflags & TH_RST) {
 #ifdef TCP_ECN
 			/* if ECN is enabled, fall back to non-ecn at rexmit */
-			if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
+			if (do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
 				goto drop;
 #endif
 			if (tiflags & TH_ACK)
@@ -1166,7 +1170,7 @@ findpcb:
 		 * both ECE and CWR are set for simultaneous open,
 		 * peer is ECN capable.
 		 */
-		if (tcp_do_ecn) {
+		if (do_ecn) {
 			switch (tiflags & (TH_ACK|TH_ECE|TH_CWR)) {
 			case TH_ACK|TH_ECE:
 			case TH_ECE|TH_CWR:
@@ -1412,7 +1416,7 @@ trimthenstep6:
 		case TCPS_SYN_RECEIVED:
 #ifdef TCP_ECN
 			/* if ECN is enabled, fall back to non-ecn at rexmit */
-			if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
+			if (do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
 				goto drop;
 #endif
 			so->so_error = ECONNREFUSED;
@@ -1503,7 +1507,7 @@ trimthenstep6:
 		 * advance snd_last to snd_max not to reduce cwnd again
 		 * until all outstanding packets are acked.
 		 */
-		if (tcp_do_ecn && (tiflags & TH_ECE)) {
+		if (do_ecn && (tiflags & TH_ECE)) {
 			if ((tp->t_flags & TF_ECN_PERMIT) &&
 			    SEQ_GEQ(tp->snd_una, tp->snd_last)) {
 				u_int win;
@@ -3373,7 +3377,7 @@ syn_cache_timer(void *arg)
 	struct inpcb *inp;
 	struct socket *so;
 	uint64_t now;
-	int lastref;
+	int lastref, do_ecn = 0;
 
 	mtx_enter(&syn_cache_mtx);
 	if (ISSET(sc->sc_dynflags, SCF_DEAD))
@@ -3409,7 +3413,10 @@ syn_cache_timer(void *arg)
 	so = in_pcbsolock(inp);
 	if (so != NULL) {
 		now = tcp_now();
-		(void) syn_cache_respond(sc, NULL, now);
+#ifdef TCP_ECN
+		do_ecn = atomic_load_int(&tcp_do_ecn);
+#endif
+		(void) syn_cache_respond(sc, NULL, now, do_ecn);
 		tcpstat_inc(tcps_sc_retransmitted);
 		in_pcbsounlock(inp, so);
 	}
@@ -3516,7 +3523,8 @@ syn_cache_lookup(const struct sockaddr *
  */
 struct socket *
 syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
-    u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now)
+    u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now,
+    int do_ecn)
 {
 	struct syn_cache *sc;
 	struct syn_cache_head *scp;
@@ -3544,7 +3552,7 @@ syn_cache_get(struct sockaddr *src, stru
 	    SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
 		refcnt_take(&sc->sc_refcnt);
 		mtx_leave(&syn_cache_mtx);
-		(void) syn_cache_respond(sc, m, now);
+		(void) syn_cache_respond(sc, m, now, do_ecn);
 		syn_cache_put(sc);
 		return ((struct socket *)(-1));
 	}
@@ -3794,7 +3802,7 @@ syn_cache_unreach(const struct sockaddr 
 int
 syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
     u_int iphlen, struct socket *so, struct mbuf *m, u_char *optp, int optlen,
-    struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now)
+    struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now, int do_ecn)
 {
 	struct tcpcb tb, *tp;
 	long win;
@@ -3870,7 +3878,7 @@ syn_cache_add(struct sockaddr *src, stru
 			sc->sc_ipopts = ipopts;
 		}
 		sc->sc_timestamp = tb.ts_recent;
-		if (syn_cache_respond(sc, m, now) == 0) {
+		if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
 			tcpstat_inc(tcps_sndacks);
 			tcpstat_inc(tcps_sndtotal);
 		}
@@ -3943,8 +3951,7 @@ syn_cache_add(struct sockaddr *src, stru
 	/*
 	 * if both ECE and CWR flag bits are set, peer is ECN capable.
 	 */
-	if (tcp_do_ecn &&
-	    (th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR))
+	if (do_ecn && (th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR))
 		SET(sc->sc_fixflags, SCF_ECN_PERMIT);
 #endif
 	/*
@@ -3958,7 +3965,7 @@ syn_cache_add(struct sockaddr *src, stru
 		SET(sc->sc_fixflags, SCF_SIGNATURE);
 #endif
 	sc->sc_inplisten = in_pcbref(tp->t_inpcb);
-	if (syn_cache_respond(sc, m, now) == 0) {
+	if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
 		mtx_enter(&syn_cache_mtx);
 		/*
 		 * XXXSMP Currently exclusive netlock prevents another insert
@@ -3979,7 +3986,8 @@ syn_cache_add(struct sockaddr *src, stru
 }
 
 int
-syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now)
+syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now,
+    int do_ecn)
 {
 	u_int8_t *optp;
 	int optlen, error;
@@ -4073,7 +4081,7 @@ syn_cache_respond(struct syn_cache *sc, 
 	th->th_flags = TH_SYN|TH_ACK;
 #ifdef TCP_ECN
 	/* Set ECE for SYN-ACK if peer supports ECN. */
-	if (tcp_do_ecn && ISSET(sc->sc_fixflags, SCF_ECN_PERMIT))
+	if (do_ecn && ISSET(sc->sc_fixflags, SCF_ECN_PERMIT))
 		th->th_flags |= TH_ECE;
 #endif
 	th->th_win = htons(sc->sc_win);
Index: netinet/tcp_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
diff -u -p -r1.150 tcp_output.c
--- netinet/tcp_output.c	1 Jan 2025 13:44:22 -0000	1.150
+++ netinet/tcp_output.c	5 Jan 2025 11:16:54 -0000
@@ -208,6 +208,7 @@ tcp_output(struct tcpcb *tp)
 	unsigned int sigoff;
 #endif /* TCP_SIGNATURE */
 #ifdef TCP_ECN
+	int do_ecn = atomic_load_int(&tcp_do_ecn);
 	int needect;
 #endif
 	int tso;
@@ -821,7 +822,7 @@ send:
 		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
 	}
 #ifdef TCP_ECN
-	if (tcp_do_ecn) {
+	if (do_ecn) {
 		/*
 		 * if we have received congestion experienced segs,
 		 * set ECE bit.
@@ -1054,7 +1055,7 @@ send:
 	 * but don't set ECT for a pure ack, a retransmit or a window probe.
 	 */
 	needect = 0;
-	if (tcp_do_ecn && (tp->t_flags & TF_ECN_PERMIT)) {
+	if (do_ecn && (tp->t_flags & TF_ECN_PERMIT)) {
 		if (len == 0 || SEQ_LT(tp->snd_nxt, tp->snd_max) ||
 		    (tp->t_force && len == 1)) {
 			/* don't set ECT */
Index: netinet/tcp_timer.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
diff -u -p -r1.79 tcp_timer.c
--- netinet/tcp_timer.c	3 Jan 2025 17:23:51 -0000	1.79
+++ netinet/tcp_timer.c	5 Jan 2025 11:16:54 -0000
@@ -343,7 +343,7 @@ tcp_timer_rexmt(void *arg)
 	 * blocks ecn packets.  fall back to non-ecn.
 	 */
 	if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
-	    && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
+	    && atomic_load_int(&tcp_do_ecn) && !(tp->t_flags & TF_DISABLE_ECN))
 		tp->t_flags |= TF_DISABLE_ECN;
 #endif
 	/*
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
diff -u -p -r1.237 tcp_usrreq.c
--- netinet/tcp_usrreq.c	4 Jan 2025 15:57:02 -0000	1.237
+++ netinet/tcp_usrreq.c	5 Jan 2025 11:16:54 -0000
@@ -1506,10 +1506,8 @@ tcp_sysctl(int *name, u_int namelen, voi
 		return (error);
 
 	default:
-		NET_LOCK();
 		error = sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars),
 		    name, namelen, oldp, oldlenp, newp, newlen);
-		NET_UNLOCK();
 		return (error);
 	}
 	/* NOTREACHED */
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -r1.183 tcp_var.h
--- netinet/tcp_var.h	3 Jan 2025 17:23:51 -0000	1.183
+++ netinet/tcp_var.h	5 Jan 2025 11:16:54 -0000
@@ -686,7 +686,7 @@ extern	int tcp_ack_on_push;	/* [a] ACK i
 extern	int tcp_do_sack;	/* [a] SACK enabled/disabled */
 extern	struct pool sackhl_pool;
 extern	int tcp_sackhole_limit;	/* max entries for tcp sack queues */
-extern	int tcp_do_ecn;		/* RFC3168 ECN enabled/disabled? */
+extern	int tcp_do_ecn;		/* [a] RFC3168 ECN enabled/disabled? */
 extern	int tcp_do_rfc3390;	/* [a] RFC3390 Increasing TCP Initial Window */
 extern	int tcp_do_tso;		/* [a] enable TSO for TCP output packets */