Index | Thread | Search

From:
Vitaliy Makkoveev <otto@bsdbox.dev>
Subject:
Re: tcp sysctl tcp_do_ecn atomic
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
OpenBSD tech <tech@openbsd.org>
Date:
Sun, 5 Jan 2025 14:40:35 +0300

Download raw body.

Thread
> On 5 Jan 2025, at 14:24, Alexander Bluhm <bluhm@openbsd.org> wrote:
> 
> On Thu, Jan 02, 2025 at 06:20:49PM +0100, Alexander Bluhm wrote:
>> Hi,
>> 
>> This diff makes tcp_do_ecn atomic by reading it only once during
>> packet processing.
>> 
>> As was the final non-atomic variable, we can remove the net lock
>> for integers in tcp_sysctl().
>> 
>> ok?
>> 
>> bluhm
> 
> rebased to -current
> 

ok mvs

> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.419 tcp_input.c
> --- netinet/tcp_input.c	3 Jan 2025 21:27:40 -0000	1.419
> +++ netinet/tcp_input.c	5 Jan 2025 11:16:54 -0000
> @@ -183,17 +183,17 @@ void	 tcp_newreno_partialack(struct tcpc
> 
> void	 syn_cache_put(struct syn_cache *);
> void	 syn_cache_rm(struct syn_cache *);
> -int	 syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t);
> +int	 syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t, int);
> void	 syn_cache_timer(void *);
> void	 syn_cache_insert(struct syn_cache *, struct tcpcb *);
> void	 syn_cache_reset(struct sockaddr *, struct sockaddr *,
> 		struct tcphdr *, u_int);
> int	 syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *,
> 		unsigned int, struct socket *, struct mbuf *, u_char *, int,
> -		struct tcp_opt_info *, tcp_seq *, uint64_t);
> +		struct tcp_opt_info *, tcp_seq *, uint64_t, int);
> struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
> 		struct tcphdr *, unsigned int, unsigned int, struct socket *,
> -		struct mbuf *, uint64_t);
> +		struct mbuf *, uint64_t, int);
> struct syn_cache *syn_cache_lookup(const struct sockaddr *,
> 		const struct sockaddr *, struct syn_cache_head **, u_int);
> 
> @@ -383,6 +383,7 @@ tcp_input(struct mbuf **mp, int *offp, i
> #ifdef INET6
> 	struct ip6_hdr *ip6 = NULL;
> #endif /* INET6 */
> +	int do_ecn = 0;
> #ifdef TCP_ECN
> 	u_char iptos;
> #endif
> @@ -392,6 +393,9 @@ tcp_input(struct mbuf **mp, int *offp, i
> 	opti.ts_present = 0;
> 	opti.maxseg = 0;
> 	now = tcp_now();
> +#ifdef TCP_ECN
> +	do_ecn = atomic_load_int(&tcp_do_ecn);
> +#endif
> 
> 	/*
> 	 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
> @@ -704,7 +708,7 @@ findpcb:
> 
> 			case TH_ACK:
> 				so = syn_cache_get(&src.sa, &dst.sa,
> -				    th, iphlen, tlen, so, m, now);
> +				    th, iphlen, tlen, so, m, now, do_ecn);
> 				if (so == NULL) {
> 					/*
> 					 * We don't have a SYN for
> @@ -836,8 +840,8 @@ findpcb:
> 				 */
> 				if (so->so_qlen > so->so_qlimit ||
> 				    syn_cache_add(&src.sa, &dst.sa, th, iphlen,
> -				    so, m, optp, optlen, &opti, reuse, now)
> -				    == -1) {
> +				    so, m, optp, optlen, &opti, reuse, now,
> +				    do_ecn) == -1) {
> 					tcpstat_inc(tcps_dropsyn);
> 					goto drop;
> 				}
> @@ -1131,7 +1135,7 @@ findpcb:
> 		if (tiflags & TH_RST) {
> #ifdef TCP_ECN
> 			/* if ECN is enabled, fall back to non-ecn at rexmit */
> -			if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> +			if (do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> 				goto drop;
> #endif
> 			if (tiflags & TH_ACK)
> @@ -1166,7 +1170,7 @@ findpcb:
> 		 * both ECE and CWR are set for simultaneous open,
> 		 * peer is ECN capable.
> 		 */
> -		if (tcp_do_ecn) {
> +		if (do_ecn) {
> 			switch (tiflags & (TH_ACK|TH_ECE|TH_CWR)) {
> 			case TH_ACK|TH_ECE:
> 			case TH_ECE|TH_CWR:
> @@ -1412,7 +1416,7 @@ trimthenstep6:
> 		case TCPS_SYN_RECEIVED:
> #ifdef TCP_ECN
> 			/* if ECN is enabled, fall back to non-ecn at rexmit */
> -			if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> +			if (do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> 				goto drop;
> #endif
> 			so->so_error = ECONNREFUSED;
> @@ -1503,7 +1507,7 @@ trimthenstep6:
> 		 * advance snd_last to snd_max not to reduce cwnd again
> 		 * until all outstanding packets are acked.
> 		 */
> -		if (tcp_do_ecn && (tiflags & TH_ECE)) {
> +		if (do_ecn && (tiflags & TH_ECE)) {
> 			if ((tp->t_flags & TF_ECN_PERMIT) &&
> 			    SEQ_GEQ(tp->snd_una, tp->snd_last)) {
> 				u_int win;
> @@ -3373,7 +3377,7 @@ syn_cache_timer(void *arg)
> 	struct inpcb *inp;
> 	struct socket *so;
> 	uint64_t now;
> -	int lastref;
> +	int lastref, do_ecn = 0;
> 
> 	mtx_enter(&syn_cache_mtx);
> 	if (ISSET(sc->sc_dynflags, SCF_DEAD))
> @@ -3409,7 +3413,10 @@ syn_cache_timer(void *arg)
> 	so = in_pcbsolock(inp);
> 	if (so != NULL) {
> 		now = tcp_now();
> -		(void) syn_cache_respond(sc, NULL, now);
> +#ifdef TCP_ECN
> +		do_ecn = atomic_load_int(&tcp_do_ecn);
> +#endif
> +		(void) syn_cache_respond(sc, NULL, now, do_ecn);
> 		tcpstat_inc(tcps_sc_retransmitted);
> 		in_pcbsounlock(inp, so);
> 	}
> @@ -3516,7 +3523,8 @@ syn_cache_lookup(const struct sockaddr *
>  */
> struct socket *
> syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
> -    u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now)
> +    u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now,
> +    int do_ecn)
> {
> 	struct syn_cache *sc;
> 	struct syn_cache_head *scp;
> @@ -3544,7 +3552,7 @@ syn_cache_get(struct sockaddr *src, stru
> 	    SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
> 		refcnt_take(&sc->sc_refcnt);
> 		mtx_leave(&syn_cache_mtx);
> -		(void) syn_cache_respond(sc, m, now);
> +		(void) syn_cache_respond(sc, m, now, do_ecn);
> 		syn_cache_put(sc);
> 		return ((struct socket *)(-1));
> 	}
> @@ -3794,7 +3802,7 @@ syn_cache_unreach(const struct sockaddr 
> int
> syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
>     u_int iphlen, struct socket *so, struct mbuf *m, u_char *optp, int optlen,
> -    struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now)
> +    struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now, int do_ecn)
> {
> 	struct tcpcb tb, *tp;
> 	long win;
> @@ -3870,7 +3878,7 @@ syn_cache_add(struct sockaddr *src, stru
> 			sc->sc_ipopts = ipopts;
> 		}
> 		sc->sc_timestamp = tb.ts_recent;
> -		if (syn_cache_respond(sc, m, now) == 0) {
> +		if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
> 			tcpstat_inc(tcps_sndacks);
> 			tcpstat_inc(tcps_sndtotal);
> 		}
> @@ -3943,8 +3951,7 @@ syn_cache_add(struct sockaddr *src, stru
> 	/*
> 	 * if both ECE and CWR flag bits are set, peer is ECN capable.
> 	 */
> -	if (tcp_do_ecn &&
> -	    (th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR))
> +	if (do_ecn && (th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR))
> 		SET(sc->sc_fixflags, SCF_ECN_PERMIT);
> #endif
> 	/*
> @@ -3958,7 +3965,7 @@ syn_cache_add(struct sockaddr *src, stru
> 		SET(sc->sc_fixflags, SCF_SIGNATURE);
> #endif
> 	sc->sc_inplisten = in_pcbref(tp->t_inpcb);
> -	if (syn_cache_respond(sc, m, now) == 0) {
> +	if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
> 		mtx_enter(&syn_cache_mtx);
> 		/*
> 		 * XXXSMP Currently exclusive netlock prevents another insert
> @@ -3979,7 +3986,8 @@ syn_cache_add(struct sockaddr *src, stru
> }
> 
> int
> -syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now)
> +syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now,
> +    int do_ecn)
> {
> 	u_int8_t *optp;
> 	int optlen, error;
> @@ -4073,7 +4081,7 @@ syn_cache_respond(struct syn_cache *sc, 
> 	th->th_flags = TH_SYN|TH_ACK;
> #ifdef TCP_ECN
> 	/* Set ECE for SYN-ACK if peer supports ECN. */
> -	if (tcp_do_ecn && ISSET(sc->sc_fixflags, SCF_ECN_PERMIT))
> +	if (do_ecn && ISSET(sc->sc_fixflags, SCF_ECN_PERMIT))
> 		th->th_flags |= TH_ECE;
> #endif
> 	th->th_win = htons(sc->sc_win);
> Index: netinet/tcp_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
> diff -u -p -r1.150 tcp_output.c
> --- netinet/tcp_output.c	1 Jan 2025 13:44:22 -0000	1.150
> +++ netinet/tcp_output.c	5 Jan 2025 11:16:54 -0000
> @@ -208,6 +208,7 @@ tcp_output(struct tcpcb *tp)
> 	unsigned int sigoff;
> #endif /* TCP_SIGNATURE */
> #ifdef TCP_ECN
> +	int do_ecn = atomic_load_int(&tcp_do_ecn);
> 	int needect;
> #endif
> 	int tso;
> @@ -821,7 +822,7 @@ send:
> 		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
> 	}
> #ifdef TCP_ECN
> -	if (tcp_do_ecn) {
> +	if (do_ecn) {
> 		/*
> 		 * if we have received congestion experienced segs,
> 		 * set ECE bit.
> @@ -1054,7 +1055,7 @@ send:
> 	 * but don't set ECT for a pure ack, a retransmit or a window probe.
> 	 */
> 	needect = 0;
> -	if (tcp_do_ecn && (tp->t_flags & TF_ECN_PERMIT)) {
> +	if (do_ecn && (tp->t_flags & TF_ECN_PERMIT)) {
> 		if (len == 0 || SEQ_LT(tp->snd_nxt, tp->snd_max) ||
> 		    (tp->t_force && len == 1)) {
> 			/* don't set ECT */
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> diff -u -p -r1.79 tcp_timer.c
> --- netinet/tcp_timer.c	3 Jan 2025 17:23:51 -0000	1.79
> +++ netinet/tcp_timer.c	5 Jan 2025 11:16:54 -0000
> @@ -343,7 +343,7 @@ tcp_timer_rexmt(void *arg)
> 	 * blocks ecn packets.  fall back to non-ecn.
> 	 */
> 	if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
> -	    && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> +	    && atomic_load_int(&tcp_do_ecn) && !(tp->t_flags & TF_DISABLE_ECN))
> 		tp->t_flags |= TF_DISABLE_ECN;
> #endif
> 	/*
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> diff -u -p -r1.237 tcp_usrreq.c
> --- netinet/tcp_usrreq.c	4 Jan 2025 15:57:02 -0000	1.237
> +++ netinet/tcp_usrreq.c	5 Jan 2025 11:16:54 -0000
> @@ -1506,10 +1506,8 @@ tcp_sysctl(int *name, u_int namelen, voi
> 		return (error);
> 
> 	default:
> -		NET_LOCK();
> 		error = sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars),
> 		    name, namelen, oldp, oldlenp, newp, newlen);
> -		NET_UNLOCK();
> 		return (error);
> 	}
> 	/* NOTREACHED */
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.183 tcp_var.h
> --- netinet/tcp_var.h	3 Jan 2025 17:23:51 -0000	1.183
> +++ netinet/tcp_var.h	5 Jan 2025 11:16:54 -0000
> @@ -686,7 +686,7 @@ extern	int tcp_ack_on_push;	/* [a] ACK i
> extern	int tcp_do_sack;	/* [a] SACK enabled/disabled */
> extern	struct pool sackhl_pool;
> extern	int tcp_sackhole_limit;	/* max entries for tcp sack queues */
> -extern	int tcp_do_ecn;		/* RFC3168 ECN enabled/disabled? */
> +extern	int tcp_do_ecn;		/* [a] RFC3168 ECN enabled/disabled? */
> extern	int tcp_do_rfc3390;	/* [a] RFC3390 Increasing TCP Initial Window */
> extern	int tcp_do_tso;		/* [a] enable TSO for TCP output packets */