Download raw body.
tcp sysctl tcp_do_ecn atomic
> On 5 Jan 2025, at 14:24, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> On Thu, Jan 02, 2025 at 06:20:49PM +0100, Alexander Bluhm wrote:
>> Hi,
>>
>> This diff makes tcp_do_ecn atomic by reading it only once during
>> packet processing.
>>
>> As was the final non-atomic variable, we can remove the net lock
>> for integers in tcp_sysctl().
>>
>> ok?
>>
>> bluhm
>
> rebased to -current
>
ok mvs
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.419 tcp_input.c
> --- netinet/tcp_input.c 3 Jan 2025 21:27:40 -0000 1.419
> +++ netinet/tcp_input.c 5 Jan 2025 11:16:54 -0000
> @@ -183,17 +183,17 @@ void tcp_newreno_partialack(struct tcpc
>
> void syn_cache_put(struct syn_cache *);
> void syn_cache_rm(struct syn_cache *);
> -int syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t);
> +int syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t, int);
> void syn_cache_timer(void *);
> void syn_cache_insert(struct syn_cache *, struct tcpcb *);
> void syn_cache_reset(struct sockaddr *, struct sockaddr *,
> struct tcphdr *, u_int);
> int syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *,
> unsigned int, struct socket *, struct mbuf *, u_char *, int,
> - struct tcp_opt_info *, tcp_seq *, uint64_t);
> + struct tcp_opt_info *, tcp_seq *, uint64_t, int);
> struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
> struct tcphdr *, unsigned int, unsigned int, struct socket *,
> - struct mbuf *, uint64_t);
> + struct mbuf *, uint64_t, int);
> struct syn_cache *syn_cache_lookup(const struct sockaddr *,
> const struct sockaddr *, struct syn_cache_head **, u_int);
>
> @@ -383,6 +383,7 @@ tcp_input(struct mbuf **mp, int *offp, i
> #ifdef INET6
> struct ip6_hdr *ip6 = NULL;
> #endif /* INET6 */
> + int do_ecn = 0;
> #ifdef TCP_ECN
> u_char iptos;
> #endif
> @@ -392,6 +393,9 @@ tcp_input(struct mbuf **mp, int *offp, i
> opti.ts_present = 0;
> opti.maxseg = 0;
> now = tcp_now();
> +#ifdef TCP_ECN
> + do_ecn = atomic_load_int(&tcp_do_ecn);
> +#endif
>
> /*
> * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
> @@ -704,7 +708,7 @@ findpcb:
>
> case TH_ACK:
> so = syn_cache_get(&src.sa, &dst.sa,
> - th, iphlen, tlen, so, m, now);
> + th, iphlen, tlen, so, m, now, do_ecn);
> if (so == NULL) {
> /*
> * We don't have a SYN for
> @@ -836,8 +840,8 @@ findpcb:
> */
> if (so->so_qlen > so->so_qlimit ||
> syn_cache_add(&src.sa, &dst.sa, th, iphlen,
> - so, m, optp, optlen, &opti, reuse, now)
> - == -1) {
> + so, m, optp, optlen, &opti, reuse, now,
> + do_ecn) == -1) {
> tcpstat_inc(tcps_dropsyn);
> goto drop;
> }
> @@ -1131,7 +1135,7 @@ findpcb:
> if (tiflags & TH_RST) {
> #ifdef TCP_ECN
> /* if ECN is enabled, fall back to non-ecn at rexmit */
> - if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> + if (do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> goto drop;
> #endif
> if (tiflags & TH_ACK)
> @@ -1166,7 +1170,7 @@ findpcb:
> * both ECE and CWR are set for simultaneous open,
> * peer is ECN capable.
> */
> - if (tcp_do_ecn) {
> + if (do_ecn) {
> switch (tiflags & (TH_ACK|TH_ECE|TH_CWR)) {
> case TH_ACK|TH_ECE:
> case TH_ECE|TH_CWR:
> @@ -1412,7 +1416,7 @@ trimthenstep6:
> case TCPS_SYN_RECEIVED:
> #ifdef TCP_ECN
> /* if ECN is enabled, fall back to non-ecn at rexmit */
> - if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> + if (do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> goto drop;
> #endif
> so->so_error = ECONNREFUSED;
> @@ -1503,7 +1507,7 @@ trimthenstep6:
> * advance snd_last to snd_max not to reduce cwnd again
> * until all outstanding packets are acked.
> */
> - if (tcp_do_ecn && (tiflags & TH_ECE)) {
> + if (do_ecn && (tiflags & TH_ECE)) {
> if ((tp->t_flags & TF_ECN_PERMIT) &&
> SEQ_GEQ(tp->snd_una, tp->snd_last)) {
> u_int win;
> @@ -3373,7 +3377,7 @@ syn_cache_timer(void *arg)
> struct inpcb *inp;
> struct socket *so;
> uint64_t now;
> - int lastref;
> + int lastref, do_ecn = 0;
>
> mtx_enter(&syn_cache_mtx);
> if (ISSET(sc->sc_dynflags, SCF_DEAD))
> @@ -3409,7 +3413,10 @@ syn_cache_timer(void *arg)
> so = in_pcbsolock(inp);
> if (so != NULL) {
> now = tcp_now();
> - (void) syn_cache_respond(sc, NULL, now);
> +#ifdef TCP_ECN
> + do_ecn = atomic_load_int(&tcp_do_ecn);
> +#endif
> + (void) syn_cache_respond(sc, NULL, now, do_ecn);
> tcpstat_inc(tcps_sc_retransmitted);
> in_pcbsounlock(inp, so);
> }
> @@ -3516,7 +3523,8 @@ syn_cache_lookup(const struct sockaddr *
> */
> struct socket *
> syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
> - u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now)
> + u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now,
> + int do_ecn)
> {
> struct syn_cache *sc;
> struct syn_cache_head *scp;
> @@ -3544,7 +3552,7 @@ syn_cache_get(struct sockaddr *src, stru
> SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
> refcnt_take(&sc->sc_refcnt);
> mtx_leave(&syn_cache_mtx);
> - (void) syn_cache_respond(sc, m, now);
> + (void) syn_cache_respond(sc, m, now, do_ecn);
> syn_cache_put(sc);
> return ((struct socket *)(-1));
> }
> @@ -3794,7 +3802,7 @@ syn_cache_unreach(const struct sockaddr
> int
> syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
> u_int iphlen, struct socket *so, struct mbuf *m, u_char *optp, int optlen,
> - struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now)
> + struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now, int do_ecn)
> {
> struct tcpcb tb, *tp;
> long win;
> @@ -3870,7 +3878,7 @@ syn_cache_add(struct sockaddr *src, stru
> sc->sc_ipopts = ipopts;
> }
> sc->sc_timestamp = tb.ts_recent;
> - if (syn_cache_respond(sc, m, now) == 0) {
> + if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
> tcpstat_inc(tcps_sndacks);
> tcpstat_inc(tcps_sndtotal);
> }
> @@ -3943,8 +3951,7 @@ syn_cache_add(struct sockaddr *src, stru
> /*
> * if both ECE and CWR flag bits are set, peer is ECN capable.
> */
> - if (tcp_do_ecn &&
> - (th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR))
> + if (do_ecn && (th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR))
> SET(sc->sc_fixflags, SCF_ECN_PERMIT);
> #endif
> /*
> @@ -3958,7 +3965,7 @@ syn_cache_add(struct sockaddr *src, stru
> SET(sc->sc_fixflags, SCF_SIGNATURE);
> #endif
> sc->sc_inplisten = in_pcbref(tp->t_inpcb);
> - if (syn_cache_respond(sc, m, now) == 0) {
> + if (syn_cache_respond(sc, m, now, do_ecn) == 0) {
> mtx_enter(&syn_cache_mtx);
> /*
> * XXXSMP Currently exclusive netlock prevents another insert
> @@ -3979,7 +3986,8 @@ syn_cache_add(struct sockaddr *src, stru
> }
>
> int
> -syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now)
> +syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now,
> + int do_ecn)
> {
> u_int8_t *optp;
> int optlen, error;
> @@ -4073,7 +4081,7 @@ syn_cache_respond(struct syn_cache *sc,
> th->th_flags = TH_SYN|TH_ACK;
> #ifdef TCP_ECN
> /* Set ECE for SYN-ACK if peer supports ECN. */
> - if (tcp_do_ecn && ISSET(sc->sc_fixflags, SCF_ECN_PERMIT))
> + if (do_ecn && ISSET(sc->sc_fixflags, SCF_ECN_PERMIT))
> th->th_flags |= TH_ECE;
> #endif
> th->th_win = htons(sc->sc_win);
> Index: netinet/tcp_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
> diff -u -p -r1.150 tcp_output.c
> --- netinet/tcp_output.c 1 Jan 2025 13:44:22 -0000 1.150
> +++ netinet/tcp_output.c 5 Jan 2025 11:16:54 -0000
> @@ -208,6 +208,7 @@ tcp_output(struct tcpcb *tp)
> unsigned int sigoff;
> #endif /* TCP_SIGNATURE */
> #ifdef TCP_ECN
> + int do_ecn = atomic_load_int(&tcp_do_ecn);
> int needect;
> #endif
> int tso;
> @@ -821,7 +822,7 @@ send:
> th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
> }
> #ifdef TCP_ECN
> - if (tcp_do_ecn) {
> + if (do_ecn) {
> /*
> * if we have received congestion experienced segs,
> * set ECE bit.
> @@ -1054,7 +1055,7 @@ send:
> * but don't set ECT for a pure ack, a retransmit or a window probe.
> */
> needect = 0;
> - if (tcp_do_ecn && (tp->t_flags & TF_ECN_PERMIT)) {
> + if (do_ecn && (tp->t_flags & TF_ECN_PERMIT)) {
> if (len == 0 || SEQ_LT(tp->snd_nxt, tp->snd_max) ||
> (tp->t_force && len == 1)) {
> /* don't set ECT */
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> diff -u -p -r1.79 tcp_timer.c
> --- netinet/tcp_timer.c 3 Jan 2025 17:23:51 -0000 1.79
> +++ netinet/tcp_timer.c 5 Jan 2025 11:16:54 -0000
> @@ -343,7 +343,7 @@ tcp_timer_rexmt(void *arg)
> * blocks ecn packets. fall back to non-ecn.
> */
> if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
> - && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
> + && atomic_load_int(&tcp_do_ecn) && !(tp->t_flags & TF_DISABLE_ECN))
> tp->t_flags |= TF_DISABLE_ECN;
> #endif
> /*
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> diff -u -p -r1.237 tcp_usrreq.c
> --- netinet/tcp_usrreq.c 4 Jan 2025 15:57:02 -0000 1.237
> +++ netinet/tcp_usrreq.c 5 Jan 2025 11:16:54 -0000
> @@ -1506,10 +1506,8 @@ tcp_sysctl(int *name, u_int namelen, voi
> return (error);
>
> default:
> - NET_LOCK();
> error = sysctl_bounded_arr(tcpctl_vars, nitems(tcpctl_vars),
> name, namelen, oldp, oldlenp, newp, newlen);
> - NET_UNLOCK();
> return (error);
> }
> /* NOTREACHED */
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> diff -u -p -r1.183 tcp_var.h
> --- netinet/tcp_var.h 3 Jan 2025 17:23:51 -0000 1.183
> +++ netinet/tcp_var.h 5 Jan 2025 11:16:54 -0000
> @@ -686,7 +686,7 @@ extern int tcp_ack_on_push; /* [a] ACK i
> extern int tcp_do_sack; /* [a] SACK enabled/disabled */
> extern struct pool sackhl_pool;
> extern int tcp_sackhole_limit; /* max entries for tcp sack queues */
> -extern int tcp_do_ecn; /* RFC3168 ECN enabled/disabled? */
> +extern int tcp_do_ecn; /* [a] RFC3168 ECN enabled/disabled? */
> extern int tcp_do_rfc3390; /* [a] RFC3390 Increasing TCP Initial Window */
> extern int tcp_do_tso; /* [a] enable TSO for TCP output packets */
tcp sysctl tcp_do_ecn atomic