From: Alexander Bluhm Subject: tcp mss MP fixes To: tech@openbsd.org Date: Fri, 8 Nov 2024 23:35:03 +0100 Hi, This diff makes tcp_mss() MP safe assuming it is called with socket lock. Calling in_pcbrtentry() changes the inpcb route cache. This needs socket lock. Global sysctl variable tcp_mssdflt is read in multiple places, so mark it as net locked. I should be made atomic later. rt->rt_mtu is written in multiple places. I think making it atomic is the way to go. Access it with READ_ONCE() for now. Global sysctl tcp_do_rfc3390 variable is accessed only here, it uses atomic operations only. I decided to put the locking comment into the header file as they are already there. ok? bluhm Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v diff -u -p -r1.159 in_pcb.h --- netinet/in_pcb.h 5 Nov 2024 10:49:23 -0000 1.159 +++ netinet/in_pcb.h 8 Nov 2024 22:03:25 -0000 @@ -140,7 +140,7 @@ struct inpcb { u_int16_t inp_lport; /* [t] local port */ struct socket *inp_socket; /* [I] back pointer to socket */ caddr_t inp_ppcb; /* pointer to per-protocol pcb */ - struct route inp_route; /* cached route */ + struct route inp_route; /* [s] cached route */ struct refcnt inp_refcnt; /* refcount PCB, delay memory free */ int inp_flags; /* generic IP/datagram flags */ union { /* Header prototype. */ Index: netinet/tcp_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -r1.408 tcp_input.c --- netinet/tcp_input.c 8 Nov 2024 21:40:39 -0000 1.408 +++ netinet/tcp_input.c 8 Nov 2024 22:16:55 -0000 @@ -2790,11 +2790,11 @@ tcp_xmit_timer(struct tcpcb *tp, int32_t int tcp_mss(struct tcpcb *tp, int offer) { + struct inpcb *inp; struct rtentry *rt; struct ifnet *ifp = NULL; - int mss, mssopt; - int iphlen; - struct inpcb *inp; + int mss, mssopt, iphlen, do_rfc3390; + u_int rtmtu; inp = tp->t_inpcb; @@ -2827,12 +2827,13 @@ tcp_mss(struct tcpcb *tp, int offer) * if there's an mtu associated with the route and we support * path MTU discovery for the underlying protocol family, use it. */ - if (rt->rt_mtu) { + rtmtu = READ_ONCE(rt->rt_mtu); + if (rtmtu) { /* * One may wish to lower MSS to take into account options, * especially security-related options. */ - if (tp->pf == AF_INET6 && rt->rt_mtu < IPV6_MMTU) { + if (tp->pf == AF_INET6 && rtmtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: if path MTU is * smaller than 1280, use 1280 as packet size and @@ -2841,8 +2842,7 @@ tcp_mss(struct tcpcb *tp, int offer) mss = IPV6_MMTU - iphlen - sizeof(struct ip6_frag) - sizeof(struct tcphdr); } else { - mss = rt->rt_mtu - iphlen - - sizeof(struct tcphdr); + mss = rtmtu - iphlen - sizeof(struct tcphdr); } } else if (ifp->if_flags & IFF_LOOPBACK) { mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr); @@ -2902,6 +2902,7 @@ tcp_mss(struct tcpcb *tp, int offer) mss -= TCPOLEN_SIGLEN; #endif + do_rfc3390 = atomic_load_int(&tcp_do_rfc3390); if (offer == -1) { /* mss changed due to Path MTU discovery */ tp->t_flags &= ~TF_PMTUD_PEND; @@ -2916,10 +2917,10 @@ tcp_mss(struct tcpcb *tp, int offer) tp->snd_cwnd = ulmax((tp->snd_cwnd / tp->t_maxseg) * mss, mss); } - } else if (tcp_do_rfc3390 == 2) { + } else if (do_rfc3390 == 2) { /* increase initial window */ tp->snd_cwnd = ulmin(10 * mss, ulmax(2 * mss, 14600)); - } else if (tcp_do_rfc3390) { + } else if (do_rfc3390) { /* increase initial window */ tp->snd_cwnd = ulmin(4 * mss, ulmax(2 * mss, 4380)); } else Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v diff -u -p -r1.178 tcp_var.h --- netinet/tcp_var.h 13 May 2024 01:15:53 -0000 1.178 +++ netinet/tcp_var.h 8 Nov 2024 22:07:37 -0000 @@ -679,14 +679,14 @@ extern struct pool tcpcb_pool; extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */ extern int tcp_do_rfc1323; /* enabled/disabled? */ extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */ -extern int tcp_mssdflt; /* default maximum segment size */ +extern int tcp_mssdflt; /* [N] default maximum segment size */ extern int tcp_rst_ppslim; /* maximum outgoing RST packet per second */ extern int tcp_ack_on_push; /* ACK immediately on PUSH */ extern int tcp_do_sack; /* SACK enabled/disabled */ extern struct pool sackhl_pool; extern int tcp_sackhole_limit; /* max entries for tcp sack queues */ extern int tcp_do_ecn; /* RFC3168 ECN enabled/disabled? */ -extern int tcp_do_rfc3390; /* RFC3390 Increasing TCP's Initial Window */ +extern int tcp_do_rfc3390; /* [a] RFC3390 Increasing TCP Initial Window */ extern int tcp_do_tso; /* enable TSO for TCP output packets */ extern struct pool tcpqe_pool;