From: Vitaliy Makkoveev Subject: Re: make tcp_mss() MP safe To: Alexander Bluhm Cc: tech@openbsd.org Date: Mon, 23 Dec 2024 01:38:00 +0300 On Sat, Dec 21, 2024 at 12:50:37AM +0100, Alexander Bluhm wrote: > Hi, > > Make tcp_mss() MP safe so that it could be called with shared net > lock together with socket lock. > > Document that inp_route is protected by socket lock. > > Read tcp_mssdflt and tcp_do_rfc3390 atomically. Note that tcp_mssdflt > is used in more places, so it still needs net lock to be written. > > Address family must be AF_INET or AF_INET6. Panic if not. Makes > goto out a bit simpler. > > rt->rt_mtu is read once, another thread might modify it. > > Fix the signed vs. unsigned comparison with max() and min(). > > ok? > Hi, > @@ -679,14 +680,14 @@ extern struct pool tcpcb_pool; > extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */ > extern int tcp_do_rfc1323; /* enabled/disabled? */ > extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */ > -extern int tcp_mssdflt; /* default maximum segment size */ > +extern int tcp_mssdflt; /* [N] default maximum segment size */ > extern int tcp_rst_ppslim; /* maximum outgoing RST packet per second */ Please, put locks description to the tcp_usrreq.c where variables are defined. Also `tcp_mssdflt' should be [a], not [N]. The rest is ok mvs@. > bluhm > > Index: netinet/in_pcb.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v > diff -u -p -r1.159 in_pcb.h > --- netinet/in_pcb.h 5 Nov 2024 10:49:23 -0000 1.159 > +++ netinet/in_pcb.h 20 Dec 2024 21:38:31 -0000 > @@ -140,7 +140,7 @@ struct inpcb { > u_int16_t inp_lport; /* [t] local port */ > struct socket *inp_socket; /* [I] back pointer to socket */ > caddr_t inp_ppcb; /* pointer to per-protocol pcb */ > - struct route inp_route; /* cached route */ > + struct route inp_route; /* [s] cached route */ > struct refcnt inp_refcnt; /* refcount PCB, delay memory free */ > int inp_flags; /* generic IP/datagram flags */ > union { /* Header prototype. */ > Index: netinet/tcp_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v > diff -u -p -r1.410 tcp_input.c > --- netinet/tcp_input.c 20 Dec 2024 19:20:34 -0000 1.410 > +++ netinet/tcp_input.c 20 Dec 2024 22:26:41 -0000 > @@ -2804,17 +2804,13 @@ int > tcp_mss(struct tcpcb *tp, int offer) > { > struct rtentry *rt; > - struct ifnet *ifp = NULL; > - int mss, mssopt; > - int iphlen; > - struct inpcb *inp; > + struct ifnet *ifp; > + int mss, mssopt, mssdflt, iphlen, do_rfc3390; > + u_int rtmtu; > > - inp = tp->t_inpcb; > - > - mssopt = mss = tcp_mssdflt; > - > - rt = in_pcbrtentry(inp); > + mss = mssopt = mssdflt = atomic_load_int(&tcp_mssdflt); > > + rt = in_pcbrtentry(tp->t_inpcb); > if (rt == NULL) > goto out; > > @@ -2823,29 +2819,29 @@ tcp_mss(struct tcpcb *tp, int offer) > goto out; > > switch (tp->pf) { > + case AF_INET: > + iphlen = sizeof(struct ip); > + break; > #ifdef INET6 > case AF_INET6: > iphlen = sizeof(struct ip6_hdr); > break; > #endif > - case AF_INET: > - iphlen = sizeof(struct ip); > - break; > default: > - /* the family does not support path MTU discovery */ > - goto out; > + unhandled_af(tp->pf); > } > > /* > * if there's an mtu associated with the route and we support > * path MTU discovery for the underlying protocol family, use it. > */ > - if (rt->rt_mtu) { > + rtmtu = READ_ONCE(rt->rt_mtu); > + if (rtmtu) { > /* > * One may wish to lower MSS to take into account options, > * especially security-related options. > */ > - if (tp->pf == AF_INET6 && rt->rt_mtu < IPV6_MMTU) { > + if (tp->pf == AF_INET6 && rtmtu < IPV6_MMTU) { > /* > * RFC2460 section 5, last paragraph: if path MTU is > * smaller than 1280, use 1280 as packet size and > @@ -2854,8 +2850,7 @@ tcp_mss(struct tcpcb *tp, int offer) > mss = IPV6_MMTU - iphlen - sizeof(struct ip6_frag) - > sizeof(struct tcphdr); > } else { > - mss = rt->rt_mtu - iphlen - > - sizeof(struct tcphdr); > + mss = rtmtu - iphlen - sizeof(struct tcphdr); > } > } else if (ifp->if_flags & IFF_LOOPBACK) { > mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr); > @@ -2876,10 +2871,10 @@ tcp_mss(struct tcpcb *tp, int offer) > /* Calculate the value that we offer in TCPOPT_MAXSEG */ > if (offer != -1) { > mssopt = ifp->if_mtu - iphlen - sizeof(struct tcphdr); > - mssopt = max(tcp_mssdflt, mssopt); > + mssopt = imax(mssopt, mssdflt); > } > - out: > if_put(ifp); > + out: > /* > * The current mss, t_maxseg, is initialized to the default value. > * If we compute a smaller value, reduce the current mss. > @@ -2893,10 +2888,10 @@ tcp_mss(struct tcpcb *tp, int offer) > if (offer > 0) > tp->t_peermss = offer; > if (tp->t_peermss) > - mss = min(mss, max(tp->t_peermss, 216)); > + mss = imin(mss, max(tp->t_peermss, 216)); > > /* sanity - at least max opt. space */ > - mss = max(mss, 64); > + mss = imax(mss, 64); > > /* > * maxopd stores the maximum length of data AND options > @@ -2915,6 +2910,7 @@ tcp_mss(struct tcpcb *tp, int offer) > mss -= TCPOLEN_SIGLEN; > #endif > > + do_rfc3390 = atomic_load_int(&tcp_do_rfc3390); > if (offer == -1) { > /* mss changed due to Path MTU discovery */ > tp->t_flags &= ~TF_PMTUD_PEND; > @@ -2929,10 +2925,10 @@ tcp_mss(struct tcpcb *tp, int offer) > tp->snd_cwnd = ulmax((tp->snd_cwnd / tp->t_maxseg) * > mss, mss); > } > - } else if (tcp_do_rfc3390 == 2) { > + } else if (do_rfc3390 == 2) { > /* increase initial window */ > tp->snd_cwnd = ulmin(10 * mss, ulmax(2 * mss, 14600)); > - } else if (tcp_do_rfc3390) { > + } else if (do_rfc3390) { > /* increase initial window */ > tp->snd_cwnd = ulmin(4 * mss, ulmax(2 * mss, 4380)); > } else > @@ -3020,7 +3016,6 @@ tcp_mss_update(struct tcpcb *tp) > (void)sbreserve(so, &so->so_rcv, bufsize); > } > mtx_leave(&so->so_rcv.sb_mtx); > - > } > > /* > Index: netinet/tcp_var.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v > diff -u -p -r1.178 tcp_var.h > --- netinet/tcp_var.h 13 May 2024 01:15:53 -0000 1.178 > +++ netinet/tcp_var.h 20 Dec 2024 22:22:06 -0000 > @@ -227,6 +227,7 @@ struct tcp_opt_info { > > /* > * Locks used to protect global data and struct members: > + * a atomic operations > * I immutable after creation > * N net lock > * S syn_cache_mtx tcp syn cache global mutex > @@ -679,14 +680,14 @@ extern struct pool tcpcb_pool; > extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */ > extern int tcp_do_rfc1323; /* enabled/disabled? */ > extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */ > -extern int tcp_mssdflt; /* default maximum segment size */ > +extern int tcp_mssdflt; /* [N] default maximum segment size */ > extern int tcp_rst_ppslim; /* maximum outgoing RST packet per second */ > extern int tcp_ack_on_push; /* ACK immediately on PUSH */ > extern int tcp_do_sack; /* SACK enabled/disabled */ > extern struct pool sackhl_pool; > extern int tcp_sackhole_limit; /* max entries for tcp sack queues */ > extern int tcp_do_ecn; /* RFC3168 ECN enabled/disabled? */ > -extern int tcp_do_rfc3390; /* RFC3390 Increasing TCP's Initial Window */ > +extern int tcp_do_rfc3390; /* [a] RFC3390 Increasing TCP Initial Window */ > extern int tcp_do_tso; /* enable TSO for TCP output packets */ > > extern struct pool tcpqe_pool; >