From: Alexander Bluhm Subject: Re: IPsec path MTU fix To: Vitaliy Makkoveev Cc: tech@openbsd.org, Markus Friedl Date: Fri, 7 Feb 2025 22:34:54 +0100 On Fri, Feb 07, 2025 at 11:41:37PM +0300, Vitaliy Makkoveev wrote: > > On 7 Feb 2025, at 22:53, Alexander Bluhm wrote: > > > > Hi, > > > > In December I have fixed an integer sign bug in tcp_mss_adv(). > > Since then my IPsec regress test is failing. > > > > When IPsec is used, if_get(m->m_pkthdr.ph_ifidx) returns enc0. Its > > if_mtu is 0 which results in negative mss. There was a unsigned > > max() before, so a huge positive number was taken. Now mssdflt, > > which is 512, is used. So the TCP SYN cache uses a very small > > maximum TCP segment number. > > > > I converted the logic back to imax() which makes it a bit more > > obvious. > > > > The real problem is, that SYN cache uses the incoming interface > > m->m_pkthdr.ph_ifidx for the outgoing MTU. The correct way is to > > use the route of the destination address like tcp_mss() does it. > > > > The SYN cache has a struct route which can be used. An additional > > route lookup does not happen as the route is cached. > > > > ok? > > > > Looks correct, but isn???t it better to pass interface index to > tcp_mss_adv()? You don???t need to care about rt dereference. I wanted to do it simmilar to tcp_mss(). And then I would to have to deal with rt == NULL in the caller. Only tcp_mss_adv() knows about tcp_mssdflt magic. > > Index: netinet/tcp_input.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v > > diff -u -p -r1.429 tcp_input.c > > --- netinet/tcp_input.c 31 Jan 2025 11:48:18 -0000 1.429 > > +++ netinet/tcp_input.c 7 Feb 2025 19:51:06 -0000 > > @@ -100,7 +100,7 @@ > > #include > > #endif > > > > -int tcp_mss_adv(struct mbuf *, int); > > +int tcp_mss_adv(struct rtentry *, int); > > int tcp_flush_queue(struct tcpcb *); > > > > #ifdef INET6 > > @@ -3077,17 +3077,17 @@ tcp_newreno_partialack(struct tcpcb *tp, > > } > > > > int > > -tcp_mss_adv(struct mbuf *m, int af) > > +tcp_mss_adv(struct rtentry *rt, int af) > > { > > struct ifnet *ifp; > > int iphlen, mss, mssdflt; > > > > mssdflt = atomic_load_int(&tcp_mssdflt); > > > > - if (m == NULL || (m->m_flags & M_PKTHDR) == 0) > > + if (rt == NULL) > > return mssdflt; > > > > - ifp = if_get(m->m_pkthdr.ph_ifidx); > > + ifp = if_get(rt->rt_ifidx); > > if (ifp == NULL) > > return mssdflt; > > > > @@ -3106,9 +3106,7 @@ tcp_mss_adv(struct mbuf *m, int af) > > mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr); > > if_put(ifp); > > > > - if (mss < mssdflt) > > - return mssdflt; > > - return mss; > > + return imax(mss, mssdflt); > > } > > > > /* > > @@ -3814,6 +3812,7 @@ syn_cache_add(struct sockaddr *src, stru > > struct syn_cache *sc; > > struct syn_cache_head *scp; > > struct mbuf *ipopts; > > + struct rtentry *rt = NULL; > > > > NET_ASSERT_LOCKED(); > > > > @@ -3908,12 +3907,30 @@ syn_cache_add(struct sockaddr *src, stru > > memcpy(&sc->sc_src, src, src->sa_len); > > memcpy(&sc->sc_dst, dst, dst->sa_len); > > sc->sc_rtableid = sotoinpcb(so)->inp_rtableid; > > + switch (sc->sc_src.sa.sa_family) { > > + case AF_INET: > > + if (sc->sc_src.sin.sin_addr.s_addr != INADDR_ANY) { > > + rt = route_mpath(&sc->sc_route, > > + &sc->sc_src.sin.sin_addr, > > + &sc->sc_dst.sin.sin_addr, sc->sc_rtableid); > > + } > > + break; > > +#ifdef INET6 > > + case AF_INET6: > > + if (!IN6_IS_ADDR_UNSPECIFIED(&sc->sc_src.sin6.sin6_addr)) { > > + rt = route6_mpath(&sc->sc_route, > > + &sc->sc_src.sin6.sin6_addr, > > + &sc->sc_dst.sin6.sin6_addr, sc->sc_rtableid); > > + } > > + break; > > +#endif > > + } > > sc->sc_ipopts = ipopts; > > sc->sc_irs = th->th_seq; > > > > sc->sc_iss = issp ? *issp : arc4random(); > > sc->sc_peermaxseg = oi->maxseg; > > - sc->sc_ourmaxseg = tcp_mss_adv(m, sc->sc_src.sa.sa_family); > > + sc->sc_ourmaxseg = tcp_mss_adv(rt, sc->sc_src.sa.sa_family); > > sc->sc_win = win; > > sc->sc_timestamp = tb.ts_recent; > > if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) == > > >