Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: IPsec path MTU fix
To:
Vitaliy Makkoveev <otto@bsdbox.dev>
Cc:
tech@openbsd.org, Markus Friedl <markus@openbsd.org>
Date:
Fri, 7 Feb 2025 22:34:54 +0100

Download raw body.

Thread
On Fri, Feb 07, 2025 at 11:41:37PM +0300, Vitaliy Makkoveev wrote:
> > On 7 Feb 2025, at 22:53, Alexander Bluhm <bluhm@openbsd.org> wrote:
> > 
> > Hi,
> > 
> > In December I have fixed an integer sign bug in tcp_mss_adv().
> > Since then my IPsec regress test is failing.
> > 
> > When IPsec is used, if_get(m->m_pkthdr.ph_ifidx) returns enc0.  Its
> > if_mtu is 0 which results in negative mss.  There was a unsigned
> > max() before, so a huge positive number was taken.  Now mssdflt,
> > which is 512, is used.  So the TCP SYN cache uses a very small
> > maximum TCP segment number.
> > 
> > I converted the logic back to imax() which makes it a bit more
> > obvious.
> > 
> > The real problem is, that SYN cache uses the incoming interface
> > m->m_pkthdr.ph_ifidx for the outgoing MTU.  The correct way is to
> > use the route of the destination address like tcp_mss() does it.
> > 
> > The SYN cache has a struct route which can be used.  An additional
> > route lookup does not happen as the route is cached.
> > 
> > ok?
> > 
> 
> Looks correct, but isn???t it better to pass interface index to
> tcp_mss_adv()? You don???t need to care about rt dereference.

I wanted to do it simmilar to tcp_mss().  And then I would to
have to deal with rt == NULL in the caller.  Only tcp_mss_adv()
knows about tcp_mssdflt magic.

> > Index: netinet/tcp_input.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> > diff -u -p -r1.429 tcp_input.c
> > --- netinet/tcp_input.c	31 Jan 2025 11:48:18 -0000	1.429
> > +++ netinet/tcp_input.c	7 Feb 2025 19:51:06 -0000
> > @@ -100,7 +100,7 @@
> > #include <net/pfvar.h>
> > #endif
> > 
> > -int tcp_mss_adv(struct mbuf *, int);
> > +int tcp_mss_adv(struct rtentry *, int);
> > int tcp_flush_queue(struct tcpcb *);
> > 
> > #ifdef INET6
> > @@ -3077,17 +3077,17 @@ tcp_newreno_partialack(struct tcpcb *tp,
> > }
> > 
> > int
> > -tcp_mss_adv(struct mbuf *m, int af)
> > +tcp_mss_adv(struct rtentry *rt, int af)
> > {
> > 	struct ifnet *ifp;
> > 	int iphlen, mss, mssdflt;
> > 
> > 	mssdflt = atomic_load_int(&tcp_mssdflt);
> > 
> > -	if (m == NULL || (m->m_flags & M_PKTHDR) == 0)
> > +	if (rt == NULL)
> > 		return mssdflt;
> > 
> > -	ifp = if_get(m->m_pkthdr.ph_ifidx);
> > +	ifp = if_get(rt->rt_ifidx);
> > 	if (ifp == NULL)
> > 		return mssdflt;
> > 
> > @@ -3106,9 +3106,7 @@ tcp_mss_adv(struct mbuf *m, int af)
> > 	mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
> > 	if_put(ifp);
> > 
> > -	if (mss < mssdflt)
> > -		return mssdflt;
> > -	return mss;
> > +	return imax(mss, mssdflt);
> > }
> > 
> > /*
> > @@ -3814,6 +3812,7 @@ syn_cache_add(struct sockaddr *src, stru
> > 	struct syn_cache *sc;
> > 	struct syn_cache_head *scp;
> > 	struct mbuf *ipopts;
> > +	struct rtentry *rt = NULL;
> > 
> > 	NET_ASSERT_LOCKED();
> > 
> > @@ -3908,12 +3907,30 @@ syn_cache_add(struct sockaddr *src, stru
> > 	memcpy(&sc->sc_src, src, src->sa_len);
> > 	memcpy(&sc->sc_dst, dst, dst->sa_len);
> > 	sc->sc_rtableid = sotoinpcb(so)->inp_rtableid;
> > +	switch (sc->sc_src.sa.sa_family) {
> > +	case AF_INET:
> > +		if (sc->sc_src.sin.sin_addr.s_addr != INADDR_ANY) {
> > +			rt = route_mpath(&sc->sc_route,
> > +			    &sc->sc_src.sin.sin_addr,
> > +			    &sc->sc_dst.sin.sin_addr, sc->sc_rtableid);
> > +		}
> > +		break;
> > +#ifdef INET6
> > +	case AF_INET6:
> > +		if (!IN6_IS_ADDR_UNSPECIFIED(&sc->sc_src.sin6.sin6_addr)) {
> > +			rt = route6_mpath(&sc->sc_route,
> > +			    &sc->sc_src.sin6.sin6_addr,
> > +			    &sc->sc_dst.sin6.sin6_addr, sc->sc_rtableid);
> > +		}
> > +		break;
> > +#endif
> > +	}
> > 	sc->sc_ipopts = ipopts;
> > 	sc->sc_irs = th->th_seq;
> > 
> > 	sc->sc_iss = issp ? *issp : arc4random();
> > 	sc->sc_peermaxseg = oi->maxseg;
> > -	sc->sc_ourmaxseg = tcp_mss_adv(m, sc->sc_src.sa.sa_family);
> > +	sc->sc_ourmaxseg = tcp_mss_adv(rt, sc->sc_src.sa.sa_family);
> > 	sc->sc_win = win;
> > 	sc->sc_timestamp = tb.ts_recent;
> > 	if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==
> > 
>