Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
IPsec path MTU fix
To:
tech@openbsd.org
Cc:
Markus Friedl <markus@openbsd.org>
Date:
Fri, 7 Feb 2025 20:53:48 +0100

Download raw body.

Thread
Hi,

In December I have fixed an integer sign bug in tcp_mss_adv().
Since then my IPsec regress test is failing.

When IPsec is used, if_get(m->m_pkthdr.ph_ifidx) returns enc0.  Its
if_mtu is 0 which results in negative mss.  There was a unsigned
max() before, so a huge positive number was taken.  Now mssdflt,
which is 512, is used.  So the TCP SYN cache uses a very small
maximum TCP segment number.

I converted the logic back to imax() which makes it a bit more
obvious.

The real problem is, that SYN cache uses the incoming interface
m->m_pkthdr.ph_ifidx for the outgoing MTU.  The correct way is to
use the route of the destination address like tcp_mss() does it.

The SYN cache has a struct route which can be used.  An additional
route lookup does not happen as the route is cached.

ok?

bluhm

Index: netinet/tcp_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.429 tcp_input.c
--- netinet/tcp_input.c	31 Jan 2025 11:48:18 -0000	1.429
+++ netinet/tcp_input.c	7 Feb 2025 19:51:06 -0000
@@ -100,7 +100,7 @@
 #include <net/pfvar.h>
 #endif
 
-int tcp_mss_adv(struct mbuf *, int);
+int tcp_mss_adv(struct rtentry *, int);
 int tcp_flush_queue(struct tcpcb *);
 
 #ifdef INET6
@@ -3077,17 +3077,17 @@ tcp_newreno_partialack(struct tcpcb *tp,
 }
 
 int
-tcp_mss_adv(struct mbuf *m, int af)
+tcp_mss_adv(struct rtentry *rt, int af)
 {
 	struct ifnet *ifp;
 	int iphlen, mss, mssdflt;
 
 	mssdflt = atomic_load_int(&tcp_mssdflt);
 
-	if (m == NULL || (m->m_flags & M_PKTHDR) == 0)
+	if (rt == NULL)
 		return mssdflt;
 
-	ifp = if_get(m->m_pkthdr.ph_ifidx);
+	ifp = if_get(rt->rt_ifidx);
 	if (ifp == NULL)
 		return mssdflt;
 
@@ -3106,9 +3106,7 @@ tcp_mss_adv(struct mbuf *m, int af)
 	mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
 	if_put(ifp);
 
-	if (mss < mssdflt)
-		return mssdflt;
-	return mss;
+	return imax(mss, mssdflt);
 }
 
 /*
@@ -3814,6 +3812,7 @@ syn_cache_add(struct sockaddr *src, stru
 	struct syn_cache *sc;
 	struct syn_cache_head *scp;
 	struct mbuf *ipopts;
+	struct rtentry *rt = NULL;
 
 	NET_ASSERT_LOCKED();
 
@@ -3908,12 +3907,30 @@ syn_cache_add(struct sockaddr *src, stru
 	memcpy(&sc->sc_src, src, src->sa_len);
 	memcpy(&sc->sc_dst, dst, dst->sa_len);
 	sc->sc_rtableid = sotoinpcb(so)->inp_rtableid;
+	switch (sc->sc_src.sa.sa_family) {
+	case AF_INET:
+		if (sc->sc_src.sin.sin_addr.s_addr != INADDR_ANY) {
+			rt = route_mpath(&sc->sc_route,
+			    &sc->sc_src.sin.sin_addr,
+			    &sc->sc_dst.sin.sin_addr, sc->sc_rtableid);
+		}
+		break;
+#ifdef INET6
+	case AF_INET6:
+		if (!IN6_IS_ADDR_UNSPECIFIED(&sc->sc_src.sin6.sin6_addr)) {
+			rt = route6_mpath(&sc->sc_route,
+			    &sc->sc_src.sin6.sin6_addr,
+			    &sc->sc_dst.sin6.sin6_addr, sc->sc_rtableid);
+		}
+		break;
+#endif
+	}
 	sc->sc_ipopts = ipopts;
 	sc->sc_irs = th->th_seq;
 
 	sc->sc_iss = issp ? *issp : arc4random();
 	sc->sc_peermaxseg = oi->maxseg;
-	sc->sc_ourmaxseg = tcp_mss_adv(m, sc->sc_src.sa.sa_family);
+	sc->sc_ourmaxseg = tcp_mss_adv(rt, sc->sc_src.sa.sa_family);
 	sc->sc_win = win;
 	sc->sc_timestamp = tb.ts_recent;
 	if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==