From: Alexander Bluhm Subject: ipsec pmtu rtable To: tech@openbsd.org Cc: Markus Friedl Date: Sat, 22 Feb 2025 01:30:51 +0100 Hi, If pf(4) switches the rtable, the route for path MTU discovery must be generated in the original routing table. For that ip_output() keeps the original rtableid. Then a local TCP socket uses the correct route. This did not work when IPsec was involved. Pass orig_rtableid also to ip_output_ipsec_send() to use the same logic in ip_output_ipsec_pmtu_update(). A similar change is necessary for ip6_output() and ip6_forward(). ok? bluhm Index: netinet/ip_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v diff -u -p -r1.404 ip_output.c --- netinet/ip_output.c 14 Feb 2025 13:14:13 -0000 1.404 +++ netinet/ip_output.c 21 Feb 2025 14:43:09 -0000 @@ -88,7 +88,8 @@ int ip_output_ipsec_lookup(struct mbuf * const struct ipsec_level *seclevel, struct tdb **, int ipsecflowinfo); void ip_output_ipsec_pmtu_update(struct tdb *, struct route *, struct in_addr, int); -int ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int); +int ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, u_int, + int); /* * IP output. The packet in mbuf chain m contains a skeletal IP @@ -110,9 +111,7 @@ ip_output(struct mbuf *m, struct mbuf *o struct sockaddr_in *dst; struct tdb *tdb = NULL; u_long mtu; -#if NPF > 0 u_int orig_rtableid; -#endif NET_ASSERT_LOCKED(); @@ -147,8 +146,8 @@ ip_output(struct mbuf *m, struct mbuf *o goto bad; } -#if NPF > 0 orig_rtableid = m->m_pkthdr.ph_rtableid; +#if NPF > 0 reroute: #endif @@ -393,7 +392,7 @@ sendit: */ if (tdb != NULL) { /* Callee frees mbuf */ - error = ip_output_ipsec_send(tdb, m, ro, + error = ip_output_ipsec_send(tdb, m, ro, orig_rtableid, (flags & IP_FORWARDING) ? 
1 : 0); goto done; } @@ -569,6 +568,7 @@ ip_output_ipsec_pmtu_update(struct tdb * atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu); if (ro != NULL && ro->ro_rt != NULL) { rtfree(ro->ro_rt); + ro->ro_tableid = rtableid; ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, rtableid); } @@ -578,14 +578,15 @@ ip_output_ipsec_pmtu_update(struct tdb * } int -ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int fwd) +ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, + u_int rtableid, int fwd) { struct mbuf_list ml; struct ifnet *encif = NULL; struct ip *ip; struct in_addr dst; u_int len; - int error, rtableid, tso = 0; + int error, tso = 0; #if NPF > 0 /* @@ -618,7 +619,6 @@ ip_output_ipsec_send(struct tdb *tdb, st /* Check if we are allowed to fragment */ dst = ip->ip_dst; - rtableid = m->m_pkthdr.ph_rtableid; if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && len > tdb->tdb_mtu && tdb->tdb_mtutimeout > gettime()) { ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid); Index: netinet6/ip6_forward.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v diff -u -p -r1.125 ip6_forward.c --- netinet6/ip6_forward.c 3 Jan 2025 21:27:40 -0000 1.125 +++ netinet6/ip6_forward.c 21 Feb 2025 14:40:53 -0000 @@ -97,6 +97,7 @@ ip6_forward(struct mbuf *m, struct route u_short mflags, pfflags; struct mbuf *mcopy; int error = 0, type = 0, code = 0, destmtu = 0; + u_int orig_rtableid; #ifdef IPSEC struct tdb *tdb = NULL; #endif /* IPSEC */ @@ -180,6 +181,7 @@ ip6_forward(struct mbuf *m, struct route icmp_len = 0; } + orig_rtableid = m->m_pkthdr.ph_rtableid; #if NPF > 0 reroute: #endif @@ -254,7 +256,7 @@ reroute: */ if (tdb != NULL) { /* Callee frees mbuf */ - error = ip6_output_ipsec_send(tdb, m, ro, 0, 1); + error = ip6_output_ipsec_send(tdb, m, ro, orig_rtableid, 0, 1); rt = ro->ro_rt; if (error) goto senderr; Index: netinet6/ip6_output.c 
=================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v diff -u -p -r1.295 ip6_output.c --- netinet6/ip6_output.c 14 Feb 2025 13:14:13 -0000 1.295 +++ netinet6/ip6_output.c 21 Feb 2025 14:42:38 -0000 @@ -172,6 +172,7 @@ ip6_output(struct mbuf *m, struct ip6_pk struct sockaddr_in6 *dst; int error = 0; u_long mtu; + u_int orig_rtableid; int dontfrag; u_int16_t src_scope, dst_scope; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; @@ -384,6 +385,7 @@ ip6_output(struct mbuf *m, struct ip6_pk /* * Route packet. */ + orig_rtableid = m->m_pkthdr.ph_rtableid; #if NPF > 0 reroute: #endif @@ -436,7 +438,7 @@ reroute: * packet just because ip6_dst is different from what tdb has. * XXX */ - error = ip6_output_ipsec_send(tdb, m, ro, + error = ip6_output_ipsec_send(tdb, m, ro, orig_rtableid, exthdrs.ip6e_rthdr ? 1 : 0, 0); goto done; } @@ -2815,6 +2817,7 @@ ip6_output_ipsec_pmtu_update(struct tdb atomic_store_int(&rt->rt_mtu, tdb->tdb_mtu); if (ro != NULL && ro->ro_rt != NULL) { rtfree(ro->ro_rt); + ro->ro_tableid = rtableid; ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, rtableid); } @@ -2826,14 +2829,14 @@ ip6_output_ipsec_pmtu_update(struct tdb int ip6_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, - int tunalready, int fwd) + u_int rtableid, int tunalready, int fwd) { struct mbuf_list ml; struct ifnet *encif = NULL; struct ip6_hdr *ip6; struct in6_addr dst; u_int len; - int error, ifidx, rtableid, tso = 0; + int error, ifidx, tso = 0; #if NPF > 0 /* @@ -2867,7 +2870,6 @@ ip6_output_ipsec_send(struct tdb *tdb, s /* Check if we are allowed to fragment */ dst = ip6->ip6_dst; ifidx = m->m_pkthdr.ph_ifidx; - rtableid = m->m_pkthdr.ph_rtableid; if (ip_mtudisc && tdb->tdb_mtu && len > tdb->tdb_mtu && tdb->tdb_mtutimeout > gettime()) { int transportmode; Index: netinet6/ip6_var.h =================================================================== RCS file: 
/data/mirror/openbsd/cvs/src/sys/netinet6/ip6_var.h,v diff -u -p -r1.121 ip6_var.h --- netinet6/ip6_var.h 21 Nov 2024 20:15:44 -0000 1.121 +++ netinet6/ip6_var.h 21 Feb 2025 14:33:04 -0000 @@ -379,7 +379,7 @@ struct tdb; int ip6_output_ipsec_lookup(struct mbuf *, const struct ipsec_level *, struct tdb **); int ip6_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, - int, int); + u_int, int, int); #endif /* IPSEC */ #endif /* _KERNEL */