Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
pf_route simplification, or pf_route vs pfsync
To:
tech@openbsd.org
Date:
Sat, 6 Jul 2024 16:06:16 +1000

Download raw body.

Thread
at work i have a pair of firewalls with pfsync set up, and we have
pfsync defer enabled so we cope better in when routes flop between
them.

i was trying to use route-to in a rule like this:

 pass in quick proto { tcp udp } from $mydesktop to $rns port domain
route-to $newrns

the $rns addresses are aliases on the resolver boxes, so i just
needed to force the packets to go to the new server without actually
rewriting them.

this didnt work as expeted. the packet leaving the firewall always
went to the original resolver. the weird thing was that for tcp
connections the first packet went to the original resolver which would
ack the connection attempt, but the firewall would send subsequent
packets packet to th new resolvers. this also didnt work.

i figured out it was a bad interaction between pfsync defer and
route-to.

route-to steals packets leaving the firewall and holds onto them
until the peer pfsync box acks the state creation or a timeout
occurs. if we're using route-to on a "pass in" rule, then pf_route
has done it's thing. when it comes time for pfsync to send the
packet out, it's lost the work that pf_route did to change the
outgoing gateway/interface and assumes it can route it normally.

i tried to fix this by using the state links to look at the incoming
state and using that to rerun pf_route. however, this doesnt work
because we don't really have links between pf states, we end up
with links between pf_state_keys (see "link mbufs/inpcbs to pf_states,
not pf_state_keys" sent to this list last year), so resolving the
incoming state is more complicated than it should be. on top of
that, these links don't get set up properly on the first packets
in the state, and even if they were set up pf tears them down by
the time pfsync steals the packet.

my second attempt was to gut pf_route and have it jus add an mbuf
tag with the dst address i want to use instead of the real ip. the
actual ip stack is then modified to use the tag for the real route
lookups.

in my opinion this turned out really well. it's long bothered me that
pf_route and pf_route6 are a large duplication of the ip_output code.
there's been cases where we forget to update pf_route with new stack
features. this problem goes away if we just use the stack for
everything. a big chunk of code just goes away.

pfsync defer with route-to works too.

ive been running this in production since the end of april without
issue. if there's a performance hit i haven't noticed it. mbuf tag
lookups are cheap since henning added what's basically a bloom
filter for them to mbufs.

ok?

Index: net/if_pfsync.c
===================================================================
RCS file: /cvs/src/sys/net/if_pfsync.c,v
diff -u -p -r1.326 if_pfsync.c
--- net/if_pfsync.c	24 May 2024 06:38:41 -0000	1.326
+++ net/if_pfsync.c	6 Jul 2024 05:16:51 -0000
@@ -2068,13 +2068,16 @@ pfsync_deferrals_task(void *arg)
 static void
 pfsync_defer_output(struct pfsync_deferral *pd)
 {
-	struct pf_pdesc pdesc;
 	struct pf_state *st = pd->pd_st;
+	struct mbuf *m = pd->pd_m;
+
+	if (st->rt) {
+		struct pf_pdesc pdesc;
 
-	if (st->rt == PF_ROUTETO) {
 		if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
-		    st->direction, NULL, pd->pd_m, NULL) != PF_PASS)
-			return;
+		    st->direction, NULL, m, NULL) != PF_PASS)
+			goto done;
+
 		switch (st->key[PF_SK_WIRE]->af) {
 		case AF_INET:
 			pf_route(&pdesc, st);
@@ -2087,26 +2090,27 @@ pfsync_defer_output(struct pfsync_deferr
 		default:
 			unhandled_af(st->key[PF_SK_WIRE]->af);
 		}
-		pd->pd_m = pdesc.m;
-	} else {
-		switch (st->key[PF_SK_WIRE]->af) {
-		case AF_INET:
-			ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0);
-			break;
+		m = pdesc.m;
+
+		if (m == NULL)
+			goto done;
+	}
+
+	switch (st->key[PF_SK_WIRE]->af) {
+	case AF_INET:
+		ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
+		break;
 #ifdef INET6
-		case AF_INET6:
-			ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
-			break;
+	case AF_INET6:
+		ip6_output(m, NULL, NULL, 0, NULL, NULL);
+		break;
 #endif /* INET6 */
-		default:
-			unhandled_af(st->key[PF_SK_WIRE]->af);
-		}
-
-		pd->pd_m = NULL;
+	default:
+		unhandled_af(st->key[PF_SK_WIRE]->af);
 	}
 
+done:
 	pf_state_unref(st);
-	m_freem(pd->pd_m);
 	pool_put(&pfsync_deferrals_pool, pd);
 }
 
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
diff -u -p -r1.1201 pf.c
--- net/pf.c	4 Jul 2024 12:50:08 -0000	1.1201
+++ net/pf.c	6 Jul 2024 05:16:51 -0000
@@ -6568,12 +6568,7 @@ void
 pf_route(struct pf_pdesc *pd, struct pf_state *st)
 {
 	struct mbuf		*m0;
-	struct mbuf_list	 ml;
-	struct sockaddr_in	*dst, sin;
-	struct rtentry		*rt = NULL;
-	struct ip		*ip;
-	struct ifnet		*ifp = NULL;
-	unsigned int		 rtableid;
+	struct m_tag		*mtag;
 
 	if (pd->m->m_pkthdr.pf.routed++ > 3) {
 		m_freem(pd->m);
@@ -6582,117 +6577,47 @@ pf_route(struct pf_pdesc *pd, struct pf_
 	}
 
 	if (st->rt == PF_DUPTO) {
-		if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
-			return;
-	} else {
-		if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
+		m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT);
+		if (m0 == NULL)
 			return;
+	} else if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
+		return;
+	else
 		m0 = pd->m;
-		pd->m = NULL;
-	}
-
-	if (m0->m_len < sizeof(struct ip)) {
-		DPFPRINTF(LOG_ERR,
-		    "%s: m0->m_len < sizeof(struct ip)", __func__);
-		goto bad;
-	}
 
-	ip = mtod(m0, struct ip *);
-
-	if (pd->dir == PF_IN) {
-		if (ip->ip_ttl <= IPTTLDEC) {
-			if (st->rt != PF_DUPTO) {
-				pf_send_icmp(m0, ICMP_TIMXCEED,
-				    ICMP_TIMXCEED_INTRANS, 0,
-				    pd->af, st->rule.ptr, pd->rdomain);
-			}
-			goto bad;
-		}
-		ip->ip_ttl -= IPTTLDEC;
-	}
-
-	memset(&sin, 0, sizeof(sin));
-	dst = &sin;
-	dst->sin_family = AF_INET;
-	dst->sin_len = sizeof(*dst);
-	dst->sin_addr = st->rt_addr.v4;
-	rtableid = m0->m_pkthdr.ph_rtableid;
-
-	rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid);
-	if (!rtisvalid(rt)) {
-		if (st->rt != PF_DUPTO) {
-			pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
-			    0, pd->af, st->rule.ptr, pd->rdomain);
-		}
-		ipstat_inc(ips_noroute);
-		goto bad;
-	}
-
-	ifp = if_get(rt->rt_ifidx);
-	if (ifp == NULL)
-		goto bad;
-
-	/* A locally generated packet may have invalid source address. */
-	if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
-	    (ifp->if_flags & IFF_LOOPBACK) == 0)
-		ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
-
-	if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
-		if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
-			goto bad;
-		else if (m0 == NULL)
-			goto done;
-		if (m0->m_len < sizeof(struct ip)) {
-			DPFPRINTF(LOG_ERR,
-			    "%s: m0->m_len < sizeof(struct ip)", __func__);
-			goto bad;
-		}
-		ip = mtod(m0, struct ip *);
+	mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTE, NULL);
+	if (mtag == NULL) {
+		mtag = m_tag_get(PACKET_TAG_PF_ROUTE, sizeof(st->rt_addr),
+		    M_NOWAIT);
+		if (mtag == NULL)
+			goto drop;
 	}
 
-	if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) ||
-	    m0 == NULL)
-		goto done;
+	*(struct pf_addr *)(mtag + 1) = st->rt_addr;
+	m_tag_prepend(m0, mtag);
 
-	/*
-	 * Too large for interface; fragment if possible.
-	 * Must be able to put at least 8 bytes per fragment.
-	 */
-	if (ip->ip_off & htons(IP_DF)) {
-		ipstat_inc(ips_cantfrag);
-		if (st->rt != PF_DUPTO)
-			pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
-			    ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
-		goto bad;
-	}
+	if (st->rt == PF_DUPTO) {
+		SET(m0->m_pkthdr.pf.flags, PF_TAG_GENERATED);
 
-	if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) ||
-	    if_output_ml(ifp, &ml, sintosa(dst), rt))
-		goto done;
-	ipstat_inc(ips_fragmented);
+		ip_output(m0, NULL, NULL, 0, NULL, NULL, 0);
+	} else if (pd->dir == PF_OUT)
+		SET(m0->m_pkthdr.pf.flags, PF_TAG_REROUTE);
 
-done:
-	if_put(ifp);
-	rtfree(rt);
 	return;
-
-bad:
+drop:
+	if (m0 == pd->m)
+		pd->m = NULL;
 	m_freem(m0);
-	goto done;
 }
 
 #ifdef INET6
-/* pf_route6() may change pd->m, adjust local copies after calling */
 void
 pf_route6(struct pf_pdesc *pd, struct pf_state *st)
 {
 	struct mbuf		*m0;
-	struct sockaddr_in6	*dst, sin6;
-	struct rtentry		*rt = NULL;
-	struct ip6_hdr		*ip6;
-	struct ifnet		*ifp = NULL;
 	struct m_tag		*mtag;
-	unsigned int		 rtableid;
+
+printf("%s\n", __func__);
 
 	if (pd->m->m_pkthdr.pf.routed++ > 3) {
 		m_freem(pd->m);
@@ -6701,101 +6626,37 @@ pf_route6(struct pf_pdesc *pd, struct pf
 	}
 
 	if (st->rt == PF_DUPTO) {
-		if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
-			return;
-	} else {
-		if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
+		m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT);
+		if (m0 == NULL)
 			return;
+	} else if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
+		return;
+	else
 		m0 = pd->m;
-		pd->m = NULL;
-	}
 
-	if (m0->m_len < sizeof(struct ip6_hdr)) {
-		DPFPRINTF(LOG_ERR,
-		    "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
-		goto bad;
+	mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTE, NULL);
+	if (mtag == NULL) {
+		mtag = m_tag_get(PACKET_TAG_PF_ROUTE, sizeof(st->rt_addr),
+		    M_NOWAIT);
+		if (mtag == NULL)
+			goto drop;
 	}
-	ip6 = mtod(m0, struct ip6_hdr *);
 
-	if (pd->dir == PF_IN) {
-		if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
-			if (st->rt != PF_DUPTO) {
-				pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
-				    ICMP6_TIME_EXCEED_TRANSIT, 0,
-				    pd->af, st->rule.ptr, pd->rdomain);
-			}
-			goto bad;
-		}
-		ip6->ip6_hlim -= IPV6_HLIMDEC;
-	}
-
-	memset(&sin6, 0, sizeof(sin6));
-	dst = &sin6;
-	dst->sin6_family = AF_INET6;
-	dst->sin6_len = sizeof(*dst);
-	dst->sin6_addr = st->rt_addr.v6;
-	rtableid = m0->m_pkthdr.ph_rtableid;
-
-	rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0],
-	    rtableid);
-	if (!rtisvalid(rt)) {
-		if (st->rt != PF_DUPTO) {
-			pf_send_icmp(m0, ICMP6_DST_UNREACH,
-			    ICMP6_DST_UNREACH_NOROUTE, 0,
-			    pd->af, st->rule.ptr, pd->rdomain);
-		}
-		ip6stat_inc(ip6s_noroute);
-		goto bad;
-	}
-
-	ifp = if_get(rt->rt_ifidx);
-	if (ifp == NULL)
-		goto bad;
-
-	/* A locally generated packet may have invalid source address. */
-	if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
-	    (ifp->if_flags & IFF_LOOPBACK) == 0)
-		ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
-
-	if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
-		if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
-			goto bad;
-		else if (m0 == NULL)
-			goto done;
-		if (m0->m_len < sizeof(struct ip6_hdr)) {
-			DPFPRINTF(LOG_ERR,
-			    "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
-			goto bad;
-		}
-	}
+	*(struct pf_addr *)(mtag + 1) = st->rt_addr;
+	m_tag_prepend(m0, mtag);
 
-	/*
-	 * If packet has been reassembled by PF earlier, we have to
-	 * use pf_refragment6() here to turn it back to fragments.
-	 */
-	if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
-		(void) pf_refragment6(&m0, mtag, dst, ifp, rt);
-		goto done;
-	}
-
-	if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) ||
-	    m0 == NULL)
-		goto done;
+	if (st->rt == PF_DUPTO) {
+		SET(m0->m_pkthdr.pf.flags, PF_TAG_GENERATED);
 
-	ip6stat_inc(ip6s_cantfrag);
-	if (st->rt != PF_DUPTO)
-		pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
-		    ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
-	goto bad;
+		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
+	} else if (pd->dir == PF_OUT)
+		SET(m0->m_pkthdr.pf.flags, PF_TAG_REROUTE);
 
-done:
-	if_put(ifp);
-	rtfree(rt);
 	return;
-
-bad:
+drop:
+	if (m0 == pd->m)
+		pd->m = NULL;
 	m_freem(m0);
-	goto done;
 }
 #endif /* INET6 */
 
@@ -8059,6 +7920,9 @@ pf_ouraddr(struct mbuf *m)
 {
 	struct pf_state_key	*sk;
 
+	if (ISSET(m->m_pkthdr.ph_tagsset, PACKET_TAG_PF_ROUTE))
+		return (0); 
+
 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
 		return (1);
 
@@ -8078,6 +7942,16 @@ pf_ouraddr(struct mbuf *m)
 void
 pf_pkt_addr_changed(struct mbuf *m)
 {
+	struct m_tag *mtag;
+
+	mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
+	if (mtag != NULL) {
+		m_tag_delete(m, mtag);
+
+		KASSERTMSG(m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL) == NULL,
+		    "mbuf %p had multiple PACKET_TAG_PF_ROUTE mbuf tags", m);
+	}
+
 	pf_mbuf_unlink_state_key(m);
 	pf_mbuf_unlink_inpcb(m);
 }
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.401 ip_output.c
--- netinet/ip_output.c	2 Jul 2024 18:33:47 -0000	1.401
+++ netinet/ip_output.c	6 Jul 2024 05:16:51 -0000
@@ -110,7 +110,9 @@ ip_output(struct mbuf *m, struct mbuf *o
 	struct sockaddr_in *dst;
 	struct tdb *tdb = NULL;
 	u_long mtu;
+	struct in_addr *rt_dst;
 #if NPF > 0
+	struct m_tag *rt_mtag;
 	u_int orig_rtableid;
 #endif
 
@@ -128,7 +130,7 @@ ip_output(struct mbuf *m, struct mbuf *o
 	/*
 	 * Fill in IP header.
 	 */
-	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
+	if (!ISSET(flags, IP_FORWARDING|IP_RAWOUTPUT)) {
 		ip->ip_v = IPVERSION;
 		ip->ip_off &= htons(IP_DF);
 		ip->ip_id = htons(ip_randomid());
@@ -151,6 +153,7 @@ ip_output(struct mbuf *m, struct mbuf *o
 	orig_rtableid = m->m_pkthdr.ph_rtableid;
 reroute:
 #endif
+	rt_dst = &ip->ip_dst;
 
 	/*
 	 * Do a route lookup now in case we need the source address to
@@ -163,11 +166,19 @@ reroute:
 		ro->ro_rt = NULL;
 	}
 
+#if NPF > 0
+	rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
+	if (rt_mtag != NULL) {
+		struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
+		rt_dst = &rt_addr->v4;
+	}
+#endif
+
 	/*
 	 * If there is a cached route, check that it is to the same
 	 * destination and is still up.  If not, free it and try again.
 	 */
-	route_cache(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
+	route_cache(ro, rt_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
 	dst = &ro->ro_dstsin;
 
 	if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v
diff -u -p -r1.120 ip6_forward.c
--- netinet6/ip6_forward.c	4 Jul 2024 12:50:08 -0000	1.120
+++ netinet6/ip6_forward.c	6 Jul 2024 05:16:51 -0000
@@ -95,6 +95,10 @@ ip6_forward(struct mbuf *m, struct route
 	struct tdb *tdb = NULL;
 #endif /* IPSEC */
 	char src6[INET6_ADDRSTRLEN], dst6[INET6_ADDRSTRLEN];
+	struct in6_addr *rt_dst;
+#if NPF > 0
+	struct m_tag *rt_mtag;
+#endif
 
 	/*
 	 * Do not forward packets to multicast destination (should be handled
@@ -166,11 +170,20 @@ reroute:
 	}
 #endif /* IPSEC */
 
+	rt_dst = &ip6->ip6_dst;
+#if NPF > 0
+	rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
+	if (rt_mtag != NULL) {
+		struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
+		rt_dst = &rt_addr->v6;
+	}
+#endif
+
 	if (ro == NULL) {
 		ro = &iproute;
 		ro->ro_rt = NULL;
 	}
-	rt = route6_mpath(ro, &ip6->ip6_dst, &ip6->ip6_src,
+	rt = route6_mpath(ro, rt_dst, &ip6->ip6_src,
 	    m->m_pkthdr.ph_rtableid);
 	if (rt == NULL) {
 		ip6stat_inc(ip6s_noroute);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
diff -u -p -r1.264 ip6_input.c
--- netinet6/ip6_input.c	4 Jul 2024 12:50:08 -0000	1.264
+++ netinet6/ip6_input.c	6 Jul 2024 05:16:51 -0000
@@ -364,7 +364,9 @@ ip6_input_if(struct mbuf **mp, int *offp
 	u_int16_t src_scope, dst_scope;
 #if NPF > 0
 	struct in6_addr odst;
+	struct m_tag *rt_mtag;
 #endif
+	struct in6_addr *rt_dst;
 	int flags = 0;
 
 	KASSERT(*offp == 0);
@@ -523,11 +525,19 @@ ip6_input_if(struct mbuf **mp, int *offp
 		goto out;
 	}
 
-
 	/*
 	 *  Unicast check
 	 */
-	rt = route6_mpath(&ro, &ip6->ip6_dst, &ip6->ip6_src,
+	rt_dst = &ip6->ip6_dst;
+#if NPF > 0
+	rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
+	if (rt_mtag != NULL) {
+		struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
+		rt_dst = &rt_addr->v6;
+	}
+#endif
+
+	rt = route6_mpath(&ro, rt_dst, &ip6->ip6_src,
 	    m->m_pkthdr.ph_rtableid);
 
 	/*
Index: netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
diff -u -p -r1.292 ip6_output.c
--- netinet6/ip6_output.c	4 Jul 2024 12:50:08 -0000	1.292
+++ netinet6/ip6_output.c	6 Jul 2024 05:16:51 -0000
@@ -177,6 +177,7 @@ ip6_output(struct mbuf *m, struct ip6_pk
 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
 	struct ip6_exthdrs exthdrs;
 	struct in6_addr finaldst;
+	struct in6_addr *rt_dst;
 	struct route *ro_pmtu = NULL;
 	int hdrsplit = 0;
 	u_int8_t sproto = 0;
@@ -184,6 +185,9 @@ ip6_output(struct mbuf *m, struct ip6_pk
 #ifdef IPSEC
 	struct tdb *tdb = NULL;
 #endif /* IPSEC */
+#if NPF > 0
+	struct m_tag *rt_mtag;
+#endif
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	finaldst = ip6->ip6_dst;
@@ -387,6 +391,7 @@ ip6_output(struct mbuf *m, struct ip6_pk
 #if NPF > 0
 reroute:
 #endif
+	rt_dst = &ip6->ip6_dst;
 
 	/* initialize cached route */
 	if (ro == NULL) {
@@ -456,8 +461,16 @@ reroute:
 			ifp = if_get(im6o->im6o_ifidx);
 	}
 
+#if NPF > 0
+	rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
+	if (rt_mtag != NULL) {
+		struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
+		rt_dst = &rt_addr->v6;
+	}
+#endif
+
 	if (ifp == NULL) {
-		rt = in6_selectroute(&ip6->ip6_dst, opt, ro,
+		rt = in6_selectroute(rt_dst, opt, ro,
 		    m->m_pkthdr.ph_rtableid);
 		if (rt == NULL) {
 			ip6stat_inc(ip6s_noroute);
@@ -480,7 +493,7 @@ reroute:
 			goto bad;
 		}
 	} else {
-		route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid);
+		route6_cache(ro, rt_dst, NULL, m->m_pkthdr.ph_rtableid);
 	}
 
 	if (rt && (rt->rt_flags & RTF_GATEWAY) &&
Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
diff -u -p -r1.263 mbuf.h
--- sys/mbuf.h	14 Apr 2024 20:46:27 -0000	1.263
+++ sys/mbuf.h	6 Jul 2024 05:16:51 -0000
@@ -477,6 +478,7 @@ struct m_tag *m_tag_next(struct mbuf *, 
 #define PACKET_TAG_GRE			0x0080  /* GRE processing done */
 #define PACKET_TAG_DLT			0x0100 /* data link layer type */
 #define PACKET_TAG_PF_DIVERT		0x0200 /* pf(4) diverted packet */
+#define PACKET_TAG_PF_ROUTE		0x0400 /* pf(4) route-to */
 #define PACKET_TAG_PF_REASSEMBLED	0x0800 /* pf reassembled ipv6 packet */
 #define PACKET_TAG_SRCROUTE		0x1000 /* IPv4 source routing options */
 #define PACKET_TAG_TUNNEL		0x2000	/* Tunnel endpoint address */