Download raw body.
pf_route simplification, or pf_route vs pfsync
regress sys/net/pf_forward shows that it breaks path MTU discovery
with pf route-to.
Unfortunately this regress test requires 4 machines and is hard to
set up. But it covers a bunch of corner caes.
bluhm
On Sat, Jul 06, 2024 at 04:06:16PM +1000, David Gwynne wrote:
> at work i have a pair of firewalls with pfsync set up, and we have
> pfsync defer enabled so we cope better in when routes flop between
> them.
>
> i was trying to use route-to in a rule like this:
>
> pass in quick proto { tcp udp } from $mydesktop to $rns port domain
> route-to $newrns
>
> the $rns addresses are aliases on the resolver boxes, so i just
> needed to force the packets to go to the new server without actually
> rewriting them.
>
> this didnt work as expeted. the packet leaving the firewall always
> went to the original resolver. the weird thing was that for tcp
> connections the first packet went to the original resolver which would
> ack the connection attempt, but the firewall would send subsequent
> packets packet to th new resolvers. this also didnt work.
>
> i figured out it was a bad interaction between pfsync defer and
> route-to.
>
> route-to steals packets leaving the firewall and holds onto them
> until the peer pfsync box acks the state creation or a timeout
> occurs. if we're using route-to on a "pass in" rule, then pf_route
> has done it's thing. when it comes time for pfsync to send the
> packet out, it's lost the work that pf_route did to change the
> outgoing gateway/interface and assumes it can route it normally.
>
> i tried to fix this by using the state links to look at the incoming
> state and using that to rerun pf_route. however, this doesnt work
> because we don't really have links between pf states, we end up
> with links between pf_state_keys (see "link mbufs/inpcbs to pf_states,
> not pf_state_keys" sent to this list last year), so resolving the
> incoming state is more complicated than it should be. on top of
> that, these links don't get set up properly on the first packets
> in the state, and even if they were set up pf tears them down by
> the time pfsync steals the packet.
>
> my second attempt was to gut pf_route and have it jus add an mbuf
> tag with the dst address i want to use instead of the real ip. the
> actual ip stack is then modified to use the tag for the real route
> lookups.
>
> in my opinion this turned out really well. it's long bothered me that
> pf_route and pf_route6 are a large duplication of the ip_output code.
> there's been cases where we forget to update pf_route with new stack
> features. this problem goes away if we just use the stack for
> everything. a big chunk of code just goes away.
>
> pfsync defer with route-to works too.
>
> ive been running this in production since the end of april without
> issue. if there's a performance hit i haven't noticed it. mbuf tag
> lookups are cheap since henning added what's basically a bloom
> filter for them to mbufs.
>
> ok?
>
> Index: net/if_pfsync.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_pfsync.c,v
> diff -u -p -r1.326 if_pfsync.c
> --- net/if_pfsync.c 24 May 2024 06:38:41 -0000 1.326
> +++ net/if_pfsync.c 6 Jul 2024 05:16:51 -0000
> @@ -2068,13 +2068,16 @@ pfsync_deferrals_task(void *arg)
> static void
> pfsync_defer_output(struct pfsync_deferral *pd)
> {
> - struct pf_pdesc pdesc;
> struct pf_state *st = pd->pd_st;
> + struct mbuf *m = pd->pd_m;
> +
> + if (st->rt) {
> + struct pf_pdesc pdesc;
>
> - if (st->rt == PF_ROUTETO) {
> if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
> - st->direction, NULL, pd->pd_m, NULL) != PF_PASS)
> - return;
> + st->direction, NULL, m, NULL) != PF_PASS)
> + goto done;
> +
> switch (st->key[PF_SK_WIRE]->af) {
> case AF_INET:
> pf_route(&pdesc, st);
> @@ -2087,26 +2090,27 @@ pfsync_defer_output(struct pfsync_deferr
> default:
> unhandled_af(st->key[PF_SK_WIRE]->af);
> }
> - pd->pd_m = pdesc.m;
> - } else {
> - switch (st->key[PF_SK_WIRE]->af) {
> - case AF_INET:
> - ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0);
> - break;
> + m = pdesc.m;
> +
> + if (m == NULL)
> + goto done;
> + }
> +
> + switch (st->key[PF_SK_WIRE]->af) {
> + case AF_INET:
> + ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
> + break;
> #ifdef INET6
> - case AF_INET6:
> - ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
> - break;
> + case AF_INET6:
> + ip6_output(m, NULL, NULL, 0, NULL, NULL);
> + break;
> #endif /* INET6 */
> - default:
> - unhandled_af(st->key[PF_SK_WIRE]->af);
> - }
> -
> - pd->pd_m = NULL;
> + default:
> + unhandled_af(st->key[PF_SK_WIRE]->af);
> }
>
> +done:
> pf_state_unref(st);
> - m_freem(pd->pd_m);
> pool_put(&pfsync_deferrals_pool, pd);
> }
>
> Index: net/pf.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pf.c,v
> diff -u -p -r1.1201 pf.c
> --- net/pf.c 4 Jul 2024 12:50:08 -0000 1.1201
> +++ net/pf.c 6 Jul 2024 05:16:51 -0000
> @@ -6568,12 +6568,7 @@ void
> pf_route(struct pf_pdesc *pd, struct pf_state *st)
> {
> struct mbuf *m0;
> - struct mbuf_list ml;
> - struct sockaddr_in *dst, sin;
> - struct rtentry *rt = NULL;
> - struct ip *ip;
> - struct ifnet *ifp = NULL;
> - unsigned int rtableid;
> + struct m_tag *mtag;
>
> if (pd->m->m_pkthdr.pf.routed++ > 3) {
> m_freem(pd->m);
> @@ -6582,117 +6577,47 @@ pf_route(struct pf_pdesc *pd, struct pf_
> }
>
> if (st->rt == PF_DUPTO) {
> - if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
> - return;
> - } else {
> - if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
> + m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT);
> + if (m0 == NULL)
> return;
> + } else if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
> + return;
> + else
> m0 = pd->m;
> - pd->m = NULL;
> - }
> -
> - if (m0->m_len < sizeof(struct ip)) {
> - DPFPRINTF(LOG_ERR,
> - "%s: m0->m_len < sizeof(struct ip)", __func__);
> - goto bad;
> - }
>
> - ip = mtod(m0, struct ip *);
> -
> - if (pd->dir == PF_IN) {
> - if (ip->ip_ttl <= IPTTLDEC) {
> - if (st->rt != PF_DUPTO) {
> - pf_send_icmp(m0, ICMP_TIMXCEED,
> - ICMP_TIMXCEED_INTRANS, 0,
> - pd->af, st->rule.ptr, pd->rdomain);
> - }
> - goto bad;
> - }
> - ip->ip_ttl -= IPTTLDEC;
> - }
> -
> - memset(&sin, 0, sizeof(sin));
> - dst = &sin;
> - dst->sin_family = AF_INET;
> - dst->sin_len = sizeof(*dst);
> - dst->sin_addr = st->rt_addr.v4;
> - rtableid = m0->m_pkthdr.ph_rtableid;
> -
> - rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid);
> - if (!rtisvalid(rt)) {
> - if (st->rt != PF_DUPTO) {
> - pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
> - 0, pd->af, st->rule.ptr, pd->rdomain);
> - }
> - ipstat_inc(ips_noroute);
> - goto bad;
> - }
> -
> - ifp = if_get(rt->rt_ifidx);
> - if (ifp == NULL)
> - goto bad;
> -
> - /* A locally generated packet may have invalid source address. */
> - if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
> - (ifp->if_flags & IFF_LOOPBACK) == 0)
> - ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
> -
> - if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
> - if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
> - goto bad;
> - else if (m0 == NULL)
> - goto done;
> - if (m0->m_len < sizeof(struct ip)) {
> - DPFPRINTF(LOG_ERR,
> - "%s: m0->m_len < sizeof(struct ip)", __func__);
> - goto bad;
> - }
> - ip = mtod(m0, struct ip *);
> + mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTE, NULL);
> + if (mtag == NULL) {
> + mtag = m_tag_get(PACKET_TAG_PF_ROUTE, sizeof(st->rt_addr),
> + M_NOWAIT);
> + if (mtag == NULL)
> + goto drop;
> }
>
> - if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) ||
> - m0 == NULL)
> - goto done;
> + *(struct pf_addr *)(mtag + 1) = st->rt_addr;
> + m_tag_prepend(m0, mtag);
>
> - /*
> - * Too large for interface; fragment if possible.
> - * Must be able to put at least 8 bytes per fragment.
> - */
> - if (ip->ip_off & htons(IP_DF)) {
> - ipstat_inc(ips_cantfrag);
> - if (st->rt != PF_DUPTO)
> - pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
> - ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
> - goto bad;
> - }
> + if (st->rt == PF_DUPTO) {
> + SET(m0->m_pkthdr.pf.flags, PF_TAG_GENERATED);
>
> - if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) ||
> - if_output_ml(ifp, &ml, sintosa(dst), rt))
> - goto done;
> - ipstat_inc(ips_fragmented);
> + ip_output(m0, NULL, NULL, 0, NULL, NULL, 0);
> + } else if (pd->dir == PF_OUT)
> + SET(m0->m_pkthdr.pf.flags, PF_TAG_REROUTE);
>
> -done:
> - if_put(ifp);
> - rtfree(rt);
> return;
> -
> -bad:
> +drop:
> + if (m0 == pd->m)
> + pd->m = NULL;
> m_freem(m0);
> - goto done;
> }
>
> #ifdef INET6
> -/* pf_route6() may change pd->m, adjust local copies after calling */
> void
> pf_route6(struct pf_pdesc *pd, struct pf_state *st)
> {
> struct mbuf *m0;
> - struct sockaddr_in6 *dst, sin6;
> - struct rtentry *rt = NULL;
> - struct ip6_hdr *ip6;
> - struct ifnet *ifp = NULL;
> struct m_tag *mtag;
> - unsigned int rtableid;
> +
> +printf("%s\n", __func__);
>
> if (pd->m->m_pkthdr.pf.routed++ > 3) {
> m_freem(pd->m);
> @@ -6701,101 +6626,37 @@ pf_route6(struct pf_pdesc *pd, struct pf
> }
>
> if (st->rt == PF_DUPTO) {
> - if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
> - return;
> - } else {
> - if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
> + m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT);
> + if (m0 == NULL)
> return;
> + } else if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
> + return;
> + else
> m0 = pd->m;
> - pd->m = NULL;
> - }
>
> - if (m0->m_len < sizeof(struct ip6_hdr)) {
> - DPFPRINTF(LOG_ERR,
> - "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
> - goto bad;
> + mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTE, NULL);
> + if (mtag == NULL) {
> + mtag = m_tag_get(PACKET_TAG_PF_ROUTE, sizeof(st->rt_addr),
> + M_NOWAIT);
> + if (mtag == NULL)
> + goto drop;
> }
> - ip6 = mtod(m0, struct ip6_hdr *);
>
> - if (pd->dir == PF_IN) {
> - if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
> - if (st->rt != PF_DUPTO) {
> - pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
> - ICMP6_TIME_EXCEED_TRANSIT, 0,
> - pd->af, st->rule.ptr, pd->rdomain);
> - }
> - goto bad;
> - }
> - ip6->ip6_hlim -= IPV6_HLIMDEC;
> - }
> -
> - memset(&sin6, 0, sizeof(sin6));
> - dst = &sin6;
> - dst->sin6_family = AF_INET6;
> - dst->sin6_len = sizeof(*dst);
> - dst->sin6_addr = st->rt_addr.v6;
> - rtableid = m0->m_pkthdr.ph_rtableid;
> -
> - rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0],
> - rtableid);
> - if (!rtisvalid(rt)) {
> - if (st->rt != PF_DUPTO) {
> - pf_send_icmp(m0, ICMP6_DST_UNREACH,
> - ICMP6_DST_UNREACH_NOROUTE, 0,
> - pd->af, st->rule.ptr, pd->rdomain);
> - }
> - ip6stat_inc(ip6s_noroute);
> - goto bad;
> - }
> -
> - ifp = if_get(rt->rt_ifidx);
> - if (ifp == NULL)
> - goto bad;
> -
> - /* A locally generated packet may have invalid source address. */
> - if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
> - (ifp->if_flags & IFF_LOOPBACK) == 0)
> - ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
> -
> - if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
> - if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
> - goto bad;
> - else if (m0 == NULL)
> - goto done;
> - if (m0->m_len < sizeof(struct ip6_hdr)) {
> - DPFPRINTF(LOG_ERR,
> - "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
> - goto bad;
> - }
> - }
> + *(struct pf_addr *)(mtag + 1) = st->rt_addr;
> + m_tag_prepend(m0, mtag);
>
> - /*
> - * If packet has been reassembled by PF earlier, we have to
> - * use pf_refragment6() here to turn it back to fragments.
> - */
> - if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
> - (void) pf_refragment6(&m0, mtag, dst, ifp, rt);
> - goto done;
> - }
> -
> - if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) ||
> - m0 == NULL)
> - goto done;
> + if (st->rt == PF_DUPTO) {
> + SET(m0->m_pkthdr.pf.flags, PF_TAG_GENERATED);
>
> - ip6stat_inc(ip6s_cantfrag);
> - if (st->rt != PF_DUPTO)
> - pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
> - ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
> - goto bad;
> + ip6_output(m0, NULL, NULL, 0, NULL, NULL);
> + } else if (pd->dir == PF_OUT)
> + SET(m0->m_pkthdr.pf.flags, PF_TAG_REROUTE);
>
> -done:
> - if_put(ifp);
> - rtfree(rt);
> return;
> -
> -bad:
> +drop:
> + if (m0 == pd->m)
> + pd->m = NULL;
> m_freem(m0);
> - goto done;
> }
> #endif /* INET6 */
>
> @@ -8059,6 +7920,9 @@ pf_ouraddr(struct mbuf *m)
> {
> struct pf_state_key *sk;
>
> + if (ISSET(m->m_pkthdr.ph_tagsset, PACKET_TAG_PF_ROUTE))
> + return (0);
> +
> if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
> return (1);
>
> @@ -8078,6 +7942,16 @@ pf_ouraddr(struct mbuf *m)
> void
> pf_pkt_addr_changed(struct mbuf *m)
> {
> + struct m_tag *mtag;
> +
> + mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
> + if (mtag != NULL) {
> + m_tag_delete(m, mtag);
> +
> + KASSERTMSG(m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL) == NULL,
> + "mbuf %p had multiple PACKET_TAG_PF_ROUTE mbuf tags", m);
> + }
> +
> pf_mbuf_unlink_state_key(m);
> pf_mbuf_unlink_inpcb(m);
> }
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.401 ip_output.c
> --- netinet/ip_output.c 2 Jul 2024 18:33:47 -0000 1.401
> +++ netinet/ip_output.c 6 Jul 2024 05:16:51 -0000
> @@ -110,7 +110,9 @@ ip_output(struct mbuf *m, struct mbuf *o
> struct sockaddr_in *dst;
> struct tdb *tdb = NULL;
> u_long mtu;
> + struct in_addr *rt_dst;
> #if NPF > 0
> + struct m_tag *rt_mtag;
> u_int orig_rtableid;
> #endif
>
> @@ -128,7 +130,7 @@ ip_output(struct mbuf *m, struct mbuf *o
> /*
> * Fill in IP header.
> */
> - if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
> + if (!ISSET(flags, IP_FORWARDING|IP_RAWOUTPUT)) {
> ip->ip_v = IPVERSION;
> ip->ip_off &= htons(IP_DF);
> ip->ip_id = htons(ip_randomid());
> @@ -151,6 +153,7 @@ ip_output(struct mbuf *m, struct mbuf *o
> orig_rtableid = m->m_pkthdr.ph_rtableid;
> reroute:
> #endif
> + rt_dst = &ip->ip_dst;
>
> /*
> * Do a route lookup now in case we need the source address to
> @@ -163,11 +166,19 @@ reroute:
> ro->ro_rt = NULL;
> }
>
> +#if NPF > 0
> + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
> + if (rt_mtag != NULL) {
> + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
> + rt_dst = &rt_addr->v4;
> + }
> +#endif
> +
> /*
> * If there is a cached route, check that it is to the same
> * destination and is still up. If not, free it and try again.
> */
> - route_cache(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
> + route_cache(ro, rt_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
> dst = &ro->ro_dstsin;
>
> if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.120 ip6_forward.c
> --- netinet6/ip6_forward.c 4 Jul 2024 12:50:08 -0000 1.120
> +++ netinet6/ip6_forward.c 6 Jul 2024 05:16:51 -0000
> @@ -95,6 +95,10 @@ ip6_forward(struct mbuf *m, struct route
> struct tdb *tdb = NULL;
> #endif /* IPSEC */
> char src6[INET6_ADDRSTRLEN], dst6[INET6_ADDRSTRLEN];
> + struct in6_addr *rt_dst;
> +#if NPF > 0
> + struct m_tag *rt_mtag;
> +#endif
>
> /*
> * Do not forward packets to multicast destination (should be handled
> @@ -166,11 +170,20 @@ reroute:
> }
> #endif /* IPSEC */
>
> + rt_dst = &ip6->ip6_dst;
> +#if NPF > 0
> + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
> + if (rt_mtag != NULL) {
> + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
> + rt_dst = &rt_addr->v6;
> + }
> +#endif
> +
> if (ro == NULL) {
> ro = &iproute;
> ro->ro_rt = NULL;
> }
> - rt = route6_mpath(ro, &ip6->ip6_dst, &ip6->ip6_src,
> + rt = route6_mpath(ro, rt_dst, &ip6->ip6_src,
> m->m_pkthdr.ph_rtableid);
> if (rt == NULL) {
> ip6stat_inc(ip6s_noroute);
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> diff -u -p -r1.264 ip6_input.c
> --- netinet6/ip6_input.c 4 Jul 2024 12:50:08 -0000 1.264
> +++ netinet6/ip6_input.c 6 Jul 2024 05:16:51 -0000
> @@ -364,7 +364,9 @@ ip6_input_if(struct mbuf **mp, int *offp
> u_int16_t src_scope, dst_scope;
> #if NPF > 0
> struct in6_addr odst;
> + struct m_tag *rt_mtag;
> #endif
> + struct in6_addr *rt_dst;
> int flags = 0;
>
> KASSERT(*offp == 0);
> @@ -523,11 +525,19 @@ ip6_input_if(struct mbuf **mp, int *offp
> goto out;
> }
>
> -
> /*
> * Unicast check
> */
> - rt = route6_mpath(&ro, &ip6->ip6_dst, &ip6->ip6_src,
> + rt_dst = &ip6->ip6_dst;
> +#if NPF > 0
> + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
> + if (rt_mtag != NULL) {
> + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
> + rt_dst = &rt_addr->v6;
> + }
> +#endif
> +
> + rt = route6_mpath(&ro, rt_dst, &ip6->ip6_src,
> m->m_pkthdr.ph_rtableid);
>
> /*
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
> diff -u -p -r1.292 ip6_output.c
> --- netinet6/ip6_output.c 4 Jul 2024 12:50:08 -0000 1.292
> +++ netinet6/ip6_output.c 6 Jul 2024 05:16:51 -0000
> @@ -177,6 +177,7 @@ ip6_output(struct mbuf *m, struct ip6_pk
> u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
> struct ip6_exthdrs exthdrs;
> struct in6_addr finaldst;
> + struct in6_addr *rt_dst;
> struct route *ro_pmtu = NULL;
> int hdrsplit = 0;
> u_int8_t sproto = 0;
> @@ -184,6 +185,9 @@ ip6_output(struct mbuf *m, struct ip6_pk
> #ifdef IPSEC
> struct tdb *tdb = NULL;
> #endif /* IPSEC */
> +#if NPF > 0
> + struct m_tag *rt_mtag;
> +#endif
>
> ip6 = mtod(m, struct ip6_hdr *);
> finaldst = ip6->ip6_dst;
> @@ -387,6 +391,7 @@ ip6_output(struct mbuf *m, struct ip6_pk
> #if NPF > 0
> reroute:
> #endif
> + rt_dst = &ip6->ip6_dst;
>
> /* initialize cached route */
> if (ro == NULL) {
> @@ -456,8 +461,16 @@ reroute:
> ifp = if_get(im6o->im6o_ifidx);
> }
>
> +#if NPF > 0
> + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL);
> + if (rt_mtag != NULL) {
> + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1);
> + rt_dst = &rt_addr->v6;
> + }
> +#endif
> +
> if (ifp == NULL) {
> - rt = in6_selectroute(&ip6->ip6_dst, opt, ro,
> + rt = in6_selectroute(rt_dst, opt, ro,
> m->m_pkthdr.ph_rtableid);
> if (rt == NULL) {
> ip6stat_inc(ip6s_noroute);
> @@ -480,7 +493,7 @@ reroute:
> goto bad;
> }
> } else {
> - route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid);
> + route6_cache(ro, rt_dst, NULL, m->m_pkthdr.ph_rtableid);
> }
>
> if (rt && (rt->rt_flags & RTF_GATEWAY) &&
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> diff -u -p -r1.263 mbuf.h
> --- sys/mbuf.h 14 Apr 2024 20:46:27 -0000 1.263
> +++ sys/mbuf.h 6 Jul 2024 05:16:51 -0000
> @@ -477,6 +478,7 @@ struct m_tag *m_tag_next(struct mbuf *,
> #define PACKET_TAG_GRE 0x0080 /* GRE processing done */
> #define PACKET_TAG_DLT 0x0100 /* data link layer type */
> #define PACKET_TAG_PF_DIVERT 0x0200 /* pf(4) diverted packet */
> +#define PACKET_TAG_PF_ROUTE 0x0400 /* pf(4) route-to */
> #define PACKET_TAG_PF_REASSEMBLED 0x0800 /* pf reassembled ipv6 packet */
> #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
> #define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
pf_route simplification, or pf_route vs pfsync