Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
pf: make af-to less magical
To:
tech@openbsd.org
Date:
Fri, 16 Jan 2026 12:11:57 +1000

Download raw body.

Thread
  • David Gwynne:

    pf: make af-to less magical

i only recently figured out that af-to is very special in pf, but i dont
think it should be.

currently af-to has the following restrictions:

1. it only works for incoming packets, ie, you can only use it on "pass
in" rules in pf.

2. it forces the translated packet to be forwarded.

a consequence of these, and 2 in particular, is that only one state is
created for an af-to connection over the firewall. this is unlike other
forwarded connections where there's generally two states created, one
when the packet comes in from the wire into the stack, and another when
the packet goes out from the stack to the wire.

this difference means that af-to is special cased in pf a lot. i'm
arguing that it would be better if we made af-to less special, which
is what this diff does.

the important bit here is af-to no longer forces forwarding of
packets. this is implemented by having pf_test called by ip_input
push the translated packet into ip6_input instead of ip6_forward.
this leads to a problem where we need to skip processing by pf_test
in this nested ip6_input call after the packet has been translated.

this is the same problem we already have in ip_output and ip6_output
when we redirect or reroute outgoing packets, which is solved using
a little state machine with the PF_TAG_GENERATED and PF_TAG_REROUTE
flags on mbufs. this diff makes this state machine work on the
ip_input and ip6_input side to support af-to.

because the translated packets are now handed to the other
ip_input/ip6_input handler, it allows the local tcp/udp/etc stack
to handle these translated packets like we do for rdr-to.

we can also use this same semantic to support translation for "pass
out" rules. this allows us to translate ipv4 or ipv6 packets from
the local network stack to the other address family, and it Just
Works(tm).

there is a downside from an operator point of view though. because af-to
no longer forces forwarding, forwarded connections now require rules to
allow the outgoing traffic to pass. ie, this worked before:

block
pass in on em0 inet proto udp \
	from 192.168.1.1 to 192.168.2.1 \
	af-to from 2407::1 to 2407:2::1

but now you need to add this rule too:

pass out inet6 proto udp \
	from 2407::1 to 2407:2::1

however, the code is so much simpler. we no longer have to special
case and swap key indices around to cope with there only being one
af-to state to cover both sides of a forwarded connection anymore.

thoughts?

i've only done minimal testing. my interest is mostly in making the code
simpler, so i'd appreciate feedback from people who actually use af-to
about whether it still works or not.

Index: sbin/pfctl/parse.y
===================================================================
RCS file: /cvs/src/sbin/pfctl/parse.y,v
diff -u -p -r1.723 parse.y
--- sbin/pfctl/parse.y	7 Jan 2026 13:50:05 -0000	1.723
+++ sbin/pfctl/parse.y	14 Jan 2026 04:42:10 -0000
@@ -4462,10 +4462,6 @@ rule_consistent(struct pf_rule *r)
 			   "are not supported on match rules");
 			problems++;
 		}
-		if (r->rule_flag & PFRULE_AFTO) {
-			yyerror("af-to is not supported on match rules");
-			problems++;
-		}
 		break;
 	case PF_DROP:
 		if (r->rt) {
@@ -6485,10 +6481,6 @@ filteropts_to_rule(struct pf_rule *r, st
 
 	if (opts->marker & FOM_AFTO)
 		r->rule_flag |= PFRULE_AFTO;
-	if ((opts->marker & FOM_AFTO) && r->direction != PF_IN) {
-		yyerror("af-to can only be used with direction in");
-		return (1);
-	}
 	if ((opts->marker & FOM_AFTO) && opts->rt) {
 		yyerror("af-to cannot be used together with "
 		    "route-to, reply-to, dup-to");
Index: sys/net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
diff -u -p -r1.1230 pf.c
--- sys/net/pf.c	7 Jan 2026 13:50:05 -0000	1.1230
+++ sys/net/pf.c	14 Jan 2026 04:42:10 -0000
@@ -1025,44 +1025,41 @@ pf_state_key_attach(struct pf_state_key 
 		/* key exists. check for same kif, if none, add to key */
 		TAILQ_FOREACH(si, &cur->sk_states, si_entry) {
 			struct pf_state *sist = si->si_st;
-			if (sist->kif == st->kif &&
-			    ((sist->key[PF_SK_WIRE]->af == sk->af &&
-			     sist->direction == st->direction) ||
-			    (sist->key[PF_SK_WIRE]->af !=
-			     sist->key[PF_SK_STACK]->af &&
-			     sk->af == sist->key[PF_SK_STACK]->af &&
-			     sist->direction != st->direction))) {
-				int reuse = 0;
-
-				if (sk->proto == IPPROTO_TCP &&
-				    sist->src.state >= TCPS_FIN_WAIT_2 &&
-				    sist->dst.state >= TCPS_FIN_WAIT_2)
-					reuse = 1;
-				if (pf_status.debug >= LOG_NOTICE) {
-					log(LOG_NOTICE,
-					    "pf: %s key attach %s on %s: ",
-					    (idx == PF_SK_WIRE) ?
-					    "wire" : "stack",
-					    reuse ? "reuse" : "failed",
-					    st->kif->pfik_name);
-					pf_print_state_parts(st,
-					    (idx == PF_SK_WIRE) ?  sk : NULL,
-					    (idx == PF_SK_STACK) ?  sk : NULL);
-					addlog(", existing: ");
-					pf_print_state_parts(sist,
-					    (idx == PF_SK_WIRE) ?  sk : NULL,
-					    (idx == PF_SK_STACK) ?  sk : NULL);
-					addlog("\n");
-				}
-				if (reuse) {
-					pf_set_protostate(sist, PF_PEER_BOTH,
-					    TCPS_CLOSED);
-					/* remove late or sks can go away */
-					oldst = sist;
-				} else {
-					pf_state_key_unref(sk);
-					return (NULL);	/* collision! */
-				}
+			int reuse = 0;
+
+			if (sist->kif != st->kif)
+				continue;
+			if (sist->direction != st->direction)
+				continue;
+
+			if (sk->proto == IPPROTO_TCP &&
+			    sist->src.state >= TCPS_FIN_WAIT_2 &&
+			    sist->dst.state >= TCPS_FIN_WAIT_2)
+				reuse = 1;
+			if (pf_status.debug >= LOG_NOTICE) {
+				log(LOG_NOTICE,
+				    "pf: %s key attach %s on %s: ",
+				    (idx == PF_SK_WIRE) ?
+				    "wire" : "stack",
+				    reuse ? "reuse" : "failed",
+				    st->kif->pfik_name);
+				pf_print_state_parts(st,
+				    (idx == PF_SK_WIRE) ?  sk : NULL,
+				    (idx == PF_SK_STACK) ?  sk : NULL);
+				addlog(", existing: ");
+				pf_print_state_parts(sist,
+				    (idx == PF_SK_WIRE) ?  sk : NULL,
+				    (idx == PF_SK_STACK) ?  sk : NULL);
+				addlog("\n");
+			}
+			if (reuse) {
+				pf_set_protostate(sist, PF_PEER_BOTH,
+				    TCPS_CLOSED);
+				/* remove late or sks can go away */
+				oldst = sist;
+			} else {
+				pf_state_key_unref(sk);
+				return (NULL);	/* collision! */
 			}
 		}
 
@@ -1239,11 +1236,11 @@ pf_state_key_setup(struct pf_pdesc *pd, 
 			pf_state_key_unref(sk1);
 			return (ENOMEM);
 		}
-		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
-		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
+		pf_state_key_addr_setup(pd, sk2, pd->sidx,
+		    &pd->nsaddr, pd->didx, &pd->ndaddr,
 		    pd->naf, 0);
-		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
-		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
+		sk2->port[pd->sidx] = pd->nsport;
+		sk2->port[pd->didx] = pd->ndport;
 		if (afto) {
 			switch (pd->proto) {
 			case IPPROTO_ICMP:
@@ -1479,25 +1476,8 @@ pf_find_state(struct pf_pdesc *pd, struc
 		if (sist->kif != pfi_all && sist->kif != pd->kif)
 			continue;
 
-		/* af-to needs to handled specially */
-		if (sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af) {
-			if (sk != sist->key[didx])
-				continue;
-
-		/* af-to case */
-		} else {
-			/*
-			 * af-to creates state for incoming (PF_IN)
-			 * connections, and then forces forwarding without
-			 * creating an outgoing state. this means the one
-			 * state covers both sides of the stack, so should
-			 * only match when pd dir is PF_IN.
-			 */
-			if (pd->dir != PF_IN)
-				continue;
-
-			/* one of the sist keys has to be sk */
-		}
+		if (sk != sist->key[didx])
+			continue;
 
 		st = sist;
 		break;
@@ -5066,10 +5046,10 @@ pf_test_rule(struct pf_pdesc *pd, struct
 			else
 				sk = skw;
 			rewrite += pf_translate(pd,
-			    &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
-			    sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
-			    &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
-			    sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
+			    &sk->addr[pd->sidx],
+			    sk->port[pd->sidx],
+			    &sk->addr[pd->didx],
+			    sk->port[pd->didx],
 			    virtual_type, ctx.icmp_dir);
 		}
 
@@ -6150,7 +6130,7 @@ pf_test_state(struct pf_pdesc *pd, struc
 				return (PF_DROP);
 		} else {
 			if (pf_tcp_track_full(pd, stp, reason, &copyback,
-			    PF_REVERSED_KEY((*stp)->key, pd->af)) == PF_DROP)
+			    0) == PF_DROP)
 				return (PF_DROP);
 		}
 		break;
@@ -6191,14 +6171,11 @@ pf_test_state(struct pf_pdesc *pd, struc
 		struct pf_state_key	*nk;
 		int			 afto, sidx, didx;
 
-		if (PF_REVERSED_KEY((*stp)->key, pd->af))
-			nk = (*stp)->key[pd->sidx];
-		else
-			nk = (*stp)->key[pd->didx];
+		nk = (*stp)->key[pd->didx];
 
 		afto = pd->af != nk->af;
-		sidx = afto ? pd->didx : pd->sidx;
-		didx = afto ? pd->sidx : pd->didx;
+		sidx = pd->sidx;
+		didx = pd->didx;
 
 #ifdef INET6
 		if (afto) {
@@ -6271,11 +6248,7 @@ pf_icmp_state_lookup(struct pf_pdesc *pd
 		return (-1);
 
 	/* Is this ICMP message flowing in right direction? */
-	if ((*stp)->key[PF_SK_WIRE]->af != (*stp)->key[PF_SK_STACK]->af)
-		direction = (pd->af == (*stp)->key[PF_SK_WIRE]->af) ?
-		    PF_IN : PF_OUT;
-	else
-		direction = (*stp)->direction;
+	direction = (*stp)->direction;
 	if ((((!inner && direction == pd->dir) ||
 	    (inner && direction != pd->dir)) ?
 	    PF_IN : PF_OUT) != icmp_dir) {
@@ -6340,15 +6313,11 @@ pf_test_state_icmp(struct pf_pdesc *pd, 
 			struct pf_state_key	*nk;
 			int			 afto, sidx, didx;
 
-			if (PF_REVERSED_KEY((*stp)->key, pd->af))
-				nk = (*stp)->key[pd->sidx];
-			else
-				nk = (*stp)->key[pd->didx];
+			nk = (*stp)->key[pd->didx];
 
 			afto = pd->af != nk->af;
-			sidx = afto ? pd->didx : pd->sidx;
-			didx = afto ? pd->sidx : pd->didx;
-			iidx = afto ? !iidx : iidx;
+			sidx = pd->sidx;
+			didx = pd->didx;
 #ifdef	INET6
 			if (afto) {
 				pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
@@ -6542,21 +6511,11 @@ pf_test_state_icmp(struct pf_pdesc *pd, 
 				return (action);
 
 			if (pd2.dir == (*stp)->direction) {
-				if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
-					src = &(*stp)->src;
-					dst = &(*stp)->dst;
-				} else {
-					src = &(*stp)->dst;
-					dst = &(*stp)->src;
-				}
+				src = &(*stp)->dst;
+				dst = &(*stp)->src;
 			} else {
-				if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
-					src = &(*stp)->dst;
-					dst = &(*stp)->src;
-				} else {
-					src = &(*stp)->src;
-					dst = &(*stp)->dst;
-				}
+				src = &(*stp)->src;
+				dst = &(*stp)->dst;
 			}
 
 			if (src->wscale && dst->wscale)
@@ -6607,14 +6566,11 @@ pf_test_state_icmp(struct pf_pdesc *pd, 
 				struct pf_state_key	*nk;
 				int			 afto, sidx, didx;
 
-				if (PF_REVERSED_KEY((*stp)->key, pd->af))
-					nk = (*stp)->key[pd->sidx];
-				else
-					nk = (*stp)->key[pd->didx];
+				nk = (*stp)->key[pd->didx];
 
 				afto = pd->af != nk->af;
-				sidx = afto ? pd2.didx : pd2.sidx;
-				didx = afto ? pd2.sidx : pd2.didx;
+				sidx = pd2.sidx;
+				didx = pd2.didx;
 
 #ifdef INET6
 				if (afto) {
@@ -6737,14 +6693,11 @@ pf_test_state_icmp(struct pf_pdesc *pd, 
 				struct pf_state_key	*nk;
 				int			 afto, sidx, didx;
 
-				if (PF_REVERSED_KEY((*stp)->key, pd->af))
-					nk = (*stp)->key[pd->sidx];
-				else
-					nk = (*stp)->key[pd->didx];
+				nk = (*stp)->key[pd->didx];
 
 				afto = pd->af != nk->af;
-				sidx = afto ? pd2.didx : pd2.sidx;
-				didx = afto ? pd2.sidx : pd2.didx;
+				sidx = pd2.sidx;
+				didx = pd2.didx;
 
 #ifdef INET6
 				if (afto) {
@@ -6869,15 +6822,11 @@ pf_test_state_icmp(struct pf_pdesc *pd, 
 				struct pf_state_key	*nk;
 				int			 afto, sidx, didx;
 
-				if (PF_REVERSED_KEY((*stp)->key, pd->af))
-					nk = (*stp)->key[pd->sidx];
-				else
-					nk = (*stp)->key[pd->didx];
+				nk = (*stp)->key[pd->didx];
 
 				afto = pd->af != nk->af;
-				sidx = afto ? pd2.didx : pd2.sidx;
-				didx = afto ? pd2.sidx : pd2.didx;
-				iidx = afto ? !iidx : iidx;
+				sidx = pd2.sidx;
+				didx = pd2.didx;
 
 #ifdef INET6
 				if (afto) {
@@ -6988,15 +6937,11 @@ pf_test_state_icmp(struct pf_pdesc *pd, 
 				struct pf_state_key	*nk;
 				int			 afto, sidx, didx;
 
-				if (PF_REVERSED_KEY((*stp)->key, pd->af))
-					nk = (*stp)->key[pd->sidx];
-				else
-					nk = (*stp)->key[pd->didx];
+				nk = (*stp)->key[pd->didx];
 
 				afto = pd->af != nk->af;
-				sidx = afto ? pd2.didx : pd2.sidx;
-				didx = afto ? pd2.sidx : pd2.didx;
-				iidx = afto ? !iidx : iidx;
+				sidx = pd2.sidx;
+				didx = pd2.didx;
 
 				if (afto) {
 					if (nk->af != AF_INET)
@@ -8283,6 +8183,29 @@ pf_test(sa_family_t af, int fwdir, struc
 	u_int32_t		 qid, pqid = 0;
 	int			 have_pf_lock = 0;
 
+#ifdef DIAGNOSTIC
+	if (((*m0)->m_flags & M_PKTHDR) == 0)
+		panic("non-M_PKTHDR is passed to pf_test");
+#endif /* DIAGNOSTIC */
+
+	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) {
+		if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REROUTE) {
+			(*m0)->m_pkthdr.pf.flags &=
+			    ~(PF_TAG_GENERATED|PF_TAG_REROUTE);
+		}
+		return (PF_PASS);
+	}
+
+	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) {
+		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET;
+		return (PF_PASS);
+	}
+
+	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
+		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
+		return (PF_PASS);
+	}
+
 	if (!pf_status.running)
 		return (PF_PASS);
 
@@ -8307,24 +8230,6 @@ pf_test(sa_family_t af, int fwdir, struc
 	if (kif->pfik_flags & PFI_IFLAG_SKIP)
 		return (PF_PASS);
 
-#ifdef DIAGNOSTIC
-	if (((*m0)->m_flags & M_PKTHDR) == 0)
-		panic("non-M_PKTHDR is passed to pf_test");
-#endif /* DIAGNOSTIC */
-
-	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED)
-		return (PF_PASS);
-
-	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) {
-		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET;
-		return (PF_PASS);
-	}
-
-	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
-		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
-		return (PF_PASS);
-	}
-
 	action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason);
 	if (action != PF_PASS) {
 #if NPFLOG > 0
@@ -8704,48 +8609,19 @@ done:
 			action = PF_DROP;
 			goto out;
 		}
-		pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+		pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED | PF_TAG_REROUTE;
 		switch (pd.naf) {
 		case AF_INET:
 			if (pd.dir == PF_IN) {
-				int flags = IP_REDIRECT;
-
-				switch (atomic_load_int(&ip_forwarding)) {
-				case 2:
-					SET(flags, IP_FORWARDING_IPSEC);
-					/* FALLTHROUGH */
-				case 1:
-					SET(flags, IP_FORWARDING);
-					break;
-				default:
-					ipstat_inc(ips_cantforward);
-					action = PF_DROP;
-					goto out;
-				}
-				if (atomic_load_int(&ip_directedbcast))
-					SET(flags, IP_ALLOWBROADCAST);
-				ip_forward(pd.m, ifp, NULL, flags);
+				pd.m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
+				ipv4_input(ifp, pd.m, NULL);
 			} else
 				ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
 			break;
 		case AF_INET6:
-			if (pd.dir == PF_IN) {
-				int flags = IPV6_REDIRECT;
-
-				switch (atomic_load_int(&ip6_forwarding)) {
-				case 2:
-					SET(flags, IPV6_FORWARDING_IPSEC);
-					/* FALLTHROUGH */
-				case 1:
-					SET(flags, IPV6_FORWARDING);
-					break;
-				default:
-					ip6stat_inc(ip6s_cantforward);
-					action = PF_DROP;
-					goto out;
-				}
-				ip6_forward(pd.m, NULL, flags);
-			} else
+			if (pd.dir == PF_IN)
+				ipv6_input(ifp, pd.m, NULL);
+			else
 				ip6_output(pd.m, NULL, NULL, 0, NULL, NULL);
 			break;
 		}