Index | Thread | Search

From:
Alexandr Nedvedicky <sashan@fastmail.net>
Subject:
Re: dhcpd(8): use UDP sockets instead of BPF
To:
tech@openbsd.org
Date:
Mon, 16 Jun 2025 08:51:34 +0200

Download raw body.

Thread
Hello,

On Sun, Jun 15, 2025 at 11:28:46AM +0200, Florian Obser wrote:
> On 2025-06-13 16:29 +02, Alexander Bluhm <bluhm@openbsd.org> wrote:
> > This brings us closer to IPv6 where you need pf rules for router
> > discovery.  Right now the dhcp rule is stateless as broadcast an
> > multicast addresses do not match.  sashan@ has a diff to make a
> > state for dhcp match on both directions.  pf also handles neigbor
> > discovery special, although it does not work in all situations.
> >
> > So if we switch to dhcpd UDP sockets, I would like to see sashan@'s
> > diff commited.
> 
> I was playing around with sashan's diff a lot during j2k25 and I *think*
> it wasn't working correctly quite yet. But I was juggling too many diffs
> at the time and the network was too noisy to figure out what was going
> on. I try to find some cycles soon to pick this up again.
> 
> (This was in the context of dhcp client side, but I think the tests
> would still be correct and valuable for server side as well.)
> 

    If I remember correctly the diff still fails to handle
    IP renewal requests.

    we need to explain pf(4) how IP addresses for client/server are changing
    in bootp/dhcp protocol. in case of request it is
	0.0.0.0:68 -> 255.255.255.255:67
    then there is a response:
	a.b.c.d:67 -> 255.255.255.255:68

    in case of renewal request the packet looks like:
	w.x.y.z:68 -> 255.255.255.255:67


    the idea is the dhcp/bootp traffic for client should be covered by 'pass
    all' rule.  the semi-working diff is attached for reference.


regards
sashan

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/pf.c b/sys/net/pf.c
index c0e5be6768f..7f7ddbe44d9 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -258,6 +258,9 @@ void			 pf_state_key_link_inpcb(struct pf_state_key *,
 void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
 void			 pf_pktenqueue_delayed(void *);
 int32_t			 pf_state_expires(const struct pf_state *, uint8_t);
+int			 pf_get_bootp_status(sa_family_t, uint8_t,
+			    const struct pf_addr *, const struct pf_addr *,
+			    uint16_t, uint16_t);
 
 #if NPFLOG > 0
 void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
@@ -702,6 +705,15 @@ pf_state_compare_key(const struct pf_state_key *a,
 		return (diff);
 	if ((diff = a->af - b->af) != 0)
 		return (diff);
+	if ((diff = a->rdomain - b->rdomain) != 0)
+		return (diff);
+
+	/*
+	 * short circuit to match bootp/dhcp request with reply
+	 */
+	if (a->bootp != PF_BOOTP_NONE && b->bootp != PF_BOOTP_NONE)
+		return 0;
+
 	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
 		return (diff);
 	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
@@ -710,8 +722,6 @@ pf_state_compare_key(const struct pf_state_key *a,
 		return (diff);
 	if ((diff = a->port[1] - b->port[1]) != 0)
 		return (diff);
-	if ((diff = a->rdomain - b->rdomain) != 0)
-		return (diff);
 	return (0);
 }
 
@@ -953,14 +963,22 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
 	sk1->af = pd->af;
 	sk1->rdomain = pd->rdomain;
 	sk1->hash = pf_pkt_hash(sk1->af, sk1->proto,
-	    &sk1->addr[0], &sk1->addr[1], sk1->port[0], sk1->port[1]);
+	    &sk1->addr[pd->sidx], &sk1->addr[pd->didx],
+	    sk1->port[pd->sidx], sk1->port[pd->didx]);
+	sk1->bootp = pf_get_bootp_status(sk1->af, sk1->proto,
+	    &sk1->addr[pd->sidx], &sk1->addr[pd->didx],
+	    sk1->port[pd->sidx], sk1->port[pd->didx]);
 	if (rtableid >= 0)
 		wrdom = rtable_l2(rtableid);
 
-	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
+	/*
+	 * suppress NAT for dhcp/bootp.
+	 */
+	if (sk1->bootp == PF_BOOTP_NONE &&
+	    (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
 	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
 	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
-	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
+	    wrdom != pd->rdomain || afto)) {	/* NAT/NAT64 */
 		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
 			pf_state_key_unref(sk1);
 			return (ENOMEM);
@@ -1123,6 +1141,27 @@ pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
 	}
 }
 
+int
+pf_get_bootp_status(sa_family_t af, uint8_t proto,
+    const struct pf_addr *src, const struct pf_addr *dst,
+    uint16_t sport, uint16_t dport)
+{
+	int	status = PF_BOOTP_NONE;
+
+	if (af == AF_INET && proto == IPPROTO_UDP) {
+		if (src->addr32[0] == INADDR_ANY &&
+		    dst->addr32[0] == INADDR_BROADCAST &&
+		    sport == htons(68) && dport == htons(67))
+			status = PF_BOOTP_REQUEST;
+		else if (src->addr32[0] != INADDR_ANY &&
+		    dst->addr32[0] == INADDR_BROADCAST &&
+		    sport == htons(67) && dport == htons(68))
+			status = PF_BOOTP_REPLY;
+	}
+
+	return (status);
+}
+
 int
 pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
     struct pf_state **stp)
@@ -7299,14 +7338,25 @@ pf_pkt_hash(sa_family_t af, uint8_t proto,
 {
 	uint32_t hash;
 
-	hash = src->addr32[0] ^ dst->addr32[0];
+	/*
+	 * don't calculate hash for IPv4 bootp/dhcp as those protocols
+	 * use different IP addresses in requests and replies.
+	 * request is: 0.0.0.0:68 -> 255.255.255.255:67
+	 * reply is: a.b.c.d:67 -> 255.255.255.255:68
+	 */
+	if (pf_get_bootp_status(af, proto, src, dst, sport, dport) !=
+	    PF_BOOTP_NONE) {
+		hash = 0;
+	} else {
+		hash = src->addr32[0] ^ dst->addr32[0];
 #ifdef INET6
-	if (af == AF_INET6) {
-		hash ^= src->addr32[1] ^ dst->addr32[1];
-		hash ^= src->addr32[2] ^ dst->addr32[2];
-		hash ^= src->addr32[3] ^ dst->addr32[3];
-	}
+		if (af == AF_INET6) {
+			hash ^= src->addr32[1] ^ dst->addr32[1];
+			hash ^= src->addr32[2] ^ dst->addr32[2];
+			hash ^= src->addr32[3] ^ dst->addr32[3];
+		}
 #endif
+	}
 
 	switch (proto) {
 	case IPPROTO_TCP:
@@ -7788,6 +7838,8 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 		key.port[pd.sidx] = pd.osport;
 		key.port[pd.didx] = pd.odport;
 		key.hash = pd.hash;
+		key.bootp = pf_get_bootp_status(pd.af, pd.proto, pd.src,
+		    pd.dst, *pd.sport, *pd.dport);
 
 		PF_STATE_ENTER_READ();
 		action = pf_find_state(&pd, &key, &st);
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 098138a312f..00fb7c286df 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -698,6 +698,12 @@ struct pf_state_peer {
 
 TAILQ_HEAD(pf_state_queue, pf_state);
 
+enum {
+	PF_BOOTP_NONE,
+	PF_BOOTP_REQUEST,
+	PF_BOOTP_REPLY
+};
+
 /* keep synced with struct pf_state_key, used in RB_FIND */
 struct pf_state_key_cmp {
 	struct pf_addr	 addr[2];
@@ -706,6 +712,7 @@ struct pf_state_key_cmp {
 	u_int16_t	 hash;
 	sa_family_t	 af;
 	u_int8_t	 proto;
+	u_int8_t	 bootp;
 };
 
 /* keep synced with struct pf_state, used in RB_FIND */
diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
index 374ce186738..b962fe4c9eb 100644
--- a/sys/net/pfvar_priv.h
+++ b/sys/net/pfvar_priv.h
@@ -71,6 +71,7 @@ struct pf_state_key {
 	u_int16_t	 hash;
 	sa_family_t	 af;
 	u_int8_t	 proto;
+	u_int8_t	 bootp;
 
 	RBT_ENTRY(pf_state_key)	 sk_entry;
 	struct pf_statelisthead	 sk_states;