Index | Thread | Search

From:
YASUOKA Masahiko <yasuoka@openbsd.org>
Subject:
diff: pf least-states
To:
sashan@openbsd.org, tech@openbsd.org
Date:
Thu, 14 Aug 2025 20:08:45 +0900

Download raw body.

Thread
Hello,

"least-states" doesn't count the number of states properly.  When the
state is removed, the state counter must be decremented propery.  But
it doesn't in most setups, the counter actually is the cumulative
value. Then if we add an address to the table, pf will select the
newly added the address always until its number of the state reaches
the counter value of the other addresses.  This is a problem.

Let's see sys/net/pf_lb.c:

879 int
880 pf_postprocess_addr(struct pf_state *cur)
881 {
:
888         nr = cur->natrule.ptr;
889 
890         if (nr == NULL)
891                 return (0);

When "least-state" is used for "route-to", nr might be NULL.  Then the
counter will not be decremented.

892 
893         /* decrease counter */
894 
895         sks = cur->key[PF_SK_STACK];
896 
897         /* check for outgoing or ingoing balancing */
898         if (nr->rt == PF_ROUTETO)
899                 lookup_addr = cur->rt_addr;
900         else if (sks != NULL)
901                 lookup_addr = sks->addr[1];

#895, this assumes the state is in the "in" direction, because the
NATed address will be in PF_SK_WIRE for the "out" direction.

#898 and #900, the conditions don't consider the case where both
"route-to" and "nat-to" might be done for the same state.

And so on.

The diff considers all cases I know.  And tested them roughly.

comments?
ok?

Index: sys/net/pf_lb.c
===================================================================
RCS file: /var/cvs/openbsd/src/sys/net/pf_lb.c,v
retrieving revision 1.75
diff -u -p -r1.75 pf_lb.c
--- sys/net/pf_lb.c	7 Jul 2025 02:28:50 -0000	1.75
+++ sys/net/pf_lb.c	14 Aug 2025 07:47:49 -0000
@@ -77,6 +77,8 @@ int			 pf_map_addr_sticky(sa_family_t, s
 			    struct pf_addr *, struct pf_addr *,
 			    struct pf_src_node **, struct pf_pool *,
 			    enum pf_sn_types);
+int			 pf_pool_states_decrease_addr(struct pf_pool *, int,
+			    struct pf_addr *);
 
 u_int64_t
 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
@@ -706,6 +708,12 @@ pf_get_transaddr(struct pf_rule *r, stru
 			    r->nat.proxy_port[1]);
 			return (-1);
 		}
+		/* decrease least-connection state counter of the previous */
+		if ((*nr) != NULL && (*nr)->nat.addr.type != PF_ADDR_NONE &&
+		    ((*nr)->nat.opts & PF_POOL_TYPEMASK) ==
+		    PF_POOL_LEASTSTATES)
+			pf_pool_states_decrease_addr(&(*nr)->nat, pd->af,
+			    &pd->nsaddr);
 		*nr = r;
 		pf_addrcpy(&pd->nsaddr, &naddr, pd->af);
 		pd->nsport = nport;
@@ -735,6 +743,12 @@ pf_get_transaddr(struct pf_rule *r, stru
 			nport = htons((u_int16_t)tmp_nport);
 		} else if (r->rdr.proxy_port[0])
 			nport = htons(r->rdr.proxy_port[0]);
+		/* decrease least-connection state counter of the previous */
+		if ((*nr) != NULL && (*nr)->rdr.addr.type != PF_ADDR_NONE &&
+		    ((*nr)->rdr.opts & PF_POOL_TYPEMASK) ==
+		    PF_POOL_LEASTSTATES)
+			pf_pool_states_decrease_addr(&(*nr)->rdr, pd->af,
+			    &pd->ndaddr);
 		*nr = r;
 		pf_addrcpy(&pd->ndaddr, &naddr, pd->af);
 		if (nport)
@@ -879,70 +893,102 @@ pf_get_transaddr_af(struct pf_rule *r, s
 int
 pf_postprocess_addr(struct pf_state *cur)
 {
-	struct pf_rule		*nr;
 	struct pf_state_key	*sks;
-	struct pf_pool		 rpool;
-	struct pf_addr		 lookup_addr;
-	int			 slbcount = -1;
-
-	nr = cur->natrule.ptr;
-
-	if (nr == NULL)
-		return (0);
-
-	/* decrease counter */
-
-	sks = cur->key[PF_SK_STACK];
-
-	/* check for outgoing or ingoing balancing */
-	if (nr->rt == PF_ROUTETO)
-		lookup_addr = cur->rt_addr;
-	else if (sks != NULL)
-		lookup_addr = sks->addr[1];
-	else {
-		if (pf_status.debug >= LOG_DEBUG) {
-			log(LOG_DEBUG, "pf: %s: unable to obtain address",
-			    __func__);
+	struct pf_addr		*addr;
+	struct pf_pool		*rpool, *natpl = NULL, *rdrpl = NULL;
+	struct pf_rule_item	*ri;
+	int			 ret = 0;
+
+	/* decrease the states counter in pool for "least-states" */
+	if (cur->natrule.ptr != NULL) { /* this is the final */
+		if (cur->natrule.ptr->nat.addr.type != PF_ADDR_NONE)
+			natpl = &cur->natrule.ptr->nat;
+		if (cur->natrule.ptr->rdr.addr.type != PF_ADDR_NONE)
+			rdrpl = &cur->natrule.ptr->rdr;
+	}
+	if (natpl == NULL || rdrpl == NULL) {
+		/* nat or rdr might be done by a previous "match" */
+		SLIST_FOREACH(ri, &cur->match_rules, entry) {
+			/* first match since the list order is reversed */
+			if (natpl == NULL &&
+			    ri->r->nat.addr.type != PF_ADDR_NONE)
+				natpl = &ri->r->nat;
+			if (rdrpl == NULL &&
+			    ri->r->rdr.addr.type != PF_ADDR_NONE)
+				rdrpl = &ri->r->rdr;
+			if (natpl != NULL && rdrpl != NULL)
+				break;
+		}
+	}
+	if (natpl != NULL &&
+	    (natpl->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
+		/* nat-to <table> least-state */
+		if (cur->direction == PF_IN) {
+			sks = cur->key[PF_SK_STACK];
+			addr = &sks->addr[0];
+		} else {
+			sks = cur->key[PF_SK_WIRE];
+			addr = &sks->addr[1];
+		}
+		if (pf_pool_states_decrease_addr(natpl, sks->af, addr) != 0)
+			ret = 1;
+	}
+	if (rdrpl != NULL &&
+	    (rdrpl->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
+		/* rdr <table> least-state */
+		if (cur->direction == PF_IN) {
+			sks = cur->key[PF_SK_STACK];
+			addr = &sks->addr[1];
+		} else {
+			sks = cur->key[PF_SK_WIRE];
+			addr = &sks->addr[0];
+		}
+		if (pf_pool_states_decrease_addr(rdrpl, sks->af, addr) != 0)
+			ret = 1;
+	}
+	if (cur->rule.ptr != NULL) {
+		/* route-to <table> least-state */
+		rpool = &cur->rule.ptr->route;
+		if (rpool->addr.type != PF_ADDR_NONE &&
+		    (rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
+			sks = cur->key[(cur->direction == PF_IN)
+			    ? PF_SK_STACK : PF_SK_WIRE];
+			addr = &cur->rt_addr;
+			KASSERT(sks != NULL);
+			if (pf_pool_states_decrease_addr(rpool, sks->af, addr)
+			    != 0)
+				ret = 1;
 		}
-		return (1);
 	}
 
-	/* check for appropriate pool */
-	if (nr->rdr.addr.type != PF_ADDR_NONE)
-		rpool = nr->rdr;
-	else if (nr->nat.addr.type != PF_ADDR_NONE)
-		rpool = nr->nat;
-	else if (nr->route.addr.type != PF_ADDR_NONE)
-		rpool = nr->route;
-	else
-		return (0);
+	return (ret);
+}
 
-	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
-		return (0);
+int
+pf_pool_states_decrease_addr(struct pf_pool *rpool, int af,
+    struct pf_addr *addr)
+{
+	int	 slbcount = -1;
 
-	if (rpool.addr.type == PF_ADDR_TABLE) {
+	if (rpool->addr.type == PF_ADDR_TABLE) {
 		if ((slbcount = pfr_states_decrease(
-		    rpool.addr.p.tbl,
-		    &lookup_addr, sks->af)) == -1) {
+		    rpool->addr.p.tbl, addr, af)) == -1) {
 			if (pf_status.debug >= LOG_DEBUG) {
 				log(LOG_DEBUG, "pf: %s: selected address ",
 				    __func__);
-				pf_print_host(&lookup_addr,
-				    sks->port[0], sks->af);
+				pf_print_host(addr, 0, af);
 				addlog(". Failed to "
 				    "decrease count!\n");
 			}
 			return (1);
 		}
-	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
+	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
 		if ((slbcount = pfr_states_decrease(
-		    rpool.addr.p.dyn->pfid_kt,
-		    &lookup_addr, sks->af)) == -1) {
+		    rpool->addr.p.dyn->pfid_kt, addr, af)) == -1) {
 			if (pf_status.debug >= LOG_DEBUG) {
 				log(LOG_DEBUG, "pf: %s: selected address ",
 				    __func__);
-				pf_print_host(&lookup_addr,
-				    sks->port[0], sks->af);
+				pf_print_host(addr, 0, af);
 				addlog(". Failed to "
 				    "decrease count!\n");
 			}
@@ -952,11 +998,11 @@ pf_postprocess_addr(struct pf_state *cur
 	if (slbcount > -1) {
 		if (pf_status.debug >= LOG_INFO) {
 			log(LOG_INFO, "pf: %s: selected address ", __func__);
-			pf_print_host(&lookup_addr, sks->port[0],
-			    sks->af);
+			pf_print_host(addr, 0, af);
 			addlog(" decreased state count to %u\n",
 			    slbcount);
 		}
 	}
+
 	return (0);
 }