From: YASUOKA Masahiko Subject: diff: pf least-states To: sashan@openbsd.org, tech@openbsd.org Date: Thu, 14 Aug 2025 20:08:45 +0900 Hello, "least-states" doesn't count the number of states properly. When a state is removed, the state counter must be decremented properly. But it isn't in most setups; the counter actually becomes a cumulative value. Then if we add an address to the table, pf will always select the newly added address until its number of states reaches the counter value of the other addresses. This is a problem. Let's see sys/net/pf_lb.c: 879 int 880 pf_postprocess_addr(struct pf_state *cur) 881 { : 888 nr = cur->natrule.ptr; 889 890 if (nr == NULL) 891 return (0); When "least-states" is used for "route-to", nr might be NULL. Then the counter will not be decremented. 892 893 /* decrease counter */ 894 895 sks = cur->key[PF_SK_STACK]; 896 897 /* check for outgoing or ingoing balancing */ 898 if (nr->rt == PF_ROUTETO) 899 lookup_addr = cur->rt_addr; 900 else if (sks != NULL) 901 lookup_addr = sks->addr[1]; #895, this assumes the state is in the "in" direction, because the NATed address will be in PF_SK_WIRE for the "out" direction. #898 and #900, the conditions don't consider the case where both "route-to" and "nat-to" might be done for the same state. And so on. The diff considers all the cases I know of, and I have tested them roughly. comments? ok? 
Index: sys/net/pf_lb.c =================================================================== RCS file: /var/cvs/openbsd/src/sys/net/pf_lb.c,v retrieving revision 1.75 diff -u -p -r1.75 pf_lb.c --- sys/net/pf_lb.c 7 Jul 2025 02:28:50 -0000 1.75 +++ sys/net/pf_lb.c 14 Aug 2025 07:47:49 -0000 @@ -77,6 +77,8 @@ int pf_map_addr_sticky(sa_family_t, s struct pf_addr *, struct pf_addr *, struct pf_src_node **, struct pf_pool *, enum pf_sn_types); +int pf_pool_states_decrease_addr(struct pf_pool *, int, + struct pf_addr *); u_int64_t pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, @@ -706,6 +708,12 @@ pf_get_transaddr(struct pf_rule *r, stru r->nat.proxy_port[1]); return (-1); } + /* decrease least-connection state counter of the previous */ + if ((*nr) != NULL && (*nr)->nat.addr.type != PF_ADDR_NONE && + ((*nr)->nat.opts & PF_POOL_TYPEMASK) == + PF_POOL_LEASTSTATES) + pf_pool_states_decrease_addr(&(*nr)->nat, pd->af, + &pd->nsaddr); *nr = r; pf_addrcpy(&pd->nsaddr, &naddr, pd->af); pd->nsport = nport; @@ -735,6 +743,12 @@ pf_get_transaddr(struct pf_rule *r, stru nport = htons((u_int16_t)tmp_nport); } else if (r->rdr.proxy_port[0]) nport = htons(r->rdr.proxy_port[0]); + /* decrease least-connection state counter of the previous */ + if ((*nr) != NULL && (*nr)->rdr.addr.type != PF_ADDR_NONE && + ((*nr)->rdr.opts & PF_POOL_TYPEMASK) == + PF_POOL_LEASTSTATES) + pf_pool_states_decrease_addr(&(*nr)->rdr, pd->af, + &pd->ndaddr); *nr = r; pf_addrcpy(&pd->ndaddr, &naddr, pd->af); if (nport) @@ -879,70 +893,102 @@ pf_get_transaddr_af(struct pf_rule *r, s int pf_postprocess_addr(struct pf_state *cur) { - struct pf_rule *nr; struct pf_state_key *sks; - struct pf_pool rpool; - struct pf_addr lookup_addr; - int slbcount = -1; - - nr = cur->natrule.ptr; - - if (nr == NULL) - return (0); - - /* decrease counter */ - - sks = cur->key[PF_SK_STACK]; - - /* check for outgoing or ingoing balancing */ - if (nr->rt == PF_ROUTETO) - lookup_addr = cur->rt_addr; - else if (sks != NULL) - 
lookup_addr = sks->addr[1]; - else { - if (pf_status.debug >= LOG_DEBUG) { - log(LOG_DEBUG, "pf: %s: unable to obtain address", - __func__); + struct pf_addr *addr; + struct pf_pool *rpool, *natpl = NULL, *rdrpl = NULL; + struct pf_rule_item *ri; + int ret = 0; + + /* decrease the states counter in pool for "least-states" */ + if (cur->natrule.ptr != NULL) { /* this is the final */ + if (cur->natrule.ptr->nat.addr.type != PF_ADDR_NONE) + natpl = &cur->natrule.ptr->nat; + if (cur->natrule.ptr->rdr.addr.type != PF_ADDR_NONE) + rdrpl = &cur->natrule.ptr->rdr; + } + if (natpl == NULL || rdrpl == NULL) { + /* nat or rdr might be done by a previous "match" */ + SLIST_FOREACH(ri, &cur->match_rules, entry) { + /* first match since the list order is reversed */ + if (natpl == NULL && + ri->r->nat.addr.type != PF_ADDR_NONE) + natpl = &ri->r->nat; + if (rdrpl == NULL && + ri->r->rdr.addr.type != PF_ADDR_NONE) + rdrpl = &ri->r->rdr; + if (natpl != NULL && rdrpl != NULL) + break; + } + } + if (natpl != NULL && + (natpl->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) { + /* nat-to least-state */ + if (cur->direction == PF_IN) { + sks = cur->key[PF_SK_STACK]; + addr = &sks->addr[0]; + } else { + sks = cur->key[PF_SK_WIRE]; + addr = &sks->addr[1]; + } + if (pf_pool_states_decrease_addr(natpl, sks->af, addr) != 0) + ret = 1; + } + if (rdrpl != NULL && + (rdrpl->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) { + /* rdr
least-state */ + if (cur->direction == PF_IN) { + sks = cur->key[PF_SK_STACK]; + addr = &sks->addr[1]; + } else { + sks = cur->key[PF_SK_WIRE]; + addr = &sks->addr[0]; + } + if (pf_pool_states_decrease_addr(rdrpl, sks->af, addr) != 0) + ret = 1; + } + if (cur->rule.ptr != NULL) { + /* route-to
least-state */ + rpool = &cur->rule.ptr->route; + if (rpool->addr.type != PF_ADDR_NONE && + (rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) { + sks = cur->key[(cur->direction == PF_IN) + ? PF_SK_STACK : PF_SK_WIRE]; + addr = &cur->rt_addr; + KASSERT(sks != NULL); + if (pf_pool_states_decrease_addr(rpool, sks->af, addr) + != 0) + ret = 1; } - return (1); } - /* check for appropriate pool */ - if (nr->rdr.addr.type != PF_ADDR_NONE) - rpool = nr->rdr; - else if (nr->nat.addr.type != PF_ADDR_NONE) - rpool = nr->nat; - else if (nr->route.addr.type != PF_ADDR_NONE) - rpool = nr->route; - else - return (0); + return (ret); +} - if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES)) - return (0); +int +pf_pool_states_decrease_addr(struct pf_pool *rpool, int af, + struct pf_addr *addr) +{ + int slbcount = -1; - if (rpool.addr.type == PF_ADDR_TABLE) { + if (rpool->addr.type == PF_ADDR_TABLE) { if ((slbcount = pfr_states_decrease( - rpool.addr.p.tbl, - &lookup_addr, sks->af)) == -1) { + rpool->addr.p.tbl, addr, af)) == -1) { if (pf_status.debug >= LOG_DEBUG) { log(LOG_DEBUG, "pf: %s: selected address ", __func__); - pf_print_host(&lookup_addr, - sks->port[0], sks->af); + pf_print_host(addr, 0, af); addlog(". Failed to " "decrease count!\n"); } return (1); } - } else if (rpool.addr.type == PF_ADDR_DYNIFTL) { + } else if (rpool->addr.type == PF_ADDR_DYNIFTL) { if ((slbcount = pfr_states_decrease( - rpool.addr.p.dyn->pfid_kt, - &lookup_addr, sks->af)) == -1) { + rpool->addr.p.dyn->pfid_kt, addr, af)) == -1) { if (pf_status.debug >= LOG_DEBUG) { log(LOG_DEBUG, "pf: %s: selected address ", __func__); - pf_print_host(&lookup_addr, - sks->port[0], sks->af); + pf_print_host(addr, 0, af); addlog(". 
Failed to " "decrease count!\n"); } @@ -952,11 +998,11 @@ pf_postprocess_addr(struct pf_state *cur if (slbcount > -1) { if (pf_status.debug >= LOG_INFO) { log(LOG_INFO, "pf: %s: selected address ", __func__); - pf_print_host(&lookup_addr, sks->port[0], - sks->af); + pf_print_host(addr, 0, af); addlog(" decreased state count to %u\n", slbcount); } } + return (0); }