
From: David Gwynne <david@gwynne.id.au>
Subject: deprecate packet/byte counters on pf tables and table entries
To: tech@openbsd.org
Date: Fri, 12 Dec 2025 10:42:58 +1000

this diff removes pfr_update_stats() from the kernel, which is
responsible for counting the packets and bytes going through pf
against pf tables and the entries in those tables.

a description of these counters can be found in the pfctl manpage. look
for "Each table can maintain a set of counters".

note that i only want to deprecate the packet and byte counters, not the
ruleset eval counters.

i have a couple of reasons for wanting to remove them:

1. counting packets/bytes on tables/table entries is unreliable

the counting has subtle semantics that make the counters unreliable
or inconsistent. i argue that a pf table and the entries in it are
only relevant at ruleset evaluation and state creation time. once a
state is created it has a lifetime separate from the policy that
created it, and we shouldn't burden ourselves by pretending they stay
connected, even if it looks like they do a lot of the time.

pf_counters_inc updates the tables hanging off rules, but after the
first packet the relationship between a state and a rule isn't
guaranteed. loading a new ruleset breaks the relationship, and if
a state is learnt from a pfsync peer with a different ruleset, the
relationship never existed in the first place.

the contents of a table can change over the lifetime of a connection
too. what does it mean to count packets against an entry in the table if
that entry is removed or a more specific prefix gets added? if an entry
is removed and then added again, is it correct to add to the counters on
the new entry when it wasn't responsible for allowing the state to be
created?
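
to make that concrete (table name and addresses invented): say a state
was created because 192.0.2.1 was in <goodhosts>, and while that
connection is still running someone does:

	# pfctl -t goodhosts -T delete 192.0.2.1
	# pfctl -t goodhosts -T add 192.0.2.0/24

as far as i can tell, subsequent packets on the existing state now get
counted against the /24 entry, which had nothing to do with the state
being allowed in the first place.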

2. the implementation is expensive

you have to look up the address in the table (an rn_match against the
table's radix tree, as in the removed code below) to decide which type
of counter you're updating, so you can update that counter on the table
before deciding whether to update the counter on the entry.

this means we're doing a table lookup for every packet, for every table
linked to the rules the packet or its state matched.

lastly, these counters are implemented as a single chunk of memory, and
the updates are serialised implicitly by one of the global locks that pf
currently operates under. both of these properties make it harder to
scale pf to run in parallel.


so the collection of the counters is expensive, and they aren't
good at counting what they look like they're supposed to count
anyway. i'm not convinced there's anything useful you can do with
the counters that have been collected either. what are they supposed
to tell you?

anyone want to ok me removing the kernel code as a first step in
deprecating this functionality?

Index: pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
diff -u -p -r1.1224 pf.c
--- pf.c	11 Dec 2025 04:59:26 -0000	1.1224
+++ pf.c	11 Dec 2025 07:28:53 -0000
@@ -8184,31 +8184,8 @@ pf_counters_inc(int action, struct pf_pd
 			SLIST_FOREACH(ri, &st->match_rules, entry) {
 				ri->r->packets[dirndx]++;
 				ri->r->bytes[dirndx] += pd->tot_len;
-
-				if (ri->r->src.addr.type == PF_ADDR_TABLE)
-					pfr_update_stats(ri->r->src.addr.p.tbl,
-					    &st->key[(st->direction == PF_IN)]->
-						addr[(st->direction == PF_OUT)],
-					    pd, ri->r->action, ri->r->src.neg);
-				if (ri->r->dst.addr.type == PF_ADDR_TABLE)
-					pfr_update_stats(ri->r->dst.addr.p.tbl,
-					    &st->key[(st->direction == PF_IN)]->
-						addr[(st->direction == PF_IN)],
-					    pd, ri->r->action, ri->r->dst.neg);
 			}
 		}
-		if (r->src.addr.type == PF_ADDR_TABLE)
-			pfr_update_stats(r->src.addr.p.tbl,
-			    (st == NULL) ? pd->src :
-			    &st->key[(st->direction == PF_IN)]->
-				addr[(st->direction == PF_OUT)],
-			    pd, r->action, r->src.neg);
-		if (r->dst.addr.type == PF_ADDR_TABLE)
-			pfr_update_stats(r->dst.addr.p.tbl,
-			    (st == NULL) ? pd->dst :
-			    &st->key[(st->direction == PF_IN)]->
-				addr[(st->direction == PF_IN)],
-			    pd, r->action, r->dst.neg);
 	}
 }
 
Index: pf_table.c
===================================================================
RCS file: /cvs/src/sys/net/pf_table.c,v
diff -u -p -r1.147 pf_table.c
--- pf_table.c	11 Nov 2025 04:06:20 -0000	1.147
+++ pf_table.c	11 Dec 2025 07:28:53 -0000
@@ -2478,79 +2478,6 @@ pfr_kentry_byaddr(struct pfr_ktable *kt,
 	return (ke);
 }
 
-void
-pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, struct pf_pdesc *pd,
-    int op, int notrule)
-{
-	struct pfr_kentry	*ke = NULL;
-	struct sockaddr_in	 tmp4;
-#ifdef INET6
-	struct sockaddr_in6	 tmp6;
-#endif /* INET6 */
-	sa_family_t		 af = pd->af;
-	u_int64_t		 len = pd->tot_len;
-	int			 dir_idx = (pd->dir == PF_OUT);
-	int			 op_idx;
-
-	kt = pfr_ktable_select_active(kt);
-	if (kt == NULL)
-		return;
-
-	switch (af) {
-	case AF_INET:
-		bzero(&tmp4, sizeof(tmp4));
-		tmp4.sin_len = sizeof(tmp4);
-		tmp4.sin_family = AF_INET;
-		tmp4.sin_addr.s_addr = a->addr32[0];
-		ke = (struct pfr_kentry *)rn_match(&tmp4, kt->pfrkt_ip4);
-		break;
-#ifdef INET6
-	case AF_INET6:
-		bzero(&tmp6, sizeof(tmp6));
-		tmp6.sin6_len = sizeof(tmp6);
-		tmp6.sin6_family = AF_INET6;
-		bcopy(a, &tmp6.sin6_addr, sizeof(tmp6.sin6_addr));
-		ke = (struct pfr_kentry *)rn_match(&tmp6, kt->pfrkt_ip6);
-		break;
-#endif /* INET6 */
-	default:
-		unhandled_af(af);
-	}
-
-	switch (op) {
-	case PF_PASS:
-		op_idx = PFR_OP_PASS;
-		break;
-	case PF_MATCH:
-		op_idx = PFR_OP_MATCH;
-		break;
-	case PF_DROP:
-		op_idx = PFR_OP_BLOCK;
-		break;
-	default:
-		panic("unhandled op");
-	}
-
-	if ((ke == NULL || (ke->pfrke_flags & PFRKE_FLAG_NOT)) != notrule) {
-		if (op_idx != PFR_OP_PASS)
-			DPFPRINTF(LOG_DEBUG,
-			    "pfr_update_stats: assertion failed.");
-		op_idx = PFR_OP_XPASS;
-	}
-	kt->pfrkt_packets[dir_idx][op_idx]++;
-	kt->pfrkt_bytes[dir_idx][op_idx] += len;
-	if (ke != NULL && op_idx != PFR_OP_XPASS &&
-	    (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
-		if (ke->pfrke_counters == NULL)
-			ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
-			    PR_NOWAIT | PR_ZERO);
-		if (ke->pfrke_counters != NULL) {
-			ke->pfrke_counters->pfrkc_packets[dir_idx][op_idx]++;
-			ke->pfrke_counters->pfrkc_bytes[dir_idx][op_idx] += len;
-		}
-	}
-}
-
 struct pfr_ktable *
 pfr_attach_table(struct pf_ruleset *rs, char *name, int wait)
 {
Index: pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
diff -u -p -r1.544 pfvar.h
--- pfvar.h	11 Nov 2025 04:06:20 -0000	1.544
+++ pfvar.h	11 Dec 2025 07:28:53 -0000
@@ -1839,8 +1839,6 @@ int	pf_delay_pkt(struct mbuf *, u_int);
 
 void	pfr_initialize(void);
 int	pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
-void	pfr_update_stats(struct pfr_ktable *, struct pf_addr *,
-	    struct pf_pdesc *, int, int);
 int	pfr_pool_get(struct pf_pool *, struct pf_addr **,
 	    struct pf_addr **, sa_family_t);
 int	pfr_states_increase(struct pfr_ktable *, struct pf_addr *, int);