Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: rework rde out filters
To:
tech@openbsd.org
Date:
Thu, 12 Feb 2026 16:40:10 +0100

Download raw body.

Thread
The out filters are currently very expensive because at 700 peers
rde_filter_out() is chasing memory all over the place and spends a lot of
time waiting for RAM access into pretty fat objects.

Do the same thing as was done for filter_sets: use an rde_filter struct
that holds an array of match rules and, on top of this, use a hash table
and refcnt to dedup equal filters.

This diff reduces the initial load time of my test IXP RS setup from
25 min to around 18 min. So this change is a significant speedup on busy
systems.

I want to apply the same to inbound filters, but that requires a fair
amount of reshuffling, which will take some time.
-- 
:wq Claudio

Index: usr.sbin/bgpctl/output.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
diff -u -p -r1.68 output.c
--- usr.sbin/bgpctl/output.c	4 Feb 2026 11:48:33 -0000	1.68
+++ usr.sbin/bgpctl/output.c	11 Feb 2026 21:59:40 -0000
@@ -1104,6 +1104,10 @@ show_rib_mem(struct rde_memstats *stats)
 	printf("%10lld pending prefix entries using %s of memory\n",
 	    stats->pend_prefix_cnt, fmt_mem(stats->pend_prefix_cnt *
 	    sizeof(struct pend_prefix)));
+	printf("%10lld filters using %s of memory\n",
+	    stats->filter_cnt, fmt_mem(stats->filter_size));
+	printf("\t   and holding %lld references\n",
+	    stats->filter_refs);
 	printf("%10lld filter-sets using %s of memory\n",
 	    stats->filter_set_cnt, fmt_mem(stats->filter_set_size));
 	printf("\t   and holding %lld references\n",
Index: usr.sbin/bgpctl/output_json.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
diff -u -p -r1.59 output_json.c
--- usr.sbin/bgpctl/output_json.c	4 Feb 2026 11:48:33 -0000	1.59
+++ usr.sbin/bgpctl/output_json.c	11 Feb 2026 22:00:28 -0000
@@ -938,10 +938,12 @@ json_rib_mem(struct rde_memstats *stats)
 	json_do_end();
 
 	json_do_object("filters", 0);
+	json_rib_mem_element("filter", stats->filter_cnt,
+	    stats->filter_size, stats->filter_refs);
 	json_rib_mem_element("filter_set", stats->filter_set_cnt,
 	    stats->filter_set_size, stats->filter_set_refs);
 	json_rib_mem_element("total", UINT64_MAX,
-	    stats->filter_set_size, UINT64_MAX);
+	    stats->filter_size + stats->filter_set_size, UINT64_MAX);
 	json_do_end();
 
 	json_do_object("sets", 0);
Index: usr.sbin/bgpctl/output_ometric.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v
diff -u -p -r1.23 output_ometric.c
--- usr.sbin/bgpctl/output_ometric.c	4 Feb 2026 11:48:33 -0000	1.23
+++ usr.sbin/bgpctl/output_ometric.c	11 Feb 2026 22:01:03 -0000
@@ -327,10 +327,12 @@ ometric_rib_mem(struct rde_memstats *sta
 	    stats->aspath_size + stats->attr_cnt * sizeof(struct attr) +
 	    stats->attr_data, UINT64_MAX);
 
+	ometric_rib_mem_element("filter", stats->filter_cnt,
+	    stats->filter_size, stats->filter_refs);
 	ometric_rib_mem_element("filter_set", stats->filter_set_cnt,
 	    stats->filter_set_size, stats->filter_set_refs);
 	ometric_rib_mem_element("filter_total", UINT64_MAX,
-	    stats->filter_set_size, UINT64_MAX);
+	    stats->filter_size + stats->filter_set_size, UINT64_MAX);
 
 	ometric_set_int(rde_table_count, stats->aset_cnt, NULL);
 
Index: usr.sbin/bgpd/bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.532 bgpd.h
--- usr.sbin/bgpd/bgpd.h	4 Feb 2026 13:49:23 -0000	1.532
+++ usr.sbin/bgpd/bgpd.h	11 Feb 2026 13:05:19 -0000
@@ -1420,6 +1420,9 @@ struct rde_memstats {
 	long long	aset_nmemb;
 	long long	pset_cnt;
 	long long	pset_size;
+	long long	filter_cnt;
+	long long	filter_size;
+	long long	filter_refs;
 	long long	filter_set_cnt;
 	long long	filter_set_size;
 	long long	filter_set_refs;
Index: usr.sbin/bgpd/rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.682 rde.c
--- usr.sbin/bgpd/rde.c	4 Feb 2026 11:41:11 -0000	1.682
+++ usr.sbin/bgpd/rde.c	11 Feb 2026 19:29:19 -0000
@@ -3772,6 +3772,7 @@ rde_reload_done(void)
 {
 	struct rde_peer		*peer;
 	struct filter_head	*fh;
+	struct rde_filter	*rf;
 	struct rde_prefixset_head prefixsets_old;
 	struct rde_prefixset_head originsets_old;
 	struct as_set_head	 as_sets_old;
@@ -3922,15 +3923,15 @@ rde_reload_done(void)
 		}
 
 		/* reapply outbound filters for this peer */
-		fh = peer_apply_out_filter(peer, out_rules);
+		rf = peer_apply_out_filter(peer, out_rules);
 
-		if (!rde_filter_equal(peer->out_rules, fh)) {
+		if (rf != peer->out_rules) {
 			char *p = log_fmt_peer(&peer->conf);
 			log_debug("out filter change: reloading peer %s", p);
 			free(p);
 			peer->reconf_out = 1;
 		}
-		filterlist_free(fh);
+		rde_filter_unref(rf);
 	}
 
 	/* bring ribs in sync */
Index: usr.sbin/bgpd/rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.341 rde.h
--- usr.sbin/bgpd/rde.h	11 Feb 2026 10:24:57 -0000	1.341
+++ usr.sbin/bgpd/rde.h	11 Feb 2026 19:51:13 -0000
@@ -83,6 +83,7 @@ CH_HEAD(pend_prefix_hash, pend_prefix);
 TAILQ_HEAD(pend_prefix_queue, pend_prefix);
 CH_HEAD(pend_attr_hash, pend_prefix);
 TAILQ_HEAD(pend_attr_queue, pend_attr);
+struct rde_filter;
 
 struct rde_peer {
 	RB_ENTRY(rde_peer)		 entry;
@@ -97,7 +98,7 @@ struct rde_peer {
 	struct pend_prefix_queue	 withdraws[AID_MAX];
 	struct pend_attr_hash		 pend_attrs;
 	struct pend_prefix_hash		 pend_prefixes;
-	struct filter_head		*out_rules;
+	struct rde_filter		*out_rules;
 	struct ibufqueue		*ibufq;
 	struct rib_queue		 rib_pq_head;
 	monotime_t			 staletime[AID_MAX];
@@ -417,7 +418,7 @@ void		 peer_foreach(void (*)(struct rde_
 struct rde_peer	*peer_get(uint32_t);
 struct rde_peer *peer_match(struct ctl_neighbor *, uint32_t);
 struct rde_peer	*peer_add(uint32_t, struct peer_config *, struct filter_head *);
-struct filter_head	*peer_apply_out_filter(struct rde_peer *,
+struct rde_filter	*peer_apply_out_filter(struct rde_peer *,
 			    struct filter_head *);
 
 void		 rde_generate_updates(struct rib_entry *, struct prefix *,
@@ -549,7 +550,11 @@ void		 prefix_evaluate_nexthop(struct pr
 void	rde_apply_set(const struct rde_filter_set *, struct rde_peer *,
 	    struct rde_peer *, struct filterstate *, u_int8_t);
 int	rde_l3vpn_import(struct rde_community *, struct l3vpn *);
-struct filter_rule     *rde_filter_dup(const struct filter_rule *);
+void	rde_filter_unref(struct rde_filter *);
+struct rde_filter *rde_filter_new(size_t);
+struct rde_filter *rde_filter_getcache(struct rde_filter *);
+void	rde_filter_fill(struct rde_filter *, size_t,
+	    const struct filter_rule *);
 void	rde_filterstate_init(struct filterstate *);
 void	rde_filterstate_prep(struct filterstate *, struct prefix *);
 void	rde_filterstate_copy(struct filterstate *, struct filterstate *);
@@ -563,7 +568,7 @@ void	rde_filter_calc_skip_steps(struct f
 enum filter_actions rde_filter(struct filter_head *, struct rde_peer *,
 	    struct rde_peer *, struct bgpd_addr *, uint8_t,
 	    struct filterstate *);
-enum filter_actions rde_filter_out(struct filter_head *, struct rde_peer *,
+enum filter_actions rde_filter_out(struct rde_filter *, struct rde_peer *,
 	    struct rde_peer *, struct bgpd_addr *, uint8_t,
 	    struct filterstate *);
 
Index: usr.sbin/bgpd/rde_filter.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_filter.c,v
diff -u -p -r1.144 rde_filter.c
--- usr.sbin/bgpd/rde_filter.c	11 Feb 2026 12:25:57 -0000	1.144
+++ usr.sbin/bgpd/rde_filter.c	11 Feb 2026 22:03:30 -0000
@@ -56,6 +56,20 @@ struct rde_filter_set {
 	struct rde_filter_set_elm	set[0];
 };
 
+struct rde_filter_rule {
+	struct filter_match		match;
+	struct rde_filter_set		*rde_set;
+	enum filter_actions		action;
+	uint8_t				quick;
+};
+
+struct rde_filter {
+	uint64_t			hash;
+	size_t				len;
+	int				refcnt;
+	struct rde_filter_rule		rules[0];
+};
+
 void
 rde_apply_set(const struct rde_filter_set *rfs, struct rde_peer *peer,
     struct rde_peer *from, struct filterstate *state, uint8_t aid)
@@ -447,19 +461,119 @@ rde_filter_equal(struct filter_head *a, 
 	return (1);
 }
 
-struct filter_rule *
-rde_filter_dup(const struct filter_rule *fr)
+static SIPHASH_KEY	rfkey;
+
+static inline uint64_t
+rde_filter_hash(const struct rde_filter *rf)
+{
+	return rf->hash;
+}
+
+static uint64_t
+rde_filter_calc_hash(const struct rde_filter *rf)
+{
+	return SipHash24(&rfkey, rf->rules, rf->len * sizeof(rf->rules[0]));
+}
+
+CH_HEAD(rde_filtertable, rde_filter);
+CH_PROTOTYPE(rde_filtertable, rde_filter, rde_filter_hash);
+
+static struct rde_filtertable filter = CH_INITIALIZER(&filter);
+
+static void
+rde_filter_free(struct rde_filter *rf)
+{
+	if (rf == NULL)
+		return;
+
+	rdemem.filter_size -= sizeof(*rf) + rf->len * sizeof(rf->rules[0]);
+	rdemem.filter_cnt--;
+	free(rf);
+}
+
+static void
+rde_filter_ref(struct rde_filter *rf)
+{
+	rf->refcnt++;
+	rdemem.filter_refs++;
+}
+
+void
+rde_filter_unref(struct rde_filter *rf)
 {
-	struct filter_rule *new;
+	rf->refcnt--;
+	rdemem.filter_refs--;
+	if (rf->refcnt <= 0) {
+		CH_REMOVE(rde_filtertable, &filter, rf);
+		rde_filter_free(rf);
+	}
+}
 
-	if ((new = malloc(sizeof(*new))) == NULL)
+struct rde_filter *
+rde_filter_new(size_t count)
+{
+	struct rde_filter *rf;
+
+	if ((rf = calloc(1, sizeof(*rf) + count * sizeof(rf->rules[0]))) ==
+	    NULL)
 		fatal(NULL);
-	*new = *fr;
-	/* XXX think about skip table */
-	rde_filterset_ref(new->rde_set);
-	return new;
+
+	rdemem.filter_size += sizeof(*rf) + count * sizeof(rf->rules[0]);
+	rdemem.filter_cnt++;
+
+	rf->len = count;
+	return rf;
 }
 
+struct rde_filter *
+rde_filter_getcache(struct rde_filter *rf)
+{
+	struct rde_filter *nrf;
+
+	rf->hash = rde_filter_calc_hash(rf);
+	if ((nrf = CH_FIND(rde_filtertable, &filter, rf)) == NULL) {
+		if (CH_INSERT(rde_filtertable, &filter, rf, NULL) != 1)
+			fatalx("%s: already present filter", __func__);
+	} else {
+		rde_filter_free(rf);
+		rf = nrf;
+	}
+	rde_filter_ref(rf);
+	return rf;
+}
+
+void
+rde_filter_fill(struct rde_filter *rf, size_t index,
+    const struct filter_rule *fr)
+{
+	struct rde_filter_rule	*rule;
+
+	if (rf->len <= index)
+		fatalx(__func__);
+
+	rule = &rf->rules[index];
+	rule->match = fr->match;
+	rule->rde_set = fr->rde_set;
+	rde_filterset_ref(rule->rde_set);
+	rule->action = fr->action;
+	rule->quick = fr->quick;
+}
+
+static int
+rde_filtertable_equal(const struct rde_filter *arf,
+    const struct rde_filter *brf)
+{
+	if (arf->len != brf->len)
+		return 0;
+	if (memcmp(arf->rules, brf->rules,
+	    arf->len * sizeof(arf->rules[0])) != 0)
+		return 0;
+	return 1;
+}
+
+CH_GENERATE(rde_filtertable, rde_filter, rde_filtertable_equal,
+    rde_filter_hash);
+
 void
 rde_filterstate_init(struct filterstate *state)
 {
@@ -1026,12 +1140,13 @@ rde_filter(struct filter_head *rules, st
 }
 
 enum filter_actions
-rde_filter_out(struct filter_head *rules, struct rde_peer *peer,
+rde_filter_out(struct rde_filter *rf, struct rde_peer *peer,
     struct rde_peer *from, struct bgpd_addr *prefix, uint8_t plen,
     struct filterstate *state)
 {
-	struct filter_rule	*f;
+	struct rde_filter_rule	*f;
 	enum filter_actions	 action = ACTION_DENY; /* default deny */
+	size_t			 i;
 
 	if (state->aspath.flags & F_ATTR_PARSE_ERR)
 		/*
@@ -1040,14 +1155,11 @@ rde_filter_out(struct filter_head *rules
 		 */
 		return (ACTION_DENY);
 
-	if (rules == NULL)
-		return (action);
-
 	if (prefix->aid == AID_FLOWSPECv4 || prefix->aid == AID_FLOWSPECv6)
 		return (ACTION_ALLOW);
 
-	f = TAILQ_FIRST(rules);
-	while (f != NULL) {
+	for (i = 0; i < rf->len; i++) {
+		f = &rf->rules[i];
 		if (rde_filter_match(&f->match, peer, from, state,
 		    prefix, plen)) {
 			rde_apply_set(f->rde_set, peer, from, state,
@@ -1057,7 +1169,6 @@ rde_filter_out(struct filter_head *rules
 			if (f->quick)
 				return (action);
 		}
-		f = TAILQ_NEXT(f, entry);
 	}
 	return (action);
 }
Index: usr.sbin/bgpd/rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.66 rde_peer.c
--- usr.sbin/bgpd/rde_peer.c	3 Feb 2026 12:25:16 -0000	1.66
+++ usr.sbin/bgpd/rde_peer.c	11 Feb 2026 21:42:59 -0000
@@ -209,25 +209,30 @@ peer_add(uint32_t id, struct peer_config
 	return peer;
 }
 
-struct filter_head *
+struct rde_filter *
 peer_apply_out_filter(struct rde_peer *peer, struct filter_head *rules)
 {
-	struct filter_head *old;
-	struct filter_rule *fr, *new;
+	struct rde_filter *old, *new;
+	struct filter_rule *fr;
+	size_t count = 0;
 
 	old = peer->out_rules;
-	if ((peer->out_rules = malloc(sizeof(*peer->out_rules))) == NULL)
-		fatal(NULL);
-	TAILQ_INIT(peer->out_rules);
 
 	TAILQ_FOREACH(fr, rules, entry) {
 		if (rde_filter_skip_rule(peer, fr))
 			continue;
+		count++;
+	}
+	new = rde_filter_new(count);
 
-		new = rde_filter_dup(fr);
-		TAILQ_INSERT_TAIL(peer->out_rules, new, entry);
+	count = 0;
+	TAILQ_FOREACH(fr, rules, entry) {
+		if (rde_filter_skip_rule(peer, fr))
+			continue;
+		rde_filter_fill(new, count++, fr);
 	}
 
+	peer->out_rules = rde_filter_getcache(new);
 	return old;
 }
 
@@ -530,7 +535,7 @@ peer_delete(struct rde_peer *peer)
 	if (peer->state != PEER_DOWN)
 		peer_down(peer);
 
-	filterlist_free(peer->out_rules);
+	rde_filter_unref(peer->out_rules);
 	adjout_peer_free(peer);
 
 	RB_REMOVE(peer_tree, &peertable, peer);