From: Claudio Jeker Subject: bgpd: rework rde out filters To: tech@openbsd.org Date: Thu, 12 Feb 2026 16:40:10 +0100 The out filters are currently very expensive because at 700 peers rde_filter_out() is chasing memory all over the place and spends a lot of time waiting for RAM access into pretty fat objects. Do the same thing as was done for filter_sets. Use a rde_filter struct that uses an array of match rules and on top of this use a hash table and refcnt to dedup equal filters. This diff reduces the initial load of my test IXP RS setup from 25min to around 18min. So this change is a significant speedup on busy systems. I want to apply the same for inbound filters but that requires a fair amount of reshuffling which will take some time. -- :wq Claudio Index: usr.sbin/bgpctl/output.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v diff -u -p -r1.68 output.c --- usr.sbin/bgpctl/output.c 4 Feb 2026 11:48:33 -0000 1.68 +++ usr.sbin/bgpctl/output.c 11 Feb 2026 21:59:40 -0000 @@ -1104,6 +1104,10 @@ show_rib_mem(struct rde_memstats *stats) printf("%10lld pending prefix entries using %s of memory\n", stats->pend_prefix_cnt, fmt_mem(stats->pend_prefix_cnt * sizeof(struct pend_prefix))); + printf("%10lld filters using %s of memory\n", + stats->filter_cnt, fmt_mem(stats->filter_size)); + printf("\t and holding %lld references\n", + stats->filter_refs); printf("%10lld filter-sets using %s of memory\n", stats->filter_set_cnt, fmt_mem(stats->filter_set_size)); printf("\t and holding %lld references\n", Index: usr.sbin/bgpctl/output_json.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v diff -u -p -r1.59 output_json.c --- usr.sbin/bgpctl/output_json.c 4 Feb 2026 11:48:33 -0000 1.59 +++ usr.sbin/bgpctl/output_json.c 11 Feb 2026 22:00:28 -0000 @@ -938,10 +938,12 @@ json_rib_mem(struct rde_memstats *stats) json_do_end(); json_do_object("filters", 
0); + json_rib_mem_element("filter", stats->filter_cnt, + stats->filter_size, stats->filter_refs); json_rib_mem_element("filter_set", stats->filter_set_cnt, stats->filter_set_size, stats->filter_set_refs); json_rib_mem_element("total", UINT64_MAX, - stats->filter_set_size, UINT64_MAX); + stats->filter_size + stats->filter_set_size, UINT64_MAX); json_do_end(); json_do_object("sets", 0); Index: usr.sbin/bgpctl/output_ometric.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v diff -u -p -r1.23 output_ometric.c --- usr.sbin/bgpctl/output_ometric.c 4 Feb 2026 11:48:33 -0000 1.23 +++ usr.sbin/bgpctl/output_ometric.c 11 Feb 2026 22:01:03 -0000 @@ -327,10 +327,12 @@ ometric_rib_mem(struct rde_memstats *sta stats->aspath_size + stats->attr_cnt * sizeof(struct attr) + stats->attr_data, UINT64_MAX); + ometric_rib_mem_element("filter", stats->filter_cnt, + stats->filter_size, stats->filter_refs); ometric_rib_mem_element("filter_set", stats->filter_set_cnt, stats->filter_set_size, stats->filter_set_refs); ometric_rib_mem_element("filter_total", UINT64_MAX, - stats->filter_set_size, UINT64_MAX); + stats->filter_size + stats->filter_set_size, UINT64_MAX); ometric_set_int(rde_table_count, stats->aset_cnt, NULL); Index: usr.sbin/bgpd/bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v diff -u -p -r1.532 bgpd.h --- usr.sbin/bgpd/bgpd.h 4 Feb 2026 13:49:23 -0000 1.532 +++ usr.sbin/bgpd/bgpd.h 11 Feb 2026 13:05:19 -0000 @@ -1420,6 +1420,9 @@ struct rde_memstats { long long aset_nmemb; long long pset_cnt; long long pset_size; + long long filter_cnt; + long long filter_size; + long long filter_refs; long long filter_set_cnt; long long filter_set_size; long long filter_set_refs; Index: usr.sbin/bgpd/rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v diff -u -p -r1.682 rde.c --- 
usr.sbin/bgpd/rde.c 4 Feb 2026 11:41:11 -0000 1.682 +++ usr.sbin/bgpd/rde.c 11 Feb 2026 19:29:19 -0000 @@ -3772,6 +3772,7 @@ rde_reload_done(void) { struct rde_peer *peer; struct filter_head *fh; + struct rde_filter *rf; struct rde_prefixset_head prefixsets_old; struct rde_prefixset_head originsets_old; struct as_set_head as_sets_old; @@ -3922,15 +3923,15 @@ rde_reload_done(void) } /* reapply outbound filters for this peer */ - fh = peer_apply_out_filter(peer, out_rules); + rf = peer_apply_out_filter(peer, out_rules); - if (!rde_filter_equal(peer->out_rules, fh)) { + if (rf != peer->out_rules) { char *p = log_fmt_peer(&peer->conf); log_debug("out filter change: reloading peer %s", p); free(p); peer->reconf_out = 1; } - filterlist_free(fh); + rde_filter_unref(rf); } /* bring ribs in sync */ Index: usr.sbin/bgpd/rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v diff -u -p -r1.341 rde.h --- usr.sbin/bgpd/rde.h 11 Feb 2026 10:24:57 -0000 1.341 +++ usr.sbin/bgpd/rde.h 11 Feb 2026 19:51:13 -0000 @@ -83,6 +83,7 @@ CH_HEAD(pend_prefix_hash, pend_prefix); TAILQ_HEAD(pend_prefix_queue, pend_prefix); CH_HEAD(pend_attr_hash, pend_prefix); TAILQ_HEAD(pend_attr_queue, pend_attr); +struct rde_filter; struct rde_peer { RB_ENTRY(rde_peer) entry; @@ -97,7 +98,7 @@ struct rde_peer { struct pend_prefix_queue withdraws[AID_MAX]; struct pend_attr_hash pend_attrs; struct pend_prefix_hash pend_prefixes; - struct filter_head *out_rules; + struct rde_filter *out_rules; struct ibufqueue *ibufq; struct rib_queue rib_pq_head; monotime_t staletime[AID_MAX]; @@ -417,7 +418,7 @@ void peer_foreach(void (*)(struct rde_ struct rde_peer *peer_get(uint32_t); struct rde_peer *peer_match(struct ctl_neighbor *, uint32_t); struct rde_peer *peer_add(uint32_t, struct peer_config *, struct filter_head *); -struct filter_head *peer_apply_out_filter(struct rde_peer *, +struct rde_filter *peer_apply_out_filter(struct rde_peer *, struct filter_head 
*); void rde_generate_updates(struct rib_entry *, struct prefix *, @@ -549,7 +550,11 @@ void prefix_evaluate_nexthop(struct pr void rde_apply_set(const struct rde_filter_set *, struct rde_peer *, struct rde_peer *, struct filterstate *, u_int8_t); int rde_l3vpn_import(struct rde_community *, struct l3vpn *); -struct filter_rule *rde_filter_dup(const struct filter_rule *); +void rde_filter_unref(struct rde_filter *); +struct rde_filter *rde_filter_new(size_t); +struct rde_filter *rde_filter_getcache(struct rde_filter *); +void rde_filter_fill(struct rde_filter *, size_t, + const struct filter_rule *); void rde_filterstate_init(struct filterstate *); void rde_filterstate_prep(struct filterstate *, struct prefix *); void rde_filterstate_copy(struct filterstate *, struct filterstate *); @@ -563,7 +568,7 @@ void rde_filter_calc_skip_steps(struct f enum filter_actions rde_filter(struct filter_head *, struct rde_peer *, struct rde_peer *, struct bgpd_addr *, uint8_t, struct filterstate *); -enum filter_actions rde_filter_out(struct filter_head *, struct rde_peer *, +enum filter_actions rde_filter_out(struct rde_filter *, struct rde_peer *, struct rde_peer *, struct bgpd_addr *, uint8_t, struct filterstate *); Index: usr.sbin/bgpd/rde_filter.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_filter.c,v diff -u -p -r1.144 rde_filter.c --- usr.sbin/bgpd/rde_filter.c 11 Feb 2026 12:25:57 -0000 1.144 +++ usr.sbin/bgpd/rde_filter.c 11 Feb 2026 22:03:30 -0000 @@ -56,6 +56,20 @@ struct rde_filter_set { struct rde_filter_set_elm set[0]; }; +struct rde_filter_rule { + struct filter_match match; + struct rde_filter_set *rde_set; + enum filter_actions action; + uint8_t quick; +}; + +struct rde_filter { + uint64_t hash; + size_t len; + int refcnt; + struct rde_filter_rule rules[0]; +}; + void rde_apply_set(const struct rde_filter_set *rfs, struct rde_peer *peer, struct rde_peer *from, struct filterstate *state, uint8_t aid) @@ 
-447,19 +461,119 @@ rde_filter_equal(struct filter_head *a, return (1); } -struct filter_rule * -rde_filter_dup(const struct filter_rule *fr) +static SIPHASH_KEY rfkey; + +static inline uint64_t +rde_filter_hash(const struct rde_filter *rf) +{ + return rf->hash; +} + +static uint64_t +rde_filter_calc_hash(const struct rde_filter *rf) +{ + return SipHash24(&rfkey, rf->rules, rf->len * sizeof(rf->rules[0])); +} + +CH_HEAD(rde_filtertable, rde_filter); +CH_PROTOTYPE(rde_filtertable, rde_filter, rde_filter_hash); + +static struct rde_filtertable filter = CH_INITIALIZER(&filter); + +static void +rde_filter_free(struct rde_filter *rf) +{ + if (rf == NULL) + return; + + rdemem.filter_size -= sizeof(*rf) + rf->len * sizeof(rf->rules[0]); + rdemem.filter_cnt--; + free(rf); +} + +static void +rde_filter_ref(struct rde_filter *rf) +{ + rf->refcnt++; + rdemem.filter_refs++; +} + +void +rde_filter_unref(struct rde_filter *rf) { - struct filter_rule *new; + rf->refcnt--; + rdemem.filter_refs--; + if (rf->refcnt <= 0) { + CH_REMOVE(rde_filtertable, &filter, rf); + rde_filter_free(rf); + } +} - if ((new = malloc(sizeof(*new))) == NULL) +struct rde_filter * +rde_filter_new(size_t count) +{ + struct rde_filter *rf; + + if ((rf = calloc(1, sizeof(*rf) + count * sizeof(rf->rules[0]))) == + NULL) fatal(NULL); - *new = *fr; - /* XXX think about skip table */ - rde_filterset_ref(new->rde_set); - return new; + + rdemem.filter_size += sizeof(*rf) + count * sizeof(rf->rules[0]); + rdemem.filter_cnt++; + + rf->len = count; + return rf; } +struct rde_filter * +rde_filter_getcache(struct rde_filter *rf) +{ + struct rde_filter *nrf; + + rf->hash = rde_filter_calc_hash(rf); + if ((nrf = CH_FIND(rde_filtertable, &filter, rf)) == NULL) { + if (CH_INSERT(rde_filtertable, &filter, rf, NULL) != 1) + fatalx("%s: already present filter", __func__); + } else { + rde_filter_free(rf); + rf = nrf; + } + rde_filter_ref(rf); + return rf; +} + +void +rde_filter_fill(struct rde_filter *rf, size_t index, + 
const struct filter_rule *fr) +{ + struct rde_filter_rule *rule; + + if (rf->len <= index) + fatalx(__func__); + + rule = &rf->rules[index]; + rule->match = fr->match; + rule->rde_set = fr->rde_set; + rde_filterset_ref(rule->rde_set); + rule->action = fr->action; + rule->quick = fr->quick; +} + +static int +rde_filtertable_equal(const struct rde_filter *arf, + const struct rde_filter *brf) +{ + if (arf->len != brf->len) + return 0; + if (memcmp(arf->rules, brf->rules, + arf->len * sizeof(arf->rules[0])) != 0) + return 0; + return 1; +} + +CH_GENERATE(rde_filtertable, rde_filter, rde_filtertable_equal, + rde_filter_hash); + void rde_filterstate_init(struct filterstate *state) { @@ -1026,12 +1140,13 @@ rde_filter(struct filter_head *rules, st } enum filter_actions -rde_filter_out(struct filter_head *rules, struct rde_peer *peer, +rde_filter_out(struct rde_filter *rf, struct rde_peer *peer, struct rde_peer *from, struct bgpd_addr *prefix, uint8_t plen, struct filterstate *state) { - struct filter_rule *f; + struct rde_filter_rule *f; enum filter_actions action = ACTION_DENY; /* default deny */ + size_t i; if (state->aspath.flags & F_ATTR_PARSE_ERR) /* @@ -1040,14 +1155,11 @@ rde_filter_out(struct filter_head *rules */ return (ACTION_DENY); - if (rules == NULL) - return (action); - if (prefix->aid == AID_FLOWSPECv4 || prefix->aid == AID_FLOWSPECv6) return (ACTION_ALLOW); - f = TAILQ_FIRST(rules); - while (f != NULL) { + for (i = 0; i < rf->len; i++) { + f = &rf->rules[i]; if (rde_filter_match(&f->match, peer, from, state, prefix, plen)) { rde_apply_set(f->rde_set, peer, from, state, @@ -1057,7 +1169,6 @@ rde_filter_out(struct filter_head *rules if (f->quick) return (action); } - f = TAILQ_NEXT(f, entry); } return (action); } Index: usr.sbin/bgpd/rde_peer.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v diff -u -p -r1.66 rde_peer.c --- usr.sbin/bgpd/rde_peer.c 3 Feb 2026 12:25:16 -0000 1.66 +++ 
usr.sbin/bgpd/rde_peer.c 11 Feb 2026 21:42:59 -0000 @@ -209,25 +209,30 @@ peer_add(uint32_t id, struct peer_config return peer; } -struct filter_head * +struct rde_filter * peer_apply_out_filter(struct rde_peer *peer, struct filter_head *rules) { - struct filter_head *old; - struct filter_rule *fr, *new; + struct rde_filter *old, *new; + struct filter_rule *fr; + size_t count = 0; old = peer->out_rules; - if ((peer->out_rules = malloc(sizeof(*peer->out_rules))) == NULL) - fatal(NULL); - TAILQ_INIT(peer->out_rules); TAILQ_FOREACH(fr, rules, entry) { if (rde_filter_skip_rule(peer, fr)) continue; + count++; + } + new = rde_filter_new(count); - new = rde_filter_dup(fr); - TAILQ_INSERT_TAIL(peer->out_rules, new, entry); + count = 0; + TAILQ_FOREACH(fr, rules, entry) { + if (rde_filter_skip_rule(peer, fr)) + continue; + rde_filter_fill(new, count++, fr); } + peer->out_rules = rde_filter_getcache(new); return old; } @@ -530,7 +535,7 @@ peer_delete(struct rde_peer *peer) if (peer->state != PEER_DOWN) peer_down(peer); - filterlist_free(peer->out_rules); + rde_filter_unref(peer->out_rules); adjout_peer_free(peer); RB_REMOVE(peer_tree, &peertable, peer);