From: Claudio Jeker Subject: bgpd: rework the rde side of filter_sets To: tech@openbsd.org Date: Tue, 3 Feb 2026 16:01:57 +0100 rde_apply_set() is very inefficient at the moment. The code is hunting for memory all the time since there are just too many objects all over the place. This diff switches away from a linked list to an array of filter_set elements. On top of this the new rde_filter_set_elm structs are much smaller than the regular filter_sets. These changes make rde_apply_set() a lot more efficient. At the same time this brings in a new way to send and recv the imsgs with the filter sets in them. The goal is to add more of those send / recv functions to better validate these internal structs on receive. Also, since IMSG_FILTER_SET is also sent via the control socket, add an imsg_check_filterset() function that validates these messages before passing them on. There is a lot of churn because of the new rde_filter_set object. Also the diff of rde_filter.c is a big mess and so is hard to read. It is probably better to look at the result. Next step is to do a similar cleanup for filter rules but that is a bit more involved. 
-- :wq Claudio Index: bgpctl/Makefile =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/Makefile,v diff -u -p -r1.20 Makefile --- bgpctl/Makefile 20 Feb 2025 19:48:14 -0000 1.20 +++ bgpctl/Makefile 8 Dec 2025 13:00:31 -0000 @@ -5,7 +5,7 @@ PROG= bgpctl SRCS= bgpctl.c output.c output_json.c output_ometric.c parser.c \ mrtparser.c json.c ometric.c -SRCS+= util.c flowspec.c monotime.c +SRCS+= util.c flowspec.c monotime.c bgpd_imsg.c CFLAGS+= -Wall CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes CFLAGS+= -Wmissing-declarations Index: bgpctl/bgpctl.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v diff -u -p -r1.319 bgpctl.c --- bgpctl/bgpctl.c 4 Nov 2025 15:30:50 -0000 1.319 +++ bgpctl/bgpctl.c 6 Dec 2025 21:38:31 -0000 @@ -49,7 +49,6 @@ int main(int, char *[]); int show(struct imsg *, struct parse_result *); -void send_filterset(struct imsgbuf *, struct filter_set_head *); void show_mrt_dump_neighbors(struct mrt_rib *, struct mrt_peer *, void *); void show_mrt_dump(struct mrt_rib *, struct mrt_peer *, void *); @@ -329,7 +328,7 @@ main(int argc, char *argv[]) if (res->action == NETWORK_ADD) { imsg_compose(imsgbuf, IMSG_NETWORK_ADD, 0, 0, -1, &net, sizeof(net)); - send_filterset(imsgbuf, &res->set); + imsg_send_filterset(imsgbuf, &res->set); imsg_compose(imsgbuf, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0); } else @@ -373,7 +372,7 @@ main(int argc, char *argv[]) if (res->action == FLOWSPEC_ADD) { imsg_compose(imsgbuf, IMSG_FLOWSPEC_ADD, 0, 0, -1, f, FLOWSPEC_SIZE + f->len); - send_filterset(imsgbuf, &res->set); + imsg_send_filterset(imsgbuf, &res->set); imsg_compose(imsgbuf, IMSG_FLOWSPEC_DONE, 0, 0, -1, NULL, 0); } else @@ -1134,19 +1133,6 @@ fmt_set_type(struct ctl_show_set *set) return "ASNUM"; default: return "BULA"; - } -} - -void -send_filterset(struct imsgbuf *i, struct filter_set_head *set) -{ - struct filter_set *s; - - while ((s = 
TAILQ_FIRST(set)) != NULL) { - imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s, - sizeof(struct filter_set)); - TAILQ_REMOVE(set, s, entry); - free(s); } } Index: bgpctl/output.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v diff -u -p -r1.67 output.c --- bgpctl/output.c 29 Dec 2025 07:49:05 -0000 1.67 +++ bgpctl/output.c 3 Feb 2026 13:58:19 -0000 @@ -1104,6 +1104,10 @@ show_rib_mem(struct rde_memstats *stats) printf("%10lld pending prefix entries using %s of memory\n", stats->pend_prefix_cnt, fmt_mem(stats->pend_prefix_cnt * sizeof(struct pend_prefix))); + printf("%10lld filter-sets using %s of memory\n", + stats->filter_set_cnt, fmt_mem(stats->filter_set_size)); + printf("\t and holding %lld references\n", + stats->filter_set_refs); printf("%10lld as-set elements in %lld tables using " "%s of memory\n", stats->aset_nmemb, stats->aset_cnt, fmt_mem(stats->aset_size)); @@ -1119,8 +1123,9 @@ show_rib_mem(struct rde_memstats *stats) stats->path_cnt * sizeof(struct rde_aspath) + stats->aspath_size + stats->attr_cnt * sizeof(struct attr) + stats->attr_data)); - printf("Sets using %s of memory\n", fmt_mem(stats->aset_size + - stats->pset_size)); + printf("Sets and filters using %s of memory\n", + fmt_mem(stats->aset_size + stats->pset_size + + stats->filter_set_size)); printf("\nRDE timing statistics\n"); printf("%10lld usec spent in the event loop for %llu rounds\n", Index: bgpctl/output_json.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v diff -u -p -r1.58 output_json.c --- bgpctl/output_json.c 29 Dec 2025 07:49:05 -0000 1.58 +++ bgpctl/output_json.c 3 Feb 2026 14:12:15 -0000 @@ -937,6 +937,13 @@ json_rib_mem(struct rde_memstats *stats) stats->attr_data, UINT64_MAX); json_do_end(); + json_do_object("filters", 0); + json_rib_mem_element("filter_set", stats->filter_set_cnt, + stats->filter_set_size, stats->filter_set_refs); + 
json_rib_mem_element("total", UINT64_MAX, + stats->filter_set_size, UINT64_MAX); + json_do_end(); + json_do_object("sets", 0); json_rib_mem_element("as_set", stats->aset_nmemb, stats->aset_size, UINT64_MAX); Index: bgpctl/output_ometric.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v diff -u -p -r1.22 output_ometric.c --- bgpctl/output_ometric.c 29 Dec 2025 07:49:05 -0000 1.22 +++ bgpctl/output_ometric.c 3 Feb 2026 14:11:14 -0000 @@ -327,6 +327,11 @@ ometric_rib_mem(struct rde_memstats *sta stats->aspath_size + stats->attr_cnt * sizeof(struct attr) + stats->attr_data, UINT64_MAX); + ometric_rib_mem_element("filter_set", stats->filter_set_cnt, + stats->filter_set_size, stats->filter_set_refs); + ometric_rib_mem_element("filter_total", UINT64_MAX, + stats->filter_set_size, UINT64_MAX); + ometric_set_int(rde_table_count, stats->aset_cnt, NULL); ometric_set_int_with_labels(rde_set_size, stats->aset_size, Index: bgpd/Makefile =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/Makefile,v diff -u -p -r1.45 Makefile --- bgpd/Makefile 11 Dec 2025 12:18:27 -0000 1.45 +++ bgpd/Makefile 13 Dec 2025 21:47:20 -0000 @@ -2,6 +2,7 @@ PROG= bgpd SRCS= bgpd.c +SRCS+= bgpd_imsg.c SRCS+= bitmap.c SRCS+= carp.c SRCS+= chash.c Index: bgpd/bgpd.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v diff -u -p -r1.286 bgpd.c --- bgpd/bgpd.c 3 Dec 2025 12:20:19 -0000 1.286 +++ bgpd/bgpd.c 6 Dec 2025 21:32:44 -0000 @@ -672,7 +672,7 @@ send_config(struct bgpd_config *conf) if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_ADD, 0, 0, -1, f->flow, FLOWSPEC_SIZE + f->flow->len) == -1) return (-1); - if (filterset_send(ibuf_rde, &f->attrset) == -1) + if (imsg_send_filterset(ibuf_rde, &f->attrset) == -1) return (-1); if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_DONE, 0, 0, -1, NULL, 0) == -1) @@ -781,7 +781,7 @@ send_config(struct 
bgpd_config *conf) /* filters for the RDE */ while ((r = TAILQ_FIRST(conf->filters)) != NULL) { TAILQ_REMOVE(conf->filters, r, entry); - if (filterset_send(ibuf_rde, &r->set) == -1) + if (imsg_send_filterset(ibuf_rde, &r->set) == -1) return (-1); if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1, r, sizeof(struct filter_rule)) == -1) @@ -806,7 +806,7 @@ send_config(struct bgpd_config *conf) return (-1); /* export targets */ - if (filterset_send(ibuf_rde, &vpn->export) == -1) + if (imsg_send_filterset(ibuf_rde, &vpn->export) == -1) return (-1); if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0, -1, NULL, 0) == -1) @@ -814,7 +814,7 @@ send_config(struct bgpd_config *conf) filterset_free(&vpn->export); /* import targets */ - if (filterset_send(ibuf_rde, &vpn->import) == -1) + if (imsg_send_filterset(ibuf_rde, &vpn->import) == -1) return (-1); if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0, -1, NULL, 0) == -1) @@ -1170,7 +1170,7 @@ send_network(int type, struct network_co /* networks that get deleted don't need to send the filter set */ if (type == IMSG_NETWORK_REMOVE) return (0); - if (filterset_send(ibuf_rde, h) == -1) + if (imsg_send_filterset(ibuf_rde, h) == -1) return (-1); if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1) return (-1); Index: bgpd/bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v diff -u -p -r1.530 bgpd.h --- bgpd/bgpd.h 3 Feb 2026 12:25:16 -0000 1.530 +++ bgpd/bgpd.h 3 Feb 2026 13:56:43 -0000 @@ -252,6 +252,7 @@ TAILQ_HEAD(timer_head, timer); TAILQ_HEAD(listen_addrs, listen_addr); TAILQ_HEAD(filter_set_head, filter_set); +struct rde_filter_set; struct bitmap { uint64_t data[2]; @@ -566,6 +567,7 @@ enum network_type { struct network_config { struct bgpd_addr prefix; struct filter_set_head attrset; + struct rde_filter_set *rde_attrset; char psname[SET_NAME_LEN]; uint64_t rd; enum network_type type; @@ -591,6 +593,7 @@ struct flowspec { 
struct flowspec_config { RB_ENTRY(flowspec_config) entry; struct filter_set_head attrset; + struct rde_filter_set *rde_attrset; struct flowspec *flow; enum reconf_action reconf_action; }; @@ -1265,6 +1268,7 @@ struct filter_rule { struct filter_peers peer; struct filter_match match; struct filter_set_head set; + struct rde_filter_set *rde_set; #define RDE_FILTER_SKIP_PEERID 0 #define RDE_FILTER_SKIP_GROUPID 1 #define RDE_FILTER_SKIP_REMOTE_AS 2 @@ -1363,6 +1367,8 @@ struct l3vpn { char ifmpe[IFNAMSIZ]; struct filter_set_head import; struct filter_set_head export; + struct rde_filter_set *rde_import; + struct rde_filter_set *rde_export; struct network_head net_l; uint64_t rd; u_int rtableid; @@ -1420,6 +1426,9 @@ struct rde_memstats { long long aset_nmemb; long long pset_cnt; long long pset_size; + long long filter_set_cnt; + long long filter_set_size; + long long filter_set_refs; long long rde_event_loop_count; long long rde_event_loop_usec; long long rde_event_io_usec; @@ -1581,13 +1590,12 @@ int pftable_commit(void); /* rde_filter.c */ void filterset_free(struct filter_set_head *); +void rde_filterset_unref(struct rde_filter_set *); int filterset_cmp(struct filter_set *, struct filter_set *); void filterset_move(struct filter_set_head *, struct filter_set_head *); void filterset_copy(const struct filter_set_head *, struct filter_set_head *); const char *filterset_name(enum action_types); -int filterset_send(struct imsgbuf *, struct filter_set_head *); -void filterset_recv(struct imsg *, struct filter_set_head *); /* bitmap.c */ int bitmap_set(struct bitmap *, uint32_t); @@ -1680,6 +1688,12 @@ const char *get_baudrate(unsigned long l unsigned int bin_of_attrs(unsigned int); unsigned int bin_of_communities(unsigned int); unsigned int bin_of_adjout_prefixes(unsigned int); + +/* bgpd_imsg.c */ +int imsg_send_filterset(struct imsgbuf *, struct filter_set_head *); +int imsg_check_filterset(struct imsg *); +int ibuf_recv_filterset_count(struct ibuf *); +int 
ibuf_recv_one_filterset(struct ibuf *, struct filter_set *); /* flowspec.c */ int flowspec_valid(const uint8_t *, int, int); Index: bgpd/bgpd_imsg.c =================================================================== RCS file: bgpd/bgpd_imsg.c diff -N bgpd/bgpd_imsg.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ bgpd/bgpd_imsg.c 3 Feb 2026 13:06:04 -0000 @@ -0,0 +1,211 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2026 Claudio Jeker + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include + +#include "bgpd.h" +#include "rde.h" +#include "log.h" + +int +imsg_send_filterset(struct imsgbuf *imsgbuf, struct filter_set_head *set) +{ + struct filter_set *s; + struct ibuf *msg; + int nsets = 0; + + msg = imsg_create(imsgbuf, IMSG_FILTER_SET, 0, 0, 0); + if (msg == NULL) + return -1; + + TAILQ_FOREACH(s, set, entry) + nsets++; + if (ibuf_add_n16(msg, nsets) == -1) + goto fail; + + TAILQ_FOREACH(s, set, entry) { + if (ibuf_add_n32(msg, s->type) == -1) + goto fail; + + switch (s->type) { + case ACTION_SET_PREPEND_SELF: + case ACTION_SET_PREPEND_PEER: + if (ibuf_add_n8(msg, s->action.prepend) == -1) + goto fail; + break; + case ACTION_SET_AS_OVERRIDE: + break; + case ACTION_SET_LOCALPREF: + case ACTION_SET_MED: + case ACTION_SET_WEIGHT: + if (ibuf_add_n32(msg, s->action.metric) == -1) + goto fail; + break; + case ACTION_SET_RELATIVE_LOCALPREF: + case ACTION_SET_RELATIVE_MED: + case ACTION_SET_RELATIVE_WEIGHT: + if (ibuf_add_n32(msg, s->action.relative) == -1) + goto fail; + break; + case ACTION_SET_NEXTHOP: + if (ibuf_add(msg, &s->action.nexthop, + sizeof(s->action.nexthop)) == -1) + goto fail; + break; + case ACTION_SET_NEXTHOP_BLACKHOLE: + case ACTION_SET_NEXTHOP_REJECT: + case ACTION_SET_NEXTHOP_NOMODIFY: + case ACTION_SET_NEXTHOP_SELF: + break; + case ACTION_DEL_COMMUNITY: + case ACTION_SET_COMMUNITY: + if (ibuf_add(msg, &s->action.community, + sizeof(s->action.community)) == -1) + goto fail; + break; + case ACTION_PFTABLE: + if (ibuf_add_strbuf(msg, s->action.pftable, + sizeof(s->action.pftable)) == -1) + goto fail; + break; + case ACTION_RTLABEL: + if (ibuf_add_strbuf(msg, s->action.rtlabel, + sizeof(s->action.rtlabel)) == -1) + goto fail; + break; + case ACTION_SET_ORIGIN: + if (ibuf_add_n8(msg, s->action.origin) == -1) + goto fail; + break; + case ACTION_SET_NEXTHOP_REF: + case ACTION_RTLABEL_ID: + case ACTION_PFTABLE_ID: + goto fail; + } + } + + imsg_close(imsgbuf, msg); + return 0; + +fail: + ibuf_free(msg); + 
return -1; +} + +int +imsg_check_filterset(struct imsg *imsg) +{ + struct ibuf ibuf; + uint16_t count, i; + + if (imsg_get_ibuf(imsg, &ibuf) == -1) + return -1; + if (ibuf_get_n16(&ibuf, &count) == -1) + return -1; + for (i = 0; i < count; i++) { + struct filter_set set; + if (ibuf_recv_one_filterset(&ibuf, &set) == -1) + return -1; + } + if (ibuf_size(&ibuf) != 0) { + errno = EBADMSG; + return -1; + } + return 0; +} + +int +ibuf_recv_filterset_count(struct ibuf *ibuf) +{ + uint16_t count; + + if (ibuf_get_n16(ibuf, &count) == -1) + return -1; + return count; +} + +int +ibuf_recv_one_filterset(struct ibuf *ibuf, struct filter_set *set) +{ + uint32_t type; + + memset(set, 0, sizeof(*set)); + + if (ibuf_get_n32(ibuf, &type) == -1) + return -1; + set->type = type; + + switch (set->type) { + case ACTION_SET_PREPEND_SELF: + case ACTION_SET_PREPEND_PEER: + if (ibuf_get_n8(ibuf, &set->action.prepend) == -1) + return -1; + break; + case ACTION_SET_AS_OVERRIDE: + break; + case ACTION_SET_LOCALPREF: + case ACTION_SET_MED: + case ACTION_SET_WEIGHT: + if (ibuf_get_n32(ibuf, &set->action.metric) == -1) + return -1; + break; + case ACTION_SET_RELATIVE_LOCALPREF: + case ACTION_SET_RELATIVE_MED: + case ACTION_SET_RELATIVE_WEIGHT: + if (ibuf_get_n32(ibuf, &set->action.relative) == -1) + return -1; + break; + case ACTION_SET_NEXTHOP: + if (ibuf_get(ibuf, &set->action.nexthop, + sizeof(set->action.nexthop)) == -1) + return -1; + break; + case ACTION_SET_NEXTHOP_BLACKHOLE: + case ACTION_SET_NEXTHOP_REJECT: + case ACTION_SET_NEXTHOP_NOMODIFY: + case ACTION_SET_NEXTHOP_SELF: + break; + case ACTION_DEL_COMMUNITY: + case ACTION_SET_COMMUNITY: + if (ibuf_get(ibuf, &set->action.community, + sizeof(set->action.community)) == -1) + return -1; + break; + case ACTION_PFTABLE: + if (ibuf_get_strbuf(ibuf, set->action.pftable, + sizeof(set->action.pftable)) == -1) + return -1; + break; + case ACTION_RTLABEL: + if (ibuf_get_strbuf(ibuf, set->action.rtlabel, + sizeof(set->action.rtlabel)) == -1) + 
return -1; + break; + case ACTION_SET_ORIGIN: + if (ibuf_get_n8(ibuf, &set->action.origin) == -1) + return -1; + break; + case ACTION_SET_NEXTHOP_REF: + case ACTION_RTLABEL_ID: + case ACTION_PFTABLE_ID: + errno = EBADMSG; + return -1; + } + return 0; +} Index: bgpd/control.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/control.c,v diff -u -p -r1.135 control.c --- bgpd/control.c 10 Mar 2025 14:11:38 -0000 1.135 +++ bgpd/control.c 3 Feb 2026 13:05:51 -0000 @@ -513,7 +513,14 @@ control_dispatch_msg(struct pollfd *pfd, case IMSG_FLOWSPEC_REMOVE: case IMSG_FLOWSPEC_DONE: case IMSG_FLOWSPEC_FLUSH: + imsg_ctl_rde(&imsg); + break; case IMSG_FILTER_SET: + if (imsg_check_filterset(&imsg) == -1) { + /* malformed request */ + control_result(c, CTL_RES_PARSE_ERROR); + break; + } imsg_ctl_rde(&imsg); break; case IMSG_CTL_LOG_VERBOSE: Index: bgpd/rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v diff -u -p -r1.681 rde.c --- bgpd/rde.c 3 Feb 2026 12:25:16 -0000 1.681 +++ bgpd/rde.c 3 Feb 2026 12:52:48 -0000 @@ -101,7 +101,7 @@ static void network_dump_upcall(struct static void network_flush_upcall(struct rib_entry *, void *); void flowspec_add(struct flowspec *, struct filterstate *, - struct filter_set_head *); + struct rde_filter_set *); void flowspec_delete(struct flowspec *); static void flowspec_flush_upcall(struct rib_entry *, void *); static void flowspec_dump_upcall(struct rib_entry *, void *); @@ -400,8 +400,8 @@ rde_main(int debug, int verbose) struct network_config netconf_s, netconf_p; struct filterstate netconf_state; -struct filter_set_head session_set = TAILQ_HEAD_INITIALIZER(session_set); -struct filter_set_head parent_set = TAILQ_HEAD_INITIALIZER(parent_set); +struct rde_filter_set *session_set; +struct rde_filter_set *parent_set; void rde_dispatch_imsg_session(struct imsgbuf *imsgbuf) @@ -563,7 +563,8 @@ rde_dispatch_imsg_session(struct imsgbuf 
} break; case IMSG_NETWORK_DONE: - TAILQ_CONCAT(&netconf_s.attrset, &session_set, entry); + netconf_s.rde_attrset = session_set; + session_set = NULL; switch (netconf_s.prefix.aid) { case AID_INET: if (netconf_s.prefixlen > 32) @@ -658,10 +659,11 @@ badnetdel: "from bgpctl"); else flowspec_add(curflow, &netconf_state, - &session_set); + session_set); rde_filterstate_clean(&netconf_state); - filterset_free(&session_set); + rde_filterset_unref(session_set); + session_set = NULL; free(curflow); curflow = NULL; break; @@ -702,7 +704,7 @@ badnetdel: flowspec_flush_upcall, NULL); break; case IMSG_FILTER_SET: - filterset_recv(&imsg, &session_set); + session_set = rde_filterset_imsg_recv(&imsg); break; case IMSG_CTL_SHOW_NETWORK: case IMSG_CTL_SHOW_RIB: @@ -920,7 +922,8 @@ rde_dispatch_imsg_parent(struct imsgbuf TAILQ_INIT(&netconf_p.attrset); break; case IMSG_NETWORK_DONE: - TAILQ_CONCAT(&netconf_p.attrset, &parent_set, entry); + netconf_p.rde_attrset = parent_set; + parent_set = NULL; rde_filterstate_init(&state); asp = &state.aspath; @@ -982,10 +985,11 @@ rde_dispatch_imsg_parent(struct imsgbuf log_warnx("invalid flowspec update received " "from parent"); else - flowspec_add(curflow, &state, &parent_set); + flowspec_add(curflow, &state, parent_set); rde_filterstate_clean(&state); - filterset_free(&parent_set); + rde_filterset_unref(parent_set); + parent_set = NULL; free(curflow); curflow = NULL; break; @@ -1090,11 +1094,12 @@ rde_dispatch_imsg_parent(struct imsgbuf } } TAILQ_INIT(&r->set); - TAILQ_CONCAT(&r->set, &parent_set, entry); + r->rde_set = parent_set; + parent_set = NULL; if ((rib = rib_byid(rib_find(r->rib))) == NULL) { log_warnx("IMSG_RECONF_FILTER: filter rule " "for nonexistent rib %s", r->rib); - filterset_free(&r->set); + rde_filterset_unref(r->rde_set); free(r); break; } @@ -1192,7 +1197,8 @@ rde_dispatch_imsg_parent(struct imsgbuf "IMSG_RECONF_VPN_EXPORT unexpected"); break; } - TAILQ_CONCAT(&vpn->export, &parent_set, entry); + vpn->rde_export = 
parent_set; + parent_set = NULL; break; case IMSG_RECONF_VPN_IMPORT: if (vpn == NULL) { @@ -1200,7 +1206,8 @@ rde_dispatch_imsg_parent(struct imsgbuf "IMSG_RECONF_VPN_IMPORT unexpected"); break; } - TAILQ_CONCAT(&vpn->import, &parent_set, entry); + vpn->rde_import = parent_set; + parent_set = NULL; break; case IMSG_RECONF_VPN_DONE: break; @@ -1221,7 +1228,7 @@ rde_dispatch_imsg_parent(struct imsgbuf nexthop_update(&knext); break; case IMSG_FILTER_SET: - filterset_recv(&imsg, &parent_set); + parent_set = rde_filterset_imsg_recv(&imsg); break; case IMSG_MRT_OPEN: case IMSG_MRT_REOPEN: @@ -4545,7 +4552,7 @@ void network_add(struct network_config *nc, struct filterstate *state) { struct l3vpn *vpn; - struct filter_set_head *vpnset = NULL; + struct rde_filter_set *vpnset = NULL; struct in_addr prefix4; struct in6_addr prefix6; uint32_t path_id_tx; @@ -4571,7 +4578,7 @@ network_add(struct network_config *nc, s nc->prefix.labelstack[2] = (vpn->label << 4) & 0xf0; nc->prefix.labelstack[2] |= BGP_MPLS_BOS; - vpnset = &vpn->export; + vpnset = vpn->rde_export; break; case AID_INET6: prefix6 = nc->prefix.v6; @@ -4587,11 +4594,11 @@ network_add(struct network_config *nc, s nc->prefix.labelstack[2] = (vpn->label << 4) & 0xf0; nc->prefix.labelstack[2] |= BGP_MPLS_BOS; - vpnset = &vpn->export; + vpnset = vpn->rde_export; break; default: log_warnx("unable to VPNize prefix"); - filterset_free(&nc->attrset); + rde_filterset_unref(nc->rde_attrset); return; } break; @@ -4605,7 +4612,8 @@ network_add(struct network_config *nc, s } } - rde_apply_set(&nc->attrset, peerself, peerself, state, nc->prefix.aid); + rde_apply_set(nc->rde_attrset, peerself, peerself, state, + nc->prefix.aid); if (vpnset) rde_apply_set(vpnset, peerself, peerself, state, nc->prefix.aid); @@ -4628,7 +4636,7 @@ network_add(struct network_config *nc, s prefix_update(rib, peerself, 0, path_id_tx, state, 0, &nc->prefix, nc->prefixlen); } - filterset_free(&nc->attrset); + rde_filterset_unref(nc->rde_attrset); } void @@ 
-4761,7 +4769,7 @@ network_flush_upcall(struct rib_entry *r */ void flowspec_add(struct flowspec *f, struct filterstate *state, - struct filter_set_head *attrset) + struct rde_filter_set *attrset) { struct pt_entry *pte; uint32_t path_id_tx; Index: bgpd/rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v diff -u -p -r1.339 rde.h --- bgpd/rde.h 3 Feb 2026 12:25:16 -0000 1.339 +++ bgpd/rde.h 3 Feb 2026 12:55:30 -0000 @@ -546,17 +546,19 @@ void prefix_evaluate_nexthop(struct pr enum nexthop_state); /* rde_filter.c */ -void rde_apply_set(struct filter_set_head *, struct rde_peer *, +void rde_apply_set(const struct rde_filter_set *, struct rde_peer *, struct rde_peer *, struct filterstate *, u_int8_t); -int rde_l3vpn_import(struct rde_community *, struct l3vpn *); +int rde_l3vpn_import(struct rde_community *, struct l3vpn *); struct filter_rule *rde_filter_dup(const struct filter_rule *); void rde_filterstate_init(struct filterstate *); void rde_filterstate_prep(struct filterstate *, struct prefix *); void rde_filterstate_copy(struct filterstate *, struct filterstate *); void rde_filterstate_set_vstate(struct filterstate *, uint8_t, uint8_t); void rde_filterstate_clean(struct filterstate *); +uint64_t rde_filterset_calc_hash(const struct rde_filter_set *); int rde_filter_skip_rule(struct rde_peer *, struct filter_rule *); int rde_filter_equal(struct filter_head *, struct filter_head *); +struct rde_filter_set *rde_filterset_imsg_recv(struct imsg *); void rde_filter_calc_skip_steps(struct filter_head *); enum filter_actions rde_filter(struct filter_head *, struct rde_peer *, struct rde_peer *, struct bgpd_addr *, uint8_t, Index: bgpd/rde_filter.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_filter.c,v diff -u -p -r1.139 rde_filter.c --- bgpd/rde_filter.c 3 Feb 2026 12:25:16 -0000 1.139 +++ bgpd/rde_filter.c 3 Feb 2026 14:53:38 -0000 @@ -21,27 +21,56 
@@ #include #include +#include #include #include #include +#include #include "bgpd.h" #include "rde.h" #include "log.h" +#include "chash.h" -int filterset_equal(struct filter_set_head *, struct filter_set_head *); +static int rde_filterset_equal(const struct rde_filter_set *, + const struct rde_filter_set *); +static void rde_filterset_ref(struct rde_filter_set *); + +struct rde_filter_set_elm { + enum action_types type; + union { + uint8_t prepend; + uint8_t origin; + uint16_t id; + uint32_t metric; + int32_t relative; + struct nexthop *nh_ref; + struct community community; + } action; +}; + +struct rde_filter_set { + uint64_t hash; + size_t len; + int refcnt; + struct rde_filter_set_elm set[0]; +}; void -rde_apply_set(struct filter_set_head *sh, struct rde_peer *peer, +rde_apply_set(const struct rde_filter_set *rfs, struct rde_peer *peer, struct rde_peer *from, struct filterstate *state, uint8_t aid) { - struct filter_set *set; u_char *np; + size_t i; uint32_t prep_as; uint16_t nl; uint8_t prepend; - TAILQ_FOREACH(set, sh, entry) { + if (rfs == NULL) + return; + for (i = 0; i < rfs->len; i++) { + const struct rde_filter_set_elm *set = &rfs->set[i]; + switch (set->type) { case ACTION_SET_LOCALPREF: state->aspath.lpref = set->action.metric; @@ -169,12 +198,16 @@ rde_apply_set(struct filter_set_head *sh } } +/* use to match the import filters for vpn imports */ int rde_l3vpn_import(struct rde_community *comm, struct l3vpn *rd) { - struct filter_set *s; + size_t i; - TAILQ_FOREACH(s, &rd->import, entry) { + if (rd->rde_import == NULL) + return (0); + for (i = 0; i < rd->rde_import->len; i++) { + struct rde_filter_set_elm *s = &rd->rde_import->set[i]; if (community_match(comm, &s->action.community, 0)) return (1); } @@ -414,7 +447,7 @@ rde_filter_equal(struct filter_head *a, return (0); } - if (!filterset_equal(&fa->set, &fb->set)) + if (!rde_filterset_equal(fa->rde_set, fb->rde_set)) return (0); fa = TAILQ_NEXT(fa, entry); @@ -431,7 +464,8 @@ rde_filter_dup(const 
struct filter_rule if ((new = malloc(sizeof(*new))) == NULL) fatal(NULL); *new = *fr; - filterset_copy(&fr->set, &new->set); + /* XXX think about skip table */ + rde_filterset_ref(new->rde_set); return new; } @@ -511,6 +545,8 @@ filterlist_free(struct filter_head *fh) while ((r = TAILQ_FIRST(fh)) != NULL) { TAILQ_REMOVE(fh, r, entry); filterset_free(&r->set); + if (r->rde_set != NULL) + rde_filterset_unref(r->rde_set); free(r); } free(fh); @@ -537,6 +573,49 @@ filterset_free(struct filter_set_head *s } } +const char * +filterset_name(enum action_types type) +{ + switch (type) { + case ACTION_SET_LOCALPREF: + case ACTION_SET_RELATIVE_LOCALPREF: + return ("localpref"); + case ACTION_SET_MED: + case ACTION_SET_RELATIVE_MED: + return ("metric"); + case ACTION_SET_WEIGHT: + case ACTION_SET_RELATIVE_WEIGHT: + return ("weight"); + case ACTION_SET_PREPEND_SELF: + return ("prepend-self"); + case ACTION_SET_PREPEND_PEER: + return ("prepend-peer"); + case ACTION_SET_AS_OVERRIDE: + return ("as-override"); + case ACTION_SET_NEXTHOP: + case ACTION_SET_NEXTHOP_REF: + case ACTION_SET_NEXTHOP_REJECT: + case ACTION_SET_NEXTHOP_BLACKHOLE: + case ACTION_SET_NEXTHOP_NOMODIFY: + case ACTION_SET_NEXTHOP_SELF: + return ("nexthop"); + case ACTION_SET_COMMUNITY: + return ("community"); + case ACTION_DEL_COMMUNITY: + return ("community delete"); + case ACTION_PFTABLE: + case ACTION_PFTABLE_ID: + return ("pftable"); + case ACTION_RTLABEL: + case ACTION_RTLABEL_ID: + return ("rtlabel"); + case ACTION_SET_ORIGIN: + return ("origin"); + } + + fatalx("filterset_name: got lost"); +} + /* * this function is a bit more complicated than a memcmp() because there are * types that need to be considered equal e.g. 
ACTION_SET_MED and @@ -610,201 +689,248 @@ filterset_copy(const struct filter_set_h } } -int -filterset_equal(struct filter_set_head *ah, struct filter_set_head *bh) +static int +rde_filterset_equal(const struct rde_filter_set *afs, + const struct rde_filter_set *bfs) { - struct filter_set *a, *b; - const char *as, *bs; + const struct rde_filter_set_elm *a, *b; + size_t i; + + if (afs->len != bfs->len) + return 0; + + a = afs->set; + b = bfs->set; + for (i = 0; i < afs->len; i++, a++, b++) { + if (a->type != b->type) + return 0; - for (a = TAILQ_FIRST(ah), b = TAILQ_FIRST(bh); - a != NULL && b != NULL; - a = TAILQ_NEXT(a, entry), b = TAILQ_NEXT(b, entry)) { switch (a->type) { case ACTION_SET_PREPEND_SELF: case ACTION_SET_PREPEND_PEER: - if (a->type == b->type && - a->action.prepend == b->action.prepend) + if (a->action.prepend == b->action.prepend) continue; break; case ACTION_SET_AS_OVERRIDE: - if (a->type == b->type) - continue; - break; + continue; case ACTION_SET_LOCALPREF: case ACTION_SET_MED: case ACTION_SET_WEIGHT: - if (a->type == b->type && - a->action.metric == b->action.metric) + if (a->action.metric == b->action.metric) continue; break; case ACTION_SET_RELATIVE_LOCALPREF: case ACTION_SET_RELATIVE_MED: case ACTION_SET_RELATIVE_WEIGHT: - if (a->type == b->type && - a->action.relative == b->action.relative) - continue; - break; - case ACTION_SET_NEXTHOP: - if (a->type == b->type && - memcmp(&a->action.nexthop, &b->action.nexthop, - sizeof(a->action.nexthop)) == 0) + if (a->action.relative == b->action.relative) continue; break; case ACTION_SET_NEXTHOP_REF: - if (a->type == b->type && - a->action.nh_ref == b->action.nh_ref) + if (a->action.nh_ref == b->action.nh_ref) continue; break; case ACTION_SET_NEXTHOP_BLACKHOLE: case ACTION_SET_NEXTHOP_REJECT: case ACTION_SET_NEXTHOP_NOMODIFY: case ACTION_SET_NEXTHOP_SELF: - if (a->type == b->type) - continue; - break; + continue; case ACTION_DEL_COMMUNITY: case ACTION_SET_COMMUNITY: - if (a->type == b->type && - 
memcmp(&a->action.community, &b->action.community, + if (memcmp(&a->action.community, &b->action.community, sizeof(a->action.community)) == 0) continue; break; - case ACTION_PFTABLE: - case ACTION_PFTABLE_ID: - if (b->type == ACTION_PFTABLE) - bs = b->action.pftable; - else if (b->type == ACTION_PFTABLE_ID) - bs = pftable_id2name(b->action.id); - else - break; - - if (a->type == ACTION_PFTABLE) - as = a->action.pftable; - else - as = pftable_id2name(a->action.id); - - if (strcmp(as, bs) == 0) - continue; - break; - case ACTION_RTLABEL: case ACTION_RTLABEL_ID: - if (b->type == ACTION_RTLABEL) - bs = b->action.rtlabel; - else if (b->type == ACTION_RTLABEL_ID) - bs = rtlabel_id2name(b->action.id); - else - break; - - if (a->type == ACTION_RTLABEL) - as = a->action.rtlabel; - else - as = rtlabel_id2name(a->action.id); - - if (strcmp(as, bs) == 0) + case ACTION_PFTABLE_ID: + if (a->action.id == b->action.id) continue; break; case ACTION_SET_ORIGIN: - if (a->type == b->type && - a->action.origin == b->action.origin) + if (a->action.origin == b->action.origin) continue; break; + case ACTION_SET_NEXTHOP: + case ACTION_RTLABEL: + case ACTION_PFTABLE: + fatalx("unexpected filter action in RDE"); } /* compare failed */ - return (0); + return 0; } - if (a != NULL || b != NULL) - return (0); - return (1); + return 1; } -const char * -filterset_name(enum action_types type) +static SIPHASH_KEY fskey; + +static inline uint64_t +rde_filterset_hash(const struct rde_filter_set *rfs) { - switch (type) { + return rfs->hash; +} + +uint64_t +rde_filterset_calc_hash(const struct rde_filter_set *rfs) +{ + return SipHash24(&fskey, rfs->set, rfs->len * sizeof(*rfs->set)); +} + +CH_HEAD(rde_filterset, rde_filter_set); +CH_PROTOTYPE(rde_filterset, rde_filter_set, rde_filterset_hash); + +static struct rde_filterset filterset = CH_INITIALIZER(&filterset); + +static void +rde_filterset_free(struct rde_filter_set *rfs) +{ + struct rde_filter_set_elm *rfse; + size_t i; + + if (rfs == NULL) + 
return; + + rdemem.filter_set_size -= sizeof(*rfs) + rfs->len * sizeof(*rfse); + rdemem.filter_set_cnt--; + + rfse = rfs->set; + for (i = 0; i < rfs->len; i++, rfse++) { + if (rfse->type == ACTION_RTLABEL_ID) + rtlabel_unref(rfse->action.id); + else if (rfse->type == ACTION_PFTABLE_ID) + pftable_unref(rfse->action.id); + else if (rfse->type == ACTION_SET_NEXTHOP_REF) + nexthop_unref(rfse->action.nh_ref); + } + free(rfs); +} + +static void +rde_filterset_ref(struct rde_filter_set *rfs) +{ + rfs->refcnt++; + rdemem.filter_set_refs++; +} + +void +rde_filterset_unref(struct rde_filter_set *rfs) +{ + rfs->refcnt--; + rdemem.filter_set_refs--; + if (rfs->refcnt <= 0) { + CH_REMOVE(rde_filterset, &filterset, rfs); + rde_filterset_free(rfs); + } +} + +static void +rde_filterset_conv(const struct filter_set *set, + struct rde_filter_set_elm *rfse) +{ + rfse->type = set->type; + switch (set->type) { + case ACTION_SET_PREPEND_SELF: + case ACTION_SET_PREPEND_PEER: + rfse->action.prepend = set->action.prepend; + break; + case ACTION_SET_AS_OVERRIDE: + break; case ACTION_SET_LOCALPREF: - case ACTION_SET_RELATIVE_LOCALPREF: - return ("localpref"); case ACTION_SET_MED: - case ACTION_SET_RELATIVE_MED: - return ("metric"); case ACTION_SET_WEIGHT: + rfse->action.metric = set->action.metric; + break; + case ACTION_SET_RELATIVE_LOCALPREF: + case ACTION_SET_RELATIVE_MED: case ACTION_SET_RELATIVE_WEIGHT: - return ("weight"); - case ACTION_SET_PREPEND_SELF: - return ("prepend-self"); - case ACTION_SET_PREPEND_PEER: - return ("prepend-peer"); - case ACTION_SET_AS_OVERRIDE: - return ("as-override"); - case ACTION_SET_NEXTHOP: - case ACTION_SET_NEXTHOP_REF: - case ACTION_SET_NEXTHOP_REJECT: + rfse->action.relative = set->action.relative; + break; case ACTION_SET_NEXTHOP_BLACKHOLE: + case ACTION_SET_NEXTHOP_REJECT: case ACTION_SET_NEXTHOP_NOMODIFY: case ACTION_SET_NEXTHOP_SELF: - return ("nexthop"); - case ACTION_SET_COMMUNITY: - return ("community"); + break; case ACTION_DEL_COMMUNITY: - 
return ("community delete"); - case ACTION_PFTABLE: - case ACTION_PFTABLE_ID: - return ("pftable"); + case ACTION_SET_COMMUNITY: + rfse->action.community = set->action.community; + break; + case ACTION_SET_ORIGIN: + rfse->action.origin = set->action.origin; + break; + case ACTION_SET_NEXTHOP: + rfse->action.nh_ref = nexthop_get(&set->action.nexthop); + rfse->type = ACTION_SET_NEXTHOP_REF; + break; case ACTION_RTLABEL: + rfse->action.id = rtlabel_name2id(set->action.rtlabel); + rfse->type = ACTION_RTLABEL_ID; + break; + case ACTION_PFTABLE: + rfse->action.id = pftable_name2id(set->action.pftable); + rfse->type = ACTION_PFTABLE_ID; + break; + case ACTION_SET_NEXTHOP_REF: case ACTION_RTLABEL_ID: - return ("rtlabel"); - case ACTION_SET_ORIGIN: - return ("origin"); + case ACTION_PFTABLE_ID: + fatalx("unexpected filter action in RDE"); } - - fatalx("filterset_name: got lost"); } -int -filterset_send(struct imsgbuf *imsgbuf, struct filter_set_head *set) +struct rde_filter_set * +rde_filterset_imsg_recv(struct imsg *imsg) { - struct filter_set *s; + struct ibuf ibuf; + struct rde_filter_set *rfs = NULL, *nrfs; + struct rde_filter_set_elm *rfse; + int count, i; - TAILQ_FOREACH(s, set, entry) - if (imsg_compose(imsgbuf, IMSG_FILTER_SET, 0, 0, -1, s, - sizeof(*s)) == -1) - return (-1); - return (0); -} + if (imsg_get_ibuf(imsg, &ibuf) == -1) + goto fail; -void -filterset_recv(struct imsg *imsg, struct filter_set_head *set) -{ - struct filter_set *s; + count = ibuf_recv_filterset_count(&ibuf); + if (count == -1) + goto fail; - if ((s = malloc(sizeof(*s))) == NULL) - fatal(NULL); - if (imsg_get_data(imsg, s, sizeof(*s)) == -1) { - log_warnx("rde_dispatch: wrong imsg len"); - free(s); - return; + if ((rfs = calloc(1, sizeof(*rfs) + count * sizeof(*rfse))) == NULL) + goto fail; + + rdemem.filter_set_size += sizeof(*rfs) + count * sizeof(*rfse); + rdemem.filter_set_cnt++; + + rfs->len = count; + rfse = rfs->set; + + for (i = 0; i < count; i++, rfse++) { + struct filter_set set; + 
if (ibuf_recv_one_filterset(&ibuf, &set) == -1) + goto fail; + rde_filterset_conv(&set, rfse); } - switch (s->type) { - case ACTION_SET_NEXTHOP: - s->action.nh_ref = nexthop_get(&s->action.nexthop); - s->type = ACTION_SET_NEXTHOP_REF; - break; - case ACTION_RTLABEL: - /* convert the route label to an id for faster access */ - s->action.id = rtlabel_name2id(s->action.rtlabel); - s->type = ACTION_RTLABEL_ID; - break; - case ACTION_PFTABLE: - /* convert pftable name to an id */ - s->action.id = pftable_name2id(s->action.pftable); - s->type = ACTION_PFTABLE_ID; - break; - default: - break; + + if (ibuf_size(&ibuf) != 0) { + errno = EBADMSG; + goto fail; } - TAILQ_INSERT_TAIL(set, s, entry); + + rfs->hash = rde_filterset_calc_hash(rfs); + + if ((nrfs = CH_FIND(rde_filterset, &filterset, rfs)) == NULL) { + if (CH_INSERT(rde_filterset, &filterset, rfs, NULL) != 1) + fatalx("%s: already present set", __func__); + } else { + rde_filterset_free(rfs); + rfs = nrfs; + } + rde_filterset_ref(rfs); + return rfs; + + fail: + log_warn("filter set receive"); + rde_filterset_free(rfs); + return NULL; } +CH_GENERATE(rde_filterset, rde_filter_set, rde_filterset_equal, + rde_filterset_hash); + /* * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. @@ -921,7 +1047,8 @@ rde_filter(struct filter_head *rules, st f->skip[RDE_FILTER_SKIP_REMOTE_AS]); if (rde_filter_match(f, peer, from, state, prefix, plen)) { - rde_apply_set(&f->set, peer, from, state, prefix->aid); + rde_apply_set(f->rde_set, peer, from, state, + prefix->aid); if (f->action != ACTION_NONE) action = f->action; if (f->quick)