Download raw body.
bgpd: rework the rde side of filter_sets
On Wed, Feb 04, 2026 at 10:23:33AM +0100, Theo Buehler wrote:
> On Tue, Feb 03, 2026 at 04:01:57PM +0100, Claudio Jeker wrote:
> > rde_apply_set() is very inefficent at the moment. The code is hunting
> > for memory all the time since there are just too many objects all over the
> > place.
> >
> > This switches this away from a linked list to an array of filter_set
> > elements. On top of this the new rde_filter_set_elm structs are much
> > smaller then the regular filter_sets. These changes make rde_apply_set()
> > a lot more efficent.
> >
> > At the same time this brings in a new way to send and recv the imsgs with
> > the filter sets in them. The goal is to add more of those send / recv
> > functions to better validate this internal structs on receive.
> > Also since IMSG_FILTER_SET is also sent via the control socket add a
> > imsg_check_filterset() function that validates these messages before
> > passing them on.
> >
> > There is a lot of churn because of the new rde_filter_set object. Also the
> > diff of rde_filter.c is a big mess and so is hard to read. It is probably
> > better to look at the result.
>
> It's a quite a bit less messy if you hoist filterset_name() to the right
> place in a separate commit.
>
> >
> > Next step is to do a similar cleanup for filter rules but that is a bit
> > more involved.
>
> This one's straightforward enough. Some minor comments/questions below.
>
> > --
> > :wq Claudio
> >
> > + return 0;
> > +
> > +fail:
>
> I'd indent the label.
Done.
> > + ibuf_free(msg);
> > + return -1;
> > +}
> > +
> > +int
> > +imsg_check_filterset(struct imsg *imsg)
> > +{
> > + struct ibuf ibuf;
> > + uint16_t count, i;
> > +
> > + if (imsg_get_ibuf(imsg, &ibuf) == -1)
> > + return -1;
> > + if (ibuf_get_n16(&ibuf, &count) == -1)
> > + return -1;
>
> Should this use ibuf_recv_filterset_count()? But see below.
Yes. In some form or another that would make sense.
> > + for (i = 0; i < count; i++) {
> > + struct filter_set set;
> > + if (ibuf_recv_one_filterset(&ibuf, &set) == -1)
> > + return -1;
> > + }
> > + if (ibuf_size(&ibuf) != 0) {
> > + errno = EBADMSG;
> > + return -1;
> > + }
> > + return 0;
> > +}
> > +
> > +int
> > +ibuf_recv_filterset_count(struct ibuf *ibuf)
> > +{
> > + uint16_t count;
> > +
> > + if (ibuf_get_n16(ibuf, &count) == -1)
> > + return -1;
> > + return count;
> > +}
>
> I'm not entirely sold on the ibuf_recv_filterset_count() interface.
> It doesn't really seem to make the caller simpler. The in-band error
> with a signed type also isn't great in the caller, but that's probably
> just me :)
Changed this to:
int
ibuf_recv_filterset_count(struct ibuf *ibuf, uint16_t *count)
{
return ibuf_get_n16(ibuf, count);
}
I want this dumb wrapper to correctly decouple the message parsing from
the rest.
> > +
> > +int
> > +ibuf_recv_one_filterset(struct ibuf *ibuf, struct filter_set *set)
> > +{
> > + uint32_t type;
> > +
> > + memset(set, 0, sizeof(*set));
> > +
> > + if (ibuf_get_n32(ibuf, &type) == -1)
> > + return -1;
> > + set->type = type;
> > +
> > + switch (set->type) {
> > + case ACTION_SET_PREPEND_SELF:
> > + case ACTION_SET_PREPEND_PEER:
> > + if (ibuf_get_n8(ibuf, &set->action.prepend) == -1)
> > + return -1;
> > + break;
> > + case ACTION_SET_AS_OVERRIDE:
> > + break;
> > + case ACTION_SET_LOCALPREF:
> > + case ACTION_SET_MED:
> > + case ACTION_SET_WEIGHT:
> > + if (ibuf_get_n32(ibuf, &set->action.metric) == -1)
> > + return -1;
> > + break;
> > + case ACTION_SET_RELATIVE_LOCALPREF:
> > + case ACTION_SET_RELATIVE_MED:
> > + case ACTION_SET_RELATIVE_WEIGHT:
> > + if (ibuf_get_n32(ibuf, &set->action.relative) == -1)
>
> I'm a bit surprised that gcc doesn't complain about int32_t * vs uint32_t *.
Did not try this on sparc64.
Let me add a helper variable here.
...
> > - case ACTION_SET_NEXTHOP:
> > - if (a->type == b->type &&
> > - memcmp(&a->action.nexthop, &b->action.nexthop,
> > - sizeof(a->action.nexthop)) == 0)
> > + if (a->action.relative == b->action.relative)
> > continue;
> > break;
> > case ACTION_SET_NEXTHOP_REF:
> > - if (a->type == b->type &&
> > - a->action.nh_ref == b->action.nh_ref)
> > + if (a->action.nh_ref == b->action.nh_ref)
>
> I was somewhat surprised by a check for pointer equality. I guess
> it makes sense given that they reference objects in a global tree.
Yes, nexthop pointers are unique so there is no need to look at the
objects.
> > continue;
> > break;
> > case ACTION_SET_NEXTHOP_BLACKHOLE:
> > case ACTION_SET_NEXTHOP_REJECT:
> > case ACTION_SET_NEXTHOP_NOMODIFY:
> > case ACTION_SET_NEXTHOP_SELF:
>
> I don't follow why these ACTION_SET_NEXTHOP_* are ignored wile the
> ACTION_SET_NEXTHOP_REF is an error.
These actions do not include any extra data. The type is enough.
On the other hand ACTION_SET_NEXTHOP_REF is not an allowed type in
struct filter_set. Similar to ACTION_PFTABLE_ID and ACTION_RTLABEL_ID.
For rde_filter_set on the other hand ACTION_SET_NEXTHOP, ACTION_RTLABEL
and ACTION_PFTABLE are not allowed.
The filter_set needs to pass the un-referenced object (bgpd_addr or
string) and then the RDE converts them in rde_filterset_conv().
I could probably change this now to use the same types for the id/ref
since we now have two distinct objects.
I should do that cleanup as next.
> > +static inline uint64_t
> > +rde_filterset_hash(const struct rde_filter_set *rfs)
> > {
> > - switch (type) {
> > + return rfs->hash;
>
> use a tab, not 8 spaces
fixed.
> > -void
> > -filterset_recv(struct imsg *imsg, struct filter_set_head *set)
> > -{
> > - struct filter_set *s;
> > + count = ibuf_recv_filterset_count(&ibuf);
>
> I'd align this with imsg_check_filterset().
done.
> > + if (count == -1)
> > + goto fail;
> >
> > - if ((s = malloc(sizeof(*s))) == NULL)
> > - fatal(NULL);
> > - if (imsg_get_data(imsg, s, sizeof(*s)) == -1) {
> > - log_warnx("rde_dispatch: wrong imsg len");
> > - free(s);
> > - return;
> > + if ((rfs = calloc(1, sizeof(*rfs) + count * sizeof(*rfse))) == NULL)
>
> Should this check for overflow?
>
> if (count > (SIZE_MAX - sizeof(*rfs)) / sizeof(*rfse))
I think this could be a CTASSERT if at all since the count is limited to
UINT16_MAX. sizeof(*rfse) is not that big that it can overflow.
> > + goto fail;
> > +
> > + rdemem.filter_set_size += sizeof(*rfs) + count * sizeof(*rfse);
> > + rdemem.filter_set_cnt++;
> > +
> > + rfs->len = count;
> > + rfse = rfs->set;
> > +
> > + for (i = 0; i < count; i++, rfse++) {
> > + struct filter_set set;
> > + if (ibuf_recv_one_filterset(&ibuf, &set) == -1)
> > + goto fail;
> > + rde_filterset_conv(&set, rfse);
>
> trailing space after the semicolon
fixed.
New diff below
--
:wq Claudio
? obj
Index: Makefile
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/Makefile,v
diff -u -p -r1.45 Makefile
--- Makefile 11 Dec 2025 12:18:27 -0000 1.45
+++ Makefile 4 Feb 2026 10:13:43 -0000
@@ -2,6 +2,7 @@
PROG= bgpd
SRCS= bgpd.c
+SRCS+= bgpd_imsg.c
SRCS+= bitmap.c
SRCS+= carp.c
SRCS+= chash.c
Index: bgpd.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
diff -u -p -r1.286 bgpd.c
--- bgpd.c 3 Dec 2025 12:20:19 -0000 1.286
+++ bgpd.c 4 Feb 2026 10:13:43 -0000
@@ -672,7 +672,7 @@ send_config(struct bgpd_config *conf)
if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_ADD, 0, 0, -1,
f->flow, FLOWSPEC_SIZE + f->flow->len) == -1)
return (-1);
- if (filterset_send(ibuf_rde, &f->attrset) == -1)
+ if (imsg_send_filterset(ibuf_rde, &f->attrset) == -1)
return (-1);
if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_DONE, 0, 0, -1,
NULL, 0) == -1)
@@ -781,7 +781,7 @@ send_config(struct bgpd_config *conf)
/* filters for the RDE */
while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
TAILQ_REMOVE(conf->filters, r, entry);
- if (filterset_send(ibuf_rde, &r->set) == -1)
+ if (imsg_send_filterset(ibuf_rde, &r->set) == -1)
return (-1);
if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
r, sizeof(struct filter_rule)) == -1)
@@ -806,7 +806,7 @@ send_config(struct bgpd_config *conf)
return (-1);
/* export targets */
- if (filterset_send(ibuf_rde, &vpn->export) == -1)
+ if (imsg_send_filterset(ibuf_rde, &vpn->export) == -1)
return (-1);
if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
-1, NULL, 0) == -1)
@@ -814,7 +814,7 @@ send_config(struct bgpd_config *conf)
filterset_free(&vpn->export);
/* import targets */
- if (filterset_send(ibuf_rde, &vpn->import) == -1)
+ if (imsg_send_filterset(ibuf_rde, &vpn->import) == -1)
return (-1);
if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
-1, NULL, 0) == -1)
@@ -1170,7 +1170,7 @@ send_network(int type, struct network_co
/* networks that get deleted don't need to send the filter set */
if (type == IMSG_NETWORK_REMOVE)
return (0);
- if (filterset_send(ibuf_rde, h) == -1)
+ if (imsg_send_filterset(ibuf_rde, h) == -1)
return (-1);
if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
return (-1);
Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.530 bgpd.h
--- bgpd.h 3 Feb 2026 12:25:16 -0000 1.530
+++ bgpd.h 4 Feb 2026 10:13:44 -0000
@@ -252,6 +252,7 @@ TAILQ_HEAD(timer_head, timer);
TAILQ_HEAD(listen_addrs, listen_addr);
TAILQ_HEAD(filter_set_head, filter_set);
+struct rde_filter_set;
struct bitmap {
uint64_t data[2];
@@ -566,6 +567,7 @@ enum network_type {
struct network_config {
struct bgpd_addr prefix;
struct filter_set_head attrset;
+ struct rde_filter_set *rde_attrset;
char psname[SET_NAME_LEN];
uint64_t rd;
enum network_type type;
@@ -591,6 +593,7 @@ struct flowspec {
struct flowspec_config {
RB_ENTRY(flowspec_config) entry;
struct filter_set_head attrset;
+ struct rde_filter_set *rde_attrset;
struct flowspec *flow;
enum reconf_action reconf_action;
};
@@ -1265,6 +1268,7 @@ struct filter_rule {
struct filter_peers peer;
struct filter_match match;
struct filter_set_head set;
+ struct rde_filter_set *rde_set;
#define RDE_FILTER_SKIP_PEERID 0
#define RDE_FILTER_SKIP_GROUPID 1
#define RDE_FILTER_SKIP_REMOTE_AS 2
@@ -1363,6 +1367,8 @@ struct l3vpn {
char ifmpe[IFNAMSIZ];
struct filter_set_head import;
struct filter_set_head export;
+ struct rde_filter_set *rde_import;
+ struct rde_filter_set *rde_export;
struct network_head net_l;
uint64_t rd;
u_int rtableid;
@@ -1420,6 +1426,9 @@ struct rde_memstats {
long long aset_nmemb;
long long pset_cnt;
long long pset_size;
+ long long filter_set_cnt;
+ long long filter_set_size;
+ long long filter_set_refs;
long long rde_event_loop_count;
long long rde_event_loop_usec;
long long rde_event_io_usec;
@@ -1581,13 +1590,12 @@ int pftable_commit(void);
/* rde_filter.c */
void filterset_free(struct filter_set_head *);
+void rde_filterset_unref(struct rde_filter_set *);
int filterset_cmp(struct filter_set *, struct filter_set *);
void filterset_move(struct filter_set_head *, struct filter_set_head *);
void filterset_copy(const struct filter_set_head *,
struct filter_set_head *);
const char *filterset_name(enum action_types);
-int filterset_send(struct imsgbuf *, struct filter_set_head *);
-void filterset_recv(struct imsg *, struct filter_set_head *);
/* bitmap.c */
int bitmap_set(struct bitmap *, uint32_t);
@@ -1680,6 +1688,12 @@ const char *get_baudrate(unsigned long l
unsigned int bin_of_attrs(unsigned int);
unsigned int bin_of_communities(unsigned int);
unsigned int bin_of_adjout_prefixes(unsigned int);
+
+/* bgpd_imsg.c */
+int imsg_send_filterset(struct imsgbuf *, struct filter_set_head *);
+int ibuf_recv_filterset_count(struct ibuf *, uint16_t *);
+int ibuf_recv_one_filterset(struct ibuf *, struct filter_set *);
+int imsg_check_filterset(struct imsg *);
/* flowspec.c */
int flowspec_valid(const uint8_t *, int, int);
Index: bgpd_imsg.c
===================================================================
RCS file: bgpd_imsg.c
diff -N bgpd_imsg.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ bgpd_imsg.c 4 Feb 2026 10:13:44 -0000
@@ -0,0 +1,208 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2026 Claudio Jeker <claudio@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "bgpd.h"
+#include "rde.h"
+#include "log.h"
+
+int
+imsg_send_filterset(struct imsgbuf *imsgbuf, struct filter_set_head *set)
+{
+ struct filter_set *s;
+ struct ibuf *msg;
+ int nsets = 0;
+
+ msg = imsg_create(imsgbuf, IMSG_FILTER_SET, 0, 0, 0);
+ if (msg == NULL)
+ return -1;
+
+ TAILQ_FOREACH(s, set, entry)
+ nsets++;
+ if (ibuf_add_n16(msg, nsets) == -1)
+ goto fail;
+
+ TAILQ_FOREACH(s, set, entry) {
+ if (ibuf_add_n32(msg, s->type) == -1)
+ goto fail;
+
+ switch (s->type) {
+ case ACTION_SET_PREPEND_SELF:
+ case ACTION_SET_PREPEND_PEER:
+ if (ibuf_add_n8(msg, s->action.prepend) == -1)
+ goto fail;
+ break;
+ case ACTION_SET_AS_OVERRIDE:
+ break;
+ case ACTION_SET_LOCALPREF:
+ case ACTION_SET_MED:
+ case ACTION_SET_WEIGHT:
+ if (ibuf_add_n32(msg, s->action.metric) == -1)
+ goto fail;
+ break;
+ case ACTION_SET_RELATIVE_LOCALPREF:
+ case ACTION_SET_RELATIVE_MED:
+ case ACTION_SET_RELATIVE_WEIGHT:
+ if (ibuf_add_n32(msg, s->action.relative) == -1)
+ goto fail;
+ break;
+ case ACTION_SET_NEXTHOP:
+ if (ibuf_add(msg, &s->action.nexthop,
+ sizeof(s->action.nexthop)) == -1)
+ goto fail;
+ break;
+ case ACTION_SET_NEXTHOP_BLACKHOLE:
+ case ACTION_SET_NEXTHOP_REJECT:
+ case ACTION_SET_NEXTHOP_NOMODIFY:
+ case ACTION_SET_NEXTHOP_SELF:
+ break;
+ case ACTION_DEL_COMMUNITY:
+ case ACTION_SET_COMMUNITY:
+ if (ibuf_add(msg, &s->action.community,
+ sizeof(s->action.community)) == -1)
+ goto fail;
+ break;
+ case ACTION_PFTABLE:
+ if (ibuf_add_strbuf(msg, s->action.pftable,
+ sizeof(s->action.pftable)) == -1)
+ goto fail;
+ break;
+ case ACTION_RTLABEL:
+ if (ibuf_add_strbuf(msg, s->action.rtlabel,
+ sizeof(s->action.rtlabel)) == -1)
+ goto fail;
+ break;
+ case ACTION_SET_ORIGIN:
+ if (ibuf_add_n8(msg, s->action.origin) == -1)
+ goto fail;
+ break;
+ case ACTION_SET_NEXTHOP_REF:
+ case ACTION_RTLABEL_ID:
+ case ACTION_PFTABLE_ID:
+ goto fail;
+ }
+ }
+
+ imsg_close(imsgbuf, msg);
+ return 0;
+
+ fail:
+ ibuf_free(msg);
+ return -1;
+}
+
+int
+ibuf_recv_filterset_count(struct ibuf *ibuf, uint16_t *count)
+{
+ return ibuf_get_n16(ibuf, count);
+}
+
+int
+ibuf_recv_one_filterset(struct ibuf *ibuf, struct filter_set *set)
+{
+ uint32_t type, num;
+
+ memset(set, 0, sizeof(*set));
+
+ if (ibuf_get_n32(ibuf, &type) == -1)
+ return -1;
+ set->type = type;
+
+ switch (set->type) {
+ case ACTION_SET_PREPEND_SELF:
+ case ACTION_SET_PREPEND_PEER:
+ if (ibuf_get_n8(ibuf, &set->action.prepend) == -1)
+ return -1;
+ break;
+ case ACTION_SET_AS_OVERRIDE:
+ break;
+ case ACTION_SET_LOCALPREF:
+ case ACTION_SET_MED:
+ case ACTION_SET_WEIGHT:
+ if (ibuf_get_n32(ibuf, &set->action.metric) == -1)
+ return -1;
+ break;
+ case ACTION_SET_RELATIVE_LOCALPREF:
+ case ACTION_SET_RELATIVE_MED:
+ case ACTION_SET_RELATIVE_WEIGHT:
+ if (ibuf_get_n32(ibuf, &num) == -1)
+ return -1;
+ set->action.relative = num;
+ break;
+ case ACTION_SET_NEXTHOP:
+ if (ibuf_get(ibuf, &set->action.nexthop,
+ sizeof(set->action.nexthop)) == -1)
+ return -1;
+ break;
+ case ACTION_SET_NEXTHOP_BLACKHOLE:
+ case ACTION_SET_NEXTHOP_REJECT:
+ case ACTION_SET_NEXTHOP_NOMODIFY:
+ case ACTION_SET_NEXTHOP_SELF:
+ break;
+ case ACTION_DEL_COMMUNITY:
+ case ACTION_SET_COMMUNITY:
+ if (ibuf_get(ibuf, &set->action.community,
+ sizeof(set->action.community)) == -1)
+ return -1;
+ break;
+ case ACTION_PFTABLE:
+ if (ibuf_get_strbuf(ibuf, set->action.pftable,
+ sizeof(set->action.pftable)) == -1)
+ return -1;
+ break;
+ case ACTION_RTLABEL:
+ if (ibuf_get_strbuf(ibuf, set->action.rtlabel,
+ sizeof(set->action.rtlabel)) == -1)
+ return -1;
+ break;
+ case ACTION_SET_ORIGIN:
+ if (ibuf_get_n8(ibuf, &set->action.origin) == -1)
+ return -1;
+ break;
+ case ACTION_SET_NEXTHOP_REF:
+ case ACTION_RTLABEL_ID:
+ case ACTION_PFTABLE_ID:
+ errno = EBADMSG;
+ return -1;
+ }
+ return 0;
+}
+
+int
+imsg_check_filterset(struct imsg *imsg)
+{
+ struct ibuf ibuf;
+ uint16_t count, i;
+
+ if (imsg_get_ibuf(imsg, &ibuf) == -1)
+ return -1;
+ if (ibuf_recv_filterset_count(&ibuf, &count) == -1)
+ return -1;
+ for (i = 0; i < count; i++) {
+ struct filter_set set;
+ if (ibuf_recv_one_filterset(&ibuf, &set) == -1)
+ return -1;
+ }
+ if (ibuf_size(&ibuf) != 0) {
+ errno = EBADMSG;
+ return -1;
+ }
+ return 0;
+}
Index: control.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
diff -u -p -r1.135 control.c
--- control.c 10 Mar 2025 14:11:38 -0000 1.135
+++ control.c 4 Feb 2026 10:13:44 -0000
@@ -513,7 +513,14 @@ control_dispatch_msg(struct pollfd *pfd,
case IMSG_FLOWSPEC_REMOVE:
case IMSG_FLOWSPEC_DONE:
case IMSG_FLOWSPEC_FLUSH:
+ imsg_ctl_rde(&imsg);
+ break;
case IMSG_FILTER_SET:
+ if (imsg_check_filterset(&imsg) == -1) {
+ /* malformed request */
+ control_result(c, CTL_RES_PARSE_ERROR);
+ break;
+ }
imsg_ctl_rde(&imsg);
break;
case IMSG_CTL_LOG_VERBOSE:
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.681 rde.c
--- rde.c 3 Feb 2026 12:25:16 -0000 1.681
+++ rde.c 4 Feb 2026 10:13:44 -0000
@@ -101,7 +101,7 @@ static void network_dump_upcall(struct
static void network_flush_upcall(struct rib_entry *, void *);
void flowspec_add(struct flowspec *, struct filterstate *,
- struct filter_set_head *);
+ struct rde_filter_set *);
void flowspec_delete(struct flowspec *);
static void flowspec_flush_upcall(struct rib_entry *, void *);
static void flowspec_dump_upcall(struct rib_entry *, void *);
@@ -400,8 +400,8 @@ rde_main(int debug, int verbose)
struct network_config netconf_s, netconf_p;
struct filterstate netconf_state;
-struct filter_set_head session_set = TAILQ_HEAD_INITIALIZER(session_set);
-struct filter_set_head parent_set = TAILQ_HEAD_INITIALIZER(parent_set);
+struct rde_filter_set *session_set;
+struct rde_filter_set *parent_set;
void
rde_dispatch_imsg_session(struct imsgbuf *imsgbuf)
@@ -563,7 +563,8 @@ rde_dispatch_imsg_session(struct imsgbuf
}
break;
case IMSG_NETWORK_DONE:
- TAILQ_CONCAT(&netconf_s.attrset, &session_set, entry);
+ netconf_s.rde_attrset = session_set;
+ session_set = NULL;
switch (netconf_s.prefix.aid) {
case AID_INET:
if (netconf_s.prefixlen > 32)
@@ -658,10 +659,11 @@ badnetdel:
"from bgpctl");
else
flowspec_add(curflow, &netconf_state,
- &session_set);
+ session_set);
rde_filterstate_clean(&netconf_state);
- filterset_free(&session_set);
+ rde_filterset_unref(session_set);
+ session_set = NULL;
free(curflow);
curflow = NULL;
break;
@@ -702,7 +704,7 @@ badnetdel:
flowspec_flush_upcall, NULL);
break;
case IMSG_FILTER_SET:
- filterset_recv(&imsg, &session_set);
+ session_set = rde_filterset_imsg_recv(&imsg);
break;
case IMSG_CTL_SHOW_NETWORK:
case IMSG_CTL_SHOW_RIB:
@@ -920,7 +922,8 @@ rde_dispatch_imsg_parent(struct imsgbuf
TAILQ_INIT(&netconf_p.attrset);
break;
case IMSG_NETWORK_DONE:
- TAILQ_CONCAT(&netconf_p.attrset, &parent_set, entry);
+ netconf_p.rde_attrset = parent_set;
+ parent_set = NULL;
rde_filterstate_init(&state);
asp = &state.aspath;
@@ -982,10 +985,11 @@ rde_dispatch_imsg_parent(struct imsgbuf
log_warnx("invalid flowspec update received "
"from parent");
else
- flowspec_add(curflow, &state, &parent_set);
+ flowspec_add(curflow, &state, parent_set);
rde_filterstate_clean(&state);
- filterset_free(&parent_set);
+ rde_filterset_unref(parent_set);
+ parent_set = NULL;
free(curflow);
curflow = NULL;
break;
@@ -1090,11 +1094,12 @@ rde_dispatch_imsg_parent(struct imsgbuf
}
}
TAILQ_INIT(&r->set);
- TAILQ_CONCAT(&r->set, &parent_set, entry);
+ r->rde_set = parent_set;
+ parent_set = NULL;
if ((rib = rib_byid(rib_find(r->rib))) == NULL) {
log_warnx("IMSG_RECONF_FILTER: filter rule "
"for nonexistent rib %s", r->rib);
- filterset_free(&r->set);
+ rde_filterset_unref(r->rde_set);
free(r);
break;
}
@@ -1192,7 +1197,8 @@ rde_dispatch_imsg_parent(struct imsgbuf
"IMSG_RECONF_VPN_EXPORT unexpected");
break;
}
- TAILQ_CONCAT(&vpn->export, &parent_set, entry);
+ vpn->rde_export = parent_set;
+ parent_set = NULL;
break;
case IMSG_RECONF_VPN_IMPORT:
if (vpn == NULL) {
@@ -1200,7 +1206,8 @@ rde_dispatch_imsg_parent(struct imsgbuf
"IMSG_RECONF_VPN_IMPORT unexpected");
break;
}
- TAILQ_CONCAT(&vpn->import, &parent_set, entry);
+ vpn->rde_import = parent_set;
+ parent_set = NULL;
break;
case IMSG_RECONF_VPN_DONE:
break;
@@ -1221,7 +1228,7 @@ rde_dispatch_imsg_parent(struct imsgbuf
nexthop_update(&knext);
break;
case IMSG_FILTER_SET:
- filterset_recv(&imsg, &parent_set);
+ parent_set = rde_filterset_imsg_recv(&imsg);
break;
case IMSG_MRT_OPEN:
case IMSG_MRT_REOPEN:
@@ -4545,7 +4552,7 @@ void
network_add(struct network_config *nc, struct filterstate *state)
{
struct l3vpn *vpn;
- struct filter_set_head *vpnset = NULL;
+ struct rde_filter_set *vpnset = NULL;
struct in_addr prefix4;
struct in6_addr prefix6;
uint32_t path_id_tx;
@@ -4571,7 +4578,7 @@ network_add(struct network_config *nc, s
nc->prefix.labelstack[2] =
(vpn->label << 4) & 0xf0;
nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
- vpnset = &vpn->export;
+ vpnset = vpn->rde_export;
break;
case AID_INET6:
prefix6 = nc->prefix.v6;
@@ -4587,11 +4594,11 @@ network_add(struct network_config *nc, s
nc->prefix.labelstack[2] =
(vpn->label << 4) & 0xf0;
nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
- vpnset = &vpn->export;
+ vpnset = vpn->rde_export;
break;
default:
log_warnx("unable to VPNize prefix");
- filterset_free(&nc->attrset);
+ rde_filterset_unref(nc->rde_attrset);
return;
}
break;
@@ -4605,7 +4612,8 @@ network_add(struct network_config *nc, s
}
}
- rde_apply_set(&nc->attrset, peerself, peerself, state, nc->prefix.aid);
+ rde_apply_set(nc->rde_attrset, peerself, peerself, state,
+ nc->prefix.aid);
if (vpnset)
rde_apply_set(vpnset, peerself, peerself, state,
nc->prefix.aid);
@@ -4628,7 +4636,7 @@ network_add(struct network_config *nc, s
prefix_update(rib, peerself, 0, path_id_tx, state, 0,
&nc->prefix, nc->prefixlen);
}
- filterset_free(&nc->attrset);
+ rde_filterset_unref(nc->rde_attrset);
}
void
@@ -4761,7 +4769,7 @@ network_flush_upcall(struct rib_entry *r
*/
void
flowspec_add(struct flowspec *f, struct filterstate *state,
- struct filter_set_head *attrset)
+ struct rde_filter_set *attrset)
{
struct pt_entry *pte;
uint32_t path_id_tx;
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.339 rde.h
--- rde.h 3 Feb 2026 12:25:16 -0000 1.339
+++ rde.h 4 Feb 2026 10:13:44 -0000
@@ -546,17 +546,19 @@ void prefix_evaluate_nexthop(struct pr
enum nexthop_state);
/* rde_filter.c */
-void rde_apply_set(struct filter_set_head *, struct rde_peer *,
+void rde_apply_set(const struct rde_filter_set *, struct rde_peer *,
struct rde_peer *, struct filterstate *, u_int8_t);
-int rde_l3vpn_import(struct rde_community *, struct l3vpn *);
+int rde_l3vpn_import(struct rde_community *, struct l3vpn *);
struct filter_rule *rde_filter_dup(const struct filter_rule *);
void rde_filterstate_init(struct filterstate *);
void rde_filterstate_prep(struct filterstate *, struct prefix *);
void rde_filterstate_copy(struct filterstate *, struct filterstate *);
void rde_filterstate_set_vstate(struct filterstate *, uint8_t, uint8_t);
void rde_filterstate_clean(struct filterstate *);
+uint64_t rde_filterset_calc_hash(const struct rde_filter_set *);
int rde_filter_skip_rule(struct rde_peer *, struct filter_rule *);
int rde_filter_equal(struct filter_head *, struct filter_head *);
+struct rde_filter_set *rde_filterset_imsg_recv(struct imsg *);
void rde_filter_calc_skip_steps(struct filter_head *);
enum filter_actions rde_filter(struct filter_head *, struct rde_peer *,
struct rde_peer *, struct bgpd_addr *, uint8_t,
Index: rde_filter.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_filter.c,v
diff -u -p -r1.140 rde_filter.c
--- rde_filter.c 4 Feb 2026 09:36:57 -0000 1.140
+++ rde_filter.c 4 Feb 2026 10:13:44 -0000
@@ -21,27 +21,56 @@
#include <sys/types.h>
#include <sys/queue.h>
+#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
+#include <siphash.h>
#include "bgpd.h"
#include "rde.h"
#include "log.h"
+#include "chash.h"
-int filterset_equal(struct filter_set_head *, struct filter_set_head *);
+static int rde_filterset_equal(const struct rde_filter_set *,
+ const struct rde_filter_set *);
+static void rde_filterset_ref(struct rde_filter_set *);
+
+struct rde_filter_set_elm {
+ enum action_types type;
+ union {
+ uint8_t prepend;
+ uint8_t origin;
+ uint16_t id;
+ uint32_t metric;
+ int32_t relative;
+ struct nexthop *nh_ref;
+ struct community community;
+ } action;
+};
+
+struct rde_filter_set {
+ uint64_t hash;
+ size_t len;
+ int refcnt;
+ struct rde_filter_set_elm set[0];
+};
void
-rde_apply_set(struct filter_set_head *sh, struct rde_peer *peer,
+rde_apply_set(const struct rde_filter_set *rfs, struct rde_peer *peer,
struct rde_peer *from, struct filterstate *state, uint8_t aid)
{
- struct filter_set *set;
u_char *np;
+ size_t i;
uint32_t prep_as;
uint16_t nl;
uint8_t prepend;
- TAILQ_FOREACH(set, sh, entry) {
+ if (rfs == NULL)
+ return;
+ for (i = 0; i < rfs->len; i++) {
+ const struct rde_filter_set_elm *set = &rfs->set[i];
+
switch (set->type) {
case ACTION_SET_LOCALPREF:
state->aspath.lpref = set->action.metric;
@@ -169,12 +198,16 @@ rde_apply_set(struct filter_set_head *sh
}
}
+/* use to match the import filters for vpn imports */
int
rde_l3vpn_import(struct rde_community *comm, struct l3vpn *rd)
{
- struct filter_set *s;
+ size_t i;
- TAILQ_FOREACH(s, &rd->import, entry) {
+ if (rd->rde_import == NULL)
+ return (0);
+ for (i = 0; i < rd->rde_import->len; i++) {
+ struct rde_filter_set_elm *s = &rd->rde_import->set[i];
if (community_match(comm, &s->action.community, 0))
return (1);
}
@@ -414,7 +447,7 @@ rde_filter_equal(struct filter_head *a,
return (0);
}
- if (!filterset_equal(&fa->set, &fb->set))
+ if (!rde_filterset_equal(fa->rde_set, fb->rde_set))
return (0);
fa = TAILQ_NEXT(fa, entry);
@@ -431,7 +464,8 @@ rde_filter_dup(const struct filter_rule
if ((new = malloc(sizeof(*new))) == NULL)
fatal(NULL);
*new = *fr;
- filterset_copy(&fr->set, &new->set);
+ /* XXX think about skip table */
+ rde_filterset_ref(new->rde_set);
return new;
}
@@ -511,6 +545,8 @@ filterlist_free(struct filter_head *fh)
while ((r = TAILQ_FIRST(fh)) != NULL) {
TAILQ_REMOVE(fh, r, entry);
filterset_free(&r->set);
+ if (r->rde_set != NULL)
+ rde_filterset_unref(r->rde_set);
free(r);
}
free(fh);
@@ -653,158 +689,247 @@ filterset_copy(const struct filter_set_h
}
}
-int
-filterset_equal(struct filter_set_head *ah, struct filter_set_head *bh)
+static int
+rde_filterset_equal(const struct rde_filter_set *afs,
+ const struct rde_filter_set *bfs)
{
- struct filter_set *a, *b;
- const char *as, *bs;
+ const struct rde_filter_set_elm *a, *b;
+ size_t i;
+
+ if (afs->len != bfs->len)
+ return 0;
+
+ a = afs->set;
+ b = bfs->set;
+ for (i = 0; i < afs->len; i++, a++, b++) {
+ if (a->type != b->type)
+ return 0;
- for (a = TAILQ_FIRST(ah), b = TAILQ_FIRST(bh);
- a != NULL && b != NULL;
- a = TAILQ_NEXT(a, entry), b = TAILQ_NEXT(b, entry)) {
switch (a->type) {
case ACTION_SET_PREPEND_SELF:
case ACTION_SET_PREPEND_PEER:
- if (a->type == b->type &&
- a->action.prepend == b->action.prepend)
+ if (a->action.prepend == b->action.prepend)
continue;
break;
case ACTION_SET_AS_OVERRIDE:
- if (a->type == b->type)
- continue;
- break;
+ continue;
case ACTION_SET_LOCALPREF:
case ACTION_SET_MED:
case ACTION_SET_WEIGHT:
- if (a->type == b->type &&
- a->action.metric == b->action.metric)
+ if (a->action.metric == b->action.metric)
continue;
break;
case ACTION_SET_RELATIVE_LOCALPREF:
case ACTION_SET_RELATIVE_MED:
case ACTION_SET_RELATIVE_WEIGHT:
- if (a->type == b->type &&
- a->action.relative == b->action.relative)
- continue;
- break;
- case ACTION_SET_NEXTHOP:
- if (a->type == b->type &&
- memcmp(&a->action.nexthop, &b->action.nexthop,
- sizeof(a->action.nexthop)) == 0)
+ if (a->action.relative == b->action.relative)
continue;
break;
case ACTION_SET_NEXTHOP_REF:
- if (a->type == b->type &&
- a->action.nh_ref == b->action.nh_ref)
+ if (a->action.nh_ref == b->action.nh_ref)
continue;
break;
case ACTION_SET_NEXTHOP_BLACKHOLE:
case ACTION_SET_NEXTHOP_REJECT:
case ACTION_SET_NEXTHOP_NOMODIFY:
case ACTION_SET_NEXTHOP_SELF:
- if (a->type == b->type)
- continue;
- break;
+ continue;
case ACTION_DEL_COMMUNITY:
case ACTION_SET_COMMUNITY:
- if (a->type == b->type &&
- memcmp(&a->action.community, &b->action.community,
+ if (memcmp(&a->action.community, &b->action.community,
sizeof(a->action.community)) == 0)
continue;
break;
- case ACTION_PFTABLE:
- case ACTION_PFTABLE_ID:
- if (b->type == ACTION_PFTABLE)
- bs = b->action.pftable;
- else if (b->type == ACTION_PFTABLE_ID)
- bs = pftable_id2name(b->action.id);
- else
- break;
-
- if (a->type == ACTION_PFTABLE)
- as = a->action.pftable;
- else
- as = pftable_id2name(a->action.id);
-
- if (strcmp(as, bs) == 0)
- continue;
- break;
- case ACTION_RTLABEL:
case ACTION_RTLABEL_ID:
- if (b->type == ACTION_RTLABEL)
- bs = b->action.rtlabel;
- else if (b->type == ACTION_RTLABEL_ID)
- bs = rtlabel_id2name(b->action.id);
- else
- break;
-
- if (a->type == ACTION_RTLABEL)
- as = a->action.rtlabel;
- else
- as = rtlabel_id2name(a->action.id);
-
- if (strcmp(as, bs) == 0)
+ case ACTION_PFTABLE_ID:
+ if (a->action.id == b->action.id)
continue;
break;
case ACTION_SET_ORIGIN:
- if (a->type == b->type &&
- a->action.origin == b->action.origin)
+ if (a->action.origin == b->action.origin)
continue;
break;
+ case ACTION_SET_NEXTHOP:
+ case ACTION_RTLABEL:
+ case ACTION_PFTABLE:
+ fatalx("unexpected filter action in RDE");
}
/* compare failed */
- return (0);
+ return 0;
}
- if (a != NULL || b != NULL)
- return (0);
- return (1);
+ return 1;
}
-int
-filterset_send(struct imsgbuf *imsgbuf, struct filter_set_head *set)
+static SIPHASH_KEY fskey;
+
+static inline uint64_t
+rde_filterset_hash(const struct rde_filter_set *rfs)
{
- struct filter_set *s;
+ return rfs->hash;
+}
- TAILQ_FOREACH(s, set, entry)
- if (imsg_compose(imsgbuf, IMSG_FILTER_SET, 0, 0, -1, s,
- sizeof(*s)) == -1)
- return (-1);
- return (0);
+uint64_t
+rde_filterset_calc_hash(const struct rde_filter_set *rfs)
+{
+ return SipHash24(&fskey, rfs->set, rfs->len * sizeof(*rfs->set));
}
-void
-filterset_recv(struct imsg *imsg, struct filter_set_head *set)
+CH_HEAD(rde_filterset, rde_filter_set);
+CH_PROTOTYPE(rde_filterset, rde_filter_set, rde_filterset_hash);
+
+static struct rde_filterset filterset = CH_INITIALIZER(&filterset);
+
+static void
+rde_filterset_free(struct rde_filter_set *rfs)
{
- struct filter_set *s;
+ struct rde_filter_set_elm *rfse;
+ size_t i;
- if ((s = malloc(sizeof(*s))) == NULL)
- fatal(NULL);
- if (imsg_get_data(imsg, s, sizeof(*s)) == -1) {
- log_warnx("rde_dispatch: wrong imsg len");
- free(s);
+ if (rfs == NULL)
return;
+
+ rdemem.filter_set_size -= sizeof(*rfs) + rfs->len * sizeof(*rfse);
+ rdemem.filter_set_cnt--;
+
+ rfse = rfs->set;
+ for (i = 0; i < rfs->len; i++, rfse++) {
+ if (rfse->type == ACTION_RTLABEL_ID)
+ rtlabel_unref(rfse->action.id);
+ else if (rfse->type == ACTION_PFTABLE_ID)
+ pftable_unref(rfse->action.id);
+ else if (rfse->type == ACTION_SET_NEXTHOP_REF)
+ nexthop_unref(rfse->action.nh_ref);
}
- switch (s->type) {
+ free(rfs);
+}
+
+static void
+rde_filterset_ref(struct rde_filter_set *rfs)
+{
+ rfs->refcnt++;
+ rdemem.filter_set_refs++;
+}
+
+void
+rde_filterset_unref(struct rde_filter_set *rfs)
+{
+ rfs->refcnt--;
+ rdemem.filter_set_refs--;
+ if (rfs->refcnt <= 0) {
+ CH_REMOVE(rde_filterset, &filterset, rfs);
+ rde_filterset_free(rfs);
+ }
+}
+
+static void
+rde_filterset_conv(const struct filter_set *set,
+ struct rde_filter_set_elm *rfse)
+{
+ rfse->type = set->type;
+ switch (set->type) {
+ case ACTION_SET_PREPEND_SELF:
+ case ACTION_SET_PREPEND_PEER:
+ rfse->action.prepend = set->action.prepend;
+ break;
+ case ACTION_SET_AS_OVERRIDE:
+ break;
+ case ACTION_SET_LOCALPREF:
+ case ACTION_SET_MED:
+ case ACTION_SET_WEIGHT:
+ rfse->action.metric = set->action.metric;
+ break;
+ case ACTION_SET_RELATIVE_LOCALPREF:
+ case ACTION_SET_RELATIVE_MED:
+ case ACTION_SET_RELATIVE_WEIGHT:
+ rfse->action.relative = set->action.relative;
+ break;
+ case ACTION_SET_NEXTHOP_BLACKHOLE:
+ case ACTION_SET_NEXTHOP_REJECT:
+ case ACTION_SET_NEXTHOP_NOMODIFY:
+ case ACTION_SET_NEXTHOP_SELF:
+ break;
+ case ACTION_DEL_COMMUNITY:
+ case ACTION_SET_COMMUNITY:
+ rfse->action.community = set->action.community;
+ break;
+ case ACTION_SET_ORIGIN:
+ rfse->action.origin = set->action.origin;
+ break;
case ACTION_SET_NEXTHOP:
- s->action.nh_ref = nexthop_get(&s->action.nexthop);
- s->type = ACTION_SET_NEXTHOP_REF;
+ rfse->action.nh_ref = nexthop_get(&set->action.nexthop);
+ rfse->type = ACTION_SET_NEXTHOP_REF;
break;
case ACTION_RTLABEL:
- /* convert the route label to an id for faster access */
- s->action.id = rtlabel_name2id(s->action.rtlabel);
- s->type = ACTION_RTLABEL_ID;
+ rfse->action.id = rtlabel_name2id(set->action.rtlabel);
+ rfse->type = ACTION_RTLABEL_ID;
break;
case ACTION_PFTABLE:
- /* convert pftable name to an id */
- s->action.id = pftable_name2id(s->action.pftable);
- s->type = ACTION_PFTABLE_ID;
- break;
- default:
+ rfse->action.id = pftable_name2id(set->action.pftable);
+ rfse->type = ACTION_PFTABLE_ID;
break;
+ case ACTION_SET_NEXTHOP_REF:
+ case ACTION_RTLABEL_ID:
+ case ACTION_PFTABLE_ID:
+ fatalx("unexpected filter action in RDE");
+ }
+}
+
+struct rde_filter_set *
+rde_filterset_imsg_recv(struct imsg *imsg)
+{
+ struct ibuf ibuf;
+ struct rde_filter_set *rfs = NULL, *nrfs;
+ struct rde_filter_set_elm *rfse;
+ uint16_t count, i;
+
+ if (imsg_get_ibuf(imsg, &ibuf) == -1)
+ goto fail;
+
+ if (ibuf_recv_filterset_count(&ibuf, &count) == -1)
+ goto fail;
+
+ if ((rfs = calloc(1, sizeof(*rfs) + count * sizeof(*rfse))) == NULL)
+ goto fail;
+
+ rdemem.filter_set_size += sizeof(*rfs) + count * sizeof(*rfse);
+ rdemem.filter_set_cnt++;
+
+ rfs->len = count;
+ rfse = rfs->set;
+
+ for (i = 0; i < count; i++, rfse++) {
+ struct filter_set set;
+ if (ibuf_recv_one_filterset(&ibuf, &set) == -1)
+ goto fail;
+ rde_filterset_conv(&set, rfse);
}
- TAILQ_INSERT_TAIL(set, s, entry);
+
+ if (ibuf_size(&ibuf) != 0) {
+ errno = EBADMSG;
+ goto fail;
+ }
+
+ rfs->hash = rde_filterset_calc_hash(rfs);
+
+ if ((nrfs = CH_FIND(rde_filterset, &filterset, rfs)) == NULL) {
+ if (CH_INSERT(rde_filterset, &filterset, rfs, NULL) != 1)
+ fatalx("%s: already present set", __func__);
+ } else {
+ rde_filterset_free(rfs);
+ rfs = nrfs;
+ }
+ rde_filterset_ref(rfs);
+ return rfs;
+
+ fail:
+ log_warn("filter set receive");
+ rde_filterset_free(rfs);
+ return NULL;
}
+CH_GENERATE(rde_filterset, rde_filter_set, rde_filterset_equal,
+ rde_filterset_hash);
+
/*
* Copyright (c) 2001 Daniel Hartmeier
* All rights reserved.
@@ -921,7 +1046,8 @@ rde_filter(struct filter_head *rules, st
f->skip[RDE_FILTER_SKIP_REMOTE_AS]);
if (rde_filter_match(f, peer, from, state, prefix, plen)) {
- rde_apply_set(&f->set, peer, from, state, prefix->aid);
+ rde_apply_set(f->rde_set, peer, from, state,
+ prefix->aid);
if (f->action != ACTION_NONE)
action = f->action;
if (f->quick)
bgpd: rework the rde side of filter_sets