Download raw body.
bgpd: introduce pending attr and prefix queues
Implement a per-peer pending prefix queue and lookup table and
a pending attribute queue and lookup table.
Withdraws just end up in the peer pending prefix queue while for updates
the prefix is queued on a pending attribute entry which itself is queued
on the peer. This allows to aggregate multiple prefixes into a single
UPDATE message. When prefixes are added check the lookup table if there
is already an object. In such a case the prefix is first dequeued and then
re-added. pend_prefix_add() is therefore a bit fiddly.
If the attr pointer in struct pend_prefix is NULL then it is a withdraw.
The pend_attr needs to hold the aid so prefixes end up on the right queue.
If the attrs pointer is NULL the pend_attr is actually the End-of-RIB
marker.
This replaces the red-black tree contraption used right now, which is a
big performance bottleneck.
--
:wq Claudio
Index: bgpctl/output.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
diff -u -p -r1.64 output.c
--- bgpctl/output.c 2 Dec 2025 13:03:54 -0000 1.64
+++ bgpctl/output.c 4 Dec 2025 15:01:57 -0000
@@ -1097,6 +1097,12 @@ show_rib_mem(struct rde_memstats *stats)
stats->attr_refs);
printf("%10lld BGP attributes using %s of memory\n",
stats->attr_dcnt, fmt_mem(stats->attr_data));
+ printf("%10lld pending attribute entries using %s of memory\n",
+ stats->pend_attr_cnt, fmt_mem(stats->pend_attr_cnt *
+ sizeof(struct pend_attr)));
+ printf("%10lld pending prefix entries using %s of memory\n",
+ stats->pend_prefix_cnt, fmt_mem(stats->pend_prefix_cnt *
+ sizeof(struct pend_prefix)));
printf("%10lld as-set elements in %lld tables using "
"%s of memory\n", stats->aset_nmemb, stats->aset_cnt,
fmt_mem(stats->aset_size));
@@ -1104,6 +1110,10 @@ show_rib_mem(struct rde_memstats *stats)
stats->pset_cnt, fmt_mem(stats->pset_size));
printf("RIB using %s of memory\n", fmt_mem(pts +
stats->prefix_cnt * sizeof(struct prefix) +
+ stats->adjout_prefix_cnt * sizeof(struct adjout_prefix) +
+ stats->adjout_attr_cnt * sizeof(struct adjout_attr) +
+ stats->pend_prefix_cnt * sizeof(struct pend_prefix) +
+ stats->pend_attr_cnt * sizeof(struct pend_attr) +
stats->rib_cnt * sizeof(struct rib_entry) +
stats->path_cnt * sizeof(struct rde_aspath) +
stats->aspath_size + stats->attr_cnt * sizeof(struct attr) +
Index: bgpctl/output_json.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
diff -u -p -r1.55 output_json.c
--- bgpctl/output_json.c 2 Dec 2025 13:03:54 -0000 1.55
+++ bgpctl/output_json.c 4 Dec 2025 15:01:57 -0000
@@ -909,6 +909,10 @@ json_rib_mem(struct rde_memstats *stats)
json_rib_mem_element("adjout_attr", stats->adjout_attr_cnt,
stats->adjout_attr_cnt * sizeof(struct adjout_attr),
stats->adjout_attr_refs);
+ json_rib_mem_element("pend_attr", stats->pend_attr_cnt,
+ stats->pend_attr_cnt * sizeof(struct pend_attr), UINT64_MAX);
+ json_rib_mem_element("pend_prefix", stats->pend_prefix_cnt,
+ stats->pend_prefix_cnt * sizeof(struct pend_prefix), UINT64_MAX);
json_rib_mem_element("rde_aspath", stats->path_cnt,
stats->path_cnt * sizeof(struct rde_aspath),
stats->path_refs);
@@ -924,6 +928,10 @@ json_rib_mem(struct rde_memstats *stats)
stats->attr_data, UINT64_MAX);
json_rib_mem_element("total", UINT64_MAX,
pts + stats->prefix_cnt * sizeof(struct prefix) +
+ stats->adjout_prefix_cnt * sizeof(struct adjout_prefix) +
+ stats->adjout_attr_cnt * sizeof(struct adjout_attr) +
+ stats->pend_prefix_cnt * sizeof(struct pend_prefix) +
+ stats->pend_attr_cnt * sizeof(struct pend_attr) +
stats->rib_cnt * sizeof(struct rib_entry) +
stats->path_cnt * sizeof(struct rde_aspath) +
stats->aspath_size + stats->attr_cnt * sizeof(struct attr) +
Index: bgpctl/output_ometric.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v
diff -u -p -r1.19 output_ometric.c
--- bgpctl/output_ometric.c 2 Dec 2025 13:03:54 -0000 1.19
+++ bgpctl/output_ometric.c 4 Dec 2025 15:01:57 -0000
@@ -296,6 +296,10 @@ ometric_rib_mem(struct rde_memstats *sta
ometric_rib_mem_element("adjout_prefix", stats->adjout_prefix_cnt,
stats->adjout_prefix_cnt * sizeof(struct adjout_prefix),
UINT64_MAX);
+ ometric_rib_mem_element("pend_attr", stats->pend_attr_cnt,
+ stats->pend_attr_cnt * sizeof(struct pend_attr), UINT64_MAX);
+ ometric_rib_mem_element("pend_prefix", stats->pend_prefix_cnt,
+ stats->pend_prefix_cnt * sizeof(struct pend_prefix), UINT64_MAX);
ometric_rib_mem_element("adjout_attr", stats->adjout_attr_cnt,
stats->adjout_attr_cnt * sizeof(struct adjout_attr),
stats->adjout_attr_refs);
@@ -315,6 +319,10 @@ ometric_rib_mem(struct rde_memstats *sta
ometric_rib_mem_element("total", UINT64_MAX,
pts + stats->prefix_cnt * sizeof(struct prefix) +
+ stats->adjout_prefix_cnt * sizeof(struct adjout_prefix) +
+ stats->adjout_attr_cnt * sizeof(struct adjout_attr) +
+ stats->pend_prefix_cnt * sizeof(struct pend_prefix) +
+ stats->pend_attr_cnt * sizeof(struct pend_attr) +
stats->rib_cnt * sizeof(struct rib_entry) +
stats->path_cnt * sizeof(struct rde_aspath) +
stats->aspath_size + stats->attr_cnt * sizeof(struct attr) +
Index: bgpd/bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.524 bgpd.h
--- bgpd/bgpd.h 3 Dec 2025 12:20:19 -0000 1.524
+++ bgpd/bgpd.h 4 Dec 2025 15:01:57 -0000
@@ -1392,6 +1392,8 @@ struct rde_memstats {
long long path_refs;
long long prefix_cnt;
long long adjout_prefix_cnt;
+ long long pend_prefix_cnt;
+ long long pend_attr_cnt;
long long rib_cnt;
long long pt_cnt[AID_MAX];
long long pt_size[AID_MAX];
Index: bgpd/rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.673 rde.c
--- bgpd/rde.c 3 Dec 2025 12:20:19 -0000 1.673
+++ bgpd/rde.c 4 Dec 2025 15:04:40 -0000
@@ -2938,16 +2938,18 @@ rde_dump_rib_as(struct prefix *p, struct
static void
rde_dump_adjout_as(struct rde_peer *peer, struct adjout_prefix *p,
- struct rde_aspath *asp, pid_t pid, int flags)
+ struct adjout_attr *attrs, pid_t pid, int flags)
{
struct ctl_show_rib rib;
struct ibuf *wbuf;
struct attr *a;
+ struct rde_aspath *asp;
struct nexthop *nexthop;
size_t aslen;
uint8_t l;
- nexthop = adjout_prefix_nexthop(p);
+ nexthop = attrs->nexthop;
+ asp = attrs->aspath;
memset(&rib, 0, sizeof(rib));
rib.local_pref = asp->lpref;
rib.med = asp->med;
@@ -2989,7 +2991,7 @@ rde_dump_adjout_as(struct rde_peer *peer
imsg_close(ibuf_se_ctl, wbuf);
if (flags & F_CTL_DETAIL) {
- struct rde_community *comm = adjout_prefix_communities(p);
+ struct rde_community *comm = attrs->communities;
size_t len = comm->nentries * sizeof(struct community);
if (comm->nentries > 0) {
if (imsg_compose(ibuf_se_ctl,
@@ -3073,14 +3075,14 @@ rde_dump_filter(struct prefix *p, struct
static void
rde_dump_adjout_filter(struct rde_peer *peer, struct adjout_prefix *p,
- struct ctl_show_rib_request *req)
+ struct adjout_attr *attrs, struct ctl_show_rib_request *req)
{
struct rde_aspath *asp;
if (!rde_match_peer(peer, &req->neighbor))
return;
- asp = adjout_prefix_aspath(p);
+ asp = attrs->aspath;
if ((req->flags & F_CTL_HAS_PATHID)) {
/* Match against the transmit path id if adjout is used. */
if (req->path_id != p->path_id_tx)
@@ -3090,12 +3092,11 @@ rde_dump_adjout_filter(struct rde_peer *
!aspath_match(asp->aspath, &req->as, 0))
return;
if (req->community.flags != 0) {
- if (!community_match(adjout_prefix_communities(p),
- &req->community, NULL))
+ if (!community_match(attrs->communities, &req->community, NULL))
return;
}
/* in the adj-rib-out, skip matching against roa and aspa state */
- rde_dump_adjout_as(peer, p, asp, req->pid, req->flags);
+ rde_dump_adjout_as(peer, p, attrs, req->pid, req->flags);
}
static void
@@ -3115,13 +3116,13 @@ rde_dump_adjout_upcall(struct adjout_pre
{
struct rde_dump_ctx *ctx = ptr;
struct rde_peer *peer;
+ struct adjout_attr *attrs;
if ((peer = peer_get(ctx->peerid)) == NULL)
return;
- if (p->flags & PREFIX_ADJOUT_FLAG_WITHDRAW)
- return;
- rde_dump_adjout_filter(peer, p, &ctx->req);
+ attrs = p->attrs;
+ rde_dump_adjout_filter(peer, p, attrs, &ctx->req);
}
static int
@@ -3561,8 +3562,8 @@ rde_update_queue_pending(void)
if (peer->throttled)
continue;
for (aid = AID_MIN; aid < AID_MAX; aid++) {
- if (!RB_EMPTY(&peer->updates[aid]) ||
- !RB_EMPTY(&peer->withdraws[aid]))
+ if (!TAILQ_EMPTY(&peer->updates[aid]) ||
+ !TAILQ_EMPTY(&peer->withdraws[aid]))
return 1;
}
}
@@ -3585,7 +3586,7 @@ rde_update_queue_runner(uint8_t aid)
continue;
if (peer->throttled)
continue;
- if (RB_EMPTY(&peer->withdraws[aid]))
+ if (TAILQ_EMPTY(&peer->withdraws[aid]))
continue;
up_dump_withdraws(ibuf_se, peer, aid);
@@ -3605,7 +3606,7 @@ rde_update_queue_runner(uint8_t aid)
continue;
if (peer->throttled)
continue;
- if (RB_EMPTY(&peer->updates[aid]))
+ if (TAILQ_EMPTY(&peer->updates[aid]))
continue;
if (up_is_eor(peer, aid)) {
Index: bgpd/rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.327 rde.h
--- bgpd/rde.h 2 Dec 2025 13:03:35 -0000 1.327
+++ bgpd/rde.h 4 Dec 2025 15:58:51 -0000
@@ -27,6 +27,7 @@
#include "bgpd.h"
#include "log.h"
+#include "chash.h"
/* rde internal structures */
@@ -71,9 +72,13 @@ struct rib {
* Currently I assume that we can do that with the neighbor_ip...
*/
RB_HEAD(peer_tree, rde_peer);
-RB_HEAD(prefix_tree, adjout_prefix);
RB_HEAD(prefix_index, adjout_prefix);
+CH_HEAD(pend_prefix_hash, pend_prefix);
+TAILQ_HEAD(pend_prefix_queue, pend_prefix);
+CH_HEAD(pend_attr_hash, pend_attr);
+TAILQ_HEAD(pend_attr_queue, pend_attr);
+
struct rde_peer {
RB_ENTRY(rde_peer) entry;
struct peer_config conf;
@@ -84,8 +89,10 @@ struct rde_peer {
struct capabilities capa;
struct addpath_eval eval;
struct prefix_index adj_rib_out;
- struct prefix_tree updates[AID_MAX];
- struct prefix_tree withdraws[AID_MAX];
+ struct pend_prefix_hash pend_prefixes;
+ struct pend_attr_hash pend_attrs;
+ struct pend_attr_queue updates[AID_MAX];
+ struct pend_prefix_queue withdraws[AID_MAX];
struct filter_head *out_rules;
struct ibufqueue *ibufq;
monotime_t staletime[AID_MAX];
@@ -311,20 +318,31 @@ struct adjout_attr {
};
struct adjout_prefix {
- RB_ENTRY(adjout_prefix) index, update;
+ RB_ENTRY(adjout_prefix) index;
struct pt_entry *pt;
struct adjout_attr *attrs;
uint32_t path_id_tx;
uint8_t flags;
};
-#define PREFIX_ADJOUT_FLAG_WITHDRAW 0x01 /* enqueued on withdraw queue */
-#define PREFIX_ADJOUT_FLAG_UPDATE 0x02 /* enqueued on update queue */
#define PREFIX_ADJOUT_FLAG_DEAD 0x04 /* locked but removed */
#define PREFIX_ADJOUT_FLAG_STALE 0x08 /* stale entry (for addpath) */
#define PREFIX_ADJOUT_FLAG_MASK 0x0f /* mask for the prefix types */
-#define PREFIX_ADJOUT_FLAG_EOR 0x10 /* prefix is EoR */
#define PREFIX_ADJOUT_FLAG_LOCKED 0x20 /* locked by rib walker */
+struct pend_attr {
+ TAILQ_ENTRY(pend_attr) entry;
+ struct pend_prefix_queue prefixes;
+ struct adjout_attr *attrs;
+ uint8_t aid;
+};
+
+struct pend_prefix {
+ TAILQ_ENTRY(pend_prefix) entry;
+ struct pt_entry *pt;
+ struct pend_attr *attrs;
+ uint32_t path_id_tx;
+};
+
struct filterstate {
struct rde_aspath aspath;
struct rde_community communities;
@@ -637,8 +655,6 @@ struct prefix *prefix_bypeer(struct rib_
uint32_t);
void prefix_destroy(struct prefix *);
-RB_PROTOTYPE(prefix_tree, adjout_prefix, entry, prefix_cmp)
-
static inline struct rde_peer *
prefix_peer(struct prefix *p)
{
@@ -750,24 +766,14 @@ int adjout_prefix_dump_subtree(struct
struct bgpd_addr *, uint8_t, unsigned int, void *,
void (*)(struct adjout_prefix *, void *),
void (*)(void *, uint8_t), int (*)(void *));
+void adjout_peer_init(struct rde_peer *);
-static inline struct rde_aspath *
-adjout_prefix_aspath(struct adjout_prefix *p)
-{
- return (p->attrs->aspath);
-}
-
-static inline struct rde_community *
-adjout_prefix_communities(struct adjout_prefix *p)
-{
- return (p->attrs->communities);
-}
-
-static inline struct nexthop *
-adjout_prefix_nexthop(struct adjout_prefix *p)
-{
- return (p->attrs->nexthop);
-}
+void pend_attr_done(struct pend_attr *, struct rde_peer *);
+void pend_eor_add(struct rde_peer *, uint8_t);
+void pend_prefix_add(struct rde_peer *, struct adjout_attr *,
+ struct pt_entry *, uint32_t);
+void pend_prefix_free(struct pend_prefix *,
+ struct pend_prefix_queue *, struct rde_peer *);
/* rde_update.c */
void up_generate_updates(struct rde_peer *, struct rib_entry *);
Index: bgpd/rde_adjout.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_adjout.c,v
diff -u -p -r1.8 rde_adjout.c
--- bgpd/rde_adjout.c 2 Dec 2025 13:03:35 -0000 1.8
+++ bgpd/rde_adjout.c 4 Dec 2025 16:09:14 -0000
@@ -30,6 +30,228 @@
#include "log.h"
#include "chash.h"
+static struct adjout_attr *adjout_attr_ref(struct adjout_attr *);
+static void adjout_attr_unref(struct adjout_attr *);
+
+static uint64_t pendkey;
+
+static inline uint64_t
+pend_prefix_hash(const struct pend_prefix *pp)
+{
+ uint64_t h = pendkey;
+
+ h = ch_qhash64(h, (uintptr_t)pp->pt);
+ h = ch_qhash64(h, pp->path_id_tx);
+ return h;
+}
+
+static inline uint64_t
+pend_attr_hash(const struct pend_attr *pa)
+{
+ uint64_t h = pendkey;
+
+ h = ch_qhash64(h, (uintptr_t)pa->attrs);
+ h = ch_qhash64(h, pa->aid);
+ return h;
+}
+
+CH_PROTOTYPE(pend_prefix_hash, pend_prefix, pend_prefix_hash);
+CH_PROTOTYPE(pend_attr_hash, pend_attr, pend_attr_hash);
+
+/* pending prefix queue functions */
+static struct pend_attr *
+pend_attr_alloc(struct adjout_attr *attrs, uint8_t aid,
+ struct rde_peer *peer)
+{
+ struct pend_attr *pa;
+
+ if ((pa = calloc(1, sizeof(*pa))) == NULL)
+ fatal(__func__);
+ rdemem.pend_attr_cnt++;
+ TAILQ_INIT(&pa->prefixes);
+ if (attrs)
+ pa->attrs = adjout_attr_ref(attrs);
+ pa->aid = aid;
+
+ TAILQ_INSERT_TAIL(&peer->updates[aid], pa, entry);
+ if (CH_INSERT(pend_attr_hash, &peer->pend_attrs, pa, NULL) != 1)
+ fatalx("corrupted pending attr hash table");
+ return pa;
+}
+
+static void
+pend_attr_free(struct pend_attr *pa, struct rde_peer *peer)
+{
+ if (!TAILQ_EMPTY(&pa->prefixes)) {
+ log_warnx("freeing not empty pending attribute");
+ abort();
+ }
+
+ TAILQ_REMOVE(&peer->updates[pa->aid], pa, entry);
+ CH_REMOVE(pend_attr_hash, &peer->pend_attrs, pa);
+
+ if (pa->attrs != NULL)
+ adjout_attr_unref(pa->attrs);
+
+ rdemem.pend_attr_cnt--;
+ free(pa);
+}
+
+void
+pend_attr_done(struct pend_attr *pa, struct rde_peer *peer)
+{
+ if (pa == NULL)
+ return;
+ if (TAILQ_EMPTY(&pa->prefixes))
+ pend_attr_free(pa, peer);
+}
+
+static struct pend_attr *
+pend_attr_lookup(struct rde_peer *peer, struct adjout_attr *attrs, uint8_t aid)
+{
+ struct pend_attr needle = { .attrs = attrs, .aid = aid };
+
+ return CH_FIND(pend_attr_hash, &peer->pend_attrs, &needle);
+}
+
+static inline int
+pend_attr_eq(const struct pend_attr *a, const struct pend_attr *b)
+{
+ if (a->attrs != b->attrs)
+ return 0;
+ if (a->aid != b->aid)
+ return 0;
+ return 1;
+}
+
+CH_GENERATE(pend_attr_hash, pend_attr, pend_attr_eq, pend_attr_hash);
+
+/*
+ * Insert an End-of-RIB marker into the update queue.
+ */
+void
+pend_eor_add(struct rde_peer *peer, uint8_t aid)
+{
+ struct pend_attr *pa;
+
+ pa = pend_attr_lookup(peer, NULL, aid);
+ if (pa == NULL)
+ pa = pend_attr_alloc(NULL, aid, peer);
+}
+
+
+static struct pend_prefix *pend_prefix_alloc(struct pend_attr *,
+ struct pt_entry *, uint32_t);
+
+static struct pend_prefix *
+pend_prefix_lookup(struct rde_peer *peer, struct pt_entry *pt,
+ uint32_t path_id_tx)
+{
+ struct pend_prefix needle = { .pt = pt, .path_id_tx = path_id_tx };
+
+ return CH_FIND(pend_prefix_hash, &peer->pend_prefixes, &needle);
+}
+
+static void
+pend_prefix_remove(struct pend_prefix *pp, struct pend_prefix_queue *head,
+ struct rde_peer *peer)
+{
+ if (CH_REMOVE(pend_prefix_hash, &peer->pend_prefixes, pp) != pp) {
+ log_warnx("missing pending prefix in hash table");
+ abort();
+ }
+ TAILQ_REMOVE(head, pp, entry);
+
+ if (pp->attrs == NULL) {
+ peer->stats.pending_withdraw--;
+ } else {
+ peer->stats.pending_update--;
+ }
+ pp->attrs = NULL;
+}
+
+void
+pend_prefix_add(struct rde_peer *peer, struct adjout_attr *attrs,
+ struct pt_entry *pt, uint32_t path_id_tx)
+{
+ struct pend_attr *pa = NULL, *oldpa = NULL;
+ struct pend_prefix *pp;
+ struct pend_prefix_queue *head;
+
+ if (attrs != NULL) {
+ pa = pend_attr_lookup(peer, attrs, pt->aid);
+ if (pa == NULL)
+ pa = pend_attr_alloc(attrs, pt->aid, peer);
+ }
+
+ pp = pend_prefix_lookup(peer, pt, path_id_tx);
+ if (pp == NULL) {
+ pp = pend_prefix_alloc(pa, pt, path_id_tx);
+ } else {
+ if (pp->attrs == NULL)
+ head = &peer->withdraws[pt->aid];
+ else
+ head = &pp->attrs->prefixes;
+ oldpa = pp->attrs;
+ pend_prefix_remove(pp, head, peer);
+ pp->attrs = pa;
+ }
+
+ if (pa == NULL) {
+ head = &peer->withdraws[pt->aid];
+ peer->stats.pending_withdraw++;
+ } else {
+ head = &pa->prefixes;
+ peer->stats.pending_update++;
+ }
+
+ TAILQ_INSERT_TAIL(head, pp, entry);
+ if (CH_INSERT(pend_prefix_hash, &peer->pend_prefixes, pp, NULL) != 1) {
+ log_warnx("corrupted pending prefix hash table");
+ abort();
+ }
+
+ pend_attr_done(oldpa, peer);
+}
+
+static struct pend_prefix *
+pend_prefix_alloc(struct pend_attr *attrs, struct pt_entry *pt,
+ uint32_t path_id_tx)
+{
+ struct pend_prefix *pp;
+
+ if ((pp = calloc(1, sizeof(*pp))) == NULL)
+ fatal(__func__);
+ rdemem.pend_prefix_cnt++;
+ pp->pt = pt_ref(pt);
+ pp->attrs = attrs;
+ pp->path_id_tx = path_id_tx;
+
+ return pp;
+}
+
+void
+pend_prefix_free(struct pend_prefix *pp, struct pend_prefix_queue *head,
+ struct rde_peer *peer)
+{
+ pend_prefix_remove(pp, head, peer);
+ pt_unref(pp->pt);
+ rdemem.pend_prefix_cnt--;
+ free(pp);
+}
+
+static inline int
+pend_prefix_eq(const struct pend_prefix *a, const struct pend_prefix *b)
+{
+ if (a->pt != b->pt)
+ return 0;
+ if (a->path_id_tx != b->path_id_tx)
+ return 0;
+ return 1;
+}
+
+CH_GENERATE(pend_prefix_hash, pend_prefix, pend_prefix_eq, pend_prefix_hash);
+
/* adj-rib-out specific functions */
static uint64_t attrkey;
@@ -68,6 +290,7 @@ void
adjout_init(void)
{
arc4random_buf(&attrkey, sizeof(attrkey));
+ arc4random_buf(&pendkey, sizeof(pendkey));
}
/* Alloc, init and add a new entry into the has table. May not fail. */
@@ -77,8 +300,7 @@ adjout_attr_alloc(struct rde_aspath *asp
{
struct adjout_attr *a;
- a = calloc(1, sizeof(*a));
- if (a == NULL)
+ if ((a = calloc(1, sizeof(*a))) == NULL)
fatal(__func__);
rdemem.adjout_attr_cnt++;
@@ -115,7 +337,7 @@ adjout_attr_free(struct adjout_attr *a)
}
static struct adjout_attr *
-adjout_attr_ref(struct adjout_attr *attrs, struct rde_peer *peer)
+adjout_attr_ref(struct adjout_attr *attrs)
{
attrs->refcnt++;
rdemem.adjout_attr_refs++;
@@ -123,7 +345,7 @@ adjout_attr_ref(struct adjout_attr *attr
}
static void
-adjout_attr_unref(struct adjout_attr *attrs, struct rde_peer *peer)
+adjout_attr_unref(struct adjout_attr *attrs)
{
attrs->refcnt--;
rdemem.adjout_attr_refs--;
@@ -217,22 +439,6 @@ prefix_index_cmp(struct adjout_prefix *a
return 0;
}
-static inline int
-prefix_cmp(struct adjout_prefix *a, struct adjout_prefix *b)
-{
- if ((a->flags & PREFIX_ADJOUT_FLAG_EOR) !=
- (b->flags & PREFIX_ADJOUT_FLAG_EOR))
- return (a->flags & PREFIX_ADJOUT_FLAG_EOR) ? 1 : -1;
- /* if EOR marker no need to check the rest */
- if (a->flags & PREFIX_ADJOUT_FLAG_EOR)
- return 0;
-
- if (a->attrs != b->attrs)
- return (a->attrs > b->attrs ? 1 : -1);
- return prefix_index_cmp(a, b);
-}
-
-RB_GENERATE(prefix_tree, adjout_prefix, update, prefix_cmp)
RB_GENERATE_STATIC(prefix_index, adjout_prefix, index, prefix_index_cmp)
/*
@@ -328,22 +534,6 @@ adjout_prefix_match(struct rde_peer *pee
}
/*
- * Insert an End-of-RIB marker into the update queue.
- */
-void
-prefix_add_eor(struct rde_peer *peer, uint8_t aid)
-{
- struct adjout_prefix *p;
-
- p = adjout_prefix_alloc();
- p->flags = PREFIX_ADJOUT_FLAG_UPDATE | PREFIX_ADJOUT_FLAG_EOR;
- if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
- /* no need to add if EoR marker already present */
- adjout_prefix_free(p);
- /* EOR marker is not inserted into the adj_rib_out index */
-}
-
-/*
* Put a prefix from the Adj-RIB-Out onto the update queue.
*/
void
@@ -364,38 +554,28 @@ adjout_prefix_update(struct adjout_prefi
fatalx("%s: RB index invariant violated", __func__);
}
- if ((p->flags & (PREFIX_ADJOUT_FLAG_WITHDRAW |
- PREFIX_ADJOUT_FLAG_DEAD)) == 0) {
+ if ((p->flags & (PREFIX_ADJOUT_FLAG_DEAD)) == 0) {
/*
* XXX for now treat a different path_id_tx like different
* attributes and force out an update. It is unclear how
* common it is to have equivalent updates from alternative
* paths.
*/
+ attrs = p->attrs;
if (p->path_id_tx == path_id_tx &&
- adjout_prefix_nexthop(p) == state->nexthop &&
+ attrs->nexthop == state->nexthop &&
communities_equal(&state->communities,
- adjout_prefix_communities(p)) &&
- path_equal(&state->aspath, adjout_prefix_aspath(p))) {
+ attrs->communities) &&
+ path_equal(&state->aspath, attrs->aspath)) {
/* nothing changed */
p->flags &= ~PREFIX_ADJOUT_FLAG_STALE;
return;
}
- /* if pending update unhook it before it is unlinked */
- if (p->flags & PREFIX_ADJOUT_FLAG_UPDATE) {
- RB_REMOVE(prefix_tree, &peer->updates[pte->aid], p);
- peer->stats.pending_update--;
- }
-
/* unlink prefix so it can be relinked below */
adjout_prefix_unlink(p, peer);
peer->stats.prefix_out_cnt--;
}
- if (p->flags & PREFIX_ADJOUT_FLAG_WITHDRAW) {
- RB_REMOVE(prefix_tree, &peer->withdraws[pte->aid], p);
- peer->stats.pending_withdraw--;
- }
/* nothing needs to be done for PREFIX_ADJOUT_FLAG_DEAD and STALE */
p->flags &= ~PREFIX_ADJOUT_FLAG_MASK;
@@ -417,10 +597,7 @@ adjout_prefix_update(struct adjout_prefi
if (p->flags & PREFIX_ADJOUT_FLAG_MASK)
fatalx("%s: bad flags %x", __func__, p->flags);
if (peer_is_up(peer)) {
- p->flags |= PREFIX_ADJOUT_FLAG_UPDATE;
- if (RB_INSERT(prefix_tree, &peer->updates[pte->aid], p) != NULL)
- fatalx("%s: RB tree invariant violated", __func__);
- peer->stats.pending_update++;
+ pend_prefix_add(peer, p->attrs, p->pt, p->path_id_tx);
}
}
@@ -431,59 +608,17 @@ adjout_prefix_update(struct adjout_prefi
void
adjout_prefix_withdraw(struct rde_peer *peer, struct adjout_prefix *p)
{
- /* already a withdraw, shortcut */
- if (p->flags & PREFIX_ADJOUT_FLAG_WITHDRAW) {
- p->flags &= ~PREFIX_ADJOUT_FLAG_STALE;
- return;
- }
- /* pending update just got withdrawn */
- if (p->flags & PREFIX_ADJOUT_FLAG_UPDATE) {
- RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
- peer->stats.pending_update--;
- }
- /* unlink prefix if it was linked (not a withdraw or dead) */
- if ((p->flags & (PREFIX_ADJOUT_FLAG_WITHDRAW |
- PREFIX_ADJOUT_FLAG_DEAD)) == 0) {
- adjout_prefix_unlink(p, peer);
- peer->stats.prefix_out_cnt--;
- }
+ if (peer_is_up(peer))
+ pend_prefix_add(peer, NULL, p->pt, p->path_id_tx);
- /* nothing needs to be done for PREFIX_ADJOUT_FLAG_DEAD and STALE */
- p->flags &= ~PREFIX_ADJOUT_FLAG_MASK;
-
- if (peer_is_up(peer)) {
- p->flags |= PREFIX_ADJOUT_FLAG_WITHDRAW;
- if (RB_INSERT(prefix_tree, &peer->withdraws[p->pt->aid],
- p) != NULL)
- fatalx("%s: RB tree invariant violated", __func__);
- peer->stats.pending_withdraw++;
- } else {
- /* mark prefix dead to skip unlink on destroy */
- p->flags |= PREFIX_ADJOUT_FLAG_DEAD;
- adjout_prefix_destroy(peer, p);
- }
+ adjout_prefix_destroy(peer, p);
}
void
adjout_prefix_destroy(struct rde_peer *peer, struct adjout_prefix *p)
{
- if (p->flags & PREFIX_ADJOUT_FLAG_EOR) {
- /* EOR marker is not linked in the index */
- adjout_prefix_free(p);
- return;
- }
-
- if (p->flags & PREFIX_ADJOUT_FLAG_WITHDRAW) {
- RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p);
- peer->stats.pending_withdraw--;
- }
- if (p->flags & PREFIX_ADJOUT_FLAG_UPDATE) {
- RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
- peer->stats.pending_update--;
- }
- /* unlink prefix if it was linked (not a withdraw or dead) */
- if ((p->flags & (PREFIX_ADJOUT_FLAG_WITHDRAW |
- PREFIX_ADJOUT_FLAG_DEAD)) == 0) {
+ /* unlink prefix if it was linked (not dead) */
+ if ((p->flags & PREFIX_ADJOUT_FLAG_DEAD) == 0) {
adjout_prefix_unlink(p, peer);
peer->stats.prefix_out_cnt--;
}
@@ -505,21 +640,19 @@ adjout_prefix_destroy(struct rde_peer *p
void
adjout_prefix_flush_pending(struct rde_peer *peer)
{
- struct adjout_prefix *p, *np;
+ struct pend_attr *pa, *npa;
+ struct pend_prefix *pp, *npp;
uint8_t aid;
for (aid = AID_MIN; aid < AID_MAX; aid++) {
- RB_FOREACH_SAFE(p, prefix_tree, &peer->withdraws[aid], np) {
- adjout_prefix_destroy(peer, p);
+ TAILQ_FOREACH_SAFE(pp, &peer->withdraws[aid], entry, npp) {
+ pend_prefix_free(pp, &peer->withdraws[aid], peer);
}
- RB_FOREACH_SAFE(p, prefix_tree, &peer->updates[aid], np) {
- p->flags &= ~PREFIX_ADJOUT_FLAG_UPDATE;
- RB_REMOVE(prefix_tree, &peer->updates[aid], p);
- if (p->flags & PREFIX_ADJOUT_FLAG_EOR) {
- adjout_prefix_destroy(peer, p);
- } else {
- peer->stats.pending_update--;
+ TAILQ_FOREACH_SAFE(pa, &peer->updates[aid], entry, npa) {
+ TAILQ_FOREACH_SAFE(pp, &pa->prefixes, entry, npp) {
+ pend_prefix_free(pp, &pa->prefixes, peer);
}
+ pend_attr_done(pa, peer);
}
}
}
@@ -690,7 +823,7 @@ static void
adjout_prefix_link(struct adjout_prefix *p, struct rde_peer *peer,
struct adjout_attr *attrs, struct pt_entry *pt, uint32_t path_id_tx)
{
- p->attrs = adjout_attr_ref(attrs, peer);
+ p->attrs = adjout_attr_ref(attrs);
p->pt = pt_ref(pt);
p->path_id_tx = path_id_tx;
}
@@ -702,7 +835,7 @@ static void
adjout_prefix_unlink(struct adjout_prefix *p, struct rde_peer *peer)
{
/* destroy all references to other objects */
- adjout_attr_unref(p->attrs, peer);
+ adjout_attr_unref(p->attrs);
p->attrs = NULL;
pt_unref(p->pt);
/* must keep p->pt valid since there is an extra ref */
@@ -727,4 +860,17 @@ adjout_prefix_free(struct adjout_prefix
{
rdemem.adjout_prefix_cnt--;
free(p);
+}
+
+void
+adjout_peer_init(struct rde_peer *peer)
+{
+ unsigned int i;
+
+ CH_INIT(pend_attr_hash, &peer->pend_attrs);
+ CH_INIT(pend_prefix_hash, &peer->pend_prefixes);
+ for (i = 0; i < nitems(peer->updates); i++)
+ TAILQ_INIT(&peer->updates[i]);
+ for (i = 0; i < nitems(peer->withdraws); i++)
+ TAILQ_INIT(&peer->withdraws[i]);
}
Index: bgpd/rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.59 rde_peer.c
--- bgpd/rde_peer.c 2 Dec 2025 13:03:35 -0000 1.59
+++ bgpd/rde_peer.c 4 Dec 2025 15:03:48 -0000
@@ -175,6 +175,7 @@ peer_add(uint32_t id, struct peer_config
if ((peer->ibufq = ibufq_new()) == NULL)
fatal(NULL);
+ adjout_peer_init(peer);
peer_apply_out_filter(peer, rules);
/*
@@ -520,15 +521,11 @@ static void
peer_blast_upcall(struct adjout_prefix *p, void *ptr)
{
struct rde_peer *peer = ptr;
+ struct adjout_attr *attrs = NULL;
- if ((p->flags & PREFIX_ADJOUT_FLAG_MASK) == 0) {
- /* put entries on the update queue if not already on a queue */
- p->flags |= PREFIX_ADJOUT_FLAG_UPDATE;
- if (RB_INSERT(prefix_tree, &peer->updates[p->pt->aid],
- p) != NULL)
- fatalx("%s: RB tree invariant violated", __func__);
- peer->stats.pending_update++;
- }
+ attrs = p->attrs;
+
+ pend_prefix_add(peer, attrs, p->pt, p->path_id_tx);
}
/*
@@ -543,7 +540,7 @@ peer_blast_done(void *ptr, uint8_t aid)
/* Adj-RIB-Out ready, unthrottle peer and inject EOR */
peer->throttled = 0;
if (peer->capa.grestart.restart)
- prefix_add_eor(peer, aid);
+ pend_eor_add(peer, aid);
}
/*
Index: bgpd/rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.187 rde_update.c
--- bgpd/rde_update.c 3 Dec 2025 10:00:15 -0000 1.187
+++ bgpd/rde_update.c 4 Dec 2025 15:59:28 -0000
@@ -799,17 +799,11 @@ up_generate_attr(struct ibuf *buf, struc
int
up_is_eor(struct rde_peer *peer, uint8_t aid)
{
- struct adjout_prefix *p;
+ struct pend_attr *pa;
- p = RB_MIN(prefix_tree, &peer->updates[aid]);
- if (p != NULL && (p->flags & PREFIX_ADJOUT_FLAG_EOR)) {
- /*
- * Need to remove eor from update tree because
- * adjout_prefix_destroy() can't handle that.
- */
- RB_REMOVE(prefix_tree, &peer->updates[aid], p);
- p->flags &= ~PREFIX_ADJOUT_FLAG_UPDATE;
- adjout_prefix_destroy(peer, p);
+ pa = TAILQ_FIRST(&peer->updates[aid]);
+ if (pa != NULL && (uintptr_t)pa->attrs < AID_MAX) {
+ pend_attr_done(pa, peer);
return 1;
}
return 0;
@@ -818,36 +812,19 @@ up_is_eor(struct rde_peer *peer, uint8_t
/* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
#define MIN_UPDATE_LEN 16
-static void
-up_prefix_free(struct prefix_tree *prefix_head, struct adjout_prefix *p,
- struct rde_peer *peer, int withdraw)
-{
- if (withdraw) {
- /* prefix no longer needed, remove it */
- adjout_prefix_destroy(peer, p);
- peer->stats.prefix_sent_withdraw++;
- } else {
- /* prefix still in Adj-RIB-Out, keep it */
- RB_REMOVE(prefix_tree, prefix_head, p);
- p->flags &= ~PREFIX_ADJOUT_FLAG_UPDATE;
- peer->stats.pending_update--;
- peer->stats.prefix_sent_update++;
- }
-}
-
/*
* Write prefixes to buffer until either there is no more space or
* the next prefix has no longer the same ASPATH attributes.
* Returns -1 if no prefix was written else 0.
*/
static int
-up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
+up_dump_prefix(struct ibuf *buf, struct pend_prefix_queue *prefix_head,
struct rde_peer *peer, int withdraw)
{
- struct adjout_prefix *p, *np;
- int done = 0, has_ap = -1, rv = -1;
+ struct pend_prefix *p, *np;
+ int has_ap = -1, rv = -1;
- RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
+ TAILQ_FOREACH_SAFE(p, prefix_head, entry, np) {
if (has_ap == -1)
has_ap = peer_has_add_path(peer, p->pt->aid,
CAPA_AP_SEND);
@@ -855,24 +832,21 @@ up_dump_prefix(struct ibuf *buf, struct
-1)
break;
- /* make sure we only dump prefixes which belong together */
- if (np == NULL ||
- np->attrs != p->attrs ||
- (np->flags & PREFIX_ADJOUT_FLAG_EOR))
- done = 1;
-
rv = 0;
- up_prefix_free(prefix_head, p, peer, withdraw);
- if (done)
- break;
+ if (withdraw)
+ peer->stats.prefix_sent_withdraw++;
+ else
+ peer->stats.prefix_sent_update++;
+ pend_prefix_free(p, prefix_head, peer);
}
return rv;
}
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
- struct nexthop *nh, uint8_t aid)
+ struct pend_attr *pa, uint8_t aid)
{
+ struct nexthop *nh = pa->attrs->nexthop;
struct bgpd_addr *nexthop;
size_t off, nhoff;
uint16_t len, afi;
@@ -969,7 +943,7 @@ up_generate_mp_reach(struct ibuf *buf, s
if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
return -1;
- if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
+ if (up_dump_prefix(buf, &pa->prefixes, peer, 0) == -1)
/* no prefixes written, fail update */
return -1;
@@ -1084,8 +1058,8 @@ up_dump_withdraws(struct imsgbuf *imsg,
* Withdraw a single prefix after an error.
*/
static int
-up_dump_withdraw_one(struct rde_peer *peer, struct adjout_prefix *p,
- struct ibuf *buf)
+up_dump_withdraw_one(struct rde_peer *peer, struct pt_entry *pt,
+ uint32_t path_id_tx, struct ibuf *buf)
{
size_t off;
int has_ap;
@@ -1100,7 +1074,7 @@ up_dump_withdraw_one(struct rde_peer *pe
if (ibuf_add_zero(buf, sizeof(len)) == -1)
return -1;
- if (p->pt->aid != AID_INET) {
+ if (pt->aid != AID_INET) {
/* reserve space for 2-byte path attribute length */
off = ibuf_size(buf);
if (ibuf_add_zero(buf, sizeof(len)) == -1)
@@ -1115,7 +1089,7 @@ up_dump_withdraw_one(struct rde_peer *pe
return -1;
/* afi & safi */
- if (aid2afi(p->pt->aid, &afi, &safi) == -1)
+ if (aid2afi(pt->aid, &afi, &safi) == -1)
fatalx("%s: bad AID", __func__);
if (ibuf_add_n16(buf, afi) == -1)
return -1;
@@ -1123,8 +1097,8 @@ up_dump_withdraw_one(struct rde_peer *pe
return -1;
}
- has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
- if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
+ has_ap = peer_has_add_path(peer, pt->aid, CAPA_AP_SEND);
+ if (pt_writebuf(buf, pt, 1, has_ap, path_id_tx) == -1)
return -1;
/* update length field (either withdrawn routes or attribute length) */
@@ -1132,7 +1106,7 @@ up_dump_withdraw_one(struct rde_peer *pe
if (ibuf_set_n16(buf, off, len) == -1)
return -1;
- if (p->pt->aid != AID_INET) {
+ if (pt->aid != AID_INET) {
/* write MP_UNREACH_NLRI attribute length (always extended) */
len -= 4; /* skip attribute header */
if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
@@ -1158,17 +1132,18 @@ up_dump_update(struct imsgbuf *imsg, str
{
struct ibuf *buf;
struct bgpd_addr addr;
- struct adjout_prefix *p;
+ struct pend_attr *pa;
+ struct pend_prefix *pp;
size_t off, pkgsize = MAX_PKTSIZE;
uint16_t len;
int force_ip4mp = 0;
- p = RB_MIN(prefix_tree, &peer->updates[aid]);
- if (p == NULL)
+ pa = TAILQ_FIRST(&peer->updates[aid]);
+ if (pa == NULL)
return;
if (aid == AID_INET && peer_has_ext_nexthop(peer, AID_INET)) {
- struct nexthop *nh = adjout_prefix_nexthop(p);
+ struct nexthop *nh = pa->attrs->nexthop;
if (nh != NULL && nh->exit_nexthop.aid == AID_INET6)
force_ip4mp = 1;
}
@@ -1191,8 +1166,8 @@ up_dump_update(struct imsgbuf *imsg, str
if (ibuf_add_zero(buf, sizeof(len)) == -1)
goto fail;
- if (up_generate_attr(buf, peer, adjout_prefix_aspath(p),
- adjout_prefix_communities(p), adjout_prefix_nexthop(p), aid) == -1)
+ if (up_generate_attr(buf, peer, pa->attrs->aspath,
+ pa->attrs->communities, pa->attrs->nexthop, aid) == -1)
goto drop;
if (aid != AID_INET || force_ip4mp) {
@@ -1204,8 +1179,7 @@ up_dump_update(struct imsgbuf *imsg, str
* merge the attributes together in reverse order of
* creation.
*/
- if (up_generate_mp_reach(buf, peer, adjout_prefix_nexthop(p),
- aid) == -1)
+ if (up_generate_mp_reach(buf, peer, pa, aid) == -1)
goto drop;
}
@@ -1216,28 +1190,32 @@ up_dump_update(struct imsgbuf *imsg, str
if (aid == AID_INET && !force_ip4mp) {
/* last but not least dump the IPv4 nlri */
- if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
+ if (up_dump_prefix(buf, &pa->prefixes, peer, 0) == -1)
goto drop;
}
+ pend_attr_done(pa, peer);
+
imsg_close(imsg, buf);
return;
drop:
/* Not enough space. Drop current prefix, it will never fit. */
- p = RB_MIN(prefix_tree, &peer->updates[aid]);
- pt_getaddr(p->pt, &addr);
+ pp = TAILQ_FIRST(&pa->prefixes);
+ pt_getaddr(pp->pt, &addr);
log_peer_warnx(&peer->conf, "generating update failed, "
- "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);
+ "prefix %s/%d dropped", log_addr(&addr), pp->pt->prefixlen);
- up_prefix_free(&peer->updates[aid], p, peer, 0);
- if (up_dump_withdraw_one(peer, p, buf) == -1)
+ if (up_dump_withdraw_one(peer, pp->pt, pp->path_id_tx, buf) == -1)
goto fail;
+ pend_prefix_free(pp, &pa->prefixes, peer);
+ pend_attr_done(pa, peer);
imsg_close(imsg, buf);
return;
fail:
/* something went horribly wrong */
+ pend_attr_done(pa, peer);
log_peer_warn(&peer->conf, "generating update failed, peer desynced");
ibuf_free(buf);
}
bgpd: introduce pending attr and prefix queues