Download raw body.
bgpd: rewrite adj-rib-out code
Fully rewrite the adj-rib-out code to not be per peer based but instead
global with a peer bitmap to know which peer holds which prefix version.
So a pt_entry now includes an array of struct adjout_prefix elements;
each entry is for a different path (a different set of attributes) and
includes a bitmap that tracks which peers include the prefix.
This alters most of the adjout_prefix functions in some way or another.
An optimisation on top of this is that the path_id_tx is forced to 0 for
peers that do not have add-path send enabled. This way the lookup for this
common case is less deep.
The peer_reaper is now replaced with a simple adjout_prefix_dump call.
This is enough for a first step.
In general this reduces memory consumption by more than 50%, especially if
the outbound filters produce the same path for many peers. My IXP
test setup dropped from over 20GB to below 5GB memory usage.
--
:wq Claudio
Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.527 bgpd.h
--- bgpd.h 16 Dec 2025 15:38:55 -0000 1.527
+++ bgpd.h 17 Dec 2025 14:25:24 -0000
@@ -1396,6 +1396,7 @@ struct rde_memstats {
long long path_refs;
long long prefix_cnt;
long long adjout_prefix_cnt;
+ long long adjout_prefix_size;
long long pend_prefix_cnt;
long long pend_attr_cnt;
long long rib_cnt;
@@ -1676,6 +1677,7 @@ const char *get_baudrate(unsigned long l
unsigned int bin_of_attrs(unsigned int);
unsigned int bin_of_communities(unsigned int);
+unsigned int bin_of_adjout_prefixes(unsigned int);
/* flowspec.c */
int flowspec_valid(const uint8_t *, int, int);
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.677 rde.c
--- rde.c 16 Dec 2025 12:16:03 -0000 1.677
+++ rde.c 17 Dec 2025 14:25:24 -0000
@@ -326,7 +326,6 @@ rde_main(int debug, int verbose)
monotime_to_usec(monotime_sub(io_end, loop_start));
peer_foreach(rde_dispatch_imsg_peer, NULL);
- peer_reaper(NULL);
peer_end = getmonotime();
rdemem.rde_event_peer_usec +=
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.335 rde.h
--- rde.h 16 Dec 2025 12:16:03 -0000 1.335
+++ rde.h 17 Dec 2025 14:25:24 -0000
@@ -72,7 +72,6 @@ struct rib {
* Currently I assume that we can do that with the neighbor_ip...
*/
RB_HEAD(peer_tree, rde_peer);
-RB_HEAD(prefix_index, adjout_prefix);
CH_HEAD(pend_prefix_hash, pend_prefix);
TAILQ_HEAD(pend_prefix_queue, pend_prefix);
@@ -88,7 +87,6 @@ struct rde_peer {
struct bgpd_addr local_v6_addr;
struct capabilities capa;
struct addpath_eval eval;
- struct prefix_index adj_rib_out;
struct pend_attr_queue updates[AID_MAX];
struct pend_prefix_queue withdraws[AID_MAX];
struct pend_attr_hash pend_attrs;
@@ -96,6 +94,7 @@ struct rde_peer {
struct filter_head *out_rules;
struct ibufqueue *ibufq;
monotime_t staletime[AID_MAX];
+ uint32_t adjout_bid;
uint32_t remote_bgpid;
uint32_t path_id_tx;
unsigned int local_if_scope;
@@ -263,14 +262,19 @@ struct nexthop {
#define NEXTHOP_CONNECTED 0x01
};
+struct adjout_prefix;
+
/* generic entry without address specific part */
struct pt_entry {
RB_ENTRY(pt_entry) pt_e;
+ struct adjout_prefix *adjout;
+ uint32_t adjoutlen;
+ uint32_t adjoutavail;
uint8_t aid;
uint8_t prefixlen;
uint16_t len;
uint32_t refcnt;
- uint8_t data[4]; /* data depending on aid */
+ uint8_t data[0]; /* data depending on aid */
};
struct prefix {
@@ -318,13 +322,10 @@ struct adjout_attr {
};
struct adjout_prefix {
- RB_ENTRY(adjout_prefix) index;
- struct pt_entry *pt;
- struct adjout_attr *attrs;
uint32_t path_id_tx;
- uint8_t flags;
+ struct adjout_attr *attrs;
+ struct bitmap peermap;
};
-#define PREFIX_ADJOUT_FLAG_LOCKED 0x01 /* locked by rib walker */
struct pend_attr {
TAILQ_ENTRY(pend_attr) entry;
@@ -357,7 +358,7 @@ enum eval_mode {
struct rib_context {
LIST_ENTRY(rib_context) entry;
struct rib_entry *ctx_re;
- struct adjout_prefix *ctx_p;
+ struct pt_entry *ctx_pt;
uint32_t ctx_id;
void (*ctx_rib_call)(struct rib_entry *, void *);
void (*ctx_prefix_call)(struct rde_peer *,
@@ -423,7 +424,6 @@ void peer_blast(struct rde_peer *, uin
void peer_dump(struct rde_peer *, uint8_t);
void peer_begin_rrefresh(struct rde_peer *, uint8_t);
int peer_work_pending(void);
-void peer_reaper(struct rde_peer *);
void peer_imsg_push(struct rde_peer *, struct imsg *);
int peer_imsg_pop(struct rde_peer *, struct imsg *);
@@ -748,9 +748,7 @@ void adjout_prefix_update(struct adjou
struct filterstate *, struct pt_entry *, uint32_t);
void adjout_prefix_withdraw(struct rde_peer *, struct pt_entry *,
struct adjout_prefix *);
-void adjout_prefix_destroy(struct rde_peer *,
- struct adjout_prefix *);
-int adjout_prefix_reaper(struct rde_peer *);
+void adjout_prefix_reaper(struct rde_peer *);
void adjout_prefix_dump_cleanup(struct rib_context *);
void adjout_prefix_dump_r(struct rib_context *);
int adjout_prefix_dump_new(struct rde_peer *, uint8_t,
Index: rde_adjout.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_adjout.c,v
diff -u -p -r1.14 rde_adjout.c
--- rde_adjout.c 16 Dec 2025 12:16:03 -0000 1.14
+++ rde_adjout.c 17 Dec 2025 14:25:24 -0000
@@ -30,6 +30,8 @@
#include "log.h"
#include "chash.h"
+struct bitmap adjout_id_map;
+
static struct adjout_attr *adjout_attr_ref(struct adjout_attr *);
static void adjout_attr_unref(struct adjout_attr *);
@@ -385,77 +387,73 @@ adjout_attr_get(struct filterstate *stat
CH_GENERATE(adjout_attr_tree, adjout_attr, adjout_attr_eq, adjout_attr_hash);
-static inline struct adjout_prefix *
-adjout_prefix_lock(struct adjout_prefix *p)
-{
- if (p->flags & PREFIX_ADJOUT_FLAG_LOCKED)
- fatalx("%s: locking locked prefix", __func__);
- p->flags |= PREFIX_ADJOUT_FLAG_LOCKED;
- return p;
-}
-
-static inline struct adjout_prefix *
-adjout_prefix_unlock(struct adjout_prefix *p)
-{
- if ((p->flags & PREFIX_ADJOUT_FLAG_LOCKED) == 0)
- fatalx("%s: unlocking unlocked prefix", __func__);
- p->flags &= ~PREFIX_ADJOUT_FLAG_LOCKED;
- return p;
-}
-
-static inline int
-prefix_is_locked(struct adjout_prefix *p)
-{
- return (p->flags & PREFIX_ADJOUT_FLAG_LOCKED) != 0;
-}
-
-static inline int
-prefix_is_dead(struct adjout_prefix *p)
-{
- return p->attrs == NULL;
-}
-
-static void adjout_prefix_link(struct adjout_prefix *, struct rde_peer *,
- struct adjout_attr *, struct pt_entry *, uint32_t);
+static void adjout_prefix_link(struct pt_entry *, struct rde_peer *,
+ struct adjout_attr *, uint32_t);
static void adjout_prefix_unlink(struct adjout_prefix *,
- struct rde_peer *);
+ struct pt_entry *, struct rde_peer *);
-static struct adjout_prefix *adjout_prefix_alloc(void);
-static void adjout_prefix_free(struct adjout_prefix *);
+static struct adjout_prefix *adjout_prefix_alloc(struct pt_entry *,
+ uint32_t);
+static void adjout_prefix_free(struct pt_entry *,
+ struct adjout_prefix *);
-/* RB tree comparison function */
-static inline int
-prefix_index_cmp(struct adjout_prefix *a, struct adjout_prefix *b)
+static inline uint32_t
+adjout_prefix_index(struct pt_entry *pte, struct adjout_prefix *p)
{
- int r;
- r = pt_prefix_cmp(a->pt, b->pt);
- if (r != 0)
- return r;
+ ptrdiff_t idx = p - pte->adjout;
- if (a->path_id_tx > b->path_id_tx)
- return 1;
- if (a->path_id_tx < b->path_id_tx)
- return -1;
- return 0;
-}
+ if (idx < 0 || idx > pte->adjoutlen)
+ fatalx("corrupt pte adjout list");
-RB_GENERATE_STATIC(prefix_index, adjout_prefix, index, prefix_index_cmp)
+ return idx;
+}
/*
- * Search for specified prefix in the peer prefix_index.
- * Returns NULL if not found.
+ * Search for specified prefix in the pte adjout array that is for the
+ * specified path_id_tx and peer. Returns NULL if not found.
*/
struct adjout_prefix *
adjout_prefix_get(struct rde_peer *peer, uint32_t path_id_tx,
struct pt_entry *pte)
{
- struct adjout_prefix xp;
+ struct adjout_prefix *p;
+ uint32_t i;
- memset(&xp, 0, sizeof(xp));
- xp.pt = pte;
- xp.path_id_tx = path_id_tx;
+ for (i = 0; i < pte->adjoutlen; i++) {
+ p = &pte->adjout[i];
+ if (p->path_id_tx != path_id_tx)
+ continue;
+ if (bitmap_test(&p->peermap, peer->adjout_bid))
+ return p;
+ if (p->path_id_tx > path_id_tx)
+ break;
+ }
- return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
+ return NULL;
+}
+
+/*
+ * Search for specified prefix in the pte adjout array that is for the
+ * specified path_id_tx and attrs. Returns NULL if not found.
+ */
+static struct adjout_prefix *
+adjout_prefix_with_attrs(struct pt_entry *pte, uint32_t path_id_tx,
+ struct adjout_attr *attrs)
+{
+ struct adjout_prefix *p;
+ uint32_t i;
+
+ for (i = 0; i < pte->adjoutlen; i++) {
+ p = &pte->adjout[i];
+ if (p->path_id_tx != path_id_tx)
+ continue;
+ if (p->attrs == attrs)
+ return p;
+ if (p->path_id_tx > path_id_tx)
+ break;
+ }
+
+ return NULL;
}
/*
@@ -465,15 +463,23 @@ adjout_prefix_get(struct rde_peer *peer,
struct adjout_prefix *
adjout_prefix_first(struct rde_peer *peer, struct pt_entry *pte)
{
- struct adjout_prefix xp, *np;
+ struct adjout_prefix *p;
+ uint32_t i;
+ int has_add_path = 0;
- memset(&xp, 0, sizeof(xp));
- xp.pt = pte;
+ if (peer_has_add_path(peer, pte->aid, CAPA_AP_SEND))
+ has_add_path = 1;
- np = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
- if (np == NULL || pt_prefix_cmp(np->pt, xp.pt) != 0)
- return NULL;
- return np;
+ for (i = 0; i < pte->adjoutlen; i++) {
+ p = &pte->adjout[i];
+ if (bitmap_test(&p->peermap, peer->adjout_bid))
+ return p;
+ if (!has_add_path && p->path_id_tx != 0) {
+ return NULL;
+ }
+ }
+
+ return NULL;
}
/*
@@ -481,14 +487,25 @@ adjout_prefix_first(struct rde_peer *pee
*/
struct adjout_prefix *
adjout_prefix_next(struct rde_peer *peer, struct pt_entry *pte,
- struct adjout_prefix *p)
+ struct adjout_prefix *last)
{
- struct adjout_prefix *np;
+ struct adjout_prefix *p;
+ uint32_t i;
- np = RB_NEXT(prefix_index, &peer->adj_rib_out, p);
- if (np == NULL || np->pt != p->pt)
+ if (!peer_has_add_path(peer, pte->aid, CAPA_AP_SEND))
return NULL;
- return np;
+
+ i = adjout_prefix_index(pte, last);
+ for (; i < pte->adjoutlen; i++)
+ if (pte->adjout[i].path_id_tx != last->path_id_tx)
+ break;
+ for (; i < pte->adjoutlen; i++) {
+ p = &pte->adjout[i];
+ if (bitmap_test(&p->peermap, peer->adjout_bid))
+ return p;
+ }
+
+ return NULL;
}
/*
@@ -500,16 +517,11 @@ adjout_prefix_update(struct adjout_prefi
{
struct adjout_attr *attrs;
- if (p == NULL) {
- p = adjout_prefix_alloc();
- /* initially mark DEAD so code below is skipped */
-
- p->pt = pt_ref(pte);
- p->path_id_tx = path_id_tx;
+ if (p != NULL) {
+ if (p->path_id_tx != path_id_tx ||
+ bitmap_test(&p->peermap, peer->adjout_bid) == 0)
+ fatalx("%s: king bula is unhappy", __func__);
- if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
- fatalx("%s: RB index invariant violated", __func__);
- } else {
/*
* XXX for now treat a different path_id_tx like different
* attributes and force out an update. It is unclear how
@@ -527,26 +539,16 @@ adjout_prefix_update(struct adjout_prefi
}
/* unlink prefix so it can be relinked below */
- adjout_prefix_unlink(p, peer);
+ adjout_prefix_unlink(p, pte, peer);
peer->stats.prefix_out_cnt--;
}
- /* update path_id_tx now that the prefix is unlinked */
- if (p->path_id_tx != path_id_tx) {
- /* path_id_tx is part of the index so remove and re-insert p */
- RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
- p->path_id_tx = path_id_tx;
- if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
- fatalx("%s: RB index invariant violated", __func__);
- }
-
attrs = adjout_attr_get(state);
-
- adjout_prefix_link(p, peer, attrs, p->pt, p->path_id_tx);
+ adjout_prefix_link(pte, peer, attrs, path_id_tx);
peer->stats.prefix_out_cnt++;
if (peer_is_up(peer))
- pend_prefix_add(peer, p->attrs, p->pt, p->path_id_tx);
+ pend_prefix_add(peer, attrs, pte, path_id_tx);
}
/*
@@ -557,115 +559,87 @@ void
adjout_prefix_withdraw(struct rde_peer *peer, struct pt_entry *pte,
struct adjout_prefix *p)
{
+ if (bitmap_test(&p->peermap, peer->adjout_bid) == 0)
+ fatalx("%s: king bula is unhappy", __func__);
+
if (peer_is_up(peer))
pend_prefix_add(peer, NULL, pte, p->path_id_tx);
- adjout_prefix_destroy(peer, p);
+ adjout_prefix_unlink(p, pte, peer);
+ peer->stats.prefix_out_cnt--;
}
void
-adjout_prefix_destroy(struct rde_peer *peer, struct adjout_prefix *p)
-{
- /* unlink prefix if it was linked (not dead) */
- if (!prefix_is_dead(p)) {
- adjout_prefix_unlink(p, peer);
- peer->stats.prefix_out_cnt--;
- }
-
- if (!prefix_is_locked(p)) {
- RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
- /* remove the last prefix reference before free */
- pt_unref(p->pt);
- adjout_prefix_free(p);
- }
-}
-
-int
adjout_prefix_reaper(struct rde_peer *peer)
{
- struct adjout_prefix *p, *np;
- int count = RDE_REAPER_ROUNDS;
-
- RB_FOREACH_SAFE(p, prefix_index, &peer->adj_rib_out, np) {
- adjout_prefix_destroy(peer, p);
- if (count-- <= 0)
- return 0;
- }
- return 1;
+ bitmap_id_put(&adjout_id_map, peer->adjout_bid);
}
-static struct adjout_prefix *
+static struct pt_entry *
prefix_restart(struct rib_context *ctx)
{
- struct adjout_prefix *p = NULL;
+ struct pt_entry *pte = NULL;
struct rde_peer *peer;
if ((peer = peer_get(ctx->ctx_id)) == NULL)
return NULL;
- if (ctx->ctx_p)
- p = adjout_prefix_unlock(ctx->ctx_p);
-
- while (p && prefix_is_dead(p)) {
- struct adjout_prefix *next;
-
- next = RB_NEXT(prefix_index, unused, p);
- adjout_prefix_destroy(peer, p);
- p = next;
+ /* be careful when this is the last reference to pte */
+ if (ctx->ctx_pt != NULL) {
+ pte = ctx->ctx_pt;
+ if (pte->refcnt == 1)
+ pte = pt_next(pte);
+ pt_unref(ctx->ctx_pt);
}
- ctx->ctx_p = NULL;
- return p;
+ ctx->ctx_pt = NULL;
+ return pte;
}
void
adjout_prefix_dump_cleanup(struct rib_context *ctx)
{
- struct adjout_prefix *p = ctx->ctx_p;
- struct rde_peer *peer;
-
- if ((peer = peer_get(ctx->ctx_id)) == NULL)
- return;
- if (prefix_is_dead(adjout_prefix_unlock(p)))
- adjout_prefix_destroy(peer, p);
+ if (ctx->ctx_pt != NULL)
+ pt_unref(ctx->ctx_pt);
}
void
adjout_prefix_dump_r(struct rib_context *ctx)
{
- struct adjout_prefix *p, *next;
+ struct pt_entry *pte, *next;
+ struct adjout_prefix *p;
struct rde_peer *peer;
unsigned int i;
if ((peer = peer_get(ctx->ctx_id)) == NULL)
goto done;
- if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
- p = RB_MIN(prefix_index, &peer->adj_rib_out);
+ if (ctx->ctx_pt == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
+ pte = pt_first(ctx->ctx_aid);
else
- p = prefix_restart(ctx);
+ pte = prefix_restart(ctx);
- for (i = 0; p != NULL; p = next) {
- next = RB_NEXT(prefix_index, unused, p);
- if (prefix_is_dead(p))
- continue;
+ for (i = 0; pte != NULL; pte = next) {
+ next = pt_next(pte);
if (ctx->ctx_aid != AID_UNSPEC &&
- ctx->ctx_aid != p->pt->aid)
+ ctx->ctx_aid != pte->aid)
continue;
if (ctx->ctx_subtree.aid != AID_UNSPEC) {
struct bgpd_addr addr;
- pt_getaddr(p->pt, &addr);
+ pt_getaddr(pte, &addr);
if (prefix_compare(&ctx->ctx_subtree, &addr,
ctx->ctx_subtreelen) != 0)
/* left subtree, walk is done */
break;
}
- if (ctx->ctx_count && i++ >= ctx->ctx_count &&
- !prefix_is_locked(p)) {
+ if (ctx->ctx_count && i++ >= ctx->ctx_count) {
/* store and lock last element */
- ctx->ctx_p = adjout_prefix_lock(p);
+ ctx->ctx_pt = pt_ref(pte);
return;
}
- ctx->ctx_prefix_call(peer, p->pt, p, ctx->ctx_arg);
+ p = adjout_prefix_first(peer, pte);
+ if (p == NULL)
+ continue;
+ ctx->ctx_prefix_call(peer, pte, p, ctx->ctx_arg);
}
done:
@@ -713,7 +687,6 @@ adjout_prefix_dump_subtree(struct rde_pe
int (*throttle)(void *))
{
struct rib_context *ctx;
- struct adjout_prefix xp;
if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
return -1;
@@ -728,11 +701,9 @@ adjout_prefix_dump_subtree(struct rde_pe
ctx->ctx_subtreelen = subtreelen;
/* lookup start of subtree */
- memset(&xp, 0, sizeof(xp));
- xp.pt = pt_fill(subtree, subtreelen);
- ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
- if (ctx->ctx_p)
- adjout_prefix_lock(ctx->ctx_p);
+ ctx->ctx_pt = pt_get_next(subtree, subtreelen);
+ if (ctx->ctx_pt)
+ pt_ref(ctx->ctx_pt); /* store and lock first element */
rib_dump_insert(ctx);
@@ -747,46 +718,112 @@ adjout_prefix_dump_subtree(struct rde_pe
* Link a prefix into the different parent objects.
*/
static void
-adjout_prefix_link(struct adjout_prefix *p, struct rde_peer *peer,
- struct adjout_attr *attrs, struct pt_entry *pt, uint32_t path_id_tx)
+adjout_prefix_link(struct pt_entry *pte, struct rde_peer *peer,
+ struct adjout_attr *attrs, uint32_t path_id_tx)
{
- p->attrs = adjout_attr_ref(attrs);
- p->pt = pt_ref(pt);
- p->path_id_tx = path_id_tx;
+ struct adjout_prefix *p;
+
+ /* assign ids on first use to keep the bitmap as small as possible */
+ if (peer->adjout_bid == 0)
+ if (bitmap_id_get(&adjout_id_map, &peer->adjout_bid) == -1)
+ fatal(__func__);
+
+ if ((p = adjout_prefix_with_attrs(pte, path_id_tx, attrs)) == NULL) {
+ p = adjout_prefix_alloc(pte, path_id_tx);
+ p->attrs = adjout_attr_ref(attrs);
+ }
+
+ if (bitmap_set(&p->peermap, peer->adjout_bid) == -1)
+ fatal(__func__);
}
/*
* Unlink a prefix from the different parent objects.
*/
static void
-adjout_prefix_unlink(struct adjout_prefix *p, struct rde_peer *peer)
+adjout_prefix_unlink(struct adjout_prefix *p, struct pt_entry *pte,
+ struct rde_peer *peer)
{
- /* destroy all references to other objects */
- adjout_attr_unref(p->attrs);
- p->attrs = NULL;
- pt_unref(p->pt);
- /* must keep p->pt valid since there is an extra ref */
+ bitmap_clear(&p->peermap, peer->adjout_bid);
+ if (bitmap_empty(&p->peermap)) {
+ /* destroy all references to other objects */
+ adjout_attr_unref(p->attrs);
+ p->attrs = NULL;
+
+ adjout_prefix_free(pte, p);
+ }
+}
+
+static void
+adjout_prefix_resize(struct pt_entry *pte)
+{
+ struct adjout_prefix *new;
+ uint32_t newlen, avail;
+
+ avail = pte->adjoutavail;
+ newlen = bin_of_adjout_prefixes(avail + 1);
+ if ((new = reallocarray(pte->adjout, newlen, sizeof(*new))) == NULL)
+ fatal(__func__);
+ rdemem.adjout_prefix_size += sizeof(*new) * (newlen - avail);
+
+ memset(&new[avail], 0, sizeof(*new) * (newlen - avail));
+ pte->adjout = new;
+ pte->adjoutavail = newlen;
}
-/* alloc and zero new entry. May not fail. */
+/*
+ * Insert a new entry into the pte adjout array, extending the array if needed.
+ * May not fail.
+ */
static struct adjout_prefix *
-adjout_prefix_alloc(void)
+adjout_prefix_alloc(struct pt_entry *pte, uint32_t path_id_tx)
{
struct adjout_prefix *p;
+ uint32_t i;
- p = calloc(1, sizeof(*p));
- if (p == NULL)
- fatal(__func__);
+ if (pte->adjoutlen + 1 > pte->adjoutavail)
+ adjout_prefix_resize(pte);
+
+ /* keep array sorted by path_id_tx */
+ for (i = 0; i < pte->adjoutlen; i++) {
+ if (pte->adjout[i].path_id_tx > path_id_tx)
+ break;
+ }
+
+ p = &pte->adjout[i];
+ /* shift remainder by one slot */
+ for (i = pte->adjoutlen; &pte->adjout[i] > p; i--)
+ pte->adjout[i] = pte->adjout[i - 1];
+
+ /* initialize new element */
+ p->attrs = NULL;
+ p->path_id_tx = path_id_tx;
+ bitmap_init(&p->peermap);
+
+ pte->adjoutlen++;
rdemem.adjout_prefix_cnt++;
return p;
}
-/* free a unlinked entry */
+/* remove an entry from the pte adjout array */
static void
-adjout_prefix_free(struct adjout_prefix *p)
+adjout_prefix_free(struct pt_entry *pte, struct adjout_prefix *p)
{
+ uint32_t i, idx;
+
+ bitmap_reset(&p->peermap);
+
+ idx = adjout_prefix_index(pte, p);
+ for (i = idx + 1; i < pte->adjoutlen; i++)
+ pte->adjout[i - 1] = pte->adjout[i];
+
+ p = &pte->adjout[pte->adjoutlen - 1];
+ memset(p, 0, sizeof(*p));
+ pte->adjoutlen--;
+
+ /* TODO shrink array if X% empty */
+
rdemem.adjout_prefix_cnt--;
- free(p);
}
void
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.63 rde_peer.c
--- rde_peer.c 16 Dec 2025 16:07:31 -0000 1.63
+++ rde_peer.c 17 Dec 2025 14:25:24 -0000
@@ -92,11 +92,10 @@ peer_shutdown(void)
RB_FOREACH_SAFE(peer, peer_tree, &peertable, np)
peer_delete(peer);
- while (!RB_EMPTY(&zombietable))
- peer_reaper(NULL);
-
if (!RB_EMPTY(&peertable))
log_warnx("%s: free non-free table", __func__);
+
+ /* XXX wait until all peer got reaped */
}
/*
@@ -435,6 +434,29 @@ peer_down(struct rde_peer *peer)
peer->stats.prefix_cnt = 0;
}
+/*
+ * RIB walker callback for peer_delete / the reaper.
+ */
+static void
+peer_reaper_upcall(struct rde_peer *peer, struct pt_entry *pte,
+ struct adjout_prefix *p, void *ptr)
+{
+ adjout_prefix_withdraw(peer, pte, p);
+}
+
+/*
+ * Called after the adj-rib-out has been cleared, time to kill the zombie.
+ */
+static void
+peer_reaper_done(void *ptr, uint8_t aid)
+{
+ struct rde_peer *peer = ptr;
+
+ adjout_prefix_reaper(peer);
+ ibufq_free(peer->ibufq);
+ free(peer);
+}
+
void
peer_delete(struct rde_peer *peer)
{
@@ -445,13 +467,11 @@ peer_delete(struct rde_peer *peer)
adjout_peer_free(peer);
RB_REMOVE(peer_tree, &peertable, peer);
- while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) {
- log_warnx("zombie peer conflict");
- peer->conf.id = arc4random();
- }
/* start reaping the zombie */
- peer_reaper(peer);
+ if (adjout_prefix_dump_new(peer, AID_UNSPEC, RDE_RUNNER_ROUNDS, peer,
+ peer_reaper_upcall, peer_reaper_done, NULL) == -1)
+ fatal("%s: adjout_prefix_dump_new", __func__);
}
/*
@@ -550,8 +570,8 @@ peer_blast(struct rde_peer *peer, uint8_
rde_peer_send_rrefresh(peer, aid, ROUTE_REFRESH_BEGIN_RR);
/* force out all updates from the Adj-RIB-Out */
- if (adjout_prefix_dump_new(peer, aid, 0, peer, peer_blast_upcall,
- peer_blast_done, NULL) == -1)
+ if (adjout_prefix_dump_new(peer, aid, RDE_RUNNER_ROUNDS, peer,
+ peer_blast_upcall, peer_blast_done, NULL) == -1)
fatal("%s: adjout_prefix_dump_new", __func__);
}
@@ -622,22 +642,6 @@ peer_begin_rrefresh(struct rde_peer *pee
struct timespec ts = { .tv_nsec = 1000 * 1000 };
nanosleep(&ts, NULL);
}
-}
-
-void
-peer_reaper(struct rde_peer *peer)
-{
- if (peer == NULL)
- peer = RB_ROOT(&zombietable);
- if (peer == NULL)
- return;
-
- if (!adjout_prefix_reaper(peer))
- return;
-
- ibufq_free(peer->ibufq);
- RB_REMOVE(peer_tree, &zombietable, peer);
- free(peer);
}
/*
Index: rde_prefix.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_prefix.c,v
diff -u -p -r1.59 rde_prefix.c
--- rde_prefix.c 16 Dec 2025 12:11:16 -0000 1.59
+++ rde_prefix.c 17 Dec 2025 14:25:24 -0000
@@ -51,6 +51,9 @@ static void pt_free(struct pt_entry *)
struct pt_entry4 {
RB_ENTRY(pt_entry) pt_e;
+ struct adjout_prefix *adjout;
+ uint32_t adjoutlen;
+ uint32_t adjoutavail;
uint8_t aid;
uint8_t prefixlen;
uint16_t len;
@@ -60,6 +63,9 @@ struct pt_entry4 {
struct pt_entry6 {
RB_ENTRY(pt_entry) pt_e;
+ struct adjout_prefix *adjout;
+ uint32_t adjoutlen;
+ uint32_t adjoutavail;
uint8_t aid;
uint8_t prefixlen;
uint16_t len;
@@ -69,6 +75,9 @@ struct pt_entry6 {
struct pt_entry_vpn4 {
RB_ENTRY(pt_entry) pt_e;
+ struct adjout_prefix *adjout;
+ uint32_t adjoutlen;
+ uint32_t adjoutavail;
uint8_t aid;
uint8_t prefixlen;
uint16_t len;
@@ -83,6 +92,9 @@ struct pt_entry_vpn4 {
struct pt_entry_vpn6 {
RB_ENTRY(pt_entry) pt_e;
+ struct adjout_prefix *adjout;
+ uint32_t adjoutlen;
+ uint32_t adjoutavail;
uint8_t aid;
uint8_t prefixlen;
uint16_t len;
@@ -97,6 +109,9 @@ struct pt_entry_vpn6 {
struct pt_entry_evpn {
RB_ENTRY(pt_entry) pt_e;
+ struct adjout_prefix *adjout;
+ uint32_t adjoutlen;
+ uint32_t adjoutavail;
uint8_t aid;
uint8_t prefixlen;
uint16_t len;
@@ -117,12 +132,15 @@ struct pt_entry_evpn {
struct pt_entry_flow {
RB_ENTRY(pt_entry) pt_e;
+ struct adjout_prefix *adjout;
+ uint32_t adjoutlen;
+ uint32_t adjoutavail;
uint8_t aid;
uint8_t prefixlen; /* unused ??? */
uint16_t len;
uint32_t refcnt;
uint64_t rd;
- uint8_t flow[1]; /* NLRI */
+ uint8_t flow[0]; /* NLRI */
};
#define PT_FLOW_SIZE (offsetof(struct pt_entry_flow, flow))
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
diff -u -p -r1.284 rde_rib.c
--- rde_rib.c 2 Dec 2025 13:03:35 -0000 1.284
+++ rde_rib.c 17 Dec 2025 14:25:24 -0000
@@ -465,7 +465,7 @@ rib_dump_free(struct rib_context *ctx)
ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
if (ctx->ctx_re)
rib_dump_cleanup(ctx);
- if (ctx->ctx_p)
+ if (ctx->ctx_pt)
adjout_prefix_dump_cleanup(ctx);
LIST_REMOVE(ctx, entry);
free(ctx);
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.191 rde_update.c
--- rde_update.c 16 Dec 2025 12:16:03 -0000 1.191
+++ rde_update.c 17 Dec 2025 14:25:24 -0000
@@ -164,6 +164,7 @@ up_process_prefix(struct rde_peer *peer,
struct filterstate state;
struct bgpd_addr addr;
int excluded = 0;
+ uint32_t path_id_tx = 0;
/*
* up_test_update() needs to run before the output filters
@@ -194,11 +195,13 @@ up_process_prefix(struct rde_peer *peer,
}
/* from here on we know this is an update */
- if (p == (void *)-1)
+ if (p == (void *)-1) {
+ path_id_tx = new->path_id_tx;
p = adjout_prefix_get(peer, new->path_id_tx, new->pt);
+ }
up_prep_adjout(peer, &state, new->pt->aid);
- adjout_prefix_update(p, peer, &state, new->pt, new->path_id_tx);
+ adjout_prefix_update(p, peer, &state, new->pt, path_id_tx);
rde_filterstate_clean(&state);
/* max prefix checker outbound */
Index: util.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/util.c,v
diff -u -p -r1.97 util.c
--- util.c 16 Dec 2025 15:38:55 -0000 1.97
+++ util.c 17 Dec 2025 14:25:24 -0000
@@ -1339,3 +1339,10 @@ bin_of_communities(unsigned int count)
/* 8, 16, 24, ... 56, 64, 80, 96, ... */
return bin_of(count, 5, 2);
}
+
+unsigned int
+bin_of_adjout_prefixes(unsigned int count)
+{
+ /* 1, 2, 3, 4, 6, 8, 12, 16, 24, ... */
+ return bin_of(count, 1, 1);
+}
bgpd: rewrite adj-rib-out code