From: Claudio Jeker Subject: bgpd: rewrite adj-rib-out code To: tech@openbsd.org Date: Wed, 17 Dec 2025 15:58:46 +0100 Fully rewrite the adj-rib-out code to not be per-peer based but instead global with a peer bitmap to know which peer holds which prefix version. So a pt_entry now includes an array of struct adjout_prefix elements; each entry is for a different path (different set of attributes) and includes a bitmap that tracks which peers include the prefix. This alters most of the adjout_prefix functions in some way or another. An optimisation on top of this is that the path_id_tx is forced to 0 for peers that have no add-path send enabled. This way the lookup for this common case is less deep. The peer_reaper is now replaced with a simple adjout_prefix_dump call. This is enough for a first step. In general this reduces memory consumption by more than 50%, especially if the outbound filters are producing the same path for many peers. My IXP test setup dropped from over 20GB to below 5GB memory usage. 
-- :wq Claudio Index: bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v diff -u -p -r1.527 bgpd.h --- bgpd.h 16 Dec 2025 15:38:55 -0000 1.527 +++ bgpd.h 17 Dec 2025 14:25:24 -0000 @@ -1396,6 +1396,7 @@ struct rde_memstats { long long path_refs; long long prefix_cnt; long long adjout_prefix_cnt; + long long adjout_prefix_size; long long pend_prefix_cnt; long long pend_attr_cnt; long long rib_cnt; @@ -1676,6 +1677,7 @@ const char *get_baudrate(unsigned long l unsigned int bin_of_attrs(unsigned int); unsigned int bin_of_communities(unsigned int); +unsigned int bin_of_adjout_prefixes(unsigned int); /* flowspec.c */ int flowspec_valid(const uint8_t *, int, int); Index: rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v diff -u -p -r1.677 rde.c --- rde.c 16 Dec 2025 12:16:03 -0000 1.677 +++ rde.c 17 Dec 2025 14:25:24 -0000 @@ -326,7 +326,6 @@ rde_main(int debug, int verbose) monotime_to_usec(monotime_sub(io_end, loop_start)); peer_foreach(rde_dispatch_imsg_peer, NULL); - peer_reaper(NULL); peer_end = getmonotime(); rdemem.rde_event_peer_usec += Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v diff -u -p -r1.335 rde.h --- rde.h 16 Dec 2025 12:16:03 -0000 1.335 +++ rde.h 17 Dec 2025 14:25:24 -0000 @@ -72,7 +72,6 @@ struct rib { * Currently I assume that we can do that with the neighbor_ip... 
*/ RB_HEAD(peer_tree, rde_peer); -RB_HEAD(prefix_index, adjout_prefix); CH_HEAD(pend_prefix_hash, pend_prefix); TAILQ_HEAD(pend_prefix_queue, pend_prefix); @@ -88,7 +87,6 @@ struct rde_peer { struct bgpd_addr local_v6_addr; struct capabilities capa; struct addpath_eval eval; - struct prefix_index adj_rib_out; struct pend_attr_queue updates[AID_MAX]; struct pend_prefix_queue withdraws[AID_MAX]; struct pend_attr_hash pend_attrs; @@ -96,6 +94,7 @@ struct rde_peer { struct filter_head *out_rules; struct ibufqueue *ibufq; monotime_t staletime[AID_MAX]; + uint32_t adjout_bid; uint32_t remote_bgpid; uint32_t path_id_tx; unsigned int local_if_scope; @@ -263,14 +262,19 @@ struct nexthop { #define NEXTHOP_CONNECTED 0x01 }; +struct adjout_prefix; + /* generic entry without address specific part */ struct pt_entry { RB_ENTRY(pt_entry) pt_e; + struct adjout_prefix *adjout; + uint32_t adjoutlen; + uint32_t adjoutavail; uint8_t aid; uint8_t prefixlen; uint16_t len; uint32_t refcnt; - uint8_t data[4]; /* data depending on aid */ + uint8_t data[0]; /* data depending on aid */ }; struct prefix { @@ -318,13 +322,10 @@ struct adjout_attr { }; struct adjout_prefix { - RB_ENTRY(adjout_prefix) index; - struct pt_entry *pt; - struct adjout_attr *attrs; uint32_t path_id_tx; - uint8_t flags; + struct adjout_attr *attrs; + struct bitmap peermap; }; -#define PREFIX_ADJOUT_FLAG_LOCKED 0x01 /* locked by rib walker */ struct pend_attr { TAILQ_ENTRY(pend_attr) entry; @@ -357,7 +358,7 @@ enum eval_mode { struct rib_context { LIST_ENTRY(rib_context) entry; struct rib_entry *ctx_re; - struct adjout_prefix *ctx_p; + struct pt_entry *ctx_pt; uint32_t ctx_id; void (*ctx_rib_call)(struct rib_entry *, void *); void (*ctx_prefix_call)(struct rde_peer *, @@ -423,7 +424,6 @@ void peer_blast(struct rde_peer *, uin void peer_dump(struct rde_peer *, uint8_t); void peer_begin_rrefresh(struct rde_peer *, uint8_t); int peer_work_pending(void); -void peer_reaper(struct rde_peer *); void peer_imsg_push(struct 
rde_peer *, struct imsg *); int peer_imsg_pop(struct rde_peer *, struct imsg *); @@ -748,9 +748,7 @@ void adjout_prefix_update(struct adjou struct filterstate *, struct pt_entry *, uint32_t); void adjout_prefix_withdraw(struct rde_peer *, struct pt_entry *, struct adjout_prefix *); -void adjout_prefix_destroy(struct rde_peer *, - struct adjout_prefix *); -int adjout_prefix_reaper(struct rde_peer *); +void adjout_prefix_reaper(struct rde_peer *); void adjout_prefix_dump_cleanup(struct rib_context *); void adjout_prefix_dump_r(struct rib_context *); int adjout_prefix_dump_new(struct rde_peer *, uint8_t, Index: rde_adjout.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_adjout.c,v diff -u -p -r1.14 rde_adjout.c --- rde_adjout.c 16 Dec 2025 12:16:03 -0000 1.14 +++ rde_adjout.c 17 Dec 2025 14:25:24 -0000 @@ -30,6 +30,8 @@ #include "log.h" #include "chash.h" +struct bitmap adjout_id_map; + static struct adjout_attr *adjout_attr_ref(struct adjout_attr *); static void adjout_attr_unref(struct adjout_attr *); @@ -385,77 +387,73 @@ adjout_attr_get(struct filterstate *stat CH_GENERATE(adjout_attr_tree, adjout_attr, adjout_attr_eq, adjout_attr_hash); -static inline struct adjout_prefix * -adjout_prefix_lock(struct adjout_prefix *p) -{ - if (p->flags & PREFIX_ADJOUT_FLAG_LOCKED) - fatalx("%s: locking locked prefix", __func__); - p->flags |= PREFIX_ADJOUT_FLAG_LOCKED; - return p; -} - -static inline struct adjout_prefix * -adjout_prefix_unlock(struct adjout_prefix *p) -{ - if ((p->flags & PREFIX_ADJOUT_FLAG_LOCKED) == 0) - fatalx("%s: unlocking unlocked prefix", __func__); - p->flags &= ~PREFIX_ADJOUT_FLAG_LOCKED; - return p; -} - -static inline int -prefix_is_locked(struct adjout_prefix *p) -{ - return (p->flags & PREFIX_ADJOUT_FLAG_LOCKED) != 0; -} - -static inline int -prefix_is_dead(struct adjout_prefix *p) -{ - return p->attrs == NULL; -} - -static void adjout_prefix_link(struct adjout_prefix *, struct rde_peer *, - 
struct adjout_attr *, struct pt_entry *, uint32_t); +static void adjout_prefix_link(struct pt_entry *, struct rde_peer *, + struct adjout_attr *, uint32_t); static void adjout_prefix_unlink(struct adjout_prefix *, - struct rde_peer *); + struct pt_entry *, struct rde_peer *); -static struct adjout_prefix *adjout_prefix_alloc(void); -static void adjout_prefix_free(struct adjout_prefix *); +static struct adjout_prefix *adjout_prefix_alloc(struct pt_entry *, + uint32_t); +static void adjout_prefix_free(struct pt_entry *, + struct adjout_prefix *); -/* RB tree comparison function */ -static inline int -prefix_index_cmp(struct adjout_prefix *a, struct adjout_prefix *b) +static inline uint32_t +adjout_prefix_index(struct pt_entry *pte, struct adjout_prefix *p) { - int r; - r = pt_prefix_cmp(a->pt, b->pt); - if (r != 0) - return r; + ptrdiff_t idx = p - pte->adjout; - if (a->path_id_tx > b->path_id_tx) - return 1; - if (a->path_id_tx < b->path_id_tx) - return -1; - return 0; -} + if (idx < 0 || idx > pte->adjoutlen) + fatalx("corrupt pte adjout list"); -RB_GENERATE_STATIC(prefix_index, adjout_prefix, index, prefix_index_cmp) + return idx; +} /* - * Search for specified prefix in the peer prefix_index. - * Returns NULL if not found. + * Search for specified prefix in the pte adjout array that is for the + * specified path_id_tx and peer. Returns NULL if not found. 
*/ struct adjout_prefix * adjout_prefix_get(struct rde_peer *peer, uint32_t path_id_tx, struct pt_entry *pte) { - struct adjout_prefix xp; + struct adjout_prefix *p; + uint32_t i; - memset(&xp, 0, sizeof(xp)); - xp.pt = pte; - xp.path_id_tx = path_id_tx; + for (i = 0; i < pte->adjoutlen; i++) { + p = &pte->adjout[i]; + if (p->path_id_tx != path_id_tx) + continue; + if (bitmap_test(&p->peermap, peer->adjout_bid)) + return p; + if (p->path_id_tx > path_id_tx) + break; + } - return RB_FIND(prefix_index, &peer->adj_rib_out, &xp); + return NULL; +} + +/* + * Search for specified prefix in the pte adjout array that is for the + * specified path_id_tx and attrs. Returns NULL if not found. + */ +static struct adjout_prefix * +adjout_prefix_with_attrs(struct pt_entry *pte, uint32_t path_id_tx, + struct adjout_attr *attrs) +{ + struct adjout_prefix *p; + uint32_t i; + + for (i = 0; i < pte->adjoutlen; i++) { + p = &pte->adjout[i]; + if (p->path_id_tx != path_id_tx) + continue; + if (p->attrs == attrs) + return p; + if (p->path_id_tx > path_id_tx) + break; + } + + return NULL; } /* @@ -465,15 +463,23 @@ adjout_prefix_get(struct rde_peer *peer, struct adjout_prefix * adjout_prefix_first(struct rde_peer *peer, struct pt_entry *pte) { - struct adjout_prefix xp, *np; + struct adjout_prefix *p; + uint32_t i; + int has_add_path = 0; - memset(&xp, 0, sizeof(xp)); - xp.pt = pte; + if (peer_has_add_path(peer, pte->aid, CAPA_AP_SEND)) + has_add_path = 1; - np = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp); - if (np == NULL || pt_prefix_cmp(np->pt, xp.pt) != 0) - return NULL; - return np; + for (i = 0; i < pte->adjoutlen; i++) { + p = &pte->adjout[i]; + if (bitmap_test(&p->peermap, peer->adjout_bid)) + return p; + if (!has_add_path && p->path_id_tx != 0) { + return NULL; + } + } + + return NULL; } /* @@ -481,14 +487,25 @@ adjout_prefix_first(struct rde_peer *pee */ struct adjout_prefix * adjout_prefix_next(struct rde_peer *peer, struct pt_entry *pte, - struct adjout_prefix *p) + 
struct adjout_prefix *last) { - struct adjout_prefix *np; + struct adjout_prefix *p; + uint32_t i; - np = RB_NEXT(prefix_index, &peer->adj_rib_out, p); - if (np == NULL || np->pt != p->pt) + if (!peer_has_add_path(peer, pte->aid, CAPA_AP_SEND)) return NULL; - return np; + + i = adjout_prefix_index(pte, last); + for (; i < pte->adjoutlen; i++) + if (pte->adjout[i].path_id_tx != last->path_id_tx) + break; + for (; i < pte->adjoutlen; i++) { + p = &pte->adjout[i]; + if (bitmap_test(&p->peermap, peer->adjout_bid)) + return p; + } + + return NULL; } /* @@ -500,16 +517,11 @@ adjout_prefix_update(struct adjout_prefi { struct adjout_attr *attrs; - if (p == NULL) { - p = adjout_prefix_alloc(); - /* initially mark DEAD so code below is skipped */ - - p->pt = pt_ref(pte); - p->path_id_tx = path_id_tx; + if (p != NULL) { + if (p->path_id_tx != path_id_tx || + bitmap_test(&p->peermap, peer->adjout_bid) == 0) + fatalx("%s: king bula is unhappy", __func__); - if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL) - fatalx("%s: RB index invariant violated", __func__); - } else { /* * XXX for now treat a different path_id_tx like different * attributes and force out an update. 
It is unclear how @@ -527,26 +539,16 @@ adjout_prefix_update(struct adjout_prefi } /* unlink prefix so it can be relinked below */ - adjout_prefix_unlink(p, peer); + adjout_prefix_unlink(p, pte, peer); peer->stats.prefix_out_cnt--; } - /* update path_id_tx now that the prefix is unlinked */ - if (p->path_id_tx != path_id_tx) { - /* path_id_tx is part of the index so remove and re-insert p */ - RB_REMOVE(prefix_index, &peer->adj_rib_out, p); - p->path_id_tx = path_id_tx; - if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL) - fatalx("%s: RB index invariant violated", __func__); - } - attrs = adjout_attr_get(state); - - adjout_prefix_link(p, peer, attrs, p->pt, p->path_id_tx); + adjout_prefix_link(pte, peer, attrs, path_id_tx); peer->stats.prefix_out_cnt++; if (peer_is_up(peer)) - pend_prefix_add(peer, p->attrs, p->pt, p->path_id_tx); + pend_prefix_add(peer, attrs, pte, path_id_tx); } /* @@ -557,115 +559,87 @@ void adjout_prefix_withdraw(struct rde_peer *peer, struct pt_entry *pte, struct adjout_prefix *p) { + if (bitmap_test(&p->peermap, peer->adjout_bid) == 0) + fatalx("%s: king bula is unhappy", __func__); + if (peer_is_up(peer)) pend_prefix_add(peer, NULL, pte, p->path_id_tx); - adjout_prefix_destroy(peer, p); + adjout_prefix_unlink(p, pte, peer); + peer->stats.prefix_out_cnt--; } void -adjout_prefix_destroy(struct rde_peer *peer, struct adjout_prefix *p) -{ - /* unlink prefix if it was linked (not dead) */ - if (!prefix_is_dead(p)) { - adjout_prefix_unlink(p, peer); - peer->stats.prefix_out_cnt--; - } - - if (!prefix_is_locked(p)) { - RB_REMOVE(prefix_index, &peer->adj_rib_out, p); - /* remove the last prefix reference before free */ - pt_unref(p->pt); - adjout_prefix_free(p); - } -} - -int adjout_prefix_reaper(struct rde_peer *peer) { - struct adjout_prefix *p, *np; - int count = RDE_REAPER_ROUNDS; - - RB_FOREACH_SAFE(p, prefix_index, &peer->adj_rib_out, np) { - adjout_prefix_destroy(peer, p); - if (count-- <= 0) - return 0; - } - return 1; + 
bitmap_id_put(&adjout_id_map, peer->adjout_bid); } -static struct adjout_prefix * +static struct pt_entry * prefix_restart(struct rib_context *ctx) { - struct adjout_prefix *p = NULL; + struct pt_entry *pte = NULL; struct rde_peer *peer; if ((peer = peer_get(ctx->ctx_id)) == NULL) return NULL; - if (ctx->ctx_p) - p = adjout_prefix_unlock(ctx->ctx_p); - - while (p && prefix_is_dead(p)) { - struct adjout_prefix *next; - - next = RB_NEXT(prefix_index, unused, p); - adjout_prefix_destroy(peer, p); - p = next; + /* be careful when this is the last reference to pte */ + if (ctx->ctx_pt != NULL) { + pte = ctx->ctx_pt; + if (pte->refcnt == 1) + pte = pt_next(pte); + pt_unref(ctx->ctx_pt); } - ctx->ctx_p = NULL; - return p; + ctx->ctx_pt = NULL; + return pte; } void adjout_prefix_dump_cleanup(struct rib_context *ctx) { - struct adjout_prefix *p = ctx->ctx_p; - struct rde_peer *peer; - - if ((peer = peer_get(ctx->ctx_id)) == NULL) - return; - if (prefix_is_dead(adjout_prefix_unlock(p))) - adjout_prefix_destroy(peer, p); + if (ctx->ctx_pt != NULL) + pt_unref(ctx->ctx_pt); } void adjout_prefix_dump_r(struct rib_context *ctx) { - struct adjout_prefix *p, *next; + struct pt_entry *pte, *next; + struct adjout_prefix *p; struct rde_peer *peer; unsigned int i; if ((peer = peer_get(ctx->ctx_id)) == NULL) goto done; - if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC) - p = RB_MIN(prefix_index, &peer->adj_rib_out); + if (ctx->ctx_pt == NULL && ctx->ctx_subtree.aid == AID_UNSPEC) + pte = pt_first(ctx->ctx_aid); else - p = prefix_restart(ctx); + pte = prefix_restart(ctx); - for (i = 0; p != NULL; p = next) { - next = RB_NEXT(prefix_index, unused, p); - if (prefix_is_dead(p)) - continue; + for (i = 0; pte != NULL; pte = next) { + next = pt_next(pte); if (ctx->ctx_aid != AID_UNSPEC && - ctx->ctx_aid != p->pt->aid) + ctx->ctx_aid != pte->aid) continue; if (ctx->ctx_subtree.aid != AID_UNSPEC) { struct bgpd_addr addr; - pt_getaddr(p->pt, &addr); + pt_getaddr(pte, &addr); if 
(prefix_compare(&ctx->ctx_subtree, &addr, ctx->ctx_subtreelen) != 0) /* left subtree, walk is done */ break; } - if (ctx->ctx_count && i++ >= ctx->ctx_count && - !prefix_is_locked(p)) { + if (ctx->ctx_count && i++ >= ctx->ctx_count) { /* store and lock last element */ - ctx->ctx_p = adjout_prefix_lock(p); + ctx->ctx_pt = pt_ref(pte); return; } - ctx->ctx_prefix_call(peer, p->pt, p, ctx->ctx_arg); + p = adjout_prefix_first(peer, pte); + if (p == NULL) + continue; + ctx->ctx_prefix_call(peer, pte, p, ctx->ctx_arg); } done: @@ -713,7 +687,6 @@ adjout_prefix_dump_subtree(struct rde_pe int (*throttle)(void *)) { struct rib_context *ctx; - struct adjout_prefix xp; if ((ctx = calloc(1, sizeof(*ctx))) == NULL) return -1; @@ -728,11 +701,9 @@ adjout_prefix_dump_subtree(struct rde_pe ctx->ctx_subtreelen = subtreelen; /* lookup start of subtree */ - memset(&xp, 0, sizeof(xp)); - xp.pt = pt_fill(subtree, subtreelen); - ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp); - if (ctx->ctx_p) - adjout_prefix_lock(ctx->ctx_p); + ctx->ctx_pt = pt_get_next(subtree, subtreelen); + if (ctx->ctx_pt) + pt_ref(ctx->ctx_pt); /* store and lock first element */ rib_dump_insert(ctx); @@ -747,46 +718,112 @@ adjout_prefix_dump_subtree(struct rde_pe * Link a prefix into the different parent objects. 
*/ static void -adjout_prefix_link(struct adjout_prefix *p, struct rde_peer *peer, - struct adjout_attr *attrs, struct pt_entry *pt, uint32_t path_id_tx) +adjout_prefix_link(struct pt_entry *pte, struct rde_peer *peer, + struct adjout_attr *attrs, uint32_t path_id_tx) { - p->attrs = adjout_attr_ref(attrs); - p->pt = pt_ref(pt); - p->path_id_tx = path_id_tx; + struct adjout_prefix *p; + + /* assign ids on first use to keep the bitmap as small as possible */ + if (peer->adjout_bid == 0) + if (bitmap_id_get(&adjout_id_map, &peer->adjout_bid) == -1) + fatal(__func__); + + if ((p = adjout_prefix_with_attrs(pte, path_id_tx, attrs)) == NULL) { + p = adjout_prefix_alloc(pte, path_id_tx); + p->attrs = adjout_attr_ref(attrs); + } + + if (bitmap_set(&p->peermap, peer->adjout_bid) == -1) + fatal(__func__); } /* * Unlink a prefix from the different parent objects. */ static void -adjout_prefix_unlink(struct adjout_prefix *p, struct rde_peer *peer) +adjout_prefix_unlink(struct adjout_prefix *p, struct pt_entry *pte, + struct rde_peer *peer) { - /* destroy all references to other objects */ - adjout_attr_unref(p->attrs); - p->attrs = NULL; - pt_unref(p->pt); - /* must keep p->pt valid since there is an extra ref */ + bitmap_clear(&p->peermap, peer->adjout_bid); + if (bitmap_empty(&p->peermap)) { + /* destroy all references to other objects */ + adjout_attr_unref(p->attrs); + p->attrs = NULL; + + adjout_prefix_free(pte, p); + } +} + +static void +adjout_prefix_resize(struct pt_entry *pte) +{ + struct adjout_prefix *new; + uint32_t newlen, avail; + + avail = pte->adjoutavail; + newlen = bin_of_adjout_prefixes(avail + 1); + if ((new = reallocarray(pte->adjout, newlen, sizeof(*new))) == NULL) + fatal(__func__); + rdemem.adjout_prefix_size += sizeof(*new) * (newlen - avail); + + memset(&new[avail], 0, sizeof(*new) * (newlen - avail)); + pte->adjout = new; + pte->adjoutavail = newlen; } -/* alloc and zero new entry. May not fail. 
*/ +/* + * Insert a new entry into the pte adjout array, extending the array if needed. + * May not fail. + */ static struct adjout_prefix * -adjout_prefix_alloc(void) +adjout_prefix_alloc(struct pt_entry *pte, uint32_t path_id_tx) { struct adjout_prefix *p; + uint32_t i; - p = calloc(1, sizeof(*p)); - if (p == NULL) - fatal(__func__); + if (pte->adjoutlen + 1 > pte->adjoutavail) + adjout_prefix_resize(pte); + + /* keep array sorted by path_id_tx */ + for (i = 0; i < pte->adjoutlen; i++) { + if (pte->adjout[i].path_id_tx > path_id_tx) + break; + } + + p = &pte->adjout[i]; + /* shift reminder by one slot */ + for (i = pte->adjoutlen; &pte->adjout[i] > p; i--) + pte->adjout[i] = pte->adjout[i - 1]; + + /* initialize new element */ + p->attrs = NULL; + p->path_id_tx = path_id_tx; + bitmap_init(&p->peermap); + + pte->adjoutlen++; rdemem.adjout_prefix_cnt++; return p; } -/* free a unlinked entry */ +/* remove an entry from the pte adjout array */ static void -adjout_prefix_free(struct adjout_prefix *p) +adjout_prefix_free(struct pt_entry *pte, struct adjout_prefix *p) { + uint32_t i, idx; + + bitmap_reset(&p->peermap); + + idx = adjout_prefix_index(pte, p); + for (i = idx + 1; i < pte->adjoutlen; i++) + pte->adjout[i - 1] = pte->adjout[i]; + + p = &pte->adjout[pte->adjoutlen - 1]; + memset(p, 0, sizeof(*p)); + pte->adjoutlen--; + + /* TODO shrink array if X% empty */ + rdemem.adjout_prefix_cnt--; - free(p); } void Index: rde_peer.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v diff -u -p -r1.63 rde_peer.c --- rde_peer.c 16 Dec 2025 16:07:31 -0000 1.63 +++ rde_peer.c 17 Dec 2025 14:25:24 -0000 @@ -92,11 +92,10 @@ peer_shutdown(void) RB_FOREACH_SAFE(peer, peer_tree, &peertable, np) peer_delete(peer); - while (!RB_EMPTY(&zombietable)) - peer_reaper(NULL); - if (!RB_EMPTY(&peertable)) log_warnx("%s: free non-free table", __func__); + + /* XXX wait until all peer got reaped */ } /* @@ -435,6 +434,29 @@ 
peer_down(struct rde_peer *peer) peer->stats.prefix_cnt = 0; } +/* + * RIB walker callback for peer_delete / the reaper. + */ +static void +peer_reaper_upcall(struct rde_peer *peer, struct pt_entry *pte, + struct adjout_prefix *p, void *ptr) +{ + adjout_prefix_withdraw(peer, pte, p); +} + +/* + * Called after the adj-rib-out has been cleared, time to kill the zombie. + */ +static void +peer_reaper_done(void *ptr, uint8_t aid) +{ + struct rde_peer *peer = ptr; + + adjout_prefix_reaper(peer); + ibufq_free(peer->ibufq); + free(peer); +} + void peer_delete(struct rde_peer *peer) { @@ -445,13 +467,11 @@ peer_delete(struct rde_peer *peer) adjout_peer_free(peer); RB_REMOVE(peer_tree, &peertable, peer); - while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) { - log_warnx("zombie peer conflict"); - peer->conf.id = arc4random(); - } /* start reaping the zombie */ - peer_reaper(peer); + if (adjout_prefix_dump_new(peer, AID_UNSPEC, RDE_RUNNER_ROUNDS, peer, + peer_reaper_upcall, peer_reaper_done, NULL) == -1) + fatal("%s: adjout_prefix_dump_new", __func__); } /* @@ -550,8 +570,8 @@ peer_blast(struct rde_peer *peer, uint8_ rde_peer_send_rrefresh(peer, aid, ROUTE_REFRESH_BEGIN_RR); /* force out all updates from the Adj-RIB-Out */ - if (adjout_prefix_dump_new(peer, aid, 0, peer, peer_blast_upcall, - peer_blast_done, NULL) == -1) + if (adjout_prefix_dump_new(peer, aid, RDE_RUNNER_ROUNDS, peer, + peer_blast_upcall, peer_blast_done, NULL) == -1) fatal("%s: adjout_prefix_dump_new", __func__); } @@ -622,22 +642,6 @@ peer_begin_rrefresh(struct rde_peer *pee struct timespec ts = { .tv_nsec = 1000 * 1000 }; nanosleep(&ts, NULL); } -} - -void -peer_reaper(struct rde_peer *peer) -{ - if (peer == NULL) - peer = RB_ROOT(&zombietable); - if (peer == NULL) - return; - - if (!adjout_prefix_reaper(peer)) - return; - - ibufq_free(peer->ibufq); - RB_REMOVE(peer_tree, &zombietable, peer); - free(peer); } /* Index: rde_prefix.c =================================================================== 
RCS file: /cvs/src/usr.sbin/bgpd/rde_prefix.c,v diff -u -p -r1.59 rde_prefix.c --- rde_prefix.c 16 Dec 2025 12:11:16 -0000 1.59 +++ rde_prefix.c 17 Dec 2025 14:25:24 -0000 @@ -51,6 +51,9 @@ static void pt_free(struct pt_entry *) struct pt_entry4 { RB_ENTRY(pt_entry) pt_e; + struct adjout_prefix *adjout; + uint32_t adjoutlen; + uint32_t adjoutavail; uint8_t aid; uint8_t prefixlen; uint16_t len; @@ -60,6 +63,9 @@ struct pt_entry4 { struct pt_entry6 { RB_ENTRY(pt_entry) pt_e; + struct adjout_prefix *adjout; + uint32_t adjoutlen; + uint32_t adjoutavail; uint8_t aid; uint8_t prefixlen; uint16_t len; @@ -69,6 +75,9 @@ struct pt_entry6 { struct pt_entry_vpn4 { RB_ENTRY(pt_entry) pt_e; + struct adjout_prefix *adjout; + uint32_t adjoutlen; + uint32_t adjoutavail; uint8_t aid; uint8_t prefixlen; uint16_t len; @@ -83,6 +92,9 @@ struct pt_entry_vpn4 { struct pt_entry_vpn6 { RB_ENTRY(pt_entry) pt_e; + struct adjout_prefix *adjout; + uint32_t adjoutlen; + uint32_t adjoutavail; uint8_t aid; uint8_t prefixlen; uint16_t len; @@ -97,6 +109,9 @@ struct pt_entry_vpn6 { struct pt_entry_evpn { RB_ENTRY(pt_entry) pt_e; + struct adjout_prefix *adjout; + uint32_t adjoutlen; + uint32_t adjoutavail; uint8_t aid; uint8_t prefixlen; uint16_t len; @@ -117,12 +132,15 @@ struct pt_entry_evpn { struct pt_entry_flow { RB_ENTRY(pt_entry) pt_e; + struct adjout_prefix *adjout; + uint32_t adjoutlen; + uint32_t adjoutavail; uint8_t aid; uint8_t prefixlen; /* unused ??? 
*/ uint16_t len; uint32_t refcnt; uint64_t rd; - uint8_t flow[1]; /* NLRI */ + uint8_t flow[0]; /* NLRI */ }; #define PT_FLOW_SIZE (offsetof(struct pt_entry_flow, flow)) Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v diff -u -p -r1.284 rde_rib.c --- rde_rib.c 2 Dec 2025 13:03:35 -0000 1.284 +++ rde_rib.c 17 Dec 2025 14:25:24 -0000 @@ -465,7 +465,7 @@ rib_dump_free(struct rib_context *ctx) ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid); if (ctx->ctx_re) rib_dump_cleanup(ctx); - if (ctx->ctx_p) + if (ctx->ctx_pt) adjout_prefix_dump_cleanup(ctx); LIST_REMOVE(ctx, entry); free(ctx); Index: rde_update.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v diff -u -p -r1.191 rde_update.c --- rde_update.c 16 Dec 2025 12:16:03 -0000 1.191 +++ rde_update.c 17 Dec 2025 14:25:24 -0000 @@ -164,6 +164,7 @@ up_process_prefix(struct rde_peer *peer, struct filterstate state; struct bgpd_addr addr; int excluded = 0; + uint32_t path_id_tx = 0; /* * up_test_update() needs to run before the output filters @@ -194,11 +195,13 @@ up_process_prefix(struct rde_peer *peer, } /* from here on we know this is an update */ - if (p == (void *)-1) + if (p == (void *)-1) { + path_id_tx = new->path_id_tx; p = adjout_prefix_get(peer, new->path_id_tx, new->pt); + } up_prep_adjout(peer, &state, new->pt->aid); - adjout_prefix_update(p, peer, &state, new->pt, new->path_id_tx); + adjout_prefix_update(p, peer, &state, new->pt, path_id_tx); rde_filterstate_clean(&state); /* max prefix checker outbound */ Index: util.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/util.c,v diff -u -p -r1.97 util.c --- util.c 16 Dec 2025 15:38:55 -0000 1.97 +++ util.c 17 Dec 2025 14:25:24 -0000 @@ -1339,3 +1339,10 @@ bin_of_communities(unsigned int count) /* 8, 16, 24, ... 56, 64, 80, 96, ... 
*/ return bin_of(count, 5, 2); } + +unsigned int +bin_of_adjout_prefixes(unsigned int count) +{ + /* 1, 2, 3, 4, 6, 8, 12, 16, 24, ... */ + return bin_of(count, 1, 1); +}