Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: rewrite adj-out-rib code
To:
tech@openbsd.org
Date:
Wed, 17 Dec 2025 15:58:46 +0100

Download raw body.

Thread
  • Claudio Jeker:

    bgpd: rewrite adj-out-rib code

Fully rewrite the adj-rib-out code to not be per peer based but instead
global with a peer bitmap to know which peer holds which prefix version.

So a pt_entry now includes an array of struct adjout_prefix elements;
each entry is for a different path (a different set of attributes) and
includes a bitmap that tracks which peers include the prefix.

This alters most of the adjout_prefix functions in one way or another.

An optimisation on top of this is that the path_id_tx is forced to 0 for
peers that do not have add-path send enabled. This way the lookup for this
common case is less deep.

The peer_reaper is now replaced with a simple adjout_prefix_dump call.

This is enough for a first step.

In general this reduces memory consumption by more than 50%, especially if
the outbound filters produce the same path for many peers. My IXP
test setup dropped from over 20GB to below 5GB memory usage.
-- 
:wq Claudio

Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.527 bgpd.h
--- bgpd.h	16 Dec 2025 15:38:55 -0000	1.527
+++ bgpd.h	17 Dec 2025 14:25:24 -0000
@@ -1396,6 +1396,7 @@ struct rde_memstats {
 	long long	path_refs;
 	long long	prefix_cnt;
 	long long	adjout_prefix_cnt;
+	long long	adjout_prefix_size;
 	long long	pend_prefix_cnt;
 	long long	pend_attr_cnt;
 	long long	rib_cnt;
@@ -1676,6 +1677,7 @@ const char	*get_baudrate(unsigned long l
 
 unsigned int	 bin_of_attrs(unsigned int);
 unsigned int	 bin_of_communities(unsigned int);
+unsigned int	 bin_of_adjout_prefixes(unsigned int);
 
 /* flowspec.c */
 int	flowspec_valid(const uint8_t *, int, int);
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.677 rde.c
--- rde.c	16 Dec 2025 12:16:03 -0000	1.677
+++ rde.c	17 Dec 2025 14:25:24 -0000
@@ -326,7 +326,6 @@ rde_main(int debug, int verbose)
 		    monotime_to_usec(monotime_sub(io_end, loop_start));
 
 		peer_foreach(rde_dispatch_imsg_peer, NULL);
-		peer_reaper(NULL);
 
 		peer_end = getmonotime();
 		rdemem.rde_event_peer_usec +=
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.335 rde.h
--- rde.h	16 Dec 2025 12:16:03 -0000	1.335
+++ rde.h	17 Dec 2025 14:25:24 -0000
@@ -72,7 +72,6 @@ struct rib {
  * Currently I assume that we can do that with the neighbor_ip...
  */
 RB_HEAD(peer_tree, rde_peer);
-RB_HEAD(prefix_index, adjout_prefix);
 
 CH_HEAD(pend_prefix_hash, pend_prefix);
 TAILQ_HEAD(pend_prefix_queue, pend_prefix);
@@ -88,7 +87,6 @@ struct rde_peer {
 	struct bgpd_addr		 local_v6_addr;
 	struct capabilities		 capa;
 	struct addpath_eval		 eval;
-	struct prefix_index		 adj_rib_out;
 	struct pend_attr_queue		 updates[AID_MAX];
 	struct pend_prefix_queue	 withdraws[AID_MAX];
 	struct pend_attr_hash		 pend_attrs;
@@ -96,6 +94,7 @@ struct rde_peer {
 	struct filter_head		*out_rules;
 	struct ibufqueue		*ibufq;
 	monotime_t			 staletime[AID_MAX];
+	uint32_t			 adjout_bid;
 	uint32_t			 remote_bgpid;
 	uint32_t			 path_id_tx;
 	unsigned int			 local_if_scope;
@@ -263,14 +262,19 @@ struct nexthop {
 #define NEXTHOP_CONNECTED	0x01
 };
 
+struct adjout_prefix;
+
 /* generic entry without address specific part */
 struct pt_entry {
 	RB_ENTRY(pt_entry)		 pt_e;
+	struct adjout_prefix		*adjout;
+	uint32_t			 adjoutlen;
+	uint32_t			 adjoutavail;
 	uint8_t				 aid;
 	uint8_t				 prefixlen;
 	uint16_t			 len;
 	uint32_t			 refcnt;
-	uint8_t				 data[4]; /* data depending on aid */
+	uint8_t				 data[0]; /* data depending on aid */
 };
 
 struct prefix {
@@ -318,13 +322,10 @@ struct adjout_attr {
 };
 
 struct adjout_prefix {
-	RB_ENTRY(adjout_prefix)		 index;
-	struct pt_entry			*pt;
-	struct adjout_attr		*attrs;
 	uint32_t			 path_id_tx;
-	uint8_t			 	 flags;
+	struct adjout_attr		*attrs;
+	struct bitmap			 peermap;
 };
-#define	PREFIX_ADJOUT_FLAG_LOCKED	0x01	/* locked by rib walker */
 
 struct pend_attr {
 	TAILQ_ENTRY(pend_attr)		 entry;
@@ -357,7 +358,7 @@ enum eval_mode {
 struct rib_context {
 	LIST_ENTRY(rib_context)		 entry;
 	struct rib_entry		*ctx_re;
-	struct adjout_prefix		*ctx_p;
+	struct pt_entry			*ctx_pt;
 	uint32_t			 ctx_id;
 	void		(*ctx_rib_call)(struct rib_entry *, void *);
 	void		(*ctx_prefix_call)(struct rde_peer *,
@@ -423,7 +424,6 @@ void		 peer_blast(struct rde_peer *, uin
 void		 peer_dump(struct rde_peer *, uint8_t);
 void		 peer_begin_rrefresh(struct rde_peer *, uint8_t);
 int		 peer_work_pending(void);
-void		 peer_reaper(struct rde_peer *);
 
 void		 peer_imsg_push(struct rde_peer *, struct imsg *);
 int		 peer_imsg_pop(struct rde_peer *, struct imsg *);
@@ -748,9 +748,7 @@ void		 adjout_prefix_update(struct adjou
 		    struct filterstate *, struct pt_entry *, uint32_t);
 void		 adjout_prefix_withdraw(struct rde_peer *, struct pt_entry *,
 		    struct adjout_prefix *);
-void		 adjout_prefix_destroy(struct rde_peer *,
-		    struct adjout_prefix *);
-int		 adjout_prefix_reaper(struct rde_peer *);
+void		 adjout_prefix_reaper(struct rde_peer *);
 void		 adjout_prefix_dump_cleanup(struct rib_context *);
 void		 adjout_prefix_dump_r(struct rib_context *);
 int		 adjout_prefix_dump_new(struct rde_peer *, uint8_t,
Index: rde_adjout.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_adjout.c,v
diff -u -p -r1.14 rde_adjout.c
--- rde_adjout.c	16 Dec 2025 12:16:03 -0000	1.14
+++ rde_adjout.c	17 Dec 2025 14:25:24 -0000
@@ -30,6 +30,8 @@
 #include "log.h"
 #include "chash.h"
 
+struct bitmap adjout_id_map;
+
 static struct adjout_attr	*adjout_attr_ref(struct adjout_attr *);
 static void			 adjout_attr_unref(struct adjout_attr *);
 
@@ -385,77 +387,73 @@ adjout_attr_get(struct filterstate *stat
 
 CH_GENERATE(adjout_attr_tree, adjout_attr, adjout_attr_eq, adjout_attr_hash);
 
-static inline struct adjout_prefix *
-adjout_prefix_lock(struct adjout_prefix *p)
-{
-	if (p->flags & PREFIX_ADJOUT_FLAG_LOCKED)
-		fatalx("%s: locking locked prefix", __func__);
-	p->flags |= PREFIX_ADJOUT_FLAG_LOCKED;
-	return p;
-}
-
-static inline struct adjout_prefix *
-adjout_prefix_unlock(struct adjout_prefix *p)
-{
-	if ((p->flags & PREFIX_ADJOUT_FLAG_LOCKED) == 0)
-		fatalx("%s: unlocking unlocked prefix", __func__);
-	p->flags &= ~PREFIX_ADJOUT_FLAG_LOCKED;
-	return p;
-}
-
-static inline int
-prefix_is_locked(struct adjout_prefix *p)
-{
-	return (p->flags & PREFIX_ADJOUT_FLAG_LOCKED) != 0;
-}
-
-static inline int
-prefix_is_dead(struct adjout_prefix *p)
-{
-	return p->attrs == NULL;
-}
-
-static void	 adjout_prefix_link(struct adjout_prefix *, struct rde_peer *,
-		    struct adjout_attr *, struct pt_entry *, uint32_t);
+static void	 adjout_prefix_link(struct pt_entry *, struct rde_peer *,
+		    struct adjout_attr *, uint32_t);
 static void	 adjout_prefix_unlink(struct adjout_prefix *,
-		    struct rde_peer *);
+		    struct pt_entry *, struct rde_peer *);
 
-static struct adjout_prefix	*adjout_prefix_alloc(void);
-static void			 adjout_prefix_free(struct adjout_prefix *);
+static struct adjout_prefix	*adjout_prefix_alloc(struct pt_entry *,
+				    uint32_t);
+static void			 adjout_prefix_free(struct pt_entry *,
+				    struct adjout_prefix *);
 
-/* RB tree comparison function */
-static inline int
-prefix_index_cmp(struct adjout_prefix *a, struct adjout_prefix *b)
+static inline uint32_t
+adjout_prefix_index(struct pt_entry *pte, struct adjout_prefix *p)
 {
-	int r;
-	r = pt_prefix_cmp(a->pt, b->pt);
-	if (r != 0)
-		return r;
+	ptrdiff_t idx = p - pte->adjout;
 
-	if (a->path_id_tx > b->path_id_tx)
-		return 1;
-	if (a->path_id_tx < b->path_id_tx)
-		return -1;
-	return 0;
-}
+	if (idx < 0 || idx > pte->adjoutlen)
+		fatalx("corrupt pte adjout list");
 
-RB_GENERATE_STATIC(prefix_index, adjout_prefix, index, prefix_index_cmp)
+	return idx;
+}
 
 /*
- * Search for specified prefix in the peer prefix_index.
- * Returns NULL if not found.
+ * Search for specified prefix in the pte adjout array that is for the
+ * specified path_id_tx and peer. Returns NULL if not found.
  */
 struct adjout_prefix *
 adjout_prefix_get(struct rde_peer *peer, uint32_t path_id_tx,
     struct pt_entry *pte)
 {
-	struct adjout_prefix xp;
+	struct adjout_prefix *p;
+	uint32_t i;
 
-	memset(&xp, 0, sizeof(xp));
-	xp.pt = pte;
-	xp.path_id_tx = path_id_tx;
+	for (i = 0; i < pte->adjoutlen; i++) {
+		p = &pte->adjout[i];
+		if (p->path_id_tx != path_id_tx)
+			continue;
+		if (bitmap_test(&p->peermap, peer->adjout_bid))
+			return p;
+		if (p->path_id_tx > path_id_tx)
+			break;
+	}
 
-	return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
+	return NULL;
+}
+
+/*
+ * Search for specified prefix in the pte adjout array that is for the
+ * specified path_id_tx and attrs. Returns NULL if not found.
+ */
+static struct adjout_prefix *
+adjout_prefix_with_attrs(struct pt_entry *pte, uint32_t path_id_tx,
+    struct adjout_attr *attrs)
+{
+	struct adjout_prefix *p;
+	uint32_t i;
+
+	for (i = 0; i < pte->adjoutlen; i++) {
+		p = &pte->adjout[i];
+		if (p->path_id_tx != path_id_tx)
+			continue;
+		if (p->attrs == attrs)
+			return p;
+		if (p->path_id_tx > path_id_tx)
+			break;
+	}
+
+	return NULL;
 }
 
 /*
@@ -465,15 +463,23 @@ adjout_prefix_get(struct rde_peer *peer,
 struct adjout_prefix *
 adjout_prefix_first(struct rde_peer *peer, struct pt_entry *pte)
 {
-	struct adjout_prefix xp, *np;
+	struct adjout_prefix *p;
+	uint32_t i;
+	int has_add_path = 0;
 
-	memset(&xp, 0, sizeof(xp));
-	xp.pt = pte;
+	if (peer_has_add_path(peer, pte->aid, CAPA_AP_SEND))
+		has_add_path = 1;
 
-	np = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
-	if (np == NULL || pt_prefix_cmp(np->pt, xp.pt) != 0)
-		return NULL;
-	return np;
+	for (i = 0; i < pte->adjoutlen; i++) {
+		p = &pte->adjout[i];
+		if (bitmap_test(&p->peermap, peer->adjout_bid))
+			return p;
+		if (!has_add_path && p->path_id_tx != 0) {
+			return NULL;
+		}
+	}
+
+	return NULL;
 }
 
 /*
@@ -481,14 +487,25 @@ adjout_prefix_first(struct rde_peer *pee
  */
 struct adjout_prefix *
 adjout_prefix_next(struct rde_peer *peer, struct pt_entry *pte,
-    struct adjout_prefix *p)
+    struct adjout_prefix *last)
 {
-	struct adjout_prefix *np;
+	struct adjout_prefix *p;
+	uint32_t i;
 
-	np = RB_NEXT(prefix_index, &peer->adj_rib_out, p);
-	if (np == NULL || np->pt != p->pt)
+	if (!peer_has_add_path(peer, pte->aid, CAPA_AP_SEND))
 		return NULL;
-	return np;
+
+	i = adjout_prefix_index(pte, last);
+	for (; i < pte->adjoutlen; i++)
+		if (pte->adjout[i].path_id_tx != last->path_id_tx)
+			break;
+	for (; i < pte->adjoutlen; i++) {
+		p = &pte->adjout[i];
+		if (bitmap_test(&p->peermap, peer->adjout_bid))
+			return p;
+	}
+
+	return NULL;
 }
 
 /*
@@ -500,16 +517,11 @@ adjout_prefix_update(struct adjout_prefi
 {
 	struct adjout_attr *attrs;
 
-	if (p == NULL) {
-		p = adjout_prefix_alloc();
-		/* initially mark DEAD so code below is skipped */
-
-		p->pt = pt_ref(pte);
-		p->path_id_tx = path_id_tx;
+	if (p != NULL) {
+		if (p->path_id_tx != path_id_tx ||
+		    bitmap_test(&p->peermap, peer->adjout_bid) == 0)
+			fatalx("%s: king bula is unhappy", __func__);
 
-		if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
-			fatalx("%s: RB index invariant violated", __func__);
-	} else {
 		/*
 		 * XXX for now treat a different path_id_tx like different
 		 * attributes and force out an update. It is unclear how
@@ -527,26 +539,16 @@ adjout_prefix_update(struct adjout_prefi
 		}
 
 		/* unlink prefix so it can be relinked below */
-		adjout_prefix_unlink(p, peer);
+		adjout_prefix_unlink(p, pte, peer);
 		peer->stats.prefix_out_cnt--;
 	}
 
-	/* update path_id_tx now that the prefix is unlinked */
-	if (p->path_id_tx != path_id_tx) {
-		/* path_id_tx is part of the index so remove and re-insert p */
-		RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
-		p->path_id_tx = path_id_tx;
-		if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
-			fatalx("%s: RB index invariant violated", __func__);
-	}
-
 	attrs = adjout_attr_get(state);
-
-	adjout_prefix_link(p, peer, attrs, p->pt, p->path_id_tx);
+	adjout_prefix_link(pte, peer, attrs, path_id_tx);
 	peer->stats.prefix_out_cnt++;
 
 	if (peer_is_up(peer))
-		pend_prefix_add(peer, p->attrs, p->pt, p->path_id_tx);
+		pend_prefix_add(peer, attrs, pte, path_id_tx);
 }
 
 /*
@@ -557,115 +559,87 @@ void
 adjout_prefix_withdraw(struct rde_peer *peer, struct pt_entry *pte,
     struct adjout_prefix *p)
 {
+	if (bitmap_test(&p->peermap, peer->adjout_bid) == 0)
+		fatalx("%s: king bula is unhappy", __func__);
+
 	if (peer_is_up(peer))
 		pend_prefix_add(peer, NULL, pte, p->path_id_tx);
 
-	adjout_prefix_destroy(peer, p);
+	adjout_prefix_unlink(p, pte, peer);
+	peer->stats.prefix_out_cnt--;
 }
 
 void
-adjout_prefix_destroy(struct rde_peer *peer, struct adjout_prefix *p)
-{
-	/* unlink prefix if it was linked (not dead) */
-	if (!prefix_is_dead(p)) {
-		adjout_prefix_unlink(p, peer);
-		peer->stats.prefix_out_cnt--;
-	}
-
-	if (!prefix_is_locked(p)) {
-		RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
-		/* remove the last prefix reference before free */
-		pt_unref(p->pt);
-		adjout_prefix_free(p);
-	}
-}
-
-int
 adjout_prefix_reaper(struct rde_peer *peer)
 {
-	struct adjout_prefix *p, *np;
-	int count = RDE_REAPER_ROUNDS;
-
-	RB_FOREACH_SAFE(p, prefix_index, &peer->adj_rib_out, np) {
-		adjout_prefix_destroy(peer, p);
-		if (count-- <= 0)
-			return 0;
-	}
-	return 1;
+	bitmap_id_put(&adjout_id_map, peer->adjout_bid);
 }
 
-static struct adjout_prefix *
+static struct pt_entry *
 prefix_restart(struct rib_context *ctx)
 {
-	struct adjout_prefix *p = NULL;
+	struct pt_entry *pte = NULL;
 	struct rde_peer *peer;
 
 	if ((peer = peer_get(ctx->ctx_id)) == NULL)
 		return NULL;
 
-	if (ctx->ctx_p)
-		p = adjout_prefix_unlock(ctx->ctx_p);
-
-	while (p && prefix_is_dead(p)) {
-		struct adjout_prefix *next;
-
-		next = RB_NEXT(prefix_index, unused, p);
-		adjout_prefix_destroy(peer, p);
-		p = next;
+	/* be careful when this is the last reference to pte */
+	if (ctx->ctx_pt != NULL) {
+		pte = ctx->ctx_pt;
+		if (pte->refcnt == 1)
+			pte = pt_next(pte);
+		pt_unref(ctx->ctx_pt);
 	}
-	ctx->ctx_p = NULL;
-	return p;
+	ctx->ctx_pt = NULL;
+	return pte;
 }
 
 void
 adjout_prefix_dump_cleanup(struct rib_context *ctx)
 {
-	struct adjout_prefix *p = ctx->ctx_p;
-	struct rde_peer *peer;
-
-	if ((peer = peer_get(ctx->ctx_id)) == NULL)
-		return;
-	if (prefix_is_dead(adjout_prefix_unlock(p)))
-		adjout_prefix_destroy(peer, p);
+	if (ctx->ctx_pt != NULL)
+		pt_unref(ctx->ctx_pt);
 }
 
 void
 adjout_prefix_dump_r(struct rib_context *ctx)
 {
-	struct adjout_prefix *p, *next;
+	struct pt_entry *pte, *next;
+	struct adjout_prefix *p;
 	struct rde_peer *peer;
 	unsigned int i;
 
 	if ((peer = peer_get(ctx->ctx_id)) == NULL)
 		goto done;
 
-	if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
-		p = RB_MIN(prefix_index, &peer->adj_rib_out);
+	if (ctx->ctx_pt == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
+		pte = pt_first(ctx->ctx_aid);
 	else
-		p = prefix_restart(ctx);
+		pte = prefix_restart(ctx);
 
-	for (i = 0; p != NULL; p = next) {
-		next = RB_NEXT(prefix_index, unused, p);
-		if (prefix_is_dead(p))
-			continue;
+	for (i = 0; pte != NULL; pte = next) {
+		next = pt_next(pte);
 		if (ctx->ctx_aid != AID_UNSPEC &&
-		    ctx->ctx_aid != p->pt->aid)
+		    ctx->ctx_aid != pte->aid)
 			continue;
 		if (ctx->ctx_subtree.aid != AID_UNSPEC) {
 			struct bgpd_addr addr;
-			pt_getaddr(p->pt, &addr);
+			pt_getaddr(pte, &addr);
 			if (prefix_compare(&ctx->ctx_subtree, &addr,
 			    ctx->ctx_subtreelen) != 0)
 				/* left subtree, walk is done */
 				break;
 		}
-		if (ctx->ctx_count && i++ >= ctx->ctx_count &&
-		    !prefix_is_locked(p)) {
+		if (ctx->ctx_count && i++ >= ctx->ctx_count) {
 			/* store and lock last element */
-			ctx->ctx_p = adjout_prefix_lock(p);
+			ctx->ctx_pt = pt_ref(pte);
 			return;
 		}
-		ctx->ctx_prefix_call(peer, p->pt, p, ctx->ctx_arg);
+		p = adjout_prefix_first(peer, pte);
+		if (p == NULL)
+			continue;
+		ctx->ctx_prefix_call(peer, pte, p, ctx->ctx_arg);
 	}
 
 done:
@@ -713,7 +687,6 @@ adjout_prefix_dump_subtree(struct rde_pe
     int (*throttle)(void *))
 {
 	struct rib_context *ctx;
-	struct adjout_prefix xp;
 
 	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
 		return -1;
@@ -728,11 +701,9 @@ adjout_prefix_dump_subtree(struct rde_pe
 	ctx->ctx_subtreelen = subtreelen;
 
 	/* lookup start of subtree */
-	memset(&xp, 0, sizeof(xp));
-	xp.pt = pt_fill(subtree, subtreelen);
-	ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
-	if (ctx->ctx_p)
-		adjout_prefix_lock(ctx->ctx_p);
+	ctx->ctx_pt = pt_get_next(subtree, subtreelen);
+	if (ctx->ctx_pt)
+		pt_ref(ctx->ctx_pt);	/* store and lock first element */
 
 	rib_dump_insert(ctx);
 
@@ -747,46 +718,112 @@ adjout_prefix_dump_subtree(struct rde_pe
  * Link a prefix into the different parent objects.
  */
 static void
-adjout_prefix_link(struct adjout_prefix *p, struct rde_peer *peer,
-    struct adjout_attr *attrs, struct pt_entry *pt, uint32_t path_id_tx)
+adjout_prefix_link(struct pt_entry *pte, struct rde_peer *peer,
+    struct adjout_attr *attrs, uint32_t path_id_tx)
 {
-	p->attrs = adjout_attr_ref(attrs);
-	p->pt = pt_ref(pt);
-	p->path_id_tx = path_id_tx;
+	struct adjout_prefix *p;
+
+	/* assign ids on first use to keep the bitmap as small as possible */
+	if (peer->adjout_bid == 0)
+		if (bitmap_id_get(&adjout_id_map, &peer->adjout_bid) == -1)
+			fatal(__func__);
+
+	if ((p = adjout_prefix_with_attrs(pte, path_id_tx, attrs)) == NULL) {
+		p = adjout_prefix_alloc(pte, path_id_tx);
+		p->attrs = adjout_attr_ref(attrs);
+	}
+
+	if (bitmap_set(&p->peermap, peer->adjout_bid) == -1)
+		fatal(__func__);
 }
 
 /*
  * Unlink a prefix from the different parent objects.
  */
 static void
-adjout_prefix_unlink(struct adjout_prefix *p, struct rde_peer *peer)
+adjout_prefix_unlink(struct adjout_prefix *p, struct pt_entry *pte,
+    struct rde_peer *peer)
 {
-	/* destroy all references to other objects */
-	adjout_attr_unref(p->attrs);
-	p->attrs = NULL;
-	pt_unref(p->pt);
-	/* must keep p->pt valid since there is an extra ref */
+	bitmap_clear(&p->peermap, peer->adjout_bid);
+	if (bitmap_empty(&p->peermap)) {
+		/* destroy all references to other objects */
+		adjout_attr_unref(p->attrs);
+		p->attrs = NULL;
+
+		adjout_prefix_free(pte, p);
+	}
+}
+
+static void
+adjout_prefix_resize(struct pt_entry *pte)
+{
+	struct adjout_prefix *new;
+	uint32_t newlen, avail;
+
+	avail = pte->adjoutavail;
+	newlen = bin_of_adjout_prefixes(avail + 1);
+	if ((new = reallocarray(pte->adjout, newlen, sizeof(*new))) == NULL)
+		fatal(__func__);
+	rdemem.adjout_prefix_size += sizeof(*new) * (newlen - avail);
+
+	memset(&new[avail], 0, sizeof(*new) * (newlen - avail));
+	pte->adjout = new;
+	pte->adjoutavail = newlen;
 }
 
-/* alloc and zero new entry. May not fail. */
+/*
+ * Insert a new entry into the pte adjout array, extending the array if needed.
+ * May not fail.
+ */
 static struct adjout_prefix *
-adjout_prefix_alloc(void)
+adjout_prefix_alloc(struct pt_entry *pte, uint32_t path_id_tx)
 {
 	struct adjout_prefix *p;
+	uint32_t i;
 
-	p = calloc(1, sizeof(*p));
-	if (p == NULL)
-		fatal(__func__);
+	if (pte->adjoutlen + 1 > pte->adjoutavail)
+		adjout_prefix_resize(pte);
+
+	/* keep array sorted by path_id_tx */
+	for (i = 0; i < pte->adjoutlen; i++) {
+		if (pte->adjout[i].path_id_tx > path_id_tx)
+			break;
+	}
+
+	p = &pte->adjout[i];
+	/* shift remainder by one slot */
+	for (i = pte->adjoutlen; &pte->adjout[i] > p; i--)
+		pte->adjout[i] = pte->adjout[i - 1];
+
+	/* initialize new element */
+	p->attrs = NULL;
+	p->path_id_tx = path_id_tx;
+	bitmap_init(&p->peermap);
+
+	pte->adjoutlen++;
 	rdemem.adjout_prefix_cnt++;
 	return p;
 }
 
-/* free a unlinked entry */
+/* remove an entry from the pte adjout array */
 static void
-adjout_prefix_free(struct adjout_prefix *p)
+adjout_prefix_free(struct pt_entry *pte, struct adjout_prefix *p)
 {
+	uint32_t i, idx;
+
+	bitmap_reset(&p->peermap);
+
+	idx = adjout_prefix_index(pte, p);
+	for (i = idx + 1; i < pte->adjoutlen; i++)
+		pte->adjout[i - 1] = pte->adjout[i];
+
+	p = &pte->adjout[pte->adjoutlen - 1];
+	memset(p, 0, sizeof(*p));
+	pte->adjoutlen--;
+
+	/* TODO shrink array if X% empty */
+
 	rdemem.adjout_prefix_cnt--;
-	free(p);
 }
 
 void
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.63 rde_peer.c
--- rde_peer.c	16 Dec 2025 16:07:31 -0000	1.63
+++ rde_peer.c	17 Dec 2025 14:25:24 -0000
@@ -92,11 +92,10 @@ peer_shutdown(void)
 	RB_FOREACH_SAFE(peer, peer_tree, &peertable, np)
 		peer_delete(peer);
 
-	while (!RB_EMPTY(&zombietable))
-		peer_reaper(NULL);
-
 	if (!RB_EMPTY(&peertable))
 		log_warnx("%s: free non-free table", __func__);
+
+	/* XXX wait until all peer got reaped */
 }
 
 /*
@@ -435,6 +434,29 @@ peer_down(struct rde_peer *peer)
 	peer->stats.prefix_cnt = 0;
 }
 
+/*
+ * RIB walker callback for peer_delete / the reaper.
+ */
+static void
+peer_reaper_upcall(struct rde_peer *peer, struct pt_entry *pte,
+    struct adjout_prefix *p, void *ptr)
+{
+	adjout_prefix_withdraw(peer, pte, p);
+}
+
+/*
+ * Called after the adj-rib-out has been cleared, time to kill the zombie.
+ */
+static void
+peer_reaper_done(void *ptr, uint8_t aid)
+{
+	struct rde_peer		*peer = ptr;
+
+	adjout_prefix_reaper(peer);
+	ibufq_free(peer->ibufq);
+	free(peer);
+}
+
 void
 peer_delete(struct rde_peer *peer)
 {
@@ -445,13 +467,11 @@ peer_delete(struct rde_peer *peer)
 	adjout_peer_free(peer);
 
 	RB_REMOVE(peer_tree, &peertable, peer);
-	while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) {
-		log_warnx("zombie peer conflict");
-		peer->conf.id = arc4random();
-	}
 
 	/* start reaping the zombie */
-	peer_reaper(peer);
+	if (adjout_prefix_dump_new(peer, AID_UNSPEC, RDE_RUNNER_ROUNDS, peer,
+	    peer_reaper_upcall, peer_reaper_done, NULL) == -1)
+		fatal("%s: adjout_prefix_dump_new", __func__);
 }
 
 /*
@@ -550,8 +570,8 @@ peer_blast(struct rde_peer *peer, uint8_
 		rde_peer_send_rrefresh(peer, aid, ROUTE_REFRESH_BEGIN_RR);
 
 	/* force out all updates from the Adj-RIB-Out */
-	if (adjout_prefix_dump_new(peer, aid, 0, peer, peer_blast_upcall,
-	    peer_blast_done, NULL) == -1)
+	if (adjout_prefix_dump_new(peer, aid, RDE_RUNNER_ROUNDS, peer,
+	    peer_blast_upcall, peer_blast_done, NULL) == -1)
 		fatal("%s: adjout_prefix_dump_new", __func__);
 }
 
@@ -622,22 +642,6 @@ peer_begin_rrefresh(struct rde_peer *pee
 		struct timespec ts = { .tv_nsec = 1000 * 1000 };
 		nanosleep(&ts, NULL);
 	}
-}
-
-void
-peer_reaper(struct rde_peer *peer)
-{
-	if (peer == NULL)
-		peer = RB_ROOT(&zombietable);
-	if (peer == NULL)
-		return;
-
-	if (!adjout_prefix_reaper(peer))
-		return;
-
-	ibufq_free(peer->ibufq);
-	RB_REMOVE(peer_tree, &zombietable, peer);
-	free(peer);
 }
 
 /*
Index: rde_prefix.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_prefix.c,v
diff -u -p -r1.59 rde_prefix.c
--- rde_prefix.c	16 Dec 2025 12:11:16 -0000	1.59
+++ rde_prefix.c	17 Dec 2025 14:25:24 -0000
@@ -51,6 +51,9 @@ static void		 pt_free(struct pt_entry *)
 
 struct pt_entry4 {
 	RB_ENTRY(pt_entry)		pt_e;
+	struct adjout_prefix		*adjout;
+	uint32_t			adjoutlen;
+	uint32_t			adjoutavail;
 	uint8_t				aid;
 	uint8_t				prefixlen;
 	uint16_t			len;
@@ -60,6 +63,9 @@ struct pt_entry4 {
 
 struct pt_entry6 {
 	RB_ENTRY(pt_entry)		pt_e;
+	struct adjout_prefix		*adjout;
+	uint32_t			adjoutlen;
+	uint32_t			adjoutavail;
 	uint8_t				aid;
 	uint8_t				prefixlen;
 	uint16_t			len;
@@ -69,6 +75,9 @@ struct pt_entry6 {
 
 struct pt_entry_vpn4 {
 	RB_ENTRY(pt_entry)		pt_e;
+	struct adjout_prefix		*adjout;
+	uint32_t			adjoutlen;
+	uint32_t			adjoutavail;
 	uint8_t				aid;
 	uint8_t				prefixlen;
 	uint16_t			len;
@@ -83,6 +92,9 @@ struct pt_entry_vpn4 {
 
 struct pt_entry_vpn6 {
 	RB_ENTRY(pt_entry)		pt_e;
+	struct adjout_prefix		*adjout;
+	uint32_t			adjoutlen;
+	uint32_t			adjoutavail;
 	uint8_t				aid;
 	uint8_t				prefixlen;
 	uint16_t			len;
@@ -97,6 +109,9 @@ struct pt_entry_vpn6 {
 
 struct pt_entry_evpn {
 	RB_ENTRY(pt_entry)		pt_e;
+	struct adjout_prefix		*adjout;
+	uint32_t			adjoutlen;
+	uint32_t			adjoutavail;
 	uint8_t				aid;
 	uint8_t				prefixlen;
 	uint16_t			len;
@@ -117,12 +132,15 @@ struct pt_entry_evpn {
 
 struct pt_entry_flow {
 	RB_ENTRY(pt_entry)		pt_e;
+	struct adjout_prefix		*adjout;
+	uint32_t			adjoutlen;
+	uint32_t			adjoutavail;
 	uint8_t				aid;
 	uint8_t				prefixlen;	/* unused ??? */
 	uint16_t			len;
 	uint32_t			refcnt;
 	uint64_t			rd;
-	uint8_t				flow[1];	/* NLRI */
+	uint8_t				flow[0];	/* NLRI */
 };
 
 #define PT_FLOW_SIZE		(offsetof(struct pt_entry_flow, flow))
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
diff -u -p -r1.284 rde_rib.c
--- rde_rib.c	2 Dec 2025 13:03:35 -0000	1.284
+++ rde_rib.c	17 Dec 2025 14:25:24 -0000
@@ -465,7 +465,7 @@ rib_dump_free(struct rib_context *ctx)
 		ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
 	if (ctx->ctx_re)
 		rib_dump_cleanup(ctx);
-	if (ctx->ctx_p)
+	if (ctx->ctx_pt)
 		adjout_prefix_dump_cleanup(ctx);
 	LIST_REMOVE(ctx, entry);
 	free(ctx);
Index: rde_update.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
diff -u -p -r1.191 rde_update.c
--- rde_update.c	16 Dec 2025 12:16:03 -0000	1.191
+++ rde_update.c	17 Dec 2025 14:25:24 -0000
@@ -164,6 +164,7 @@ up_process_prefix(struct rde_peer *peer,
 	struct filterstate state;
 	struct bgpd_addr addr;
 	int excluded = 0;
+	uint32_t path_id_tx = 0;
 
 	/*
 	 * up_test_update() needs to run before the output filters
@@ -194,11 +195,13 @@ up_process_prefix(struct rde_peer *peer,
 	}
 
 	/* from here on we know this is an update */
-	if (p == (void *)-1)
+	if (p == (void *)-1) {
+		path_id_tx = new->path_id_tx;
 		p = adjout_prefix_get(peer, new->path_id_tx, new->pt);
+	}
 
 	up_prep_adjout(peer, &state, new->pt->aid);
-	adjout_prefix_update(p, peer, &state, new->pt, new->path_id_tx);
+	adjout_prefix_update(p, peer, &state, new->pt, path_id_tx);
 	rde_filterstate_clean(&state);
 
 	/* max prefix checker outbound */
Index: util.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/util.c,v
diff -u -p -r1.97 util.c
--- util.c	16 Dec 2025 15:38:55 -0000	1.97
+++ util.c	17 Dec 2025 14:25:24 -0000
@@ -1339,3 +1339,10 @@ bin_of_communities(unsigned int count)
 	/* 8, 16, 24, ... 56, 64, 80, 96, ... */
 	return bin_of(count, 5, 2);
 }
+
+unsigned int
+bin_of_adjout_prefixes(unsigned int count)
+{
+	/* 1, 2, 3, 4, 6, 8, 12, 16, 24, ... */
+	return bin_of(count, 1, 1);
+}