Download raw body.
bgpd: implement rib prefix pending queue
Next small step towards optimising add-path send.
This implements the actually pending prefix queue in struct re_entry.
Elements are added to the queue and the queue is flushed once processed.
Nothing is currently using this data. This will follow next.
Right now this uses a simple TAILQ. It is hard to estimate if this
is good enough or a better structure is needed. Only profiling this on
some large setups will give us the proper insights.
Prefixes / paths are added to the queue and replaced on every update.
Once peer_process_updates() runs for a given rib entry the queue is
flushed. In rib_remove() we just check that the queue is actually empty.
This should be implied by the re_is_queued() checked a bit earlier but
lets be sure.
--
:wq Claudio
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.355 rde.h
--- rde.h 1 Jul 2026 09:53:47 -0000 1.355
+++ rde.h 1 Jul 2026 14:37:08 -0000
@@ -42,14 +42,15 @@ LIST_HEAD(prefix_list, prefix);
TAILQ_HEAD(prefix_queue, prefix);
RB_HEAD(rib_tree, rib_entry);
TAILQ_HEAD(rib_queue, rib_entry);
-LIST_HEAD(rib_pq_head, rib_pq);
+struct pq_entry;
+TAILQ_HEAD(pq_head, pq_entry);
struct rib_entry {
RB_ENTRY(rib_entry) rib_e;
TAILQ_ENTRY(rib_entry) rib_queue;
struct prefix_queue prefix_h;
struct pt_entry *prefix;
- struct rib_pq_head rib_pq_list;
+ struct pq_head pq_head;
uint32_t pq_peer_id;
uint16_t rib_id;
uint8_t lock;
@@ -638,7 +639,9 @@ int rib_dump_subtree(uint16_t, struct
void (*)(void *, uint8_t),
int (*)(void *));
void rib_dump_terminate(void *);
-void rib_dequeue(struct rib_entry *);
+void rib_pq_enqueue(struct rib_entry *, struct rde_peer *,
+ uint32_t, struct prefix *);
+void rib_pq_dequeue(struct rib_entry *);
extern struct rib flowrib;
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.77 rde_peer.c
--- rde_peer.c 1 Jul 2026 09:53:47 -0000 1.77
+++ rde_peer.c 1 Jul 2026 14:33:30 -0000
@@ -26,12 +26,6 @@
#include "bgpd.h"
#include "rde.h"
-struct rib_pq {
- LIST_ENTRY(rib_pq) entry;
- struct prefix *new;
- uint32_t old_pathid_tx;
-};
-
struct peer_tree peertable = RB_INITIALIZER(&peertable);
struct peer_tree zombietable = RB_INITIALIZER(&zombietable);
struct rde_peer *peerself;
@@ -306,12 +301,16 @@ peer_generate_update(struct rde_peer *pe
* EVAL_REEVAL is used by config reloads (a full RIB refresh is needed)
* and for single peer RIB dumps but those call peer_generate_update()
* directly.
+ * peer, newpath and old_pathid_tx are ignored if mode is EVAL_REEVAL.
+ * In the other cases either newpath or old_pathid_tx are valid but not
+ * both at the same time.
*/
void
rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer,
- struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode)
+ struct prefix *newpath, uint32_t old_path_id_tx, enum eval_mode mode)
{
struct rde_peer *p;
+ uint32_t path_id_tx;
switch (mode) {
case EVAL_REEVAL:
@@ -329,16 +328,12 @@ rde_enqueue_updates(struct rib_entry *re
fatalx("bad eval mode in %s", __func__);
}
- /*
- * Enqueue only once, may need some reconsideration if queue
- * length of individual peers becomes excessivly long.
- */
- if (re->pq_mode == EVAL_NONE) {
- re->pq_peer_id = peer->conf.id;
- TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
- rdemem.rde_rib_entry_count++;
- peer->stats.rib_entry_count++;
- }
+ if (newpath != NULL)
+ path_id_tx = newpath->path_id_tx;
+ else
+ path_id_tx = old_path_id_tx;
+
+ rib_pq_enqueue(re, peer, path_id_tx, newpath);
/* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */
if (re->pq_mode != EVAL_DEFAULT)
@@ -361,7 +356,7 @@ peer_process_updates(struct rde_peer *pe
RB_FOREACH(p, peer_tree, &peertable)
peer_generate_update(p, re, re->pq_mode, 0);
- rib_dequeue(re);
+ rib_pq_dequeue(re);
}
/*
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
diff -u -p -r1.301 rde_rib.c
--- rde_rib.c 1 Jul 2026 09:53:47 -0000 1.301
+++ rde_rib.c 1 Jul 2026 15:59:24 -0000
@@ -41,11 +41,17 @@ uint16_t rib_size;
struct rib **ribs;
struct rib flowrib = { .id = 1, .tree = RB_INITIALIZER(&flowrib.tree) };
+struct pq_entry {
+ TAILQ_ENTRY(pq_entry) entry;
+ struct prefix *p; /* NULL for withdraws */
+ uint32_t path_id_tx;
+};
+
struct rib_entry *rib_add(struct rib *, struct pt_entry *);
static inline int rib_compare(const struct rib_entry *,
const struct rib_entry *);
-static void rib_remove(struct rib_entry *);
static inline int rib_empty(struct rib_entry *);
+static void rib_remove(struct rib_entry *);
static void rib_dump_abort(uint16_t);
RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare);
@@ -321,6 +327,7 @@ rib_add(struct rib *rib, struct pt_entry
fatal("rib_add");
TAILQ_INIT(&re->prefix_h);
+ TAILQ_INIT(&re->pq_head);
re->prefix = pt_ref(pte);
re->rib_id = rib->id;
re->pq_mode = EVAL_NONE;
@@ -336,6 +343,86 @@ rib_add(struct rib *rib, struct pt_entry
return (re);
}
+static inline int
+rib_empty(struct rib_entry *re)
+{
+ return TAILQ_EMPTY(&re->prefix_h);
+}
+
+static void
+rib_pq_remove_entry(struct rib_entry *re, uint32_t path_id_tx)
+{
+ struct pq_entry *pq;
+
+ TAILQ_FOREACH(pq, &re->pq_head, entry) {
+ if (pq->path_id_tx == path_id_tx) {
+ TAILQ_REMOVE(&re->pq_head, pq, entry);
+ free(pq);
+ return;
+ }
+ }
+}
+
+static void
+rib_pq_add_entry(struct rib_entry *re, uint32_t path_id_tx, struct prefix *p)
+{
+ struct pq_entry *pq;
+
+ if ((pq = calloc(1, sizeof(*pq))) == NULL)
+ fatal(NULL);
+
+ pq->path_id_tx = path_id_tx;
+ pq->p = p;
+ TAILQ_INSERT_TAIL(&re->pq_head, pq, entry);
+}
+
+/*
+ * Enqueue prefix entry into the rib pending queue and enqueue the
+ * rib entry on the peer if needed. If p is NULL the entry is a withdraw.
+ */
+void
+rib_pq_enqueue(struct rib_entry *re, struct rde_peer *peer,
+ uint32_t path_id_tx, struct prefix *p)
+{
+ /* only store paths if add-path or 'rde evaluate all' is used */
+ if (rde_evaluate_all()) {
+ rib_pq_remove_entry(re, path_id_tx);
+ rib_pq_add_entry(re, path_id_tx, p);
+ }
+
+ /*
+ * Enqueue only once, may need some reconsideration if queue
+ * length of individual peers becomes excessivly long.
+ */
+ if (re->pq_mode == EVAL_NONE) {
+ re->pq_peer_id = peer->conf.id;
+ TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
+ rdemem.rde_rib_entry_count++;
+ peer->stats.rib_entry_count++;
+ }
+
+}
+
+static void
+rib_pq_free(struct rib_entry *re)
+{
+ struct pq_entry *pq;
+
+ while ((pq = TAILQ_FIRST(&re->pq_head)) != NULL) {
+ TAILQ_REMOVE(&re->pq_head, pq, entry);
+ free(pq);
+ }
+ re->pq_mode = EVAL_NONE;
+}
+
+void
+rib_pq_dequeue(struct rib_entry *re)
+{
+ rib_pq_free(re);
+ if (rib_empty(re))
+ rib_remove(re);
+}
+
static void
rib_remove(struct rib_entry *re)
{
@@ -350,23 +437,11 @@ rib_remove(struct rib_entry *re)
log_warnx("rib_remove: remove failed.");
pt_unref(re->prefix);
+ if (!TAILQ_EMPTY(&re->pq_head))
+ fatalx("rib entry removed with pending updates");
free(re);
rdemem.rib_cnt--;
-}
-
-static inline int
-rib_empty(struct rib_entry *re)
-{
- return TAILQ_EMPTY(&re->prefix_h);
-}
-
-void
-rib_dequeue(struct rib_entry *re)
-{
- re->pq_mode = EVAL_NONE;
- if (rib_empty(re))
- rib_remove(re);
}
static struct rib_entry *
bgpd: implement rib prefix pending queue