Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: implement rib prefix pending queue
To:
tech@openbsd.org
Date:
Wed, 1 Jul 2026 19:59:16 +0200

Download raw body.

Thread
Next small step towards optimising add-path send.

This implements the actually pending prefix queue in struct re_entry.
Elements are added to the queue and the queue is flushed once processed.
Nothing is currently using this data. This will follow next.

Right now this uses a simple TAILQ. It is hard to estimate if this
is good enough or a better structure is needed. Only profiling this on
some large setups will give us the proper insights.

Prefixes / paths are added to the queue and replaced on every update.
Once peer_process_updates() runs for a given rib entry the queue is
flushed.  In rib_remove() we just check that the queue is actually empty.
This should be implied by the re_is_queued() checked a bit earlier but
lets be sure.

-- 
:wq Claudio

Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.355 rde.h
--- rde.h	1 Jul 2026 09:53:47 -0000	1.355
+++ rde.h	1 Jul 2026 14:37:08 -0000
@@ -42,14 +42,15 @@ LIST_HEAD(prefix_list, prefix);
 TAILQ_HEAD(prefix_queue, prefix);
 RB_HEAD(rib_tree, rib_entry);
 TAILQ_HEAD(rib_queue, rib_entry);
-LIST_HEAD(rib_pq_head, rib_pq);
+struct pq_entry;
+TAILQ_HEAD(pq_head, pq_entry);
 
 struct rib_entry {
 	RB_ENTRY(rib_entry)	 rib_e;
 	TAILQ_ENTRY(rib_entry)	 rib_queue;
 	struct prefix_queue	 prefix_h;
 	struct pt_entry		*prefix;
-	struct rib_pq_head	 rib_pq_list;
+	struct pq_head		 pq_head;
 	uint32_t		 pq_peer_id;
 	uint16_t		 rib_id;
 	uint8_t			 lock;
@@ -638,7 +639,9 @@ int		 rib_dump_subtree(uint16_t, struct 
 		    void (*)(void *, uint8_t),
 		    int (*)(void *));
 void		 rib_dump_terminate(void *);
-void		 rib_dequeue(struct rib_entry *);
+void		 rib_pq_enqueue(struct rib_entry *, struct rde_peer *,
+		    uint32_t, struct prefix *);
+void		 rib_pq_dequeue(struct rib_entry *);
 
 extern struct rib flowrib;
 
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.77 rde_peer.c
--- rde_peer.c	1 Jul 2026 09:53:47 -0000	1.77
+++ rde_peer.c	1 Jul 2026 14:33:30 -0000
@@ -26,12 +26,6 @@
 #include "bgpd.h"
 #include "rde.h"
 
-struct rib_pq {
-	LIST_ENTRY(rib_pq)	entry;
-	struct prefix		*new;
-	uint32_t		old_pathid_tx;
-};
-
 struct peer_tree	 peertable = RB_INITIALIZER(&peertable);
 struct peer_tree	 zombietable = RB_INITIALIZER(&zombietable);
 struct rde_peer		*peerself;
@@ -306,12 +301,16 @@ peer_generate_update(struct rde_peer *pe
  *	EVAL_REEVAL is used by config reloads (a full RIB refresh is needed)
  *	and for single peer RIB dumps but those call peer_generate_update()
  *	directly.
+ * peer, newpath and old_pathid_tx are ignored if mode is EVAL_REEVAL.
+ * In the other cases either newpath or old_pathid_tx are valid but not
+ * both at the same time.
  */
 void
 rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer,
-    struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode)
+    struct prefix *newpath, uint32_t old_path_id_tx, enum eval_mode mode)
 {
 	struct rde_peer *p;
+	uint32_t path_id_tx;
 
 	switch (mode) {
 	case EVAL_REEVAL:
@@ -329,16 +328,12 @@ rde_enqueue_updates(struct rib_entry *re
 		fatalx("bad eval mode in %s", __func__);
 	}
 
-	/*
-	 * Enqueue only once, may need some reconsideration if queue
-	 * length of individual peers becomes excessivly long.
-	 */
-	if (re->pq_mode == EVAL_NONE) {
-		re->pq_peer_id = peer->conf.id;
-		TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
-		rdemem.rde_rib_entry_count++;
-		peer->stats.rib_entry_count++;
-	}
+	if (newpath != NULL)
+		path_id_tx = newpath->path_id_tx;
+	else
+		path_id_tx = old_path_id_tx;
+
+	rib_pq_enqueue(re, peer, path_id_tx, newpath);
 
 	/* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */
 	if (re->pq_mode != EVAL_DEFAULT)
@@ -361,7 +356,7 @@ peer_process_updates(struct rde_peer *pe
 	RB_FOREACH(p, peer_tree, &peertable)
 		peer_generate_update(p, re, re->pq_mode, 0);
 
-	rib_dequeue(re);
+	rib_pq_dequeue(re);
 }
 
 /*
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
diff -u -p -r1.301 rde_rib.c
--- rde_rib.c	1 Jul 2026 09:53:47 -0000	1.301
+++ rde_rib.c	1 Jul 2026 15:59:24 -0000
@@ -41,11 +41,17 @@ uint16_t rib_size;
 struct rib **ribs;
 struct rib flowrib = { .id = 1, .tree = RB_INITIALIZER(&flowrib.tree) };
 
+struct pq_entry {
+	TAILQ_ENTRY(pq_entry)	 entry;
+	struct prefix		*p;	/* NULL for withdraws */
+	uint32_t		 path_id_tx;
+};
+
 struct rib_entry *rib_add(struct rib *, struct pt_entry *);
 static inline int rib_compare(const struct rib_entry *,
 			const struct rib_entry *);
-static void rib_remove(struct rib_entry *);
 static inline int rib_empty(struct rib_entry *);
+static void rib_remove(struct rib_entry *);
 static void rib_dump_abort(uint16_t);
 
 RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare);
@@ -321,6 +327,7 @@ rib_add(struct rib *rib, struct pt_entry
 		fatal("rib_add");
 
 	TAILQ_INIT(&re->prefix_h);
+	TAILQ_INIT(&re->pq_head);
 	re->prefix = pt_ref(pte);
 	re->rib_id = rib->id;
 	re->pq_mode = EVAL_NONE;
@@ -336,6 +343,86 @@ rib_add(struct rib *rib, struct pt_entry
 	return (re);
 }
 
+static inline int
+rib_empty(struct rib_entry *re)
+{
+	return TAILQ_EMPTY(&re->prefix_h);
+}
+
+static void
+rib_pq_remove_entry(struct rib_entry *re, uint32_t path_id_tx)
+{
+	struct pq_entry *pq;
+
+	TAILQ_FOREACH(pq, &re->pq_head, entry) {
+		if (pq->path_id_tx == path_id_tx) {
+			TAILQ_REMOVE(&re->pq_head, pq, entry);
+			free(pq);
+			return;
+		}
+	}
+}
+
+static void
+rib_pq_add_entry(struct rib_entry *re, uint32_t path_id_tx, struct prefix *p)
+{
+	struct pq_entry *pq;
+
+	if ((pq = calloc(1, sizeof(*pq))) == NULL)
+		fatal(NULL);
+
+	pq->path_id_tx = path_id_tx;
+	pq->p = p;
+	TAILQ_INSERT_TAIL(&re->pq_head, pq, entry);
+}
+
+/*
+ * Enqueue prefix entry into the rib pending queue and enqueue the
+ * rib entry on the peer if needed. If p is NULL the entry is a withdraw.
+ */
+void
+rib_pq_enqueue(struct rib_entry *re, struct rde_peer *peer,
+    uint32_t path_id_tx, struct prefix *p)
+{
+	/* only store paths if add-path or 'rde evaluate all' is used */
+	if (rde_evaluate_all()) {
+		rib_pq_remove_entry(re, path_id_tx);
+		rib_pq_add_entry(re, path_id_tx, p);
+	}
+
+	/*
+	 * Enqueue only once, may need some reconsideration if queue
+	 * length of individual peers becomes excessivly long.
+	 */
+	if (re->pq_mode == EVAL_NONE) {
+		re->pq_peer_id = peer->conf.id;
+		TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
+		rdemem.rde_rib_entry_count++;
+		peer->stats.rib_entry_count++;
+	}
+
+}
+
+static void
+rib_pq_free(struct rib_entry *re)
+{
+	struct pq_entry *pq;
+
+	while ((pq = TAILQ_FIRST(&re->pq_head)) != NULL) {
+		TAILQ_REMOVE(&re->pq_head, pq, entry);
+		free(pq);
+	}
+	re->pq_mode = EVAL_NONE;
+}
+
+void
+rib_pq_dequeue(struct rib_entry *re)
+{
+	rib_pq_free(re);
+	if (rib_empty(re))
+		rib_remove(re);
+}
+
 static void
 rib_remove(struct rib_entry *re)
 {
@@ -350,23 +437,11 @@ rib_remove(struct rib_entry *re)
 		log_warnx("rib_remove: remove failed.");
 
 	pt_unref(re->prefix);
+	if (!TAILQ_EMPTY(&re->pq_head))
+		fatalx("rib entry removed with pending updates");
 
 	free(re);
 	rdemem.rib_cnt--;
-}
-
-static inline int
-rib_empty(struct rib_entry *re)
-{
-	return TAILQ_EMPTY(&re->prefix_h);
-}
-
-void
-rib_dequeue(struct rib_entry *re)
-{
-	re->pq_mode = EVAL_NONE;
-	if (rib_empty(re))
-		rib_remove(re);
 }
 
 static struct rib_entry *