From: Claudio Jeker Subject: bgpd: implement rib prefix pending queue To: tech@openbsd.org Date: Wed, 1 Jul 2026 19:59:16 +0200 Next small step towards optimising add-path send. This implements the actually pending prefix queue in struct re_entry. Elements are added to the queue and the queue is flushed once processed. Nothing is currently using this data. This will follow next. Right now this uses a simple TAILQ. It is hard to estimate if this is good enough or a better structure is needed. Only profiling this on some large setups will give us the proper insights. Prefixes / paths are added to the queue and replaced on every update. Once peer_process_updates() runs for a given rib entry the queue is flushed. In rib_remove() we just check that the queue is actually empty. This should be implied by the re_is_queued() checked a bit earlier but lets be sure. -- :wq Claudio Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v diff -u -p -r1.355 rde.h --- rde.h 1 Jul 2026 09:53:47 -0000 1.355 +++ rde.h 1 Jul 2026 14:37:08 -0000 @@ -42,14 +42,15 @@ LIST_HEAD(prefix_list, prefix); TAILQ_HEAD(prefix_queue, prefix); RB_HEAD(rib_tree, rib_entry); TAILQ_HEAD(rib_queue, rib_entry); -LIST_HEAD(rib_pq_head, rib_pq); +struct pq_entry; +TAILQ_HEAD(pq_head, pq_entry); struct rib_entry { RB_ENTRY(rib_entry) rib_e; TAILQ_ENTRY(rib_entry) rib_queue; struct prefix_queue prefix_h; struct pt_entry *prefix; - struct rib_pq_head rib_pq_list; + struct pq_head pq_head; uint32_t pq_peer_id; uint16_t rib_id; uint8_t lock; @@ -638,7 +639,9 @@ int rib_dump_subtree(uint16_t, struct void (*)(void *, uint8_t), int (*)(void *)); void rib_dump_terminate(void *); -void rib_dequeue(struct rib_entry *); +void rib_pq_enqueue(struct rib_entry *, struct rde_peer *, + uint32_t, struct prefix *); +void rib_pq_dequeue(struct rib_entry *); extern struct rib flowrib; Index: rde_peer.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v diff -u -p -r1.77 rde_peer.c --- rde_peer.c 1 Jul 2026 09:53:47 -0000 1.77 +++ rde_peer.c 1 Jul 2026 14:33:30 -0000 @@ -26,12 +26,6 @@ #include "bgpd.h" #include "rde.h" -struct rib_pq { - LIST_ENTRY(rib_pq) entry; - struct prefix *new; - uint32_t old_pathid_tx; -}; - struct peer_tree peertable = RB_INITIALIZER(&peertable); struct peer_tree zombietable = RB_INITIALIZER(&zombietable); struct rde_peer *peerself; @@ -306,12 +301,16 @@ peer_generate_update(struct rde_peer *pe * EVAL_REEVAL is used by config reloads (a full RIB refresh is needed) * and for single peer RIB dumps but those call peer_generate_update() * directly. + * peer, newpath and old_pathid_tx are ignored if mode is EVAL_REEVAL. + * In the other cases either newpath or old_pathid_tx are valid but not + * both at the same time. */ void rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer, - struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode) + struct prefix *newpath, uint32_t old_path_id_tx, enum eval_mode mode) { struct rde_peer *p; + uint32_t path_id_tx; switch (mode) { case EVAL_REEVAL: @@ -329,16 +328,12 @@ rde_enqueue_updates(struct rib_entry *re fatalx("bad eval mode in %s", __func__); } - /* - * Enqueue only once, may need some reconsideration if queue - * length of individual peers becomes excessivly long. - */ - if (re->pq_mode == EVAL_NONE) { - re->pq_peer_id = peer->conf.id; - TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue); - rdemem.rde_rib_entry_count++; - peer->stats.rib_entry_count++; - } + if (newpath != NULL) + path_id_tx = newpath->path_id_tx; + else + path_id_tx = old_path_id_tx; + + rib_pq_enqueue(re, peer, path_id_tx, newpath); /* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */ if (re->pq_mode != EVAL_DEFAULT) @@ -361,7 +356,7 @@ peer_process_updates(struct rde_peer *pe RB_FOREACH(p, peer_tree, &peertable) peer_generate_update(p, re, re->pq_mode, 0); - rib_dequeue(re); + rib_pq_dequeue(re); } /* Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v diff -u -p -r1.301 rde_rib.c --- rde_rib.c 1 Jul 2026 09:53:47 -0000 1.301 +++ rde_rib.c 1 Jul 2026 15:59:24 -0000 @@ -41,11 +41,17 @@ uint16_t rib_size; struct rib **ribs; struct rib flowrib = { .id = 1, .tree = RB_INITIALIZER(&flowrib.tree) }; +struct pq_entry { + TAILQ_ENTRY(pq_entry) entry; + struct prefix *p; /* NULL for withdraws */ + uint32_t path_id_tx; +}; + struct rib_entry *rib_add(struct rib *, struct pt_entry *); static inline int rib_compare(const struct rib_entry *, const struct rib_entry *); -static void rib_remove(struct rib_entry *); static inline int rib_empty(struct rib_entry *); +static void rib_remove(struct rib_entry *); static void rib_dump_abort(uint16_t); RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare); @@ -321,6 +327,7 @@ rib_add(struct rib *rib, struct pt_entry fatal("rib_add"); TAILQ_INIT(&re->prefix_h); + TAILQ_INIT(&re->pq_head); re->prefix = pt_ref(pte); re->rib_id = rib->id; re->pq_mode = EVAL_NONE; @@ -336,6 +343,86 @@ rib_add(struct rib *rib, struct pt_entry return (re); } +static inline int +rib_empty(struct rib_entry *re) +{ + return TAILQ_EMPTY(&re->prefix_h); +} + +static void +rib_pq_remove_entry(struct rib_entry *re, uint32_t path_id_tx) +{ + struct pq_entry *pq; + + TAILQ_FOREACH(pq, &re->pq_head, entry) { + if (pq->path_id_tx == path_id_tx) { + TAILQ_REMOVE(&re->pq_head, pq, entry); + free(pq); + return; + } + } +} + +static void +rib_pq_add_entry(struct rib_entry *re, uint32_t path_id_tx, struct prefix *p) +{ + struct pq_entry *pq; + + if ((pq = calloc(1, sizeof(*pq))) == NULL) + fatal(NULL); + + pq->path_id_tx = path_id_tx; + pq->p = p; + TAILQ_INSERT_TAIL(&re->pq_head, pq, entry); +} + +/* + * Enqueue prefix entry into the rib pending queue and enqueue the + * rib entry on the peer if needed. If p is NULL the entry is a withdraw. + */ +void +rib_pq_enqueue(struct rib_entry *re, struct rde_peer *peer, + uint32_t path_id_tx, struct prefix *p) +{ + /* only store paths if add-path or 'rde evaluate all' is used */ + if (rde_evaluate_all()) { + rib_pq_remove_entry(re, path_id_tx); + rib_pq_add_entry(re, path_id_tx, p); + } + + /* + * Enqueue only once, may need some reconsideration if queue + * length of individual peers becomes excessivly long. + */ + if (re->pq_mode == EVAL_NONE) { + re->pq_peer_id = peer->conf.id; + TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue); + rdemem.rde_rib_entry_count++; + peer->stats.rib_entry_count++; + } + +} + +static void +rib_pq_free(struct rib_entry *re) +{ + struct pq_entry *pq; + + while ((pq = TAILQ_FIRST(&re->pq_head)) != NULL) { + TAILQ_REMOVE(&re->pq_head, pq, entry); + free(pq); + } + re->pq_mode = EVAL_NONE; +} + +void +rib_pq_dequeue(struct rib_entry *re) +{ + rib_pq_free(re); + if (rib_empty(re)) + rib_remove(re); +} + static void rib_remove(struct rib_entry *re) { @@ -350,23 +437,11 @@ rib_remove(struct rib_entry *re) log_warnx("rib_remove: remove failed."); pt_unref(re->prefix); + if (!TAILQ_EMPTY(&re->pq_head)) + fatalx("rib entry removed with pending updates"); free(re); rdemem.rib_cnt--; -} - -static inline int -rib_empty(struct rib_entry *re) -{ - return TAILQ_EMPTY(&re->prefix_h); -} - -void -rib_dequeue(struct rib_entry *re) -{ - re->pq_mode = EVAL_NONE; - if (rib_empty(re)) - rib_remove(re); } static struct rib_entry *