From: Theo Buehler Subject: Re: bgpd: rework rde_enqueue_updates() to simplify the rib queue handling To: tech@openbsd.org Date: Wed, 1 Jul 2026 08:13:23 +0200 On Tue, Jun 30, 2026 at 09:33:50PM +0200, Claudio Jeker wrote: > This is the next step in preparation of implementing a full rib update > queue. This is needed to improve performance for add-path send. > > Add a new peer argument to rde_enqueue_updates() and use this to enqueue > the update on. Only enqueue it once and not like now where new updates > would requeue the update on the new peer. The effect of this needs to be > further studied and adjusted but doing this is a lot simpler and ensures > that order is somewhat kept. This also removes the need to enqueue > all withdraws onto peerself which seems like a bad idea as well. Unrelated to your diff: I was a bit confused by the doc comment above prefix_set_dmetric() mentioning prefix_evaluate(). Perhaps we could adjust that (maybe it's enough to simply say prefix_cmp() instead?) Diff reads fine to me, so ok tb Two comments inline. > > -- > :wq Claudio > > Index: rde.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > diff -u -p -r1.707 rde.c > --- rde.c 24 Jun 2026 18:56:53 -0000 1.707 > +++ rde.c 29 Jun 2026 15:13:32 -0000 > @@ -4303,7 +4303,7 @@ rde_softreconfig_out(struct rib_entry *r > /* no valid path for prefix */ > return; > > - rde_enqueue_updates(re, NULL, 0, EVAL_REEVAL); > + rde_enqueue_updates(re, NULL, NULL, 0, EVAL_REEVAL); > } > > static void > Index: rde.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v > diff -u -p -r1.354 rde.h > --- rde.h 24 Jun 2026 18:56:53 -0000 1.354 > +++ rde.h 29 Jun 2026 15:08:06 -0000 > @@ -420,8 +420,8 @@ struct rde_peer *peer_add(uint32_t, stru > struct rde_filter *peer_apply_out_filter(struct rde_peer *, > struct filter_head *); > > -void rde_enqueue_updates(struct rib_entry *, struct prefix *, > - uint32_t, enum eval_mode); > +void rde_enqueue_updates(struct rib_entry *, struct rde_peer *, > + struct prefix *, uint32_t, enum eval_mode); > void peer_process_updates(struct rde_peer *, void *); > > void peer_up(struct rde_peer *, struct session_up *); > Index: rde_decide.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v > diff -u -p -r1.108 rde_decide.c > --- rde_decide.c 21 May 2026 15:20:27 -0000 1.108 > +++ rde_decide.c 29 Jun 2026 15:13:32 -0000 > @@ -534,6 +534,7 @@ void > prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old) > { > struct prefix *newbest, *oldbest; > + struct rde_peer *peer; > struct rib *rib; > uint32_t old_pathid_tx = 0; > > @@ -553,6 +554,7 @@ prefix_evaluate(struct rib_entry *re, st > if (old != NULL) { > prefix_remove(old, re); > old_pathid_tx = old->path_id_tx; > + peer = prefix_peer(old); > } > if (new != NULL) { > prefix_insert(new, NULL, re); > @@ -560,6 +562,7 @@ prefix_evaluate(struct rib_entry *re, st > new = NULL; > else > old_pathid_tx = 0; > + peer = prefix_peer(new); > } > newbest = prefix_best(re); > > @@ -575,7 +578,7 @@ prefix_evaluate(struct rib_entry *re, st > */ > if ((rib->flags & F_RIB_NOFIB) == 0) > rde_send_kroute(rib, newbest, oldbest); > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT); > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT); > return; > } > > @@ -588,7 +591,7 @@ prefix_evaluate(struct rib_entry *re, st > /* no old path to remove and path is ineligible, skip rest */ > if (old_pathid_tx == 0 && new == NULL) > return; > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL); > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL); While at this point we know old_pathid_tx != 0 || new != NULL, so we know that peer has been initialized, this isn't super obvious and far beyond gcc's deduction capabilities (both gcc 4.2.1 and 15.1 whine). I don't see a good fix (if we do want to fix this). Doing peer = NULL at the top and changing the above to if ((old_pathid_tx == 0 && new == NULL) || peer == NULL) return; doesn't look like an improvement to me... > } > } > > @@ -598,6 +601,7 @@ prefix_evaluate_nexthop(struct prefix *p > { > struct rib_entry *re = prefix_re(p); > struct prefix *newbest, *oldbest, *new, *old; > + struct rde_peer *peer; > struct rib *rib; > uint32_t old_pathid_tx = 0; > > @@ -631,6 +635,8 @@ prefix_evaluate_nexthop(struct prefix *p > oldbest = prefix_best(re); > > old = p; > + peer = prefix_peer(p); > + > prefix_remove(old, re); > if (prefix_eligible(old)) > old_pathid_tx = old->path_id_tx; > @@ -665,7 +671,7 @@ prefix_evaluate_nexthop(struct prefix *p > */ > if ((rib->flags & F_RIB_NOFIB) == 0) > rde_send_kroute(rib, newbest, oldbest); > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT); > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT); > return; > } > > @@ -675,5 +681,5 @@ prefix_evaluate_nexthop(struct prefix *p > * rde_enqueue_updates() will then take care of distribution. > */ > if (rde_evaluate_all()) > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL); > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL); > } > Index: rde_peer.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v > diff -u -p -r1.76 rde_peer.c > --- rde_peer.c 24 Jun 2026 18:56:53 -0000 1.76 > +++ rde_peer.c 30 Jun 2026 19:27:08 -0000 > @@ -297,12 +297,20 @@ peer_generate_update(struct rde_peer *pe > up_generate_updates(peer, re, force_update); > } > > +/* > + * Enqueue updates into the peer queue specified by peer. The meaning of > + * mode is: > + * EVAL_DEFAULT is triggered when the best path changes. > + * EVAL_ALL is sent for any other update (needed for peers with > + * addpath or evaluate all set). > + * EVAL_REEVAL is used by config reloads (a full RIB refresh is needed) > + * and for single peer RIB dumps but those call peer_generate_update() > + * directly. > + */ > void > -rde_enqueue_updates(struct rib_entry *re, struct prefix *newpath, > - uint32_t old_pathid_tx, enum eval_mode mode) > +rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer, > + struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode) > { > - struct rde_peer *peer; > - > switch (mode) { > case EVAL_REEVAL: > /* skip peers which don't need to reconfigure */ Maybe that's just me, but using the function argument to iterate over the peers is a bit ugly. > @@ -313,40 +321,26 @@ rde_enqueue_updates(struct rib_entry *re > } > return; > case EVAL_DEFAULT: > - break; > case EVAL_ALL: > - /* > - * EVAL_DEFAULT is triggered when a new best path is selected. > - * EVAL_ALL is sent for any other update (needed for peers with > - * addpath or evaluate all set). > - * There can be only one EVAL_DEFAULT queued, it replaces the > - * previous one. A flag is enough. > - * A path can only exist once in the queue (old or new). > - */ > - if (re->pq_mode == EVAL_DEFAULT) > - /* already a best path update pending, nothing to do */ > - return; > - > break; > case EVAL_NONE: > fatalx("bad eval mode in %s", __func__); > } > > - if (re->pq_mode != EVAL_NONE) { > - peer = peer_get(re->pq_peer_id); > - TAILQ_REMOVE(&peer->rib_pq_head, re, rib_queue); > - rdemem.rde_rib_entry_count--; > - peer->stats.rib_entry_count--; > + /* > + * Enqueue only once, may need some reconsideration if queue > + * length of individual peers becomes excessivly long. > + */ > + if (re->pq_mode == EVAL_NONE) { > + re->pq_peer_id = peer->conf.id; > + TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue); > + rdemem.rde_rib_entry_count++; > + peer->stats.rib_entry_count++; > } > - if (newpath != NULL) > - peer = prefix_peer(newpath); > - else > - peer = peerself; > - re->pq_mode = mode; > - re->pq_peer_id = peer->conf.id; > - TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue); > - rdemem.rde_rib_entry_count++; > - peer->stats.rib_entry_count++; > + > + /* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */ > + if (re->pq_mode != EVAL_DEFAULT) > + re->pq_mode = mode; > } > > void > @@ -354,7 +348,6 @@ peer_process_updates(struct rde_peer *pe > { > struct rib_entry *re; > struct rde_peer *p; > - enum eval_mode mode; > > re = TAILQ_FIRST(&peer->rib_pq_head); > if (re == NULL) > @@ -363,10 +356,8 @@ peer_process_updates(struct rde_peer *pe > rdemem.rde_rib_entry_count--; > peer->stats.rib_entry_count--; > > - mode = re->pq_mode; > - > RB_FOREACH(p, peer_tree, &peertable) > - peer_generate_update(p, re, mode, 0); > + peer_generate_update(p, re, re->pq_mode, 0); > > rib_dequeue(re); > } > Index: rde_rib.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v > diff -u -p -r1.300 rde_rib.c > --- rde_rib.c 28 May 2026 12:53:55 -0000 1.300 > +++ rde_rib.c 29 Jun 2026 15:14:06 -0000 > @@ -1065,7 +1065,7 @@ prefix_flowspec_update(struct rde_peer * > > if (old != NULL) > old_pathid_tx = old->path_id_tx; > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT); > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT); > > if (old != NULL) { > TAILQ_REMOVE(&re->prefix_h, old, rib_l); > @@ -1091,7 +1091,7 @@ prefix_flowspec_withdraw(struct rde_peer > p = prefix_bypeer(re, peer, 0); > if (p == NULL) > return 0; > - rde_enqueue_updates(re, NULL, p->path_id_tx, EVAL_DEFAULT); > + rde_enqueue_updates(re, peer, NULL, p->path_id_tx, EVAL_DEFAULT); > TAILQ_REMOVE(&re->prefix_h, p, rib_l); > prefix_unlink(p); > prefix_free(p); >