From: Claudio Jeker Subject: Re: bgpd: rework rde_enqueue_updates() to simplify the rib queue handling To: Theo Buehler Cc: tech@openbsd.org Date: Wed, 1 Jul 2026 10:17:30 +0200 On Wed, Jul 01, 2026 at 08:13:23AM +0200, Theo Buehler wrote: > On Tue, Jun 30, 2026 at 09:33:50PM +0200, Claudio Jeker wrote: > > This is the next step in preparation of implementing a full rib update > > queue. This is needed to improve performance for add-path send. > > > > Add a new peer argument to rde_enqueue_updates() and use this to enqueue > > the update on. Only enqueue it once and not like now where new updates > > would requeue the update on the new peer. The effect of this needs to be > > further studied and adjusted but doing this is a lot simpler and ensures > > that order is somewhat kept. This also removes the need to enqueue > > all withdraws onto peerself which seems like a bad idea as well. > > Unrelated to your diff: I was a bit confused by the doc comment above > prefix_set_dmetric() mentioning prefix_evaluate(). Perhaps we could > adjust that (maybe it's enough to simply say prefix_cmp() instead?) Will look into this but as a followup. > Diff reads fine to me, so > > ok tb > > Two comments inline. > > > > > -- > > :wq Claudio > > > > Index: rde.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > > diff -u -p -r1.707 rde.c > > --- rde.c 24 Jun 2026 18:56:53 -0000 1.707 > > +++ rde.c 29 Jun 2026 15:13:32 -0000 > > @@ -4303,7 +4303,7 @@ rde_softreconfig_out(struct rib_entry *r > > /* no valid path for prefix */ > > return; > > > > - rde_enqueue_updates(re, NULL, 0, EVAL_REEVAL); > > + rde_enqueue_updates(re, NULL, NULL, 0, EVAL_REEVAL); > > } > > > > static void > > Index: rde.h > > =================================================================== > > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v > > diff -u -p -r1.354 rde.h > > --- rde.h 24 Jun 2026 18:56:53 -0000 1.354 > > +++ rde.h 29 Jun 2026 15:08:06 -0000 > > @@ -420,8 +420,8 @@ struct rde_peer *peer_add(uint32_t, stru > > struct rde_filter *peer_apply_out_filter(struct rde_peer *, > > struct filter_head *); > > > > -void rde_enqueue_updates(struct rib_entry *, struct prefix *, > > - uint32_t, enum eval_mode); > > +void rde_enqueue_updates(struct rib_entry *, struct rde_peer *, > > + struct prefix *, uint32_t, enum eval_mode); > > void peer_process_updates(struct rde_peer *, void *); > > > > void peer_up(struct rde_peer *, struct session_up *); > > Index: rde_decide.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v > > diff -u -p -r1.108 rde_decide.c > > --- rde_decide.c 21 May 2026 15:20:27 -0000 1.108 > > +++ rde_decide.c 29 Jun 2026 15:13:32 -0000 > > @@ -534,6 +534,7 @@ void > > prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old) > > { > > struct prefix *newbest, *oldbest; > > + struct rde_peer *peer; > > struct rib *rib; > > uint32_t old_pathid_tx = 0; > > > > @@ -553,6 +554,7 @@ prefix_evaluate(struct rib_entry *re, st > > if (old != NULL) { > > prefix_remove(old, re); > > old_pathid_tx = old->path_id_tx; > > + peer = prefix_peer(old); > > } > > if (new != NULL) { > > prefix_insert(new, NULL, re); > > @@ -560,6 +562,7 @@ prefix_evaluate(struct rib_entry *re, st > > new = NULL; > > else > > old_pathid_tx = 0; > > + peer = prefix_peer(new); > > } > > newbest = prefix_best(re); > > > > @@ -575,7 +578,7 @@ prefix_evaluate(struct rib_entry *re, st > > */ > > if ((rib->flags & F_RIB_NOFIB) == 0) > > rde_send_kroute(rib, newbest, oldbest); > > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT); > > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT); > > return; > > } > > > > @@ -588,7 +591,7 @@ prefix_evaluate(struct rib_entry *re, st > > /* no old path to remove and path is ineligible, skip rest */ > > if (old_pathid_tx == 0 && new == NULL) > > return; > > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL); > > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL); > > While at this point we know old_pathid_tx != 0 || new != NULL, so we > know that peer has been initialized, this isn't super obvious and far > beyond gcc's deduction capabilities (both gcc 4.2.1 and 15.1 whine). > > I don't see a good fix (if we do want to fix this). Doing peer = NULL > at the top and changing the above to > > if ((old_pathid_tx == 0 && new == NULL) || peer == NULL) > return; > > doesn't look like an improvement to me... Hmmm. I'll have a closer look. We certainly can initalize peer = NULL at the top. Not sure if we need to check for peer == NULL though. > > } > > } > > > > @@ -598,6 +601,7 @@ prefix_evaluate_nexthop(struct prefix *p > > { > > struct rib_entry *re = prefix_re(p); > > struct prefix *newbest, *oldbest, *new, *old; > > + struct rde_peer *peer; > > struct rib *rib; > > uint32_t old_pathid_tx = 0; > > > > @@ -631,6 +635,8 @@ prefix_evaluate_nexthop(struct prefix *p > > oldbest = prefix_best(re); > > > > old = p; > > + peer = prefix_peer(p); > > + > > prefix_remove(old, re); > > if (prefix_eligible(old)) > > old_pathid_tx = old->path_id_tx; > > @@ -665,7 +671,7 @@ prefix_evaluate_nexthop(struct prefix *p > > */ > > if ((rib->flags & F_RIB_NOFIB) == 0) > > rde_send_kroute(rib, newbest, oldbest); > > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT); > > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT); > > return; > > } > > > > @@ -675,5 +681,5 @@ prefix_evaluate_nexthop(struct prefix *p > > * rde_enqueue_updates() will then take care of distribution. > > */ > > if (rde_evaluate_all()) > > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL); > > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL); > > } > > Index: rde_peer.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v > > diff -u -p -r1.76 rde_peer.c > > --- rde_peer.c 24 Jun 2026 18:56:53 -0000 1.76 > > +++ rde_peer.c 30 Jun 2026 19:27:08 -0000 > > @@ -297,12 +297,20 @@ peer_generate_update(struct rde_peer *pe > > up_generate_updates(peer, re, force_update); > > } > > > > +/* > > + * Enqueue updates into the peer queue specified by peer. The meaning of > > + * mode is: > > + * EVAL_DEFAULT is triggered when the best path changes. > > + * EVAL_ALL is sent for any other update (needed for peers with > > + * addpath or evaluate all set). > > + * EVAL_REEVAL is used by config reloads (a full RIB refresh is needed) > > + * and for single peer RIB dumps but those call peer_generate_update() > > + * directly. > > + */ > > void > > -rde_enqueue_updates(struct rib_entry *re, struct prefix *newpath, > > - uint32_t old_pathid_tx, enum eval_mode mode) > > +rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer, > > + struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode) > > { > > - struct rde_peer *peer; > > - > > switch (mode) { > > case EVAL_REEVAL: > > /* skip peers which don't need to reconfigure */ > > Maybe that's just me, but using the function argument to iterate over the > peers is a bit ugly. Kind of agree, I was just lazy :) > > @@ -313,40 +321,26 @@ rde_enqueue_updates(struct rib_entry *re > > } > > return; > > case EVAL_DEFAULT: > > - break; > > case EVAL_ALL: > > - /* > > - * EVAL_DEFAULT is triggered when a new best path is selected. > > - * EVAL_ALL is sent for any other update (needed for peers with > > - * addpath or evaluate all set). > > - * There can be only one EVAL_DEFAULT queued, it replaces the > > - * previous one. A flag is enough. > > - * A path can only exist once in the queue (old or new). > > - */ > > - if (re->pq_mode == EVAL_DEFAULT) > > - /* already a best path update pending, nothing to do */ > > - return; > > - > > break; > > case EVAL_NONE: > > fatalx("bad eval mode in %s", __func__); > > } > > > > - if (re->pq_mode != EVAL_NONE) { > > - peer = peer_get(re->pq_peer_id); > > - TAILQ_REMOVE(&peer->rib_pq_head, re, rib_queue); > > - rdemem.rde_rib_entry_count--; > > - peer->stats.rib_entry_count--; > > + /* > > + * Enqueue only once, may need some reconsideration if queue > > + * length of individual peers becomes excessivly long. > > + */ > > + if (re->pq_mode == EVAL_NONE) { > > + re->pq_peer_id = peer->conf.id; > > + TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue); > > + rdemem.rde_rib_entry_count++; > > + peer->stats.rib_entry_count++; > > } > > - if (newpath != NULL) > > - peer = prefix_peer(newpath); > > - else > > - peer = peerself; > > - re->pq_mode = mode; > > - re->pq_peer_id = peer->conf.id; > > - TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue); > > - rdemem.rde_rib_entry_count++; > > - peer->stats.rib_entry_count++; > > + > > + /* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */ > > + if (re->pq_mode != EVAL_DEFAULT) > > + re->pq_mode = mode; > > } > > > > void > > @@ -354,7 +348,6 @@ peer_process_updates(struct rde_peer *pe > > { > > struct rib_entry *re; > > struct rde_peer *p; > > - enum eval_mode mode; > > > > re = TAILQ_FIRST(&peer->rib_pq_head); > > if (re == NULL) > > @@ -363,10 +356,8 @@ peer_process_updates(struct rde_peer *pe > > rdemem.rde_rib_entry_count--; > > peer->stats.rib_entry_count--; > > > > - mode = re->pq_mode; > > - > > RB_FOREACH(p, peer_tree, &peertable) > > - peer_generate_update(p, re, mode, 0); > > + peer_generate_update(p, re, re->pq_mode, 0); > > > > rib_dequeue(re); > > } > > Index: rde_rib.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v > > diff -u -p -r1.300 rde_rib.c > > --- rde_rib.c 28 May 2026 12:53:55 -0000 1.300 > > +++ rde_rib.c 29 Jun 2026 15:14:06 -0000 > > @@ -1065,7 +1065,7 @@ prefix_flowspec_update(struct rde_peer * > > > > if (old != NULL) > > old_pathid_tx = old->path_id_tx; > > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT); > > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT); > > > > if (old != NULL) { > > TAILQ_REMOVE(&re->prefix_h, old, rib_l); > > @@ -1091,7 +1091,7 @@ prefix_flowspec_withdraw(struct rde_peer > > p = prefix_bypeer(re, peer, 0); > > if (p == NULL) > > return 0; > > - rde_enqueue_updates(re, NULL, p->path_id_tx, EVAL_DEFAULT); > > + rde_enqueue_updates(re, peer, NULL, p->path_id_tx, EVAL_DEFAULT); > > TAILQ_REMOVE(&re->prefix_h, p, rib_l); > > prefix_unlink(p); > > prefix_free(p); > > > -- :wq Claudio