Download raw body.
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
On Wed, Jul 01, 2026 at 08:13:23AM +0200, Theo Buehler wrote:
> On Tue, Jun 30, 2026 at 09:33:50PM +0200, Claudio Jeker wrote:
> > This is the next step in preparation of implementing a full rib update
> > queue. This is needed to improve performance for add-path send.
> >
> > Add a new peer argument to rde_enqueue_updates() and use this to enqueue
> > the update on. Only enqueue it once and not like now where new updates
> > would requeue the update on the new peer. The effect of this needs to be
> > further studied and adjusted but doing this is a lot simpler and ensures
> > that order is somewhat kept. This also removes the need to enqueue
> > all withdraws onto peerself which seems like a bad idea as well.
>
> Unrelated to your diff: I was a bit confused by the doc comment above
> prefix_set_dmetric() mentioning prefix_evaluate(). Perhaps we could
> adjust that (maybe it's enough to simply say prefix_cmp() instead?)
Will look into this but as a followup.
> Diff reads fine to me, so
>
> ok tb
>
> Two comments inline.
>
> >
> > --
> > :wq Claudio
> >
> > Index: rde.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > diff -u -p -r1.707 rde.c
> > --- rde.c 24 Jun 2026 18:56:53 -0000 1.707
> > +++ rde.c 29 Jun 2026 15:13:32 -0000
> > @@ -4303,7 +4303,7 @@ rde_softreconfig_out(struct rib_entry *r
> > /* no valid path for prefix */
> > return;
> >
> > - rde_enqueue_updates(re, NULL, 0, EVAL_REEVAL);
> > + rde_enqueue_updates(re, NULL, NULL, 0, EVAL_REEVAL);
> > }
> >
> > static void
> > Index: rde.h
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> > diff -u -p -r1.354 rde.h
> > --- rde.h 24 Jun 2026 18:56:53 -0000 1.354
> > +++ rde.h 29 Jun 2026 15:08:06 -0000
> > @@ -420,8 +420,8 @@ struct rde_peer *peer_add(uint32_t, stru
> > struct rde_filter *peer_apply_out_filter(struct rde_peer *,
> > struct filter_head *);
> >
> > -void rde_enqueue_updates(struct rib_entry *, struct prefix *,
> > - uint32_t, enum eval_mode);
> > +void rde_enqueue_updates(struct rib_entry *, struct rde_peer *,
> > + struct prefix *, uint32_t, enum eval_mode);
> > void peer_process_updates(struct rde_peer *, void *);
> >
> > void peer_up(struct rde_peer *, struct session_up *);
> > Index: rde_decide.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> > diff -u -p -r1.108 rde_decide.c
> > --- rde_decide.c 21 May 2026 15:20:27 -0000 1.108
> > +++ rde_decide.c 29 Jun 2026 15:13:32 -0000
> > @@ -534,6 +534,7 @@ void
> > prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old)
> > {
> > struct prefix *newbest, *oldbest;
> > + struct rde_peer *peer;
> > struct rib *rib;
> > uint32_t old_pathid_tx = 0;
> >
> > @@ -553,6 +554,7 @@ prefix_evaluate(struct rib_entry *re, st
> > if (old != NULL) {
> > prefix_remove(old, re);
> > old_pathid_tx = old->path_id_tx;
> > + peer = prefix_peer(old);
> > }
> > if (new != NULL) {
> > prefix_insert(new, NULL, re);
> > @@ -560,6 +562,7 @@ prefix_evaluate(struct rib_entry *re, st
> > new = NULL;
> > else
> > old_pathid_tx = 0;
> > + peer = prefix_peer(new);
> > }
> > newbest = prefix_best(re);
> >
> > @@ -575,7 +578,7 @@ prefix_evaluate(struct rib_entry *re, st
> > */
> > if ((rib->flags & F_RIB_NOFIB) == 0)
> > rde_send_kroute(rib, newbest, oldbest);
> > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> > return;
> > }
> >
> > @@ -588,7 +591,7 @@ prefix_evaluate(struct rib_entry *re, st
> > /* no old path to remove and path is ineligible, skip rest */
> > if (old_pathid_tx == 0 && new == NULL)
> > return;
> > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL);
> > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL);
>
> While at this point we know old_pathid_tx != 0 || new != NULL, so we
> know that peer has been initialized, this isn't super obvious and far
> beyond gcc's deduction capabilities (both gcc 4.2.1 and 15.1 whine).
>
> I don't see a good fix (if we do want to fix this). Doing peer = NULL
> at the top and changing the above to
>
> if ((old_pathid_tx == 0 && new == NULL) || peer == NULL)
> return;
>
> doesn't look like an improvement to me...
Hmmm. I'll have a closer look. We certainly can initalize peer = NULL at
the top. Not sure if we need to check for peer == NULL though.
> > }
> > }
> >
> > @@ -598,6 +601,7 @@ prefix_evaluate_nexthop(struct prefix *p
> > {
> > struct rib_entry *re = prefix_re(p);
> > struct prefix *newbest, *oldbest, *new, *old;
> > + struct rde_peer *peer;
> > struct rib *rib;
> > uint32_t old_pathid_tx = 0;
> >
> > @@ -631,6 +635,8 @@ prefix_evaluate_nexthop(struct prefix *p
> > oldbest = prefix_best(re);
> >
> > old = p;
> > + peer = prefix_peer(p);
> > +
> > prefix_remove(old, re);
> > if (prefix_eligible(old))
> > old_pathid_tx = old->path_id_tx;
> > @@ -665,7 +671,7 @@ prefix_evaluate_nexthop(struct prefix *p
> > */
> > if ((rib->flags & F_RIB_NOFIB) == 0)
> > rde_send_kroute(rib, newbest, oldbest);
> > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> > return;
> > }
> >
> > @@ -675,5 +681,5 @@ prefix_evaluate_nexthop(struct prefix *p
> > * rde_enqueue_updates() will then take care of distribution.
> > */
> > if (rde_evaluate_all())
> > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL);
> > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL);
> > }
> > Index: rde_peer.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
> > diff -u -p -r1.76 rde_peer.c
> > --- rde_peer.c 24 Jun 2026 18:56:53 -0000 1.76
> > +++ rde_peer.c 30 Jun 2026 19:27:08 -0000
> > @@ -297,12 +297,20 @@ peer_generate_update(struct rde_peer *pe
> > up_generate_updates(peer, re, force_update);
> > }
> >
> > +/*
> > + * Enqueue updates into the peer queue specified by peer. The meaning of
> > + * mode is:
> > + * EVAL_DEFAULT is triggered when the best path changes.
> > + * EVAL_ALL is sent for any other update (needed for peers with
> > + * addpath or evaluate all set).
> > + * EVAL_REEVAL is used by config reloads (a full RIB refresh is needed)
> > + * and for single peer RIB dumps but those call peer_generate_update()
> > + * directly.
> > + */
> > void
> > -rde_enqueue_updates(struct rib_entry *re, struct prefix *newpath,
> > - uint32_t old_pathid_tx, enum eval_mode mode)
> > +rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer,
> > + struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode)
> > {
> > - struct rde_peer *peer;
> > -
> > switch (mode) {
> > case EVAL_REEVAL:
> > /* skip peers which don't need to reconfigure */
>
> Maybe that's just me, but using the function argument to iterate over the
> peers is a bit ugly.
Kind of agree, I was just lazy :)
> > @@ -313,40 +321,26 @@ rde_enqueue_updates(struct rib_entry *re
> > }
> > return;
> > case EVAL_DEFAULT:
> > - break;
> > case EVAL_ALL:
> > - /*
> > - * EVAL_DEFAULT is triggered when a new best path is selected.
> > - * EVAL_ALL is sent for any other update (needed for peers with
> > - * addpath or evaluate all set).
> > - * There can be only one EVAL_DEFAULT queued, it replaces the
> > - * previous one. A flag is enough.
> > - * A path can only exist once in the queue (old or new).
> > - */
> > - if (re->pq_mode == EVAL_DEFAULT)
> > - /* already a best path update pending, nothing to do */
> > - return;
> > -
> > break;
> > case EVAL_NONE:
> > fatalx("bad eval mode in %s", __func__);
> > }
> >
> > - if (re->pq_mode != EVAL_NONE) {
> > - peer = peer_get(re->pq_peer_id);
> > - TAILQ_REMOVE(&peer->rib_pq_head, re, rib_queue);
> > - rdemem.rde_rib_entry_count--;
> > - peer->stats.rib_entry_count--;
> > + /*
> > + * Enqueue only once, may need some reconsideration if queue
> > + * length of individual peers becomes excessivly long.
> > + */
> > + if (re->pq_mode == EVAL_NONE) {
> > + re->pq_peer_id = peer->conf.id;
> > + TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
> > + rdemem.rde_rib_entry_count++;
> > + peer->stats.rib_entry_count++;
> > }
> > - if (newpath != NULL)
> > - peer = prefix_peer(newpath);
> > - else
> > - peer = peerself;
> > - re->pq_mode = mode;
> > - re->pq_peer_id = peer->conf.id;
> > - TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
> > - rdemem.rde_rib_entry_count++;
> > - peer->stats.rib_entry_count++;
> > +
> > + /* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */
> > + if (re->pq_mode != EVAL_DEFAULT)
> > + re->pq_mode = mode;
> > }
> >
> > void
> > @@ -354,7 +348,6 @@ peer_process_updates(struct rde_peer *pe
> > {
> > struct rib_entry *re;
> > struct rde_peer *p;
> > - enum eval_mode mode;
> >
> > re = TAILQ_FIRST(&peer->rib_pq_head);
> > if (re == NULL)
> > @@ -363,10 +356,8 @@ peer_process_updates(struct rde_peer *pe
> > rdemem.rde_rib_entry_count--;
> > peer->stats.rib_entry_count--;
> >
> > - mode = re->pq_mode;
> > -
> > RB_FOREACH(p, peer_tree, &peertable)
> > - peer_generate_update(p, re, mode, 0);
> > + peer_generate_update(p, re, re->pq_mode, 0);
> >
> > rib_dequeue(re);
> > }
> > Index: rde_rib.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> > diff -u -p -r1.300 rde_rib.c
> > --- rde_rib.c 28 May 2026 12:53:55 -0000 1.300
> > +++ rde_rib.c 29 Jun 2026 15:14:06 -0000
> > @@ -1065,7 +1065,7 @@ prefix_flowspec_update(struct rde_peer *
> >
> > if (old != NULL)
> > old_pathid_tx = old->path_id_tx;
> > - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> > + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> >
> > if (old != NULL) {
> > TAILQ_REMOVE(&re->prefix_h, old, rib_l);
> > @@ -1091,7 +1091,7 @@ prefix_flowspec_withdraw(struct rde_peer
> > p = prefix_bypeer(re, peer, 0);
> > if (p == NULL)
> > return 0;
> > - rde_enqueue_updates(re, NULL, p->path_id_tx, EVAL_DEFAULT);
> > + rde_enqueue_updates(re, peer, NULL, p->path_id_tx, EVAL_DEFAULT);
> > TAILQ_REMOVE(&re->prefix_h, p, rib_l);
> > prefix_unlink(p);
> > prefix_free(p);
> >
>
--
:wq Claudio
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling