Download raw body.
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
On Tue, Jun 30, 2026 at 09:33:50PM +0200, Claudio Jeker wrote:
> This is the next step in preparation of implementing a full rib update
> queue. This is needed to improve performance for add-path send.
>
> Add a new peer argument to rde_enqueue_updates() and use this to enqueue
> the update on. Only enqueue it once and not like now where new updates
> would requeue the update on the new peer. The effect of this needs to be
> further studied and adjusted but doing this is a lot simpler and ensures
> that order is somewhat kept. This also removes the need to enqueue
> all withdraws onto peerself which seems like a bad idea as well.
Unrelated to your diff: I was a bit confused by the doc comment above
prefix_set_dmetric() mentioning prefix_evaluate(). Perhaps we could
adjust that (maybe it's enough to simply say prefix_cmp() instead?)
Diff reads fine to me, so
ok tb
Two comments inline.
>
> --
> :wq Claudio
>
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> diff -u -p -r1.707 rde.c
> --- rde.c 24 Jun 2026 18:56:53 -0000 1.707
> +++ rde.c 29 Jun 2026 15:13:32 -0000
> @@ -4303,7 +4303,7 @@ rde_softreconfig_out(struct rib_entry *r
> /* no valid path for prefix */
> return;
>
> - rde_enqueue_updates(re, NULL, 0, EVAL_REEVAL);
> + rde_enqueue_updates(re, NULL, NULL, 0, EVAL_REEVAL);
> }
>
> static void
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> diff -u -p -r1.354 rde.h
> --- rde.h 24 Jun 2026 18:56:53 -0000 1.354
> +++ rde.h 29 Jun 2026 15:08:06 -0000
> @@ -420,8 +420,8 @@ struct rde_peer *peer_add(uint32_t, stru
> struct rde_filter *peer_apply_out_filter(struct rde_peer *,
> struct filter_head *);
>
> -void rde_enqueue_updates(struct rib_entry *, struct prefix *,
> - uint32_t, enum eval_mode);
> +void rde_enqueue_updates(struct rib_entry *, struct rde_peer *,
> + struct prefix *, uint32_t, enum eval_mode);
> void peer_process_updates(struct rde_peer *, void *);
>
> void peer_up(struct rde_peer *, struct session_up *);
> Index: rde_decide.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> diff -u -p -r1.108 rde_decide.c
> --- rde_decide.c 21 May 2026 15:20:27 -0000 1.108
> +++ rde_decide.c 29 Jun 2026 15:13:32 -0000
> @@ -534,6 +534,7 @@ void
> prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old)
> {
> struct prefix *newbest, *oldbest;
> + struct rde_peer *peer;
> struct rib *rib;
> uint32_t old_pathid_tx = 0;
>
> @@ -553,6 +554,7 @@ prefix_evaluate(struct rib_entry *re, st
> if (old != NULL) {
> prefix_remove(old, re);
> old_pathid_tx = old->path_id_tx;
> + peer = prefix_peer(old);
> }
> if (new != NULL) {
> prefix_insert(new, NULL, re);
> @@ -560,6 +562,7 @@ prefix_evaluate(struct rib_entry *re, st
> new = NULL;
> else
> old_pathid_tx = 0;
> + peer = prefix_peer(new);
> }
> newbest = prefix_best(re);
>
> @@ -575,7 +578,7 @@ prefix_evaluate(struct rib_entry *re, st
> */
> if ((rib->flags & F_RIB_NOFIB) == 0)
> rde_send_kroute(rib, newbest, oldbest);
> - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> return;
> }
>
> @@ -588,7 +591,7 @@ prefix_evaluate(struct rib_entry *re, st
> /* no old path to remove and path is ineligible, skip rest */
> if (old_pathid_tx == 0 && new == NULL)
> return;
> - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL);
> + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL);
While at this point we know old_pathid_tx != 0 || new != NULL, so we
know that peer has been initialized, this isn't super obvious and far
beyond gcc's deduction capabilities (both gcc 4.2.1 and 15.1 whine).
I don't see a good fix (if we do want to fix this). Doing peer = NULL
at the top and changing the above to
if ((old_pathid_tx == 0 && new == NULL) || peer == NULL)
return;
doesn't look like an improvement to me...
> }
> }
>
> @@ -598,6 +601,7 @@ prefix_evaluate_nexthop(struct prefix *p
> {
> struct rib_entry *re = prefix_re(p);
> struct prefix *newbest, *oldbest, *new, *old;
> + struct rde_peer *peer;
> struct rib *rib;
> uint32_t old_pathid_tx = 0;
>
> @@ -631,6 +635,8 @@ prefix_evaluate_nexthop(struct prefix *p
> oldbest = prefix_best(re);
>
> old = p;
> + peer = prefix_peer(p);
> +
> prefix_remove(old, re);
> if (prefix_eligible(old))
> old_pathid_tx = old->path_id_tx;
> @@ -665,7 +671,7 @@ prefix_evaluate_nexthop(struct prefix *p
> */
> if ((rib->flags & F_RIB_NOFIB) == 0)
> rde_send_kroute(rib, newbest, oldbest);
> - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> return;
> }
>
> @@ -675,5 +681,5 @@ prefix_evaluate_nexthop(struct prefix *p
> * rde_enqueue_updates() will then take care of distribution.
> */
> if (rde_evaluate_all())
> - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL);
> + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL);
> }
> Index: rde_peer.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
> diff -u -p -r1.76 rde_peer.c
> --- rde_peer.c 24 Jun 2026 18:56:53 -0000 1.76
> +++ rde_peer.c 30 Jun 2026 19:27:08 -0000
> @@ -297,12 +297,20 @@ peer_generate_update(struct rde_peer *pe
> up_generate_updates(peer, re, force_update);
> }
>
> +/*
> + * Enqueue updates into the peer queue specified by peer. The meaning of
> + * mode is:
> + * EVAL_DEFAULT is triggered when the best path changes.
> + * EVAL_ALL is sent for any other update (needed for peers with
> + * addpath or evaluate all set).
> + * EVAL_REEVAL is used by config reloads (a full RIB refresh is needed)
> + * and for single peer RIB dumps but those call peer_generate_update()
> + * directly.
> + */
> void
> -rde_enqueue_updates(struct rib_entry *re, struct prefix *newpath,
> - uint32_t old_pathid_tx, enum eval_mode mode)
> +rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer,
> + struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode)
> {
> - struct rde_peer *peer;
> -
> switch (mode) {
> case EVAL_REEVAL:
> /* skip peers which don't need to reconfigure */
Maybe that's just me, but using the function argument to iterate over the
peers is a bit ugly.
> @@ -313,40 +321,26 @@ rde_enqueue_updates(struct rib_entry *re
> }
> return;
> case EVAL_DEFAULT:
> - break;
> case EVAL_ALL:
> - /*
> - * EVAL_DEFAULT is triggered when a new best path is selected.
> - * EVAL_ALL is sent for any other update (needed for peers with
> - * addpath or evaluate all set).
> - * There can be only one EVAL_DEFAULT queued, it replaces the
> - * previous one. A flag is enough.
> - * A path can only exist once in the queue (old or new).
> - */
> - if (re->pq_mode == EVAL_DEFAULT)
> - /* already a best path update pending, nothing to do */
> - return;
> -
> break;
> case EVAL_NONE:
> fatalx("bad eval mode in %s", __func__);
> }
>
> - if (re->pq_mode != EVAL_NONE) {
> - peer = peer_get(re->pq_peer_id);
> - TAILQ_REMOVE(&peer->rib_pq_head, re, rib_queue);
> - rdemem.rde_rib_entry_count--;
> - peer->stats.rib_entry_count--;
> + /*
> + * Enqueue only once, may need some reconsideration if queue
> + * length of individual peers becomes excessivly long.
> + */
> + if (re->pq_mode == EVAL_NONE) {
> + re->pq_peer_id = peer->conf.id;
> + TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
> + rdemem.rde_rib_entry_count++;
> + peer->stats.rib_entry_count++;
> }
> - if (newpath != NULL)
> - peer = prefix_peer(newpath);
> - else
> - peer = peerself;
> - re->pq_mode = mode;
> - re->pq_peer_id = peer->conf.id;
> - TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
> - rdemem.rde_rib_entry_count++;
> - peer->stats.rib_entry_count++;
> +
> + /* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */
> + if (re->pq_mode != EVAL_DEFAULT)
> + re->pq_mode = mode;
> }
>
> void
> @@ -354,7 +348,6 @@ peer_process_updates(struct rde_peer *pe
> {
> struct rib_entry *re;
> struct rde_peer *p;
> - enum eval_mode mode;
>
> re = TAILQ_FIRST(&peer->rib_pq_head);
> if (re == NULL)
> @@ -363,10 +356,8 @@ peer_process_updates(struct rde_peer *pe
> rdemem.rde_rib_entry_count--;
> peer->stats.rib_entry_count--;
>
> - mode = re->pq_mode;
> -
> RB_FOREACH(p, peer_tree, &peertable)
> - peer_generate_update(p, re, mode, 0);
> + peer_generate_update(p, re, re->pq_mode, 0);
>
> rib_dequeue(re);
> }
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> diff -u -p -r1.300 rde_rib.c
> --- rde_rib.c 28 May 2026 12:53:55 -0000 1.300
> +++ rde_rib.c 29 Jun 2026 15:14:06 -0000
> @@ -1065,7 +1065,7 @@ prefix_flowspec_update(struct rde_peer *
>
> if (old != NULL)
> old_pathid_tx = old->path_id_tx;
> - rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> + rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
>
> if (old != NULL) {
> TAILQ_REMOVE(&re->prefix_h, old, rib_l);
> @@ -1091,7 +1091,7 @@ prefix_flowspec_withdraw(struct rde_peer
> p = prefix_bypeer(re, peer, 0);
> if (p == NULL)
> return 0;
> - rde_enqueue_updates(re, NULL, p->path_id_tx, EVAL_DEFAULT);
> + rde_enqueue_updates(re, peer, NULL, p->path_id_tx, EVAL_DEFAULT);
> TAILQ_REMOVE(&re->prefix_h, p, rib_l);
> prefix_unlink(p);
> prefix_free(p);
>
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
bgpd: rework rde_enqueue_updates() to simplify the rib queue handling