Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: bgpd: rework rde_enqueue_updates() to simplify the rib queue handling
To:
Theo Buehler <tb@theobuehler.org>
Cc:
tech@openbsd.org
Date:
Wed, 1 Jul 2026 10:17:30 +0200

Download raw body.

Thread
On Wed, Jul 01, 2026 at 08:13:23AM +0200, Theo Buehler wrote:
> On Tue, Jun 30, 2026 at 09:33:50PM +0200, Claudio Jeker wrote:
> > This is the next step in preparation of implementing a full rib update
> > queue. This is needed to improve performance for add-path send.
> > 
> > Add a new peer argument to rde_enqueue_updates() and use this to enqueue
> > the update on. Only enqueue it once and not like now where new updates
> > would requeue the update on the new peer. The effect of this needs to be
> > further studied and adjusted but doing this is a lot simpler and ensures
> > that order is somewhat kept. This also removes the need to enqueue
> > all withdraws onto peerself which seems like a bad idea as well.
> 
> Unrelated to your diff: I was a bit confused by the doc comment above
> prefix_set_dmetric() mentioning prefix_evaluate(). Perhaps we could
> adjust that (maybe it's enough to simply say prefix_cmp() instead?)

Will look into this but as a followup.
 
> Diff reads fine to me, so 
> 
> ok tb
> 
> Two comments inline.
> 
> > 
> > -- 
> > :wq Claudio
> > 
> > Index: rde.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > diff -u -p -r1.707 rde.c
> > --- rde.c	24 Jun 2026 18:56:53 -0000	1.707
> > +++ rde.c	29 Jun 2026 15:13:32 -0000
> > @@ -4303,7 +4303,7 @@ rde_softreconfig_out(struct rib_entry *r
> >  		/* no valid path for prefix */
> >  		return;
> >  
> > -	rde_enqueue_updates(re, NULL, 0, EVAL_REEVAL);
> > +	rde_enqueue_updates(re, NULL, NULL, 0, EVAL_REEVAL);
> >  }
> >  
> >  static void
> > Index: rde.h
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> > diff -u -p -r1.354 rde.h
> > --- rde.h	24 Jun 2026 18:56:53 -0000	1.354
> > +++ rde.h	29 Jun 2026 15:08:06 -0000
> > @@ -420,8 +420,8 @@ struct rde_peer	*peer_add(uint32_t, stru
> >  struct rde_filter	*peer_apply_out_filter(struct rde_peer *,
> >  			    struct filter_head *);
> >  
> > -void		 rde_enqueue_updates(struct rib_entry *, struct prefix *,
> > -		    uint32_t, enum eval_mode);
> > +void		 rde_enqueue_updates(struct rib_entry *, struct rde_peer *,
> > +		    struct prefix *, uint32_t, enum eval_mode);
> >  void		 peer_process_updates(struct rde_peer *, void *);
> >  
> >  void		 peer_up(struct rde_peer *, struct session_up *);
> > Index: rde_decide.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> > diff -u -p -r1.108 rde_decide.c
> > --- rde_decide.c	21 May 2026 15:20:27 -0000	1.108
> > +++ rde_decide.c	29 Jun 2026 15:13:32 -0000
> > @@ -534,6 +534,7 @@ void
> >  prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old)
> >  {
> >  	struct prefix	*newbest, *oldbest;
> > +	struct rde_peer	*peer;
> >  	struct rib	*rib;
> >  	uint32_t	 old_pathid_tx = 0;
> >  
> > @@ -553,6 +554,7 @@ prefix_evaluate(struct rib_entry *re, st
> >  	if (old != NULL) {
> >  		prefix_remove(old, re);
> >  		old_pathid_tx = old->path_id_tx;
> > +		peer = prefix_peer(old);
> >  	}
> >  	if (new != NULL) {
> >  		prefix_insert(new, NULL, re);
> > @@ -560,6 +562,7 @@ prefix_evaluate(struct rib_entry *re, st
> >  			new = NULL;
> >  		else
> >  			old_pathid_tx = 0;
> > +		peer = prefix_peer(new);
> >  	}
> >  	newbest = prefix_best(re);
> >  
> > @@ -575,7 +578,7 @@ prefix_evaluate(struct rib_entry *re, st
> >  		 */
> >  		if ((rib->flags & F_RIB_NOFIB) == 0)
> >  			rde_send_kroute(rib, newbest, oldbest);
> > -		rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> > +		rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> >  		return;
> >  	}
> >  
> > @@ -588,7 +591,7 @@ prefix_evaluate(struct rib_entry *re, st
> >  		/* no old path to remove and path is ineligible, skip rest */
> >  		if (old_pathid_tx == 0 && new == NULL)
> >  			return;
> > -		rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL);
> > +		rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL);
> 
> While at this point we know old_pathid_tx != 0 || new != NULL, so we
> know that peer has been initialized, this isn't super obvious and far
> beyond gcc's deduction capabilities (both gcc 4.2.1 and 15.1 whine).
> 
> I don't see a good fix (if we do want to fix this). Doing peer = NULL
> at the top and changing the above to
> 
>  		if ((old_pathid_tx == 0 && new == NULL) || peer == NULL)
> 			return;
> 
> doesn't look like an improvement to me...

Hmmm. I'll have a closer look. We certainly can initalize peer = NULL at
the top. Not sure if we need to check for peer == NULL though.
 
> >  	}
> >  }
> >  
> > @@ -598,6 +601,7 @@ prefix_evaluate_nexthop(struct prefix *p
> >  {
> >  	struct rib_entry *re = prefix_re(p);
> >  	struct prefix	*newbest, *oldbest, *new, *old;
> > +	struct rde_peer	*peer;
> >  	struct rib	*rib;
> >  	uint32_t	 old_pathid_tx = 0;
> >  
> > @@ -631,6 +635,8 @@ prefix_evaluate_nexthop(struct prefix *p
> >  	oldbest = prefix_best(re);
> >  
> >  	old = p;
> > +	peer = prefix_peer(p);
> > +
> >  	prefix_remove(old, re);
> >  	if (prefix_eligible(old))
> >  		old_pathid_tx = old->path_id_tx;
> > @@ -665,7 +671,7 @@ prefix_evaluate_nexthop(struct prefix *p
> >  		 */
> >  		if ((rib->flags & F_RIB_NOFIB) == 0)
> >  			rde_send_kroute(rib, newbest, oldbest);
> > -		rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> > +		rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> >  		return;
> >  	}
> >  
> > @@ -675,5 +681,5 @@ prefix_evaluate_nexthop(struct prefix *p
> >  	 * rde_enqueue_updates() will then take care of distribution.
> >  	 */
> >  	if (rde_evaluate_all())
> > -		rde_enqueue_updates(re, new, old_pathid_tx, EVAL_ALL);
> > +		rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_ALL);
> >  }
> > Index: rde_peer.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
> > diff -u -p -r1.76 rde_peer.c
> > --- rde_peer.c	24 Jun 2026 18:56:53 -0000	1.76
> > +++ rde_peer.c	30 Jun 2026 19:27:08 -0000
> > @@ -297,12 +297,20 @@ peer_generate_update(struct rde_peer *pe
> >  	up_generate_updates(peer, re, force_update);
> >  }
> >  
> > +/*
> > + * Enqueue updates into the peer queue specified by peer. The meaning of
> > + * mode is:
> > + *	EVAL_DEFAULT is triggered when the best path changes.
> > + *	EVAL_ALL is sent for any other update (needed for peers with
> > + *	addpath or evaluate all set).
> > + *	EVAL_REEVAL is used by config reloads (a full RIB refresh is needed)
> > + *	and for single peer RIB dumps but those call peer_generate_update()
> > + *	directly.
> > + */
> >  void
> > -rde_enqueue_updates(struct rib_entry *re, struct prefix *newpath,
> > -    uint32_t old_pathid_tx, enum eval_mode mode)
> > +rde_enqueue_updates(struct rib_entry *re, struct rde_peer *peer,
> > +    struct prefix *newpath, uint32_t old_pathid_tx, enum eval_mode mode)
> >  {
> > -	struct rde_peer	*peer;
> > -
> >  	switch (mode) {
> >  	case EVAL_REEVAL:
> >  		/* skip peers which don't need to reconfigure */
> 
> Maybe that's just me, but using the function argument to iterate over the
> peers is a bit ugly.

Kind of agree, I was just lazy :)
 
> > @@ -313,40 +321,26 @@ rde_enqueue_updates(struct rib_entry *re
> >  		}
> >  		return;
> >  	case EVAL_DEFAULT:
> > -		break;
> >  	case EVAL_ALL:
> > -		/*
> > -		 * EVAL_DEFAULT is triggered when a new best path is selected.
> > -		 * EVAL_ALL is sent for any other update (needed for peers with
> > -		 * addpath or evaluate all set).
> > -		 * There can be only one EVAL_DEFAULT queued, it replaces the
> > -		 * previous one. A flag is enough.
> > -		 * A path can only exist once in the queue (old or new).
> > -		 */
> > -		if (re->pq_mode == EVAL_DEFAULT)
> > -			/* already a best path update pending, nothing to do */
> > -			return;
> > -
> >  		break;
> >  	case EVAL_NONE:
> >  		fatalx("bad eval mode in %s", __func__);
> >  	}
> >  
> > -	if (re->pq_mode != EVAL_NONE) {
> > -		peer = peer_get(re->pq_peer_id);
> > -		TAILQ_REMOVE(&peer->rib_pq_head, re, rib_queue);
> > -		rdemem.rde_rib_entry_count--;
> > -		peer->stats.rib_entry_count--;
> > +	/*
> > +	 * Enqueue only once, may need some reconsideration if queue
> > +	 * length of individual peers becomes excessivly long.
> > +	 */
> > +	if (re->pq_mode == EVAL_NONE) {
> > +		re->pq_peer_id = peer->conf.id;
> > +		TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
> > +		rdemem.rde_rib_entry_count++;
> > +		peer->stats.rib_entry_count++;
> >  	}
> > -	if (newpath != NULL)
> > -		peer = prefix_peer(newpath);
> > -	else
> > -		peer = peerself;
> > -	re->pq_mode = mode;
> > -	re->pq_peer_id = peer->conf.id;
> > -	TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
> > -	rdemem.rde_rib_entry_count++;
> > -	peer->stats.rib_entry_count++;
> > +
> > +	/* don't downgrade pq_mode from EVAL_DEFAULT to EVAL_ALL */
> > +	if (re->pq_mode != EVAL_DEFAULT)
> > +		re->pq_mode = mode;
> >  }
> >  
> >  void
> > @@ -354,7 +348,6 @@ peer_process_updates(struct rde_peer *pe
> >  {
> >  	struct rib_entry *re;
> >  	struct rde_peer *p;
> > -	enum eval_mode mode;
> >  
> >  	re = TAILQ_FIRST(&peer->rib_pq_head);
> >  	if (re == NULL)
> > @@ -363,10 +356,8 @@ peer_process_updates(struct rde_peer *pe
> >  	rdemem.rde_rib_entry_count--;
> >  	peer->stats.rib_entry_count--;
> >  
> > -	mode = re->pq_mode;
> > -
> >  	RB_FOREACH(p, peer_tree, &peertable)
> > -		peer_generate_update(p, re, mode, 0);
> > +		peer_generate_update(p, re, re->pq_mode, 0);
> >  
> >  	rib_dequeue(re);
> >  }
> > Index: rde_rib.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> > diff -u -p -r1.300 rde_rib.c
> > --- rde_rib.c	28 May 2026 12:53:55 -0000	1.300
> > +++ rde_rib.c	29 Jun 2026 15:14:06 -0000
> > @@ -1065,7 +1065,7 @@ prefix_flowspec_update(struct rde_peer *
> >  
> >  	if (old != NULL)
> >  		old_pathid_tx = old->path_id_tx;
> > -	rde_enqueue_updates(re, new, old_pathid_tx, EVAL_DEFAULT);
> > +	rde_enqueue_updates(re, peer, new, old_pathid_tx, EVAL_DEFAULT);
> >  
> >  	if (old != NULL) {
> >  		TAILQ_REMOVE(&re->prefix_h, old, rib_l);
> > @@ -1091,7 +1091,7 @@ prefix_flowspec_withdraw(struct rde_peer
> >  	p = prefix_bypeer(re, peer, 0);
> >  	if (p == NULL)
> >  		return 0;
> > -	rde_enqueue_updates(re, NULL, p->path_id_tx, EVAL_DEFAULT);
> > +	rde_enqueue_updates(re, peer, NULL, p->path_id_tx, EVAL_DEFAULT);
> >  	TAILQ_REMOVE(&re->prefix_h, p, rib_l);
> >  	prefix_unlink(p);
> >  	prefix_free(p);
> > 
> 

-- 
:wq Claudio