Download raw body.
bgpd: cache Adj-RIB-Out when a peer goes down
On Thu, Dec 12, 2024 at 01:46:33PM +0100, Claudio Jeker wrote:
> When a session goes down there is no real reason to remove the Adj-RIB-Out
> instead it is actually better to keep it synced so that when the peer
> reconnects there is no need to repopulate the full thing.
>
> This diff does exactly that. It caches the Adj-RIB-Out for up to 1h.
> Now when the session is reestablished the peer settings are checked to see
> if any params that matter changed. If so a full sync is needed else the
> RDE can just start blasting out the RIB.
>
> For this introduce a IMSG_SESSION_DELETE that tells the RDE to remove the
> peer and split peer_down into a part that takes the session down (and
> clears the Adj-RIB-In) and a part the frees the peer (peer_delete).
> The SE now sends an IMSG_SESSION_ADD command on first connect and skips
> that imsg on later connects unless IMSG_SESSION_DELETE was called before.
> During config reload the IMSG_SESSION_ADD calls only need to happen when
> the RDE actually has that information. For all this to work I added a
> rdesession member to struct peer to keep track of the RDE session state.
>
> Most of the heavy lifting was already committed so this diff is reasonably
> simple :)
> --
> :wq Claudio
Thanks for going the extra mile of making this pleasant and reasonably
easy to review. Much appreciated.
There's one really stupid typo/bug below, otherwise ok.
> Index: bgpd.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
> diff -u -p -r1.504 bgpd.h
> --- bgpd.h 11 Dec 2024 09:19:44 -0000 1.504
> +++ bgpd.h 11 Dec 2024 09:20:17 -0000
> @@ -683,6 +683,7 @@ enum imsg_type {
> IMSG_SESSION_ADD,
> IMSG_SESSION_UP,
> IMSG_SESSION_DOWN,
> + IMSG_SESSION_DELETE,
> IMSG_SESSION_STALE,
> IMSG_SESSION_NOGRACE,
> IMSG_SESSION_FLUSH,
> Index: rde.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> diff -u -p -r1.645 rde.c
> --- rde.c 11 Dec 2024 09:19:44 -0000 1.645
> +++ rde.c 11 Dec 2024 10:34:35 -0000
> @@ -433,6 +433,12 @@ rde_dispatch_imsg_session(struct imsgbuf
> }
> peer_down(peer);
> break;
> + case IMSG_SESSION_DELETE:
> + /* silently ignore deletes for unknown peers */
> + if ((peer = peer_get(peerid)) == NULL)
> + break;
> + peer_delete(peer);
> + break;
> case IMSG_SESSION_STALE:
> case IMSG_SESSION_NOGRACE:
> case IMSG_SESSION_FLUSH:
> Index: rde.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
> diff -u -p -r1.308 rde.h
> --- rde.h 11 Dec 2024 09:19:44 -0000 1.308
> +++ rde.h 11 Dec 2024 09:20:52 -0000
> @@ -289,7 +289,7 @@ struct prefix {
> #define PREFIX_FLAG_WITHDRAW 0x0001 /* enqueued on withdraw queue */
> #define PREFIX_FLAG_UPDATE 0x0002 /* enqueued on update queue */
> #define PREFIX_FLAG_DEAD 0x0004 /* locked but removed */
> -#define PREFIX_FLAG_STALE 0x0008 /* stale entry (graceful reload) */
> +#define PREFIX_FLAG_STALE 0x0008 /* stale entry (for addpath) */
> #define PREFIX_FLAG_MASK 0x000f /* mask for the prefix types */
> #define PREFIX_FLAG_ADJOUT 0x0010 /* prefix is in the adj-out rib */
> #define PREFIX_FLAG_EOR 0x0020 /* prefix is EoR */
> @@ -369,6 +369,7 @@ void rde_generate_updates(struct rib_e
>
> void peer_up(struct rde_peer *, struct session_up *);
> void peer_down(struct rde_peer *);
> +void peer_delete(struct rde_peer *);
> void peer_flush(struct rde_peer *, uint8_t, time_t);
> void peer_stale(struct rde_peer *, uint8_t, int);
> void peer_blast(struct rde_peer *, uint8_t);
> @@ -603,6 +604,7 @@ void prefix_adjout_update(struct prefi
> struct filterstate *, struct pt_entry *, uint32_t);
> void prefix_adjout_withdraw(struct prefix *);
> void prefix_adjout_destroy(struct prefix *);
> +void prefix_adjout_flush_pending(struct rde_peer *);
> int prefix_adjout_reaper(struct rde_peer *);
> int prefix_dump_new(struct rde_peer *, uint8_t, unsigned int,
> void *, void (*)(struct prefix *, void *),
> Index: rde_peer.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
> diff -u -p -r1.41 rde_peer.c
> --- rde_peer.c 11 Dec 2024 09:19:44 -0000 1.41
> +++ rde_peer.c 11 Dec 2024 09:22:44 -0000
> @@ -82,7 +82,7 @@ peer_shutdown(void)
> struct rde_peer *peer, *np;
>
> RB_FOREACH_SAFE(peer, peer_tree, &peertable, np)
> - peer_down(peer);
> + peer_delete(peer);
>
> while (!RB_EMPTY(&zombietable))
> peer_reaper(NULL);
> @@ -241,7 +241,8 @@ peer_generate_update(struct rde_peer *pe
> /* skip ourself */
> if (peer == peerself)
> return;
> - if (!peer_is_up(peer))
> + /* skip peers that never had a session open */
> + if (peer->state == PEER_NONE)
> return;
> /* skip peers using a different rib */
> if (peer->loc_rib_id != re->rib_id)
> @@ -286,28 +287,6 @@ rde_generate_updates(struct rib_entry *r
> /*
> * Various RIB walker callbacks.
> */
> -static void
> -peer_adjout_clear_upcall(struct prefix *p, void *arg)
> -{
> - prefix_adjout_destroy(p);
> -}
> -
> -static void
> -peer_adjout_stale_upcall(struct prefix *p, void *arg)
> -{
> - if (p->flags & PREFIX_FLAG_DEAD) {
> - return;
> - } else if (p->flags & PREFIX_FLAG_WITHDRAW) {
> - /* no need to keep stale withdraws, they miss all attributes */
> - prefix_adjout_destroy(p);
> - return;
> - } else if (p->flags & PREFIX_FLAG_UPDATE) {
> - RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p);
> - p->flags &= ~PREFIX_FLAG_UPDATE;
> - }
> - p->flags |= PREFIX_FLAG_STALE;
> -}
> -
> struct peer_flush {
> struct rde_peer *peer;
> time_t staletime;
> @@ -361,6 +340,7 @@ void
> peer_up(struct rde_peer *peer, struct session_up *sup)
> {
> uint8_t i;
> + int force_sync = 1;
>
> if (peer->state == PEER_ERR) {
> /*
> @@ -369,21 +349,33 @@ peer_up(struct rde_peer *peer, struct se
> */
> rib_dump_terminate(peer);
> peer_imsg_flush(peer);
> - if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
> - peer_adjout_clear_upcall, NULL, NULL) == -1)
> - fatal("%s: prefix_dump_new", __func__);
> peer_flush(peer, AID_UNSPEC, 0);
> peer->stats.prefix_cnt = 0;
> - peer->stats.prefix_out_cnt = 0;
> peer->state = PEER_DOWN;
> }
> - peer->remote_bgpid = sup->remote_bgpid;
> - peer->short_as = sup->short_as;
> +
> + /*
> + * Check if no value changed during flap to decide if the RIB
> + * is in sync. The capa check is maybe too strict but it should
> + * not matter for normal operation.
> + */
> + if (memcmp(&peer->remote_addr, &sup->remote_addr,
> + sizeof(sup->remote_addr)) == 0 &&
> + memcmp(&peer->local_v4_addr, &sup->local_v4_addr,
> + sizeof(sup->local_v4_addr)) == 0 &&
> + memcmp(&peer->local_v6_addr, &sup->local_v6_addr,
> + sizeof(sup->local_v6_addr)) == 0 &&
> + memcpy(&peer->capa, &sup->capa, sizeof(peer->capa)) == 0)
memcmp, not memcpy. Also use sizeof(sup->capa) to match the others.
> + force_sync = 0;
> +
> peer->remote_addr = sup->remote_addr;
> peer->local_v4_addr = sup->local_v4_addr;
> peer->local_v6_addr = sup->local_v6_addr;
> - peer->local_if_scope = sup->if_scope;
> memcpy(&peer->capa, &sup->capa, sizeof(peer->capa));
> + /* the Adj-RIB-Out does not depend on those */
> + peer->remote_bgpid = sup->remote_bgpid;
> + peer->local_if_scope = sup->if_scope;
> + peer->short_as = sup->short_as;
>
> /* clear eor markers depending on GR flags */
> if (peer->capa.grestart.restart) {
> @@ -396,9 +388,16 @@ peer_up(struct rde_peer *peer, struct se
> }
> peer->state = PEER_UP;
>
> - for (i = AID_MIN; i < AID_MAX; i++) {
> - if (peer->capa.mp[i])
> - peer_dump(peer, i);
> + if (!force_sync) {
> + for (i = AID_MIN; i < AID_MAX; i++) {
> + if (peer->capa.mp[i])
> + peer_blast(peer, i);
> + }
> + } else {
> + for (i = AID_MIN; i < AID_MAX; i++) {
> + if (peer->capa.mp[i])
> + peer_dump(peer, i);
> + }
> }
> }
>
> @@ -416,16 +415,24 @@ peer_down(struct rde_peer *peer)
> * and flush all pending imsg from the SE.
> */
> rib_dump_terminate(peer);
> + prefix_adjout_flush_pending(peer);
> peer_imsg_flush(peer);
>
> /* flush Adj-RIB-In */
> peer_flush(peer, AID_UNSPEC, 0);
> peer->stats.prefix_cnt = 0;
> +}
> +
> +void
> +peer_delete(struct rde_peer *peer)
> +{
> + if (peer->state != PEER_DOWN)
> + peer_down(peer);
>
> /* free filters */
> filterlist_free(peer->out_rules);
> - RB_REMOVE(peer_tree, &peertable, peer);
>
> + RB_REMOVE(peer_tree, &peertable, peer);
> while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) {
> log_warnx("zombie peer conflict");
> peer->conf.id = arc4random();
> @@ -481,17 +488,12 @@ peer_stale(struct rde_peer *peer, uint8_
> * and flush all pending imsg from the SE.
> */
> rib_dump_terminate(peer);
> + prefix_adjout_flush_pending(peer);
> peer_imsg_flush(peer);
>
> if (flushall)
> peer_flush(peer, aid, 0);
>
> - /* XXX this is not quite correct */
> - /* mark Adj-RIB-Out stale for this peer */
> - if (prefix_dump_new(peer, aid, 0, NULL,
> - peer_adjout_stale_upcall, NULL, NULL) == -1)
> - fatal("%s: prefix_dump_new", __func__);
> -
> /* make sure new prefixes start on a higher timestamp */
> while (now >= getmonotime())
> sleep(1);
> @@ -504,10 +506,7 @@ peer_stale(struct rde_peer *peer, uint8_
> static void
> peer_blast_upcall(struct prefix *p, void *ptr)
> {
> - if (p->flags & PREFIX_FLAG_STALE) {
> - /* remove stale entries */
> - prefix_adjout_destroy(p);
> - } else if (p->flags & PREFIX_FLAG_DEAD) {
> + if (p->flags & PREFIX_FLAG_DEAD) {
> /* ignore dead prefixes, they will go away soon */
> } else if ((p->flags & PREFIX_FLAG_MASK) == 0) {
> /* put entries on the update queue if not already on a queue */
> Index: rde_rib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
> diff -u -p -r1.265 rde_rib.c
> --- rde_rib.c 11 Dec 2024 09:19:44 -0000 1.265
> +++ rde_rib.c 11 Dec 2024 09:23:04 -0000
> @@ -1273,7 +1273,6 @@ prefix_adjout_update(struct prefix *p, s
> /* nothing changed */
> p->validation_state = state->vstate;
> p->lastchange = getmonotime();
> - p->flags &= ~PREFIX_FLAG_STALE;
> return;
> }
>
> @@ -1344,7 +1343,6 @@ prefix_adjout_withdraw(struct prefix *p)
> /* already a withdraw, shortcut */
> if (p->flags & PREFIX_FLAG_WITHDRAW) {
> p->lastchange = getmonotime();
> - p->flags &= ~PREFIX_FLAG_STALE;
> return;
> }
> /* pending update just got withdrawn */
> @@ -1414,6 +1412,24 @@ prefix_adjout_destroy(struct prefix *p)
> /* remove the last prefix reference before free */
> pt_unref(p->pt);
> prefix_free(p);
> + }
> +}
> +
> +void
> +prefix_adjout_flush_pending(struct rde_peer *peer)
> +{
> + struct prefix *p, *np;
> + uint8_t aid;
> +
> + for (aid = AID_MIN; aid < AID_MAX; aid++) {
> + RB_FOREACH_SAFE(p, prefix_tree, &peer->withdraws[aid], np) {
> + prefix_adjout_destroy(p);
> + }
> + RB_FOREACH_SAFE(p, prefix_tree, &peer->updates[aid], np) {
> + p->flags &= ~PREFIX_FLAG_UPDATE;
> + RB_REMOVE(prefix_tree, &peer->updates[aid], p);
> + peer->stats.pending_update--;
> + }
> }
> }
>
> Index: session.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
> diff -u -p -r1.502 session.c
> --- session.c 10 Dec 2024 14:34:51 -0000 1.502
> +++ session.c 12 Dec 2024 10:49:44 -0000
> @@ -270,6 +270,9 @@ session_main(int debug, int verbose)
> NULL);
> timer_remove_all(&p->timers);
> tcp_md5_del_listener(conf, p);
> + if (imsg_rde(IMSG_SESSION_DELETE,
> + p->conf.id, NULL, 0) == -1)
> + fatalx("imsg_compose error");
> msgbuf_free(p->wbuf);
> RB_REMOVE(peer_head, &conf->peers, p);
> log_peer_warnx(&p->conf, "removed");
> @@ -405,6 +408,12 @@ session_main(int debug, int verbose)
> case Timer_SessionDown:
> timer_stop(&p->timers,
> Timer_SessionDown);
> +
> + if (imsg_rde(IMSG_SESSION_DELETE,
> + p->conf.id, NULL, 0) == -1)
> + fatalx("imsg_compose error");
> + p->rdesession = 0;
> +
> /* finally delete this cloned peer */
> if (p->template)
> p->reconf_action =
> @@ -3462,9 +3471,13 @@ session_up(struct peer *p)
>
> timer_stop(&p->timers, Timer_SessionDown);
>
> - if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
> - &p->conf, sizeof(p->conf)) == -1)
> - fatalx("imsg_compose error");
> + if (!p->rdesession) {
> + /* inform rde about new peer */
> + if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
> + &p->conf, sizeof(p->conf)) == -1)
> + fatalx("imsg_compose error");
> + p->rdesession = 1;
> + }
>
> if (p->local.aid == AID_INET) {
> sup.local_v4_addr = p->local;
> @@ -3632,10 +3645,15 @@ merge_peers(struct bgpd_config *c, struc
> imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
> p->conf.id, 0, -1, NULL, 0);
>
> - /* sync the RDE in case we keep the peer */
> - if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
> - &p->conf, sizeof(struct peer_config)) == -1)
> - fatalx("imsg_compose error");
> + /*
> + * If the session is established or the SessionDown timer is
> + * running sync with the RDE
> + */
> + if (p->rdesession) {
> + if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
> + &p->conf, sizeof(struct peer_config)) == -1)
> + fatalx("imsg_compose error");
> + }
>
> /* apply the config to all clones of a template */
> if (p->conf.template) {
> @@ -3645,9 +3663,13 @@ merge_peers(struct bgpd_config *c, struc
> continue;
> session_template_clone(xp, NULL, xp->conf.id,
> xp->conf.remote_as);
> - if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
> - &xp->conf, sizeof(xp->conf)) == -1)
> - fatalx("imsg_compose error");
> +
> + if (p->rdesession) {
> + if (imsg_rde(IMSG_SESSION_ADD,
> + xp->conf.id, &xp->conf,
> + sizeof(xp->conf)) == -1)
> + fatalx("imsg_compose error");
> + }
> }
> }
> }
> Index: session.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> diff -u -p -r1.181 session.h
> --- session.h 10 Dec 2024 14:34:51 -0000 1.181
> +++ session.h 12 Dec 2024 10:47:14 -0000
> @@ -228,6 +228,7 @@ struct peer {
> uint8_t passive;
> uint8_t throttled;
> uint8_t rpending;
> + uint8_t rdesession;
> };
>
> extern time_t pauseaccept;
>
bgpd: cache Adj-RIB-Out when a peer goes down