From: Theo Buehler Subject: Re: bgpd: cache Adj-RIB-Out when a peer goes down To: tech@openbsd.org Date: Thu, 12 Dec 2024 14:34:57 +0100 On Thu, Dec 12, 2024 at 01:46:33PM +0100, Claudio Jeker wrote: > When a session goes down there is no real reason to remove the Adj-RIB-Out > instead it is actually better to keep it synced so that when the peer > reconnects there is no need to repopulate the full thing. > > This diff does exactly that. It caches the Adj-RIB-Out for up to 1h. > Now when the session is reestablished the peer settings are checked to see > if any params that matter changed. If so a full sync is needed else the > RDE can just start blasting out the RIB. > > For this introduce a IMSG_SESSION_DELETE that tells the RDE to remove the > peer and split peer_down into a part that takes the session down (and > clears the Adj-RIB-In) and a part the frees the peer (peer_delete). > The SE now sends an IMSG_SESSION_ADD command on first connect and skips > that imsg on later connects unless IMSG_SESSION_DELETE was called before. > During config reload the IMSG_SESSION_ADD calls only need to happen when > the RDE actually has that information. For all this to work I added a > rdesession member to struct peer to keep track of the RDE session state. > > Most of the heavy lifting was already committed so this diff is reasonably > simple :) > -- > :wq Claudio Thanks for going the extra mile of making this pleasant and reasonably easy to review. Much appreciated. There's one really stupid typo/bug below, otherwise ok. > Index: bgpd.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v > diff -u -p -r1.504 bgpd.h > --- bgpd.h 11 Dec 2024 09:19:44 -0000 1.504 > +++ bgpd.h 11 Dec 2024 09:20:17 -0000 > @@ -683,6 +683,7 @@ enum imsg_type { > IMSG_SESSION_ADD, > IMSG_SESSION_UP, > IMSG_SESSION_DOWN, > + IMSG_SESSION_DELETE, > IMSG_SESSION_STALE, > IMSG_SESSION_NOGRACE, > IMSG_SESSION_FLUSH, > Index: rde.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > diff -u -p -r1.645 rde.c > --- rde.c 11 Dec 2024 09:19:44 -0000 1.645 > +++ rde.c 11 Dec 2024 10:34:35 -0000 > @@ -433,6 +433,12 @@ rde_dispatch_imsg_session(struct imsgbuf > } > peer_down(peer); > break; > + case IMSG_SESSION_DELETE: > + /* silently ignore deletes for unknown peers */ > + if ((peer = peer_get(peerid)) == NULL) > + break; > + peer_delete(peer); > + break; > case IMSG_SESSION_STALE: > case IMSG_SESSION_NOGRACE: > case IMSG_SESSION_FLUSH: > Index: rde.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v > diff -u -p -r1.308 rde.h > --- rde.h 11 Dec 2024 09:19:44 -0000 1.308 > +++ rde.h 11 Dec 2024 09:20:52 -0000 > @@ -289,7 +289,7 @@ struct prefix { > #define PREFIX_FLAG_WITHDRAW 0x0001 /* enqueued on withdraw queue */ > #define PREFIX_FLAG_UPDATE 0x0002 /* enqueued on update queue */ > #define PREFIX_FLAG_DEAD 0x0004 /* locked but removed */ > -#define PREFIX_FLAG_STALE 0x0008 /* stale entry (graceful reload) */ > +#define PREFIX_FLAG_STALE 0x0008 /* stale entry (for addpath) */ > #define PREFIX_FLAG_MASK 0x000f /* mask for the prefix types */ > #define PREFIX_FLAG_ADJOUT 0x0010 /* prefix is in the adj-out rib */ > #define PREFIX_FLAG_EOR 0x0020 /* prefix is EoR */ > @@ -369,6 +369,7 @@ void rde_generate_updates(struct rib_e > > void peer_up(struct rde_peer *, struct session_up *); > void peer_down(struct rde_peer *); > +void peer_delete(struct rde_peer *); > void peer_flush(struct rde_peer *, uint8_t, time_t); > void peer_stale(struct rde_peer *, uint8_t, int); > void peer_blast(struct rde_peer *, uint8_t); > @@ -603,6 +604,7 @@ void prefix_adjout_update(struct prefi > struct filterstate *, struct pt_entry *, uint32_t); > void prefix_adjout_withdraw(struct prefix *); > void prefix_adjout_destroy(struct prefix *); > +void prefix_adjout_flush_pending(struct rde_peer *); > int prefix_adjout_reaper(struct rde_peer *); > int prefix_dump_new(struct rde_peer *, uint8_t, unsigned int, > void *, void (*)(struct prefix *, void *), > Index: rde_peer.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v > diff -u -p -r1.41 rde_peer.c > --- rde_peer.c 11 Dec 2024 09:19:44 -0000 1.41 > +++ rde_peer.c 11 Dec 2024 09:22:44 -0000 > @@ -82,7 +82,7 @@ peer_shutdown(void) > struct rde_peer *peer, *np; > > RB_FOREACH_SAFE(peer, peer_tree, &peertable, np) > - peer_down(peer); > + peer_delete(peer); > > while (!RB_EMPTY(&zombietable)) > peer_reaper(NULL); > @@ -241,7 +241,8 @@ peer_generate_update(struct rde_peer *pe > /* skip ourself */ > if (peer == peerself) > return; > - if (!peer_is_up(peer)) > + /* skip peers that never had a session open */ > + if (peer->state == PEER_NONE) > return; > /* skip peers using a different rib */ > if (peer->loc_rib_id != re->rib_id) > @@ -286,28 +287,6 @@ rde_generate_updates(struct rib_entry *r > /* > * Various RIB walker callbacks. > */ > -static void > -peer_adjout_clear_upcall(struct prefix *p, void *arg) > -{ > - prefix_adjout_destroy(p); > -} > - > -static void > -peer_adjout_stale_upcall(struct prefix *p, void *arg) > -{ > - if (p->flags & PREFIX_FLAG_DEAD) { > - return; > - } else if (p->flags & PREFIX_FLAG_WITHDRAW) { > - /* no need to keep stale withdraws, they miss all attributes */ > - prefix_adjout_destroy(p); > - return; > - } else if (p->flags & PREFIX_FLAG_UPDATE) { > - RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p); > - p->flags &= ~PREFIX_FLAG_UPDATE; > - } > - p->flags |= PREFIX_FLAG_STALE; > -} > - > struct peer_flush { > struct rde_peer *peer; > time_t staletime; > @@ -361,6 +340,7 @@ void > peer_up(struct rde_peer *peer, struct session_up *sup) > { > uint8_t i; > + int force_sync = 1; > > if (peer->state == PEER_ERR) { > /* > @@ -369,21 +349,33 @@ peer_up(struct rde_peer *peer, struct se > */ > rib_dump_terminate(peer); > peer_imsg_flush(peer); > - if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL, > - peer_adjout_clear_upcall, NULL, NULL) == -1) > - fatal("%s: prefix_dump_new", __func__); > peer_flush(peer, AID_UNSPEC, 0); > peer->stats.prefix_cnt = 0; > - peer->stats.prefix_out_cnt = 0; > peer->state = PEER_DOWN; > } > - peer->remote_bgpid = sup->remote_bgpid; > - peer->short_as = sup->short_as; > + > + /* > + * Check if no value changed during flap to decide if the RIB > + * is in sync. The capa check is maybe too strict but it should > + * not matter for normal operation. > + */ > + if (memcmp(&peer->remote_addr, &sup->remote_addr, > + sizeof(sup->remote_addr)) == 0 && > + memcmp(&peer->local_v4_addr, &sup->local_v4_addr, > + sizeof(sup->local_v4_addr)) == 0 && > + memcmp(&peer->local_v6_addr, &sup->local_v6_addr, > + sizeof(sup->local_v6_addr)) == 0 && > + memcpy(&peer->capa, &sup->capa, sizeof(peer->capa)) == 0) memcmp, not memcpy. Also use sizeof(sup->capa) to match the others. > + force_sync = 0; > + > peer->remote_addr = sup->remote_addr; > peer->local_v4_addr = sup->local_v4_addr; > peer->local_v6_addr = sup->local_v6_addr; > - peer->local_if_scope = sup->if_scope; > memcpy(&peer->capa, &sup->capa, sizeof(peer->capa)); > + /* the Adj-RIB-Out does not depend on those */ > + peer->remote_bgpid = sup->remote_bgpid; > + peer->local_if_scope = sup->if_scope; > + peer->short_as = sup->short_as; > > /* clear eor markers depending on GR flags */ > if (peer->capa.grestart.restart) { > @@ -396,9 +388,16 @@ peer_up(struct rde_peer *peer, struct se > } > peer->state = PEER_UP; > > - for (i = AID_MIN; i < AID_MAX; i++) { > - if (peer->capa.mp[i]) > - peer_dump(peer, i); > + if (!force_sync) { > + for (i = AID_MIN; i < AID_MAX; i++) { > + if (peer->capa.mp[i]) > + peer_blast(peer, i); > + } > + } else { > + for (i = AID_MIN; i < AID_MAX; i++) { > + if (peer->capa.mp[i]) > + peer_dump(peer, i); > + } > } > } > > @@ -416,16 +415,24 @@ peer_down(struct rde_peer *peer) > * and flush all pending imsg from the SE. > */ > rib_dump_terminate(peer); > + prefix_adjout_flush_pending(peer); > peer_imsg_flush(peer); > > /* flush Adj-RIB-In */ > peer_flush(peer, AID_UNSPEC, 0); > peer->stats.prefix_cnt = 0; > +} > + > +void > +peer_delete(struct rde_peer *peer) > +{ > + if (peer->state != PEER_DOWN) > + peer_down(peer); > > /* free filters */ > filterlist_free(peer->out_rules); > - RB_REMOVE(peer_tree, &peertable, peer); > > + RB_REMOVE(peer_tree, &peertable, peer); > while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) { > log_warnx("zombie peer conflict"); > peer->conf.id = arc4random(); > @@ -481,17 +488,12 @@ peer_stale(struct rde_peer *peer, uint8_ > * and flush all pending imsg from the SE. > */ > rib_dump_terminate(peer); > + prefix_adjout_flush_pending(peer); > peer_imsg_flush(peer); > > if (flushall) > peer_flush(peer, aid, 0); > > - /* XXX this is not quite correct */ > - /* mark Adj-RIB-Out stale for this peer */ > - if (prefix_dump_new(peer, aid, 0, NULL, > - peer_adjout_stale_upcall, NULL, NULL) == -1) > - fatal("%s: prefix_dump_new", __func__); > - > /* make sure new prefixes start on a higher timestamp */ > while (now >= getmonotime()) > sleep(1); > @@ -504,10 +506,7 @@ peer_stale(struct rde_peer *peer, uint8_ > static void > peer_blast_upcall(struct prefix *p, void *ptr) > { > - if (p->flags & PREFIX_FLAG_STALE) { > - /* remove stale entries */ > - prefix_adjout_destroy(p); > - } else if (p->flags & PREFIX_FLAG_DEAD) { > + if (p->flags & PREFIX_FLAG_DEAD) { > /* ignore dead prefixes, they will go away soon */ > } else if ((p->flags & PREFIX_FLAG_MASK) == 0) { > /* put entries on the update queue if not already on a queue */ > Index: rde_rib.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v > diff -u -p -r1.265 rde_rib.c > --- rde_rib.c 11 Dec 2024 09:19:44 -0000 1.265 > +++ rde_rib.c 11 Dec 2024 09:23:04 -0000 > @@ -1273,7 +1273,6 @@ prefix_adjout_update(struct prefix *p, s > /* nothing changed */ > p->validation_state = state->vstate; > p->lastchange = getmonotime(); > - p->flags &= ~PREFIX_FLAG_STALE; > return; > } > > @@ -1344,7 +1343,6 @@ prefix_adjout_withdraw(struct prefix *p) > /* already a withdraw, shortcut */ > if (p->flags & PREFIX_FLAG_WITHDRAW) { > p->lastchange = getmonotime(); > - p->flags &= ~PREFIX_FLAG_STALE; > return; > } > /* pending update just got withdrawn */ > @@ -1414,6 +1412,24 @@ prefix_adjout_destroy(struct prefix *p) > /* remove the last prefix reference before free */ > pt_unref(p->pt); > prefix_free(p); > + } > +} > + > +void > +prefix_adjout_flush_pending(struct rde_peer *peer) > +{ > + struct prefix *p, *np; > + uint8_t aid; > + > + for (aid = AID_MIN; aid < AID_MAX; aid++) { > + RB_FOREACH_SAFE(p, prefix_tree, &peer->withdraws[aid], np) { > + prefix_adjout_destroy(p); > + } > + RB_FOREACH_SAFE(p, prefix_tree, &peer->updates[aid], np) { > + p->flags &= ~PREFIX_FLAG_UPDATE; > + RB_REMOVE(prefix_tree, &peer->updates[aid], p); > + peer->stats.pending_update--; > + } > } > } > > Index: session.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/session.c,v > diff -u -p -r1.502 session.c > --- session.c 10 Dec 2024 14:34:51 -0000 1.502 > +++ session.c 12 Dec 2024 10:49:44 -0000 > @@ -270,6 +270,9 @@ session_main(int debug, int verbose) > NULL); > timer_remove_all(&p->timers); > tcp_md5_del_listener(conf, p); > + if (imsg_rde(IMSG_SESSION_DELETE, > + p->conf.id, NULL, 0) == -1) > + fatalx("imsg_compose error"); > msgbuf_free(p->wbuf); > RB_REMOVE(peer_head, &conf->peers, p); > log_peer_warnx(&p->conf, "removed"); > @@ -405,6 +408,12 @@ session_main(int debug, int verbose) > case Timer_SessionDown: > timer_stop(&p->timers, > Timer_SessionDown); > + > + if (imsg_rde(IMSG_SESSION_DELETE, > + p->conf.id, NULL, 0) == -1) > + fatalx("imsg_compose error"); > + p->rdesession = 0; > + > /* finally delete this cloned peer */ > if (p->template) > p->reconf_action = > @@ -3462,9 +3471,13 @@ session_up(struct peer *p) > > timer_stop(&p->timers, Timer_SessionDown); > > - if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, > - &p->conf, sizeof(p->conf)) == -1) > - fatalx("imsg_compose error"); > + if (!p->rdesession) { > + /* inform rde about new peer */ > + if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, > + &p->conf, sizeof(p->conf)) == -1) > + fatalx("imsg_compose error"); > + p->rdesession = 1; > + } > > if (p->local.aid == AID_INET) { > sup.local_v4_addr = p->local; > @@ -3632,10 +3645,15 @@ merge_peers(struct bgpd_config *c, struc > imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, > p->conf.id, 0, -1, NULL, 0); > > - /* sync the RDE in case we keep the peer */ > - if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, > - &p->conf, sizeof(struct peer_config)) == -1) > - fatalx("imsg_compose error"); > + /* > + * If the session is established or the SessionDown timer is > + * running sync with the RDE > + */ > + if (p->rdesession) { > + if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, > + &p->conf, sizeof(struct peer_config)) == -1) > + fatalx("imsg_compose error"); > + } > > /* apply the config to all clones of a template */ > if (p->conf.template) { > @@ -3645,9 +3663,13 @@ merge_peers(struct bgpd_config *c, struc > continue; > session_template_clone(xp, NULL, xp->conf.id, > xp->conf.remote_as); > - if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, > - &xp->conf, sizeof(xp->conf)) == -1) > - fatalx("imsg_compose error"); > + > + if (p->rdesession) { > + if (imsg_rde(IMSG_SESSION_ADD, > + xp->conf.id, &xp->conf, > + sizeof(xp->conf)) == -1) > + fatalx("imsg_compose error"); > + } > } > } > } > Index: session.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v > diff -u -p -r1.181 session.h > --- session.h 10 Dec 2024 14:34:51 -0000 1.181 > +++ session.h 12 Dec 2024 10:47:14 -0000 > @@ -228,6 +228,7 @@ struct peer { > uint8_t passive; > uint8_t throttled; > uint8_t rpending; > + uint8_t rdesession; > }; > > extern time_t pauseaccept; >