Download raw body.
bgpd: cache Adj-RIB-Out when a peer goes down
When a session goes down there is no real reason to remove the Adj-RIB-Out
instead it is actually better to keep it synced so that when the peer
reconnects there is no need to repopulate the full thing.
This diff does exactly that. It caches the Adj-RIB-Out for up to 1h.
Now when the session is reestablished the peer settings are checked to see
if any params that matter changed. If so a full sync is needed else the
RDE can just start blasting out the RIB.
For this introduce a IMSG_SESSION_DELETE that tells the RDE to remove the
peer and split peer_down into a part that takes the session down (and
clears the Adj-RIB-In) and a part the frees the peer (peer_delete).
The SE now sends an IMSG_SESSION_ADD command on first connect and skips
that imsg on later connects unless IMSG_SESSION_DELETE was called before.
During config reload the IMSG_SESSION_ADD calls only need to happen when
the RDE actually has that information. For all this to work I added a
rdesession member to struct peer to keep track of the RDE session state.
Most of the heavy lifting was already committed so this diff is reasonably
simple :)
--
:wq Claudio
Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.504 bgpd.h
--- bgpd.h 11 Dec 2024 09:19:44 -0000 1.504
+++ bgpd.h 11 Dec 2024 09:20:17 -0000
@@ -683,6 +683,7 @@ enum imsg_type {
IMSG_SESSION_ADD,
IMSG_SESSION_UP,
IMSG_SESSION_DOWN,
+ IMSG_SESSION_DELETE,
IMSG_SESSION_STALE,
IMSG_SESSION_NOGRACE,
IMSG_SESSION_FLUSH,
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.645 rde.c
--- rde.c 11 Dec 2024 09:19:44 -0000 1.645
+++ rde.c 11 Dec 2024 10:34:35 -0000
@@ -433,6 +433,12 @@ rde_dispatch_imsg_session(struct imsgbuf
}
peer_down(peer);
break;
+ case IMSG_SESSION_DELETE:
+ /* silently ignore deletes for unknown peers */
+ if ((peer = peer_get(peerid)) == NULL)
+ break;
+ peer_delete(peer);
+ break;
case IMSG_SESSION_STALE:
case IMSG_SESSION_NOGRACE:
case IMSG_SESSION_FLUSH:
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.308 rde.h
--- rde.h 11 Dec 2024 09:19:44 -0000 1.308
+++ rde.h 11 Dec 2024 09:20:52 -0000
@@ -289,7 +289,7 @@ struct prefix {
#define PREFIX_FLAG_WITHDRAW 0x0001 /* enqueued on withdraw queue */
#define PREFIX_FLAG_UPDATE 0x0002 /* enqueued on update queue */
#define PREFIX_FLAG_DEAD 0x0004 /* locked but removed */
-#define PREFIX_FLAG_STALE 0x0008 /* stale entry (graceful reload) */
+#define PREFIX_FLAG_STALE 0x0008 /* stale entry (for addpath) */
#define PREFIX_FLAG_MASK 0x000f /* mask for the prefix types */
#define PREFIX_FLAG_ADJOUT 0x0010 /* prefix is in the adj-out rib */
#define PREFIX_FLAG_EOR 0x0020 /* prefix is EoR */
@@ -369,6 +369,7 @@ void rde_generate_updates(struct rib_e
void peer_up(struct rde_peer *, struct session_up *);
void peer_down(struct rde_peer *);
+void peer_delete(struct rde_peer *);
void peer_flush(struct rde_peer *, uint8_t, time_t);
void peer_stale(struct rde_peer *, uint8_t, int);
void peer_blast(struct rde_peer *, uint8_t);
@@ -603,6 +604,7 @@ void prefix_adjout_update(struct prefi
struct filterstate *, struct pt_entry *, uint32_t);
void prefix_adjout_withdraw(struct prefix *);
void prefix_adjout_destroy(struct prefix *);
+void prefix_adjout_flush_pending(struct rde_peer *);
int prefix_adjout_reaper(struct rde_peer *);
int prefix_dump_new(struct rde_peer *, uint8_t, unsigned int,
void *, void (*)(struct prefix *, void *),
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.41 rde_peer.c
--- rde_peer.c 11 Dec 2024 09:19:44 -0000 1.41
+++ rde_peer.c 11 Dec 2024 09:22:44 -0000
@@ -82,7 +82,7 @@ peer_shutdown(void)
struct rde_peer *peer, *np;
RB_FOREACH_SAFE(peer, peer_tree, &peertable, np)
- peer_down(peer);
+ peer_delete(peer);
while (!RB_EMPTY(&zombietable))
peer_reaper(NULL);
@@ -241,7 +241,8 @@ peer_generate_update(struct rde_peer *pe
/* skip ourself */
if (peer == peerself)
return;
- if (!peer_is_up(peer))
+ /* skip peers that never had a session open */
+ if (peer->state == PEER_NONE)
return;
/* skip peers using a different rib */
if (peer->loc_rib_id != re->rib_id)
@@ -286,28 +287,6 @@ rde_generate_updates(struct rib_entry *r
/*
* Various RIB walker callbacks.
*/
-static void
-peer_adjout_clear_upcall(struct prefix *p, void *arg)
-{
- prefix_adjout_destroy(p);
-}
-
-static void
-peer_adjout_stale_upcall(struct prefix *p, void *arg)
-{
- if (p->flags & PREFIX_FLAG_DEAD) {
- return;
- } else if (p->flags & PREFIX_FLAG_WITHDRAW) {
- /* no need to keep stale withdraws, they miss all attributes */
- prefix_adjout_destroy(p);
- return;
- } else if (p->flags & PREFIX_FLAG_UPDATE) {
- RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p);
- p->flags &= ~PREFIX_FLAG_UPDATE;
- }
- p->flags |= PREFIX_FLAG_STALE;
-}
-
struct peer_flush {
struct rde_peer *peer;
time_t staletime;
@@ -361,6 +340,7 @@ void
peer_up(struct rde_peer *peer, struct session_up *sup)
{
uint8_t i;
+ int force_sync = 1;
if (peer->state == PEER_ERR) {
/*
@@ -369,21 +349,33 @@ peer_up(struct rde_peer *peer, struct se
*/
rib_dump_terminate(peer);
peer_imsg_flush(peer);
- if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
- peer_adjout_clear_upcall, NULL, NULL) == -1)
- fatal("%s: prefix_dump_new", __func__);
peer_flush(peer, AID_UNSPEC, 0);
peer->stats.prefix_cnt = 0;
- peer->stats.prefix_out_cnt = 0;
peer->state = PEER_DOWN;
}
- peer->remote_bgpid = sup->remote_bgpid;
- peer->short_as = sup->short_as;
+
+ /*
+ * Check if no value changed during flap to decide if the RIB
+ * is in sync. The capa check is maybe too strict but it should
+ * not matter for normal operation.
+ */
+ if (memcmp(&peer->remote_addr, &sup->remote_addr,
+ sizeof(sup->remote_addr)) == 0 &&
+ memcmp(&peer->local_v4_addr, &sup->local_v4_addr,
+ sizeof(sup->local_v4_addr)) == 0 &&
+ memcmp(&peer->local_v6_addr, &sup->local_v6_addr,
+ sizeof(sup->local_v6_addr)) == 0 &&
+ memcpy(&peer->capa, &sup->capa, sizeof(peer->capa)) == 0)
+ force_sync = 0;
+
peer->remote_addr = sup->remote_addr;
peer->local_v4_addr = sup->local_v4_addr;
peer->local_v6_addr = sup->local_v6_addr;
- peer->local_if_scope = sup->if_scope;
memcpy(&peer->capa, &sup->capa, sizeof(peer->capa));
+ /* the Adj-RIB-Out does not depend on those */
+ peer->remote_bgpid = sup->remote_bgpid;
+ peer->local_if_scope = sup->if_scope;
+ peer->short_as = sup->short_as;
/* clear eor markers depending on GR flags */
if (peer->capa.grestart.restart) {
@@ -396,9 +388,16 @@ peer_up(struct rde_peer *peer, struct se
}
peer->state = PEER_UP;
- for (i = AID_MIN; i < AID_MAX; i++) {
- if (peer->capa.mp[i])
- peer_dump(peer, i);
+ if (!force_sync) {
+ for (i = AID_MIN; i < AID_MAX; i++) {
+ if (peer->capa.mp[i])
+ peer_blast(peer, i);
+ }
+ } else {
+ for (i = AID_MIN; i < AID_MAX; i++) {
+ if (peer->capa.mp[i])
+ peer_dump(peer, i);
+ }
}
}
@@ -416,16 +415,24 @@ peer_down(struct rde_peer *peer)
* and flush all pending imsg from the SE.
*/
rib_dump_terminate(peer);
+ prefix_adjout_flush_pending(peer);
peer_imsg_flush(peer);
/* flush Adj-RIB-In */
peer_flush(peer, AID_UNSPEC, 0);
peer->stats.prefix_cnt = 0;
+}
+
+void
+peer_delete(struct rde_peer *peer)
+{
+ if (peer->state != PEER_DOWN)
+ peer_down(peer);
/* free filters */
filterlist_free(peer->out_rules);
- RB_REMOVE(peer_tree, &peertable, peer);
+ RB_REMOVE(peer_tree, &peertable, peer);
while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) {
log_warnx("zombie peer conflict");
peer->conf.id = arc4random();
@@ -481,17 +488,12 @@ peer_stale(struct rde_peer *peer, uint8_
* and flush all pending imsg from the SE.
*/
rib_dump_terminate(peer);
+ prefix_adjout_flush_pending(peer);
peer_imsg_flush(peer);
if (flushall)
peer_flush(peer, aid, 0);
- /* XXX this is not quite correct */
- /* mark Adj-RIB-Out stale for this peer */
- if (prefix_dump_new(peer, aid, 0, NULL,
- peer_adjout_stale_upcall, NULL, NULL) == -1)
- fatal("%s: prefix_dump_new", __func__);
-
/* make sure new prefixes start on a higher timestamp */
while (now >= getmonotime())
sleep(1);
@@ -504,10 +506,7 @@ peer_stale(struct rde_peer *peer, uint8_
static void
peer_blast_upcall(struct prefix *p, void *ptr)
{
- if (p->flags & PREFIX_FLAG_STALE) {
- /* remove stale entries */
- prefix_adjout_destroy(p);
- } else if (p->flags & PREFIX_FLAG_DEAD) {
+ if (p->flags & PREFIX_FLAG_DEAD) {
/* ignore dead prefixes, they will go away soon */
} else if ((p->flags & PREFIX_FLAG_MASK) == 0) {
/* put entries on the update queue if not already on a queue */
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
diff -u -p -r1.265 rde_rib.c
--- rde_rib.c 11 Dec 2024 09:19:44 -0000 1.265
+++ rde_rib.c 11 Dec 2024 09:23:04 -0000
@@ -1273,7 +1273,6 @@ prefix_adjout_update(struct prefix *p, s
/* nothing changed */
p->validation_state = state->vstate;
p->lastchange = getmonotime();
- p->flags &= ~PREFIX_FLAG_STALE;
return;
}
@@ -1344,7 +1343,6 @@ prefix_adjout_withdraw(struct prefix *p)
/* already a withdraw, shortcut */
if (p->flags & PREFIX_FLAG_WITHDRAW) {
p->lastchange = getmonotime();
- p->flags &= ~PREFIX_FLAG_STALE;
return;
}
/* pending update just got withdrawn */
@@ -1414,6 +1412,24 @@ prefix_adjout_destroy(struct prefix *p)
/* remove the last prefix reference before free */
pt_unref(p->pt);
prefix_free(p);
+ }
+}
+
+void
+prefix_adjout_flush_pending(struct rde_peer *peer)
+{
+ struct prefix *p, *np;
+ uint8_t aid;
+
+ for (aid = AID_MIN; aid < AID_MAX; aid++) {
+ RB_FOREACH_SAFE(p, prefix_tree, &peer->withdraws[aid], np) {
+ prefix_adjout_destroy(p);
+ }
+ RB_FOREACH_SAFE(p, prefix_tree, &peer->updates[aid], np) {
+ p->flags &= ~PREFIX_FLAG_UPDATE;
+ RB_REMOVE(prefix_tree, &peer->updates[aid], p);
+ peer->stats.pending_update--;
+ }
}
}
Index: session.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
diff -u -p -r1.502 session.c
--- session.c 10 Dec 2024 14:34:51 -0000 1.502
+++ session.c 12 Dec 2024 10:49:44 -0000
@@ -270,6 +270,9 @@ session_main(int debug, int verbose)
NULL);
timer_remove_all(&p->timers);
tcp_md5_del_listener(conf, p);
+ if (imsg_rde(IMSG_SESSION_DELETE,
+ p->conf.id, NULL, 0) == -1)
+ fatalx("imsg_compose error");
msgbuf_free(p->wbuf);
RB_REMOVE(peer_head, &conf->peers, p);
log_peer_warnx(&p->conf, "removed");
@@ -405,6 +408,12 @@ session_main(int debug, int verbose)
case Timer_SessionDown:
timer_stop(&p->timers,
Timer_SessionDown);
+
+ if (imsg_rde(IMSG_SESSION_DELETE,
+ p->conf.id, NULL, 0) == -1)
+ fatalx("imsg_compose error");
+ p->rdesession = 0;
+
/* finally delete this cloned peer */
if (p->template)
p->reconf_action =
@@ -3462,9 +3471,13 @@ session_up(struct peer *p)
timer_stop(&p->timers, Timer_SessionDown);
- if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
- &p->conf, sizeof(p->conf)) == -1)
- fatalx("imsg_compose error");
+ if (!p->rdesession) {
+ /* inform rde about new peer */
+ if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
+ &p->conf, sizeof(p->conf)) == -1)
+ fatalx("imsg_compose error");
+ p->rdesession = 1;
+ }
if (p->local.aid == AID_INET) {
sup.local_v4_addr = p->local;
@@ -3632,10 +3645,15 @@ merge_peers(struct bgpd_config *c, struc
imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
p->conf.id, 0, -1, NULL, 0);
- /* sync the RDE in case we keep the peer */
- if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
- &p->conf, sizeof(struct peer_config)) == -1)
- fatalx("imsg_compose error");
+ /*
+ * If the session is established or the SessionDown timer is
+ * running sync with the RDE
+ */
+ if (p->rdesession) {
+ if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
+ &p->conf, sizeof(struct peer_config)) == -1)
+ fatalx("imsg_compose error");
+ }
/* apply the config to all clones of a template */
if (p->conf.template) {
@@ -3645,9 +3663,13 @@ merge_peers(struct bgpd_config *c, struc
continue;
session_template_clone(xp, NULL, xp->conf.id,
xp->conf.remote_as);
- if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
- &xp->conf, sizeof(xp->conf)) == -1)
- fatalx("imsg_compose error");
+
+ if (p->rdesession) {
+ if (imsg_rde(IMSG_SESSION_ADD,
+ xp->conf.id, &xp->conf,
+ sizeof(xp->conf)) == -1)
+ fatalx("imsg_compose error");
+ }
}
}
}
Index: session.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
diff -u -p -r1.181 session.h
--- session.h 10 Dec 2024 14:34:51 -0000 1.181
+++ session.h 12 Dec 2024 10:47:14 -0000
@@ -228,6 +228,7 @@ struct peer {
uint8_t passive;
uint8_t throttled;
uint8_t rpending;
+ uint8_t rdesession;
};
extern time_t pauseaccept;
bgpd: cache Adj-RIB-Out when a peer goes down