From: Claudio Jeker Subject: bgpd needs a reaper To: tech@openbsd.org Date: Wed, 11 Dec 2024 07:17:11 +0100 When a peer is removed in the RDE the Adj-RIB-Out can be cleared in the background. So implement a reaper and put the dead peers on a zombielist. Now in peer_down run the reaper once so that session with only a few prefixes are immediatly gone. Not sure if that is premature optimisation. While there fixup peer_down prototype and instead call it from peer_shutdown(). The shutdown code is a bit funky and only runs when started with debug since it is stupid to clean up before exit. -- :wq Claudio Index: bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v diff -u -p -r1.503 bgpd.h --- bgpd.h 10 Dec 2024 14:34:51 -0000 1.503 +++ bgpd.h 11 Dec 2024 05:17:14 -0000 @@ -131,6 +131,7 @@ * IMSG_XON message will be sent and the RDE will produce more messages again. */ #define RDE_RUNNER_ROUNDS 100 +#define RDE_REAPER_ROUNDS 5000 #define SESS_MSG_HIGH_MARK 2000 #define SESS_MSG_LOW_MARK 500 #define CTL_MSG_HIGH_MARK 500 Index: rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v diff -u -p -r1.644 rde.c --- rde.c 10 Dec 2024 13:40:02 -0000 1.644 +++ rde.c 11 Dec 2024 05:12:34 -0000 @@ -260,7 +260,7 @@ rde_main(int debug, int verbose) } } - if (peer_imsg_pending() || rde_update_queue_pending() || + if (peer_work_pending() || rde_update_queue_pending() || nexthop_pending() || rib_dump_pending()) timeout = 0; @@ -309,6 +309,7 @@ rde_main(int debug, int verbose) } peer_foreach(rde_dispatch_imsg_peer, NULL); + peer_reaper(NULL); rib_dump_runner(); nexthop_runner(); if (ibuf_se && imsgbuf_queuelen(ibuf_se) < SESS_MSG_HIGH_MARK) { @@ -430,7 +431,7 @@ rde_dispatch_imsg_session(struct imsgbuf "IMSG_SESSION_DOWN", peerid); break; } - peer_down(peer, NULL); + peer_down(peer); break; case IMSG_SESSION_STALE: case IMSG_SESSION_NOGRACE: @@ -4681,7 +4682,7 @@ rde_shutdown(void) */ /* First all peers go down */ - peer_foreach(peer_down, NULL); + peer_shutdown(); /* free filters */ filterlist_free(out_rules); @@ -4696,7 +4697,6 @@ rde_shutdown(void) path_shutdown(); attr_shutdown(); pt_shutdown(); - peer_shutdown(); } struct rde_prefixset * Index: rde.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v diff -u -p -r1.307 rde.h --- rde.h 10 Dec 2024 13:40:02 -0000 1.307 +++ rde.h 11 Dec 2024 06:09:52 -0000 @@ -368,16 +368,17 @@ void rde_generate_updates(struct rib_e struct prefix *, enum eval_mode); void peer_up(struct rde_peer *, struct session_up *); -void peer_down(struct rde_peer *, void *); +void peer_down(struct rde_peer *); void peer_flush(struct rde_peer *, uint8_t, time_t); void peer_stale(struct rde_peer *, uint8_t, int); void peer_blast(struct rde_peer *, uint8_t); void peer_dump(struct rde_peer *, uint8_t); void peer_begin_rrefresh(struct rde_peer *, uint8_t); +int peer_work_pending(void); +void peer_reaper(struct rde_peer *); void peer_imsg_push(struct rde_peer *, struct imsg *); int peer_imsg_pop(struct rde_peer *, struct imsg *); -int peer_imsg_pending(void); void peer_imsg_flush(struct rde_peer *); static inline int @@ -602,6 +603,7 @@ void prefix_adjout_update(struct prefi struct filterstate *, struct pt_entry *, uint32_t); void prefix_adjout_withdraw(struct prefix *); void prefix_adjout_destroy(struct prefix *); +int prefix_adjout_reaper(struct rde_peer *); int prefix_dump_new(struct rde_peer *, uint8_t, unsigned int, void *, void (*)(struct prefix *, void *), void (*)(void *, uint8_t), int (*)(void *)); Index: rde_peer.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v diff -u -p -r1.40 rde_peer.c --- rde_peer.c 10 Dec 2024 13:40:02 -0000 1.40 +++ rde_peer.c 11 Dec 2024 05:20:06 -0000 @@ -26,7 +26,8 @@ #include "bgpd.h" #include "rde.h" -struct peer_tree peertable; +struct peer_tree peertable = RB_INITIALIZER(&peertable); +struct peer_tree zombietable = RB_INITIALIZER(&zombietable); struct rde_peer *peerself; static long imsg_pending; @@ -67,8 +68,6 @@ peer_init(struct filter_head *rules) { struct peer_config pc; - RB_INIT(&peertable); - memset(&pc, 0, sizeof(pc)); snprintf(pc.descr, sizeof(pc.descr), "LOCAL"); pc.id = PEER_ID_SELF; @@ -80,6 +79,14 @@ peer_init(struct filter_head *rules) void peer_shutdown(void) { + struct rde_peer *peer, *np; + + RB_FOREACH_SAFE(peer, peer_tree, &peertable, np) + peer_down(peer); + + while (!RB_EMPTY(&zombietable)) + peer_reaper(NULL); + if (!RB_EMPTY(&peertable)) log_warnx("%s: free non-free table", __func__); } @@ -400,7 +407,7 @@ peer_up(struct rde_peer *peer, struct se * this peer and clean up. */ void -peer_down(struct rde_peer *peer, void *bula) +peer_down(struct rde_peer *peer) { peer->remote_bgpid = 0; peer->state = PEER_DOWN; @@ -411,21 +418,21 @@ peer_down(struct rde_peer *peer, void *b rib_dump_terminate(peer); peer_imsg_flush(peer); - /* flush Adj-RIB-Out */ - if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL, - peer_adjout_clear_upcall, NULL, NULL) == -1) - fatal("%s: prefix_dump_new", __func__); - /* flush Adj-RIB-In */ peer_flush(peer, AID_UNSPEC, 0); peer->stats.prefix_cnt = 0; - peer->stats.prefix_out_cnt = 0; /* free filters */ filterlist_free(peer->out_rules); - RB_REMOVE(peer_tree, &peertable, peer); - free(peer); + + while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) { + log_warnx("zombie peer conflict"); + peer->conf.id = arc4random(); + } + + /* start reaping the zombie */ + peer_reaper(peer); } /* @@ -612,6 +619,34 @@ peer_begin_rrefresh(struct rde_peer *pee sleep(1); } +void +peer_reaper(struct rde_peer *peer) +{ + if (peer == NULL) + peer = RB_ROOT(&zombietable); + if (peer == NULL) + return; + + if (!prefix_adjout_reaper(peer)) + return; + + RB_REMOVE(peer_tree, &zombietable, peer); + free(peer); +} + +/* + * Check if any imsg are pending or any zombie peers are around. + * Return 0 if no work is pending. + */ +int +peer_work_pending(void) +{ + if (!RB_EMPTY(&zombietable)) + return 1; + return imsg_pending != 0; +} + + /* * move an imsg from src to dst, disconnecting any dynamic memory from src. */ @@ -657,15 +692,6 @@ peer_imsg_pop(struct rde_peer *peer, str imsg_pending--; return 1; -} - -/* - * Check if any imsg are pending, return 0 if none are pending - */ -int -peer_imsg_pending(void) -{ - return imsg_pending != 0; } /* Index: rde_rib.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v diff -u -p -r1.264 rde_rib.c --- rde_rib.c 10 Dec 2024 20:06:11 -0000 1.264 +++ rde_rib.c 11 Dec 2024 05:14:31 -0000 @@ -1417,6 +1417,20 @@ prefix_adjout_destroy(struct prefix *p) } } +int +prefix_adjout_reaper(struct rde_peer *peer) +{ + struct prefix *p, *np; + int count = RDE_REAPER_ROUNDS; + + RB_FOREACH_SAFE(p, prefix_index, &peer->adj_rib_out, np) { + prefix_adjout_destroy(p); + if (count-- <= 0) + return 0; + } + return 1; +} + static struct prefix * prefix_restart(struct rib_context *ctx) {