Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd needs a reaper
To:
tech@openbsd.org
Date:
Wed, 11 Dec 2024 07:17:11 +0100

Download raw body.

Thread
When a peer is removed in the RDE the Adj-RIB-Out can be cleared in the
background. So implement a reaper and put the dead peers on a zombielist.
Now in peer_down run the reaper once so that session with only a few
prefixes are immediatly gone. Not sure if that is premature optimisation.

While there fixup peer_down prototype and instead call it from
peer_shutdown(). The shutdown code is a bit funky and only runs when
started with debug since it is stupid to clean up before exit.

-- 
:wq Claudio

Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.503 bgpd.h
--- bgpd.h	10 Dec 2024 14:34:51 -0000	1.503
+++ bgpd.h	11 Dec 2024 05:17:14 -0000
@@ -131,6 +131,7 @@
  * IMSG_XON message will be sent and the RDE will produce more messages again.
  */
 #define RDE_RUNNER_ROUNDS	100
+#define RDE_REAPER_ROUNDS	5000
 #define SESS_MSG_HIGH_MARK	2000
 #define SESS_MSG_LOW_MARK	500
 #define CTL_MSG_HIGH_MARK	500
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.644 rde.c
--- rde.c	10 Dec 2024 13:40:02 -0000	1.644
+++ rde.c	11 Dec 2024 05:12:34 -0000
@@ -260,7 +260,7 @@ rde_main(int debug, int verbose)
 			}
 		}
 
-		if (peer_imsg_pending() || rde_update_queue_pending() ||
+		if (peer_work_pending() || rde_update_queue_pending() ||
 		    nexthop_pending() || rib_dump_pending())
 			timeout = 0;
 
@@ -309,6 +309,7 @@ rde_main(int debug, int verbose)
 		}
 
 		peer_foreach(rde_dispatch_imsg_peer, NULL);
+		peer_reaper(NULL);
 		rib_dump_runner();
 		nexthop_runner();
 		if (ibuf_se && imsgbuf_queuelen(ibuf_se) < SESS_MSG_HIGH_MARK) {
@@ -430,7 +431,7 @@ rde_dispatch_imsg_session(struct imsgbuf
 				    "IMSG_SESSION_DOWN", peerid);
 				break;
 			}
-			peer_down(peer, NULL);
+			peer_down(peer);
 			break;
 		case IMSG_SESSION_STALE:
 		case IMSG_SESSION_NOGRACE:
@@ -4681,7 +4682,7 @@ rde_shutdown(void)
 	 */
 
 	/* First all peers go down */
-	peer_foreach(peer_down, NULL);
+	peer_shutdown();
 
 	/* free filters */
 	filterlist_free(out_rules);
@@ -4696,7 +4697,6 @@ rde_shutdown(void)
 	path_shutdown();
 	attr_shutdown();
 	pt_shutdown();
-	peer_shutdown();
 }
 
 struct rde_prefixset *
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.307 rde.h
--- rde.h	10 Dec 2024 13:40:02 -0000	1.307
+++ rde.h	11 Dec 2024 06:09:52 -0000
@@ -368,16 +368,17 @@ void		 rde_generate_updates(struct rib_e
 		    struct prefix *, enum eval_mode);
 
 void		 peer_up(struct rde_peer *, struct session_up *);
-void		 peer_down(struct rde_peer *, void *);
+void		 peer_down(struct rde_peer *);
 void		 peer_flush(struct rde_peer *, uint8_t, time_t);
 void		 peer_stale(struct rde_peer *, uint8_t, int);
 void		 peer_blast(struct rde_peer *, uint8_t);
 void		 peer_dump(struct rde_peer *, uint8_t);
 void		 peer_begin_rrefresh(struct rde_peer *, uint8_t);
+int		 peer_work_pending(void);
+void		 peer_reaper(struct rde_peer *);
 
 void		 peer_imsg_push(struct rde_peer *, struct imsg *);
 int		 peer_imsg_pop(struct rde_peer *, struct imsg *);
-int		 peer_imsg_pending(void);
 void		 peer_imsg_flush(struct rde_peer *);
 
 static inline int
@@ -602,6 +603,7 @@ void		 prefix_adjout_update(struct prefi
 		    struct filterstate *, struct pt_entry *, uint32_t);
 void		 prefix_adjout_withdraw(struct prefix *);
 void		 prefix_adjout_destroy(struct prefix *);
+int		 prefix_adjout_reaper(struct rde_peer *);
 int		 prefix_dump_new(struct rde_peer *, uint8_t, unsigned int,
 		    void *, void (*)(struct prefix *, void *),
 		    void (*)(void *, uint8_t), int (*)(void *));
Index: rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.40 rde_peer.c
--- rde_peer.c	10 Dec 2024 13:40:02 -0000	1.40
+++ rde_peer.c	11 Dec 2024 05:20:06 -0000
@@ -26,7 +26,8 @@
 #include "bgpd.h"
 #include "rde.h"
 
-struct peer_tree	 peertable;
+struct peer_tree	 peertable = RB_INITIALIZER(&peertable);
+struct peer_tree	 zombietable = RB_INITIALIZER(&zombietable);
 struct rde_peer		*peerself;
 static long		 imsg_pending;
 
@@ -67,8 +68,6 @@ peer_init(struct filter_head *rules)
 {
 	struct peer_config pc;
 
-	RB_INIT(&peertable);
-
 	memset(&pc, 0, sizeof(pc));
 	snprintf(pc.descr, sizeof(pc.descr), "LOCAL");
 	pc.id = PEER_ID_SELF;
@@ -80,6 +79,14 @@ peer_init(struct filter_head *rules)
 void
 peer_shutdown(void)
 {
+	struct rde_peer *peer, *np;
+
+	RB_FOREACH_SAFE(peer, peer_tree, &peertable, np)
+		peer_down(peer);
+
+	while (!RB_EMPTY(&zombietable))
+		peer_reaper(NULL);
+
 	if (!RB_EMPTY(&peertable))
 		log_warnx("%s: free non-free table", __func__);
 }
@@ -400,7 +407,7 @@ peer_up(struct rde_peer *peer, struct se
  * this peer and clean up.
  */
 void
-peer_down(struct rde_peer *peer, void *bula)
+peer_down(struct rde_peer *peer)
 {
 	peer->remote_bgpid = 0;
 	peer->state = PEER_DOWN;
@@ -411,21 +418,21 @@ peer_down(struct rde_peer *peer, void *b
 	rib_dump_terminate(peer);
 	peer_imsg_flush(peer);
 
-	/* flush Adj-RIB-Out */
-	if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
-	    peer_adjout_clear_upcall, NULL, NULL) == -1)
-		fatal("%s: prefix_dump_new", __func__);
-
 	/* flush Adj-RIB-In */
 	peer_flush(peer, AID_UNSPEC, 0);
 	peer->stats.prefix_cnt = 0;
-	peer->stats.prefix_out_cnt = 0;
 
 	/* free filters */
 	filterlist_free(peer->out_rules);
-
 	RB_REMOVE(peer_tree, &peertable, peer);
-	free(peer);
+
+	while (RB_INSERT(peer_tree, &zombietable, peer) != NULL) {
+		log_warnx("zombie peer conflict");
+		peer->conf.id = arc4random();
+	}
+
+	/* start reaping the zombie */
+	peer_reaper(peer);
 }
 
 /*
@@ -612,6 +619,34 @@ peer_begin_rrefresh(struct rde_peer *pee
 		sleep(1);
 }
 
+void
+peer_reaper(struct rde_peer *peer)
+{
+	if (peer == NULL)
+		peer = RB_ROOT(&zombietable);
+	if (peer == NULL)
+		return;
+
+	if (!prefix_adjout_reaper(peer))
+		return;
+
+	RB_REMOVE(peer_tree, &zombietable, peer);
+	free(peer);
+}
+
+/*
+ * Check if any imsg are pending or any zombie peers are around.
+ * Return 0 if no work is pending.
+ */
+int
+peer_work_pending(void)
+{
+	if (!RB_EMPTY(&zombietable))
+		return 1;
+	return imsg_pending != 0;
+}
+
+
 /*
  * move an imsg from src to dst, disconnecting any dynamic memory from src.
  */
@@ -657,15 +692,6 @@ peer_imsg_pop(struct rde_peer *peer, str
 	imsg_pending--;
 
 	return 1;
-}
-
-/*
- * Check if any imsg are pending, return 0 if none are pending
- */
-int
-peer_imsg_pending(void)
-{
-	return imsg_pending != 0;
 }
 
 /*
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
diff -u -p -r1.264 rde_rib.c
--- rde_rib.c	10 Dec 2024 20:06:11 -0000	1.264
+++ rde_rib.c	11 Dec 2024 05:14:31 -0000
@@ -1417,6 +1417,20 @@ prefix_adjout_destroy(struct prefix *p)
 	}
 }
 
+int
+prefix_adjout_reaper(struct rde_peer *peer)
+{
+	struct prefix *p, *np;
+	int count = RDE_REAPER_ROUNDS;
+
+	RB_FOREACH_SAFE(p, prefix_index, &peer->adj_rib_out, np) {
+		prefix_adjout_destroy(p);
+		if (count-- <= 0)
+			return 0;
+	}
+	return 1;
+}
+
 static struct prefix *
 prefix_restart(struct rib_context *ctx)
 {