Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: track rde main event loop performance
To:
tech@openbsd.org
Date:
Wed, 12 Nov 2025 15:36:40 +0100

Download raw body.

Thread
This diff adds some counters to track how much time the main event loop
uses for which part of the loop.

This helps a lot to better understand why stuff is slow. Also by having
the loop count allows one to calculate how frequently the loop runs.
I find this very useful for my ongoing work on making bgpd faster.
-- 
:wq Claudio

Index: bgpctl/ometric.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/ometric.c,v
diff -u -p -r1.10 ometric.c
--- bgpctl/ometric.c	6 Jan 2023 13:26:57 -0000	1.10
+++ bgpctl/ometric.c	10 Nov 2025 13:44:31 -0000
@@ -514,6 +514,17 @@ ometric_set_int_with_labels(struct ometr
 }
 
 void
+ometric_set_float_with_labels(struct ometric *om, double val,
+    const char **keys, const char **values, struct olabels *ol)
+{
+	struct olabels *extra;
+
+	extra = olabels_add_extras(ol, keys, values);
+	ometric_set_float(om, val, extra);
+	olabels_free(extra);
+}
+
+void
 ometric_set_timespec_with_labels(struct ometric *om, struct timespec *ts,
     const char **keys, const char **values, struct olabels *ol)
 {
Index: bgpctl/ometric.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/ometric.h,v
diff -u -p -r1.6 ometric.h
--- bgpctl/ometric.h	6 Jan 2023 13:26:57 -0000	1.6
+++ bgpctl/ometric.h	10 Nov 2025 13:44:31 -0000
@@ -48,6 +48,8 @@ void	ometric_set_info(struct ometric *, 
 void	ometric_set_state(struct ometric *, const char *, struct olabels *);
 void	ometric_set_int_with_labels(struct ometric *, uint64_t, const char **,
 	    const char **, struct olabels *);
+void	ometric_set_float_with_labels(struct ometric *, double, const char **,
+	    const char **, struct olabels *);
 void	ometric_set_timespec_with_labels(struct ometric *, struct timespec *,
 	    const char **, const char **, struct olabels *);
 #define OKV(...)		(const char *[]){ __VA_ARGS__, NULL }
Index: bgpctl/output.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
diff -u -p -r1.61 output.c
--- bgpctl/output.c	10 Mar 2025 14:08:25 -0000	1.61
+++ bgpctl/output.c	12 Nov 2025 14:31:55 -0000
@@ -1102,6 +1102,17 @@ show_rib_mem(struct rde_memstats *stats)
 	    stats->attr_data));
 	printf("Sets using %s of memory\n", fmt_mem(stats->aset_size +
 	    stats->pset_size));
+
+	printf("\nRDE timing statistics\n");
+	printf("%10lld usec spent in the event loop for %llu rounds\n",
+	    stats->rde_event_loop_usec, stats->rde_event_loop_count);
+	printf("%10lld usec spent on io\n", stats->rde_event_io_usec);
+	printf("%10lld usec spent on peers\n", stats->rde_event_peer_usec);
+	printf("%10lld usec spent on rib dumps\n",
+	    stats->rde_event_ribdump_usec);
+	printf("%10lld usec spent on nexthops\n",
+	    stats->rde_event_nexthop_usec);
+	printf("%10lld usec spent on updates\n", stats->rde_event_update_usec);
 }
 
 static void
Index: bgpctl/output_json.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
diff -u -p -r1.52 output_json.c
--- bgpctl/output_json.c	10 Mar 2025 14:08:25 -0000	1.52
+++ bgpctl/output_json.c	12 Nov 2025 14:31:57 -0000
@@ -934,6 +934,16 @@ json_rib_mem(struct rde_memstats *stats)
 	json_rib_mem_element("total", UINT64_MAX,
 	    stats->aset_size + stats->pset_size, UINT64_MAX);
 	json_do_end();
+
+	json_do_object("evloop", 0);
+	json_do_uint("count", stats->rde_event_loop_count);
+	json_do_uint("loop_usec", stats->rde_event_loop_usec);
+	json_do_uint("io_usec", stats->rde_event_io_usec);
+	json_do_uint("peer_usec", stats->rde_event_peer_usec);
+	json_do_uint("ribdump_usec", stats->rde_event_ribdump_usec);
+	json_do_uint("nexthop_usec", stats->rde_event_nexthop_usec);
+	json_do_uint("update_usec", stats->rde_event_update_usec);
+	json_do_end();
 }
 
 static void
Index: bgpctl/output_ometric.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v
diff -u -p -r1.16 output_ometric.c
--- bgpctl/output_ometric.c	30 Oct 2025 12:43:53 -0000	1.16
+++ bgpctl/output_ometric.c	12 Nov 2025 14:32:05 -0000
@@ -49,6 +49,7 @@ struct ometric *peer_rr_borr_transmit, *
 struct ometric *peer_rr_eorr_transmit, *peer_rr_eorr_receive;
 struct ometric *rde_mem_size, *rde_mem_count, *rde_mem_ref_count;
 struct ometric *rde_set_size, *rde_set_count, *rde_table_count;
+struct ometric *rde_evloop_count, *rde_evloop_time;
 
 struct timespec start_time, end_time;
 
@@ -166,6 +167,11 @@ ometric_head(struct parse_result *arg)
 	    "bgpd_rde_set_usage_objects", "number of object in set");
 	rde_table_count = ometric_new(OMT_GAUGE,
 	    "bgpd_rde_set_usage_tables", "number of as_set tables");
+
+	rde_evloop_count = ometric_new(OMT_COUNTER,
+	    "bgpd_rde_evloop", "number of times the evloop ran");
+	rde_evloop_time = ometric_new(OMT_COUNTER,
+	    "bgpd_rde_evloop_seconds", "RDE evloop time usage");
 }
 
 static void
@@ -320,6 +326,26 @@ ometric_rib_mem(struct rde_memstats *sta
 	    OKV("type"), OKV("prefix_set"), NULL);
 	ometric_rib_mem_element("set_total", UINT64_MAX,
 	    stats->aset_size + stats->pset_size, UINT64_MAX);
+
+	ometric_set_int(rde_evloop_count, stats->rde_event_loop_count, NULL);
+	ometric_set_float_with_labels(rde_evloop_time,
+	    (double)stats->rde_event_loop_usec / (1000.0 * 1000.0) ,
+	    OKV("stage"), OKV("main"), NULL);
+	ometric_set_float_with_labels(rde_evloop_time,
+	    (double)stats->rde_event_io_usec / (1000.0 * 1000.0) ,
+	    OKV("stage"), OKV("io"), NULL);
+	ometric_set_float_with_labels(rde_evloop_time,
+	    (double)stats->rde_event_peer_usec / (1000.0 * 1000.0) ,
+	    OKV("stage"), OKV("peer"), NULL);
+	ometric_set_float_with_labels(rde_evloop_time,
+	    (double)stats->rde_event_ribdump_usec / (1000.0 * 1000.0) ,
+	    OKV("stage"), OKV("ribdumps"), NULL);
+	ometric_set_float_with_labels(rde_evloop_time,
+	    (double)stats->rde_event_nexthop_usec / (1000.0 * 1000.0) ,
+	    OKV("stage"), OKV("nexthop"), NULL);
+	ometric_set_float_with_labels(rde_evloop_time,
+	    (double)stats->rde_event_update_usec / (1000.0 * 1000.0) ,
+	    OKV("stage"), OKV("update"), NULL);
 }
 
 static void
Index: bgpd/bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.521 bgpd.h
--- bgpd/bgpd.h	4 Nov 2025 10:47:25 -0000	1.521
+++ bgpd/bgpd.h	12 Nov 2025 14:28:21 -0000
@@ -1410,6 +1410,13 @@ struct rde_memstats {
 	long long	aset_nmemb;
 	long long	pset_cnt;
 	long long	pset_size;
+	long long	rde_event_loop_count;
+	long long	rde_event_loop_usec;
+	long long	rde_event_io_usec;
+	long long	rde_event_peer_usec;
+	long long	rde_event_ribdump_usec;
+	long long	rde_event_nexthop_usec;
+	long long	rde_event_update_usec;
 };
 
 #define	MRT_FILE_LEN	512
Index: bgpd/monotime.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/monotime.h,v
diff -u -p -r1.1 monotime.h
--- bgpd/monotime.h	20 Feb 2025 19:47:31 -0000	1.1
+++ bgpd/monotime.h	10 Nov 2025 13:44:31 -0000
@@ -72,6 +72,12 @@ monotime_sub(monotime_t minu, monotime_t
 }
 
 static inline long long
+monotime_to_usec(monotime_t mt)
+{
+	return mt.monotime / (MONOTIME_RES / (1000 * 1000ULL));
+}
+
+static inline long long
 monotime_to_msec(monotime_t mt)
 {
 	return mt.monotime / (MONOTIME_RES / 1000);
Index: bgpd/rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.661 rde.c
--- bgpd/rde.c	4 Nov 2025 15:01:09 -0000	1.661
+++ bgpd/rde.c	12 Nov 2025 14:31:14 -0000
@@ -164,6 +164,7 @@ rde_main(int debug, int verbose)
 	struct passwd		*pw;
 	struct pollfd		*pfd = NULL;
 	struct rde_mrt_ctx	*mctx, *xmctx;
+	monotime_t		 loop_start, io_end, peer_end, dump_end, nh_end;
 	void			*newp;
 	u_int			 pfd_elms = 0, i, j;
 	int			 timeout;
@@ -223,6 +224,7 @@ rde_main(int debug, int verbose)
 	conf = new_config();
 	log_info("route decision engine ready");
 
+	loop_start = getmonotime();
 	while (rde_quit == 0) {
 		if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
 			if ((newp = reallocarray(pfd,
@@ -266,12 +268,18 @@ rde_main(int debug, int verbose)
 		    nexthop_pending() || rib_dump_pending())
 			timeout = 0;
 
+		rdemem.rde_event_loop_usec +=
+		    monotime_to_usec(monotime_sub(getmonotime(), loop_start));
+
 		if (poll(pfd, i, timeout) == -1) {
 			if (errno == EINTR)
 				continue;
 			fatal("poll error");
 		}
 
+		rdemem.rde_event_loop_count++;
+		loop_start = getmonotime();
+
 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
 			log_warnx("RDE: Lost connection to parent");
 			rde_quit = 1;
@@ -312,14 +320,37 @@ rde_main(int debug, int verbose)
 			mctx = LIST_NEXT(mctx, entry);
 		}
 
+		io_end = getmonotime();
+		rdemem.rde_event_io_usec +=
+		    monotime_to_usec(monotime_sub(io_end, loop_start));
+
 		peer_foreach(rde_dispatch_imsg_peer, NULL);
 		peer_reaper(NULL);
+
+		peer_end = getmonotime();
+		rdemem.rde_event_peer_usec +=
+		    monotime_to_usec(monotime_sub(peer_end, io_end));
+
 		rib_dump_runner();
+
+		dump_end = getmonotime();
+		rdemem.rde_event_ribdump_usec +=
+		    monotime_to_usec(monotime_sub(dump_end, peer_end));
+
 		nexthop_runner();
+
+		nh_end = getmonotime();
+		rdemem.rde_event_nexthop_usec +=
+		    monotime_to_usec(monotime_sub(nh_end, dump_end));
+
 		if (ibuf_se && imsgbuf_queuelen(ibuf_se) < SESS_MSG_HIGH_MARK) {
 			for (aid = AID_MIN; aid < AID_MAX; aid++)
 				rde_update_queue_runner(aid);
 		}
+
+		rdemem.rde_event_update_usec +=
+		    monotime_to_usec(monotime_sub(getmonotime(), nh_end));
+
 		/* commit pftable once per poll loop */
 		rde_commit_pftable();
 	}