bgpd: track rde main event loop performance
This diff adds counters to track how much time the main event loop
spends in each part of the loop.
This helps a lot in understanding why things are slow. Having the loop
count also allows one to calculate how frequently the loop runs.
I find this very useful for my ongoing work on making bgpd faster.
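As a minimal sketch of the pattern (do_io() and do_peers() are
hypothetical stand-ins for the real loop stages, and plain
CLOCK_MONOTONIC is used instead of bgpd's monotime helpers): each stage
is bracketed by timestamps and the delta is added to a per-stage
counter.

#include <time.h>

static long long io_usec, peer_usec, loop_count;

static long long
now_usec(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (long long)ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
}

static void do_io(void) { }	/* placeholder for imsg/mrt io */
static void do_peers(void) { }	/* placeholder for peer dispatch */

static void
loop_body(void)
{
	long long start, io_end, peer_end;

	start = now_usec();
	do_io();
	io_end = now_usec();
	io_usec += io_end - start;	/* charge the delta to io */

	do_peers();
	peer_end = now_usec();
	peer_usec += peer_end - io_end;	/* charge the delta to peers */

	loop_count++;
}

The actual diff below does the same with getmonotime() and
monotime_sub().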
--
:wq Claudio
Index: bgpctl/ometric.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/ometric.c,v
diff -u -p -r1.10 ometric.c
--- bgpctl/ometric.c 6 Jan 2023 13:26:57 -0000 1.10
+++ bgpctl/ometric.c 10 Nov 2025 13:44:31 -0000
@@ -514,6 +514,17 @@ ometric_set_int_with_labels(struct ometr
}
void
+ometric_set_float_with_labels(struct ometric *om, double val,
+ const char **keys, const char **values, struct olabels *ol)
+{
+ struct olabels *extra;
+
+ extra = olabels_add_extras(ol, keys, values);
+ ometric_set_float(om, val, extra);
+ olabels_free(extra);
+}
+
+void
ometric_set_timespec_with_labels(struct ometric *om, struct timespec *ts,
const char **keys, const char **values, struct olabels *ol)
{
Index: bgpctl/ometric.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/ometric.h,v
diff -u -p -r1.6 ometric.h
--- bgpctl/ometric.h 6 Jan 2023 13:26:57 -0000 1.6
+++ bgpctl/ometric.h 10 Nov 2025 13:44:31 -0000
@@ -48,6 +48,8 @@ void ometric_set_info(struct ometric *,
void ometric_set_state(struct ometric *, const char *, struct olabels *);
void ometric_set_int_with_labels(struct ometric *, uint64_t, const char **,
const char **, struct olabels *);
+void ometric_set_float_with_labels(struct ometric *, double, const char **,
+ const char **, struct olabels *);
void ometric_set_timespec_with_labels(struct ometric *, struct timespec *,
const char **, const char **, struct olabels *);
#define OKV(...) (const char *[]){ __VA_ARGS__, NULL }
Index: bgpctl/output.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
diff -u -p -r1.61 output.c
--- bgpctl/output.c 10 Mar 2025 14:08:25 -0000 1.61
+++ bgpctl/output.c 12 Nov 2025 14:31:55 -0000
@@ -1102,6 +1102,17 @@ show_rib_mem(struct rde_memstats *stats)
stats->attr_data));
printf("Sets using %s of memory\n", fmt_mem(stats->aset_size +
stats->pset_size));
+
+ printf("\nRDE timing statistics\n");
+ printf("%10lld usec spent in the event loop for %llu rounds\n",
+ stats->rde_event_loop_usec, stats->rde_event_loop_count);
+ printf("%10lld usec spent on io\n", stats->rde_event_io_usec);
+ printf("%10lld usec spent on peers\n", stats->rde_event_peer_usec);
+ printf("%10lld usec spent on rib dumps\n",
+ stats->rde_event_ribdump_usec);
+ printf("%10lld usec spent on nexthops\n",
+ stats->rde_event_nexthop_usec);
+ printf("%10lld usec spent on updates\n", stats->rde_event_update_usec);
}
static void
Index: bgpctl/output_json.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
diff -u -p -r1.52 output_json.c
--- bgpctl/output_json.c 10 Mar 2025 14:08:25 -0000 1.52
+++ bgpctl/output_json.c 12 Nov 2025 14:31:57 -0000
@@ -934,6 +934,16 @@ json_rib_mem(struct rde_memstats *stats)
json_rib_mem_element("total", UINT64_MAX,
stats->aset_size + stats->pset_size, UINT64_MAX);
json_do_end();
+
+ json_do_object("evloop", 0);
+ json_do_uint("count", stats->rde_event_loop_count);
+ json_do_uint("loop_usec", stats->rde_event_loop_usec);
+ json_do_uint("io_usec", stats->rde_event_io_usec);
+ json_do_uint("peer_usec", stats->rde_event_peer_usec);
+ json_do_uint("ribdump_usec", stats->rde_event_ribdump_usec);
+ json_do_uint("nexthop_usec", stats->rde_event_nexthop_usec);
+ json_do_uint("update_usec", stats->rde_event_update_usec);
+ json_do_end();
}
static void
Index: bgpctl/output_ometric.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v
diff -u -p -r1.16 output_ometric.c
--- bgpctl/output_ometric.c 30 Oct 2025 12:43:53 -0000 1.16
+++ bgpctl/output_ometric.c 12 Nov 2025 14:32:05 -0000
@@ -49,6 +49,7 @@ struct ometric *peer_rr_borr_transmit, *
struct ometric *peer_rr_eorr_transmit, *peer_rr_eorr_receive;
struct ometric *rde_mem_size, *rde_mem_count, *rde_mem_ref_count;
struct ometric *rde_set_size, *rde_set_count, *rde_table_count;
+struct ometric *rde_evloop_count, *rde_evloop_time;
struct timespec start_time, end_time;
@@ -166,6 +167,11 @@ ometric_head(struct parse_result *arg)
"bgpd_rde_set_usage_objects", "number of object in set");
rde_table_count = ometric_new(OMT_GAUGE,
"bgpd_rde_set_usage_tables", "number of as_set tables");
+
+ rde_evloop_count = ometric_new(OMT_COUNTER,
+ "bgpd_rde_evloop", "number of times the evloop ran");
+ rde_evloop_time = ometric_new(OMT_COUNTER,
+ "bgpd_rde_evloop_seconds", "RDE evloop time usage");
}
static void
@@ -320,6 +326,26 @@ ometric_rib_mem(struct rde_memstats *sta
OKV("type"), OKV("prefix_set"), NULL);
ometric_rib_mem_element("set_total", UINT64_MAX,
stats->aset_size + stats->pset_size, UINT64_MAX);
+
+ ometric_set_int(rde_evloop_count, stats->rde_event_loop_count, NULL);
+ ometric_set_float_with_labels(rde_evloop_time,
+ (double)stats->rde_event_loop_usec / (1000.0 * 1000.0) ,
+ OKV("stage"), OKV("main"), NULL);
+ ometric_set_float_with_labels(rde_evloop_time,
+ (double)stats->rde_event_io_usec / (1000.0 * 1000.0) ,
+ OKV("stage"), OKV("io"), NULL);
+ ometric_set_float_with_labels(rde_evloop_time,
+ (double)stats->rde_event_peer_usec / (1000.0 * 1000.0) ,
+ OKV("stage"), OKV("peer"), NULL);
+ ometric_set_float_with_labels(rde_evloop_time,
+ (double)stats->rde_event_ribdump_usec / (1000.0 * 1000.0) ,
+ OKV("stage"), OKV("ribdumps"), NULL);
+ ometric_set_float_with_labels(rde_evloop_time,
+ (double)stats->rde_event_nexthop_usec / (1000.0 * 1000.0) ,
+ OKV("stage"), OKV("nexthop"), NULL);
+ ometric_set_float_with_labels(rde_evloop_time,
+ (double)stats->rde_event_update_usec / (1000.0 * 1000.0) ,
+ OKV("stage"), OKV("update"), NULL);
}
static void
Index: bgpd/bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.521 bgpd.h
--- bgpd/bgpd.h 4 Nov 2025 10:47:25 -0000 1.521
+++ bgpd/bgpd.h 12 Nov 2025 14:28:21 -0000
@@ -1410,6 +1410,13 @@ struct rde_memstats {
long long aset_nmemb;
long long pset_cnt;
long long pset_size;
+ long long rde_event_loop_count;
+ long long rde_event_loop_usec;
+ long long rde_event_io_usec;
+ long long rde_event_peer_usec;
+ long long rde_event_ribdump_usec;
+ long long rde_event_nexthop_usec;
+ long long rde_event_update_usec;
};
#define MRT_FILE_LEN 512
Index: bgpd/monotime.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/monotime.h,v
diff -u -p -r1.1 monotime.h
--- bgpd/monotime.h 20 Feb 2025 19:47:31 -0000 1.1
+++ bgpd/monotime.h 10 Nov 2025 13:44:31 -0000
@@ -72,6 +72,12 @@ monotime_sub(monotime_t minu, monotime_t
}
static inline long long
+monotime_to_usec(monotime_t mt)
+{
+ return mt.monotime / (MONOTIME_RES / (1000 * 1000ULL));
+}
+
+static inline long long
monotime_to_msec(monotime_t mt)
{
return mt.monotime / (MONOTIME_RES / 1000);
Index: bgpd/rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.661 rde.c
--- bgpd/rde.c 4 Nov 2025 15:01:09 -0000 1.661
+++ bgpd/rde.c 12 Nov 2025 14:31:14 -0000
@@ -164,6 +164,7 @@ rde_main(int debug, int verbose)
struct passwd *pw;
struct pollfd *pfd = NULL;
struct rde_mrt_ctx *mctx, *xmctx;
+ monotime_t loop_start, io_end, peer_end, dump_end, nh_end;
void *newp;
u_int pfd_elms = 0, i, j;
int timeout;
@@ -223,6 +224,7 @@ rde_main(int debug, int verbose)
conf = new_config();
log_info("route decision engine ready");
+ loop_start = getmonotime();
while (rde_quit == 0) {
if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
if ((newp = reallocarray(pfd,
@@ -266,12 +268,18 @@ rde_main(int debug, int verbose)
nexthop_pending() || rib_dump_pending())
timeout = 0;
+ rdemem.rde_event_loop_usec +=
+ monotime_to_usec(monotime_sub(getmonotime(), loop_start));
+
if (poll(pfd, i, timeout) == -1) {
if (errno == EINTR)
continue;
fatal("poll error");
}
+ rdemem.rde_event_loop_count++;
+ loop_start = getmonotime();
+
if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
log_warnx("RDE: Lost connection to parent");
rde_quit = 1;
@@ -312,14 +320,37 @@ rde_main(int debug, int verbose)
mctx = LIST_NEXT(mctx, entry);
}
+ io_end = getmonotime();
+ rdemem.rde_event_io_usec +=
+ monotime_to_usec(monotime_sub(io_end, loop_start));
+
peer_foreach(rde_dispatch_imsg_peer, NULL);
peer_reaper(NULL);
+
+ peer_end = getmonotime();
+ rdemem.rde_event_peer_usec +=
+ monotime_to_usec(monotime_sub(peer_end, io_end));
+
rib_dump_runner();
+
+ dump_end = getmonotime();
+ rdemem.rde_event_ribdump_usec +=
+ monotime_to_usec(monotime_sub(dump_end, peer_end));
+
nexthop_runner();
+
+ nh_end = getmonotime();
+ rdemem.rde_event_nexthop_usec +=
+ monotime_to_usec(monotime_sub(nh_end, dump_end));
+
if (ibuf_se && imsgbuf_queuelen(ibuf_se) < SESS_MSG_HIGH_MARK) {
for (aid = AID_MIN; aid < AID_MAX; aid++)
rde_update_queue_runner(aid);
}
+
+ rdemem.rde_event_update_usec +=
+ monotime_to_usec(monotime_sub(getmonotime(), nh_end));
+
/* commit pftable once per poll loop */
rde_commit_pftable();
}
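To turn the raw counters into something readable: the loop time is
accumulated right before poll(2) and the start time is reset right
after it returns, so rde_event_loop_usec measures the loop body without
the poll wait. A hypothetical post-processing helper (not part of this
diff) could derive the loop frequency and the average time per round
from two rde_memstats samples taken interval_sec seconds apart:

#include "bgpd.h"

static void
rde_loop_rates(const struct rde_memstats *a, const struct rde_memstats *b,
    double interval_sec, double *rounds_per_sec, double *avg_round_usec)
{
	long long rounds = b->rde_event_loop_count - a->rde_event_loop_count;

	*rounds_per_sec = rounds / interval_sec;
	*avg_round_usec = rounds == 0 ? 0.0 :
	    (double)(b->rde_event_loop_usec - a->rde_event_loop_usec) / rounds;
}

Dividing the per-stage counters by rde_event_loop_usec then gives each
stage's share of the loop body.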