From: Claudio Jeker Subject: bgpd: track rde main event loop performance To: tech@openbsd.org Date: Wed, 12 Nov 2025 15:36:40 +0100 This diff adds some counters to track how much time the main event loop uses for which part of the loop. This helps a lot to better understand why stuff is slow. Also by having the loop count allows one to calculate how frequently the loop runs. I find this very useful for my ongoing work on making bgpd faster. -- :wq Claudio Index: bgpctl/ometric.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/ometric.c,v diff -u -p -r1.10 ometric.c --- bgpctl/ometric.c 6 Jan 2023 13:26:57 -0000 1.10 +++ bgpctl/ometric.c 10 Nov 2025 13:44:31 -0000 @@ -514,6 +514,17 @@ ometric_set_int_with_labels(struct ometr } void +ometric_set_float_with_labels(struct ometric *om, double val, + const char **keys, const char **values, struct olabels *ol) +{ + struct olabels *extra; + + extra = olabels_add_extras(ol, keys, values); + ometric_set_float(om, val, extra); + olabels_free(extra); +} + +void ometric_set_timespec_with_labels(struct ometric *om, struct timespec *ts, const char **keys, const char **values, struct olabels *ol) { Index: bgpctl/ometric.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/ometric.h,v diff -u -p -r1.6 ometric.h --- bgpctl/ometric.h 6 Jan 2023 13:26:57 -0000 1.6 +++ bgpctl/ometric.h 10 Nov 2025 13:44:31 -0000 @@ -48,6 +48,8 @@ void ometric_set_info(struct ometric *, void ometric_set_state(struct ometric *, const char *, struct olabels *); void ometric_set_int_with_labels(struct ometric *, uint64_t, const char **, const char **, struct olabels *); +void ometric_set_float_with_labels(struct ometric *, double, const char **, + const char **, struct olabels *); void ometric_set_timespec_with_labels(struct ometric *, struct timespec *, const char **, const char **, struct olabels *); #define OKV(...) 
(const char *[]){ __VA_ARGS__, NULL } Index: bgpctl/output.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v diff -u -p -r1.61 output.c --- bgpctl/output.c 10 Mar 2025 14:08:25 -0000 1.61 +++ bgpctl/output.c 12 Nov 2025 14:31:55 -0000 @@ -1102,6 +1102,17 @@ show_rib_mem(struct rde_memstats *stats) stats->attr_data)); printf("Sets using %s of memory\n", fmt_mem(stats->aset_size + stats->pset_size)); + + printf("\nRDE timing statistics\n"); + printf("%10lld usec spent in the event loop for %lld rounds\n", + stats->rde_event_loop_usec, stats->rde_event_loop_count); + printf("%10lld usec spent on io\n", stats->rde_event_io_usec); + printf("%10lld usec spent on peers\n", stats->rde_event_peer_usec); + printf("%10lld usec spent on rib dumps\n", + stats->rde_event_ribdump_usec); + printf("%10lld usec spent on nexthops\n", + stats->rde_event_nexthop_usec); + printf("%10lld usec spent on updates\n", stats->rde_event_update_usec); } static void Index: bgpctl/output_json.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v diff -u -p -r1.52 output_json.c --- bgpctl/output_json.c 10 Mar 2025 14:08:25 -0000 1.52 +++ bgpctl/output_json.c 12 Nov 2025 14:31:57 -0000 @@ -934,6 +934,16 @@ json_rib_mem(struct rde_memstats *stats) json_rib_mem_element("total", UINT64_MAX, stats->aset_size + stats->pset_size, UINT64_MAX); json_do_end(); + + json_do_object("evloop", 0); + json_do_uint("count", stats->rde_event_loop_count); + json_do_uint("loop_usec", stats->rde_event_loop_usec); + json_do_uint("io_usec", stats->rde_event_io_usec); + json_do_uint("peer_usec", stats->rde_event_peer_usec); + json_do_uint("ribdump_usec", stats->rde_event_ribdump_usec); + json_do_uint("nexthop_usec", stats->rde_event_nexthop_usec); + json_do_uint("update_usec", stats->rde_event_update_usec); + json_do_end(); } static void Index: bgpctl/output_ometric.c
=================================================================== RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v diff -u -p -r1.16 output_ometric.c --- bgpctl/output_ometric.c 30 Oct 2025 12:43:53 -0000 1.16 +++ bgpctl/output_ometric.c 12 Nov 2025 14:32:05 -0000 @@ -49,6 +49,7 @@ struct ometric *peer_rr_borr_transmit, * struct ometric *peer_rr_eorr_transmit, *peer_rr_eorr_receive; struct ometric *rde_mem_size, *rde_mem_count, *rde_mem_ref_count; struct ometric *rde_set_size, *rde_set_count, *rde_table_count; +struct ometric *rde_evloop_count, *rde_evloop_time; struct timespec start_time, end_time; @@ -166,6 +167,11 @@ ometric_head(struct parse_result *arg) "bgpd_rde_set_usage_objects", "number of object in set"); rde_table_count = ometric_new(OMT_GAUGE, "bgpd_rde_set_usage_tables", "number of as_set tables"); + + rde_evloop_count = ometric_new(OMT_COUNTER, + "bgpd_rde_evloop", "number of times the evloop ran"); + rde_evloop_time = ometric_new(OMT_COUNTER, + "bgpd_rde_evloop_seconds", "RDE evloop time usage"); } static void @@ -320,6 +326,26 @@ ometric_rib_mem(struct rde_memstats *sta OKV("type"), OKV("prefix_set"), NULL); ometric_rib_mem_element("set_total", UINT64_MAX, stats->aset_size + stats->pset_size, UINT64_MAX); + + ometric_set_int(rde_evloop_count, stats->rde_event_loop_count, NULL); + ometric_set_float_with_labels(rde_evloop_time, + (double)stats->rde_event_loop_usec / (1000.0 * 1000.0) , + OKV("stage"), OKV("main"), NULL); + ometric_set_float_with_labels(rde_evloop_time, + (double)stats->rde_event_io_usec / (1000.0 * 1000.0) , + OKV("stage"), OKV("io"), NULL); + ometric_set_float_with_labels(rde_evloop_time, + (double)stats->rde_event_peer_usec / (1000.0 * 1000.0) , + OKV("stage"), OKV("peer"), NULL); + ometric_set_float_with_labels(rde_evloop_time, + (double)stats->rde_event_ribdump_usec / (1000.0 * 1000.0) , + OKV("stage"), OKV("ribdumps"), NULL); + ometric_set_float_with_labels(rde_evloop_time, + (double)stats->rde_event_nexthop_usec / (1000.0 
* 1000.0) , + OKV("stage"), OKV("nexthop"), NULL); + ometric_set_float_with_labels(rde_evloop_time, + (double)stats->rde_event_update_usec / (1000.0 * 1000.0) , + OKV("stage"), OKV("update"), NULL); } static void Index: bgpd/bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v diff -u -p -r1.521 bgpd.h --- bgpd/bgpd.h 4 Nov 2025 10:47:25 -0000 1.521 +++ bgpd/bgpd.h 12 Nov 2025 14:28:21 -0000 @@ -1410,6 +1410,13 @@ struct rde_memstats { long long aset_nmemb; long long pset_cnt; long long pset_size; + long long rde_event_loop_count; + long long rde_event_loop_usec; + long long rde_event_io_usec; + long long rde_event_peer_usec; + long long rde_event_ribdump_usec; + long long rde_event_nexthop_usec; + long long rde_event_update_usec; }; #define MRT_FILE_LEN 512 Index: bgpd/monotime.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/monotime.h,v diff -u -p -r1.1 monotime.h --- bgpd/monotime.h 20 Feb 2025 19:47:31 -0000 1.1 +++ bgpd/monotime.h 10 Nov 2025 13:44:31 -0000 @@ -72,6 +72,12 @@ monotime_sub(monotime_t minu, monotime_t } static inline long long +monotime_to_usec(monotime_t mt) +{ + return mt.monotime / (MONOTIME_RES / (1000 * 1000ULL)); +} + +static inline long long monotime_to_msec(monotime_t mt) { return mt.monotime / (MONOTIME_RES / 1000); Index: bgpd/rde.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v diff -u -p -r1.661 rde.c --- bgpd/rde.c 4 Nov 2025 15:01:09 -0000 1.661 +++ bgpd/rde.c 12 Nov 2025 14:31:14 -0000 @@ -164,6 +164,7 @@ rde_main(int debug, int verbose) struct passwd *pw; struct pollfd *pfd = NULL; struct rde_mrt_ctx *mctx, *xmctx; + monotime_t loop_start, io_end, peer_end, dump_end, nh_end; void *newp; u_int pfd_elms = 0, i, j; int timeout; @@ -223,6 +224,7 @@ rde_main(int debug, int verbose) conf = new_config(); log_info("route decision engine ready"); + loop_start = 
getmonotime(); while (rde_quit == 0) { if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) { if ((newp = reallocarray(pfd, @@ -266,12 +268,18 @@ rde_main(int debug, int verbose) nexthop_pending() || rib_dump_pending()) timeout = 0; + rdemem.rde_event_loop_usec += + monotime_to_usec(monotime_sub(getmonotime(), loop_start)); + if (poll(pfd, i, timeout) == -1) { if (errno == EINTR) continue; fatal("poll error"); } + rdemem.rde_event_loop_count++; + loop_start = getmonotime(); + if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { log_warnx("RDE: Lost connection to parent"); rde_quit = 1; @@ -312,14 +320,37 @@ rde_main(int debug, int verbose) mctx = LIST_NEXT(mctx, entry); } + io_end = getmonotime(); + rdemem.rde_event_io_usec += + monotime_to_usec(monotime_sub(io_end, loop_start)); + peer_foreach(rde_dispatch_imsg_peer, NULL); peer_reaper(NULL); + + peer_end = getmonotime(); + rdemem.rde_event_peer_usec += + monotime_to_usec(monotime_sub(peer_end, io_end)); + rib_dump_runner(); + + dump_end = getmonotime(); + rdemem.rde_event_ribdump_usec += + monotime_to_usec(monotime_sub(dump_end, peer_end)); + nexthop_runner(); + + nh_end = getmonotime(); + rdemem.rde_event_nexthop_usec += + monotime_to_usec(monotime_sub(nh_end, dump_end)); + if (ibuf_se && imsgbuf_queuelen(ibuf_se) < SESS_MSG_HIGH_MARK) { for (aid = AID_MIN; aid < AID_MAX; aid++) rde_update_queue_runner(aid); } + + rdemem.rde_event_update_usec += + monotime_to_usec(monotime_sub(getmonotime(), nh_end)); + /* commit pftable once per poll loop */ rde_commit_pftable(); }