Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: more RDE stats: aspa, ibufq and rib entry queue
To:
tech@openbsd.org
Date:
Mon, 2 Mar 2026 11:10:18 +0100

Download raw body.

Thread
This adds a few more stat values to the RDE.

This includes stats for the ASPA set and stats for the various queues
in the RDE.  The queue stats are tracked per-peer and globally.

This also includes some minor adjustments in the peer up/down handling.
It replaces a copied version of peer_down() in peer_up() with a peer_down()
call and it ensures that the RIB entry queue is not leaked in the
peer_delete() case.
-- 
:wq Claudio

Index: bgpctl/output.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
diff -u -p -r1.69 output.c
--- bgpctl/output.c	13 Feb 2026 18:27:40 -0000	1.69
+++ bgpctl/output.c	2 Mar 2026 10:03:40 -0000
@@ -227,6 +227,7 @@ show_neighbor_msgstats(struct peer *p)
 	    p->stats.msg_rcvd_open + p->stats.msg_rcvd_notification +
 	    p->stats.msg_rcvd_update + p->stats.msg_rcvd_keepalive +
 	    p->stats.msg_rcvd_rrefresh);
+
 	printf("  Update statistics:\n");
 	printf("  %-15s %-10s %-10s %-10s\n", "", "Sent", "Received",
 	    "Pending");
@@ -247,6 +248,13 @@ show_neighbor_msgstats(struct peer *p)
 	    p->stats.refresh_sent_borr, p->stats.refresh_rcvd_borr);
 	printf("  %-15s %10llu %10llu\n", "End-of-RR",
 	    p->stats.refresh_sent_eorr, p->stats.refresh_rcvd_eorr);
+
+	printf("  Queue statistics:\n");
+	printf("  %-15s %-10s %-10s\n", "", "Count", "Size");
+	printf("  %-15s %10llu %10llu\n", "ibuf queue",
+	    p->stats.ibufq_msg_count, p->stats.ibufq_payload_size);
+	printf("  %-15s %10llu %-10s\n", "rib queue",
+	    p->stats.rib_entry_count, "-");
 }
 
 static void
@@ -1117,6 +1125,8 @@ show_rib_mem(struct rde_memstats *stats)
 	    fmt_mem(stats->aset_size));
 	printf("%10lld prefix-set elements using %s of memory\n",
 	    stats->pset_cnt, fmt_mem(stats->pset_size));
+	printf("%10lld aspa-set elements using %s of memory\n",
+	    stats->aspa_cnt, fmt_mem(stats->aspa_size));
 	printf("RIB using %s of memory\n", fmt_mem(pts +
 	    stats->prefix_cnt * sizeof(struct prefix) +
 	    stats->adjout_prefix_cnt * sizeof(struct adjout_prefix) +
@@ -1128,8 +1138,13 @@ show_rib_mem(struct rde_memstats *stats)
 	    stats->aspath_size + stats->attr_cnt * sizeof(struct attr) +
 	    stats->attr_data));
 	printf("Sets and filters using %s of memory\n",
-	    fmt_mem(stats->aset_size + stats->pset_size +
+	    fmt_mem(stats->aset_size + stats->pset_size + stats->aspa_size +
 	    stats->filter_set_size));
+
+	printf("\nRDE queue statistics\n");
+	printf("%10lld messages queued holding %s of data\n",
+	    stats->rde_ibufq_msg_count, fmt_mem(stats->rde_ibufq_payload_size));
+	printf("%10lld rib entries queued\n", stats->rde_rib_entry_count);
 
 	printf("\nRDE timing statistics\n");
 	printf("%10lld usec spent in the event loop for %llu rounds\n",
Index: bgpctl/output_json.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
diff -u -p -r1.60 output_json.c
--- bgpctl/output_json.c	13 Feb 2026 18:27:40 -0000	1.60
+++ bgpctl/output_json.c	2 Mar 2026 10:03:40 -0000
@@ -217,6 +217,18 @@ json_neighbor_stats(struct peer *p)
 
 	json_do_end();
 
+	json_do_object("queue", 0);
+
+	json_do_object("count", 1);
+	json_do_uint("ibuf_queue", p->stats.ibufq_msg_count);
+	json_do_uint("rib_entry", p->stats.rib_entry_count);
+	json_do_end();
+	json_do_object("size", 1);
+	json_do_uint("ibuf_queue", p->stats.ibufq_payload_size);
+	json_do_end();
+
+	json_do_end();
+
 	json_do_end();
 }
 
@@ -953,8 +965,17 @@ json_rib_mem(struct rde_memstats *stats)
 	    UINT64_MAX);
 	json_rib_mem_element("prefix_set", stats->pset_cnt, stats->pset_size,
 	    UINT64_MAX);
+	json_rib_mem_element("aspa_set", stats->aspa_cnt, stats->aspa_size,
+	    UINT64_MAX);
 	json_rib_mem_element("total", UINT64_MAX,
-	    stats->aset_size + stats->pset_size, UINT64_MAX);
+	    stats->aset_size + stats->pset_size + stats->aspa_size, UINT64_MAX);
+	json_do_end();
+
+	json_do_object("queue", 0);
+	json_rib_mem_element("ibuf_queue", stats->rde_ibufq_msg_count,
+	    stats->rde_ibufq_payload_size, UINT64_MAX);
+	json_rib_mem_element("rib_entry", stats->rde_rib_entry_count,
+	    UINT64_MAX, UINT64_MAX);
 	json_do_end();
 
 	json_do_object("evloop", 0);
Index: bgpctl/output_ometric.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpctl/output_ometric.c,v
diff -u -p -r1.24 output_ometric.c
--- bgpctl/output_ometric.c	13 Feb 2026 18:27:40 -0000	1.24
+++ bgpctl/output_ometric.c	2 Mar 2026 10:03:40 -0000
@@ -47,8 +47,10 @@ struct ometric *peer_withdraw_transmit, 
 struct ometric *peer_rr_req_transmit, *peer_rr_req_receive;
 struct ometric *peer_rr_borr_transmit, *peer_rr_borr_receive;
 struct ometric *peer_rr_eorr_transmit, *peer_rr_eorr_receive;
+struct ometric *peer_queue_count, *peer_queue_size;
 struct ometric *rde_mem_size, *rde_mem_count, *rde_mem_ref_count;
 struct ometric *rde_set_size, *rde_set_count, *rde_table_count;
+struct ometric *rde_queue_size, *rde_queue_count;
 struct ometric *rde_evloop_count, *rde_evloop_time;
 
 struct timespec start_time, end_time;
@@ -152,6 +154,11 @@ ometric_head(struct parse_result *arg)
 	peer_rr_eorr_receive = ometric_new(OMT_COUNTER,
 	    "bgpd_peer_route_refresh_eorr_receive",
 	    "number of ext. route-refresh EORR messages received from peer");
+	peer_queue_count = ometric_new(OMT_GAUGE,
+	    "bgpd_peer_queue_usage_objects", "number of object on queue");
+	peer_queue_size = ometric_new(OMT_GAUGE,
+	    "bgpd_peer_queue_memory_usage_bytes",
+	    "memory usage of queue in bytes");
 
 	/* RDE memory statistics */
 	rde_mem_size = ometric_new(OMT_GAUGE,
@@ -168,6 +175,11 @@ ometric_head(struct parse_result *arg)
 	rde_table_count = ometric_new(OMT_GAUGE,
 	    "bgpd_rde_set_usage_tables", "number of as_set tables");
 
+	rde_queue_size = ometric_new(OMT_GAUGE,
+	    "bgpd_rde_queue_bytes", "memory usage of queued objects in bytes");
+	rde_queue_count = ometric_new(OMT_GAUGE,
+	    "bgpd_rde_queue_objects", "number of object in queue");
+
 	rde_evloop_count = ometric_new(OMT_COUNTER,
 	    "bgpd_rde_evloop", "number of times the evloop ran");
 	rde_evloop_time = ometric_new(OMT_COUNTER,
@@ -257,6 +269,13 @@ ometric_neighbor_stats(struct peer *p, s
 	ometric_set_int(peer_rr_eorr_transmit, p->stats.refresh_sent_eorr, ol);
 	ometric_set_int(peer_rr_eorr_receive, p->stats.refresh_rcvd_eorr, ol);
 
+	ometric_set_int_with_labels(peer_queue_count, p->stats.ibufq_msg_count,
+	    OKV("type"), OKV("ibuf_queue"), ol);
+	ometric_set_int_with_labels(peer_queue_count, p->stats.rib_entry_count,
+	    OKV("type"), OKV("rib_entry"), ol);
+	ometric_set_int_with_labels(peer_queue_size,
+	    p->stats.ibufq_payload_size, OKV("type"), OKV("ibuf_queue"), ol);
+
 	olabels_free(ol);
 	free(descr);
 }
@@ -342,10 +361,22 @@ ometric_rib_mem(struct rde_memstats *sta
 	    OKV("type"), OKV("as_set"), NULL);
 	ometric_set_int_with_labels(rde_set_size, stats->pset_size,
 	    OKV("type"), OKV("prefix_set"), NULL);
+	ometric_set_int_with_labels(rde_set_size, stats->aspa_size,
+	    OKV("type"), OKV("aspa_set"), NULL);
 	ometric_set_int_with_labels(rde_set_count, stats->pset_cnt,
 	    OKV("type"), OKV("prefix_set"), NULL);
+	ometric_set_int_with_labels(rde_set_count, stats->aspa_cnt,
+	    OKV("type"), OKV("aspa_set"), NULL);
 	ometric_rib_mem_element("set_total", UINT64_MAX,
-	    stats->aset_size + stats->pset_size, UINT64_MAX);
+	    stats->aset_size + stats->pset_size + stats->aspa_size, UINT64_MAX);
+
+	ometric_set_int_with_labels(rde_queue_count, stats->rde_ibufq_msg_count,
+	    OKV("type"), OKV("ibuf_queue"), NULL);
+	ometric_set_int_with_labels(rde_queue_count, stats->rde_rib_entry_count,
+	    OKV("type"), OKV("rib_entry"), NULL);
+	ometric_set_int_with_labels(rde_queue_size,
+	    stats->rde_ibufq_payload_size, OKV("type"), OKV("ibuf_queue"),
+	    NULL);
 
 	ometric_set_int(rde_evloop_count, stats->rde_event_loop_count, NULL);
 	ometric_set_float_with_labels(rde_evloop_time,
Index: bgpd/bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
diff -u -p -r1.533 bgpd.h
--- bgpd/bgpd.h	13 Feb 2026 12:47:36 -0000	1.533
+++ bgpd/bgpd.h	2 Mar 2026 10:03:40 -0000
@@ -548,6 +548,9 @@ struct rde_peer_stats {
 	uint64_t			 prefix_sent_update;
 	uint64_t			 prefix_sent_withdraw;
 	uint64_t			 prefix_sent_eor;
+	uint64_t			 rib_entry_count;
+	uint64_t			 ibufq_msg_count;
+	uint64_t			 ibufq_payload_size;
 	uint32_t			 prefix_cnt;
 	uint32_t			 prefix_out_cnt;
 	uint32_t			 pending_update;
@@ -1420,12 +1423,17 @@ struct rde_memstats {
 	long long	aset_nmemb;
 	long long	pset_cnt;
 	long long	pset_size;
+	long long	aspa_cnt;
+	long long	aspa_size;
 	long long	filter_cnt;
 	long long	filter_size;
 	long long	filter_refs;
 	long long	filter_set_cnt;
 	long long	filter_set_size;
 	long long	filter_set_refs;
+	long long	rde_rib_entry_count;
+	long long	rde_ibufq_msg_count;
+	long long	rde_ibufq_payload_size;
 	long long	rde_event_loop_count;
 	long long	rde_event_loop_usec;
 	long long	rde_event_io_usec;
Index: bgpd/control.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
diff -u -p -r1.136 control.c
--- bgpd/control.c	4 Feb 2026 11:41:11 -0000	1.136
+++ bgpd/control.c	2 Mar 2026 10:03:40 -0000
@@ -581,6 +581,9 @@ control_imsg_relay(struct imsg *imsg, st
 		peer.stats.prefix_sent_eor = stats.prefix_sent_eor;
 		peer.stats.pending_update = stats.pending_update;
 		peer.stats.pending_withdraw = stats.pending_withdraw;
+		peer.stats.rib_entry_count = stats.rib_entry_count;
+		peer.stats.ibufq_msg_count = stats.ibufq_msg_count;
+		peer.stats.ibufq_payload_size = stats.ibufq_payload_size;
 		peer.stats.msg_queue_len = msgbuf_queuelen(p->wbuf);
 
 		return imsg_compose(&c->imsgbuf, type, 0, pid, -1,
Index: bgpd/rde_aspa.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_aspa.c,v
diff -u -p -r1.6 rde_aspa.c
--- bgpd/rde_aspa.c	20 Feb 2025 19:47:31 -0000	1.6
+++ bgpd/rde_aspa.c	2 Mar 2026 10:03:40 -0000
@@ -363,6 +363,9 @@ aspa_table_prep(uint32_t entries, size_t
 	ra->maxdata = datasize / sizeof(ra->data[0]);
 	ra->lastchange = getmonotime();
 
+	rdemem.aspa_cnt += ra->maxset;
+	rdemem.aspa_size += ra->maxset * sizeof(ra->sets[0]) +
+	    ra->maxdata * sizeof(ra->data[0]);
 	return ra;
 }
 
@@ -421,6 +424,9 @@ aspa_table_free(struct rde_aspa *ra)
 {
 	if (ra == NULL)
 		return;
+	rdemem.aspa_cnt -= ra->maxset;
+	rdemem.aspa_size -= ra->maxset * sizeof(ra->sets[0]) +
+	    ra->maxdata * sizeof(ra->data[0]);
 	free(ra->table);
 	free(ra->sets);
 	free(ra->data);
Index: bgpd/rde_peer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_peer.c,v
diff -u -p -r1.67 rde_peer.c
--- bgpd/rde_peer.c	13 Feb 2026 12:47:36 -0000	1.67
+++ bgpd/rde_peer.c	2 Mar 2026 10:03:40 -0000
@@ -336,6 +336,8 @@ rde_generate_updates(struct rib_entry *r
 	if (re->pq_mode != EVAL_NONE) {
 		peer = peer_get(re->pq_peer_id);
 		TAILQ_REMOVE(&peer->rib_pq_head, re, rib_queue);
+		rdemem.rde_rib_entry_count--;
+		peer->stats.rib_entry_count--;
 	}
 	if (newpath != NULL)
 		peer = prefix_peer(newpath);
@@ -344,6 +346,8 @@ rde_generate_updates(struct rib_entry *r
 	re->pq_mode = mode;
 	re->pq_peer_id = peer->conf.id;
 	TAILQ_INSERT_TAIL(&peer->rib_pq_head, re, rib_queue);
+	rdemem.rde_rib_entry_count++;
+	peer->stats.rib_entry_count++;
 }
 
 void
@@ -357,6 +361,8 @@ peer_process_updates(struct rde_peer *pe
 	if (re == NULL)
 		return;
 	TAILQ_REMOVE(&peer->rib_pq_head, re, rib_queue);
+	rdemem.rde_rib_entry_count--;
+	peer->stats.rib_entry_count--;
 
 	mode = re->pq_mode;
 
@@ -430,11 +436,7 @@ peer_up(struct rde_peer *peer, struct se
 		 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
 		 * So just do a full reset of the peer here.
 		 */
-		rib_dump_terminate(peer);
-		peer_imsg_flush(peer);
-		peer_flush(peer, AID_UNSPEC, monotime_clear());
-		peer->stats.prefix_cnt = 0;
-		peer->state = PEER_DOWN;
+		peer_down(peer);
 	}
 
 	/*
@@ -538,6 +540,10 @@ peer_delete(struct rde_peer *peer)
 	rde_filter_unref(peer->out_rules);
 	adjout_peer_free(peer);
 
+	TAILQ_CONCAT(&peerself->rib_pq_head, &peer->rib_pq_head, rib_queue);
+	peerself->stats.rib_entry_count += peer->stats.rib_entry_count;
+	peer->stats.rib_entry_count = 0;
+
 	RB_REMOVE(peer_tree, &peertable, peer);
 
 	/* start reaping the zombie */
@@ -744,6 +750,11 @@ peer_work_pending(void)
 void
 peer_imsg_push(struct rde_peer *peer, struct imsg *imsg)
 {
+	peer->stats.ibufq_msg_count++;
+	rdemem.rde_ibufq_msg_count++;
+	peer->stats.ibufq_payload_size += imsg_get_len(imsg);
+	rdemem.rde_ibufq_payload_size += imsg_get_len(imsg);
+
 	imsg_ibufq_push(peer->ibufq, imsg);
 }
 
@@ -758,6 +769,10 @@ peer_imsg_pop(struct rde_peer *peer, str
 	case 0:
 		return 0;
 	case 1:
+		peer->stats.ibufq_msg_count--;
+		rdemem.rde_ibufq_msg_count--;
+		peer->stats.ibufq_payload_size -= imsg_get_len(imsg);
+		rdemem.rde_ibufq_payload_size -= imsg_get_len(imsg);
 		return 1;
 	default:
 		fatal("imsg_ibufq_pop");
@@ -771,4 +786,9 @@ void
 peer_imsg_flush(struct rde_peer *peer)
 {
 	ibufq_flush(peer->ibufq);
+
+	rdemem.rde_ibufq_msg_count -= peer->stats.ibufq_msg_count;
+	rdemem.rde_ibufq_payload_size -= peer->stats.ibufq_payload_size;
+	peer->stats.ibufq_msg_count = 0;
+	peer->stats.ibufq_payload_size = 0;
 }
Index: bgpd/session.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
diff -u -p -r1.193 session.h
--- bgpd/session.h	4 Nov 2025 10:47:25 -0000	1.193
+++ bgpd/session.h	2 Mar 2026 10:03:40 -0000
@@ -144,6 +144,9 @@ struct peer_stats {
 	unsigned long long	 prefix_sent_update;
 	unsigned long long	 prefix_sent_withdraw;
 	unsigned long long	 prefix_sent_eor;
+	unsigned long long	 rib_entry_count;
+	unsigned long long	 ibufq_msg_count;
+	unsigned long long	 ibufq_payload_size;
 	monotime_t		 last_updown;
 	monotime_t		 last_read;
 	monotime_t		 last_write;