Index | Thread | Search

From:
Christian Ludwig <cludwig@genua.de>
Subject:
Re: dt: Count skipped clock cycles as dropped events
To:
Martin Pieuchot <mpi@grenadille.net>
Cc:
<tech@openbsd.org>
Date:
Wed, 13 Nov 2024 17:54:34 +0100

Download raw body.

Thread
On Thu, Nov 07, 2024 at 10:36:17AM +0100 Martin Pieuchot wrote:
> On 05/11/24(Tue) 14:07, Christian Ludwig wrote:
> > If the clock interrupt advances for more than one tick, we obviously
> > lost a number of clock ticks. That might happen in a virtual machine
> > that does not get scheduled in time. There is no point in creating a
> > cluster of events with basically the same timestamp and identical stack
> > traces. Generate only one event instead and drop all skipped ticks.
> 
> My understanding of the current behavior is that if a periodic timer is
> scheduled with a small resolution, depending on the load, it might be
> blocked by other workload (locks, spl...).

You are right. But the current maximum timer resolution is 100hz for
btrace timers. I doubt that anything is blocking that long. That might
change once we actually do allow higher resolutions. But in the current
situation an over-provisioned virtual machine host is the most likely
scenario to see this.

> I agree that for profiling purposes it is not useful to generate the
> same event multiple times.  However I'd find useful to have a custom
> error counter such that we can figure out that the clock resolution
> is too small with the current workload.
> 
> I'd also suggest to add another error counter for recursion we prevented
> in the precedent diff.  Would you agree?

Agreed. Updated diff below, now with counters for skipped clock ticks
and recursion events.


 - Christian

---
 sys/dev/dt/dt_dev.c          | 23 +++++++++++++++++++++--
 sys/dev/dt/dt_prov_profile.c | 17 ++++++++++-------
 sys/dev/dt/dtvar.h           |  3 +++
 usr.sbin/btrace/btrace.c     |  4 ++++
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c
index 70273c4ae032..dc52d277be78 100644
--- a/sys/dev/dt/dt_dev.c
+++ b/sys/dev/dt/dt_dev.c
@@ -102,6 +102,8 @@ struct dt_cpubuf {
 
 	/* Counters */
 	unsigned int		 dc_dropevt;	/* [p] # of events dropped */
+	unsigned int		 dc_skiptick;	/* [p] # of ticks skipped */
+	unsigned int		 dc_recurevt;	/* [p] # of recursive events */
 	unsigned int		 dc_readevt;	/* [r] # of events read */
 };
 
@@ -490,19 +492,24 @@ int
 dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
 {
 	struct dt_cpubuf *dc;
-	uint64_t readevt = 0, dropevt = 0;
+	uint64_t readevt, dropevt, skiptick, recurevt;
 	int i;
 
+	readevt = dropevt = skiptick = 0;
 	for (i = 0; i < ncpusfound; i++) {
 		dc = &sc->ds_cpu[i];
 
 		membar_consumer();
 		dropevt += dc->dc_dropevt;
+		skiptick = dc->dc_skiptick;
+		recurevt = dc->dc_recurevt;
 		readevt += dc->dc_readevt;
 	}
 
 	dtst->dtst_readevt = readevt;
 	dtst->dtst_dropevt = dropevt;
+	dtst->dtst_skiptick = skiptick;
+	dtst->dtst_recurevt = recurevt;
 	return 0;
 }
 
@@ -725,6 +732,15 @@ dt_pcb_purge(struct dt_pcb_list *plist)
 	}
 }
 
+void
+dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip)
+{
+	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];
+
+	dc->dc_skiptick += skip;
+	membar_producer();
+}
+
 /*
  * Get a reference to the next free event state from the ring.
  */
@@ -736,8 +752,11 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
 	int prod, cons, distance;
 	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];
 
-	if (dc->dc_inevt == 1)
+	if (dc->dc_inevt == 1) {
+		dc->dc_recurevt++;
+		membar_producer();
 		return NULL;
+	}
 
 	dc->dc_inevt = 1;
 
diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c
index 62900152e147..5cefe8a714da 100644
--- a/sys/dev/dt/dt_prov_profile.c
+++ b/sys/dev/dt/dt_prov_profile.c
@@ -101,15 +101,18 @@ dt_prov_profile_alloc(struct dt_probe *dtp, struct dt_softc *sc,
 void
 dt_clock(struct clockrequest *cr, void *cf, void *arg)
 {
-	uint64_t count, i;
+	uint64_t count;
 	struct dt_evt *dtev;
 	struct dt_pcb *dp = arg;
 
 	count = clockrequest_advance(cr, dp->dp_nsecs);
-	for (i = 0; i < count; i++) {
-		dtev = dt_pcb_ring_get(dp, 1);
-		if (dtev == NULL)
-			return;
-		dt_pcb_ring_consume(dp, dtev);
-	}
+	if (count == 0)
+		return;
+	else if (count > 1)
+		dt_pcb_ring_skiptick(dp, count - 1);
+
+	dtev = dt_pcb_ring_get(dp, 1);
+	if (dtev == NULL)
+		return;
+	dt_pcb_ring_consume(dp, dtev);
 }
diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h
index c6cfa7f66b3b..19d0c68a47dc 100644
--- a/sys/dev/dt/dtvar.h
+++ b/sys/dev/dt/dtvar.h
@@ -116,6 +116,8 @@ struct dtioc_req {
 struct dtioc_stat {
 	uint64_t		 dtst_readevt;	/* events read */
 	uint64_t		 dtst_dropevt;	/* events dropped */
+	uint64_t		 dtst_skiptick;	/* clock ticks skipped */
+	uint64_t		 dtst_recurevt;	/* recursive events */
 };
 
 struct dtioc_getaux {
@@ -179,6 +181,7 @@ struct dt_pcb	*dt_pcb_alloc(struct dt_probe *, struct dt_softc *);
 void		 dt_pcb_free(struct dt_pcb *);
 void		 dt_pcb_purge(struct dt_pcb_list *);
 
+void		 dt_pcb_ring_skiptick(struct dt_pcb *, unsigned int);
 struct dt_evt	*dt_pcb_ring_get(struct dt_pcb *, int);
 void		 dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *);
 
diff --git a/usr.sbin/btrace/btrace.c b/usr.sbin/btrace/btrace.c
index aa75a12f6762..941cd9d4c56a 100644
--- a/usr.sbin/btrace/btrace.c
+++ b/usr.sbin/btrace/btrace.c
@@ -452,6 +452,10 @@ rules_do(int fd)
 		fprintf(stderr, "%llu events read\n", dtst.dtst_readevt);
 		fprintf(stderr, "%llu events dropped\n", dtst.dtst_dropevt);
 		fprintf(stderr, "%llu events filtered\n", bt_filtered);
+		fprintf(stderr, "%llu clock ticks skipped\n",
+			dtst.dtst_skiptick);
+		fprintf(stderr, "%llu recursive events dropped\n",
+			dtst.dtst_recurevt);
 	}
 }
 
-- 
2.34.1