Index | Thread | Search

From:
Scott Cheloha <scottcheloha@gmail.com>
Subject:
Re: dt(4), hardclock: move profiling entry points to dedicated clockintrs
To:
mpi@openbsd.org
Cc:
tech@openbsd.org
Date:
Thu, 8 Feb 2024 22:08:13 -0600

Download raw body.

Thread
On Thu, Feb 08, 2024 at 05:19:40PM +0100, Martin Pieuchot wrote:
> On 07/02/24(Wed) 20:22, Scott Cheloha wrote:
> > On Tue, Feb 06, 2024 at 03:24:20PM +0100, Martin Pieuchot wrote:
> > > On 05/02/24(Mon) 19:37, Scott Cheloha wrote:
> > > > On Fri, Jan 26, 2024 at 12:28:12PM -0600, Scott Cheloha wrote:
> > > > > This patch moves the dt(4) profiling entry points from hardclock()
> > > > > into dedicated clockintr callbacks.  It depends on the clockintr_unbind
> > > > > patch I sent out separately, so be sure to apply that first.
> > > > > 
> > > > > [...]
> > > > 
> > > > Updated patch:
> > > > 
> > > > - Don't advance/cancel dp_clockintr when dp_nsecs is zero.
> > > > - Update DT_FA_PROFILE for i386, macppc
> > > 
> > > I can't test those.
> > [...]
> > We can combine them, but in multiple previous emails you have
> > requested that they remain separate.
> 
> Because they were reasons to do so which are now gone.
> 
> > So, which do you want?  Either works for me.
> 
> Merge them.
> 
> > > [...] 
> > > Here this is complex.  Since we are no longer using SMR in the callback
> > > to get a reference to `dp' we should not do the following before we are
> > > sure the callback won't run.  That would imply clockintr_unbind(), no?
> > > Some caution should be taken with clockintr_unbind() because we are
> > > holding a lock.
> > > 
> > > >  		dtp->dtp_recording--;
> > > >  		dtp->dtp_prov->dtpv_recording--;
> > > >  		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
> > 
> > We should not unbind here.  All we need to do is cancel dp_clockintr.
> 
> No, we must ensure no other CPU has a valid reference on the descriptor
> before decrementing the counter.   That's what I already said in my
> previous email in different words.
> 
> > With the patch, dt_prov_profile_clock() doesn't walk the SMR list
> > (dtp_pcbs) anymore and it doesn't check dtpv_recording or
> > dtp_recording anymore, so we don't need to do anything but
> > advance/cancel dp_clockintr when starting/stopping recording when
> > dp_nsecs is zero.  Check the updated patch.  Is that acceptable?
> 
> No it's not.  [...]

Here's a revised patch based on our discussion off-list.  It's not
exactly what we talked about re. dtpv_dealloc(), but I think it's
pretty close to a workable starting point.  We can iterate on it once
it's in the tree.

- Merge the profile/interval callbacks and dt_prov_profile_fire()
  into a single callback, dt_clock().

- Move the clockintr_bind() and clockintr_stagger() calls from
  dtpv_alloc() to dt_ioctl_record_start().

- Add a dp_cpu member to struct dt_pcb so dt_ioctl_record_start()
  knows where to bind each dp_clockintr.

- In dt_ioctl_record_stop(), replace clockintr_cancel() with
  clockintr_unbind(). The execution barrier ensures the reference
  to the enclosing PCB is dead before the caller returns.

  As we discussed, it's not ideal to block while holding dt_lock, but
  in practice blocking here is rare and dt_clock() does not run for very
  long.

- Update the locking annotation for dp_clockintr.  Both
  clockintr_bind() and clockintr_unbind() are now called
  within the global dt_lock ('D').

Regress passes.  DT_FA_PROFILE values still yield correct-looking
traces.

Index: sys/dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
diff -u -p -r1.5 dt_prov_profile.c
--- sys/dev/dt/dt_prov_profile.c	26 Apr 2023 16:53:59 -0000	1.5
+++ sys/dev/dt/dt_prov_profile.c	8 Feb 2024 23:03:44 -0000
@@ -20,6 +20,7 @@
 #include <sys/systm.h>
 #include <sys/param.h>
 #include <sys/atomic.h>
+#include <sys/clockintr.h>
 
 #include <dev/dt/dtvar.h>
 
@@ -31,13 +32,11 @@ struct dt_probe	*dtpp_interval;		/* glob
 
 int	dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
 	    struct dt_pcb_list *, struct dtioc_req *);
-int	dt_prov_profile_enter(struct dt_provider *, ...);
-int	dt_prov_interval_enter(struct dt_provider *, ...);
 
 struct dt_provider dt_prov_profile = {
 	.dtpv_name	= "profile",
 	.dtpv_alloc	= dt_prov_profile_alloc,
-	.dtpv_enter	= dt_prov_profile_enter,
+	.dtpv_enter	= NULL,
 	.dtpv_leave	= NULL,
 	.dtpv_dealloc	= NULL,
 };
@@ -45,7 +44,7 @@ struct dt_provider dt_prov_profile = {
 struct dt_provider dt_prov_interval = {
 	.dtpv_name	= "interval",
 	.dtpv_alloc	= dt_prov_profile_alloc,
-	.dtpv_enter	= dt_prov_interval_enter,
+	.dtpv_enter	= NULL,
 	.dtpv_leave	= NULL,
 	.dtpv_dealloc	= NULL,
 };
@@ -90,8 +89,8 @@ dt_prov_profile_alloc(struct dt_probe *d
 			return ENOMEM;
 		}
 
-		dp->dp_maxtick = hz / dtrq->dtrq_rate;
-		dp->dp_cpuid = ci->ci_cpuid;
+		dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
+		dp->dp_cpu = ci;
 
 		dp->dp_filter = dtrq->dtrq_filter;
 		dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_PROFILE;
@@ -101,51 +100,18 @@ dt_prov_profile_alloc(struct dt_probe *d
 	return 0;
 }
 
-static inline void
-dt_prov_profile_fire(struct dt_pcb *dp)
+void
+dt_clock(struct clockrequest *cr, void *cf, void *arg)
 {
+	uint64_t count, i;
 	struct dt_evt *dtev;
+	struct dt_pcb *dp = arg;
 
-	if (++dp->dp_nticks < dp->dp_maxtick)
-		return;
-
-	dtev = dt_pcb_ring_get(dp, 1);
-	if (dtev == NULL)
-		return;
-	dt_pcb_ring_consume(dp, dtev);
-	dp->dp_nticks = 0;
-}
-
-int
-dt_prov_profile_enter(struct dt_provider *dtpv, ...)
-{
-	struct cpu_info *ci = curcpu();
-	struct dt_pcb *dp;
-
-	KASSERT(dtpv == &dt_prov_profile);
-
-	smr_read_enter();
-	SMR_SLIST_FOREACH(dp, &dtpp_profile->dtp_pcbs, dp_pnext) {
-		if (dp->dp_cpuid != ci->ci_cpuid)
-			continue;
-
-		dt_prov_profile_fire(dp);
+	count = clockrequest_advance(cr, dp->dp_nsecs);
+	for (i = 0; i < count; i++) {
+		dtev = dt_pcb_ring_get(dp, 1);
+		if (dtev == NULL)
+			return;
+		dt_pcb_ring_consume(dp, dtev);
 	}
-	smr_read_leave();
-	return 0;
-}
-
-int
-dt_prov_interval_enter(struct dt_provider *dtpv, ...)
-{
-	struct dt_pcb *dp;
-
-	KASSERT(dtpv == &dt_prov_interval);
-
-	smr_read_enter();
-	SMR_SLIST_FOREACH(dp, &dtpp_interval->dtp_pcbs, dp_pnext) {
-		dt_prov_profile_fire(dp);
-	}
-	smr_read_leave();
-	return 0;
 }
Index: sys/dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
diff -u -p -r1.29 dt_dev.c
--- sys/dev/dt/dt_dev.c	2 Jan 2024 16:32:48 -0000	1.29
+++ sys/dev/dt/dt_dev.c	8 Feb 2024 23:03:44 -0000
@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/param.h>
+#include <sys/clockintr.h>
 #include <sys/device.h>
 #include <sys/exec_elf.h>
 #include <sys/malloc.h>
@@ -56,13 +57,13 @@
  *	proc_trampoline+0x1c
  */
 #if defined(__amd64__)
-#define DT_FA_PROFILE	7
+#define DT_FA_PROFILE	5
 #define DT_FA_STATIC	2
 #elif defined(__i386__)
-#define DT_FA_PROFILE	8
+#define DT_FA_PROFILE	5
 #define DT_FA_STATIC	2
 #elif defined(__macppc__)
-#define DT_FA_PROFILE  7
+#define DT_FA_PROFILE  5
 #define DT_FA_STATIC   2
 #elif defined(__octeon__)
 #define DT_FA_PROFILE	6
@@ -492,6 +493,14 @@ dt_ioctl_record_start(struct dt_softc *s
 		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
 		dtp->dtp_recording++;
 		dtp->dtp_prov->dtpv_recording++;
+
+		if (dp->dp_nsecs != 0) {
+			clockintr_bind(&dp->dp_clockintr, dp->dp_cpu, dt_clock,
+			    dp);
+			clockintr_stagger(&dp->dp_clockintr, dp->dp_nsecs,
+			    CPU_INFO_UNIT(dp->dp_cpu), MAXCPUS);
+			clockintr_advance(&dp->dp_clockintr, dp->dp_nsecs);
+		}
 	}
 	rw_exit_write(&dt_lock);
 
@@ -517,6 +526,13 @@ dt_ioctl_record_stop(struct dt_softc *sc
 	rw_enter_write(&dt_lock);
 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
 		struct dt_probe *dtp = dp->dp_dtp;
+
+		/*
+		 * Set an execution barrier to ensure dt_clock() has
+		 * released its shared reference to dp.
+		 */
+		if (dp->dp_nsecs != 0)
+			clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);
 
 		dtp->dtp_recording--;
 		dtp->dtp_prov->dtpv_recording--;
Index: sys/dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
diff -u -p -r1.17 dtvar.h
--- sys/dev/dt/dtvar.h	26 Apr 2023 16:53:59 -0000	1.17
+++ sys/dev/dt/dtvar.h	8 Feb 2024 23:03:44 -0000
@@ -175,6 +175,7 @@ int		dtioc_req_isvalid(struct dtioc_req 
  * userland read(2)s them.
  *
  *  Locks used to protect struct members in this file:
+ *	D	dt_lock
  *	I	immutable after creation
  *	K	kernel lock
  *	K,S	kernel lock for writing and SMR for reading
@@ -197,9 +198,9 @@ struct dt_pcb {
 	struct dt_filter	 dp_filter;	/* [I] filter to match */
 
 	/* Provider specific fields. */
-	unsigned int		 dp_cpuid;	/* [I] on which CPU */
-	unsigned int		 dp_maxtick;	/* [I] freq. of profiling */
-	unsigned int		 dp_nticks;	/* [c] current tick count */
+	struct clockintr	 dp_clockintr;	/* [D] profiling handle */
+	uint64_t		 dp_nsecs;	/* [I] profiling period */
+	struct cpu_info		*dp_cpu;	/* [I] on which CPU */
 
 	/* Counters */
 	uint64_t		 dp_dropevt;	/* [m] # dropped event */
@@ -270,6 +271,7 @@ struct dt_probe *dt_dev_alloc_probe(cons
 		    struct dt_provider *);
 void		 dt_dev_register_probe(struct dt_probe *);
 
+void		 dt_clock(struct clockrequest *, void *, void *);
 
 extern volatile uint32_t	dt_tracing;	/* currently tracing? */
 
Index: sys/kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
diff -u -p -r1.121 kern_clock.c
--- sys/kern/kern_clock.c	17 Oct 2023 00:04:02 -0000	1.121
+++ sys/kern/kern_clock.c	8 Feb 2024 23:03:44 -0000
@@ -50,11 +50,6 @@
 #include <sys/sched.h>
 #include <sys/timetc.h>
 
-#include "dt.h"
-#if NDT > 0
-#include <dev/dt/dtvar.h>
-#endif
-
 /*
  * Clock handling routines.
  *
@@ -145,12 +140,6 @@ initclocks(void)
 void
 hardclock(struct clockframe *frame)
 {
-#if NDT > 0
-	DT_ENTER(profile, NULL);
-	if (CPU_IS_PRIMARY(curcpu()))
-		DT_ENTER(interval, NULL);
-#endif
-
 	/*
 	 * If we are not the primary CPU, we're not allowed to do
 	 * any more work.