Index | Thread | Search

From:
Scott Cheloha <scottcheloha@gmail.com>
Subject:
Re: dt(4), hardclock: move profiling entry points to dedicated clockintrs
To:
tech@openbsd.org
Cc:
mpi@openbsd.org
Date:
Wed, 7 Feb 2024 20:22:34 -0600

Download raw body.

Thread
On Tue, Feb 06, 2024 at 03:24:20PM +0100, Martin Pieuchot wrote:
> On 05/02/24(Mon) 19:37, Scott Cheloha wrote:
> > On Fri, Jan 26, 2024 at 12:28:12PM -0600, Scott Cheloha wrote:
> > > This patch moves the dt(4) profiling entry points from hardclock()
> > > into dedicated clockintr callbacks.  It depends on the clockintr_unbind
> > > patch I sent out separately, so be sure to apply that first.
> > > 
> > > [...]
> > 
> > Updated patch:
> > 
> > - Don't advance/cancel dp_clockintr when dp_nsecs is zero.
> > - Update DT_FA_PROFILE for i386, macppc
> 
> I can't test those.

Well, for sake of documenting it, on my i386 with a local APIC the
stack at idle now looks like this:

dt_pcb_ring_get+0x111
dt_prov_profile_clock+0x3a
clockintr_dispatch+0x21d
lapic_clockintr+0xb
cpu_paenable+0x210
acpicpu_idle+0x193
cpu_idle_cycle+0xc
proc_trampoline+0x8
kernel

We truncate at cpu_paenable.

> Comments below.
> 
> > Index: sys/dev/dt/dt_prov_profile.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
> > diff -u -p -r1.5 dt_prov_profile.c
> > --- sys/dev/dt/dt_prov_profile.c	26 Apr 2023 16:53:59 -0000	1.5
> > +++ sys/dev/dt/dt_prov_profile.c	6 Feb 2024 01:34:45 -0000
> > @@ -20,6 +20,7 @@
> >  #include <sys/systm.h>
> >  #include <sys/param.h>
> >  #include <sys/atomic.h>
> > +#include <sys/clockintr.h>
> >  
> >  #include <dev/dt/dtvar.h>
> >  
> > @@ -31,13 +32,13 @@ struct dt_probe	*dtpp_interval;		/* glob
> >  
> >  int	dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
> >  	    struct dt_pcb_list *, struct dtioc_req *);
> > -int	dt_prov_profile_enter(struct dt_provider *, ...);
> > -int	dt_prov_interval_enter(struct dt_provider *, ...);
> > +void	dt_prov_profile_clock(struct clockrequest *, void *, void *);
> > +void	dt_prov_interval_clock(struct clockrequest *, void *, void *);
> >  
> >  struct dt_provider dt_prov_profile = {
> >  	.dtpv_name	= "profile",
> >  	.dtpv_alloc	= dt_prov_profile_alloc,
> > -	.dtpv_enter	= dt_prov_profile_enter,
> > +	.dtpv_enter	= NULL,
> >  	.dtpv_leave	= NULL,
> >  	.dtpv_dealloc	= NULL,
> >  };
> > @@ -45,7 +46,7 @@ struct dt_provider dt_prov_profile = {
> >  struct dt_provider dt_prov_interval = {
> >  	.dtpv_name	= "interval",
> >  	.dtpv_alloc	= dt_prov_profile_alloc,
> > -	.dtpv_enter	= dt_prov_interval_enter,
> > +	.dtpv_enter	= NULL,
> >  	.dtpv_leave	= NULL,
> >  	.dtpv_dealloc	= NULL,
> >  };
> > @@ -70,6 +71,7 @@ dt_prov_profile_alloc(struct dt_probe *d
> >  {
> >  	struct dt_pcb *dp;
> >  	struct cpu_info *ci;
> > +	void (*callback)(struct clockrequest *, void *, void *);
> >  	CPU_INFO_ITERATOR cii;
> >  	extern int hz;
> >  
> > @@ -80,6 +82,11 @@ dt_prov_profile_alloc(struct dt_probe *d
> >  	if (dtrq->dtrq_rate <= 0 || dtrq->dtrq_rate > hz)
> >  		return EOPNOTSUPP;
> >  
> > +	if (dtp == dtpp_interval)
> > +		callback = dt_prov_interval_clock;
> > +	else
> > +		callback = dt_prov_profile_clock;
> 
> I see no difference between the two callbacks, can we get rid of
> dt_prov_interval_clock()?

We can combine them, but in multiple previous emails you have
requested that they remain separate.

So, which do you want?  Either works for me.

If they're separate, you can distinguish between them in ddb(4), e.g.:

ddb{3}> show all clockintr
               UPTIME
     10775.549901920

          EXPIRATION  STATE  CPU                 ARG  NAME
     10775.170000000   pend    0  0x0000000000000000  clockintr_hardclock
     10775.171066271   pend    0  0x0000000000000000  statclock
     10775.200000000   pend    0  0x0000000000000000  roundrobin
->   10776.000000000   pend    0  0xffff800001c41100  dt_prov_interval_clock
->   10776.000000000   pend    0  0xffff800001b90a00  dt_prov_profile_clock
         0.000000000   idle    0  0x0000000000000000  itimer_update
         0.000000000   idle    0  0x0000000000000000  profclock
     10775.171093750   pend    7  0x0000000000000000  clockintr_hardclock
     10775.173450226   pend    7  0x0000000000000000  statclock
     10775.201093750   pend    7  0x0000000000000000  roundrobin
->   10776.109375000   pend    7  0xffff800001cabd00  dt_prov_profile_clock
         0.001093750   idle    7  0x0000000000000000  itimer_update
         0.000109375   idle    7  0x0000000000000000  profclock
               [...]

... which might be useful?  Otherwise they're currently identical.

> >  	CPU_INFO_FOREACH(cii, ci) {
> >  		if (!CPU_IS_PRIMARY(ci) && (dtp == dtpp_interval))
> >  			continue;
> > @@ -90,8 +97,11 @@ dt_prov_profile_alloc(struct dt_probe *d
> >  			return ENOMEM;
> >  		}
> >  
> > -		dp->dp_maxtick = hz / dtrq->dtrq_rate;
> > +		dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
> >  		dp->dp_cpuid = ci->ci_cpuid;
> 
> `dp_cpuid' is no longer used now that clockintr_stagger() bind a request
> to a given CPU.  You can get rid of it.

Removed.

> > +		clockintr_bind(&dp->dp_clockintr, ci, callback, dp);
> > +		clockintr_stagger(&dp->dp_clockintr, dp->dp_nsecs,
> > +		    CPU_INFO_UNIT(ci), MAXCPUS);
> >  
> >  		dp->dp_filter = dtrq->dtrq_filter;
> >  		dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_PROFILE;
> > @@ -102,50 +112,35 @@ dt_prov_profile_alloc(struct dt_probe *d
> >  }
> >  
> >  static inline void
> > -dt_prov_profile_fire(struct dt_pcb *dp)
> > +dt_prov_profile_fire(struct dt_pcb *dp, uint64_t count)
> >  {
> > +	uint64_t i;
> >  	struct dt_evt *dtev;
> >  
> > -	if (++dp->dp_nticks < dp->dp_maxtick)
> > -		return;
> > -
> > -	dtev = dt_pcb_ring_get(dp, 1);
> > -	if (dtev == NULL)
> > -		return;
> > -	dt_pcb_ring_consume(dp, dtev);
> > -	dp->dp_nticks = 0;
> > +	for (i = 0; i < count; i++) {
> > +		dtev = dt_pcb_ring_get(dp, 1);
> > +		if (dtev == NULL)
> > +			return;
> > +		dt_pcb_ring_consume(dp, dtev);
> > +	}
> >  }
> >  
> > -int
> > -dt_prov_profile_enter(struct dt_provider *dtpv, ...)
> > +void
> > +dt_prov_profile_clock(struct clockrequest *cr, void *cf, void *arg)
> >  {
> > -	struct cpu_info *ci = curcpu();
> > -	struct dt_pcb *dp;
> > -
> > -	KASSERT(dtpv == &dt_prov_profile);
> > -
> > -	smr_read_enter();
> > -	SMR_SLIST_FOREACH(dp, &dtpp_profile->dtp_pcbs, dp_pnext) {
> > -		if (dp->dp_cpuid != ci->ci_cpuid)
> > -			continue;
> > +	uint64_t count;
> > +	struct dt_pcb *dp = arg;
> >  
> > -		dt_prov_profile_fire(dp);
> > -	}
> > -	smr_read_leave();
> > -	return 0;
> > +	count = clockrequest_advance(cr, dp->dp_nsecs);
> > +	dt_prov_profile_fire(dp, count);
> >  }
> >  
> > -int
> > -dt_prov_interval_enter(struct dt_provider *dtpv, ...)
> > +void
> > +dt_prov_interval_clock(struct clockrequest *cr, void *cf, void *arg)
> >  {
> > -	struct dt_pcb *dp;
> > +	uint64_t count;
> > +	struct dt_pcb *dp = arg;
> >  
> > -	KASSERT(dtpv == &dt_prov_interval);
> > -
> > -	smr_read_enter();
> > -	SMR_SLIST_FOREACH(dp, &dtpp_interval->dtp_pcbs, dp_pnext) {
> > -		dt_prov_profile_fire(dp);
> > -	}
> > -	smr_read_leave();
> > -	return 0;
> > +	count = clockrequest_advance(cr, dp->dp_nsecs);
> > +	dt_prov_profile_fire(dp, count);
> 
> >  }
> > Index: sys/dev/dt/dt_dev.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
> > diff -u -p -r1.29 dt_dev.c
> > --- sys/dev/dt/dt_dev.c	2 Jan 2024 16:32:48 -0000	1.29
> > +++ sys/dev/dt/dt_dev.c	6 Feb 2024 01:34:45 -0000
> > @@ -19,6 +19,7 @@
> >  #include <sys/types.h>
> >  #include <sys/systm.h>
> >  #include <sys/param.h>
> > +#include <sys/clockintr.h>
> >  #include <sys/device.h>
> >  #include <sys/exec_elf.h>
> >  #include <sys/malloc.h>
> > @@ -56,13 +57,13 @@
> >   *	proc_trampoline+0x1c
> >   */
> >  #if defined(__amd64__)
> > -#define DT_FA_PROFILE	7
> > +#define DT_FA_PROFILE	5
> >  #define DT_FA_STATIC	2
> >  #elif defined(__i386__)
> > -#define DT_FA_PROFILE	8
> > +#define DT_FA_PROFILE	5
> >  #define DT_FA_STATIC	2
> >  #elif defined(__macppc__)
> > -#define DT_FA_PROFILE  7
> > +#define DT_FA_PROFILE  5
> >  #define DT_FA_STATIC   2
> >  #elif defined(__octeon__)
> >  #define DT_FA_PROFILE	6
> > @@ -492,6 +493,8 @@ dt_ioctl_record_start(struct dt_softc *s
> >  		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
> >  		dtp->dtp_recording++;
> >  		dtp->dtp_prov->dtpv_recording++;
> > +		if (dp->dp_nsecs != 0)
> > +			clockintr_advance(&dp->dp_clockintr, dp->dp_nsecs);
> >  	}
> >  	rw_exit_write(&dt_lock);
> >  
> > @@ -518,6 +521,8 @@ dt_ioctl_record_stop(struct dt_softc *sc
> >  	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
> >  		struct dt_probe *dtp = dp->dp_dtp;
> >  
> > +		if (dp->dp_nsecs != 0)
> > +			clockintr_cancel(&dp->dp_clockintr);
> 
> Here this is complex.  Since we are no longer using SMR in the callback
> to get a reference to `dp' we should not do the following before we are
> sure the callback won't run.  That would imply clockintr_unbind(), no?
> Some caution should be taken with clockintr_unbind() because we are
> holding a lock.
> 
> >  		dtp->dtp_recording--;
> >  		dtp->dtp_prov->dtpv_recording--;
> >  		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);

We should not unbind here.  All we need to do is cancel dp_clockintr.

With the patch, dt_prov_profile_clock() doesn't walk the SMR list
(dtp_pcbs) anymore and it doesn't check dtpv_recording or
dtp_recording anymore, so we don't need to do anything but
advance/cancel dp_clockintr when starting/stopping recording when
dp_nsecs is zero.  Check the updated patch.  Is that acceptable?

        if (dp->dp_nsecs == 0) {
                struct dt_probe *dtp = dp->dp_dtp;

                SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp,
                    dp_pnext);
                dtp->dtp_recording++;
                dtp->dtp_prov->dtpv_recording++;
        } else {
                clockintr_advance(&dp->dp_clockintr, dp->dp_nsecs);
        }

> > @@ -679,6 +684,15 @@ dt_pcb_free(struct dt_pcb *dp)
> >  {
> >  	if (dp == NULL)
> >  		return;
> > +
> > +	/*
> > +	 * dp_clockintr is bound if dp_nsecs is non-zero.  Its callback
> > +	 * has a reference to dp, so set an execution barrier and block
> > +	 * until it is no longer running before freeing dp.
> > +	 */
> > +	if (dp->dp_nsecs != 0)
> > +		clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);
> > +
> 
> This is too late and should not be necessary.  dtclose() calls 
> ctl_record_stop() which should take care of the barrier before
> decrementing counters.

I don't know what you mean by "too late".  Too late for what?

If the callback is going to have a reference to the enclosing PCB
then we need an execution barrier.

Assuming we need the barrier, we can put it in one of three places:
(1) dt_ioctl_record_stop(), (2) dt_pcb_free(), or (3) someplace
between the two functions.

As for (1), dt_ioctl_record_stop() is not ideal because we're holding
dt_lock.  It's _possible_.  I would need to change clockintr_cancel()
to support CL_BARRIER.  But it's not ideal because of the lock.

As for (2), I think you're saying we're not allowed to do the barrier
in dt_pcb_free().  Right?

That leaves (3).  So, can we put the barrier in dt_pcb_purge()?  See
the updated patch.

> >  	free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
> >  	free(dp, M_DT, sizeof(*dp));
> >  }
> > Index: sys/dev/dt/dtvar.h
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
> > diff -u -p -r1.17 dtvar.h
> > --- sys/dev/dt/dtvar.h	26 Apr 2023 16:53:59 -0000	1.17
> > +++ sys/dev/dt/dtvar.h	6 Feb 2024 01:34:46 -0000
> > @@ -197,9 +197,9 @@ struct dt_pcb {
> >  	struct dt_filter	 dp_filter;	/* [I] filter to match */
> >  
> >  	/* Provider specific fields. */
> > +	struct clockintr	 dp_clockintr;	/* [?] profiling handle */
> 
> If you don't know it's the kernel lock K.

Updated.

Index: sys/dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
diff -u -p -r1.5 dt_prov_profile.c
--- sys/dev/dt/dt_prov_profile.c	26 Apr 2023 16:53:59 -0000	1.5
+++ sys/dev/dt/dt_prov_profile.c	8 Feb 2024 01:47:11 -0000
@@ -20,6 +20,7 @@
 #include <sys/systm.h>
 #include <sys/param.h>
 #include <sys/atomic.h>
+#include <sys/clockintr.h>
 
 #include <dev/dt/dtvar.h>
 
@@ -31,13 +32,13 @@ struct dt_probe	*dtpp_interval;		/* glob
 
 int	dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
 	    struct dt_pcb_list *, struct dtioc_req *);
-int	dt_prov_profile_enter(struct dt_provider *, ...);
-int	dt_prov_interval_enter(struct dt_provider *, ...);
+void	dt_prov_profile_clock(struct clockrequest *, void *, void *);
+void	dt_prov_interval_clock(struct clockrequest *, void *, void *);
 
 struct dt_provider dt_prov_profile = {
 	.dtpv_name	= "profile",
 	.dtpv_alloc	= dt_prov_profile_alloc,
-	.dtpv_enter	= dt_prov_profile_enter,
+	.dtpv_enter	= NULL,
 	.dtpv_leave	= NULL,
 	.dtpv_dealloc	= NULL,
 };
@@ -45,7 +46,7 @@ struct dt_provider dt_prov_profile = {
 struct dt_provider dt_prov_interval = {
 	.dtpv_name	= "interval",
 	.dtpv_alloc	= dt_prov_profile_alloc,
-	.dtpv_enter	= dt_prov_interval_enter,
+	.dtpv_enter	= NULL,
 	.dtpv_leave	= NULL,
 	.dtpv_dealloc	= NULL,
 };
@@ -70,6 +71,7 @@ dt_prov_profile_alloc(struct dt_probe *d
 {
 	struct dt_pcb *dp;
 	struct cpu_info *ci;
+	void (*callback)(struct clockrequest *, void *, void *);
 	CPU_INFO_ITERATOR cii;
 	extern int hz;
 
@@ -80,6 +82,11 @@ dt_prov_profile_alloc(struct dt_probe *d
 	if (dtrq->dtrq_rate <= 0 || dtrq->dtrq_rate > hz)
 		return EOPNOTSUPP;
 
+	if (dtp == dtpp_interval)
+		callback = dt_prov_interval_clock;
+	else
+		callback = dt_prov_profile_clock;
+
 	CPU_INFO_FOREACH(cii, ci) {
 		if (!CPU_IS_PRIMARY(ci) && (dtp == dtpp_interval))
 			continue;
@@ -90,8 +97,10 @@ dt_prov_profile_alloc(struct dt_probe *d
 			return ENOMEM;
 		}
 
-		dp->dp_maxtick = hz / dtrq->dtrq_rate;
-		dp->dp_cpuid = ci->ci_cpuid;
+		dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
+		clockintr_bind(&dp->dp_clockintr, ci, callback, dp);
+		clockintr_stagger(&dp->dp_clockintr, dp->dp_nsecs,
+		    CPU_INFO_UNIT(ci), MAXCPUS);
 
 		dp->dp_filter = dtrq->dtrq_filter;
 		dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_PROFILE;
@@ -102,50 +111,35 @@ dt_prov_profile_alloc(struct dt_probe *d
 }
 
 static inline void
-dt_prov_profile_fire(struct dt_pcb *dp)
+dt_prov_profile_fire(struct dt_pcb *dp, uint64_t count)
 {
+	uint64_t i;
 	struct dt_evt *dtev;
 
-	if (++dp->dp_nticks < dp->dp_maxtick)
-		return;
-
-	dtev = dt_pcb_ring_get(dp, 1);
-	if (dtev == NULL)
-		return;
-	dt_pcb_ring_consume(dp, dtev);
-	dp->dp_nticks = 0;
+	for (i = 0; i < count; i++) {
+		dtev = dt_pcb_ring_get(dp, 1);
+		if (dtev == NULL)
+			return;
+		dt_pcb_ring_consume(dp, dtev);
+	}
 }
 
-int
-dt_prov_profile_enter(struct dt_provider *dtpv, ...)
+void
+dt_prov_profile_clock(struct clockrequest *cr, void *cf, void *arg)
 {
-	struct cpu_info *ci = curcpu();
-	struct dt_pcb *dp;
-
-	KASSERT(dtpv == &dt_prov_profile);
-
-	smr_read_enter();
-	SMR_SLIST_FOREACH(dp, &dtpp_profile->dtp_pcbs, dp_pnext) {
-		if (dp->dp_cpuid != ci->ci_cpuid)
-			continue;
+	uint64_t count;
+	struct dt_pcb *dp = arg;
 
-		dt_prov_profile_fire(dp);
-	}
-	smr_read_leave();
-	return 0;
+	count = clockrequest_advance(cr, dp->dp_nsecs);
+	dt_prov_profile_fire(dp, count);
 }
 
-int
-dt_prov_interval_enter(struct dt_provider *dtpv, ...)
+void
+dt_prov_interval_clock(struct clockrequest *cr, void *cf, void *arg)
 {
-	struct dt_pcb *dp;
+	uint64_t count;
+	struct dt_pcb *dp = arg;
 
-	KASSERT(dtpv == &dt_prov_interval);
-
-	smr_read_enter();
-	SMR_SLIST_FOREACH(dp, &dtpp_interval->dtp_pcbs, dp_pnext) {
-		dt_prov_profile_fire(dp);
-	}
-	smr_read_leave();
-	return 0;
+	count = clockrequest_advance(cr, dp->dp_nsecs);
+	dt_prov_profile_fire(dp, count);
 }
Index: sys/dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
diff -u -p -r1.29 dt_dev.c
--- sys/dev/dt/dt_dev.c	2 Jan 2024 16:32:48 -0000	1.29
+++ sys/dev/dt/dt_dev.c	8 Feb 2024 01:47:12 -0000
@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/param.h>
+#include <sys/clockintr.h>
 #include <sys/device.h>
 #include <sys/exec_elf.h>
 #include <sys/malloc.h>
@@ -56,13 +57,13 @@
  *	proc_trampoline+0x1c
  */
 #if defined(__amd64__)
-#define DT_FA_PROFILE	7
+#define DT_FA_PROFILE	5
 #define DT_FA_STATIC	2
 #elif defined(__i386__)
-#define DT_FA_PROFILE	8
+#define DT_FA_PROFILE	5
 #define DT_FA_STATIC	2
 #elif defined(__macppc__)
-#define DT_FA_PROFILE  7
+#define DT_FA_PROFILE  5
 #define DT_FA_STATIC   2
 #elif defined(__octeon__)
 #define DT_FA_PROFILE	6
@@ -487,11 +488,16 @@ dt_ioctl_record_start(struct dt_softc *s
 
 	rw_enter_write(&dt_lock);
 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
-		struct dt_probe *dtp = dp->dp_dtp;
+		if (dp->dp_nsecs == 0) {
+			struct dt_probe *dtp = dp->dp_dtp;
 
-		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
-		dtp->dtp_recording++;
-		dtp->dtp_prov->dtpv_recording++;
+			SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp,
+			    dp_pnext);
+			dtp->dtp_recording++;
+			dtp->dtp_prov->dtpv_recording++;
+		} else {
+			clockintr_advance(&dp->dp_clockintr, dp->dp_nsecs);
+		}
 	}
 	rw_exit_write(&dt_lock);
 
@@ -516,11 +522,16 @@ dt_ioctl_record_stop(struct dt_softc *sc
 
 	rw_enter_write(&dt_lock);
 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
-		struct dt_probe *dtp = dp->dp_dtp;
+		if (dp->dp_nsecs == 0) {
+			struct dt_probe *dtp = dp->dp_dtp;
 
-		dtp->dtp_recording--;
-		dtp->dtp_prov->dtpv_recording--;
-		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
+			dtp->dtp_recording--;
+			dtp->dtp_prov->dtpv_recording--;
+			SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb,
+			    dp_pnext);
+		} else {
+			clockintr_cancel(&dp->dp_clockintr);
+		}
 	}
 	rw_exit_write(&dt_lock);
 
@@ -690,6 +701,16 @@ dt_pcb_purge(struct dt_pcb_list *plist)
 
 	while ((dp = TAILQ_FIRST(plist)) != NULL) {
 		TAILQ_REMOVE(plist, dp, dp_snext);
+
+		/*
+		 * dp_clockintr is bound if dp_nsecs is non-zero.  Its
+		 * callback has a reference to dp, so set an execution
+		 * barrier and block until it finishes running before
+		 * freeing dp.
+		 */
+		if (dp->dp_nsecs != 0)
+			clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);
+
 		dt_pcb_free(dp);
 	}
 }
Index: sys/dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
diff -u -p -r1.17 dtvar.h
--- sys/dev/dt/dtvar.h	26 Apr 2023 16:53:59 -0000	1.17
+++ sys/dev/dt/dtvar.h	8 Feb 2024 01:47:12 -0000
@@ -197,9 +197,8 @@ struct dt_pcb {
 	struct dt_filter	 dp_filter;	/* [I] filter to match */
 
 	/* Provider specific fields. */
-	unsigned int		 dp_cpuid;	/* [I] on which CPU */
-	unsigned int		 dp_maxtick;	/* [I] freq. of profiling */
-	unsigned int		 dp_nticks;	/* [c] current tick count */
+	struct clockintr	 dp_clockintr;	/* [K] profiling handle */
+	uint64_t		 dp_nsecs;	/* [I] profiling period */
 
 	/* Counters */
 	uint64_t		 dp_dropevt;	/* [m] # dropped event */
Index: sys/kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
diff -u -p -r1.121 kern_clock.c
--- sys/kern/kern_clock.c	17 Oct 2023 00:04:02 -0000	1.121
+++ sys/kern/kern_clock.c	8 Feb 2024 01:47:12 -0000
@@ -50,11 +50,6 @@
 #include <sys/sched.h>
 #include <sys/timetc.h>
 
-#include "dt.h"
-#if NDT > 0
-#include <dev/dt/dtvar.h>
-#endif
-
 /*
  * Clock handling routines.
  *
@@ -145,12 +140,6 @@ initclocks(void)
 void
 hardclock(struct clockframe *frame)
 {
-#if NDT > 0
-	DT_ENTER(profile, NULL);
-	if (CPU_IS_PRIMARY(curcpu()))
-		DT_ENTER(interval, NULL);
-#endif
-
 	/*
 	 * If we are not the primary CPU, we're not allowed to do
 	 * any more work.