Download raw body.
dt(4), hardclock: move profiling entry points to dedicated clockintrs
dt(4), hardclock: move profiling entry points to dedicated clockintrs
On Fri, Jan 26, 2024 at 12:28:12PM -0600, Scott Cheloha wrote:
> This patch moves the dt(4) profiling entry points from hardclock()
> into dedicated clockintr callbacks. It depends on the clockintr_unbind
> patch I sent out separately, so be sure to apply that first.
>
> - In struct dt_pcb, replace dp_maxtick and dp_nticks with dp_nsecs.
> This is the PCB's profiling period.
>
> - Each struct dt_pcb gets its own struct clockintr, dp_clockintr.
> dp_clockintr is bound during dt_prov_profile_alloc(). We stagger
> dp_clockintr to simulate the current behavior under hardclock().
>
> Note that dp_clockintr is given a reference to its enclosing
> PCB when it is bound. More on this in a second.
>
> - Replace dt_prov_profile_enter/dt_prov_interval_enter with
> dt_prov_profile_clock/dt_prov_profile_interval. These are
> your clockintr callback functions. Because the prototypes
> now don't match the other *_enter() functions I figure
> changing the names is reasonable.
>
> Because dp_clockintr has a copy of the PCB pointer we no longer
> need to search for the PCB with SMR_SLIST_FOREACH. This reduces
> profiling overhead, especially on larger systems.
>
> - dt_prov_profile_fire() now takes a count of profiling ticks and
> runs a loop to grab as many EVTs as it needs. If the callback
> is delayed, it will assign multiple "hits" to the current PC,
> just like we do now under hardclock().
>
> - dt_pcb_free() unbinds the clockintr. Because dp_clockintr has
> a reference to the enclosing PCB we set an execution barrier
> here to ensure it is safe to free the PCB. In practice this
> will rarely block, but you need to set the barrier to be sure.
>
> DT_FA_PROFILE has been updated for amd64. The unpruned stack now
> looks like this:
>
> dt_pcb_ring_get
> dt_prov_profile_clock
> clockintr_dispatch
> lapic_clockintr
> Xresume_lapic_ltimer
> acpicpu_idle
> sched_idle
> proc_trampoline
> kernel
>
> I can test other platforms and update the DT_FA_PROFILE values
> for everything but powerpc64 before committing. The new value
> for powerpc64 is probably 4, but somebody else needs to confirm.
Updated patch:
- Don't advance/cancel dp_clockintr when dp_nsecs is zero.
- Update DT_FA_PROFILE for i386, macppc
Index: sys/dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
diff -u -p -r1.5 dt_prov_profile.c
--- sys/dev/dt/dt_prov_profile.c 26 Apr 2023 16:53:59 -0000 1.5
+++ sys/dev/dt/dt_prov_profile.c 6 Feb 2024 01:34:45 -0000
@@ -20,6 +20,7 @@
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/atomic.h>
+#include <sys/clockintr.h>
#include <dev/dt/dtvar.h>
@@ -31,13 +32,13 @@ struct dt_probe *dtpp_interval; /* glob
int dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
struct dt_pcb_list *, struct dtioc_req *);
-int dt_prov_profile_enter(struct dt_provider *, ...);
-int dt_prov_interval_enter(struct dt_provider *, ...);
+void dt_prov_profile_clock(struct clockrequest *, void *, void *);
+void dt_prov_interval_clock(struct clockrequest *, void *, void *);
struct dt_provider dt_prov_profile = {
.dtpv_name = "profile",
.dtpv_alloc = dt_prov_profile_alloc,
- .dtpv_enter = dt_prov_profile_enter,
+ .dtpv_enter = NULL,
.dtpv_leave = NULL,
.dtpv_dealloc = NULL,
};
@@ -45,7 +46,7 @@ struct dt_provider dt_prov_profile = {
struct dt_provider dt_prov_interval = {
.dtpv_name = "interval",
.dtpv_alloc = dt_prov_profile_alloc,
- .dtpv_enter = dt_prov_interval_enter,
+ .dtpv_enter = NULL,
.dtpv_leave = NULL,
.dtpv_dealloc = NULL,
};
@@ -70,6 +71,7 @@ dt_prov_profile_alloc(struct dt_probe *d
{
struct dt_pcb *dp;
struct cpu_info *ci;
+ void (*callback)(struct clockrequest *, void *, void *);
CPU_INFO_ITERATOR cii;
extern int hz;
@@ -80,6 +82,11 @@ dt_prov_profile_alloc(struct dt_probe *d
if (dtrq->dtrq_rate <= 0 || dtrq->dtrq_rate > hz)
return EOPNOTSUPP;
+ if (dtp == dtpp_interval)
+ callback = dt_prov_interval_clock;
+ else
+ callback = dt_prov_profile_clock;
+
CPU_INFO_FOREACH(cii, ci) {
if (!CPU_IS_PRIMARY(ci) && (dtp == dtpp_interval))
continue;
@@ -90,8 +97,11 @@ dt_prov_profile_alloc(struct dt_probe *d
return ENOMEM;
}
- dp->dp_maxtick = hz / dtrq->dtrq_rate;
+ dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
dp->dp_cpuid = ci->ci_cpuid;
+ clockintr_bind(&dp->dp_clockintr, ci, callback, dp);
+ clockintr_stagger(&dp->dp_clockintr, dp->dp_nsecs,
+ CPU_INFO_UNIT(ci), MAXCPUS);
dp->dp_filter = dtrq->dtrq_filter;
dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_PROFILE;
@@ -102,50 +112,35 @@ dt_prov_profile_alloc(struct dt_probe *d
}
static inline void
-dt_prov_profile_fire(struct dt_pcb *dp)
+dt_prov_profile_fire(struct dt_pcb *dp, uint64_t count)
{
+ uint64_t i;
struct dt_evt *dtev;
- if (++dp->dp_nticks < dp->dp_maxtick)
- return;
-
- dtev = dt_pcb_ring_get(dp, 1);
- if (dtev == NULL)
- return;
- dt_pcb_ring_consume(dp, dtev);
- dp->dp_nticks = 0;
+ for (i = 0; i < count; i++) {
+ dtev = dt_pcb_ring_get(dp, 1);
+ if (dtev == NULL)
+ return;
+ dt_pcb_ring_consume(dp, dtev);
+ }
}
-int
-dt_prov_profile_enter(struct dt_provider *dtpv, ...)
+void
+dt_prov_profile_clock(struct clockrequest *cr, void *cf, void *arg)
{
- struct cpu_info *ci = curcpu();
- struct dt_pcb *dp;
-
- KASSERT(dtpv == &dt_prov_profile);
-
- smr_read_enter();
- SMR_SLIST_FOREACH(dp, &dtpp_profile->dtp_pcbs, dp_pnext) {
- if (dp->dp_cpuid != ci->ci_cpuid)
- continue;
+ uint64_t count;
+ struct dt_pcb *dp = arg;
- dt_prov_profile_fire(dp);
- }
- smr_read_leave();
- return 0;
+ count = clockrequest_advance(cr, dp->dp_nsecs);
+ dt_prov_profile_fire(dp, count);
}
-int
-dt_prov_interval_enter(struct dt_provider *dtpv, ...)
+void
+dt_prov_interval_clock(struct clockrequest *cr, void *cf, void *arg)
{
- struct dt_pcb *dp;
+ uint64_t count;
+ struct dt_pcb *dp = arg;
- KASSERT(dtpv == &dt_prov_interval);
-
- smr_read_enter();
- SMR_SLIST_FOREACH(dp, &dtpp_interval->dtp_pcbs, dp_pnext) {
- dt_prov_profile_fire(dp);
- }
- smr_read_leave();
- return 0;
+ count = clockrequest_advance(cr, dp->dp_nsecs);
+ dt_prov_profile_fire(dp, count);
}
Index: sys/dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
diff -u -p -r1.29 dt_dev.c
--- sys/dev/dt/dt_dev.c 2 Jan 2024 16:32:48 -0000 1.29
+++ sys/dev/dt/dt_dev.c 6 Feb 2024 01:34:45 -0000
@@ -19,6 +19,7 @@
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
+#include <sys/clockintr.h>
#include <sys/device.h>
#include <sys/exec_elf.h>
#include <sys/malloc.h>
@@ -56,13 +57,13 @@
* proc_trampoline+0x1c
*/
#if defined(__amd64__)
-#define DT_FA_PROFILE 7
+#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__i386__)
-#define DT_FA_PROFILE 8
+#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__macppc__)
-#define DT_FA_PROFILE 7
+#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__octeon__)
#define DT_FA_PROFILE 6
@@ -492,6 +493,8 @@ dt_ioctl_record_start(struct dt_softc *s
SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
dtp->dtp_recording++;
dtp->dtp_prov->dtpv_recording++;
+ if (dp->dp_nsecs != 0)
+ clockintr_advance(&dp->dp_clockintr, dp->dp_nsecs);
}
rw_exit_write(&dt_lock);
@@ -518,6 +521,8 @@ dt_ioctl_record_stop(struct dt_softc *sc
TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
struct dt_probe *dtp = dp->dp_dtp;
+ if (dp->dp_nsecs != 0)
+ clockintr_cancel(&dp->dp_clockintr);
dtp->dtp_recording--;
dtp->dtp_prov->dtpv_recording--;
SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
@@ -679,6 +684,15 @@ dt_pcb_free(struct dt_pcb *dp)
{
if (dp == NULL)
return;
+
+ /*
+ * dp_clockintr is bound if dp_nsecs is non-zero. Its callback
+ * has a reference to dp, so set an execution barrier and block
+ * until it is no longer running before freeing dp.
+ */
+ if (dp->dp_nsecs != 0)
+ clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);
+
free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
free(dp, M_DT, sizeof(*dp));
}
Index: sys/dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
diff -u -p -r1.17 dtvar.h
--- sys/dev/dt/dtvar.h 26 Apr 2023 16:53:59 -0000 1.17
+++ sys/dev/dt/dtvar.h 6 Feb 2024 01:34:46 -0000
@@ -197,9 +197,9 @@ struct dt_pcb {
struct dt_filter dp_filter; /* [I] filter to match */
/* Provider specific fields. */
+ struct clockintr dp_clockintr; /* [?] profiling handle */
+ uint64_t dp_nsecs; /* [I] profiling period */
unsigned int dp_cpuid; /* [I] on which CPU */
- unsigned int dp_maxtick; /* [I] freq. of profiling */
- unsigned int dp_nticks; /* [c] current tick count */
/* Counters */
uint64_t dp_dropevt; /* [m] # dropped event */
Index: sys/kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
diff -u -p -r1.121 kern_clock.c
--- sys/kern/kern_clock.c 17 Oct 2023 00:04:02 -0000 1.121
+++ sys/kern/kern_clock.c 6 Feb 2024 01:34:46 -0000
@@ -50,11 +50,6 @@
#include <sys/sched.h>
#include <sys/timetc.h>
-#include "dt.h"
-#if NDT > 0
-#include <dev/dt/dtvar.h>
-#endif
-
/*
* Clock handling routines.
*
@@ -145,12 +140,6 @@ initclocks(void)
void
hardclock(struct clockframe *frame)
{
-#if NDT > 0
- DT_ENTER(profile, NULL);
- if (CPU_IS_PRIMARY(curcpu()))
- DT_ENTER(interval, NULL);
-#endif
-
/*
* If we are not the primary CPU, we're not allowed to do
* any more work.
dt(4), hardclock: move profiling entry points to dedicated clockintrs
dt(4), hardclock: move profiling entry points to dedicated clockintrs