Download raw body.
dt(4), hardclock: move profiling entry points to dedicated clockintrs
dt(4), hardclock: move profiling entry points to dedicated clockintrs
dt(4), hardclock: move profiling entry points to dedicated clockintrs
On Thu, Feb 08, 2024 at 05:19:40PM +0100, Martin Pieuchot wrote:
> On 07/02/24(Wed) 20:22, Scott Cheloha wrote:
> > On Tue, Feb 06, 2024 at 03:24:20PM +0100, Martin Pieuchot wrote:
> > > On 05/02/24(Mon) 19:37, Scott Cheloha wrote:
> > > > On Fri, Jan 26, 2024 at 12:28:12PM -0600, Scott Cheloha wrote:
> > > > > This patch moves the dt(4) profiling entry points from hardclock()
> > > > > into dedicated clockintr callbacks. It depends on the clockintr_unbind
> > > > > patch I sent out separately, so be sure to apply that first.
> > > > >
> > > > > [...]
> > > >
> > > > Updated patch:
> > > >
> > > > - Don't advance/cancel dp_clockintr when dp_nsecs is zero.
> > > > - Update DT_FA_PROFILE for i386, macppc
> > >
> > > I can't test those.
> > [...]
> > We can combine them, but in multiple previous emails you have
> > requested that they remain separate.
>
> Because they were reasons to do so which are now gone.
>
> > So, which do you want? Either works for me.
>
> Merge them.
>
> > > [...]
> > > Here this is complex. Since we are no longer using SMR in the callback
> > > to get a reference to `dp' we should not do the following before we are
> > > sure the callback won't run. That would imply clockintr_unbind(), no?
> > > Some caution should be taken with clockintr_unbind() because we are
> > > holding a lock.
> > >
> > > > dtp->dtp_recording--;
> > > > dtp->dtp_prov->dtpv_recording--;
> > > > SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
> >
> > We should not unbind here. All we need to do is cancel dp_clockintr.
>
> No, we must ensure no other CPU has a valid reference on the descriptor
> before decrementing the counter. That's what I already said in my
> previous email in different words.
>
> > With the patch, dt_prov_profile_clock() doesn't walk the SMR list
> > (dtp_pcbs) anymore and it doesn't check dtpv_recording or
> > dtp_recording anymore, so we don't need to do anything but
> > advance/cancel dp_clockintr when starting/stopping recording when
> > dp_nsecs is zero. Check the updated patch. Is that acceptable?
>
> No it's not. [...]
Here's a revised patch based on our discussion off-list. It's not
exactly what we talked about re. dtpv_dealloc(), but I think it's
pretty close to a workable starting point. We can iterate on it once
it's in the tree.
- Merge the profile/interval callbacks and dt_prov_profile_fire()
into a single callback, dt_clock().
- Move the clockintr_bind() and clockintr_stagger() calls from
dtpv_alloc() to dt_ioctl_record_start().
- Add a dp_cpu member to struct dt_pcb so dt_ioctl_record_start()
knows where to bind each dp_clockintr.
- In dt_ioctl_record_stop(), replace clockintr_cancel() with
clockintr_unbind(). The execution barrier ensures the reference
to the enclosing PCB is dead before the caller returns.
As we discussed, it's not ideal to block while holding dt_lock, but
in practice blocking here is rare and dt_clock() does not run for very
long.
- Update the locking annotation for dp_clockintr. Both
clockintr_bind() and clockintr_unbind() are now called
within the global dt_lock ('D').
Regress passes. DT_FA_PROFILE values still yield correct-looking
traces.
Index: sys/dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
diff -u -p -r1.5 dt_prov_profile.c
--- sys/dev/dt/dt_prov_profile.c 26 Apr 2023 16:53:59 -0000 1.5
+++ sys/dev/dt/dt_prov_profile.c 8 Feb 2024 23:03:44 -0000
@@ -20,6 +20,7 @@
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/atomic.h>
+#include <sys/clockintr.h>
#include <dev/dt/dtvar.h>
@@ -31,13 +32,11 @@ struct dt_probe *dtpp_interval; /* glob
int dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
struct dt_pcb_list *, struct dtioc_req *);
-int dt_prov_profile_enter(struct dt_provider *, ...);
-int dt_prov_interval_enter(struct dt_provider *, ...);
struct dt_provider dt_prov_profile = {
.dtpv_name = "profile",
.dtpv_alloc = dt_prov_profile_alloc,
- .dtpv_enter = dt_prov_profile_enter,
+ .dtpv_enter = NULL,
.dtpv_leave = NULL,
.dtpv_dealloc = NULL,
};
@@ -45,7 +44,7 @@ struct dt_provider dt_prov_profile = {
struct dt_provider dt_prov_interval = {
.dtpv_name = "interval",
.dtpv_alloc = dt_prov_profile_alloc,
- .dtpv_enter = dt_prov_interval_enter,
+ .dtpv_enter = NULL,
.dtpv_leave = NULL,
.dtpv_dealloc = NULL,
};
@@ -90,8 +89,8 @@ dt_prov_profile_alloc(struct dt_probe *d
return ENOMEM;
}
- dp->dp_maxtick = hz / dtrq->dtrq_rate;
- dp->dp_cpuid = ci->ci_cpuid;
+ dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
+ dp->dp_cpu = ci;
dp->dp_filter = dtrq->dtrq_filter;
dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_PROFILE;
@@ -101,51 +100,18 @@ dt_prov_profile_alloc(struct dt_probe *d
return 0;
}
-static inline void
-dt_prov_profile_fire(struct dt_pcb *dp)
+void
+dt_clock(struct clockrequest *cr, void *cf, void *arg)
{
+ uint64_t count, i;
struct dt_evt *dtev;
+ struct dt_pcb *dp = arg;
- if (++dp->dp_nticks < dp->dp_maxtick)
- return;
-
- dtev = dt_pcb_ring_get(dp, 1);
- if (dtev == NULL)
- return;
- dt_pcb_ring_consume(dp, dtev);
- dp->dp_nticks = 0;
-}
-
-int
-dt_prov_profile_enter(struct dt_provider *dtpv, ...)
-{
- struct cpu_info *ci = curcpu();
- struct dt_pcb *dp;
-
- KASSERT(dtpv == &dt_prov_profile);
-
- smr_read_enter();
- SMR_SLIST_FOREACH(dp, &dtpp_profile->dtp_pcbs, dp_pnext) {
- if (dp->dp_cpuid != ci->ci_cpuid)
- continue;
-
- dt_prov_profile_fire(dp);
+ count = clockrequest_advance(cr, dp->dp_nsecs);
+ for (i = 0; i < count; i++) {
+ dtev = dt_pcb_ring_get(dp, 1);
+ if (dtev == NULL)
+ return;
+ dt_pcb_ring_consume(dp, dtev);
}
- smr_read_leave();
- return 0;
-}
-
-int
-dt_prov_interval_enter(struct dt_provider *dtpv, ...)
-{
- struct dt_pcb *dp;
-
- KASSERT(dtpv == &dt_prov_interval);
-
- smr_read_enter();
- SMR_SLIST_FOREACH(dp, &dtpp_interval->dtp_pcbs, dp_pnext) {
- dt_prov_profile_fire(dp);
- }
- smr_read_leave();
- return 0;
}
Index: sys/dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
diff -u -p -r1.29 dt_dev.c
--- sys/dev/dt/dt_dev.c 2 Jan 2024 16:32:48 -0000 1.29
+++ sys/dev/dt/dt_dev.c 8 Feb 2024 23:03:44 -0000
@@ -19,6 +19,7 @@
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
+#include <sys/clockintr.h>
#include <sys/device.h>
#include <sys/exec_elf.h>
#include <sys/malloc.h>
@@ -56,13 +57,13 @@
* proc_trampoline+0x1c
*/
#if defined(__amd64__)
-#define DT_FA_PROFILE 7
+#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__i386__)
-#define DT_FA_PROFILE 8
+#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__macppc__)
-#define DT_FA_PROFILE 7
+#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__octeon__)
#define DT_FA_PROFILE 6
@@ -492,6 +493,14 @@ dt_ioctl_record_start(struct dt_softc *s
SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
dtp->dtp_recording++;
dtp->dtp_prov->dtpv_recording++;
+
+ if (dp->dp_nsecs != 0) {
+ clockintr_bind(&dp->dp_clockintr, dp->dp_cpu, dt_clock,
+ dp);
+ clockintr_stagger(&dp->dp_clockintr, dp->dp_nsecs,
+ CPU_INFO_UNIT(dp->dp_cpu), MAXCPUS);
+ clockintr_advance(&dp->dp_clockintr, dp->dp_nsecs);
+ }
}
rw_exit_write(&dt_lock);
@@ -517,6 +526,13 @@ dt_ioctl_record_stop(struct dt_softc *sc
rw_enter_write(&dt_lock);
TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
struct dt_probe *dtp = dp->dp_dtp;
+
+ /*
+ * Set an execution barrier to ensure dt_clock() has
+ * released its shared reference to dp.
+ */
+ if (dp->dp_nsecs != 0)
+ clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);
dtp->dtp_recording--;
dtp->dtp_prov->dtpv_recording--;
Index: sys/dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
diff -u -p -r1.17 dtvar.h
--- sys/dev/dt/dtvar.h 26 Apr 2023 16:53:59 -0000 1.17
+++ sys/dev/dt/dtvar.h 8 Feb 2024 23:03:44 -0000
@@ -175,6 +175,7 @@ int dtioc_req_isvalid(struct dtioc_req
* userland read(2)s them.
*
* Locks used to protect struct members in this file:
+ * D dt_lock
* I immutable after creation
* K kernel lock
* K,S kernel lock for writing and SMR for reading
@@ -197,9 +198,9 @@ struct dt_pcb {
struct dt_filter dp_filter; /* [I] filter to match */
/* Provider specific fields. */
- unsigned int dp_cpuid; /* [I] on which CPU */
- unsigned int dp_maxtick; /* [I] freq. of profiling */
- unsigned int dp_nticks; /* [c] current tick count */
+ struct clockintr dp_clockintr; /* [D] profiling handle */
+ uint64_t dp_nsecs; /* [I] profiling period */
+ struct cpu_info *dp_cpu; /* [I] on which CPU */
/* Counters */
uint64_t dp_dropevt; /* [m] # dropped event */
@@ -270,6 +271,7 @@ struct dt_probe *dt_dev_alloc_probe(cons
struct dt_provider *);
void dt_dev_register_probe(struct dt_probe *);
+void dt_clock(struct clockrequest *, void *, void *);
extern volatile uint32_t dt_tracing; /* currently tracing? */
Index: sys/kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
diff -u -p -r1.121 kern_clock.c
--- sys/kern/kern_clock.c 17 Oct 2023 00:04:02 -0000 1.121
+++ sys/kern/kern_clock.c 8 Feb 2024 23:03:44 -0000
@@ -50,11 +50,6 @@
#include <sys/sched.h>
#include <sys/timetc.h>
-#include "dt.h"
-#if NDT > 0
-#include <dev/dt/dtvar.h>
-#endif
-
/*
* Clock handling routines.
*
@@ -145,12 +140,6 @@ initclocks(void)
void
hardclock(struct clockframe *frame)
{
-#if NDT > 0
- DT_ENTER(profile, NULL);
- if (CPU_IS_PRIMARY(curcpu()))
- DT_ENTER(interval, NULL);
-#endif
-
/*
* If we are not the primary CPU, we're not allowed to do
* any more work.
dt(4), hardclock: move profiling entry points to dedicated clockintrs
dt(4), hardclock: move profiling entry points to dedicated clockintrs
dt(4), hardclock: move profiling entry points to dedicated clockintrs