From: Martin Pieuchot Subject: Re: dt: Distinguish rules from each other To: Christian Ludwig Cc: tech@openbsd.org Date: Sat, 28 Sep 2024 14:50:45 +0200 On 15/09/24(Sun) 21:53, Christian Ludwig wrote: > There can be multiple rules with the same probe in a btrace script, e.g. > > interval:hz:3 { print("3"); } > interval:hz:1 { print("1"); } > > When allocating a new probe in the kernel, we loose the mapping between > the event read from dt(4) and the corresponding userland probe in a > btrace script. The result is that btrace executes all actions for the > same kernel probe, no matter which one actually fired. > > Allow userland to stamp a (different) label for each probe. That label > does not get touched by the kernel. It is only carried through into the > event ring. After reading the event, userland can map the originating > probe back from the event. Keeping the labels unique, to be able to > distinguish events, is the duty of the userland process. > > btrace stamps the rule into the PCB request. Now it directly gets back > the matching rule from each event. I'm now found of passing a pointer to the kernel and back to userland. Why not start by disallowing multiple rules of the same probe? Is there any use for them apart from "interval" and "profile"? > --- > regress/usr.sbin/btrace/Makefile | 3 ++- > regress/usr.sbin/btrace/interval.bt | 10 ++++++++++ > regress/usr.sbin/btrace/interval.ok | 4 ++++ > sys/dev/dt/dt_dev.c | 1 + > sys/dev/dt/dt_prov_kprobe.c | 1 + > sys/dev/dt/dt_prov_profile.c | 1 + > sys/dev/dt/dt_prov_static.c | 1 + > sys/dev/dt/dt_prov_syscall.c | 1 + > sys/dev/dt/dtvar.h | 3 +++ > usr.sbin/btrace/Makefile | 2 ++ > usr.sbin/btrace/btrace.c | 23 ++++++++++++++--------- > usr.sbin/btrace/printf.c | 1 + > 12 files changed, 41 insertions(+), 10 deletions(-) > create mode 100644 regress/usr.sbin/btrace/interval.bt > create mode 100644 regress/usr.sbin/btrace/interval.ok > > diff --git a/regress/usr.sbin/btrace/Makefile b/regress/usr.sbin/btrace/Makefile > index 10fc07fd0913..6c075c51f701 100644 > --- a/regress/usr.sbin/btrace/Makefile > +++ b/regress/usr.sbin/btrace/Makefile > @@ -14,7 +14,8 @@ BT_LANG_SCRIPTS= arithm beginend beginend-argn boolean comments \ > BT_ARG_LANG_SCRIPTS= staticv str > > # scripts that use kernel probes > -BT_KERN_SCRIPTS= argn empty-stmts filters mapoverwrite multiprobe > +BT_KERN_SCRIPTS= argn empty-stmts filters interval mapoverwrite \ > + multiprobe > > REGRESS_EXPECTED_FAILURES= run-maxoperand > > diff --git a/regress/usr.sbin/btrace/interval.bt b/regress/usr.sbin/btrace/interval.bt > new file mode 100644 > index 000000000000..6dbaeab9af30 > --- /dev/null > +++ b/regress/usr.sbin/btrace/interval.bt > @@ -0,0 +1,10 @@ > +interval:hz:1 > +{ > + print("1"); > + exit(); > +} > + > +interval:hz:3 > +{ > + print("3"); > +} > diff --git a/regress/usr.sbin/btrace/interval.ok b/regress/usr.sbin/btrace/interval.ok > new file mode 100644 > index 000000000000..601478fef2fc > --- /dev/null > +++ b/regress/usr.sbin/btrace/interval.ok > @@ -0,0 +1,4 @@ > +3 > +3 > +3 > +1 > diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c > index 05ce734abf44..fefad7047e1c 100644 > --- a/sys/dev/dt/dt_dev.c > +++ b/sys/dev/dt/dt_dev.c > @@ -702,6 +702,7 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling) > dtev->dtev_cpu = cpu_number(); > dtev->dtev_pid = p->p_p->ps_pid; > dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET; > + dtev->dtev_lbl = dp->dp_lbl; > nanotime(&dtev->dtev_tsp); > > if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME)) > diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c > index f929dd56d6fc..c504d433b489 100644 > --- a/sys/dev/dt/dt_prov_kprobe.c > +++ b/sys/dev/dt/dt_prov_kprobe.c > @@ -263,6 +263,7 @@ dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc, > intr_restore(s); > } > > + dp->dp_lbl = dtrq->dtrq_lbl; > dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_KPROBE; > TAILQ_INSERT_HEAD(plist, dp, dp_snext); return 0; > diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c > index 62900152e147..d5435e2e0469 100644 > --- a/sys/dev/dt/dt_prov_profile.c > +++ b/sys/dev/dt/dt_prov_profile.c > @@ -88,6 +88,7 @@ dt_prov_profile_alloc(struct dt_probe *dtp, struct dt_softc *sc, > return ENOMEM; > } > > + dp->dp_lbl = dtrq->dtrq_lbl; > dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate; > dp->dp_cpu = ci; > > diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c > index 4a87bc9161ab..b7ca44994502 100644 > --- a/sys/dev/dt/dt_prov_static.c > +++ b/sys/dev/dt/dt_prov_static.c > @@ -185,6 +185,7 @@ dt_prov_static_alloc(struct dt_probe *dtp, struct dt_softc *sc, > if (dp == NULL) > return ENOMEM; > > + dp->dp_lbl = dtrq->dtrq_lbl; > dp->dp_evtflags = dtrq->dtrq_evtflags; > TAILQ_INSERT_HEAD(plist, dp, dp_snext); > > diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c > index 03c561eece8a..707c8e9ddf0d 100644 > --- a/sys/dev/dt/dt_prov_syscall.c > +++ b/sys/dev/dt/dt_prov_syscall.c > @@ -109,6 +109,7 @@ dt_prov_syscall_alloc(struct dt_probe *dtp, struct dt_softc *sc, > if (dp == NULL) > return ENOMEM; > > + dp->dp_lbl = dtrq->dtrq_lbl; > dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_SYSCALL; > TAILQ_INSERT_HEAD(plist, dp, dp_snext); > > diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h > index 5bb7ab0d7715..d9b9dde77de7 100644 > --- a/sys/dev/dt/dtvar.h > +++ b/sys/dev/dt/dtvar.h > @@ -44,6 +44,7 @@ > */ > struct dt_evt { > unsigned int dtev_pbn; /* Probe number */ > + uintptr_t dtev_lbl; /* User-provided label */ > unsigned int dtev_cpu; /* CPU id */ > pid_t dtev_pid; /* ID of current process */ > pid_t dtev_tid; /* ID of current thread */ > @@ -110,6 +111,7 @@ struct dtioc_arg { > struct dtioc_req { > uint32_t dtrq_pbn; /* probe number */ > uint32_t dtrq_rate; /* number of ticks */ > + uintptr_t dtrq_lbl; /* user-provided label */ > uint64_t dtrq_evtflags; /* states to record */ > }; > > @@ -172,6 +174,7 @@ struct dt_pcb { > struct dt_softc *dp_sc; /* [I] related softc */ > struct dt_probe *dp_dtp; /* [I] related probe */ > uint64_t dp_evtflags; /* [I] event states to record */ > + uintptr_t dp_lbl; /* [I] label */ > > /* Provider specific fields. */ > struct clockintr dp_clockintr; /* [D] profiling handle */ > diff --git a/usr.sbin/btrace/Makefile b/usr.sbin/btrace/Makefile > index 3c4a58f3610d..f613d0a80d0d 100644 > --- a/usr.sbin/btrace/Makefile > +++ b/usr.sbin/btrace/Makefile > @@ -5,6 +5,8 @@ MAN= bt.5 btrace.8 > > SRCS= bt_parse.y btrace.c ksyms.c map.c printf.c > > +CFLAGS+= -I/usr/src/sys > + > # Use syscall names generated from sys/kern/makesyscalls.sh > SRCS+= syscalls.c > CFLAGS+= -DPTRACE -DKTRACE -DACCOUNTING -DNFSCLIENT -DSYSVSHM -DSYSVSEM > diff --git a/usr.sbin/btrace/btrace.c b/usr.sbin/btrace/btrace.c > index aa75a12f6762..ba0600b5b631 100644 > --- a/usr.sbin/btrace/btrace.c > +++ b/usr.sbin/btrace/btrace.c > @@ -31,6 +31,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -540,6 +541,7 @@ rules_setup(int fd) > dtrq->dtrq_pbn = dtpi->dtpi_pbn; > dtrq->dtrq_rate = bp->bp_rate; > dtrq->dtrq_evtflags = evtflags; > + dtrq->dtrq_lbl = (uintptr_t)r; > if (dtrq->dtrq_evtflags & DTEVT_KSTACK) > dokstack = 1; > bp->bp_cookie = dtrq; > @@ -588,16 +590,19 @@ rules_apply(int fd, struct dt_evt *dtev) > struct bt_rule *r; > struct bt_probe *bp; > > - TAILQ_FOREACH(r, &g_rules, br_next) { > - SLIST_FOREACH(bp, &r->br_probes, bp_next) { > - if (bp->bp_type != B_PT_PROBE || > - bp->bp_pbn != dtev->dtev_pbn) > - continue; > + /* > + * We have stamped the rule as label in the dtioc_req during setup. > + * The kernel simply passes it on to events. > + */ > + r = (struct bt_rule *)dtev->dtev_lbl; > + SLIST_FOREACH(bp, &r->br_probes, bp_next) { > + if (bp->bp_type != B_PT_PROBE || > + bp->bp_pbn != dtev->dtev_pbn) > + continue; > > - dtai_cache(fd, &dt_dtpis[dtev->dtev_pbn - 1]); > - if (rule_eval(r, dtev)) > - return 1; > - } > + dtai_cache(fd, &dt_dtpis[dtev->dtev_pbn - 1]); > + if (rule_eval(r, dtev)) > + return 1; > } > return 0; > } > diff --git a/usr.sbin/btrace/printf.c b/usr.sbin/btrace/printf.c > index 49d046ab45d4..0190d095c3cf 100644 > --- a/usr.sbin/btrace/printf.c > +++ b/usr.sbin/btrace/printf.c > @@ -35,6 +35,7 @@ > #include > #include > #include > +#include > #include > #include > #include > -- > 2.34.1 >