Index | Thread | Search

From:
Christian Ludwig <cludwig@genua.de>
Subject:
dt: Distinguish rules from each other
To:
<tech@openbsd.org>
Date:
Sun, 15 Sep 2024 21:53:34 +0200

Download raw body.

Thread
Hi,

There can be multiple rules with the same probe in a btrace script, e.g.

 interval:hz:3 { print("3"); }
 interval:hz:1 { print("1"); }

When allocating a new probe in the kernel, we loose the mapping between
the event read from dt(4) and the corresponding userland probe in a
btrace script. The result is that btrace executes all actions for the
same kernel probe, no matter which one actually fired.

Allow userland to stamp a (different) label for each probe. That label
does not get touched by the kernel. It is only carried through into the
event ring. After reading the event, userland can map the originating
probe back from the event. Keeping the labels unique, to be able to
distinguish events, is the duty of the userland process.

btrace stamps the rule into the PCB request. Now it directly gets back
the matching rule from each event.


So long,


 - Christian

---
 regress/usr.sbin/btrace/Makefile    |  3 ++-
 regress/usr.sbin/btrace/interval.bt | 10 ++++++++++
 regress/usr.sbin/btrace/interval.ok |  4 ++++
 sys/dev/dt/dt_dev.c                 |  1 +
 sys/dev/dt/dt_prov_kprobe.c         |  1 +
 sys/dev/dt/dt_prov_profile.c        |  1 +
 sys/dev/dt/dt_prov_static.c         |  1 +
 sys/dev/dt/dt_prov_syscall.c        |  1 +
 sys/dev/dt/dtvar.h                  |  3 +++
 usr.sbin/btrace/Makefile            |  2 ++
 usr.sbin/btrace/btrace.c            | 23 ++++++++++++++---------
 usr.sbin/btrace/printf.c            |  1 +
 12 files changed, 41 insertions(+), 10 deletions(-)
 create mode 100644 regress/usr.sbin/btrace/interval.bt
 create mode 100644 regress/usr.sbin/btrace/interval.ok

diff --git a/regress/usr.sbin/btrace/Makefile b/regress/usr.sbin/btrace/Makefile
index 10fc07fd0913..6c075c51f701 100644
--- a/regress/usr.sbin/btrace/Makefile
+++ b/regress/usr.sbin/btrace/Makefile
@@ -14,7 +14,8 @@ BT_LANG_SCRIPTS=	arithm beginend beginend-argn boolean comments \
 BT_ARG_LANG_SCRIPTS=	staticv str
 
 # scripts that use kernel probes
-BT_KERN_SCRIPTS=	argn empty-stmts filters mapoverwrite multiprobe
+BT_KERN_SCRIPTS=	argn empty-stmts filters interval mapoverwrite \
+			multiprobe
 
 REGRESS_EXPECTED_FAILURES=	run-maxoperand
 
diff --git a/regress/usr.sbin/btrace/interval.bt b/regress/usr.sbin/btrace/interval.bt
new file mode 100644
index 000000000000..6dbaeab9af30
--- /dev/null
+++ b/regress/usr.sbin/btrace/interval.bt
@@ -0,0 +1,10 @@
+interval:hz:1
+{
+	print("1");
+	exit();
+}
+
+interval:hz:3
+{
+	print("3");
+}
diff --git a/regress/usr.sbin/btrace/interval.ok b/regress/usr.sbin/btrace/interval.ok
new file mode 100644
index 000000000000..601478fef2fc
--- /dev/null
+++ b/regress/usr.sbin/btrace/interval.ok
@@ -0,0 +1,4 @@
+3
+3
+3
+1
diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c
index 05ce734abf44..fefad7047e1c 100644
--- a/sys/dev/dt/dt_dev.c
+++ b/sys/dev/dt/dt_dev.c
@@ -702,6 +702,7 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
 	dtev->dtev_cpu = cpu_number();
 	dtev->dtev_pid = p->p_p->ps_pid;
 	dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
+	dtev->dtev_lbl = dp->dp_lbl;
 	nanotime(&dtev->dtev_tsp);
 
 	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c
index f929dd56d6fc..c504d433b489 100644
--- a/sys/dev/dt/dt_prov_kprobe.c
+++ b/sys/dev/dt/dt_prov_kprobe.c
@@ -263,6 +263,7 @@ dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc,
 		intr_restore(s);
 	}
 
+	dp->dp_lbl = dtrq->dtrq_lbl;
 	dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_KPROBE;
 	TAILQ_INSERT_HEAD(plist, dp, dp_snext);
 	return 0;
diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c
index 62900152e147..d5435e2e0469 100644
--- a/sys/dev/dt/dt_prov_profile.c
+++ b/sys/dev/dt/dt_prov_profile.c
@@ -88,6 +88,7 @@ dt_prov_profile_alloc(struct dt_probe *dtp, struct dt_softc *sc,
 			return ENOMEM;
 		}
 
+		dp->dp_lbl = dtrq->dtrq_lbl;
 		dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
 		dp->dp_cpu = ci;
 
diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c
index 4a87bc9161ab..b7ca44994502 100644
--- a/sys/dev/dt/dt_prov_static.c
+++ b/sys/dev/dt/dt_prov_static.c
@@ -185,6 +185,7 @@ dt_prov_static_alloc(struct dt_probe *dtp, struct dt_softc *sc,
 	if (dp == NULL)
 		return ENOMEM;
 
+	dp->dp_lbl = dtrq->dtrq_lbl;
 	dp->dp_evtflags = dtrq->dtrq_evtflags;
 	TAILQ_INSERT_HEAD(plist, dp, dp_snext);
 
diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c
index 03c561eece8a..707c8e9ddf0d 100644
--- a/sys/dev/dt/dt_prov_syscall.c
+++ b/sys/dev/dt/dt_prov_syscall.c
@@ -109,6 +109,7 @@ dt_prov_syscall_alloc(struct dt_probe *dtp, struct dt_softc *sc,
 	if (dp == NULL)
 		return ENOMEM;
 
+	dp->dp_lbl = dtrq->dtrq_lbl;
 	dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_SYSCALL;
 	TAILQ_INSERT_HEAD(plist, dp, dp_snext);
 
diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h
index 5bb7ab0d7715..d9b9dde77de7 100644
--- a/sys/dev/dt/dtvar.h
+++ b/sys/dev/dt/dtvar.h
@@ -44,6 +44,7 @@
  */
 struct dt_evt {
 	unsigned int		dtev_pbn;	/* Probe number */
+	uintptr_t		dtev_lbl;	/* User-provided label */
 	unsigned int		dtev_cpu;	/* CPU id */
 	pid_t			dtev_pid;	/* ID of current process */
 	pid_t			dtev_tid;	/* ID of current thread */
@@ -110,6 +111,7 @@ struct dtioc_arg {
 struct dtioc_req {
 	uint32_t		 dtrq_pbn;	/* probe number */
 	uint32_t		 dtrq_rate;	/* number of ticks */
+	uintptr_t		 dtrq_lbl;	/* user-provided label */
 	uint64_t		 dtrq_evtflags;	/* states to record */
 };
 
@@ -172,6 +174,7 @@ struct dt_pcb {
 	struct dt_softc		*dp_sc;		/* [I] related softc */
 	struct dt_probe		*dp_dtp;	/* [I] related probe */
 	uint64_t		 dp_evtflags;	/* [I] event states to record */
+	uintptr_t		 dp_lbl;	/* [I] label */
 
 	/* Provider specific fields. */
 	struct clockintr	 dp_clockintr;	/* [D] profiling handle */
diff --git a/usr.sbin/btrace/Makefile b/usr.sbin/btrace/Makefile
index 3c4a58f3610d..f613d0a80d0d 100644
--- a/usr.sbin/btrace/Makefile
+++ b/usr.sbin/btrace/Makefile
@@ -5,6 +5,8 @@ MAN=		bt.5 btrace.8
 
 SRCS=		bt_parse.y btrace.c ksyms.c map.c printf.c
 
+CFLAGS+=	-I/usr/src/sys
+
 # Use syscall names generated from sys/kern/makesyscalls.sh
 SRCS+=		syscalls.c
 CFLAGS+=	-DPTRACE -DKTRACE -DACCOUNTING -DNFSCLIENT -DSYSVSHM -DSYSVSEM
diff --git a/usr.sbin/btrace/btrace.c b/usr.sbin/btrace/btrace.c
index aa75a12f6762..ba0600b5b631 100644
--- a/usr.sbin/btrace/btrace.c
+++ b/usr.sbin/btrace/btrace.c
@@ -31,6 +31,7 @@
 #include <signal.h>
 #include <stdarg.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -540,6 +541,7 @@ rules_setup(int fd)
 			dtrq->dtrq_pbn = dtpi->dtpi_pbn;
 			dtrq->dtrq_rate = bp->bp_rate;
 			dtrq->dtrq_evtflags = evtflags;
+			dtrq->dtrq_lbl = (uintptr_t)r;
 			if (dtrq->dtrq_evtflags & DTEVT_KSTACK)
 				dokstack = 1;
 			bp->bp_cookie = dtrq;
@@ -588,16 +590,19 @@ rules_apply(int fd, struct dt_evt *dtev)
 	struct bt_rule *r;
 	struct bt_probe *bp;
 
-	TAILQ_FOREACH(r, &g_rules, br_next) {
-		SLIST_FOREACH(bp, &r->br_probes, bp_next) {
-			if (bp->bp_type != B_PT_PROBE ||
-			    bp->bp_pbn != dtev->dtev_pbn)
-				continue;
+	/*
+	 * We have stamped the rule as label in the dtioc_req during setup.
+	 * The kernel simply passes it on to events.
+	 */
+	r = (struct bt_rule *)dtev->dtev_lbl;
+	SLIST_FOREACH(bp, &r->br_probes, bp_next) {
+		if (bp->bp_type != B_PT_PROBE ||
+		    bp->bp_pbn != dtev->dtev_pbn)
+			continue;
 
-			dtai_cache(fd, &dt_dtpis[dtev->dtev_pbn - 1]);
-			if (rule_eval(r, dtev))
-				return 1;
-		}
+		dtai_cache(fd, &dt_dtpis[dtev->dtev_pbn - 1]);
+		if (rule_eval(r, dtev))
+			return 1;
 	}
 	return 0;
 }
diff --git a/usr.sbin/btrace/printf.c b/usr.sbin/btrace/printf.c
index 49d046ab45d4..0190d095c3cf 100644
--- a/usr.sbin/btrace/printf.c
+++ b/usr.sbin/btrace/printf.c
@@ -35,6 +35,7 @@
 #include <err.h>
 #include <errno.h>
 #include <limits.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-- 
2.34.1