Index | Thread | Search

From:
Scott Cheloha <scottcheloha@gmail.com>
Subject:
dt(4), btrace(8): interval, profile: support additional units: s, ms, us
To:
tech@openbsd.org
Cc:
mpi@openbsd.org
Date:
Wed, 28 Feb 2024 20:46:18 -0600

Download raw body.

Thread
This patch adds support for arbitrary execution periods to the
interval and profile providers, e.g. "interval:s:3" and
"profile:us:500".

dt(4) patch notes:

- struct dtioc_req: add dtrq_nsecs

  We need this to support execution periods larger than one second,
  e.g. "interval:s:5".

  The kernel code uses nanoseconds, so I figure the userspace API
  ought to take a count of nanoseconds, hence "dtrq_nsecs".

- dt_prov_profile_alloc: use dtrq_nsecs if dtrq_rate is zero

  This preserves existing behavior.  If you set dtrq_rate, we use
  that, just like we do currently.  If dtrq_rate is zero, we fall
  back to dtrq_nsecs.

  The execution period cannot be smaller than 200us.  If the execution
  period is too small you run the risk of wedging the CPU.  I borrowed
  this lower bound from DTrace.

  The execution period cannot be larger than UINT32_MAX seconds.  This
  is arbitrary, we have to cut it off somewhere.

btrace(8) patch notes:

- struct bt_probe: replace bp_rate with bp_nsecs

  To mirror the change in struct dtioc_req.

- grammar: "hz" does not need to be a reserved word

  Unless I'm missing something, "hz" doesn't need to be reserved.
  Making it not-reserved simplifies the patch.

- grammar: pname: match STRING:STRING:NUMBER

  If we match STRING:STRING:NUMBER, pass the NUMBER in as the fourth
  argument to bp_new ("number").  In bp_new(), we then check if prov
  is "interval" or "profile".  If so, we try to convert number to a
  count of nanoseconds according to func, which must be one of the
  supported time units: hz, ms, s, us.

- dtpi_get_by_value: match all interval/profile probes

  interval and profile probes are validated by the parser.  Once
  we're in dtpi_get_by_value() we only need to compare with prov.
  The dtpi_func and dtpi_name strings are irrelevant.

To try it out, apply/rebuild and then maybe try this BT script:

$ cat arbitrary-interval.bt
BEGIN {
	@start = nsecs;
	@count = 1;
}

interval:us:654321 {
	$actual = nsecs - @start;
	$expected = @count * 654321000;
	$error = $actual - $expected
	printf("interval %d actual %d.%09d expected %d.%09d error %d.%09d\n",
	    @count, $actual / 1000000000, $actual % 1000000000,
	    $expected / 1000000000, $expected % 1000000000,
	    $error / 1000000000, $error % 1000000000);
	@count = @count + 1;
	if (@count == 10)
		exit();
}
$ doas btrace arbitrary-interval.bt
interval 1 actual 0.654541257 expected 0.654321000 error 0.000220257
interval 2 actual 1.309049560 expected 1.308642000 error 0.000407560
interval 3 actual 1.963371682 expected 1.962963000 error 0.000408682
interval 4 actual 2.617566580 expected 2.617284000 error 0.000282580
interval 5 actual 3.272013793 expected 3.271605000 error 0.000408793
interval 6 actual 3.926167631 expected 3.925926000 error 0.000241631
interval 7 actual 4.580656581 expected 4.580247000 error 0.000409581
interval 8 actual 5.234976280 expected 5.234568000 error 0.000408280
interval 9 actual 5.889304251 expected 5.888889000 error 0.000415251

Regress passes.

Thoughts?

Index: sys/dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
diff -u -p -r1.18 dtvar.h
--- sys/dev/dt/dtvar.h	9 Feb 2024 17:42:18 -0000	1.18
+++ sys/dev/dt/dtvar.h	29 Feb 2024 02:01:29 -0000
@@ -130,7 +130,8 @@ struct dtioc_arg {
 struct dtioc_req {
 	uint32_t		 dtrq_pbn;	/* probe number */
 	struct dt_filter	 dtrq_filter;	/* probe filter */
-	uint32_t		 dtrq_rate;	/* number of ticks */
+	uint32_t		 dtrq_rate;	/* execution frequency */
+	uint64_t		 dtrq_nsecs;	/* execution period */
 	uint64_t		 dtrq_evtflags;	/* states to record */
 };
 
Index: sys/dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
diff -u -p -r1.6 dt_prov_profile.c
--- sys/dev/dt/dt_prov_profile.c	9 Feb 2024 17:42:18 -0000	1.6
+++ sys/dev/dt/dt_prov_profile.c	29 Feb 2024 02:01:29 -0000
@@ -67,16 +67,20 @@ int
 dt_prov_profile_alloc(struct dt_probe *dtp, struct dt_softc *sc,
     struct dt_pcb_list *plist, struct dtioc_req *dtrq)
 {
+	uint64_t nsecs;
 	struct dt_pcb *dp;
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
-	extern int hz;
 
 	KASSERT(dtioc_req_isvalid(dtrq));
 	KASSERT(TAILQ_EMPTY(plist));
 	KASSERT(dtp == dtpp_profile || dtp == dtpp_interval);
 
-	if (dtrq->dtrq_rate <= 0 || dtrq->dtrq_rate > hz)
+	if (dtrq->dtrq_rate != 0)
+		nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
+	else
+		nsecs = dtrq->dtrq_nsecs;
+	if (nsecs < USEC_TO_NSEC(200) || nsecs > SEC_TO_NSEC(UINT32_MAX))
 		return EOPNOTSUPP;
 
 	CPU_INFO_FOREACH(cii, ci) {
@@ -89,7 +93,7 @@ dt_prov_profile_alloc(struct dt_probe *d
 			return ENOMEM;
 		}
 
-		dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate;
+		dp->dp_nsecs = nsecs;
 		dp->dp_cpu = ci;
 
 		dp->dp_filter = dtrq->dtrq_filter;
Index: usr.sbin/btrace/bt_parse.y
===================================================================
RCS file: /cvs/src/usr.sbin/btrace/bt_parse.y,v
diff -u -p -r1.59 bt_parse.y
--- usr.sbin/btrace/bt_parse.y	12 Feb 2024 15:11:06 -0000	1.59
+++ usr.sbin/btrace/bt_parse.y	29 Feb 2024 02:01:30 -0000
@@ -62,7 +62,7 @@ struct bt_arg		g_maxba = BA_INITIALIZER(
 
 struct bt_rule	*br_new(struct bt_probe *, struct bt_filter *,
 		     struct bt_stmt *);
-struct bt_probe	*bp_new(const char *, const char *, const char *, int32_t);
+struct bt_probe	*bp_new(const char *, const char *, const char *, long);
 struct bt_arg	*ba_append(struct bt_arg *, struct bt_arg *);
 struct bt_arg	*ba_op(enum bt_argtype, struct bt_arg *, struct bt_arg *);
 struct bt_stmt	*bs_new(enum bt_action, struct bt_arg *, struct bt_var *);
@@ -119,7 +119,7 @@ static int 	 beflag = 0;		/* BEGIN/END p
 %token	<v.i>		ERROR ENDFILT
 %token	<v.i>		OP_EQ OP_NE OP_LE OP_LT OP_GE OP_GT OP_LAND OP_LOR
 /* Builtins */
-%token	<v.i>		BUILTIN BEGIN END HZ IF STR
+%token	<v.i>		BUILTIN BEGIN END IF STR
 /* Functions and Map operators */
 %token  <v.i>		F_DELETE F_PRINT
 %token	<v.i>		MFUNC FUNC0 FUNC1 FUNCN OP1 OP2 OP4 MOP0 MOP1
@@ -155,7 +155,7 @@ probe	: { pflag = 1; } pname		{ $$ = $2;
 	;
 
 pname	: STRING ':' STRING ':' STRING	{ $$ = bp_new($1, $3, $5, 0); }
-	| STRING ':' HZ ':' NUMBER	{ $$ = bp_new($1, "hz", NULL, $5); }
+	| STRING ':' STRING ':' NUMBER	{ $$ = bp_new($1, $3, NULL, $5); }
 	;
 
 mentry	: GVAR '[' vargs ']'		{ $$ = bm_find($1, $3); }
@@ -349,18 +349,73 @@ bt_new(struct bt_arg *ba, struct bt_stmt
 	return bs_new(B_AC_TEST, bop, (struct bt_var *)condbs);
 
 }
+
+/*
+ * interval and profile are pseudo-providers.  They don't implement
+ * function:name probes in the kernel.  Instead, the btrace utility
+ * accepts certain time units as functions.  The name is then converted
+ * from a number to an execution period according to the given unit.
+ */
+static int
+bp_is_pseudo_provider(const char *prov)
+{
+	return strcmp(prov, "interval") == 0 || strcmp(prov, "profile") == 0;
+}
+
+/*
+ * interval and profile support the same units.
+ */
+static void
+bp_time_unit_to_nsec(const char *func, long value, uint64_t *nsecs)
+{
+	static const struct {
+		const char *name;
+		enum { UNIT_HZ, UNIT_MS, UNIT_S, UNIT_US } id;
+		long long max;
+	} unit[] = {
+		{ .name = "hz", .id = UNIT_HZ, .max = 1000000000LL },
+		{ .name = "ms", .id = UNIT_MS, .max = LLONG_MAX / 1000000 },
+		{ .name = "s", .id = UNIT_S, .max = LLONG_MAX / 1000000000 },
+		{ .name = "us", .id = UNIT_US, .max = LLONG_MAX / 1000 },
+	};
+	size_t i;
+
+	for (i = 0; i < nitems(unit); i++) {
+		if (strcmp(unit[i].name, func) == 0) {
+			if (value < 1)
+				errx(1, "number is invalid: %ld", value);
+			if (value > unit[i].max)
+				errx(1, "number is too large: %ld", value);
+			switch (unit[i].id) {
+			case UNIT_HZ:
+				*nsecs = 1000000000LLU / value;
+				return;
+			case UNIT_MS:
+				*nsecs = value * 1000000LLU;
+				return;
+			case UNIT_S:
+				*nsecs = value * 1000000000LLU;
+				return;
+			case UNIT_US:
+				*nsecs = value * 1000LLU;
+				return;
+			default:
+				abort();
+			}
+		}
+	}
+	errx(1, "invalid unit: %s", func);
+}
+
 /* Create a new probe */
 struct bt_probe *
-bp_new(const char *prov, const char *func, const char *name, int32_t rate)
+bp_new(const char *prov, const char *func, const char *name, long number)
 {
 	struct bt_probe *bp;
 	enum bt_ptype ptype;
 
-	if (rate < 0 || rate > INT32_MAX)
-		errx(1, "only positive values permitted");
-
 	if (prov == NULL && func == NULL && name == NULL)
-		ptype = rate; /* BEGIN or END */
+		ptype = number;	/* BEGIN or END */
 	else
 		ptype = B_PT_PROBE;
 
@@ -370,7 +425,8 @@ bp_new(const char *prov, const char *fun
 	bp->bp_prov = prov;
 	bp->bp_func = func;
 	bp->bp_name = name;
-	bp->bp_rate = rate;
+	if (prov != NULL && bp_is_pseudo_provider(prov))
+		bp_time_unit_to_nsec(func, number, &bp->bp_nsecs);
 	bp->bp_type = ptype;
 
 	return bp;
@@ -716,7 +772,6 @@ lookup(char *s)
 		{ "delete",	F_DELETE,	B_AC_DELETE },
 		{ "exit",	FUNC0,		B_AC_EXIT },
 		{ "hist",	OP1,		0 },
-		{ "hz",		HZ,		0 },
 		{ "if",		IF,		0 },
 		{ "kstack",	BUILTIN,	B_AT_BI_KSTACK },
 		{ "lhist",	OP4,		0 },
@@ -1047,14 +1102,10 @@ again:
 			/*
 			 * Probe lexer backdoor, interpret the token as a string
 			 * rather than a keyword. Otherwise, reserved keywords
-			 * would conflict with syscall names. The exception to
-			 * this is 'hz', which hopefully will never be a
-			 * syscall.
+			 * would conflict with syscall names.
 			 */
-			if (kwp->token != HZ) {
-				yylval.v.string = kwp->word;
-				return STRING;
-			}
+			yylval.v.string = kwp->word;
+			return STRING;
 		} else if (beflag) {
 			/* Interpret tokens in a BEGIN/END context. */
 			if (kwp->type >= B_AT_BI_ARG0 &&
Index: usr.sbin/btrace/bt_parser.h
===================================================================
RCS file: /cvs/src/usr.sbin/btrace/bt_parser.h,v
diff -u -p -r1.25 bt_parser.h
--- usr.sbin/btrace/bt_parser.h	12 Oct 2023 15:16:44 -0000	1.25
+++ usr.sbin/btrace/bt_parser.h	29 Feb 2024 02:01:30 -0000
@@ -31,7 +31,7 @@
  *
  *	"provider:function:name"
  * or
- *	"provider:time_unit:rate"
+ *	"provider:time_unit:number"
  *
  * Multiple probes can be associated to the same action.
  */
@@ -40,7 +40,7 @@ struct bt_probe {
 	const char		*bp_prov;	/* provider */
 	const char		*bp_func;	/* function or time unit */
 	const char		*bp_name;
-	uint32_t		 bp_rate;
+	uint64_t		 bp_nsecs;
 #define bp_unit	bp_func
 	enum bt_ptype {
 		 B_PT_BEGIN = 1,
Index: usr.sbin/btrace/btrace.c
===================================================================
RCS file: /cvs/src/usr.sbin/btrace/btrace.c,v
diff -u -p -r1.89 btrace.c
--- usr.sbin/btrace/btrace.c	27 Feb 2024 12:38:12 -0000	1.89
+++ usr.sbin/btrace/btrace.c	29 Feb 2024 02:01:30 -0000
@@ -363,10 +363,10 @@ dtpi_func(struct dtioc_probe_info *dtpi)
 	return syscallnames[idx];
 }
 
-int
-dtpi_is_unit(const char *unit)
+static int
+dtpi_is_pseudo_provider(const char *prov)
 {
-	return !strncmp("hz", unit, sizeof("hz"));
+	return strcmp(prov, "interval") == 0 || strcmp(prov, "profile") == 0;
 }
 
 struct dtioc_probe_info *
@@ -377,21 +377,27 @@ dtpi_get_by_value(const char *prov, cons
 
 	dtpi = dt_dtpis;
 	for (i = 0; i < dt_ndtpi; i++, dtpi++) {
-		if (prov != NULL &&
-		    strncmp(prov, dtpi->dtpi_prov, DTNAMESIZE))
-			continue;
+		if (prov != NULL) {
+			if (strncmp(prov, dtpi->dtpi_prov, DTNAMESIZE))
+				continue;
 
-		if (func != NULL) {
-			if (dtpi_is_unit(func))
-				return dtpi;
+			/*
+			 * Pseudo-providers don't have real functions or
+			 * names set by the kernel.  Pass the probe back
+			 * to the caller and let them handle it.
+			 */
+			if (dtpi_is_pseudo_provider(prov))
+				goto match;
+		}
 
+		if (func != NULL) {
 			if (strncmp(func, dtpi_func(dtpi), DTNAMESIZE))
 				continue;
 		}
 
 		if (strncmp(name, dtpi->dtpi_name, DTNAMESIZE))
 			continue;
-
+match:
 		debug("matched probe %s:%s:%s\n", dtpi->dtpi_prov,
 		    dtpi_func(dtpi), dtpi->dtpi_name);
 		return dtpi;
@@ -537,7 +543,7 @@ rules_setup(int fd)
 
 			bp->bp_pbn = dtpi->dtpi_pbn;
 			dtrq->dtrq_pbn = dtpi->dtpi_pbn;
-			dtrq->dtrq_rate = bp->bp_rate;
+			dtrq->dtrq_nsecs = bp->bp_nsecs;
 			dtrq->dtrq_evtflags = evtflags;
 			if (dtrq->dtrq_evtflags & DTEVT_KSTACK)
 				dokstack = 1;
@@ -2052,9 +2058,9 @@ debug_probe_name(struct bt_probe *bp)
 
 	assert(bp->bp_type == B_PT_PROBE);
 
-	if (bp->bp_rate) {
-		snprintf(buf, sizeof(buf), "%s:%s:%u", bp->bp_prov,
-		    bp->bp_unit, bp->bp_rate);
+	if (bp->bp_nsecs) {
+		snprintf(buf, sizeof(buf), "%s:%s:%llu", bp->bp_prov,
+		    bp->bp_unit, bp->bp_nsecs);
 	} else {
 		snprintf(buf, sizeof(buf), "%s:%s:%s", bp->bp_prov,
 		    bp->bp_unit, bp->bp_name);