From: Scott Cheloha Subject: dt(4), btrace(8): interval, profile: support additional units: s, ms, us To: tech@openbsd.org Cc: mpi@openbsd.org Date: Wed, 28 Feb 2024 20:46:18 -0600 This patch adds support for arbitrary execution periods to the interval and profile providers, e.g. "interval:s:3" and "profile:us:500". dt(4) patch notes: - struct dtioc_req: add dtrq_nsecs We need this to support execution periods larger than one second, e.g. "interval:s:5". The kernel code uses nanoseconds, so I figure the userspace API ought to take a count of nanoseconds, hence "dtrq_nsecs". - dt_prov_profile_alloc: use dtrq_nsecs if dtrq_rate is zero This preserves existing behavior. If you set dtrq_rate, we use that, just like we do currently. If dtrq_rate is zero, we fall back to dtrq_nsecs. The execution period cannot be smaller than 200us. If the execution period is too small you run the risk of wedging the CPU. I borrowed this lower bound from DTrace. The execution period cannot be larger than UINT32_MAX seconds. This is arbitrary, we have to cut it off somewhere. btrace(8) patch notes: - struct bt_probe: replace bp_rate with bp_nsecs To mirror the change in struct dtioc_req. - grammar: "hz" does not need to be a reserved word Unless I'm missing something, "hz" doesn't need to be reserved. Making it not-reserved simplifies the patch. - grammar: pname: match STRING:STRING:NUMBER If we match STRING:STRING:NUMBER, pass the NUMBER in as the fourth argument to bp_new ("number"). In bp_new(), we then check if prov is "interval" or "profile". If so, we try to convert number to a count of nanoseconds according to func, which must be one of the supported time units: hz, ms, s, us. - dtpi_get_by_value: match all interval/profile probes interval and profile probes are validated by the parser. Once we're in dtpi_get_by_value() we only need to compare with prov. The dtpi_func and dtpi_name strings are irrelevant. To try it out, apply/rebuild and then maybe try this BT script: $ cat arbitrary-interval.bt BEGIN { @start = nsecs; @count = 1; } interval:us:654321 { $actual = nsecs - @start; $expected = @count * 654321000; $error = $actual - $expected printf("interval %d actual %d.%09d expected %d.%09d error %d.%09d\n", @count, $actual / 1000000000, $actual % 1000000000, $expected / 1000000000, $expected % 1000000000, $error / 1000000000, $error % 1000000000); @count = @count + 1; if (@count == 10) exit(); } $ doas btrace arbitrary-interval.bt interval 1 actual 0.654541257 expected 0.654321000 error 0.000220257 interval 2 actual 1.309049560 expected 1.308642000 error 0.000407560 interval 3 actual 1.963371682 expected 1.962963000 error 0.000408682 interval 4 actual 2.617566580 expected 2.617284000 error 0.000282580 interval 5 actual 3.272013793 expected 3.271605000 error 0.000408793 interval 6 actual 3.926167631 expected 3.925926000 error 0.000241631 interval 7 actual 4.580656581 expected 4.580247000 error 0.000409581 interval 8 actual 5.234976280 expected 5.234568000 error 0.000408280 interval 9 actual 5.889304251 expected 5.888889000 error 0.000415251 Regress passes. Thoughts? Index: sys/dev/dt/dtvar.h =================================================================== RCS file: /cvs/src/sys/dev/dt/dtvar.h,v diff -u -p -r1.18 dtvar.h --- sys/dev/dt/dtvar.h 9 Feb 2024 17:42:18 -0000 1.18 +++ sys/dev/dt/dtvar.h 29 Feb 2024 02:01:29 -0000 @@ -130,7 +130,8 @@ struct dtioc_arg { struct dtioc_req { uint32_t dtrq_pbn; /* probe number */ struct dt_filter dtrq_filter; /* probe filter */ - uint32_t dtrq_rate; /* number of ticks */ + uint32_t dtrq_rate; /* execution frequency */ + uint64_t dtrq_nsecs; /* execution period */ uint64_t dtrq_evtflags; /* states to record */ }; Index: sys/dev/dt/dt_prov_profile.c =================================================================== RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v diff -u -p -r1.6 dt_prov_profile.c --- sys/dev/dt/dt_prov_profile.c 9 Feb 2024 17:42:18 -0000 1.6 +++ sys/dev/dt/dt_prov_profile.c 29 Feb 2024 02:01:29 -0000 @@ -67,16 +67,20 @@ int dt_prov_profile_alloc(struct dt_probe *dtp, struct dt_softc *sc, struct dt_pcb_list *plist, struct dtioc_req *dtrq) { + uint64_t nsecs; struct dt_pcb *dp; struct cpu_info *ci; CPU_INFO_ITERATOR cii; - extern int hz; KASSERT(dtioc_req_isvalid(dtrq)); KASSERT(TAILQ_EMPTY(plist)); KASSERT(dtp == dtpp_profile || dtp == dtpp_interval); - if (dtrq->dtrq_rate <= 0 || dtrq->dtrq_rate > hz) + if (dtrq->dtrq_rate != 0) + nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate; + else + nsecs = dtrq->dtrq_nsecs; + if (nsecs < USEC_TO_NSEC(200) || nsecs > SEC_TO_NSEC(UINT32_MAX)) return EOPNOTSUPP; CPU_INFO_FOREACH(cii, ci) { @@ -89,7 +93,7 @@ dt_prov_profile_alloc(struct dt_probe *d return ENOMEM; } - dp->dp_nsecs = SEC_TO_NSEC(1) / dtrq->dtrq_rate; + dp->dp_nsecs = nsecs; dp->dp_cpu = ci; dp->dp_filter = dtrq->dtrq_filter; Index: usr.sbin/btrace/bt_parse.y =================================================================== RCS file: /cvs/src/usr.sbin/btrace/bt_parse.y,v diff -u -p -r1.59 bt_parse.y --- usr.sbin/btrace/bt_parse.y 12 Feb 2024 15:11:06 -0000 1.59 +++ usr.sbin/btrace/bt_parse.y 29 Feb 2024 02:01:30 -0000 @@ -62,7 +62,7 @@ struct bt_arg g_maxba = BA_INITIALIZER( struct bt_rule *br_new(struct bt_probe *, struct bt_filter *, struct bt_stmt *); -struct bt_probe *bp_new(const char *, const char *, const char *, int32_t); +struct bt_probe *bp_new(const char *, const char *, const char *, long); struct bt_arg *ba_append(struct bt_arg *, struct bt_arg *); struct bt_arg *ba_op(enum bt_argtype, struct bt_arg *, struct bt_arg *); struct bt_stmt *bs_new(enum bt_action, struct bt_arg *, struct bt_var *); @@ -119,7 +119,7 @@ static int beflag = 0; /* BEGIN/END p %token ERROR ENDFILT %token OP_EQ OP_NE OP_LE OP_LT OP_GE OP_GT OP_LAND OP_LOR /* Builtins */ -%token BUILTIN BEGIN END HZ IF STR +%token BUILTIN BEGIN END IF STR /* Functions and Map operators */ %token F_DELETE F_PRINT %token MFUNC FUNC0 FUNC1 FUNCN OP1 OP2 OP4 MOP0 MOP1 @@ -155,7 +155,7 @@ probe : { pflag = 1; } pname { $$ = $2; ; pname : STRING ':' STRING ':' STRING { $$ = bp_new($1, $3, $5, 0); } - | STRING ':' HZ ':' NUMBER { $$ = bp_new($1, "hz", NULL, $5); } + | STRING ':' STRING ':' NUMBER { $$ = bp_new($1, $3, NULL, $5); } ; mentry : GVAR '[' vargs ']' { $$ = bm_find($1, $3); } @@ -349,18 +349,73 @@ bt_new(struct bt_arg *ba, struct bt_stmt return bs_new(B_AC_TEST, bop, (struct bt_var *)condbs); } + +/* + * interval and profile are pseudo-providers. They don't implement + * function:name probes in the kernel. Instead, the btrace utility + * accepts certain time units as functions. The name is then converted + * from a number to an execution period according to the given unit. + */ +static int +bp_is_pseudo_provider(const char *prov) +{ + return strcmp(prov, "interval") == 0 || strcmp(prov, "profile") == 0; +} + +/* + * interval and profile support the same units. + */ +static void +bp_time_unit_to_nsec(const char *func, long value, uint64_t *nsecs) +{ + static const struct { + const char *name; + enum { UNIT_HZ, UNIT_MS, UNIT_S, UNIT_US } id; + long long max; + } unit[] = { + { .name = "hz", .id = UNIT_HZ, .max = 1000000000LL }, + { .name = "ms", .id = UNIT_MS, .max = LLONG_MAX / 1000000 }, + { .name = "s", .id = UNIT_S, .max = LLONG_MAX / 1000000000 }, + { .name = "us", .id = UNIT_US, .max = LLONG_MAX / 1000 }, + }; + size_t i; + + for (i = 0; i < nitems(unit); i++) { + if (strcmp(unit[i].name, func) == 0) { + if (value < 1) + errx(1, "number is invalid: %ld", value); + if (value > unit[i].max) + errx(1, "number is too large: %ld", value); + switch (unit[i].id) { + case UNIT_HZ: + *nsecs = 1000000000LLU / value; + return; + case UNIT_MS: + *nsecs = value * 1000000LLU; + return; + case UNIT_S: + *nsecs = value * 1000000000LLU; + return; + case UNIT_US: + *nsecs = value * 1000LLU; + return; + default: + abort(); + } + } + } + errx(1, "invalid unit: %s", func); +} + /* Create a new probe */ struct bt_probe * -bp_new(const char *prov, const char *func, const char *name, int32_t rate) +bp_new(const char *prov, const char *func, const char *name, long number) { struct bt_probe *bp; enum bt_ptype ptype; - if (rate < 0 || rate > INT32_MAX) - errx(1, "only positive values permitted"); - if (prov == NULL && func == NULL && name == NULL) - ptype = rate; /* BEGIN or END */ + ptype = number; /* BEGIN or END */ else ptype = B_PT_PROBE; @@ -370,7 +425,8 @@ bp_new(const char *prov, const char *fun bp->bp_prov = prov; bp->bp_func = func; bp->bp_name = name; - bp->bp_rate = rate; + if (prov != NULL && bp_is_pseudo_provider(prov)) + bp_time_unit_to_nsec(func, number, &bp->bp_nsecs); bp->bp_type = ptype; return bp; @@ -716,7 +772,6 @@ lookup(char *s) { "delete", F_DELETE, B_AC_DELETE }, { "exit", FUNC0, B_AC_EXIT }, { "hist", OP1, 0 }, - { "hz", HZ, 0 }, { "if", IF, 0 }, { "kstack", BUILTIN, B_AT_BI_KSTACK }, { "lhist", OP4, 0 }, @@ -1047,14 +1102,10 @@ again: /* * Probe lexer backdoor, interpret the token as a string * rather than a keyword. Otherwise, reserved keywords - * would conflict with syscall names. The exception to - * this is 'hz', which hopefully will never be a - * syscall. + * would conflict with syscall names. */ - if (kwp->token != HZ) { - yylval.v.string = kwp->word; - return STRING; - } + yylval.v.string = kwp->word; + return STRING; } else if (beflag) { /* Interpret tokens in a BEGIN/END context. */ if (kwp->type >= B_AT_BI_ARG0 && Index: usr.sbin/btrace/bt_parser.h =================================================================== RCS file: /cvs/src/usr.sbin/btrace/bt_parser.h,v diff -u -p -r1.25 bt_parser.h --- usr.sbin/btrace/bt_parser.h 12 Oct 2023 15:16:44 -0000 1.25 +++ usr.sbin/btrace/bt_parser.h 29 Feb 2024 02:01:30 -0000 @@ -31,7 +31,7 @@ * * "provider:function:name" * or - * "provider:time_unit:rate" + * "provider:time_unit:number" * * Multiple probes can be associated to the same action. */ @@ -40,7 +40,7 @@ struct bt_probe { const char *bp_prov; /* provider */ const char *bp_func; /* function or time unit */ const char *bp_name; - uint32_t bp_rate; + uint64_t bp_nsecs; #define bp_unit bp_func enum bt_ptype { B_PT_BEGIN = 1, Index: usr.sbin/btrace/btrace.c =================================================================== RCS file: /cvs/src/usr.sbin/btrace/btrace.c,v diff -u -p -r1.89 btrace.c --- usr.sbin/btrace/btrace.c 27 Feb 2024 12:38:12 -0000 1.89 +++ usr.sbin/btrace/btrace.c 29 Feb 2024 02:01:30 -0000 @@ -363,10 +363,10 @@ dtpi_func(struct dtioc_probe_info *dtpi) return syscallnames[idx]; } -int -dtpi_is_unit(const char *unit) +static int +dtpi_is_pseudo_provider(const char *prov) { - return !strncmp("hz", unit, sizeof("hz")); + return strcmp(prov, "interval") == 0 || strcmp(prov, "profile") == 0; } struct dtioc_probe_info * @@ -377,21 +377,27 @@ dtpi_get_by_value(const char *prov, cons dtpi = dt_dtpis; for (i = 0; i < dt_ndtpi; i++, dtpi++) { - if (prov != NULL && - strncmp(prov, dtpi->dtpi_prov, DTNAMESIZE)) - continue; + if (prov != NULL) { + if (strncmp(prov, dtpi->dtpi_prov, DTNAMESIZE)) + continue; - if (func != NULL) { - if (dtpi_is_unit(func)) - return dtpi; + /* + * Pseudo-providers don't have real functions or + * names set by the kernel. Pass the probe back + * to the caller and let them handle it. + */ + if (dtpi_is_pseudo_provider(prov)) + goto match; + } + if (func != NULL) { if (strncmp(func, dtpi_func(dtpi), DTNAMESIZE)) continue; } if (strncmp(name, dtpi->dtpi_name, DTNAMESIZE)) continue; - +match: debug("matched probe %s:%s:%s\n", dtpi->dtpi_prov, dtpi_func(dtpi), dtpi->dtpi_name); return dtpi; @@ -537,7 +543,7 @@ rules_setup(int fd) bp->bp_pbn = dtpi->dtpi_pbn; dtrq->dtrq_pbn = dtpi->dtpi_pbn; - dtrq->dtrq_rate = bp->bp_rate; + dtrq->dtrq_nsecs = bp->bp_nsecs; dtrq->dtrq_evtflags = evtflags; if (dtrq->dtrq_evtflags & DTEVT_KSTACK) dokstack = 1; @@ -2052,9 +2058,9 @@ debug_probe_name(struct bt_probe *bp) assert(bp->bp_type == B_PT_PROBE); - if (bp->bp_rate) { - snprintf(buf, sizeof(buf), "%s:%s:%u", bp->bp_prov, - bp->bp_unit, bp->bp_rate); + if (bp->bp_nsecs) { + snprintf(buf, sizeof(buf), "%s:%s:%llu", bp->bp_prov, + bp->bp_unit, bp->bp_nsecs); } else { snprintf(buf, sizeof(buf), "%s:%s:%s", bp->bp_prov, bp->bp_unit, bp->bp_name);