From: Alexandr Nedvedicky Subject: Re: stacktrace_save_utrace() should be more robust To: Ted Unangst , Mark Kettenis , tech@openbsd.org Date: Thu, 22 May 2025 11:30:10 +0200 Hello, On Thu, May 22, 2025 at 07:17:07AM +0200, Claudio Jeker wrote: > On Thu, May 22, 2025 at 05:09:17AM +0200, Alexandr Nedvedicky wrote: > > Hello, > > > > On Wed, May 21, 2025 at 08:15:48PM -0400, Ted Unangst wrote: > > > > > > > > Is it possible to use two loops here? One loop to get out of the kernel, > > > and then the second that does the copyin? I think there is not so much > > > code in common that duplicating is a problem, and it will be more clear > > > what we're trying to do in each loop. > > > > > > > I like this idea. it makes code lot more clear. > > updated diff follows. > > I never looked into this proper but instead of walking the full kernel > stack, can't we start with the information in the trapframe? > Don't remember if there is a pointer to it somewhere in the pcb. > I think I've managed to get working proof-of-concept (see diff below). it works for amd64. However I'm not sure if it is direction we want to go now. Perhaps later. I've cloned existing stactrace_save_utrace() which is a current workhorse for all dtrace probes (syscall, profiling static). I've made a cf_stacktrace_save_utrace() function which takes a clockframe as additional argument. Using a clockframe (intrframe) we can reach for userland stack immediately without iterating over kernel frames like we do in stacktrace_save_utrace(). Similar mechanism seen in cf_stacktrace_save_utrace() can be used for syscall dt probes where we can use trapframe instead of clockframe/intrframe. However I think we still need to get fix for stacktrace_save_utrace() in because it will remain a workhorse for static probes (given I understand things right here). For now I would like to get this trap/intr frame shortcut aside and get fix for stacktrace_save_utrace(). I like tedu's idea it works fine for i386/amd64. Frame shortcut for stack tracing can be revisited later. thanks and regards sashan --------8<---------------8<---------------8<------------------8<-------- diff --git a/sys/arch/amd64/amd64/db_trace.c b/sys/arch/amd64/amd64/db_trace.c index 1e7fcf2208a..256080e32e7 100644 --- a/sys/arch/amd64/amd64/db_trace.c +++ b/sys/arch/amd64/amd64/db_trace.c @@ -326,6 +326,29 @@ stacktrace_save_utrace(struct stacktrace *st) curcpu()->ci_inatomic--; } +void +cf_stacktrace_save_utrace(struct stacktrace *st, void *cf) +{ + struct clockframe *cframe = (struct clockframe *)cf; + struct callframe f, *frame; + + st->st_count = 0; + + if (!CLKF_USERMODE(cframe)) + return; + + curcpu()->ci_inatomic++; + st->st_pc[st->st_count++] = cframe->if_rip; + frame = (struct callframe *)cframe->if_rbp; + while (st->st_count < STACKTRACE_MAX) { + if (copyin(frame, &f, sizeof(f)) != 0) + break; + st->st_pc[st->st_count++] = f.f_retaddr; + frame = f.f_frame; + } + curcpu()->ci_inatomic--; +} + vaddr_t db_get_pc(struct trapframe *tf) { diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c index e7ffa2df160..df50297f84e 100644 --- a/sys/dev/dt/dt_dev.c +++ b/sys/dev/dt/dt_dev.c @@ -824,7 +824,7 @@ dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip) * Get a reference to the next free event state from the ring. */ struct dt_evt * -dt_pcb_ring_get(struct dt_pcb *dp, int profiling) +dt_pcb_ring_get(struct dt_pcb *dp, int profiling, void *cf) { struct proc *p = curproc; struct dt_evt *dtev; @@ -873,8 +873,12 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling) else stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC); } - if (ISSET(dp->dp_evtflags, DTEVT_USTACK)) - stacktrace_save_utrace(&dtev->dtev_ustack); + if (ISSET(dp->dp_evtflags, DTEVT_USTACK)) { + if (cf != NULL) + cf_stacktrace_save_utrace(&dtev->dtev_ustack, cf); + else + stacktrace_save_utrace(&dtev->dtev_ustack); + } return dtev; } diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c index 5de2f8cd99a..ffc7411d10b 100644 --- a/sys/dev/dt/dt_prov_kprobe.c +++ b/sys/dev/dt/dt_prov_kprobe.c @@ -341,7 +341,7 @@ dt_prov_kprobe_hook(struct dt_provider *dtpv, ...) SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { struct dt_evt *dtev; - dtev = dt_pcb_ring_get(dp, 0); + dtev = dt_pcb_ring_get(dp, 0, NULL); if (dtev == NULL) continue; @@ -381,7 +381,7 @@ dt_prov_kprobe_hook(struct dt_provider *dtpv, ...) SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { struct dt_evt *dtev; - dtev = dt_pcb_ring_get(dp, 0); + dtev = dt_pcb_ring_get(dp, 0, NULL); if (dtev == NULL) continue; diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c index 545a20153e5..e3389ba63a4 100644 --- a/sys/dev/dt/dt_prov_profile.c +++ b/sys/dev/dt/dt_prov_profile.c @@ -112,7 +112,7 @@ dt_clock(struct clockrequest *cr, void *cf, void *arg) else if (count > 1) dt_pcb_ring_skiptick(dp, count - 1); - dtev = dt_pcb_ring_get(dp, 1); + dtev = dt_pcb_ring_get(dp, 1, cf); if (dtev == NULL) return; dt_pcb_ring_consume(dp, dtev); diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c index 677b2a1ef72..2eb7e8adf6a 100644 --- a/sys/dev/dt/dt_prov_static.c +++ b/sys/dev/dt/dt_prov_static.c @@ -219,7 +219,7 @@ dt_prov_static_hook(struct dt_provider *dtpv, ...) SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { struct dt_evt *dtev; - dtev = dt_pcb_ring_get(dp, 0); + dtev = dt_pcb_ring_get(dp, 0, NULL); if (dtev == NULL) continue; diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c index 03c561eece8..f5e508a693e 100644 --- a/sys/dev/dt/dt_prov_syscall.c +++ b/sys/dev/dt/dt_prov_syscall.c @@ -146,7 +146,8 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...) SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { struct dt_evt *dtev; - dtev = dt_pcb_ring_get(dp, 0); + /* XXX we may pass a traprframe here instead of NULL */ + dtev = dt_pcb_ring_get(dp, 0, NULL); if (dtev == NULL) continue; @@ -189,7 +190,8 @@ dt_prov_syscall_return(struct dt_provider *dtpv, ...) SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { struct dt_evt *dtev; - dtev = dt_pcb_ring_get(dp, 0); + /* XXX we can pass a trapframe here instead of NULL */ + dtev = dt_pcb_ring_get(dp, 0, NULL); if (dtev == NULL) continue; diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h index 77ac3fa05df..f88420ae3b0 100644 --- a/sys/dev/dt/dtvar.h +++ b/sys/dev/dt/dtvar.h @@ -199,7 +199,7 @@ void dt_pcb_free(struct dt_pcb *); void dt_pcb_purge(struct dt_pcb_list *); void dt_pcb_ring_skiptick(struct dt_pcb *, unsigned int); -struct dt_evt *dt_pcb_ring_get(struct dt_pcb *, int); +struct dt_evt *dt_pcb_ring_get(struct dt_pcb *, int, void *); void dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *); /* diff --git a/sys/sys/stacktrace.h b/sys/sys/stacktrace.h index 1348b715db2..48edfa900f6 100644 --- a/sys/sys/stacktrace.h +++ b/sys/sys/stacktrace.h @@ -30,6 +30,7 @@ struct stacktrace { void stacktrace_print(struct stacktrace *, int (*)(const char *, ...)); void stacktrace_save_at(struct stacktrace *, unsigned int); void stacktrace_save_utrace(struct stacktrace *); +void cf_stacktrace_save_utrace(struct stacktrace *, void *); static inline void stacktrace_save(struct stacktrace *st)