Download raw body.
stacktrace_save_utrace() should be more robust
Hello,
On Thu, May 22, 2025 at 07:17:07AM +0200, Claudio Jeker wrote:
> On Thu, May 22, 2025 at 05:09:17AM +0200, Alexandr Nedvedicky wrote:
> > Hello,
> >
> > On Wed, May 21, 2025 at 08:15:48PM -0400, Ted Unangst wrote:
> > </snip>
> > >
> > > Is it possible to use two loops here? One loop to get out of the kernel,
> > > and then the second that does the copyin? I think there is not so much
> > > code in common that duplicating is a problem, and it will be more clear
> > > what we're trying to do in each loop.
> > >
> >
> > I like this idea. it makes code lot more clear.
> > updated diff follows.
>
> I never looked into this proper but instead of walking the full kernel
> stack, can't we start with the information in the trapframe?
> Don't remember if there is a pointer to it somewhere in the pcb.
>
I think I've managed to get working proof-of-concept (see diff below).
it works for amd64. However I'm not sure if it is direction we want
to go now. Perhaps later. I've cloned existing stactrace_save_utrace()
which is a current workhorse for all dtrace probes (syscall, profiling
static). I've made a cf_stacktrace_save_utrace() function which takes
a clockframe as additional argument. Using a clockframe (intrframe)
we can reach for userland stack immediately without iterating over
kernel frames like we do in stacktrace_save_utrace(). Similar mechanism
seen in cf_stacktrace_save_utrace() can be used for syscall dt probes
where we can use trapframe instead of clockframe/intrframe.
However I think we still need to get fix for stacktrace_save_utrace() in
because it will remain a workhorse for static probes (given I understand
things right here).
For now I would like to get this trap/intr frame shortcut aside and
get fix for stacktrace_save_utrace(). I like tedu's idea it works
fine for i386/amd64. Frame shortcut for stack tracing can
be revisited later.
thanks and
regards
sashan
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/arch/amd64/amd64/db_trace.c b/sys/arch/amd64/amd64/db_trace.c
index 1e7fcf2208a..256080e32e7 100644
--- a/sys/arch/amd64/amd64/db_trace.c
+++ b/sys/arch/amd64/amd64/db_trace.c
@@ -326,6 +326,29 @@ stacktrace_save_utrace(struct stacktrace *st)
curcpu()->ci_inatomic--;
}
+void
+cf_stacktrace_save_utrace(struct stacktrace *st, void *cf)
+{
+ struct clockframe *cframe = (struct clockframe *)cf;
+ struct callframe f, *frame;
+
+ st->st_count = 0;
+
+ if (!CLKF_USERMODE(cframe))
+ return;
+
+ curcpu()->ci_inatomic++;
+ st->st_pc[st->st_count++] = cframe->if_rip;
+ frame = (struct callframe *)cframe->if_rbp;
+ while (st->st_count < STACKTRACE_MAX) {
+ if (copyin(frame, &f, sizeof(f)) != 0)
+ break;
+ st->st_pc[st->st_count++] = f.f_retaddr;
+ frame = f.f_frame;
+ }
+ curcpu()->ci_inatomic--;
+}
+
vaddr_t
db_get_pc(struct trapframe *tf)
{
diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c
index e7ffa2df160..df50297f84e 100644
--- a/sys/dev/dt/dt_dev.c
+++ b/sys/dev/dt/dt_dev.c
@@ -824,7 +824,7 @@ dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip)
* Get a reference to the next free event state from the ring.
*/
struct dt_evt *
-dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
+dt_pcb_ring_get(struct dt_pcb *dp, int profiling, void *cf)
{
struct proc *p = curproc;
struct dt_evt *dtev;
@@ -873,8 +873,12 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
else
stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
}
- if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
- stacktrace_save_utrace(&dtev->dtev_ustack);
+ if (ISSET(dp->dp_evtflags, DTEVT_USTACK)) {
+ if (cf != NULL)
+ cf_stacktrace_save_utrace(&dtev->dtev_ustack, cf);
+ else
+ stacktrace_save_utrace(&dtev->dtev_ustack);
+ }
return dtev;
}
diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c
index 5de2f8cd99a..ffc7411d10b 100644
--- a/sys/dev/dt/dt_prov_kprobe.c
+++ b/sys/dev/dt/dt_prov_kprobe.c
@@ -341,7 +341,7 @@ dt_prov_kprobe_hook(struct dt_provider *dtpv, ...)
SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
struct dt_evt *dtev;
- dtev = dt_pcb_ring_get(dp, 0);
+ dtev = dt_pcb_ring_get(dp, 0, NULL);
if (dtev == NULL)
continue;
@@ -381,7 +381,7 @@ dt_prov_kprobe_hook(struct dt_provider *dtpv, ...)
SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
struct dt_evt *dtev;
- dtev = dt_pcb_ring_get(dp, 0);
+ dtev = dt_pcb_ring_get(dp, 0, NULL);
if (dtev == NULL)
continue;
diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c
index 545a20153e5..e3389ba63a4 100644
--- a/sys/dev/dt/dt_prov_profile.c
+++ b/sys/dev/dt/dt_prov_profile.c
@@ -112,7 +112,7 @@ dt_clock(struct clockrequest *cr, void *cf, void *arg)
else if (count > 1)
dt_pcb_ring_skiptick(dp, count - 1);
- dtev = dt_pcb_ring_get(dp, 1);
+ dtev = dt_pcb_ring_get(dp, 1, cf);
if (dtev == NULL)
return;
dt_pcb_ring_consume(dp, dtev);
diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c
index 677b2a1ef72..2eb7e8adf6a 100644
--- a/sys/dev/dt/dt_prov_static.c
+++ b/sys/dev/dt/dt_prov_static.c
@@ -219,7 +219,7 @@ dt_prov_static_hook(struct dt_provider *dtpv, ...)
SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
struct dt_evt *dtev;
- dtev = dt_pcb_ring_get(dp, 0);
+ dtev = dt_pcb_ring_get(dp, 0, NULL);
if (dtev == NULL)
continue;
diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c
index 03c561eece8..f5e508a693e 100644
--- a/sys/dev/dt/dt_prov_syscall.c
+++ b/sys/dev/dt/dt_prov_syscall.c
@@ -146,7 +146,8 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
struct dt_evt *dtev;
- dtev = dt_pcb_ring_get(dp, 0);
+ /* XXX we may pass a traprframe here instead of NULL */
+ dtev = dt_pcb_ring_get(dp, 0, NULL);
if (dtev == NULL)
continue;
@@ -189,7 +190,8 @@ dt_prov_syscall_return(struct dt_provider *dtpv, ...)
SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
struct dt_evt *dtev;
- dtev = dt_pcb_ring_get(dp, 0);
+ /* XXX we can pass a trapframe here instead of NULL */
+ dtev = dt_pcb_ring_get(dp, 0, NULL);
if (dtev == NULL)
continue;
diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h
index 77ac3fa05df..f88420ae3b0 100644
--- a/sys/dev/dt/dtvar.h
+++ b/sys/dev/dt/dtvar.h
@@ -199,7 +199,7 @@ void dt_pcb_free(struct dt_pcb *);
void dt_pcb_purge(struct dt_pcb_list *);
void dt_pcb_ring_skiptick(struct dt_pcb *, unsigned int);
-struct dt_evt *dt_pcb_ring_get(struct dt_pcb *, int);
+struct dt_evt *dt_pcb_ring_get(struct dt_pcb *, int, void *);
void dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *);
/*
diff --git a/sys/sys/stacktrace.h b/sys/sys/stacktrace.h
index 1348b715db2..48edfa900f6 100644
--- a/sys/sys/stacktrace.h
+++ b/sys/sys/stacktrace.h
@@ -30,6 +30,7 @@ struct stacktrace {
void stacktrace_print(struct stacktrace *, int (*)(const char *, ...));
void stacktrace_save_at(struct stacktrace *, unsigned int);
void stacktrace_save_utrace(struct stacktrace *);
+void cf_stacktrace_save_utrace(struct stacktrace *, void *);
static inline void
stacktrace_save(struct stacktrace *st)
stacktrace_save_utrace() should be more robust