Index | Thread | Search

From:
Alexandr Nedvedicky <sashan@fastmail.net>
Subject:
Re: stacktrace_save_utrace() should be more robust
To:
Ted Unangst <tedu@tedunangst.com>, Mark Kettenis <mark.kettenis@xs4all.nl>, tech@openbsd.org
Date:
Thu, 22 May 2025 11:30:10 +0200

Download raw body.

Thread
Hello,

On Thu, May 22, 2025 at 07:17:07AM +0200, Claudio Jeker wrote:
> On Thu, May 22, 2025 at 05:09:17AM +0200, Alexandr Nedvedicky wrote:
> > Hello,
> > 
> > On Wed, May 21, 2025 at 08:15:48PM -0400, Ted Unangst wrote:
> > </snip>
> > > 
> > > Is it possible to use two loops here? One loop to get out of the kernel,
> > > and then the second that does the copyin? I think there is not so much
> > > code in common that duplicating is a problem, and it will be more clear
> > > what we're trying to do in each loop.
> > > 
> > 
> >     I like this idea. it makes code lot more clear.
> >     updated diff follows.
> 
> I never looked into this proper but instead of walking the full kernel
> stack, can't we start with the information in the trapframe?
> Don't remember if there is a pointer to it somewhere in the pcb.
>  

    I think I've managed to get working proof-of-concept (see diff below).
    it works for amd64. However I'm not sure if it is direction we want
    to go now. Perhaps later. I've cloned existing stactrace_save_utrace()
    which is a current workhorse for all dtrace probes (syscall, profiling
    static). I've made a cf_stacktrace_save_utrace() function which takes
    a clockframe as additional argument. Using a clockframe (intrframe)
    we can reach for userland stack immediately without iterating over
    kernel frames like we do in stacktrace_save_utrace(). Similar mechanism
    seen in cf_stacktrace_save_utrace() can be used for syscall dt probes
    where we can use trapframe instead of clockframe/intrframe.

    However I think we still need to get fix for stacktrace_save_utrace() in 
    because it will remain a workhorse for static probes (given I understand
    things right here).

For now I would like to get this trap/intr frame shortcut aside and
get fix for stacktrace_save_utrace(). I like tedu's idea it works
fine for i386/amd64. Frame shortcut for stack tracing can
be revisited later.

thanks and
regards
sashan


--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/arch/amd64/amd64/db_trace.c b/sys/arch/amd64/amd64/db_trace.c
index 1e7fcf2208a..256080e32e7 100644
--- a/sys/arch/amd64/amd64/db_trace.c
+++ b/sys/arch/amd64/amd64/db_trace.c
@@ -326,6 +326,29 @@ stacktrace_save_utrace(struct stacktrace *st)
 	curcpu()->ci_inatomic--;
 }
 
+void
+cf_stacktrace_save_utrace(struct stacktrace *st, void *cf)
+{
+	struct clockframe *cframe = (struct clockframe *)cf;
+	struct callframe f, *frame;
+
+	st->st_count = 0;
+
+	if (!CLKF_USERMODE(cframe))
+		return;
+
+	curcpu()->ci_inatomic++;
+	st->st_pc[st->st_count++] = cframe->if_rip;
+	frame = (struct callframe *)cframe->if_rbp;
+	while (st->st_count < STACKTRACE_MAX) {
+		if (copyin(frame, &f, sizeof(f)) != 0)
+			break;
+		st->st_pc[st->st_count++] = f.f_retaddr;
+		frame = f.f_frame;
+	}
+	curcpu()->ci_inatomic--;
+}
+
 vaddr_t
 db_get_pc(struct trapframe *tf)
 {
diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c
index e7ffa2df160..df50297f84e 100644
--- a/sys/dev/dt/dt_dev.c
+++ b/sys/dev/dt/dt_dev.c
@@ -824,7 +824,7 @@ dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip)
  * Get a reference to the next free event state from the ring.
  */
 struct dt_evt *
-dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
+dt_pcb_ring_get(struct dt_pcb *dp, int profiling, void *cf)
 {
 	struct proc *p = curproc;
 	struct dt_evt *dtev;
@@ -873,8 +873,12 @@ dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
 		else
 			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
 	}
-	if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
-		stacktrace_save_utrace(&dtev->dtev_ustack);
+	if (ISSET(dp->dp_evtflags, DTEVT_USTACK)) {
+		if (cf != NULL)
+			cf_stacktrace_save_utrace(&dtev->dtev_ustack, cf);
+		else
+			stacktrace_save_utrace(&dtev->dtev_ustack);
+	}
 
 	return dtev;
 }
diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c
index 5de2f8cd99a..ffc7411d10b 100644
--- a/sys/dev/dt/dt_prov_kprobe.c
+++ b/sys/dev/dt/dt_prov_kprobe.c
@@ -341,7 +341,7 @@ dt_prov_kprobe_hook(struct dt_provider *dtpv, ...)
 		SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
 			struct dt_evt *dtev;
 
-			dtev = dt_pcb_ring_get(dp, 0);
+			dtev = dt_pcb_ring_get(dp, 0, NULL);
 			if (dtev == NULL)
 				continue;
 
@@ -381,7 +381,7 @@ dt_prov_kprobe_hook(struct dt_provider *dtpv, ...)
 		SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
 			struct dt_evt *dtev;
 
-			dtev = dt_pcb_ring_get(dp, 0);
+			dtev = dt_pcb_ring_get(dp, 0, NULL);
 			if (dtev == NULL)
 				continue;
 
diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c
index 545a20153e5..e3389ba63a4 100644
--- a/sys/dev/dt/dt_prov_profile.c
+++ b/sys/dev/dt/dt_prov_profile.c
@@ -112,7 +112,7 @@ dt_clock(struct clockrequest *cr, void *cf, void *arg)
 	else if (count > 1)
 		dt_pcb_ring_skiptick(dp, count - 1);
 
-	dtev = dt_pcb_ring_get(dp, 1);
+	dtev = dt_pcb_ring_get(dp, 1, cf);
 	if (dtev == NULL)
 		return;
 	dt_pcb_ring_consume(dp, dtev);
diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c
index 677b2a1ef72..2eb7e8adf6a 100644
--- a/sys/dev/dt/dt_prov_static.c
+++ b/sys/dev/dt/dt_prov_static.c
@@ -219,7 +219,7 @@ dt_prov_static_hook(struct dt_provider *dtpv, ...)
 	SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
 		struct dt_evt *dtev;
 
-		dtev = dt_pcb_ring_get(dp, 0);
+		dtev = dt_pcb_ring_get(dp, 0, NULL);
 		if (dtev == NULL)
 			continue;
 
diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c
index 03c561eece8..f5e508a693e 100644
--- a/sys/dev/dt/dt_prov_syscall.c
+++ b/sys/dev/dt/dt_prov_syscall.c
@@ -146,7 +146,8 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
 	SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
 		struct dt_evt *dtev;
 
-		dtev = dt_pcb_ring_get(dp, 0);
+		/* XXX we may pass a traprframe here instead of NULL */
+		dtev = dt_pcb_ring_get(dp, 0, NULL);
 		if (dtev == NULL)
 			continue;
 
@@ -189,7 +190,8 @@ dt_prov_syscall_return(struct dt_provider *dtpv, ...)
 	SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
 		struct dt_evt *dtev;
 
-		dtev = dt_pcb_ring_get(dp, 0);
+		/* XXX we can pass a trapframe here instead of NULL */
+		dtev = dt_pcb_ring_get(dp, 0, NULL);
 		if (dtev == NULL)
 			continue;
 
diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h
index 77ac3fa05df..f88420ae3b0 100644
--- a/sys/dev/dt/dtvar.h
+++ b/sys/dev/dt/dtvar.h
@@ -199,7 +199,7 @@ void		 dt_pcb_free(struct dt_pcb *);
 void		 dt_pcb_purge(struct dt_pcb_list *);
 
 void		 dt_pcb_ring_skiptick(struct dt_pcb *, unsigned int);
-struct dt_evt	*dt_pcb_ring_get(struct dt_pcb *, int);
+struct dt_evt	*dt_pcb_ring_get(struct dt_pcb *, int, void *);
 void		 dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *);
 
 /*
diff --git a/sys/sys/stacktrace.h b/sys/sys/stacktrace.h
index 1348b715db2..48edfa900f6 100644
--- a/sys/sys/stacktrace.h
+++ b/sys/sys/stacktrace.h
@@ -30,6 +30,7 @@ struct stacktrace {
 void	stacktrace_print(struct stacktrace *, int (*)(const char *, ...));
 void	stacktrace_save_at(struct stacktrace *, unsigned int);
 void	stacktrace_save_utrace(struct stacktrace *);
+void	cf_stacktrace_save_utrace(struct stacktrace *, void *);
 
 static inline void
 stacktrace_save(struct stacktrace *st)