Download raw body.
more parnoia for stacktrace_save_utrace()
> Date: Sat, 2 Aug 2025 15:28:39 +0200
> From: Alexandr Nedvedicky <sashan@fastmail.net>
>
> Hello,
>
> The testing with uprofile.bt script revealed the kernel may crash
> while traversing a kernel stack towards userland to capture the
> stacktrace. The current INKERNEL() check is not sufficient, we
> need to test if frame fits into a kernel stack range.
>
> For traversal of userland stack we rely on copyin() to fail
> when when it is presented with bogus address. The newly added
> call to uvm_map_lookup_entry() checks the validity of the last
> return address we keep in tracebuffer when copyin() fails
> for the last current frame.
>
> Perhaps we might want to use uvm_map_lookup_entry() for every
> return address we got to make sure it comes from executable
> memory. However I have no idea how much expensive that might be.
>
> OK to commit diff below?
Hmm...
> ----8<-------8<-------8<-------8<-------8<------------8<----
> diff --git a/sys/arch/amd64/amd64/db_trace.c b/sys/arch/amd64/amd64/db_trace.c
> index 624d7ed1a73..f4202bf663f 100644
> --- a/sys/arch/amd64/amd64/db_trace.c
> +++ b/sys/arch/amd64/amd64/db_trace.c
> @@ -290,8 +290,12 @@ stacktrace_save_at(struct stacktrace *st, unsigned int skip)
> void
> stacktrace_save_utrace(struct stacktrace *st)
> {
> - struct callframe f, *frame, *lastframe;
> + struct callframe f, *frame, *lastframe, *topframe;
> struct pcb *pcb = curpcb;
> + struct process *ps = curproc->p_p;;
> + struct vm_map_entry *e;
> + vaddr_t retaddr;
> + int ok;
>
> st->st_count = 0;
>
> @@ -300,13 +304,15 @@ stacktrace_save_utrace(struct stacktrace *st)
>
> lastframe = NULL;
> frame = __builtin_frame_address(0);
> + topframe = frame;
> KASSERT(INKERNEL(frame));
>
> curcpu()->ci_inatomic++;
> /*
> * skip kernel frames
> */
> - while (frame != NULL && lastframe < frame && INKERNEL(frame)) {
> + while (frame != NULL && frame >= topframe &&
> + frame <= (struct callframe *)pcb->pcb_kstack) {
Why not keep the "lastframe < frame" condition and just replace the
INKERNEL(frame) check with the "frame <= (struct callframe *)pcb->pcb_kstack"
check? That is stricter since it enforces the stack order.
> lastframe = frame;
> frame = frame->f_frame;
> }
> @@ -318,8 +324,29 @@ stacktrace_save_utrace(struct stacktrace *st)
> st->st_pc[st->st_count++] = lastframe->f_retaddr;
>
> while (frame != NULL && st->st_count < STACKTRACE_MAX) {
> - if (copyin(frame, &f, sizeof(f)) != 0)
> + if (copyin(frame, &f, sizeof(f)) != 0) {
> + /*
> + * Check sanity of return address in last
> + * frame we've saved. If look up fails,
> + * then we just discard the last frame from
> + * trace buffer.
> + */
> + retaddr = (st->st_count == 0) ?
> + st->st_pc[0] : st->st_pc[st->st_count - 1];
> + vm_map_lock_read(&ps->ps_vmspace->vm_map);
> + ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map,
> + retaddr, &e);
> + if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 ||
I don't think that UVM_ET_OBJ check makes sense. I think there are
cases where userland runs code in non-mmapped pages. For example code
in JIT engines. But even the edges of shared library text might be
non-mmapped pages.
> + (e->protection & PROT_EXEC) == 0) {
> + if (st->st_count == 0)
> + st->st_pc[0] = 0;
> + else
> + st->st_count--;
> + }
> + vm_map_unlock_read(&ps->ps_vmspace->vm_map);
> +
> break;
> + }
I (still?) don't understand what you're trying to fix here. But what
made you abandon the idea of validating the frames themselves?
> st->st_pc[st->st_count++] = f.f_retaddr;
> frame = f.f_frame;
> }
> diff --git a/sys/arch/i386/i386/db_trace.c b/sys/arch/i386/i386/db_trace.c
> index b7c8b68711a..fd34107e8a9 100644
> --- a/sys/arch/i386/i386/db_trace.c
> +++ b/sys/arch/i386/i386/db_trace.c
> @@ -289,8 +289,12 @@ stacktrace_save_at(struct stacktrace *st, unsigned int skip)
> void
> stacktrace_save_utrace(struct stacktrace *st)
> {
> - struct callframe f, *frame, *lastframe;
> + struct callframe f, *frame, *lastframe, *topframe;
> struct pcb *pcb = curpcb;
> + struct process *ps = curproc->p_p;;
> + struct vm_map_entry *e;
> + vaddr_t retaddr;
> + int ok;
>
> st->st_count = 0;
>
> @@ -299,13 +303,15 @@ stacktrace_save_utrace(struct stacktrace *st)
>
> lastframe = NULL;
> frame = __builtin_frame_address(0);
> + topframe = frame;
> KASSERT(INKERNEL(frame));
>
> curcpu()->ci_inatomic++;
> /*
> * skip kernel frames
> */
> - while (frame != NULL && lastframe < frame && INKERNEL(frame)) {
> + while (frame != NULL && frame >= topframe &&
> + frame <= (struct callframe *)pcb->pcb_kstack) {
> lastframe = frame;
> frame = frame->f_frame;
> }
> @@ -317,8 +323,29 @@ stacktrace_save_utrace(struct stacktrace *st)
> st->st_pc[st->st_count++] = lastframe->f_retaddr;
>
> while (frame != NULL && st->st_count < STACKTRACE_MAX) {
> - if (copyin(frame, &f, sizeof(f)) != 0)
> + if (copyin(frame, &f, sizeof(f)) != 0) {
> + /*
> + * Check sanity of return address in last
> + * frame we've saved. If look up fails,
> + * then we just discard the last frame from
> + * trace buffer.
> + */
> + retaddr = (st->st_count == 0) ?
> + st->st_pc[0] : st->st_pc[st->st_count - 1];
> + vm_map_lock_read(&ps->ps_vmspace->vm_map);
> + ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map,
> + retaddr, &e);
> + if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 ||
> + (e->protection & PROT_EXEC) == 0) {
> + if (st->st_count == 0)
> + st->st_pc[0] = 0;
> + else
> + st->st_count--;
> + }
> + vm_map_unlock_read(&ps->ps_vmspace->vm_map);
> +
> break;
> + }
> st->st_pc[st->st_count++] = f.f_retaddr;
> frame = f.f_frame;
> }
>
>
more parnoia for stacktrace_save_utrace()