From: Mark Kettenis Subject: Re: more parnoia for stacktrace_save_utrace() To: Alexandr Nedvedicky Cc: tech@openbsd.org Date: Sat, 02 Aug 2025 17:12:35 +0200 > Date: Sat, 2 Aug 2025 15:28:39 +0200 > From: Alexandr Nedvedicky > > Hello, > > The testing with uprofile.bt script revealed the kernel may crash > while traversing a kernel stack towards userland to capture the > stacktrace. The current INKERNEL() check is not sufficient, we > need to test if frame fits into a kernel stack range. > > For traversal of userland stack we rely on copyin() to fail > when when it is presented with bogus address. The newly added > call to uvm_map_lookup_entry() checks the validity of the last > return address we keep in tracebuffer when copyin() fails > for the last current frame. > > Perhaps we might want to use uvm_map_lookup_entry() for every > return address we got to make sure it comes from executable > memory. However I have no idea how much expensive that might be. > > OK to commit diff below? Hmm... > ----8<-------8<-------8<-------8<-------8<------------8<---- > diff --git a/sys/arch/amd64/amd64/db_trace.c b/sys/arch/amd64/amd64/db_trace.c > index 624d7ed1a73..f4202bf663f 100644 > --- a/sys/arch/amd64/amd64/db_trace.c > +++ b/sys/arch/amd64/amd64/db_trace.c > @@ -290,8 +290,12 @@ stacktrace_save_at(struct stacktrace *st, unsigned int skip) > void > stacktrace_save_utrace(struct stacktrace *st) > { > - struct callframe f, *frame, *lastframe; > + struct callframe f, *frame, *lastframe, *topframe; > struct pcb *pcb = curpcb; > + struct process *ps = curproc->p_p;; > + struct vm_map_entry *e; > + vaddr_t retaddr; > + int ok; > > st->st_count = 0; > > @@ -300,13 +304,15 @@ stacktrace_save_utrace(struct stacktrace *st) > > lastframe = NULL; > frame = __builtin_frame_address(0); > + topframe = frame; > KASSERT(INKERNEL(frame)); > > curcpu()->ci_inatomic++; > /* > * skip kernel frames > */ > - while (frame != NULL && lastframe < frame && INKERNEL(frame)) { > + while (frame != NULL && frame >= topframe && > + frame <= (struct callframe *)pcb->pcb_kstack) { Why not keep the "lastframe < frame" condition and just replace the INKERNEL(frame) check with the "frame <= (struct callframe *)pcb->pcb_kstack" check? That is stricter since it enforces the stack order. > lastframe = frame; > frame = frame->f_frame; > } > @@ -318,8 +324,29 @@ stacktrace_save_utrace(struct stacktrace *st) > st->st_pc[st->st_count++] = lastframe->f_retaddr; > > while (frame != NULL && st->st_count < STACKTRACE_MAX) { > - if (copyin(frame, &f, sizeof(f)) != 0) > + if (copyin(frame, &f, sizeof(f)) != 0) { > + /* > + * Check sanity of return address in last > + * frame we've saved. If look up fails, > + * then we just discard the last frame from > + * trace buffer. > + */ > + retaddr = (st->st_count == 0) ? > + st->st_pc[0] : st->st_pc[st->st_count - 1]; > + vm_map_lock_read(&ps->ps_vmspace->vm_map); > + ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map, > + retaddr, &e); > + if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 || I don't think that UVM_ET_OBJ check makes sense. I think there are cases where userland runs code in non-mmapped pages. For example code in JIT engines. But even the edges of shared library text might be non-mmapped pages. > + (e->protection & PROT_EXEC) == 0) { > + if (st->st_count == 0) > + st->st_pc[0] = 0; > + else > + st->st_count--; > + } > + vm_map_unlock_read(&ps->ps_vmspace->vm_map); > + > break; > + } I (still?) don't understand what you're trying to fix here. But what made you abandon the idea of validating the frames themselves? > st->st_pc[st->st_count++] = f.f_retaddr; > frame = f.f_frame; > } > diff --git a/sys/arch/i386/i386/db_trace.c b/sys/arch/i386/i386/db_trace.c > index b7c8b68711a..fd34107e8a9 100644 > --- a/sys/arch/i386/i386/db_trace.c > +++ b/sys/arch/i386/i386/db_trace.c > @@ -289,8 +289,12 @@ stacktrace_save_at(struct stacktrace *st, unsigned int skip) > void > stacktrace_save_utrace(struct stacktrace *st) > { > - struct callframe f, *frame, *lastframe; > + struct callframe f, *frame, *lastframe, *topframe; > struct pcb *pcb = curpcb; > + struct process *ps = curproc->p_p;; > + struct vm_map_entry *e; > + vaddr_t retaddr; > + int ok; > > st->st_count = 0; > > @@ -299,13 +303,15 @@ stacktrace_save_utrace(struct stacktrace *st) > > lastframe = NULL; > frame = __builtin_frame_address(0); > + topframe = frame; > KASSERT(INKERNEL(frame)); > > curcpu()->ci_inatomic++; > /* > * skip kernel frames > */ > - while (frame != NULL && lastframe < frame && INKERNEL(frame)) { > + while (frame != NULL && frame >= topframe && > + frame <= (struct callframe *)pcb->pcb_kstack) { > lastframe = frame; > frame = frame->f_frame; > } > @@ -317,8 +323,29 @@ stacktrace_save_utrace(struct stacktrace *st) > st->st_pc[st->st_count++] = lastframe->f_retaddr; > > while (frame != NULL && st->st_count < STACKTRACE_MAX) { > - if (copyin(frame, &f, sizeof(f)) != 0) > + if (copyin(frame, &f, sizeof(f)) != 0) { > + /* > + * Check sanity of return address in last > + * frame we've saved. If look up fails, > + * then we just discard the last frame from > + * trace buffer. > + */ > + retaddr = (st->st_count == 0) ? > + st->st_pc[0] : st->st_pc[st->st_count - 1]; > + vm_map_lock_read(&ps->ps_vmspace->vm_map); > + ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map, > + retaddr, &e); > + if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 || > + (e->protection & PROT_EXEC) == 0) { > + if (st->st_count == 0) > + st->st_pc[0] = 0; > + else > + st->st_count--; > + } > + vm_map_unlock_read(&ps->ps_vmspace->vm_map); > + > break; > + } > st->st_pc[st->st_count++] = f.f_retaddr; > frame = f.f_frame; > } > >