Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Re: more parnoia for stacktrace_save_utrace()
To:
Alexandr Nedvedicky <sashan@fastmail.net>
Cc:
tech@openbsd.org
Date:
Sat, 02 Aug 2025 17:12:35 +0200

Download raw body.

Thread
> Date: Sat, 2 Aug 2025 15:28:39 +0200
> From: Alexandr Nedvedicky <sashan@fastmail.net>
> 
> Hello,
> 
> The testing with uprofile.bt script revealed the kernel may crash
> while traversing a kernel stack towards userland to capture the
> stacktrace. The current INKERNEL() check is not sufficient, we
> need to test if frame fits into a kernel stack range.
> 
> For traversal of userland stack we rely on copyin() to fail
> when when it is presented with bogus address. The newly added
> call to uvm_map_lookup_entry() checks the validity of the last
> return address we keep in tracebuffer when copyin() fails
> for the last current frame.
> 
> Perhaps we might want to use uvm_map_lookup_entry() for every
> return address we got to make sure it comes from executable
> memory. However I have no idea how much expensive that might be.
> 
> OK to commit diff below?

Hmm...

> ----8<-------8<-------8<-------8<-------8<------------8<----
> diff --git a/sys/arch/amd64/amd64/db_trace.c b/sys/arch/amd64/amd64/db_trace.c
> index 624d7ed1a73..f4202bf663f 100644
> --- a/sys/arch/amd64/amd64/db_trace.c
> +++ b/sys/arch/amd64/amd64/db_trace.c
> @@ -290,8 +290,12 @@ stacktrace_save_at(struct stacktrace *st, unsigned int skip)
>  void
>  stacktrace_save_utrace(struct stacktrace *st)
>  {
> -	struct callframe f, *frame, *lastframe;
> +	struct callframe f, *frame, *lastframe, *topframe;
>  	struct pcb *pcb = curpcb;
> +	struct process *ps = curproc->p_p;;
> +	struct vm_map_entry *e;
> +	vaddr_t retaddr;
> +	int ok;
>  
>  	st->st_count = 0;
>  
> @@ -300,13 +304,15 @@ stacktrace_save_utrace(struct stacktrace *st)
>  
>  	lastframe = NULL;
>  	frame = __builtin_frame_address(0);
> +	topframe = frame;
>  	KASSERT(INKERNEL(frame));
>  
>  	curcpu()->ci_inatomic++;
>  	/*
>  	 * skip kernel frames
>  	 */
> -	while (frame != NULL && lastframe < frame && INKERNEL(frame)) {
> +	while (frame != NULL && frame >= topframe &&
> +	    frame <= (struct callframe *)pcb->pcb_kstack) {

Why not keep the "lastframe < frame" condition and just replace the
INKERNEL(frame) check with the "frame <= (struct callframe *)pcb->pcb_kstack"
check?  That is stricter since it enforces the stack order.

>  		lastframe = frame;
>  		frame = frame->f_frame;
>  	}
> @@ -318,8 +324,29 @@ stacktrace_save_utrace(struct stacktrace *st)
>  		st->st_pc[st->st_count++] = lastframe->f_retaddr;
>  
>  	while (frame != NULL && st->st_count < STACKTRACE_MAX) {
> -		if (copyin(frame, &f, sizeof(f)) != 0)
> +		if (copyin(frame, &f, sizeof(f)) != 0) {
> +			/*
> +			 * Check sanity of return address in last
> +			 * frame we've saved. If look up fails,
> +			 * then we just discard the last frame from
> +			 * trace buffer.
> +			 */
> +			retaddr = (st->st_count == 0) ?
> +			    st->st_pc[0] : st->st_pc[st->st_count - 1];
> +			vm_map_lock_read(&ps->ps_vmspace->vm_map);
> +			ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map,
> +			    retaddr, &e);
> +			if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 ||

I don't think that UVM_ET_OBJ check makes sense.  I think there are
cases where userland runs code in non-mmapped pages.  For example code
in JIT engines.  But even the edges of shared library text might be
non-mmapped pages.

> +			    (e->protection & PROT_EXEC) == 0) {
> +				if (st->st_count == 0)
> +					st->st_pc[0] = 0;
> +				else
> +					st->st_count--;
> +			}
> +			vm_map_unlock_read(&ps->ps_vmspace->vm_map);
> +
>  			break;
> +		}

I (still?) don't understand what you're trying to fix here.  But what
made you abandon the idea of validating the frames themselves?

>  		st->st_pc[st->st_count++] = f.f_retaddr;
>  		frame = f.f_frame;
>  	}
> diff --git a/sys/arch/i386/i386/db_trace.c b/sys/arch/i386/i386/db_trace.c
> index b7c8b68711a..fd34107e8a9 100644
> --- a/sys/arch/i386/i386/db_trace.c
> +++ b/sys/arch/i386/i386/db_trace.c
> @@ -289,8 +289,12 @@ stacktrace_save_at(struct stacktrace *st, unsigned int skip)
>  void
>  stacktrace_save_utrace(struct stacktrace *st)
>  {
> -	struct callframe f, *frame, *lastframe;
> +	struct callframe f, *frame, *lastframe, *topframe;
>  	struct pcb *pcb = curpcb;
> +	struct process *ps = curproc->p_p;;
> +	struct vm_map_entry *e;
> +	vaddr_t retaddr;
> +	int ok;
>  
>  	st->st_count = 0;
>  
> @@ -299,13 +303,15 @@ stacktrace_save_utrace(struct stacktrace *st)
>  
>  	lastframe = NULL;
>  	frame = __builtin_frame_address(0);
> +	topframe = frame;
>  	KASSERT(INKERNEL(frame));
>  
>  	curcpu()->ci_inatomic++;
>  	/*
>  	 * skip kernel frames
>  	 */
> -	while (frame != NULL && lastframe < frame && INKERNEL(frame)) {
> +	while (frame != NULL && frame >= topframe &&
> +	    frame <= (struct callframe *)pcb->pcb_kstack) {
>  		lastframe = frame;
>  		frame = frame->f_frame;
>  	}
> @@ -317,8 +323,29 @@ stacktrace_save_utrace(struct stacktrace *st)
>  		st->st_pc[st->st_count++] = lastframe->f_retaddr;
>  
>  	while (frame != NULL && st->st_count < STACKTRACE_MAX) {
> -		if (copyin(frame, &f, sizeof(f)) != 0)
> +		if (copyin(frame, &f, sizeof(f)) != 0) {
> +			/*
> +			 * Check sanity of return address in last
> +			 * frame we've saved. If look up fails,
> +			 * then we just discard the last frame from
> +			 * trace buffer.
> +			 */
> +			retaddr = (st->st_count == 0) ?
> +			    st->st_pc[0] : st->st_pc[st->st_count - 1];
> +			vm_map_lock_read(&ps->ps_vmspace->vm_map);
> +			ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map,
> +			    retaddr, &e);
> +			if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 ||
> +			    (e->protection & PROT_EXEC) == 0) {
> +				if (st->st_count == 0)
> +					st->st_pc[0] = 0;
> +				else
> +					st->st_count--;
> +			}
> +			vm_map_unlock_read(&ps->ps_vmspace->vm_map);
> +
>  			break;
> +		}
>  		st->st_pc[st->st_count++] = f.f_retaddr;
>  		frame = f.f_frame;
>  	}
> 
>