Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: patch: relax ni_pledge panic
To:
Theo de Raadt <deraadt@openbsd.org>
Cc:
Mark Kettenis <mark.kettenis@xs4all.nl>, semarie@kapouay.eu.org, tech@openbsd.org
Date:
Fri, 7 Feb 2025 17:53:44 +0100

Download raw body.

Thread
  • Theo de Raadt:

    patch: relax ni_pledge panic

  • On Fri, Feb 07, 2025 at 07:46:19AM -0700, Theo de Raadt wrote:
    > This makes a per-thread copy p_pledge of p_p->ps_pledge as we enter
    > each system call, and changes all the pledge checks to occur against
    > this per-thread variable.
    > 
    > This means any system call that does multiple sleeps, will honour the
    > pledge promises at invocation time.  Something expensive like open(), will
    > see a consistent value.
    > 
    > I also propose we change unveil() to force single_thread_set interlock.
    > In the diff, there's two parts to this -- adding unveils, and deleting
    > the entire unveil setting when pledge drops all filesystem access.
    
    I like this. If this goes in, many of those READ_ONCE wrappers can die
    since p_pledge can be accessed without fear of concurrent changes.
    
    I need to look closer at the single_thread_set calls. 
    The one in sys_pledge looks ok since the call is at the tail end of the
    syscall. The sys_unveil one I need to double check when I have time.
    
    The problem with single_thread_set is that it can call exit1 or sleep
    which must happen in a spot that is safe.
    
    > Index: sys/sys/proc.h
    > ===================================================================
    > RCS file: /cvs/src/sys/sys/proc.h,v
    > diff -u -p -u -r1.378 proc.h
    > --- sys/sys/proc.h	25 Jan 2025 19:21:40 -0000	1.378
    > +++ sys/sys/proc.h	7 Feb 2025 14:12:00 -0000
    > @@ -395,6 +395,7 @@ struct proc {
    >  	u_char	p_usrpri;	/* [S] Priority based on p_estcpu & ps_nice */
    >  	u_int	p_estcpu;		/* [S] Time averaged val of p_cpticks */
    >  	int	p_pledge_syscall;	/* Cache of current syscall */
    > +	uint64_t p_pledge;		/* [o] copy of p_p->ps_pledge */
    >  
    >  	struct	ucred *p_ucred;		/* [o] cached credentials */
    >  	struct	sigaltstack p_sigstk;	/* sp & on stack state variable */
    > Index: sys/kern/kern_event.c
    > ===================================================================
    > RCS file: /cvs/src/sys/kern/kern_event.c,v
    > diff -u -p -u -r1.200 kern_event.c
    > --- sys/kern/kern_event.c	6 Aug 2024 08:44:54 -0000	1.200
    > +++ sys/kern/kern_event.c	6 Feb 2025 18:52:15 -0000
    > @@ -343,7 +343,7 @@ filt_procattach(struct knote *kn)
    >  	int nolock;
    >  
    >  	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
    > -	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
    > +	    (curproc->p_pledge & PLEDGE_PROC) == 0)
    >  		return pledge_fail(curproc, EPERM, PLEDGE_PROC);
    >  
    >  	if (kn->kn_id > PID_MAX)
    > Index: sys/kern/kern_pledge.c
    > ===================================================================
    > RCS file: /cvs/src/sys/kern/kern_pledge.c,v
    > diff -u -p -u -r1.321 kern_pledge.c
    > --- sys/kern/kern_pledge.c	6 Oct 2024 23:39:24 -0000	1.321
    > +++ sys/kern/kern_pledge.c	7 Feb 2025 14:32:01 -0000
    > @@ -451,7 +451,7 @@ sys_pledge(struct proc *p, void *v, regi
    >  	} */	*uap = v;
    >  	struct process *pr = p->p_p;
    >  	uint64_t promises, execpromises;
    > -	int error;
    > +	int error = 0;
    >  	int unveil_cleanup = 0;
    >  
    >  	/* Check for any error in user input */
    > @@ -481,16 +481,16 @@ sys_pledge(struct proc *p, void *v, regi
    >  		/* Only permit reductions */
    >  		if (ISSET(pr->ps_flags, PS_PLEDGE) &&
    >  		    (((promises | pr->ps_pledge) != pr->ps_pledge))) {
    > -			mtx_leave(&pr->ps_mtx);
    > -			return (EPERM);
    > +			error = EPERM;
    > +			goto fail;
    >  		}
    >  	}
    >  	if (SCARG(uap, execpromises)) {
    >  		/* Only permit reductions */
    >  		if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
    >  		    (((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) {
    > -			mtx_leave(&pr->ps_mtx);
    > -			return (EPERM);
    > +			error = EPERM;
    > +			goto fail;
    >  		}
    >  	}
    >  
    > @@ -509,19 +509,24 @@ sys_pledge(struct proc *p, void *v, regi
    >  		atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
    >  	}
    >  
    > +fail:
    >  	mtx_leave(&pr->ps_mtx);
    >  
    >  	if (unveil_cleanup) {
    >  		/*
    >  		 * Kill off unveil and drop unveil vnode refs if we no
    > -		 * longer are holding any path-accessing pledge
    > +		 * longer are holding any path-accessing pledge, must
    > +		 * be done single-threaded so that other sleeping syscalls
    > +		 * finish first before we change this.
    >  		 */
    > +		single_thread_set(p, SINGLE_UNWIND);
    >  		KERNEL_LOCK();
    >  		unveil_destroy(pr);
    >  		KERNEL_UNLOCK();
    > +		single_thread_clear(p);
    >  	}
    >  
    > -	return (0);
    > +	return (error);
    >  }
    >  
    >  int
    > @@ -536,7 +541,8 @@ pledge_syscall(struct proc *p, int code,
    >  	if (pledge_syscalls[code] == PLEDGE_ALWAYS)
    >  		return (0);
    >  
    > -	if (p->p_p->ps_pledge & pledge_syscalls[code])
    > +	p->p_pledge = p->p_p->ps_pledge;	/* pledge checks are per-thread */
    > +	if (p->p_pledge & pledge_syscalls[code])
    >  		return (0);
    >  
    >  	*tval = pledge_syscalls[code];
    > @@ -559,7 +565,7 @@ pledge_fail(struct proc *p, int error, u
    >  	if (KTRPOINT(p, KTR_PLEDGE))
    >  		ktrpledge(p, error, code, p->p_pledge_syscall);
    >  #endif
    > -	if (p->p_p->ps_pledge & PLEDGE_ERROR)
    > +	if (p->p_pledge & PLEDGE_ERROR)
    >  		return (ENOSYS);
    >  
    >  	KERNEL_LOCK();
    > @@ -593,7 +599,7 @@ pledge_namei(struct proc *p, struct name
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
    >  	    (p->p_p->ps_flags & PS_COREDUMP))
    >  		return (0);
    > -	pledge = READ_ONCE(p->p_p->ps_pledge);
    > +	pledge = READ_ONCE(p->p_pledge);
    >  
    >  	if (ni->ni_pledge == 0)
    >  		panic("pledge_namei: ni_pledge");
    > @@ -729,7 +735,7 @@ pledge_namei(struct proc *p, struct name
    >  
    >  	/*
    >  	 * Ensure each flag of ni_pledge has counterpart allowing it in
    > -	 * ps_pledge.
    > +	 * p_pledge.
    >  	 */
    >  	if (ni->ni_pledge & ~pledge)
    >  		return (pledge_fail(p, EPERM, (ni->ni_pledge & ~pledge)));
    > @@ -748,7 +754,7 @@ pledge_recvfd(struct proc *p, struct fil
    >  
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    > -	if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0)
    > +	if ((p->p_pledge & PLEDGE_RECVFD) == 0)
    >  		return pledge_fail(p, EPERM, PLEDGE_RECVFD);
    >  
    >  	switch (fp->f_type) {
    > @@ -776,7 +782,7 @@ pledge_sendfd(struct proc *p, struct fil
    >  
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    > -	if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0)
    > +	if ((p->p_pledge & PLEDGE_SENDFD) == 0)
    >  		return pledge_fail(p, EPERM, PLEDGE_SENDFD);
    >  
    >  	switch (fp->f_type) {
    > @@ -804,7 +810,7 @@ pledge_sysctl(struct proc *p, int miblen
    >  
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    > -	pledge = READ_ONCE(p->p_p->ps_pledge);
    > +	pledge = READ_ONCE(p->p_pledge);
    >  
    >  	if (new)
    >  		return pledge_fail(p, EFAULT, 0);
    > @@ -1010,7 +1016,7 @@ pledge_chown(struct proc *p, uid_t uid, 
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    >  
    > -	if (p->p_p->ps_pledge & PLEDGE_CHOWNUID)
    > +	if (p->p_pledge & PLEDGE_CHOWNUID)
    >  		return (0);
    >  
    >  	if (uid != -1 && uid != p->p_ucred->cr_uid)
    > @@ -1028,7 +1034,7 @@ pledge_adjtime(struct proc *p, const voi
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    >  
    > -	if ((p->p_p->ps_pledge & PLEDGE_SETTIME))
    > +	if ((p->p_pledge & PLEDGE_SETTIME))
    >  		return (0);
    >  	if (delta)
    >  		return (EPERM);
    > @@ -1041,7 +1047,7 @@ pledge_sendit(struct proc *p, const void
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    >  
    > -	if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
    > +	if ((p->p_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
    >  		return (0);		/* may use address */
    >  	if (to == NULL)
    >  		return (0);		/* behaves just like write */
    > @@ -1057,7 +1063,7 @@ pledge_ioctl(struct proc *p, long com, s
    >  
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    > -	pledge = READ_ONCE(p->p_p->ps_pledge);
    > +	pledge = READ_ONCE(p->p_pledge);
    >  
    >  	/*
    >  	 * The ioctl's which are always allowed.
    > @@ -1365,7 +1371,7 @@ pledge_sockopt(struct proc *p, int set, 
    >  
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    > -	pledge = READ_ONCE(p->p_p->ps_pledge);
    > +	pledge = READ_ONCE(p->p_pledge);
    >  
    >  	/* Always allow these, which are too common to reject */
    >  	switch (level) {
    > @@ -1502,7 +1508,7 @@ pledge_socket(struct proc *p, int domain
    >  
    >  	if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
    >  		return 0;
    > -	pledge = READ_ONCE(p->p_p->ps_pledge);
    > +	pledge = READ_ONCE(p->p_pledge);
    >  
    >  	if (ISSET(state, SS_DNS)) {
    >  		if (ISSET(pledge, PLEDGE_DNS))
    > @@ -1534,7 +1540,7 @@ pledge_flock(struct proc *p)
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    >  
    > -	if ((p->p_p->ps_pledge & PLEDGE_FLOCK))
    > +	if ((p->p_pledge & PLEDGE_FLOCK))
    >  		return (0);
    >  	return (pledge_fail(p, EPERM, PLEDGE_FLOCK));
    >  }
    > @@ -1545,7 +1551,7 @@ pledge_swapctl(struct proc *p, int cmd)
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    >  
    > -	if (p->p_p->ps_pledge & PLEDGE_VMINFO) {
    > +	if (p->p_pledge & PLEDGE_VMINFO) {
    >  		switch (cmd) {
    >  		case SWAP_NSWAP:
    >  		case SWAP_STATS:
    > @@ -1580,7 +1586,7 @@ pledge_fcntl(struct proc *p, int cmd)
    >  {
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return (0);
    > -	if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
    > +	if ((p->p_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
    >  		return pledge_fail(p, EPERM, PLEDGE_PROC);
    >  	return (0);
    >  }
    > @@ -1590,7 +1596,7 @@ pledge_kill(struct proc *p, pid_t pid)
    >  {
    >  	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
    >  		return 0;
    > -	if (p->p_p->ps_pledge & PLEDGE_PROC)
    > +	if (p->p_pledge & PLEDGE_PROC)
    >  		return 0;
    >  	if (pid == 0 || pid == p->p_p->ps_pid)
    >  		return 0;
    > @@ -1615,7 +1621,7 @@ pledge_protexec(struct proc *p, int prot
    >  	/* Before kbind(2) call, ld.so and crt may create EXEC mappings */
    >  	if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0)
    >  		return 0;
    > -	if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
    > +	if (!(p->p_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
    >  		return pledge_fail(p, EPERM, PLEDGE_PROTEXEC);
    >  	return 0;
    >  }
    > Index: sys/kern/vfs_syscalls.c
    > ===================================================================
    > RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
    > diff -u -p -u -r1.372 vfs_syscalls.c
    > --- sys/kern/vfs_syscalls.c	29 Jan 2025 14:57:19 -0000	1.372
    > +++ sys/kern/vfs_syscalls.c	7 Feb 2025 14:36:44 -0000
    > @@ -1016,9 +1016,11 @@ sys_unveil(struct proc *p, void *v, regi
    >  	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
    >  		VOP_UNLOCK(nd.ni_dvp);
    >  
    > -	if (allow)
    > +	if (allow) {
    > +		single_thread_set(p, SINGLE_UNWIND);
    >  		error = unveil_add(p, &nd, permissions);
    > -	else
    > +		single_thread_clear(p);
    > +	} else
    >  		error = EPERM;
    >  
    >  	/* release vref from namei, but not vref from unveil_add */
    > Index: sys/dev/vmm/vmm.c
    > ===================================================================
    > RCS file: /cvs/src/sys/dev/vmm/vmm.c,v
    > diff -u -p -u -r1.3 vmm.c
    > --- sys/dev/vmm/vmm.c	27 Aug 2024 09:16:03 -0000	1.3
    > +++ sys/dev/vmm/vmm.c	6 Feb 2025 19:14:14 -0000
    > @@ -184,7 +184,7 @@ vm_find(uint32_t id, struct vm **res)
    >  			 * The managing vmm parent process can lookup all
    >  			 * all VMs and is indicated by PLEDGE_PROC.
    >  			 */
    > -			if (((p->p_p->ps_pledge &
    > +			if (((p->p_pledge &
    >  			    (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
    >  			    (vm->vm_creator_pid != p->p_p->ps_pid))
    >  				ret = EPERM;
    > @@ -291,7 +291,7 @@ pledge_ioctl_vmm(struct proc *p, long co
    >  	case VMM_IOC_INFO:
    >  	case VMM_IOC_SHAREMEM:
    >  		/* The "parent" process in vmd forks and manages VMs */
    > -		if (p->p_p->ps_pledge & PLEDGE_PROC)
    > +		if (p->p_pledge & PLEDGE_PROC)
    >  			return (0);
    >  		break;
    >  	case VMM_IOC_TERM:
    > 
    
    -- 
    :wq Claudio
    
    
    
  • Theo de Raadt:

    patch: relax ni_pledge panic