From: "Theo de Raadt" <deraadt@openbsd.org>
Subject: Re: patch: relax ni_pledge panic
Cc: Mark Kettenis <mark.kettenis@xs4all.nl>, semarie@kapouay.eu.org, tech@openbsd.org
Date: Fri, 07 Feb 2025 07:46:19 -0700

    This makes a per-thread copy p_pledge of p_p->ps_pledge as we enter
    each system call, and changes all the pledge checks to occur against
    this per-thread variable.

    This means any system call that does multiple sleeps will honour the
    pledge promises in effect at invocation time.  Something expensive
    like open() will see a consistent value.
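
    To illustrate the pattern (a simplified sketch, not part of the diff;
    the real change is the one line added to pledge_syscall() below), the
    snapshot is taken once at syscall entry, and every later check inside
    that same syscall consults the snapshot rather than the shared value:

	/* syscall entry: take a per-thread snapshot of the promises */
	p->p_pledge = p->p_p->ps_pledge;

	/*
	 * Later in the same syscall, possibly after sleeping: check the
	 * snapshot, not p_p->ps_pledge, so a concurrent pledge(2) in
	 * another thread cannot change the answer mid-call.
	 */
	if ((p->p_pledge & PLEDGE_FLOCK) == 0)
		return (pledge_fail(p, EPERM, PLEDGE_FLOCK));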
    
    I also propose we change unveil() to force a single_thread_set interlock.
    In the diff, there are two parts to this -- adding unveils, and deleting
    the entire unveil setting when pledge drops all filesystem access.
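
    The interlock pattern is the same in both places (a sketch only; the
    exact call sites are in sys_pledge() and sys_unveil() below): park the
    sibling threads so their in-flight syscalls unwind before the process
    unveil state changes, then let them run again.

	single_thread_set(p, SINGLE_UNWIND);	/* siblings unwind and park */
	/* modify the shared unveil state, e.g. unveil_add() when adding,
	 * or unveil_destroy() when the last filesystem pledge is dropped */
	single_thread_clear(p);			/* resume the other threads */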
    
    Index: sys/sys/proc.h
    ===================================================================
    RCS file: /cvs/src/sys/sys/proc.h,v
    diff -u -p -u -r1.378 proc.h
    --- sys/sys/proc.h	25 Jan 2025 19:21:40 -0000	1.378
    +++ sys/sys/proc.h	7 Feb 2025 14:12:00 -0000
    @@ -395,6 +395,7 @@ struct proc {
     	u_char	p_usrpri;	/* [S] Priority based on p_estcpu & ps_nice */
     	u_int	p_estcpu;		/* [S] Time averaged val of p_cpticks */
     	int	p_pledge_syscall;	/* Cache of current syscall */
    +	uint64_t p_pledge;		/* [o] copy of p_p->ps_pledge */
     
     	struct	ucred *p_ucred;		/* [o] cached credentials */
     	struct	sigaltstack p_sigstk;	/* sp & on stack state variable */
    Index: sys/kern/kern_event.c
    ===================================================================
    RCS file: /cvs/src/sys/kern/kern_event.c,v
    diff -u -p -u -r1.200 kern_event.c
    --- sys/kern/kern_event.c	6 Aug 2024 08:44:54 -0000	1.200
    +++ sys/kern/kern_event.c	6 Feb 2025 18:52:15 -0000
    @@ -343,7 +343,7 @@ filt_procattach(struct knote *kn)
     	int nolock;
     
     	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
    -	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
    +	    (curproc->p_pledge & PLEDGE_PROC) == 0)
     		return pledge_fail(curproc, EPERM, PLEDGE_PROC);
     
     	if (kn->kn_id > PID_MAX)
    Index: sys/kern/kern_pledge.c
    ===================================================================
    RCS file: /cvs/src/sys/kern/kern_pledge.c,v
    diff -u -p -u -r1.321 kern_pledge.c
    --- sys/kern/kern_pledge.c	6 Oct 2024 23:39:24 -0000	1.321
    +++ sys/kern/kern_pledge.c	7 Feb 2025 14:32:01 -0000
    @@ -451,7 +451,7 @@ sys_pledge(struct proc *p, void *v, regi
     	} */	*uap = v;
     	struct process *pr = p->p_p;
     	uint64_t promises, execpromises;
    -	int error;
    +	int error = 0;
     	int unveil_cleanup = 0;
     
     	/* Check for any error in user input */
    @@ -481,16 +481,16 @@ sys_pledge(struct proc *p, void *v, regi
     		/* Only permit reductions */
     		if (ISSET(pr->ps_flags, PS_PLEDGE) &&
     		    (((promises | pr->ps_pledge) != pr->ps_pledge))) {
    -			mtx_leave(&pr->ps_mtx);
    -			return (EPERM);
    +			error = EPERM;
    +			goto fail;
     		}
     	}
     	if (SCARG(uap, execpromises)) {
     		/* Only permit reductions */
     		if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
     		    (((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) {
    -			mtx_leave(&pr->ps_mtx);
    -			return (EPERM);
    +			error = EPERM;
    +			goto fail;
     		}
     	}
     
    @@ -509,19 +509,24 @@ sys_pledge(struct proc *p, void *v, regi
     		atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
     	}
     
    +fail:
     	mtx_leave(&pr->ps_mtx);
     
     	if (unveil_cleanup) {
     		/*
     		 * Kill off unveil and drop unveil vnode refs if we no
    -		 * longer are holding any path-accessing pledge
    +		 * longer are holding any path-accessing pledge, must
     +		 * be done single-threaded so that other sleeping
     +		 * syscalls finish before we change this.
     		 */
    +		single_thread_set(p, SINGLE_UNWIND);
     		KERNEL_LOCK();
     		unveil_destroy(pr);
     		KERNEL_UNLOCK();
    +		single_thread_clear(p);
     	}
     
    -	return (0);
    +	return (error);
     }
     
     int
    @@ -536,7 +541,8 @@ pledge_syscall(struct proc *p, int code,
     	if (pledge_syscalls[code] == PLEDGE_ALWAYS)
     		return (0);
     
    -	if (p->p_p->ps_pledge & pledge_syscalls[code])
    +	p->p_pledge = p->p_p->ps_pledge;	/* pledge checks are per-thread */
    +	if (p->p_pledge & pledge_syscalls[code])
     		return (0);
     
     	*tval = pledge_syscalls[code];
    @@ -559,7 +565,7 @@ pledge_fail(struct proc *p, int error, u
     	if (KTRPOINT(p, KTR_PLEDGE))
     		ktrpledge(p, error, code, p->p_pledge_syscall);
     #endif
    -	if (p->p_p->ps_pledge & PLEDGE_ERROR)
    +	if (p->p_pledge & PLEDGE_ERROR)
     		return (ENOSYS);
     
     	KERNEL_LOCK();
    @@ -593,7 +599,7 @@ pledge_namei(struct proc *p, struct name
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
     	    (p->p_p->ps_flags & PS_COREDUMP))
     		return (0);
    -	pledge = READ_ONCE(p->p_p->ps_pledge);
    +	pledge = READ_ONCE(p->p_pledge);
     
     	if (ni->ni_pledge == 0)
     		panic("pledge_namei: ni_pledge");
    @@ -729,7 +735,7 @@ pledge_namei(struct proc *p, struct name
     
     	/*
     	 * Ensure each flag of ni_pledge has counterpart allowing it in
    -	 * ps_pledge.
    +	 * p_pledge.
     	 */
     	if (ni->ni_pledge & ~pledge)
     		return (pledge_fail(p, EPERM, (ni->ni_pledge & ~pledge)));
    @@ -748,7 +754,7 @@ pledge_recvfd(struct proc *p, struct fil
     
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
    -	if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0)
    +	if ((p->p_pledge & PLEDGE_RECVFD) == 0)
     		return pledge_fail(p, EPERM, PLEDGE_RECVFD);
     
     	switch (fp->f_type) {
    @@ -776,7 +782,7 @@ pledge_sendfd(struct proc *p, struct fil
     
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
    -	if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0)
    +	if ((p->p_pledge & PLEDGE_SENDFD) == 0)
     		return pledge_fail(p, EPERM, PLEDGE_SENDFD);
     
     	switch (fp->f_type) {
    @@ -804,7 +810,7 @@ pledge_sysctl(struct proc *p, int miblen
     
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
    -	pledge = READ_ONCE(p->p_p->ps_pledge);
    +	pledge = READ_ONCE(p->p_pledge);
     
     	if (new)
     		return pledge_fail(p, EFAULT, 0);
    @@ -1010,7 +1016,7 @@ pledge_chown(struct proc *p, uid_t uid, 
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
     
    -	if (p->p_p->ps_pledge & PLEDGE_CHOWNUID)
    +	if (p->p_pledge & PLEDGE_CHOWNUID)
     		return (0);
     
     	if (uid != -1 && uid != p->p_ucred->cr_uid)
    @@ -1028,7 +1034,7 @@ pledge_adjtime(struct proc *p, const voi
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
     
    -	if ((p->p_p->ps_pledge & PLEDGE_SETTIME))
    +	if ((p->p_pledge & PLEDGE_SETTIME))
     		return (0);
     	if (delta)
     		return (EPERM);
    @@ -1041,7 +1047,7 @@ pledge_sendit(struct proc *p, const void
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
     
    -	if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
    +	if ((p->p_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
     		return (0);		/* may use address */
     	if (to == NULL)
     		return (0);		/* behaves just like write */
    @@ -1057,7 +1063,7 @@ pledge_ioctl(struct proc *p, long com, s
     
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
    -	pledge = READ_ONCE(p->p_p->ps_pledge);
    +	pledge = READ_ONCE(p->p_pledge);
     
     	/*
     	 * The ioctl's which are always allowed.
    @@ -1365,7 +1371,7 @@ pledge_sockopt(struct proc *p, int set, 
     
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
    -	pledge = READ_ONCE(p->p_p->ps_pledge);
    +	pledge = READ_ONCE(p->p_pledge);
     
     	/* Always allow these, which are too common to reject */
     	switch (level) {
    @@ -1502,7 +1508,7 @@ pledge_socket(struct proc *p, int domain
     
     	if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
     		return 0;
    -	pledge = READ_ONCE(p->p_p->ps_pledge);
    +	pledge = READ_ONCE(p->p_pledge);
     
     	if (ISSET(state, SS_DNS)) {
     		if (ISSET(pledge, PLEDGE_DNS))
    @@ -1534,7 +1540,7 @@ pledge_flock(struct proc *p)
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
     
    -	if ((p->p_p->ps_pledge & PLEDGE_FLOCK))
    +	if ((p->p_pledge & PLEDGE_FLOCK))
     		return (0);
     	return (pledge_fail(p, EPERM, PLEDGE_FLOCK));
     }
    @@ -1545,7 +1551,7 @@ pledge_swapctl(struct proc *p, int cmd)
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
     
    -	if (p->p_p->ps_pledge & PLEDGE_VMINFO) {
    +	if (p->p_pledge & PLEDGE_VMINFO) {
     		switch (cmd) {
     		case SWAP_NSWAP:
     		case SWAP_STATS:
    @@ -1580,7 +1586,7 @@ pledge_fcntl(struct proc *p, int cmd)
     {
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return (0);
    -	if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
    +	if ((p->p_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
     		return pledge_fail(p, EPERM, PLEDGE_PROC);
     	return (0);
     }
    @@ -1590,7 +1596,7 @@ pledge_kill(struct proc *p, pid_t pid)
     {
     	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
     		return 0;
    -	if (p->p_p->ps_pledge & PLEDGE_PROC)
    +	if (p->p_pledge & PLEDGE_PROC)
     		return 0;
     	if (pid == 0 || pid == p->p_p->ps_pid)
     		return 0;
    @@ -1615,7 +1621,7 @@ pledge_protexec(struct proc *p, int prot
     	/* Before kbind(2) call, ld.so and crt may create EXEC mappings */
     	if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0)
     		return 0;
    -	if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
    +	if (!(p->p_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
     		return pledge_fail(p, EPERM, PLEDGE_PROTEXEC);
     	return 0;
     }
    Index: sys/kern/vfs_syscalls.c
    ===================================================================
    RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
    diff -u -p -u -r1.372 vfs_syscalls.c
    --- sys/kern/vfs_syscalls.c	29 Jan 2025 14:57:19 -0000	1.372
    +++ sys/kern/vfs_syscalls.c	7 Feb 2025 14:36:44 -0000
    @@ -1016,9 +1016,11 @@ sys_unveil(struct proc *p, void *v, regi
     	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
     		VOP_UNLOCK(nd.ni_dvp);
     
    -	if (allow)
    +	if (allow) {
    +		single_thread_set(p, SINGLE_UNWIND);
     		error = unveil_add(p, &nd, permissions);
    -	else
    +		single_thread_clear(p);
    +	} else
     		error = EPERM;
     
     	/* release vref from namei, but not vref from unveil_add */
    Index: sys/dev/vmm/vmm.c
    ===================================================================
    RCS file: /cvs/src/sys/dev/vmm/vmm.c,v
    diff -u -p -u -r1.3 vmm.c
    --- sys/dev/vmm/vmm.c	27 Aug 2024 09:16:03 -0000	1.3
    +++ sys/dev/vmm/vmm.c	6 Feb 2025 19:14:14 -0000
    @@ -184,7 +184,7 @@ vm_find(uint32_t id, struct vm **res)
     			 * The managing vmm parent process can lookup all
     			 * all VMs and is indicated by PLEDGE_PROC.
     			 */
    -			if (((p->p_p->ps_pledge &
    +			if (((p->p_pledge &
     			    (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
     			    (vm->vm_creator_pid != p->p_p->ps_pid))
     				ret = EPERM;
    @@ -291,7 +291,7 @@ pledge_ioctl_vmm(struct proc *p, long co
     	case VMM_IOC_INFO:
     	case VMM_IOC_SHAREMEM:
     		/* The "parent" process in vmd forks and manages VMs */
    -		if (p->p_p->ps_pledge & PLEDGE_PROC)
    +		if (p->p_pledge & PLEDGE_PROC)
     			return (0);
     		break;
     	case VMM_IOC_TERM:
    
    
    