Index | Thread | Search

From:
"Theo de Raadt" <deraadt@openbsd.org>
Subject:
Re: patch: relax ni_pledge panic
To:
Mark Kettenis <mark.kettenis@xs4all.nl>, semarie@kapouay.eu.org, tech@openbsd.org
Date:
Fri, 07 Feb 2025 10:18:56 -0700

Download raw body.

Thread
Claudio Jeker <cjeker@diehard.n-r-g.com> wrote:

> I like this, if this goes in many of those READ_ONCE wrappers can die
> since p_pledge can be accessed without fear of concurrent changes.

Yes, a later diff had that.  Those parts are included now.

> I need to look closer at the single_thread_set calls. 
> The one in sys_pledge looks ok since the call is at the tail end of the
> syscall. The sys_unveil one I need to double check when I have time.
> 
> The problem with single_thread_set is that it can call exit1 or sleep
> which must happen in a spot that is safe.

Very scary.  I now enforce it for the entire going-to-do-work part of
sys_unveil, so that it leaks no objects in that case.

Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
diff -u -p -u -r1.378 proc.h
--- sys/sys/proc.h	25 Jan 2025 19:21:40 -0000	1.378
+++ sys/sys/proc.h	7 Feb 2025 14:12:00 -0000
@@ -395,6 +395,7 @@ struct proc {
 	u_char	p_usrpri;	/* [S] Priority based on p_estcpu & ps_nice */
 	u_int	p_estcpu;		/* [S] Time averaged val of p_cpticks */
 	int	p_pledge_syscall;	/* Cache of current syscall */
+	uint64_t p_pledge;		/* [o] copy of p_p->ps_pledge */
 
 	struct	ucred *p_ucred;		/* [o] cached credentials */
 	struct	sigaltstack p_sigstk;	/* sp & on stack state variable */
Index: sys/kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
diff -u -p -u -r1.200 kern_event.c
--- sys/kern/kern_event.c	6 Aug 2024 08:44:54 -0000	1.200
+++ sys/kern/kern_event.c	6 Feb 2025 18:52:15 -0000
@@ -343,7 +343,7 @@ filt_procattach(struct knote *kn)
 	int nolock;
 
 	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
-	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
+	    (curproc->p_pledge & PLEDGE_PROC) == 0)
 		return pledge_fail(curproc, EPERM, PLEDGE_PROC);
 
 	if (kn->kn_id > PID_MAX)
Index: sys/kern/kern_pledge.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_pledge.c,v
diff -u -p -u -r1.321 kern_pledge.c
--- sys/kern/kern_pledge.c	6 Oct 2024 23:39:24 -0000	1.321
+++ sys/kern/kern_pledge.c	7 Feb 2025 15:04:11 -0000
@@ -451,7 +451,7 @@ sys_pledge(struct proc *p, void *v, regi
 	} */	*uap = v;
 	struct process *pr = p->p_p;
 	uint64_t promises, execpromises;
-	int error;
+	int error = 0;
 	int unveil_cleanup = 0;
 
 	/* Check for any error in user input */
@@ -481,16 +481,16 @@ sys_pledge(struct proc *p, void *v, regi
 		/* Only permit reductions */
 		if (ISSET(pr->ps_flags, PS_PLEDGE) &&
 		    (((promises | pr->ps_pledge) != pr->ps_pledge))) {
-			mtx_leave(&pr->ps_mtx);
-			return (EPERM);
+			error = EPERM;
+			goto fail;
 		}
 	}
 	if (SCARG(uap, execpromises)) {
 		/* Only permit reductions */
 		if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
 		    (((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) {
-			mtx_leave(&pr->ps_mtx);
-			return (EPERM);
+			error = EPERM;
+			goto fail;
 		}
 	}
 
@@ -509,19 +509,24 @@ sys_pledge(struct proc *p, void *v, regi
 		atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
 	}
 
+fail:
 	mtx_leave(&pr->ps_mtx);
 
 	if (unveil_cleanup) {
 		/*
 		 * Kill off unveil and drop unveil vnode refs if we no
-		 * longer are holding any path-accessing pledge
+		 * longer are holding any path-accessing pledge, must
+		 * be done single-threaded so that other sleeping syscalls
+		 * finish before we change this.
 		 */
+		single_thread_set(p, SINGLE_UNWIND);
 		KERNEL_LOCK();
 		unveil_destroy(pr);
 		KERNEL_UNLOCK();
+		single_thread_clear(p);
 	}
 
-	return (0);
+	return (error);
 }
 
 int
@@ -536,7 +541,8 @@ pledge_syscall(struct proc *p, int code,
 	if (pledge_syscalls[code] == PLEDGE_ALWAYS)
 		return (0);
 
-	if (p->p_p->ps_pledge & pledge_syscalls[code])
+	p->p_pledge = p->p_p->ps_pledge;	/* pledge checks are per-thread */
+	if (p->p_pledge & pledge_syscalls[code])
 		return (0);
 
 	*tval = pledge_syscalls[code];
@@ -559,7 +565,7 @@ pledge_fail(struct proc *p, int error, u
 	if (KTRPOINT(p, KTR_PLEDGE))
 		ktrpledge(p, error, code, p->p_pledge_syscall);
 #endif
-	if (p->p_p->ps_pledge & PLEDGE_ERROR)
+	if (p->p_pledge & PLEDGE_ERROR)
 		return (ENOSYS);
 
 	KERNEL_LOCK();
@@ -593,7 +599,7 @@ pledge_namei(struct proc *p, struct name
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
 	    (p->p_p->ps_flags & PS_COREDUMP))
 		return (0);
-	pledge = READ_ONCE(p->p_p->ps_pledge);
+	pledge = p->p_pledge;
 
 	if (ni->ni_pledge == 0)
 		panic("pledge_namei: ni_pledge");
@@ -604,8 +610,7 @@ pledge_namei(struct proc *p, struct name
 	 */
 
 	/* Doing a permitted execve() */
-	if ((ni->ni_pledge & PLEDGE_EXEC) &&
-	    (pledge & PLEDGE_EXEC))
+	if ((ni->ni_pledge & PLEDGE_EXEC) && (pledge & PLEDGE_EXEC))
 		return (0);
 
 	error = canonpath(origpath, path, sizeof(path));
@@ -729,7 +734,7 @@ pledge_namei(struct proc *p, struct name
 
 	/*
 	 * Ensure each flag of ni_pledge has counterpart allowing it in
-	 * ps_pledge.
+	 * p_pledge.
 	 */
 	if (ni->ni_pledge & ~pledge)
 		return (pledge_fail(p, EPERM, (ni->ni_pledge & ~pledge)));
@@ -748,7 +753,7 @@ pledge_recvfd(struct proc *p, struct fil
 
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
-	if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0)
+	if ((p->p_pledge & PLEDGE_RECVFD) == 0)
 		return pledge_fail(p, EPERM, PLEDGE_RECVFD);
 
 	switch (fp->f_type) {
@@ -776,7 +781,7 @@ pledge_sendfd(struct proc *p, struct fil
 
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
-	if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0)
+	if ((p->p_pledge & PLEDGE_SENDFD) == 0)
 		return pledge_fail(p, EPERM, PLEDGE_SENDFD);
 
 	switch (fp->f_type) {
@@ -804,7 +809,7 @@ pledge_sysctl(struct proc *p, int miblen
 
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
-	pledge = READ_ONCE(p->p_p->ps_pledge);
+	pledge = p->p_pledge;
 
 	if (new)
 		return pledge_fail(p, EFAULT, 0);
@@ -1010,7 +1015,7 @@ pledge_chown(struct proc *p, uid_t uid, 
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
 
-	if (p->p_p->ps_pledge & PLEDGE_CHOWNUID)
+	if (p->p_pledge & PLEDGE_CHOWNUID)
 		return (0);
 
 	if (uid != -1 && uid != p->p_ucred->cr_uid)
@@ -1028,7 +1033,7 @@ pledge_adjtime(struct proc *p, const voi
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
 
-	if ((p->p_p->ps_pledge & PLEDGE_SETTIME))
+	if ((p->p_pledge & PLEDGE_SETTIME))
 		return (0);
 	if (delta)
 		return (EPERM);
@@ -1041,7 +1046,7 @@ pledge_sendit(struct proc *p, const void
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
 
-	if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
+	if ((p->p_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
 		return (0);		/* may use address */
 	if (to == NULL)
 		return (0);		/* behaves just like write */
@@ -1057,7 +1062,7 @@ pledge_ioctl(struct proc *p, long com, s
 
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
-	pledge = READ_ONCE(p->p_p->ps_pledge);
+	pledge = p->p_pledge;
 
 	/*
 	 * The ioctl's which are always allowed.
@@ -1365,7 +1370,7 @@ pledge_sockopt(struct proc *p, int set, 
 
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
-	pledge = READ_ONCE(p->p_p->ps_pledge);
+	pledge = p->p_pledge;
 
 	/* Always allow these, which are too common to reject */
 	switch (level) {
@@ -1502,7 +1507,7 @@ pledge_socket(struct proc *p, int domain
 
 	if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
 		return 0;
-	pledge = READ_ONCE(p->p_p->ps_pledge);
+	pledge = p->p_pledge;
 
 	if (ISSET(state, SS_DNS)) {
 		if (ISSET(pledge, PLEDGE_DNS))
@@ -1534,7 +1539,7 @@ pledge_flock(struct proc *p)
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
 
-	if ((p->p_p->ps_pledge & PLEDGE_FLOCK))
+	if ((p->p_pledge & PLEDGE_FLOCK))
 		return (0);
 	return (pledge_fail(p, EPERM, PLEDGE_FLOCK));
 }
@@ -1545,7 +1550,7 @@ pledge_swapctl(struct proc *p, int cmd)
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
 
-	if (p->p_p->ps_pledge & PLEDGE_VMINFO) {
+	if (p->p_pledge & PLEDGE_VMINFO) {
 		switch (cmd) {
 		case SWAP_NSWAP:
 		case SWAP_STATS:
@@ -1580,7 +1585,7 @@ pledge_fcntl(struct proc *p, int cmd)
 {
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return (0);
-	if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
+	if ((p->p_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
 		return pledge_fail(p, EPERM, PLEDGE_PROC);
 	return (0);
 }
@@ -1590,7 +1595,7 @@ pledge_kill(struct proc *p, pid_t pid)
 {
 	if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
 		return 0;
-	if (p->p_p->ps_pledge & PLEDGE_PROC)
+	if (p->p_pledge & PLEDGE_PROC)
 		return 0;
 	if (pid == 0 || pid == p->p_p->ps_pid)
 		return 0;
@@ -1615,7 +1620,7 @@ pledge_protexec(struct proc *p, int prot
 	/* Before kbind(2) call, ld.so and crt may create EXEC mappings */
 	if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0)
 		return 0;
-	if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
+	if (!(p->p_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
 		return pledge_fail(p, EPERM, PLEDGE_PROTEXEC);
 	return 0;
 }
Index: sys/kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
diff -u -p -u -r1.372 vfs_syscalls.c
--- sys/kern/vfs_syscalls.c	29 Jan 2025 14:57:19 -0000	1.372
+++ sys/kern/vfs_syscalls.c	7 Feb 2025 17:12:25 -0000
@@ -966,6 +966,9 @@ sys_unveil(struct proc *p, void *v, regi
 	    sizeof(permissions), NULL);
 	if (error)
 		return (error);
+
+	single_thread_set(p, SINGLE_UNWIND);
+
 	pathname = pool_get(&namei_pool, PR_WAITOK);
 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
 	if (error)
@@ -1031,6 +1034,7 @@ sys_unveil(struct proc *p, void *v, regi
 end:
 	pool_put(&namei_pool, pathname);
 
+	single_thread_clear(p);
 	return (error);
 }
 
Index: sys/dev/vmm/vmm.c
===================================================================
RCS file: /cvs/src/sys/dev/vmm/vmm.c,v
diff -u -p -u -r1.3 vmm.c
--- sys/dev/vmm/vmm.c	27 Aug 2024 09:16:03 -0000	1.3
+++ sys/dev/vmm/vmm.c	6 Feb 2025 19:14:14 -0000
@@ -184,7 +184,7 @@ vm_find(uint32_t id, struct vm **res)
 			 * The managing vmm parent process can lookup all
 			 * all VMs and is indicated by PLEDGE_PROC.
 			 */
-			if (((p->p_p->ps_pledge &
+			if (((p->p_pledge &
 			    (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
 			    (vm->vm_creator_pid != p->p_p->ps_pid))
 				ret = EPERM;
@@ -291,7 +291,7 @@ pledge_ioctl_vmm(struct proc *p, long co
 	case VMM_IOC_INFO:
 	case VMM_IOC_SHAREMEM:
 		/* The "parent" process in vmd forks and manages VMs */
-		if (p->p_p->ps_pledge & PLEDGE_PROC)
+		if (p->p_pledge & PLEDGE_PROC)
 			return (0);
 		break;
 	case VMM_IOC_TERM: