patch: relax ni_pledge panic
This makes a per-thread copy, p_pledge, of p_p->ps_pledge as we enter
each system call, and changes all the pledge checks to run against
this per-thread variable.

This means any system call that does multiple sleeps will honour the
pledge promises that were in effect at invocation time. Something
expensive like open() will see a consistent value throughout.
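To illustrate the race this closes, here is a minimal userland sketch
(the types and the pledge_check() helper are hypothetical stand-ins,
not the kernel code):

#include <errno.h>
#include <stdint.h>

/* stand-ins for struct process / struct proc */
struct process { uint64_t ps_pledge; };	/* shared; a sibling may reduce it */
struct thread { struct process *pr; uint64_t p_pledge; };

/* at syscall entry: snapshot the shared mask exactly once */
void
syscall_entry(struct thread *t)
{
	t->p_pledge = t->pr->ps_pledge;
}

/*
 * Every later check in the same syscall -- even after a sleep --
 * tests the snapshot, so a concurrent pledge(2) reduction in a
 * sibling thread cannot flip the answer halfway through.
 */
int
pledge_check(struct thread *t, uint64_t need)
{
	return ((t->p_pledge & need) == need) ? 0 : EPERM;
}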
I also propose we change unveil() to force a single_thread_set()
interlock. In the diff there are two parts to this -- adding unveils,
and deleting the entire unveil state when pledge drops all filesystem
access.
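Condensed from the sys_pledge() hunk below, the teardown side of that
interlock looks like this (comments mine):

	single_thread_set(p, SINGLE_UNWIND);	/* park sibling threads */
	KERNEL_LOCK();
	unveil_destroy(pr);			/* drop unveil vnode refs */
	KERNEL_UNLOCK();
	single_thread_clear(p);			/* let siblings resume */

so no other thread is mid-syscall while the shared unveil state is
torn down. sys_unveil() gets the same bracketing around unveil_add().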
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
diff -u -p -u -r1.378 proc.h
--- sys/sys/proc.h 25 Jan 2025 19:21:40 -0000 1.378
+++ sys/sys/proc.h 7 Feb 2025 14:12:00 -0000
@@ -395,6 +395,7 @@ struct proc {
u_char p_usrpri; /* [S] Priority based on p_estcpu & ps_nice */
u_int p_estcpu; /* [S] Time averaged val of p_cpticks */
int p_pledge_syscall; /* Cache of current syscall */
+ uint64_t p_pledge; /* [o] copy of p_p->ps_pledge */
struct ucred *p_ucred; /* [o] cached credentials */
struct sigaltstack p_sigstk; /* sp & on stack state variable */
Index: sys/kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
diff -u -p -u -r1.200 kern_event.c
--- sys/kern/kern_event.c 6 Aug 2024 08:44:54 -0000 1.200
+++ sys/kern/kern_event.c 6 Feb 2025 18:52:15 -0000
@@ -343,7 +343,7 @@ filt_procattach(struct knote *kn)
int nolock;
if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
- (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
+ (curproc->p_pledge & PLEDGE_PROC) == 0)
return pledge_fail(curproc, EPERM, PLEDGE_PROC);
if (kn->kn_id > PID_MAX)
Index: sys/kern/kern_pledge.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_pledge.c,v
diff -u -p -u -r1.321 kern_pledge.c
--- sys/kern/kern_pledge.c 6 Oct 2024 23:39:24 -0000 1.321
+++ sys/kern/kern_pledge.c 7 Feb 2025 14:32:01 -0000
@@ -451,7 +451,7 @@ sys_pledge(struct proc *p, void *v, regi
} */ *uap = v;
struct process *pr = p->p_p;
uint64_t promises, execpromises;
- int error;
+ int error = 0;
int unveil_cleanup = 0;
/* Check for any error in user input */
@@ -481,16 +481,16 @@ sys_pledge(struct proc *p, void *v, regi
/* Only permit reductions */
if (ISSET(pr->ps_flags, PS_PLEDGE) &&
(((promises | pr->ps_pledge) != pr->ps_pledge))) {
- mtx_leave(&pr->ps_mtx);
- return (EPERM);
+ error = EPERM;
+ goto fail;
}
}
if (SCARG(uap, execpromises)) {
/* Only permit reductions */
if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
(((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) {
- mtx_leave(&pr->ps_mtx);
- return (EPERM);
+ error = EPERM;
+ goto fail;
}
}
@@ -509,19 +509,24 @@ sys_pledge(struct proc *p, void *v, regi
atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
}
+fail:
mtx_leave(&pr->ps_mtx);
if (unveil_cleanup) {
/*
* Kill off unveil and drop unveil vnode refs if we no
- * longer are holding any path-accessing pledge
+ * longer are holding any path-accessing pledge.  This must
+ * be done single-threaded so that other sleeping syscalls
+ * finish before we change this.
*/
+ single_thread_set(p, SINGLE_UNWIND);
KERNEL_LOCK();
unveil_destroy(pr);
KERNEL_UNLOCK();
+ single_thread_clear(p);
}
- return (0);
+ return (error);
}
int
@@ -536,7 +541,8 @@ pledge_syscall(struct proc *p, int code,
if (pledge_syscalls[code] == PLEDGE_ALWAYS)
return (0);
- if (p->p_p->ps_pledge & pledge_syscalls[code])
+ p->p_pledge = p->p_p->ps_pledge; /* pledge checks are per-thread */
+ if (p->p_pledge & pledge_syscalls[code])
return (0);
*tval = pledge_syscalls[code];
@@ -559,7 +565,7 @@ pledge_fail(struct proc *p, int error, u
if (KTRPOINT(p, KTR_PLEDGE))
ktrpledge(p, error, code, p->p_pledge_syscall);
#endif
- if (p->p_p->ps_pledge & PLEDGE_ERROR)
+ if (p->p_pledge & PLEDGE_ERROR)
return (ENOSYS);
KERNEL_LOCK();
@@ -593,7 +599,7 @@ pledge_namei(struct proc *p, struct name
if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
(p->p_p->ps_flags & PS_COREDUMP))
return (0);
- pledge = READ_ONCE(p->p_p->ps_pledge);
+ pledge = READ_ONCE(p->p_pledge);
if (ni->ni_pledge == 0)
panic("pledge_namei: ni_pledge");
@@ -729,7 +735,7 @@ pledge_namei(struct proc *p, struct name
/*
* Ensure each flag of ni_pledge has counterpart allowing it in
- * ps_pledge.
+ * p_pledge.
*/
if (ni->ni_pledge & ~pledge)
return (pledge_fail(p, EPERM, (ni->ni_pledge & ~pledge)));
@@ -748,7 +754,7 @@ pledge_recvfd(struct proc *p, struct fil
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0)
+ if ((p->p_pledge & PLEDGE_RECVFD) == 0)
return pledge_fail(p, EPERM, PLEDGE_RECVFD);
switch (fp->f_type) {
@@ -776,7 +782,7 @@ pledge_sendfd(struct proc *p, struct fil
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0)
+ if ((p->p_pledge & PLEDGE_SENDFD) == 0)
return pledge_fail(p, EPERM, PLEDGE_SENDFD);
switch (fp->f_type) {
@@ -804,7 +810,7 @@ pledge_sysctl(struct proc *p, int miblen
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- pledge = READ_ONCE(p->p_p->ps_pledge);
+ pledge = READ_ONCE(p->p_pledge);
if (new)
return pledge_fail(p, EFAULT, 0);
@@ -1010,7 +1016,7 @@ pledge_chown(struct proc *p, uid_t uid,
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if (p->p_p->ps_pledge & PLEDGE_CHOWNUID)
+ if (p->p_pledge & PLEDGE_CHOWNUID)
return (0);
if (uid != -1 && uid != p->p_ucred->cr_uid)
@@ -1028,7 +1034,7 @@ pledge_adjtime(struct proc *p, const voi
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if ((p->p_p->ps_pledge & PLEDGE_SETTIME))
+ if ((p->p_pledge & PLEDGE_SETTIME))
return (0);
if (delta)
return (EPERM);
@@ -1041,7 +1047,7 @@ pledge_sendit(struct proc *p, const void
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
+ if ((p->p_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
return (0); /* may use address */
if (to == NULL)
return (0); /* behaves just like write */
@@ -1057,7 +1063,7 @@ pledge_ioctl(struct proc *p, long com, s
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- pledge = READ_ONCE(p->p_p->ps_pledge);
+ pledge = READ_ONCE(p->p_pledge);
/*
* The ioctl's which are always allowed.
@@ -1365,7 +1371,7 @@ pledge_sockopt(struct proc *p, int set,
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- pledge = READ_ONCE(p->p_p->ps_pledge);
+ pledge = READ_ONCE(p->p_pledge);
/* Always allow these, which are too common to reject */
switch (level) {
@@ -1502,7 +1508,7 @@ pledge_socket(struct proc *p, int domain
if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
return 0;
- pledge = READ_ONCE(p->p_p->ps_pledge);
+ pledge = READ_ONCE(p->p_pledge);
if (ISSET(state, SS_DNS)) {
if (ISSET(pledge, PLEDGE_DNS))
@@ -1534,7 +1540,7 @@ pledge_flock(struct proc *p)
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if ((p->p_p->ps_pledge & PLEDGE_FLOCK))
+ if ((p->p_pledge & PLEDGE_FLOCK))
return (0);
return (pledge_fail(p, EPERM, PLEDGE_FLOCK));
}
@@ -1545,7 +1551,7 @@ pledge_swapctl(struct proc *p, int cmd)
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if (p->p_p->ps_pledge & PLEDGE_VMINFO) {
+ if (p->p_pledge & PLEDGE_VMINFO) {
switch (cmd) {
case SWAP_NSWAP:
case SWAP_STATS:
@@ -1580,7 +1586,7 @@ pledge_fcntl(struct proc *p, int cmd)
{
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
+ if ((p->p_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
return pledge_fail(p, EPERM, PLEDGE_PROC);
return (0);
}
@@ -1590,7 +1596,7 @@ pledge_kill(struct proc *p, pid_t pid)
{
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return 0;
- if (p->p_p->ps_pledge & PLEDGE_PROC)
+ if (p->p_pledge & PLEDGE_PROC)
return 0;
if (pid == 0 || pid == p->p_p->ps_pid)
return 0;
@@ -1615,7 +1621,7 @@ pledge_protexec(struct proc *p, int prot
/* Before kbind(2) call, ld.so and crt may create EXEC mappings */
if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0)
return 0;
- if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
+ if (!(p->p_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
return pledge_fail(p, EPERM, PLEDGE_PROTEXEC);
return 0;
}
Index: sys/kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
diff -u -p -u -r1.372 vfs_syscalls.c
--- sys/kern/vfs_syscalls.c 29 Jan 2025 14:57:19 -0000 1.372
+++ sys/kern/vfs_syscalls.c 7 Feb 2025 14:36:44 -0000
@@ -1016,9 +1016,11 @@ sys_unveil(struct proc *p, void *v, regi
if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
VOP_UNLOCK(nd.ni_dvp);
- if (allow)
+ if (allow) {
+ single_thread_set(p, SINGLE_UNWIND);
error = unveil_add(p, &nd, permissions);
- else
+ single_thread_clear(p);
+ } else
error = EPERM;
/* release vref from namei, but not vref from unveil_add */
Index: sys/dev/vmm/vmm.c
===================================================================
RCS file: /cvs/src/sys/dev/vmm/vmm.c,v
diff -u -p -u -r1.3 vmm.c
--- sys/dev/vmm/vmm.c 27 Aug 2024 09:16:03 -0000 1.3
+++ sys/dev/vmm/vmm.c 6 Feb 2025 19:14:14 -0000
@@ -184,7 +184,7 @@ vm_find(uint32_t id, struct vm **res)
* The managing vmm parent process can look up
* all VMs and is indicated by PLEDGE_PROC.
*/
- if (((p->p_p->ps_pledge &
+ if (((p->p_pledge &
(PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
(vm->vm_creator_pid != p->p_p->ps_pid))
ret = EPERM;
@@ -291,7 +291,7 @@ pledge_ioctl_vmm(struct proc *p, long co
case VMM_IOC_INFO:
case VMM_IOC_SHAREMEM:
/* The "parent" process in vmd forks and manages VMs */
- if (p->p_p->ps_pledge & PLEDGE_PROC)
+ if (p->p_pledge & PLEDGE_PROC)
return (0);
break;
case VMM_IOC_TERM: