From: Claudio Jeker Subject: Re: patch: relax ni_pledge panic To: Theo de Raadt Cc: Mark Kettenis , semarie@kapouay.eu.org, tech@openbsd.org Date: Fri, 7 Feb 2025 17:53:44 +0100 On Fri, Feb 07, 2025 at 07:46:19AM -0700, Theo de Raadt wrote: > This makes a per-thread copy p_pledge of p_p->ps_pledge as we enter > each system call, and changes all the pledge checks to occur against > this per-thread variable. > > This means any system call that does multiple sleeps, will honour the > pledge promises at invocation time. Something expensive like open(), will > see a consistent value. > > I also propose we change unveil() to force single_thread_set interlock. > In the diff, there's two parts to this -- adding unveils, and deleting > the entire unveil setting when pledge drops all filesystem access. I like this. If this goes in, many of those READ_ONCE wrappers can die since p_pledge can be accessed without fear of concurrent changes. I need to look closer at the single_thread_set calls. The one in sys_pledge looks ok since the call is at the tail end of the syscall. The sys_unveil one I need to double check when I have time. The problem with single_thread_set is that it can call exit1 or sleep, which must happen in a spot that is safe. 
> Index: sys/sys/proc.h > =================================================================== > RCS file: /cvs/src/sys/sys/proc.h,v > diff -u -p -u -r1.378 proc.h > --- sys/sys/proc.h 25 Jan 2025 19:21:40 -0000 1.378 > +++ sys/sys/proc.h 7 Feb 2025 14:12:00 -0000 > @@ -395,6 +395,7 @@ struct proc { > u_char p_usrpri; /* [S] Priority based on p_estcpu & ps_nice */ > u_int p_estcpu; /* [S] Time averaged val of p_cpticks */ > int p_pledge_syscall; /* Cache of current syscall */ > + uint64_t p_pledge; /* [o] copy of p_p->ps_pledge */ > > struct ucred *p_ucred; /* [o] cached credentials */ > struct sigaltstack p_sigstk; /* sp & on stack state variable */ > Index: sys/kern/kern_event.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_event.c,v > diff -u -p -u -r1.200 kern_event.c > --- sys/kern/kern_event.c 6 Aug 2024 08:44:54 -0000 1.200 > +++ sys/kern/kern_event.c 6 Feb 2025 18:52:15 -0000 > @@ -343,7 +343,7 @@ filt_procattach(struct knote *kn) > int nolock; > > if ((curproc->p_p->ps_flags & PS_PLEDGE) && > - (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) > + (curproc->p_pledge & PLEDGE_PROC) == 0) > return pledge_fail(curproc, EPERM, PLEDGE_PROC); > > if (kn->kn_id > PID_MAX) > Index: sys/kern/kern_pledge.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_pledge.c,v > diff -u -p -u -r1.321 kern_pledge.c > --- sys/kern/kern_pledge.c 6 Oct 2024 23:39:24 -0000 1.321 > +++ sys/kern/kern_pledge.c 7 Feb 2025 14:32:01 -0000 > @@ -451,7 +451,7 @@ sys_pledge(struct proc *p, void *v, regi > } */ *uap = v; > struct process *pr = p->p_p; > uint64_t promises, execpromises; > - int error; > + int error = 0; > int unveil_cleanup = 0; > > /* Check for any error in user input */ > @@ -481,16 +481,16 @@ sys_pledge(struct proc *p, void *v, regi > /* Only permit reductions */ > if (ISSET(pr->ps_flags, PS_PLEDGE) && > (((promises | pr->ps_pledge) != pr->ps_pledge))) { > - 
mtx_leave(&pr->ps_mtx); > - return (EPERM); > + error = EPERM; > + goto fail; > } > } > if (SCARG(uap, execpromises)) { > /* Only permit reductions */ > if (ISSET(pr->ps_flags, PS_EXECPLEDGE) && > (((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) { > - mtx_leave(&pr->ps_mtx); > - return (EPERM); > + error = EPERM; > + goto fail; > } > } > > @@ -509,19 +509,24 @@ sys_pledge(struct proc *p, void *v, regi > atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE); > } > > +fail: > mtx_leave(&pr->ps_mtx); > > if (unveil_cleanup) { > /* > * Kill off unveil and drop unveil vnode refs if we no > - * longer are holding any path-accessing pledge > + * longer are holding any path-accessing pledge, must > + * be done single-thread so that other sleeping syscalls > + * finish first before we change this. > */ > + single_thread_set(p, SINGLE_UNWIND); > KERNEL_LOCK(); > unveil_destroy(pr); > KERNEL_UNLOCK(); > + single_thread_clear(p); > } > > - return (0); > + return (error); > } > > int > @@ -536,7 +541,8 @@ pledge_syscall(struct proc *p, int code, > if (pledge_syscalls[code] == PLEDGE_ALWAYS) > return (0); > > - if (p->p_p->ps_pledge & pledge_syscalls[code]) > + p->p_pledge = p->p_p->ps_pledge; /* pledge checks are per-thread */ > + if (p->p_pledge & pledge_syscalls[code]) > return (0); > > *tval = pledge_syscalls[code]; > @@ -559,7 +565,7 @@ pledge_fail(struct proc *p, int error, u > if (KTRPOINT(p, KTR_PLEDGE)) > ktrpledge(p, error, code, p->p_pledge_syscall); > #endif > - if (p->p_p->ps_pledge & PLEDGE_ERROR) > + if (p->p_pledge & PLEDGE_ERROR) > return (ENOSYS); > > KERNEL_LOCK(); > @@ -593,7 +599,7 @@ pledge_namei(struct proc *p, struct name > if ((p->p_p->ps_flags & PS_PLEDGE) == 0 || > (p->p_p->ps_flags & PS_COREDUMP)) > return (0); > - pledge = READ_ONCE(p->p_p->ps_pledge); > + pledge = READ_ONCE(p->p_pledge); > > if (ni->ni_pledge == 0) > panic("pledge_namei: ni_pledge"); > @@ -729,7 +735,7 @@ pledge_namei(struct proc *p, struct name > > /* > * Ensure each flag of 
ni_pledge has counterpart allowing it in > - * ps_pledge. > + * p_pledge. > */ > if (ni->ni_pledge & ~pledge) > return (pledge_fail(p, EPERM, (ni->ni_pledge & ~pledge))); > @@ -748,7 +754,7 @@ pledge_recvfd(struct proc *p, struct fil > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0) > + if ((p->p_pledge & PLEDGE_RECVFD) == 0) > return pledge_fail(p, EPERM, PLEDGE_RECVFD); > > switch (fp->f_type) { > @@ -776,7 +782,7 @@ pledge_sendfd(struct proc *p, struct fil > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0) > + if ((p->p_pledge & PLEDGE_SENDFD) == 0) > return pledge_fail(p, EPERM, PLEDGE_SENDFD); > > switch (fp->f_type) { > @@ -804,7 +810,7 @@ pledge_sysctl(struct proc *p, int miblen > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - pledge = READ_ONCE(p->p_p->ps_pledge); > + pledge = READ_ONCE(p->p_pledge); > > if (new) > return pledge_fail(p, EFAULT, 0); > @@ -1010,7 +1016,7 @@ pledge_chown(struct proc *p, uid_t uid, > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if (p->p_p->ps_pledge & PLEDGE_CHOWNUID) > + if (p->p_pledge & PLEDGE_CHOWNUID) > return (0); > > if (uid != -1 && uid != p->p_ucred->cr_uid) > @@ -1028,7 +1034,7 @@ pledge_adjtime(struct proc *p, const voi > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if ((p->p_p->ps_pledge & PLEDGE_SETTIME)) > + if ((p->p_pledge & PLEDGE_SETTIME)) > return (0); > if (delta) > return (EPERM); > @@ -1041,7 +1047,7 @@ pledge_sendit(struct proc *p, const void > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS))) > + if ((p->p_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS))) > return (0); /* may use address */ > if (to == NULL) > return (0); /* behaves just like write */ > @@ -1057,7 +1063,7 @@ pledge_ioctl(struct proc *p, long com, s > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > 
return (0); > - pledge = READ_ONCE(p->p_p->ps_pledge); > + pledge = READ_ONCE(p->p_pledge); > > /* > * The ioctl's which are always allowed. > @@ -1365,7 +1371,7 @@ pledge_sockopt(struct proc *p, int set, > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - pledge = READ_ONCE(p->p_p->ps_pledge); > + pledge = READ_ONCE(p->p_pledge); > > /* Always allow these, which are too common to reject */ > switch (level) { > @@ -1502,7 +1508,7 @@ pledge_socket(struct proc *p, int domain > > if (!ISSET(p->p_p->ps_flags, PS_PLEDGE)) > return 0; > - pledge = READ_ONCE(p->p_p->ps_pledge); > + pledge = READ_ONCE(p->p_pledge); > > if (ISSET(state, SS_DNS)) { > if (ISSET(pledge, PLEDGE_DNS)) > @@ -1534,7 +1540,7 @@ pledge_flock(struct proc *p) > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if ((p->p_p->ps_pledge & PLEDGE_FLOCK)) > + if ((p->p_pledge & PLEDGE_FLOCK)) > return (0); > return (pledge_fail(p, EPERM, PLEDGE_FLOCK)); > } > @@ -1545,7 +1551,7 @@ pledge_swapctl(struct proc *p, int cmd) > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if (p->p_p->ps_pledge & PLEDGE_VMINFO) { > + if (p->p_pledge & PLEDGE_VMINFO) { > switch (cmd) { > case SWAP_NSWAP: > case SWAP_STATS: > @@ -1580,7 +1586,7 @@ pledge_fcntl(struct proc *p, int cmd) > { > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN) > + if ((p->p_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN) > return pledge_fail(p, EPERM, PLEDGE_PROC); > return (0); > } > @@ -1590,7 +1596,7 @@ pledge_kill(struct proc *p, pid_t pid) > { > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return 0; > - if (p->p_p->ps_pledge & PLEDGE_PROC) > + if (p->p_pledge & PLEDGE_PROC) > return 0; > if (pid == 0 || pid == p->p_p->ps_pid) > return 0; > @@ -1615,7 +1621,7 @@ pledge_protexec(struct proc *p, int prot > /* Before kbind(2) call, ld.so and crt may create EXEC mappings */ > if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0) > return 
0; > - if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC)) > + if (!(p->p_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC)) > return pledge_fail(p, EPERM, PLEDGE_PROTEXEC); > return 0; > } > Index: sys/kern/vfs_syscalls.c > =================================================================== > RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v > diff -u -p -u -r1.372 vfs_syscalls.c > --- sys/kern/vfs_syscalls.c 29 Jan 2025 14:57:19 -0000 1.372 > +++ sys/kern/vfs_syscalls.c 7 Feb 2025 14:36:44 -0000 > @@ -1016,9 +1016,11 @@ sys_unveil(struct proc *p, void *v, regi > if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp) > VOP_UNLOCK(nd.ni_dvp); > > - if (allow) > + if (allow) { > + single_thread_set(p, SINGLE_UNWIND); > error = unveil_add(p, &nd, permissions); > - else > + single_thread_clear(p); > + } else > error = EPERM; > > /* release vref from namei, but not vref from unveil_add */ > Index: sys/dev/vmm/vmm.c > =================================================================== > RCS file: /cvs/src/sys/dev/vmm/vmm.c,v > diff -u -p -u -r1.3 vmm.c > --- sys/dev/vmm/vmm.c 27 Aug 2024 09:16:03 -0000 1.3 > +++ sys/dev/vmm/vmm.c 6 Feb 2025 19:14:14 -0000 > @@ -184,7 +184,7 @@ vm_find(uint32_t id, struct vm **res) > * The managing vmm parent process can lookup all > * all VMs and is indicated by PLEDGE_PROC. > */ > - if (((p->p_p->ps_pledge & > + if (((p->p_pledge & > (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) && > (vm->vm_creator_pid != p->p_p->ps_pid)) > ret = EPERM; > @@ -291,7 +291,7 @@ pledge_ioctl_vmm(struct proc *p, long co > case VMM_IOC_INFO: > case VMM_IOC_SHAREMEM: > /* The "parent" process in vmd forks and manages VMs */ > - if (p->p_p->ps_pledge & PLEDGE_PROC) > + if (p->p_pledge & PLEDGE_PROC) > return (0); > break; > case VMM_IOC_TERM: > -- :wq Claudio