From: "Theo de Raadt" Subject: Re: patch: relax ni_pledge panic Cc: Mark Kettenis , semarie@kapouay.eu.org, tech@openbsd.org Date: Fri, 07 Feb 2025 07:46:19 -0700 This makes a per-thread copy p_pledge of p_p->ps_pledge as we enter each system call, and changes all the pledge checks to occur against this per-thread variable. This means any system call that does multiple sleeps, will honour the pledge promises at invocation time. Something expensive like open(), will see a consistent value. I also propose we change unveil() to force single_thread_set interlock. In the diff, there's two parts to this -- adding unveils, and deleting the entire unveil setting when pledge drops all filesystem access. Index: sys/sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v diff -u -p -u -r1.378 proc.h --- sys/sys/proc.h 25 Jan 2025 19:21:40 -0000 1.378 +++ sys/sys/proc.h 7 Feb 2025 14:12:00 -0000 @@ -395,6 +395,7 @@ struct proc { u_char p_usrpri; /* [S] Priority based on p_estcpu & ps_nice */ u_int p_estcpu; /* [S] Time averaged val of p_cpticks */ int p_pledge_syscall; /* Cache of current syscall */ + uint64_t p_pledge; /* [o] copy of p_p->ps_pledge */ struct ucred *p_ucred; /* [o] cached credentials */ struct sigaltstack p_sigstk; /* sp & on stack state variable */ Index: sys/kern/kern_event.c =================================================================== RCS file: /cvs/src/sys/kern/kern_event.c,v diff -u -p -u -r1.200 kern_event.c --- sys/kern/kern_event.c 6 Aug 2024 08:44:54 -0000 1.200 +++ sys/kern/kern_event.c 6 Feb 2025 18:52:15 -0000 @@ -343,7 +343,7 @@ filt_procattach(struct knote *kn) int nolock; if ((curproc->p_p->ps_flags & PS_PLEDGE) && - (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) + (curproc->p_pledge & PLEDGE_PROC) == 0) return pledge_fail(curproc, EPERM, PLEDGE_PROC); if (kn->kn_id > PID_MAX) Index: sys/kern/kern_pledge.c =================================================================== RCS 
file: /cvs/src/sys/kern/kern_pledge.c,v diff -u -p -u -r1.321 kern_pledge.c --- sys/kern/kern_pledge.c 6 Oct 2024 23:39:24 -0000 1.321 +++ sys/kern/kern_pledge.c 7 Feb 2025 14:32:01 -0000 @@ -451,7 +451,7 @@ sys_pledge(struct proc *p, void *v, regi } */ *uap = v; struct process *pr = p->p_p; uint64_t promises, execpromises; - int error; + int error = 0; int unveil_cleanup = 0; /* Check for any error in user input */ @@ -481,16 +481,16 @@ sys_pledge(struct proc *p, void *v, regi /* Only permit reductions */ if (ISSET(pr->ps_flags, PS_PLEDGE) && (((promises | pr->ps_pledge) != pr->ps_pledge))) { - mtx_leave(&pr->ps_mtx); - return (EPERM); + error = EPERM; + goto fail; } } if (SCARG(uap, execpromises)) { /* Only permit reductions */ if (ISSET(pr->ps_flags, PS_EXECPLEDGE) && (((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) { - mtx_leave(&pr->ps_mtx); - return (EPERM); + error = EPERM; + goto fail; } } @@ -509,19 +509,24 @@ sys_pledge(struct proc *p, void *v, regi atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE); } +fail: mtx_leave(&pr->ps_mtx); if (unveil_cleanup) { /* * Kill off unveil and drop unveil vnode refs if we no - * longer are holding any path-accessing pledge + * longer are holding any path-accessing pledge, must + * be done single-threaded so that other sleeping syscalls + * finish first before we change this. 
*/ + single_thread_set(p, SINGLE_UNWIND); KERNEL_LOCK(); unveil_destroy(pr); KERNEL_UNLOCK(); + single_thread_clear(p); } - return (0); + return (error); } int @@ -536,7 +541,8 @@ pledge_syscall(struct proc *p, int code, if (pledge_syscalls[code] == PLEDGE_ALWAYS) return (0); - if (p->p_p->ps_pledge & pledge_syscalls[code]) + p->p_pledge = p->p_p->ps_pledge; /* pledge checks are per-thread */ + if (p->p_pledge & pledge_syscalls[code]) return (0); *tval = pledge_syscalls[code]; @@ -559,7 +565,7 @@ pledge_fail(struct proc *p, int error, u if (KTRPOINT(p, KTR_PLEDGE)) ktrpledge(p, error, code, p->p_pledge_syscall); #endif - if (p->p_p->ps_pledge & PLEDGE_ERROR) + if (p->p_pledge & PLEDGE_ERROR) return (ENOSYS); KERNEL_LOCK(); @@ -593,7 +599,7 @@ pledge_namei(struct proc *p, struct name if ((p->p_p->ps_flags & PS_PLEDGE) == 0 || (p->p_p->ps_flags & PS_COREDUMP)) return (0); - pledge = READ_ONCE(p->p_p->ps_pledge); + pledge = READ_ONCE(p->p_pledge); if (ni->ni_pledge == 0) panic("pledge_namei: ni_pledge"); @@ -729,7 +735,7 @@ pledge_namei(struct proc *p, struct name /* * Ensure each flag of ni_pledge has counterpart allowing it in - * ps_pledge. + * p_pledge. 
*/ if (ni->ni_pledge & ~pledge) return (pledge_fail(p, EPERM, (ni->ni_pledge & ~pledge))); @@ -748,7 +754,7 @@ pledge_recvfd(struct proc *p, struct fil if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0) + if ((p->p_pledge & PLEDGE_RECVFD) == 0) return pledge_fail(p, EPERM, PLEDGE_RECVFD); switch (fp->f_type) { @@ -776,7 +782,7 @@ pledge_sendfd(struct proc *p, struct fil if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0) + if ((p->p_pledge & PLEDGE_SENDFD) == 0) return pledge_fail(p, EPERM, PLEDGE_SENDFD); switch (fp->f_type) { @@ -804,7 +810,7 @@ pledge_sysctl(struct proc *p, int miblen if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - pledge = READ_ONCE(p->p_p->ps_pledge); + pledge = READ_ONCE(p->p_pledge); if (new) return pledge_fail(p, EFAULT, 0); @@ -1010,7 +1016,7 @@ pledge_chown(struct proc *p, uid_t uid, if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if (p->p_p->ps_pledge & PLEDGE_CHOWNUID) + if (p->p_pledge & PLEDGE_CHOWNUID) return (0); if (uid != -1 && uid != p->p_ucred->cr_uid) @@ -1028,7 +1034,7 @@ pledge_adjtime(struct proc *p, const voi if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if ((p->p_p->ps_pledge & PLEDGE_SETTIME)) + if ((p->p_pledge & PLEDGE_SETTIME)) return (0); if (delta) return (EPERM); @@ -1041,7 +1047,7 @@ pledge_sendit(struct proc *p, const void if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS))) + if ((p->p_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS))) return (0); /* may use address */ if (to == NULL) return (0); /* behaves just like write */ @@ -1057,7 +1063,7 @@ pledge_ioctl(struct proc *p, long com, s if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - pledge = READ_ONCE(p->p_p->ps_pledge); + pledge = READ_ONCE(p->p_pledge); /* * The ioctl's which are always allowed. 
@@ -1365,7 +1371,7 @@ pledge_sockopt(struct proc *p, int set, if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - pledge = READ_ONCE(p->p_p->ps_pledge); + pledge = READ_ONCE(p->p_pledge); /* Always allow these, which are too common to reject */ switch (level) { @@ -1502,7 +1508,7 @@ pledge_socket(struct proc *p, int domain if (!ISSET(p->p_p->ps_flags, PS_PLEDGE)) return 0; - pledge = READ_ONCE(p->p_p->ps_pledge); + pledge = READ_ONCE(p->p_pledge); if (ISSET(state, SS_DNS)) { if (ISSET(pledge, PLEDGE_DNS)) @@ -1534,7 +1540,7 @@ pledge_flock(struct proc *p) if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if ((p->p_p->ps_pledge & PLEDGE_FLOCK)) + if ((p->p_pledge & PLEDGE_FLOCK)) return (0); return (pledge_fail(p, EPERM, PLEDGE_FLOCK)); } @@ -1545,7 +1551,7 @@ pledge_swapctl(struct proc *p, int cmd) if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if (p->p_p->ps_pledge & PLEDGE_VMINFO) { + if (p->p_pledge & PLEDGE_VMINFO) { switch (cmd) { case SWAP_NSWAP: case SWAP_STATS: @@ -1580,7 +1586,7 @@ pledge_fcntl(struct proc *p, int cmd) { if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return (0); - if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN) + if ((p->p_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN) return pledge_fail(p, EPERM, PLEDGE_PROC); return (0); } @@ -1590,7 +1596,7 @@ pledge_kill(struct proc *p, pid_t pid) { if ((p->p_p->ps_flags & PS_PLEDGE) == 0) return 0; - if (p->p_p->ps_pledge & PLEDGE_PROC) + if (p->p_pledge & PLEDGE_PROC) return 0; if (pid == 0 || pid == p->p_p->ps_pid) return 0; @@ -1615,7 +1621,7 @@ pledge_protexec(struct proc *p, int prot /* Before kbind(2) call, ld.so and crt may create EXEC mappings */ if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0) return 0; - if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC)) + if (!(p->p_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC)) return pledge_fail(p, EPERM, PLEDGE_PROTEXEC); return 0; } Index: sys/kern/vfs_syscalls.c 
=================================================================== RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v diff -u -p -u -r1.372 vfs_syscalls.c --- sys/kern/vfs_syscalls.c 29 Jan 2025 14:57:19 -0000 1.372 +++ sys/kern/vfs_syscalls.c 7 Feb 2025 14:36:44 -0000 @@ -1016,9 +1016,11 @@ sys_unveil(struct proc *p, void *v, regi if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp) VOP_UNLOCK(nd.ni_dvp); - if (allow) + if (allow) { + single_thread_set(p, SINGLE_UNWIND); error = unveil_add(p, &nd, permissions); - else + single_thread_clear(p); + } else error = EPERM; /* release vref from namei, but not vref from unveil_add */ Index: sys/dev/vmm/vmm.c =================================================================== RCS file: /cvs/src/sys/dev/vmm/vmm.c,v diff -u -p -u -r1.3 vmm.c --- sys/dev/vmm/vmm.c 27 Aug 2024 09:16:03 -0000 1.3 +++ sys/dev/vmm/vmm.c 6 Feb 2025 19:14:14 -0000 @@ -184,7 +184,7 @@ vm_find(uint32_t id, struct vm **res) * The managing vmm parent process can lookup all * all VMs and is indicated by PLEDGE_PROC. */ - if (((p->p_p->ps_pledge & + if (((p->p_pledge & (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) && (vm->vm_creator_pid != p->p_p->ps_pid)) ret = EPERM; @@ -291,7 +291,7 @@ pledge_ioctl_vmm(struct proc *p, long co case VMM_IOC_INFO: case VMM_IOC_SHAREMEM: /* The "parent" process in vmd forks and manages VMs */ - if (p->p_p->ps_pledge & PLEDGE_PROC) + if (p->p_pledge & PLEDGE_PROC) return (0); break; case VMM_IOC_TERM: