patch: relax ni_pledge panic
On Fri, Feb 07, 2025 at 07:46:19AM -0700, Theo de Raadt wrote:
> This makes a per-thread copy p_pledge of p_p->ps_pledge as we enter
> each system call, and changes all the pledge checks to occur against
> this per-thread variable.
>
> This means any system call that does multiple sleeps will honour the
> pledge promises at invocation time. Something expensive like open()
> will see a consistent value.
>
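(The idea in miniature, as a sketch with made-up names rather than the
kernel code: the shared mask is loaded exactly once at syscall entry,
and every later check reads the private copy, so a concurrent
pledge(2) in a sibling thread cannot change the answer between
sleeps.)

#include <stdatomic.h>
#include <stdint.h>

_Atomic uint64_t shared_mask;	/* stands in for p_p->ps_pledge */

int
entry_check(uint64_t *snap, uint64_t need)
{
	*snap = atomic_load(&shared_mask);	/* one load at entry */
	return ((*snap & need) != 0);
}

int
later_check(uint64_t snap, uint64_t need)
{
	/* re-checks use the snapshot, never the shared value */
	return ((snap & need) != 0);
}
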
> I also propose we change unveil() to force a single_thread_set
> interlock. In the diff there are two parts to this -- adding unveils,
> and deleting the entire unveil setting when pledge drops all
> filesystem access.
I like this. If this goes in, many of those READ_ONCE wrappers can die,
since p_pledge can be accessed without fear of concurrent changes.
I need to look closer at the single_thread_set calls.
The one in sys_pledge looks OK since the call is at the tail end of the
syscall. The one in sys_unveil I need to double-check when I have time.
The problem with single_thread_set is that it can call exit1 or sleep,
which must happen at a spot where that is safe.
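Roughly the constraint, as an illustrative fragment rather than one of
the real call sites:

	/*
	 * single_thread_set() can sleep waiting for siblings to park,
	 * or end up in exit1(), so it must be reached with no locks
	 * held and no half-done state to roll back.
	 */
	single_thread_set(p, SINGLE_UNWIND);
	/* ...modify process-wide state, e.g. the unveil list... */
	single_thread_clear(p);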
> Index: sys/sys/proc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/proc.h,v
> diff -u -p -u -r1.378 proc.h
> --- sys/sys/proc.h 25 Jan 2025 19:21:40 -0000 1.378
> +++ sys/sys/proc.h 7 Feb 2025 14:12:00 -0000
> @@ -395,6 +395,7 @@ struct proc {
> u_char p_usrpri; /* [S] Priority based on p_estcpu & ps_nice */
> u_int p_estcpu; /* [S] Time averaged val of p_cpticks */
> int p_pledge_syscall; /* Cache of current syscall */
> + uint64_t p_pledge; /* [o] copy of p_p->ps_pledge */
>
> struct ucred *p_ucred; /* [o] cached credentials */
> struct sigaltstack p_sigstk; /* sp & on stack state variable */
> Index: sys/kern/kern_event.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_event.c,v
> diff -u -p -u -r1.200 kern_event.c
> --- sys/kern/kern_event.c 6 Aug 2024 08:44:54 -0000 1.200
> +++ sys/kern/kern_event.c 6 Feb 2025 18:52:15 -0000
> @@ -343,7 +343,7 @@ filt_procattach(struct knote *kn)
> int nolock;
>
> if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
> - (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
> + (curproc->p_pledge & PLEDGE_PROC) == 0)
> return pledge_fail(curproc, EPERM, PLEDGE_PROC);
>
> if (kn->kn_id > PID_MAX)
> Index: sys/kern/kern_pledge.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_pledge.c,v
> diff -u -p -u -r1.321 kern_pledge.c
> --- sys/kern/kern_pledge.c 6 Oct 2024 23:39:24 -0000 1.321
> +++ sys/kern/kern_pledge.c 7 Feb 2025 14:32:01 -0000
> @@ -451,7 +451,7 @@ sys_pledge(struct proc *p, void *v, regi
> } */ *uap = v;
> struct process *pr = p->p_p;
> uint64_t promises, execpromises;
> - int error;
> + int error = 0;
> int unveil_cleanup = 0;
>
> /* Check for any error in user input */
> @@ -481,16 +481,16 @@ sys_pledge(struct proc *p, void *v, regi
> /* Only permit reductions */
> if (ISSET(pr->ps_flags, PS_PLEDGE) &&
> (((promises | pr->ps_pledge) != pr->ps_pledge))) {
> - mtx_leave(&pr->ps_mtx);
> - return (EPERM);
> + error = EPERM;
> + goto fail;
> }
> }
> if (SCARG(uap, execpromises)) {
> /* Only permit reductions */
> if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
> (((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) {
> - mtx_leave(&pr->ps_mtx);
> - return (EPERM);
> + error = EPERM;
> + goto fail;
> }
> }
>
> @@ -509,19 +509,24 @@ sys_pledge(struct proc *p, void *v, regi
> atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
> }
>
> +fail:
> mtx_leave(&pr->ps_mtx);
>
> if (unveil_cleanup) {
> /*
> * Kill off unveil and drop unveil vnode refs if we no
> - * longer are holding any path-accessing pledge
> +	 * longer are holding any path-accessing pledge; this must
> +	 * be done single-threaded so that other sleeping syscalls
> +	 * finish before we change this.
> */
> + single_thread_set(p, SINGLE_UNWIND);
> KERNEL_LOCK();
> unveil_destroy(pr);
> KERNEL_UNLOCK();
> + single_thread_clear(p);
> }
>
> - return (0);
> + return (error);
> }
>
> int
> @@ -536,7 +541,8 @@ pledge_syscall(struct proc *p, int code,
> if (pledge_syscalls[code] == PLEDGE_ALWAYS)
> return (0);
>
> - if (p->p_p->ps_pledge & pledge_syscalls[code])
> + p->p_pledge = p->p_p->ps_pledge; /* pledge checks are per-thread */
> + if (p->p_pledge & pledge_syscalls[code])
> return (0);
>
> *tval = pledge_syscalls[code];
> @@ -559,7 +565,7 @@ pledge_fail(struct proc *p, int error, u
> if (KTRPOINT(p, KTR_PLEDGE))
> ktrpledge(p, error, code, p->p_pledge_syscall);
> #endif
> - if (p->p_p->ps_pledge & PLEDGE_ERROR)
> + if (p->p_pledge & PLEDGE_ERROR)
> return (ENOSYS);
>
> KERNEL_LOCK();
> @@ -593,7 +599,7 @@ pledge_namei(struct proc *p, struct name
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
> (p->p_p->ps_flags & PS_COREDUMP))
> return (0);
> - pledge = READ_ONCE(p->p_p->ps_pledge);
> + pledge = READ_ONCE(p->p_pledge);
>
> if (ni->ni_pledge == 0)
> panic("pledge_namei: ni_pledge");
> @@ -729,7 +735,7 @@ pledge_namei(struct proc *p, struct name
>
> /*
> * Ensure each flag of ni_pledge has counterpart allowing it in
> - * ps_pledge.
> + * p_pledge.
> */
> if (ni->ni_pledge & ~pledge)
> return (pledge_fail(p, EPERM, (ni->ni_pledge & ~pledge)));
> @@ -748,7 +754,7 @@ pledge_recvfd(struct proc *p, struct fil
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0)
> + if ((p->p_pledge & PLEDGE_RECVFD) == 0)
> return pledge_fail(p, EPERM, PLEDGE_RECVFD);
>
> switch (fp->f_type) {
> @@ -776,7 +782,7 @@ pledge_sendfd(struct proc *p, struct fil
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0)
> + if ((p->p_pledge & PLEDGE_SENDFD) == 0)
> return pledge_fail(p, EPERM, PLEDGE_SENDFD);
>
> switch (fp->f_type) {
> @@ -804,7 +810,7 @@ pledge_sysctl(struct proc *p, int miblen
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - pledge = READ_ONCE(p->p_p->ps_pledge);
> + pledge = READ_ONCE(p->p_pledge);
>
> if (new)
> return pledge_fail(p, EFAULT, 0);
> @@ -1010,7 +1016,7 @@ pledge_chown(struct proc *p, uid_t uid,
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if (p->p_p->ps_pledge & PLEDGE_CHOWNUID)
> + if (p->p_pledge & PLEDGE_CHOWNUID)
> return (0);
>
> if (uid != -1 && uid != p->p_ucred->cr_uid)
> @@ -1028,7 +1034,7 @@ pledge_adjtime(struct proc *p, const voi
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if ((p->p_p->ps_pledge & PLEDGE_SETTIME))
> + if ((p->p_pledge & PLEDGE_SETTIME))
> return (0);
> if (delta)
> return (EPERM);
> @@ -1041,7 +1047,7 @@ pledge_sendit(struct proc *p, const void
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
> + if ((p->p_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS)))
> return (0); /* may use address */
> if (to == NULL)
> return (0); /* behaves just like write */
> @@ -1057,7 +1063,7 @@ pledge_ioctl(struct proc *p, long com, s
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - pledge = READ_ONCE(p->p_p->ps_pledge);
> + pledge = READ_ONCE(p->p_pledge);
>
> /*
> * The ioctl's which are always allowed.
> @@ -1365,7 +1371,7 @@ pledge_sockopt(struct proc *p, int set,
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - pledge = READ_ONCE(p->p_p->ps_pledge);
> + pledge = READ_ONCE(p->p_pledge);
>
> /* Always allow these, which are too common to reject */
> switch (level) {
> @@ -1502,7 +1508,7 @@ pledge_socket(struct proc *p, int domain
>
> if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
> return 0;
> - pledge = READ_ONCE(p->p_p->ps_pledge);
> + pledge = READ_ONCE(p->p_pledge);
>
> if (ISSET(state, SS_DNS)) {
> if (ISSET(pledge, PLEDGE_DNS))
> @@ -1534,7 +1540,7 @@ pledge_flock(struct proc *p)
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if ((p->p_p->ps_pledge & PLEDGE_FLOCK))
> + if ((p->p_pledge & PLEDGE_FLOCK))
> return (0);
> return (pledge_fail(p, EPERM, PLEDGE_FLOCK));
> }
> @@ -1545,7 +1551,7 @@ pledge_swapctl(struct proc *p, int cmd)
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if (p->p_p->ps_pledge & PLEDGE_VMINFO) {
> + if (p->p_pledge & PLEDGE_VMINFO) {
> switch (cmd) {
> case SWAP_NSWAP:
> case SWAP_STATS:
> @@ -1580,7 +1586,7 @@ pledge_fcntl(struct proc *p, int cmd)
> {
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
> + if ((p->p_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
> return pledge_fail(p, EPERM, PLEDGE_PROC);
> return (0);
> }
> @@ -1590,7 +1596,7 @@ pledge_kill(struct proc *p, pid_t pid)
> {
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return 0;
> - if (p->p_p->ps_pledge & PLEDGE_PROC)
> + if (p->p_pledge & PLEDGE_PROC)
> return 0;
> if (pid == 0 || pid == p->p_p->ps_pid)
> return 0;
> @@ -1615,7 +1621,7 @@ pledge_protexec(struct proc *p, int prot
> /* Before kbind(2) call, ld.so and crt may create EXEC mappings */
> if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0)
> return 0;
> - if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
> + if (!(p->p_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
> return pledge_fail(p, EPERM, PLEDGE_PROTEXEC);
> return 0;
> }
> Index: sys/kern/vfs_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
> diff -u -p -u -r1.372 vfs_syscalls.c
> --- sys/kern/vfs_syscalls.c 29 Jan 2025 14:57:19 -0000 1.372
> +++ sys/kern/vfs_syscalls.c 7 Feb 2025 14:36:44 -0000
> @@ -1016,9 +1016,11 @@ sys_unveil(struct proc *p, void *v, regi
> if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
> VOP_UNLOCK(nd.ni_dvp);
>
> - if (allow)
> + if (allow) {
> + single_thread_set(p, SINGLE_UNWIND);
> error = unveil_add(p, &nd, permissions);
> - else
> + single_thread_clear(p);
> + } else
> error = EPERM;
>
> /* release vref from namei, but not vref from unveil_add */
> Index: sys/dev/vmm/vmm.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/vmm/vmm.c,v
> diff -u -p -u -r1.3 vmm.c
> --- sys/dev/vmm/vmm.c 27 Aug 2024 09:16:03 -0000 1.3
> +++ sys/dev/vmm/vmm.c 6 Feb 2025 19:14:14 -0000
> @@ -184,7 +184,7 @@ vm_find(uint32_t id, struct vm **res)
> * The managing vmm parent process can lookup all
> * all VMs and is indicated by PLEDGE_PROC.
> */
> - if (((p->p_p->ps_pledge &
> + if (((p->p_pledge &
> (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
> (vm->vm_creator_pid != p->p_p->ps_pid))
> ret = EPERM;
> @@ -291,7 +291,7 @@ pledge_ioctl_vmm(struct proc *p, long co
> case VMM_IOC_INFO:
> case VMM_IOC_SHAREMEM:
> /* The "parent" process in vmd forks and manages VMs */
> - if (p->p_p->ps_pledge & PLEDGE_PROC)
> + if (p->p_pledge & PLEDGE_PROC)
> return (0);
> break;
> case VMM_IOC_TERM:
>
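To make the behaviour change concrete from userland, a hedged sketch
(error handling omitted, the path is illustrative): an open(2) that is
already inside the kernel is now judged against the promises in force
when it entered, even if a sibling thread drops a promise mid-flight.

#include <fcntl.h>
#include <pthread.h>
#include <unistd.h>

void *
dropper(void *arg)
{
	pledge("stdio", NULL);	/* drop "rpath" while main may be in open() */
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pledge("stdio rpath", NULL);
	pthread_create(&t, NULL, dropper, NULL);
	open("/etc/hosts", O_RDONLY);	/* checked against one consistent mask */
	pthread_join(t, NULL);
	return (0);
}
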
--
:wq Claudio