From: Philip Guenther Subject: Re: [PATCH]: Add POSIX O_CLOFORK flag To: Ricardo Branco Cc: tech@openbsd.org Date: Sat, 21 Jun 2025 17:35:17 -0700 On Sat, Jun 21, 2025 at 4:44 PM Philip Guenther wrote: > > On Sat, Jun 21, 2025 at 4:04 PM Ricardo Branco wrote: > > > > This initial patch adds support for POSIX O_CLOFORK (close-on-fork) flag. > > > > If there's interest, I can update manpages and fill the TODO list in the PR: > > https://github.com/openbsd/src/pull/46 > > > > I uploaded the full test-suite from Illumos adapted to OpenBSD there. > > > > Work also being done to add this flag on: > > > > - FreeBSD: https://github.com/freebsd/freebsd-src/pull/1698 > > - DragonflyBSD: https://github.com/DragonFlyBSD/DragonFlyBSD/pull/28 > > > > The discussion for adding this flag was done in the FreeBSD PR. > > Nope. I implemented this myself last summer, but after Damien Miller > suggested that OpenSSH would want to clear the flag on inherited fds we > decided the specified behavior of O_CLOFORK being inherited across > exec is insecure, unnecessary for purpose, and kinda insane. I opened > a ticket with austin group: > https://austingroupbugs.net/view.php?id=1851 > > Geoff Clare was going to reach out to other implementations to get > feedback but nothing has happened since. > > Maybe we should say that more than 10 months was sufficient for > austin-group to address a potential security issue, in which case I'll > rebase my diff, but with clearing the flag on exec because WTH were > they thinking. Rebased diff, with cleared-on-exec behavior, attached, in case you want to play with it, Ricardo. 
Regress tests would be wonderful :) Philip Index: sys/sys/fcntl.h =================================================================== RCS file: /data/src/openbsd/src/sys/sys/fcntl.h,v diff -u -p -r1.22 fcntl.h --- sys/sys/fcntl.h 21 Jan 2019 18:09:21 -0000 1.22 +++ sys/sys/fcntl.h 22 Jun 2025 00:08:45 -0000 @@ -83,22 +83,24 @@ #define O_EXLOCK 0x0020 /* open with exclusive file lock */ #define O_ASYNC 0x0040 /* signal pgrp when data ready */ #define O_FSYNC 0x0080 /* backwards compatibility */ -#define O_NOFOLLOW 0x0100 /* if path is a symlink, don't follow */ #endif #if __POSIX_VISIBLE >= 199309 || __XPG_VISIBLE >= 420 #define O_SYNC 0x0080 /* synchronous writes */ -#endif -#define O_CREAT 0x0200 /* create if nonexistent */ -#define O_TRUNC 0x0400 /* truncate to zero length */ -#define O_EXCL 0x0800 /* error if already exists */ - /* - * POSIX 1003.1 specifies a higher granularity for synchronous operations + * POSIX 1003.1 permits a higher granularity for synchronous operations * than we support. Since synchronicity is all or nothing in OpenBSD * we just define these to be the same as O_SYNC. */ #define O_DSYNC O_SYNC /* synchronous data writes */ #define O_RSYNC O_SYNC /* synchronous reads */ +#endif + +/* defined by POSIX Issue 7 */ +#define O_NOFOLLOW 0x0100 /* if path is a symlink, don't follow */ + +#define O_CREAT 0x0200 /* create if nonexistent */ +#define O_TRUNC 0x0400 /* truncate to zero length */ +#define O_EXCL 0x0800 /* error if already exists */ /* defined by POSIX 1003.1; BSD default, this bit is not required */ #define O_NOCTTY 0x8000 /* don't assign controlling terminal */ @@ -107,6 +109,9 @@ #define O_CLOEXEC 0x10000 /* atomically set FD_CLOEXEC */ #define O_DIRECTORY 0x20000 /* fail if not a directory */ +/* defined by POSIX Issue 8 */ +#define O_CLOFORK 0x40000 /* atomically set FD_CLOFORK */ + #ifdef _KERNEL /* * convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE. 
@@ -158,9 +163,30 @@ #if __BSD_VISIBLE #define F_ISATTY 11 /* used by isatty(3) */ #endif +#if __POSIX_VISIBLE >= 202405 +#define F_DUPFD_CLOFORK 12 /* duplicate with FD_CLOFORK set */ +#if __not_yet +#define F_GETOWN_EX 13 /* get SIGIO/SIGURG proc/pgrp */ +#define F_SETOWN_EX 14 /* set SIGIO/SIGURG proc/pgrp */ +#define F_OFD_GETLK 15 /* get flock-style locking info */ +#define F_OFD_SETLK 16 /* set flock-style locking info */ +#define F_OFD_SETLKW 17 /* F_OFD_SETLK; wait if blocked */ + +struct f_owner_ex { + int type; + pid_t pid; +}; +/* type values */ +#define F_OWNER_PID 1 /* pid is process ID */ +#define F_OWNER_PGRP 2 /* pid is process group ID */ +#endif /* __not_yet */ +#endif /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */ +#if __POSIX_VISIBLE >= 202405 +#define FD_CLOFORK 4 /* close-on-fork flag */ +#endif /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ #define F_RDLCK 1 /* shared or read lock */ Index: sys/sys/filedesc.h =================================================================== RCS file: /data/src/openbsd/src/sys/sys/filedesc.h,v diff -u -p -r1.47 filedesc.h --- sys/sys/filedesc.h 10 May 2025 09:44:39 -0000 1.47 +++ sys/sys/filedesc.h 22 Jun 2025 00:08:45 -0000 @@ -115,6 +115,7 @@ struct filedesc0 { */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ #define UF_PLEDGED 0x02 /* open after pledge(2) */ +#define UF_FORKCLOSE 0x04 /* auto-close on fork */ /* * Flags on the file descriptor table. 
@@ -143,7 +144,7 @@ void fdfree(struct proc *p); int fdrelease(struct proc *p, int); void fdinsert(struct filedesc *, int, int, struct file *); void fdremove(struct filedesc *, int); -void fdcloseexec(struct proc *); +void fdprepforexec(struct proc *); struct file *fd_iterfile(struct file *, struct proc *); struct file *fd_getfile(struct filedesc *, int); struct file *fd_getfile_mode(struct filedesc *, int, int); Index: sys/sys/socket.h =================================================================== RCS file: /data/src/openbsd/src/sys/sys/socket.h,v diff -u -p -r1.107 socket.h --- sys/sys/socket.h 19 Apr 2025 04:12:36 -0000 1.107 +++ sys/sys/socket.h 22 Jun 2025 00:08:45 -0000 @@ -72,14 +72,19 @@ typedef __sa_family_t sa_family_t; /* so /* * Socket creation flags */ -#if __BSD_VISIBLE +#if __POSIX_VISIBLE >= 202405 || __BSD_VISIBLE #define SOCK_CLOEXEC 0x8000 /* set FD_CLOEXEC */ #define SOCK_NONBLOCK 0x4000 /* set O_NONBLOCK */ +#endif +#if __BSD_VISIBLE #ifdef _KERNEL #define SOCK_NONBLOCK_INHERIT 0x2000 /* inherit O_NONBLOCK from listener */ #endif #define SOCK_DNS 0x1000 /* set SS_DNS */ #endif /* __BSD_VISIBLE */ +#if __POSIX_VISIBLE >= 202405 +#define SOCK_CLOFORK 0x0800 /* set FD_CLOFORK */ +#endif /* * Option flags per-socket. @@ -511,6 +516,7 @@ struct timespec; #define MSG_NOSIGNAL 0x400 /* do not send SIGPIPE */ #define MSG_CMSG_CLOEXEC 0x800 /* set FD_CLOEXEC on received fds */ #define MSG_WAITFORONE 0x1000 /* nonblocking but wait for one msg */ +#define MSG_CMSG_CLOFORK 0x2000 /* set FD_CLOFORK on received fds */ /* * Header for ancillary data objects in msg_control buffer. 
@@ -586,7 +592,7 @@ int sockatmark(int); int socket(int, int, int); int socketpair(int, int, int, int *); -#if __BSD_VISIBLE +#if __POSIX_VISIBLE >= 202405 || __BSD_VISIBLE int accept4(int, struct sockaddr *__restrict, socklen_t *__restrict, int); #endif Index: sys/kern/kern_descrip.c =================================================================== RCS file: /data/src/openbsd/src/sys/kern/kern_descrip.c,v diff -u -p -r1.211 kern_descrip.c --- sys/kern/kern_descrip.c 10 May 2025 09:44:39 -0000 1.211 +++ sys/kern/kern_descrip.c 22 Jun 2025 00:08:45 -0000 @@ -80,6 +80,7 @@ int dodup3(struct proc *, int, int, int, #define DUPF_CLOEXEC 0x01 #define DUPF_DUP2 0x02 +#define DUPF_CLOFORK 0x04 struct pool file_pool; struct pool fdesc_pool; @@ -336,7 +337,7 @@ sys_dup3(struct proc *p, void *v, regist if (SCARG(uap, from) == SCARG(uap, to)) return (EINVAL); - if (SCARG(uap, flags) & ~O_CLOEXEC) + if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK)) return (EINVAL); return (dodup3(p, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags), retval)); @@ -388,6 +389,8 @@ restart: dupflags = DUPF_DUP2; if (flags & O_CLOEXEC) dupflags |= DUPF_CLOEXEC; + if (flags & O_CLOFORK) + dupflags |= DUPF_CLOFORK; /* No need for FRELE(), finishdup() uses current ref. */ return (finishdup(p, fp, old, new, retval, dupflags)); @@ -423,6 +426,7 @@ restart: case F_DUPFD: case F_DUPFD_CLOEXEC: + case F_DUPFD_CLOFORK: newmin = (long)SCARG(uap, arg); if ((u_int)newmin >= lim_cur(RLIMIT_NOFILE) || (u_int)newmin >= atomic_load_int(&maxfiles)) { @@ -444,6 +448,8 @@ restart: if (SCARG(uap, cmd) == F_DUPFD_CLOEXEC) dupflags |= DUPF_CLOEXEC; + if (SCARG(uap, cmd) == F_DUPFD_CLOFORK) + dupflags |= DUPF_CLOFORK; /* No need for FRELE(), finishdup() uses current ref. */ error = finishdup(p, fp, fd, i, retval, dupflags); @@ -452,16 +458,17 @@ restart: case F_GETFD: fdplock(fdp); - *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; + *retval = (fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 
FD_CLOEXEC : 0) + | (fdp->fd_ofileflags[fd] & UF_FORKCLOSE ? FD_CLOFORK : 0); fdpunlock(fdp); break; case F_SETFD: fdplock(fdp); - if ((long)SCARG(uap, arg) & 1) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - else - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; + i = ((long)SCARG(uap, arg) & FD_CLOEXEC ? UF_EXCLOSE : 0) | + ((long)SCARG(uap, arg) & FD_CLOFORK ? UF_FORKCLOSE : 0); + fdp->fd_ofileflags[fd] = (fdp->fd_ofileflags[fd] & + ~(UF_EXCLOSE | UF_FORKCLOSE)) | i; fdpunlock(fdp); break; @@ -667,9 +674,12 @@ finishdup(struct proc *p, struct file *f fdp->fd_ofiles[new] = fp; mtx_leave(&fdp->fd_fplock); - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & + ~(UF_EXCLOSE | UF_FORKCLOSE); if (dupflags & DUPF_CLOEXEC) fdp->fd_ofileflags[new] |= UF_EXCLOSE; + if (dupflags & DUPF_CLOFORK) + fdp->fd_ofileflags[new] |= UF_FORKCLOSE; *retval = new; if (oldfp != NULL) { @@ -711,7 +721,7 @@ fdinsert(struct filedesc *fdp, int fd, i fdp->fd_ofiles[fd] = fp; mtx_leave(&fdp->fd_fplock); - fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE); + fdp->fd_ofileflags[fd] |= (flags & (UF_EXCLOSE | UF_FORKCLOSE)); } void @@ -1141,7 +1151,9 @@ fdcopy(struct process *pr) struct file *fp = fdp->fd_ofiles[i]; if (fp != NULL) { + int fileflags = fdp->fd_ofileflags[i]; /* + * If the UF_FORKCLOSE flag is set, skip the fd. * XXX Gruesome hack. If count gets too high, fail * to copy an fd, since fdcopy()'s callers do not * permit it to indicate failure yet. @@ -1150,6 +1162,7 @@ fdcopy(struct process *pr) * their internal consistency, so close them here. 
*/ if (fp->f_count >= FDUP_MAX_COUNT || + (fileflags & UF_FORKCLOSE) || fp->f_type == DTYPE_KQUEUE) { if (i < newfdp->fd_freefile) newfdp->fd_freefile = i; @@ -1158,7 +1171,7 @@ fdcopy(struct process *pr) FREF(fp); newfdp->fd_ofiles[i] = fp; - newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; + newfdp->fd_ofileflags[i] = fileflags; fd_used(newfdp, i); } } @@ -1414,17 +1427,18 @@ dupfdopen(struct proc *p, int indx, int } /* - * Close any files on exec? + * Doing an exec, so handle fd flags: do close-on-exec and clear + * pledged and close-on-fork */ void -fdcloseexec(struct proc *p) +fdprepforexec(struct proc *p) { struct filedesc *fdp = p->p_fd; int fd; fdplock(fdp); for (fd = 0; fd <= fdp->fd_lastfile; fd++) { - fdp->fd_ofileflags[fd] &= ~UF_PLEDGED; + fdp->fd_ofileflags[fd] &= ~(UF_PLEDGED | UF_FORKCLOSE); if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) { /* fdrelease() unlocks fdp. */ (void) fdrelease(p, fd); Index: sys/kern/kern_exec.c =================================================================== RCS file: /data/src/openbsd/src/sys/kern/kern_exec.c,v diff -u -p -r1.264 kern_exec.c --- sys/kern/kern_exec.c 31 May 2025 12:40:33 -0000 1.264 +++ sys/kern/kern_exec.c 22 Jun 2025 00:08:45 -0000 @@ -272,6 +272,7 @@ sys_execve(struct proc *p, void *v, regi struct ps_strings arginfo; struct vmspace *vm = p->p_vmspace; struct vnode *otvp; + int i; /* * Get other threads to stop, if contested return ERESTART, @@ -531,7 +532,7 @@ sys_execve(struct proc *p, void *v, regi } stopprofclock(pr); /* stop profiling */ - fdcloseexec(p); /* handle close on exec */ + fdprepforexec(p); /* handle close on exec and close on fork */ execsigs(p); /* reset caught signals */ TCB_SET(p, NULL); /* reset the TCB address */ pr->ps_kbind_addr = 0; /* reset the kbind bits */ @@ -601,8 +602,6 @@ sys_execve(struct proc *p, void *v, regi * MNT_NOEXEC has already been used to disable s[ug]id. 
*/ if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) { - int i; - atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC); #ifdef KTRACE @@ -618,66 +617,63 @@ sys_execve(struct proc *p, void *v, regi cred->cr_uid = attr.va_uid; if (attr.va_mode & VSGID) cred->cr_gid = attr.va_gid; + } else + atomic_clearbits_int(&pr->ps_flags, PS_SUGID); + + /* + * A few caveats apply to stdin, stdout, and stderr. + */ + fdplock(p->p_fd); + for (i = 0; i < 3; i++) { + struct file *fp = NULL; + + /* + * NOTE - This will never return NULL because of immature fds + * since only kernel-threads share the file descriptor table. + */ + fp = fd_getfile(p->p_fd, i); /* - * For set[ug]id processes, a few caveats apply to - * stdin, stdout, and stderr. + * Ensure that stdin, stdout, and stderr are already + * allocated. We do not want userland to accidentally + * allocate descriptors in this range which has implied + * meaning to libc. */ - error = 0; - fdplock(p->p_fd); - for (i = 0; i < 3; i++) { - struct file *fp = NULL; - - /* - * NOTE - This will never return NULL because of - * immature fds. The file descriptor table is not - * shared because we're suid. - */ - fp = fd_getfile(p->p_fd, i); - - /* - * Ensure that stdin, stdout, and stderr are already - * allocated. We do not want userland to accidentally - * allocate descriptors in this range which has implied - * meaning to libc. - */ - if (fp == NULL) { - short flags = FREAD | (i == 0 ? 0 : FWRITE); - struct vnode *vp; - int indx; + if (fp == NULL) { + short flags = FREAD | (i == 0 ? 
0 : FWRITE); + struct vnode *vp; + int indx; - if ((error = falloc(p, &fp, &indx)) != 0) - break; + if ((error = falloc(p, &fp, &indx)) != 0) + break; #ifdef DIAGNOSTIC - if (indx != i) - panic("sys_execve: falloc indx != i"); + if (indx != i) + panic("sys_execve: falloc indx != i"); #endif - if ((error = cdevvp(getnulldev(), &vp)) != 0) { - fdremove(p->p_fd, indx); - closef(fp, p); - break; - } - if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) { - fdremove(p->p_fd, indx); - closef(fp, p); - vrele(vp); - break; - } - if (flags & FWRITE) - vp->v_writecount++; - fp->f_flag = flags; - fp->f_type = DTYPE_VNODE; - fp->f_ops = &vnops; - fp->f_data = (caddr_t)vp; - fdinsert(p->p_fd, indx, 0, fp); + if ((error = cdevvp(getnulldev(), &vp)) != 0) { + fdremove(p->p_fd, indx); + closef(fp, p); + break; } - FRELE(fp, p); + if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) { + fdremove(p->p_fd, indx); + closef(fp, p); + vrele(vp); + break; + } + if (flags & FWRITE) + vp->v_writecount++; + fp->f_flag = flags; + fp->f_type = DTYPE_VNODE; + fp->f_ops = &vnops; + fp->f_data = (caddr_t)vp; + fdinsert(p->p_fd, indx, 0, fp); } - fdpunlock(p->p_fd); - if (error) - goto exec_abort; - } else - atomic_clearbits_int(&pr->ps_flags, PS_SUGID); + FRELE(fp, p); + } + fdpunlock(p->p_fd); + if (error) + goto exec_abort; /* * Reset the saved ugids and update the process's copy of the Index: sys/kern/sys_pipe.c =================================================================== RCS file: /data/src/openbsd/src/sys/kern/sys_pipe.c,v diff -u -p -r1.148 sys_pipe.c --- sys/kern/sys_pipe.c 30 Dec 2024 02:46:00 -0000 1.148 +++ sys/kern/sys_pipe.c 22 Jun 2025 00:08:45 -0000 @@ -162,7 +162,7 @@ sys_pipe2(struct proc *p, void *v, regis syscallarg(int) flags; } */ *uap = v; - if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK)) + if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK | FNONBLOCK)) return (EINVAL); return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags))); @@ -175,9 +175,10 @@ dopipe(struct proc *p, int 
*ufds, int fl struct file *rf, *wf; struct pipe_pair *pp; struct pipe *rpipe, *wpipe = NULL; - int fds[2], cloexec, error; + int fds[2], fdflags, error; - cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0) + | ((flags & O_CLOFORK) ? UF_FORKCLOSE : 0); pp = pipe_pair_create(); if (pp == NULL) @@ -203,8 +204,8 @@ dopipe(struct proc *p, int *ufds, int fl wf->f_data = wpipe; wf->f_ops = &pipeops; - fdinsert(fdp, fds[0], cloexec, rf); - fdinsert(fdp, fds[1], cloexec, wf); + fdinsert(fdp, fds[0], fdflags, rf); + fdinsert(fdp, fds[1], fdflags, wf); error = copyout(fds, ufds, sizeof(fds)); if (error == 0) { Index: sys/kern/uipc_syscalls.c =================================================================== RCS file: /data/src/openbsd/src/sys/kern/uipc_syscalls.c,v diff -u -p -r1.223 uipc_syscalls.c --- sys/kern/uipc_syscalls.c 20 Jun 2025 14:34:34 -0000 1.223 +++ sys/kern/uipc_syscalls.c 22 Jun 2025 00:10:23 -0000 @@ -81,7 +81,7 @@ sys_socket(struct proc *p, void *v, regi struct file *fp; int type = SCARG(uap, type); int domain = SCARG(uap, domain); - int fd, cloexec, nonblock, fflag, error; + int fd, fdflags, nonblock, fflag, error; unsigned int ss = 0; if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6)) @@ -93,8 +93,9 @@ sys_socket(struct proc *p, void *v, regi if (error) return (error); - type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS); - cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; + type &= ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK | SOCK_DNS); + fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) + | ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FORKCLOSE : 0); nonblock = SCARG(uap, type) & SOCK_NONBLOCK; fflag = FREAD | FWRITE | (nonblock ? 
FNONBLOCK : 0); @@ -113,7 +114,7 @@ sys_socket(struct proc *p, void *v, regi fp->f_ops = &socketops; so->so_state |= ss; fp->f_data = so; - fdinsert(fdp, fd, cloexec, fp); + fdinsert(fdp, fd, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); *retval = fd; @@ -240,7 +241,7 @@ sys_accept4(struct proc *p, void *v, reg syscallarg(socklen_t *) int flags; } */ *uap = v; - if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK)) return (EINVAL); return (doaccept(p, SCARG(uap, s), SCARG(uap, name), @@ -257,9 +258,10 @@ doaccept(struct proc *p, int sock, struc socklen_t namelen; int error, tmpfd; struct socket *head, *so; - int cloexec, nflag; + int fdflags, nflag; - cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) + | ((flags & SOCK_CLOFORK) ? UF_FORKCLOSE : 0); if (name && (error = copyin(anamelen, &namelen, sizeof (namelen)))) return (error); @@ -346,7 +348,7 @@ doaccept(struct proc *p, int sock, struc } fdplock(fdp); - fdinsert(fdp, tmpfd, cloexec, fp); + fdinsert(fdp, tmpfd, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); *retval = tmpfd; @@ -457,10 +459,11 @@ sys_socketpair(struct proc *p, void *v, struct filedesc *fdp = p->p_fd; struct file *fp1 = NULL, *fp2 = NULL; struct socket *so1, *so2; - int type, cloexec, nonblock, fflag, error, sv[2]; + int type, fdflags, nonblock, fflag, error, sv[2]; - type = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); - cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; + type = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK); + fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) + | ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FORKCLOSE : 0); nonblock = SCARG(uap, type) & SOCK_NONBLOCK; fflag = FREAD | FWRITE | (nonblock ? 
FNONBLOCK : 0); @@ -498,8 +501,8 @@ sys_socketpair(struct proc *p, void *v, fp2->f_data = so2; error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int)); if (error == 0) { - fdinsert(fdp, sv[0], cloexec, fp1); - fdinsert(fdp, sv[1], cloexec, fp2); + fdinsert(fdp, sv[0], fdflags, fp1); + fdinsert(fdp, sv[1], fdflags, fp2); fdpunlock(fdp); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) Index: sys/kern/uipc_usrreq.c =================================================================== RCS file: /data/src/openbsd/src/sys/kern/uipc_usrreq.c,v diff -u -p -r1.220 uipc_usrreq.c --- sys/kern/uipc_usrreq.c 12 Jun 2025 20:37:58 -0000 1.220 +++ sys/kern/uipc_usrreq.c 22 Jun 2025 00:08:45 -0000 @@ -1146,6 +1146,8 @@ restart: fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED); if (flags & MSG_CMSG_CLOEXEC) fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE; + if (flags & MSG_CMSG_CLOFORK) + fdp->fd_ofileflags[fds[i]] |= UF_FORKCLOSE; rp++; } Index: sys/kern/vfs_syscalls.c =================================================================== RCS file: /data/src/openbsd/src/sys/kern/vfs_syscalls.c,v diff -u -p -r1.375 vfs_syscalls.c --- sys/kern/vfs_syscalls.c 27 Apr 2025 00:58:55 -0000 1.375 +++ sys/kern/vfs_syscalls.c 22 Jun 2025 00:08:45 -0000 @@ -1086,7 +1086,7 @@ doopenat(struct proc *p, int fd, const c struct file *fp; struct vnode *vp; struct vattr vattr; - int flags, cloexec, cmode; + int flags, fdflags, cmode; int type, indx, error, localtrunc = 0; struct flock lf; struct nameidata nd; @@ -1099,7 +1099,8 @@ doopenat(struct proc *p, int fd, const c return (error); } - cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0) + | ((oflags & O_CLOFORK) ? 
UF_FORKCLOSE : 0); fdplock(fdp); if ((error = falloc(p, &fp, &indx)) != 0) { @@ -1200,7 +1201,7 @@ doopenat(struct proc *p, int fd, const c KERNEL_UNLOCK(); *retval = indx; fdplock(fdp); - fdinsert(fdp, indx, cloexec, fp); + fdinsert(fdp, indx, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); return (error); @@ -1224,7 +1225,7 @@ sys___tmpfd(struct proc *p, void *v, reg struct file *fp; struct vnode *vp; int oflags = SCARG(uap, flags); - int flags, cloexec, cmode; + int flags, fdflags, cmode; int indx, error; unsigned int i; struct nameidata nd; @@ -1232,9 +1233,11 @@ sys___tmpfd(struct proc *p, void *v, reg static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-"; /* most flags are hardwired */ - oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC); + oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | + (oflags & (O_CLOEXEC | O_CLOFORK)); - cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0) + | ((oflags & O_CLOFORK) ? UF_FORKCLOSE : 0); fdplock(fdp); if ((error = falloc(p, &fp, &indx)) != 0) { @@ -1270,7 +1273,7 @@ sys___tmpfd(struct proc *p, void *v, reg VOP_UNLOCK(vp); *retval = indx; fdplock(fdp); - fdinsert(fdp, indx, cloexec, fp); + fdinsert(fdp, indx, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); @@ -1352,7 +1355,7 @@ sys_fhopen(struct proc *p, void *v, regi struct vnode *vp = NULL; struct mount *mp; struct ucred *cred = p->p_ucred; - int flags, cloexec; + int flags, fdflags; int type, indx, error=0; struct flock lf; struct vattr va; @@ -1370,7 +1373,8 @@ sys_fhopen(struct proc *p, void *v, regi if ((flags & O_CREAT)) return (EINVAL); - cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0) + | ((flags & O_CLOFORK) ? 
UF_FORKCLOSE : 0); fdplock(fdp); if ((error = falloc(p, &fp, &indx)) != 0) { @@ -1456,7 +1460,7 @@ sys_fhopen(struct proc *p, void *v, regi VOP_UNLOCK(vp); *retval = indx; fdplock(fdp); - fdinsert(fdp, indx, cloexec, fp); + fdinsert(fdp, indx, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); return (0); Index: lib/libc/gen/opendir.c =================================================================== RCS file: /data/src/openbsd/src/lib/libc/gen/opendir.c,v diff -u -p -r1.31 opendir.c --- lib/libc/gen/opendir.c 15 Apr 2024 15:47:58 -0000 1.31 +++ lib/libc/gen/opendir.c 22 Jun 2025 00:08:45 -0000 @@ -82,7 +82,9 @@ fdopendir(int fd) * POSIX doesn't require fdopendir() to set * FD_CLOEXEC, so it's okay for this to fail. */ - (void)fcntl(fd, F_SETFD, FD_CLOEXEC); + flags = fcntl(fd, F_GETFD); + if (flags != -1 && (flags & FD_CLOEXEC) == 0) + (void)fcntl(fd, F_SETFD, flags | FD_CLOEXEC); } return (dirp); } Index: lib/libc/gen/shm_open.3 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/gen/shm_open.3,v diff -u -p -r1.5 shm_open.3 --- lib/libc/gen/shm_open.3 5 May 2015 06:29:15 -0000 1.5 +++ lib/libc/gen/shm_open.3 22 Jun 2025 00:08:45 -0000 @@ -45,7 +45,7 @@ and must include at least or .Dv O_RDWR and may also include a combination of -.Dv O_CREAT , O_EXCL , O_CLOEXEC , O_NOFOLLOW , +.Dv O_CREAT , O_EXCL , O_CLOEXEC , O_CLOFORK , O_NOFOLLOW , or .Dv O_TRUNC . This implementation forces the @@ -82,7 +82,8 @@ and appear in .St -p1003.1-2001 . 
Using -.Dv O_CLOEXEC +.Dv O_CLOEXEC , +.Dv O_CLOFORK , or .Dv O_NOFOLLOW with Index: lib/libc/gen/shm_open.c =================================================================== RCS file: /data/src/openbsd/src/lib/libc/gen/shm_open.c,v diff -u -p -r1.9 shm_open.c --- lib/libc/gen/shm_open.c 10 Sep 2017 18:20:00 -0000 1.9 +++ lib/libc/gen/shm_open.c 22 Jun 2025 00:08:45 -0000 @@ -31,8 +31,9 @@ /* "/tmp/" + sha256 + ".shm" */ #define SHM_PATH_SIZE (5 + SHA256_DIGEST_STRING_LENGTH + 4) -/* O_CLOEXEC and O_NOFOLLOW are extensions to POSIX */ -#define OK_FLAGS (O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC | O_NOFOLLOW) +/* O_CLOEXEC, O_CLOFORK, and O_NOFOLLOW are extensions to POSIX */ +#define OK_FLAGS \ + (O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC | O_CLOFORK | O_NOFOLLOW) static void makeshmpath(const char *origpath, char *shmpath, size_t len) Index: lib/libc/sys/accept.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/accept.2,v diff -u -p -r1.30 accept.2 --- lib/libc/sys/accept.2 11 Sep 2022 06:38:11 -0000 1.30 +++ lib/libc/sys/accept.2 22 Jun 2025 00:08:45 -0000 @@ -58,19 +58,18 @@ call extracts the first connection reque connections, creates a new socket with the same non-blocking I/O mode as .Fa s , and allocates a new file descriptor for the socket with the -close-on-exec flag clear. +close-on-exec and close-on-fork flags clear. .Pp The .Fn accept4 -system call is similar, however the non-blocking I/O mode of the -new socket is determined by the -.Dv SOCK_NONBLOCK -flag in the -.Fa flags -argument and the close-on-exec flag on the new file descriptor is +system call is similar, however the non-blocking I/O mode, +close-on-exec flag, +and close-on-fork flag on the new file descriptor are determined by the -.Dv SOCK_CLOEXEC -flag in the +.Dv SOCK_NONBLOCK , SOCK_CLOEXEC , +and +.Dv SOCK_CLOFORK +flags, respectively, in the .Fa flags argument. .Pp @@ -204,11 +203,10 @@ is invalid. 
.Sh STANDARDS The .Fn accept -function conforms to -.St -p1003.1-2008 . -The +and .Fn accept4 -function is expected to conform to a future revision of that standard. +functions conform to +.St -p1003.1-2024 . .Sh HISTORY The .Fn accept Index: lib/libc/sys/dup.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/dup.2,v diff -u -p -r1.20 dup.2 --- lib/libc/sys/dup.2 25 Jun 2018 16:06:27 -0000 1.20 +++ lib/libc/sys/dup.2 22 Jun 2025 00:08:45 -0000 @@ -86,7 +86,8 @@ object reference to the file must be obt additional .Xr open 2 call. -The close-on-exec flag on the new file descriptor is unset. +The close-on-exec and close-on-fork flags on the new file descriptor +are unset. .Pp In .Fn dup2 , @@ -101,18 +102,21 @@ When equals .Fa oldd , .Fn dup2 -just returns without affecting the close-on-exec flag. +just returns without affecting the close-on-exec or close-on-fork flags. .Pp In .Fn dup3 , -both the value of the new descriptor and the close-on-exec flag on -the new file descriptor are specified: +the value of the new descriptor and the close-on-exec and close-on-fork +flags on +the new file descriptor are all specified: .Fa newd specifies the value and the .Dv O_CLOEXEC -bit in +and +.Dv O_CLOFORK +bits in .Fa flags -specifies the close-on-exec flag. +specify the close-on-exec and close-on-fork flags, respectively. Unlike .Fn dup2 , if @@ -192,14 +196,13 @@ is invalid. .Xr socketpair 2 , .Xr getdtablesize 3 .Sh STANDARDS -.Fn dup -and -.Fn dup2 -conform to -.St -p1003.1-2008 . The +.Fn dup , +.Fn dup2 , +and .Fn dup3 -function is expected to conform to a future revision of that standard. +functions conform to +.St -p1003.1-2024 . 
.Sh HISTORY The .Fn dup Index: lib/libc/sys/fcntl.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/fcntl.2,v diff -u -p -r1.36 fcntl.2 --- lib/libc/sys/fcntl.2 29 Dec 2022 02:12:41 -0000 1.36 +++ lib/libc/sys/fcntl.2 22 Jun 2025 00:08:45 -0000 @@ -95,22 +95,47 @@ flag associated with the new file descri is closed when .Xr execve 2 is called. +.It Dv F_DUPFD_CLOFORK +Like +.Dv F_DUPFD , +but the +.Dv FD_CLOFORK +flag associated with the new file descriptor is set, so the file descriptor +is closed in the child process when +.Xr fork 2 +or +.Xr vfork 2 +is called. .It Dv F_GETFD -Get the close-on-exec flag associated with the file descriptor +Get the close-on-exec and close-on-fork flags associated with the +file descriptor .Fa fd as -.Dv FD_CLOEXEC . +.Dv FD_CLOEXEC +and +.Dv FD_CLOFORK . If the returned value ANDed with .Dv FD_CLOEXEC is 0, the file will remain open across .Fn exec , otherwise the file will be closed upon execution of -.Fn exec +.Fn exec ; +if the returned value ANDed with +.Dv FD_CLOFORK +is 0, +the file will remain open in the child across +.Fn fork +and +.Fn vfork , +otherwise the file will be closed in the child upon execution of +.Fn fork +or +.Fn vfork .Fa ( arg is ignored). .It Dv F_SETFD -Set the close-on-exec flag associated with +Set the close-on-exec and close-on-fork flags associated with .Fa fd to .Fa arg , @@ -118,8 +143,10 @@ where .Fa arg (interpreted as an .Vt int ) -is either 0 or -.Dv FD_CLOEXEC , +is the bitwise OR of zero or more of +.Dv FD_CLOEXEC +and +.Dv FD_CLOFORK , as described above. .It Dv F_GETFL Get file status flags associated with the file descriptor @@ -392,8 +419,14 @@ as follows: A new file descriptor. .It Dv F_DUPFD_CLOEXEC A new file descriptor. +.It Dv F_DUPFD_CLOFORK +A new file descriptor. .It Dv F_GETFD -Value of flag (only the low-order bit is defined). +Value of file descriptor flags (only the +.Dv FD_CLOEXEC +and +.Dv FD_CLOFORK +bits are defined). .It Dv F_GETFL Value of flags. 
.It Dv F_GETOWN Index: lib/libc/sys/open.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/open.2,v diff -u -p -r1.51 open.2 --- lib/libc/sys/open.2 31 Mar 2022 17:27:16 -0000 1.51 +++ lib/libc/sys/open.2 22 Jun 2025 00:08:45 -0000 @@ -109,6 +109,11 @@ Set .Dv FD_CLOEXEC (the close-on-exec flag) on the new file descriptor. +.It Dv O_CLOFORK +Set +.Dv FD_CLOFORK +(the close-on-fork flag) +on the new file descriptor. .It Dv O_DIRECTORY Error if .Fa path Index: lib/libc/sys/pipe.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/pipe.2,v diff -u -p -r1.18 pipe.2 --- lib/libc/sys/pipe.2 10 Dec 2014 19:30:22 -0000 1.18 +++ lib/libc/sys/pipe.2 22 Jun 2025 00:08:45 -0000 @@ -79,15 +79,14 @@ The .Fn pipe2 function is identical to .Fn pipe -except that the non-blocking I/O mode on both ends of the pipe is +except that the non-blocking I/O mode, +close-on-exec flag, +and close-on-fork flag are determined by the -.Dv O_NONBLOCK -flag in the -.Fa flags -argument and the close-on-exec flag on both the new file descriptors -is determined by the -.Dv O_CLOEXEC -flag in the +.Dv O_NONBLOCK , O_CLOEXEC , +and +.Dv O_CLOFORK +flags, respectively, in the .Fa flags argument. .Sh RETURN VALUES @@ -125,11 +124,10 @@ is invalid. .Sh STANDARDS The .Fn pipe -function conforms to -.St -p1003.1-2008 . -The +and .Fn pipe2 -function is expected to conform to a future revision of that standard. +functions conform to +.St -p1003.1-2024 . .Pp As an extension, the pipe provided is actually capable of moving data bidirectionally. 
Index: lib/libc/sys/socket.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/socket.2,v diff -u -p -r1.44 socket.2 --- lib/libc/sys/socket.2 31 Mar 2022 17:27:16 -0000 1.44 +++ lib/libc/sys/socket.2 22 Jun 2025 00:08:45 -0000 @@ -103,6 +103,8 @@ argument: .Bl -tag -width "SOCK_NONBLOCKX" -offset indent -compact .It SOCK_CLOEXEC Set close-on-exec flag on the new descriptor. +.It SOCK_CLOFORK +Set close-on-fork flag on the new descriptor. .It SOCK_NONBLOCK Set non-blocking I/O mode on the new socket. .It SOCK_DNS @@ -282,12 +284,7 @@ is denied. The .Fn socket function conforms to -.St -p1003.1-2008 . -The -.Dv SOCK_CLOEXEC -and -.Dv SOCK_NONBLOCK -flags are expected to conform to a future revision of that standard. +.St -p1003.1-2024 . .Pp The .Dv SOCK_DNS Index: lib/libc/sys/socketpair.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/socketpair.2,v diff -u -p -r1.21 socketpair.2 --- lib/libc/sys/socketpair.2 8 Apr 2018 18:46:43 -0000 1.21 +++ lib/libc/sys/socketpair.2 22 Jun 2025 00:08:45 -0000 @@ -75,6 +75,8 @@ argument: .Bl -tag -width "SOCK_NONBLOCKX" -offset indent -compact .It SOCK_CLOEXEC Set close-on-exec flag on both the new descriptors. +.It SOCK_CLOFORK +Set close-on-fork flag on both the new descriptors. .It SOCK_NONBLOCK Set non-blocking I/O mode on both the new sockets. .El @@ -113,12 +115,7 @@ process address space. The .Fn socketpair function conforms to -.St -p1003.1-2008 . -The -.Dv SOCK_CLOEXEC -and -.Dv SOCK_NONBLOCK -flags are expected to conform to a future revision of that standard. +.St -p1003.1-2024 . 
.Sh HISTORY The .Fn socketpair Index: lib/libc/sys/w_fcntl.c =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/w_fcntl.c,v diff -u -p -r1.1 w_fcntl.c --- lib/libc/sys/w_fcntl.c 7 May 2016 19:05:22 -0000 1.1 +++ lib/libc/sys/w_fcntl.c 22 Jun 2025 00:08:45 -0000 @@ -29,6 +29,7 @@ fcntl(int fd, int cmd, ...) switch (cmd) { case F_DUPFD: case F_DUPFD_CLOEXEC: + case F_DUPFD_CLOFORK: case F_SETFD: case F_SETFL: case F_SETOWN: Index: lib/libc/sys/execve.2 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/sys/execve.2,v diff -u -p -r1.58 execve.2 --- lib/libc/sys/execve.2 13 Oct 2022 21:37:05 -0000 1.58 +++ lib/libc/sys/execve.2 22 Jun 2025 00:32:58 -0000 @@ -108,9 +108,10 @@ flag is set (see .Xr close 2 and .Xr fcntl 2 ) . -Descriptors that remain open are unaffected by -.Fn execve . -In the case of a new setuid or setgid executable being executed, if +Other descriptors remain open after +.Fn execve ; +however, the close-on-fork flag is cleared. +If file descriptors 0, 1, or 2 (representing stdin, stdout, and stderr) are currently unallocated, these descriptors will be opened to point to some system file like @@ -329,6 +330,15 @@ The .Fn execve function first appeared in .At v7 . +.Pp +In +.Ox 2.4 , +.Fn execve +started ensuring that file descriptors 0, 1, and 2 are open when +starting a setuid or setgid process. +In +.Ox 7.8 +that was extended to all processes.
.Sh CAVEATS If a program is .Em setuid Index: lib/libc/stdlib/mkstemp.c =================================================================== RCS file: /data/src/openbsd/src/lib/libc/stdlib/mkstemp.c,v diff -u -p -r1.1 mkstemp.c --- lib/libc/stdlib/mkstemp.c 19 Jan 2024 19:45:02 -0000 1.1 +++ lib/libc/stdlib/mkstemp.c 22 Jun 2025 00:08:45 -0000 @@ -20,7 +20,8 @@ #include #include -#define MKOSTEMP_FLAGS (O_APPEND | O_CLOEXEC | O_DSYNC | O_RSYNC | O_SYNC) +#define MKOSTEMP_FLAGS \ + (O_APPEND | O_CLOEXEC | O_CLOFORK | O_DSYNC | O_RSYNC | O_SYNC) static int mkstemp_cb(const char *path, int flags) Index: lib/libc/stdlib/mktemp.3 =================================================================== RCS file: /data/src/openbsd/src/lib/libc/stdlib/mktemp.3,v diff -u -p -r1.2 mktemp.3 --- lib/libc/stdlib/mktemp.3 1 Mar 2024 21:30:40 -0000 1.2 +++ lib/libc/stdlib/mktemp.3 22 Jun 2025 00:08:45 -0000 @@ -119,6 +119,8 @@ system call: Append on each write. .It Dv O_CLOEXEC Set the close-on-exec flag on the new file descriptor. +.It Dv O_CLOFORK +Set the close-on-fork flag on the new file descriptor. .It Dv O_SYNC Perform synchronous I/O operations. .El @@ -345,18 +347,16 @@ function. .Xr tmpnam 3 .Sh STANDARDS The -.Fn mkdtemp +.Fn mkdtemp , +.Fn mkostemp , and .Fn mkstemp functions conform to the -.St -p1003.1-2008 +.St -p1003.1-2024 specification. The ability to specify more than six .Em X Ns s is an extension to that standard. -The -.Fn mkostemp -function is expected to conform to a future revision of that standard. .Pp The .Fn mktemp