From: Bryan Steele Subject: Re: [PATCH]: Add POSIX O_CLOFORK flag To: tech@openbsd.org Cc: rbranco@suse.de Date: Sat, 21 Jun 2025 19:39:19 -0400 On Sat, Jun 21, 2025 at 11:35:36PM +0200, Ricardo Branco wrote: > This initial patch adds support for the POSIX O_CLOFORK (close-on-fork) flag. > > If there's interest, I can update manpages and fill the TODO list in the PR: > https://github.com/openbsd/src/pull/46 > > I uploaded the full test-suite from Illumos adapted to OpenBSD there. > > Work is also being done to add this flag on: > > - FreeBSD: https://github.com/freebsd/freebsd-src/pull/1698 > - DragonFlyBSD: https://github.com/DragonFlyBSD/DragonFlyBSD/pull/28 > > The discussion for adding this flag was done in the FreeBSD PR. > > Best, > Ricardo Philip Guenther raised an issue last summer about FD_CLOFORK/O_CLOFORK on the Austin Group lists, which remains 'Open' on the bug tracker. Without looking at your implementation closely, I'm wondering how your implementation, as well as those on other systems, addresses this, because at least as specified by POSIX, this doesn't seem like something we actually want. https://www.mail-archive.com/austin-group-l@opengroup.org/msg12812.html https://www.austingroupbugs.net/view.php?id=1851 https://mastodon.social/@guenther@bsd.network/112900798127391478 -Bryan. 
> --- > sys/kern/kern_descrip.c | 32 ++++++++++++++++++++++---------- > sys/kern/sys_pipe.c | 11 ++++++----- > sys/kern/uipc_syscalls.c | 29 ++++++++++++++++------------- > sys/kern/uipc_usrreq.c | 2 ++ > sys/kern/vfs_syscalls.c | 24 ++++++++++++++---------- > sys/sys/fcntl.h | 5 +++++ > sys/sys/filedesc.h | 1 + > sys/sys/socket.h | 2 ++ > usr.bin/fstat/fstat.c | 2 ++ > 9 files changed, 70 insertions(+), 38 deletions(-) > > diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c > index 3e57566b820..550b1c3ae9e 100644 > --- a/sys/kern/kern_descrip.c > +++ b/sys/kern/kern_descrip.c > @@ -80,6 +80,7 @@ int dodup3(struct proc *, int, int, int, register_t *); > > #define DUPF_CLOEXEC 0x01 > #define DUPF_DUP2 0x02 > +#define DUPF_CLOFORK 0x04 > > struct pool file_pool; > struct pool fdesc_pool; > @@ -336,7 +337,7 @@ sys_dup3(struct proc *p, void *v, register_t *retval) > > if (SCARG(uap, from) == SCARG(uap, to)) > return (EINVAL); > - if (SCARG(uap, flags) & ~O_CLOEXEC) > + if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK)) > return (EINVAL); > return (dodup3(p, SCARG(uap, from), SCARG(uap, to), > SCARG(uap, flags), retval)); > @@ -388,6 +389,8 @@ restart: > dupflags = DUPF_DUP2; > if (flags & O_CLOEXEC) > dupflags |= DUPF_CLOEXEC; > + if (flags & O_CLOFORK) > + dupflags |= DUPF_CLOFORK; > > /* No need for FRELE(), finishdup() uses current ref. */ > return (finishdup(p, fp, old, new, retval, dupflags)); > @@ -423,6 +426,7 @@ restart: > > case F_DUPFD: > case F_DUPFD_CLOEXEC: > + case F_DUPFD_CLOFORK: > newmin = (long)SCARG(uap, arg); > if ((u_int)newmin >= lim_cur(RLIMIT_NOFILE) || > (u_int)newmin >= atomic_load_int(&maxfiles)) { > @@ -444,6 +448,8 @@ restart: > > if (SCARG(uap, cmd) == F_DUPFD_CLOEXEC) > dupflags |= DUPF_CLOEXEC; > + else if (SCARG(uap, cmd) == F_DUPFD_CLOFORK) > + dupflags |= DUPF_CLOFORK; > > /* No need for FRELE(), finishdup() uses current ref. 
*/ > error = finishdup(p, fp, fd, i, retval, dupflags); > @@ -452,16 +458,17 @@ restart: > > case F_GETFD: > fdplock(fdp); > - *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; > + *retval = > + ((fdp->fd_ofileflags[fd] & UF_EXCLOSE) ? FD_CLOEXEC : 0) | > + ((fdp->fd_ofileflags[fd] & UF_FOCLOSE) ? FD_CLOFORK : 0); > fdpunlock(fdp); > break; > > case F_SETFD: > fdplock(fdp); > - if ((long)SCARG(uap, arg) & 1) > - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; > - else > - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; > + fdp->fd_ofileflags[fd] = > + (((long)SCARG(uap, arg) & FD_CLOEXEC) ? UF_EXCLOSE : 0) | > + (((long)SCARG(uap, arg) & FD_CLOFORK) ? UF_FOCLOSE : 0); > fdpunlock(fdp); > break; > > @@ -667,9 +674,12 @@ finishdup(struct proc *p, struct file *fp, int old, int new, > fdp->fd_ofiles[new] = fp; > mtx_leave(&fdp->fd_fplock); > > - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; > + fdp->fd_ofileflags[new] = > + fdp->fd_ofileflags[old] & ~(UF_EXCLOSE | UF_FOCLOSE); > if (dupflags & DUPF_CLOEXEC) > fdp->fd_ofileflags[new] |= UF_EXCLOSE; > + if (dupflags & DUPF_CLOFORK) > + fdp->fd_ofileflags[new] |= UF_FOCLOSE; > *retval = new; > > if (oldfp != NULL) { > @@ -711,7 +721,7 @@ fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp) > fdp->fd_ofiles[fd] = fp; > mtx_leave(&fdp->fd_fplock); > > - fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE); > + fdp->fd_ofileflags[fd] |= (flags & (UF_EXCLOSE | UF_FOCLOSE)); > } > > void > @@ -1150,6 +1160,7 @@ fdcopy(struct process *pr) > * their internal consistency, so close them here. 
> */ > if (fp->f_count >= FDUP_MAX_COUNT || > + (fdp->fd_ofileflags[i] & UF_FOCLOSE) != 0 || > fp->f_type == DTYPE_KQUEUE) { > if (i < newfdp->fd_freefile) > newfdp->fd_freefile = i; > @@ -1407,8 +1418,9 @@ dupfdopen(struct proc *p, int indx, int mode) > fdp->fd_ofiles[indx] = wfp; > mtx_leave(&fdp->fd_fplock); > > - fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | > - (fdp->fd_ofileflags[dupfd] & ~UF_EXCLOSE); > + fdp->fd_ofileflags[indx] = > + (fdp->fd_ofileflags[indx] & (UF_EXCLOSE | UF_FOCLOSE)) | > + (fdp->fd_ofileflags[dupfd] & ~(UF_EXCLOSE | UF_FOCLOSE)); > > return (0); > } > diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c > index 12254a052da..d278647d382 100644 > --- a/sys/kern/sys_pipe.c > +++ b/sys/kern/sys_pipe.c > @@ -162,7 +162,7 @@ sys_pipe2(struct proc *p, void *v, register_t *retval) > syscallarg(int) flags; > } */ *uap = v; > > - if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK)) > + if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK | FNONBLOCK)) > return (EINVAL); > > return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags))); > @@ -175,9 +175,10 @@ dopipe(struct proc *p, int *ufds, int flags) > struct file *rf, *wf; > struct pipe_pair *pp; > struct pipe *rpipe, *wpipe = NULL; > - int fds[2], cloexec, error; > + int fds[2], fdflags, error; > > - cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0; > + fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0) | > + ((flags & O_CLOFORK) ? 
UF_FOCLOSE : 0); > > pp = pipe_pair_create(); > if (pp == NULL) > @@ -203,8 +204,8 @@ dopipe(struct proc *p, int *ufds, int flags) > wf->f_data = wpipe; > wf->f_ops = &pipeops; > > - fdinsert(fdp, fds[0], cloexec, rf); > - fdinsert(fdp, fds[1], cloexec, wf); > + fdinsert(fdp, fds[0], fdflags, rf); > + fdinsert(fdp, fds[1], fdflags, wf); > > error = copyout(fds, ufds, sizeof(fds)); > if (error == 0) { > diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c > index 7a93c571a29..00833af2705 100644 > --- a/sys/kern/uipc_syscalls.c > +++ b/sys/kern/uipc_syscalls.c > @@ -81,7 +81,7 @@ sys_socket(struct proc *p, void *v, register_t *retval) > struct file *fp; > int type = SCARG(uap, type); > int domain = SCARG(uap, domain); > - int fd, cloexec, nonblock, fflag, error; > + int fd, fdflags, nonblock, fflag, error; > unsigned int ss = 0; > > if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6)) > @@ -93,8 +93,9 @@ sys_socket(struct proc *p, void *v, register_t *retval) > if (error) > return (error); > > - type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS); > - cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; > + type &= ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK | SOCK_DNS); > + fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) | > + ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FOCLOSE : 0); > nonblock = SCARG(uap, type) & SOCK_NONBLOCK; > fflag = FREAD | FWRITE | (nonblock ? 
FNONBLOCK : 0); > > @@ -113,7 +114,7 @@ sys_socket(struct proc *p, void *v, register_t *retval) > fp->f_ops = &socketops; > so->so_state |= ss; > fp->f_data = so; > - fdinsert(fdp, fd, cloexec, fp); > + fdinsert(fdp, fd, fdflags, fp); > fdpunlock(fdp); > FRELE(fp, p); > *retval = fd; > @@ -240,7 +241,7 @@ sys_accept4(struct proc *p, void *v, register_t *retval) > syscallarg(socklen_t *) int flags; > } */ *uap = v; > > - if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) > + if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK)) > return (EINVAL); > > return (doaccept(p, SCARG(uap, s), SCARG(uap, name), > @@ -257,9 +258,10 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, > socklen_t namelen; > int error, tmpfd; > struct socket *head, *so; > - int cloexec, nflag; > + int fdflags, nflag; > > - cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; > + fdflags = ((flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) | > + ((flags & SOCK_CLOFORK) ? UF_FOCLOSE : 0); > > if (name && (error = copyin(anamelen, &namelen, sizeof (namelen)))) > return (error); > @@ -346,7 +348,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, > } > > fdplock(fdp); > - fdinsert(fdp, tmpfd, cloexec, fp); > + fdinsert(fdp, tmpfd, fdflags, fp); > fdpunlock(fdp); > FRELE(fp, p); > *retval = tmpfd; > @@ -457,10 +459,11 @@ sys_socketpair(struct proc *p, void *v, register_t *retval) > struct filedesc *fdp = p->p_fd; > struct file *fp1 = NULL, *fp2 = NULL; > struct socket *so1, *so2; > - int type, cloexec, nonblock, fflag, error, sv[2]; > + int type, fdflags, nonblock, fflag, error, sv[2]; > > - type = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); > - cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; > + type = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK); > + fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) | > + ((SCARG(uap, type) & SOCK_CLOFORK) ? 
UF_FOCLOSE : 0); > nonblock = SCARG(uap, type) & SOCK_NONBLOCK; > fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0); > > @@ -498,8 +501,8 @@ sys_socketpair(struct proc *p, void *v, register_t *retval) > fp2->f_data = so2; > error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int)); > if (error == 0) { > - fdinsert(fdp, sv[0], cloexec, fp1); > - fdinsert(fdp, sv[1], cloexec, fp2); > + fdinsert(fdp, sv[0], fdflags, fp1); > + fdinsert(fdp, sv[1], fdflags, fp2); > fdpunlock(fdp); > #ifdef KTRACE > if (KTRPOINT(p, KTR_STRUCT)) > diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c > index f50a040d1e8..b025c071f41 100644 > --- a/sys/kern/uipc_usrreq.c > +++ b/sys/kern/uipc_usrreq.c > @@ -1146,6 +1146,8 @@ restart: > fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED); > if (flags & MSG_CMSG_CLOEXEC) > fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE; > + if (flags & MSG_CMSG_CLOFORK) > + fdp->fd_ofileflags[fds[i]] |= UF_FOCLOSE; > > rp++; > } > diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c > index 1f5731712a0..32734b2a3f4 100644 > --- a/sys/kern/vfs_syscalls.c > +++ b/sys/kern/vfs_syscalls.c > @@ -1086,7 +1086,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode, > struct file *fp; > struct vnode *vp; > struct vattr vattr; > - int flags, cloexec, cmode; > + int flags, fdflags, cmode; > int type, indx, error, localtrunc = 0; > struct flock lf; > struct nameidata nd; > @@ -1099,7 +1099,8 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode, > return (error); > } > > - cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0; > + fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0) | > + ((oflags & O_CLOFORK) ? 
UF_FOCLOSE : 0); > > fdplock(fdp); > if ((error = falloc(p, &fp, &indx)) != 0) { > @@ -1200,7 +1201,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode, > KERNEL_UNLOCK(); > *retval = indx; > fdplock(fdp); > - fdinsert(fdp, indx, cloexec, fp); > + fdinsert(fdp, indx, fdflags, fp); > fdpunlock(fdp); > FRELE(fp, p); > return (error); > @@ -1224,7 +1225,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval) > struct file *fp; > struct vnode *vp; > int oflags = SCARG(uap, flags); > - int flags, cloexec, cmode; > + int flags, fdflags, cmode; > int indx, error; > unsigned int i; > struct nameidata nd; > @@ -1232,9 +1233,11 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval) > static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-"; > > /* most flags are hardwired */ > - oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC); > + oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | > + (oflags & (O_CLOEXEC | O_CLOFORK)); > > - cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0; > + fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0) | > + ((oflags & O_CLOFORK) ? UF_FOCLOSE : 0); > > fdplock(fdp); > if ((error = falloc(p, &fp, &indx)) != 0) { > @@ -1270,7 +1273,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval) > VOP_UNLOCK(vp); > *retval = indx; > fdplock(fdp); > - fdinsert(fdp, indx, cloexec, fp); > + fdinsert(fdp, indx, fdflags, fp); > fdpunlock(fdp); > FRELE(fp, p); > > @@ -1352,7 +1355,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval) > struct vnode *vp = NULL; > struct mount *mp; > struct ucred *cred = p->p_ucred; > - int flags, cloexec; > + int flags, fdflags; > int type, indx, error=0; > struct flock lf; > struct vattr va; > @@ -1370,7 +1373,8 @@ sys_fhopen(struct proc *p, void *v, register_t *retval) > if ((flags & O_CREAT)) > return (EINVAL); > > - cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0; > + fdflags = ((flags & O_CLOEXEC) ? 
UF_EXCLOSE : 0) | > + ((flags & O_CLOFORK) ? UF_FOCLOSE : 0); > > fdplock(fdp); > if ((error = falloc(p, &fp, &indx)) != 0) { > @@ -1456,7 +1460,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval) > VOP_UNLOCK(vp); > *retval = indx; > fdplock(fdp); > - fdinsert(fdp, indx, cloexec, fp); > + fdinsert(fdp, indx, fdflags, fp); > fdpunlock(fdp); > FRELE(fp, p); > return (0); > diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h > index e964ea49dde..bd6e329afa1 100644 > --- a/sys/sys/fcntl.h > +++ b/sys/sys/fcntl.h > @@ -106,6 +106,7 @@ > /* defined by POSIX Issue 7 */ > #define O_CLOEXEC 0x10000 /* atomically set FD_CLOEXEC */ > #define O_DIRECTORY 0x20000 /* fail if not a directory */ > +#define O_CLOFORK 0x40000 /* atomically set FD_CLOFORK */ > > #ifdef _KERNEL > /* > @@ -158,9 +159,13 @@ > #if __BSD_VISIBLE > #define F_ISATTY 11 /* used by isatty(3) */ > #endif > +#if __POSIX_VISIBLE >= 202405 > +#define F_DUPFD_CLOFORK 12 /* duplicate with FD_CLOFORK set */ > +#endif > > /* file descriptor flags (F_GETFD, F_SETFD) */ > #define FD_CLOEXEC 1 /* close-on-exec flag */ > +#define FD_CLOFORK 2 /* close-on-fork flag */ > > /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ > #define F_RDLCK 1 /* shared or read lock */ > diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h > index 50bc7734a02..38857d98fd7 100644 > --- a/sys/sys/filedesc.h > +++ b/sys/sys/filedesc.h > @@ -115,6 +115,7 @@ struct filedesc0 { > */ > #define UF_EXCLOSE 0x01 /* auto-close on exec */ > #define UF_PLEDGED 0x02 /* open after pledge(2) */ > +#define UF_FOCLOSE 0x04 /* auto-close on fork */ > > /* > * Flags on the file descriptor table. 
> diff --git a/sys/sys/socket.h b/sys/sys/socket.h > index 4fd50d29274..4e4987ca255 100644 > --- a/sys/sys/socket.h > +++ b/sys/sys/socket.h > @@ -79,6 +79,7 @@ typedef __sa_family_t sa_family_t; /* sockaddr address family type */ > #define SOCK_NONBLOCK_INHERIT 0x2000 /* inherit O_NONBLOCK from listener */ > #endif > #define SOCK_DNS 0x1000 /* set SS_DNS */ > +#define SOCK_CLOFORK 0x0800 /* set FD_CLOFORK */ > #endif /* __BSD_VISIBLE */ > > /* > @@ -511,6 +512,7 @@ struct timespec; > #define MSG_NOSIGNAL 0x400 /* do not send SIGPIPE */ > #define MSG_CMSG_CLOEXEC 0x800 /* set FD_CLOEXEC on received fds */ > #define MSG_WAITFORONE 0x1000 /* nonblocking but wait for one msg */ > +#define MSG_CMSG_CLOFORK 0x2000 /* set FD_CLOFORK on received fds */ > > /* > * Header for ancillary data objects in msg_control buffer. > diff --git a/usr.bin/fstat/fstat.c b/usr.bin/fstat/fstat.c > index a74d3a6e916..acd1ffe26cc 100644 > --- a/usr.bin/fstat/fstat.c > +++ b/usr.bin/fstat/fstat.c > @@ -482,6 +482,8 @@ vtrans(struct kinfo_file *kf) > strlcat(rwep, "w", sizeof rwep); > if (kf->fd_ofileflags & UF_EXCLOSE) > strlcat(rwep, "e", sizeof rwep); > + if (kf->fd_ofileflags & UF_FOCLOSE) > + strlcat(rwep, "f", sizeof rwep); > if (kf->fd_ofileflags & UF_PLEDGED) > strlcat(rwep, "p", sizeof rwep); > printf(" %4s", rwep); > -- > 2.49.0 >