From: Ricardo Branco Subject: [PATCH]: Add POSIX O_CLOFORK flag To: tech@openbsd.org Date: Sat, 21 Jun 2025 23:35:36 +0200 This initial patch adds support for the POSIX O_CLOFORK (close-on-fork) flag. If there's interest, I can update the manpages and complete the TODO list in the PR: https://github.com/openbsd/src/pull/46 I uploaded the full test suite from illumos, adapted to OpenBSD, there. Work is also being done to add this flag to: - FreeBSD: https://github.com/freebsd/freebsd-src/pull/1698 - DragonFly BSD: https://github.com/DragonFlyBSD/DragonFlyBSD/pull/28 The discussion about adding this flag took place in the FreeBSD PR. Best, Ricardo --- sys/kern/kern_descrip.c | 32 ++++++++++++++++++++++---------- sys/kern/sys_pipe.c | 11 ++++++----- sys/kern/uipc_syscalls.c | 29 ++++++++++++++++------------- sys/kern/uipc_usrreq.c | 2 ++ sys/kern/vfs_syscalls.c | 24 ++++++++++++++---------- sys/sys/fcntl.h | 5 +++++ sys/sys/filedesc.h | 1 + sys/sys/socket.h | 2 ++ usr.bin/fstat/fstat.c | 2 ++ 9 files changed, 70 insertions(+), 38 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 3e57566b820..550b1c3ae9e 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -80,6 +80,7 @@ int dodup3(struct proc *, int, int, int, register_t *); #define DUPF_CLOEXEC 0x01 #define DUPF_DUP2 0x02 +#define DUPF_CLOFORK 0x04 struct pool file_pool; struct pool fdesc_pool; @@ -336,7 +337,7 @@ sys_dup3(struct proc *p, void *v, register_t *retval) if (SCARG(uap, from) == SCARG(uap, to)) return (EINVAL); - if (SCARG(uap, flags) & ~O_CLOEXEC) + if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK)) return (EINVAL); return (dodup3(p, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags), retval)); @@ -388,6 +389,8 @@ restart: dupflags = DUPF_DUP2; if (flags & O_CLOEXEC) dupflags |= DUPF_CLOEXEC; + if (flags & O_CLOFORK) + dupflags |= DUPF_CLOFORK; /* No need for FRELE(), finishdup() uses current ref. 
*/ return (finishdup(p, fp, old, new, retval, dupflags)); @@ -423,6 +426,7 @@ restart: case F_DUPFD: case F_DUPFD_CLOEXEC: + case F_DUPFD_CLOFORK: newmin = (long)SCARG(uap, arg); if ((u_int)newmin >= lim_cur(RLIMIT_NOFILE) || (u_int)newmin >= atomic_load_int(&maxfiles)) { @@ -444,6 +448,8 @@ restart: if (SCARG(uap, cmd) == F_DUPFD_CLOEXEC) dupflags |= DUPF_CLOEXEC; + else if (SCARG(uap, cmd) == F_DUPFD_CLOFORK) + dupflags |= DUPF_CLOFORK; /* No need for FRELE(), finishdup() uses current ref. */ error = finishdup(p, fp, fd, i, retval, dupflags); @@ -452,16 +458,17 @@ restart: case F_GETFD: fdplock(fdp); - *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; + *retval = + ((fdp->fd_ofileflags[fd] & UF_EXCLOSE) ? FD_CLOEXEC : 0) | + ((fdp->fd_ofileflags[fd] & UF_FOCLOSE) ? FD_CLOFORK : 0); fdpunlock(fdp); break; case F_SETFD: fdplock(fdp); - if ((long)SCARG(uap, arg) & 1) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - else - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; + fdp->fd_ofileflags[fd] = + (((long)SCARG(uap, arg) & FD_CLOEXEC) ? UF_EXCLOSE : 0) | + (((long)SCARG(uap, arg) & FD_CLOFORK) ? UF_FOCLOSE : 0); fdpunlock(fdp); break; @@ -667,9 +674,12 @@ finishdup(struct proc *p, struct file *fp, int old, int new, fdp->fd_ofiles[new] = fp; mtx_leave(&fdp->fd_fplock); - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + fdp->fd_ofileflags[new] = + fdp->fd_ofileflags[old] & ~(UF_EXCLOSE | UF_FOCLOSE); if (dupflags & DUPF_CLOEXEC) fdp->fd_ofileflags[new] |= UF_EXCLOSE; + if (dupflags & DUPF_CLOFORK) + fdp->fd_ofileflags[new] |= UF_FOCLOSE; *retval = new; if (oldfp != NULL) { @@ -711,7 +721,7 @@ fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp) fdp->fd_ofiles[fd] = fp; mtx_leave(&fdp->fd_fplock); - fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE); + fdp->fd_ofileflags[fd] |= (flags & (UF_EXCLOSE | UF_FOCLOSE)); } void @@ -1150,6 +1160,7 @@ fdcopy(struct process *pr) * their internal consistency, so close them here. 
*/ if (fp->f_count >= FDUP_MAX_COUNT || + (fdp->fd_ofileflags[i] & UF_FOCLOSE) != 0 || fp->f_type == DTYPE_KQUEUE) { if (i < newfdp->fd_freefile) newfdp->fd_freefile = i; @@ -1407,8 +1418,9 @@ dupfdopen(struct proc *p, int indx, int mode) fdp->fd_ofiles[indx] = wfp; mtx_leave(&fdp->fd_fplock); - fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | - (fdp->fd_ofileflags[dupfd] & ~UF_EXCLOSE); + fdp->fd_ofileflags[indx] = + (fdp->fd_ofileflags[indx] & (UF_EXCLOSE | UF_FOCLOSE)) | + (fdp->fd_ofileflags[dupfd] & ~(UF_EXCLOSE | UF_FOCLOSE)); return (0); } diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 12254a052da..d278647d382 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -162,7 +162,7 @@ sys_pipe2(struct proc *p, void *v, register_t *retval) syscallarg(int) flags; } */ *uap = v; - if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK)) + if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK | FNONBLOCK)) return (EINVAL); return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags))); @@ -175,9 +175,10 @@ dopipe(struct proc *p, int *ufds, int flags) struct file *rf, *wf; struct pipe_pair *pp; struct pipe *rpipe, *wpipe = NULL; - int fds[2], cloexec, error; + int fds[2], fdflags, error; - cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0) | + ((flags & O_CLOFORK) ? 
UF_FOCLOSE : 0); pp = pipe_pair_create(); if (pp == NULL) @@ -203,8 +204,8 @@ dopipe(struct proc *p, int *ufds, int flags) wf->f_data = wpipe; wf->f_ops = &pipeops; - fdinsert(fdp, fds[0], cloexec, rf); - fdinsert(fdp, fds[1], cloexec, wf); + fdinsert(fdp, fds[0], fdflags, rf); + fdinsert(fdp, fds[1], fdflags, wf); error = copyout(fds, ufds, sizeof(fds)); if (error == 0) { diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 7a93c571a29..00833af2705 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -81,7 +81,7 @@ sys_socket(struct proc *p, void *v, register_t *retval) struct file *fp; int type = SCARG(uap, type); int domain = SCARG(uap, domain); - int fd, cloexec, nonblock, fflag, error; + int fd, fdflags, nonblock, fflag, error; unsigned int ss = 0; if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6)) @@ -93,8 +93,9 @@ sys_socket(struct proc *p, void *v, register_t *retval) if (error) return (error); - type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS); - cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; + type &= ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK | SOCK_DNS); + fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) | + ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FOCLOSE : 0); nonblock = SCARG(uap, type) & SOCK_NONBLOCK; fflag = FREAD | FWRITE | (nonblock ? 
FNONBLOCK : 0); @@ -113,7 +114,7 @@ sys_socket(struct proc *p, void *v, register_t *retval) fp->f_ops = &socketops; so->so_state |= ss; fp->f_data = so; - fdinsert(fdp, fd, cloexec, fp); + fdinsert(fdp, fd, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); *retval = fd; @@ -240,7 +241,7 @@ sys_accept4(struct proc *p, void *v, register_t *retval) syscallarg(socklen_t *) int flags; } */ *uap = v; - if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK)) return (EINVAL); return (doaccept(p, SCARG(uap, s), SCARG(uap, name), @@ -257,9 +258,10 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, socklen_t namelen; int error, tmpfd; struct socket *head, *so; - int cloexec, nflag; + int fdflags, nflag; - cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) | + ((flags & SOCK_CLOFORK) ? UF_FOCLOSE : 0); if (name && (error = copyin(anamelen, &namelen, sizeof (namelen)))) return (error); @@ -346,7 +348,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, } fdplock(fdp); - fdinsert(fdp, tmpfd, cloexec, fp); + fdinsert(fdp, tmpfd, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); *retval = tmpfd; @@ -457,10 +459,11 @@ sys_socketpair(struct proc *p, void *v, register_t *retval) struct filedesc *fdp = p->p_fd; struct file *fp1 = NULL, *fp2 = NULL; struct socket *so1, *so2; - int type, cloexec, nonblock, fflag, error, sv[2]; + int type, fdflags, nonblock, fflag, error, sv[2]; - type = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); - cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0; + type = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK); + fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0) | + ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FOCLOSE : 0); nonblock = SCARG(uap, type) & SOCK_NONBLOCK; fflag = FREAD | FWRITE | (nonblock ? 
FNONBLOCK : 0); @@ -498,8 +501,8 @@ sys_socketpair(struct proc *p, void *v, register_t *retval) fp2->f_data = so2; error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int)); if (error == 0) { - fdinsert(fdp, sv[0], cloexec, fp1); - fdinsert(fdp, sv[1], cloexec, fp2); + fdinsert(fdp, sv[0], fdflags, fp1); + fdinsert(fdp, sv[1], fdflags, fp2); fdpunlock(fdp); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index f50a040d1e8..b025c071f41 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1146,6 +1146,8 @@ restart: fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED); if (flags & MSG_CMSG_CLOEXEC) fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE; + if (flags & MSG_CMSG_CLOFORK) + fdp->fd_ofileflags[fds[i]] |= UF_FOCLOSE; rp++; } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 1f5731712a0..32734b2a3f4 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1086,7 +1086,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode, struct file *fp; struct vnode *vp; struct vattr vattr; - int flags, cloexec, cmode; + int flags, fdflags, cmode; int type, indx, error, localtrunc = 0; struct flock lf; struct nameidata nd; @@ -1099,7 +1099,8 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode, return (error); } - cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0) | + ((oflags & O_CLOFORK) ? 
UF_FOCLOSE : 0); fdplock(fdp); if ((error = falloc(p, &fp, &indx)) != 0) { @@ -1200,7 +1201,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode, KERNEL_UNLOCK(); *retval = indx; fdplock(fdp); - fdinsert(fdp, indx, cloexec, fp); + fdinsert(fdp, indx, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); return (error); @@ -1224,7 +1225,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval) struct file *fp; struct vnode *vp; int oflags = SCARG(uap, flags); - int flags, cloexec, cmode; + int flags, fdflags, cmode; int indx, error; unsigned int i; struct nameidata nd; @@ -1232,9 +1233,11 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval) static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-"; /* most flags are hardwired */ - oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC); + oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | + (oflags & (O_CLOEXEC | O_CLOFORK)); - cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0) | + ((oflags & O_CLOFORK) ? UF_FOCLOSE : 0); fdplock(fdp); if ((error = falloc(p, &fp, &indx)) != 0) { @@ -1270,7 +1273,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval) VOP_UNLOCK(vp); *retval = indx; fdplock(fdp); - fdinsert(fdp, indx, cloexec, fp); + fdinsert(fdp, indx, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); @@ -1352,7 +1355,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval) struct vnode *vp = NULL; struct mount *mp; struct ucred *cred = p->p_ucred; - int flags, cloexec; + int flags, fdflags; int type, indx, error=0; struct flock lf; struct vattr va; @@ -1370,7 +1373,8 @@ sys_fhopen(struct proc *p, void *v, register_t *retval) if ((flags & O_CREAT)) return (EINVAL); - cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0; + fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0) | + ((flags & O_CLOFORK) ? 
UF_FOCLOSE : 0); fdplock(fdp); if ((error = falloc(p, &fp, &indx)) != 0) { @@ -1456,7 +1460,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval) VOP_UNLOCK(vp); *retval = indx; fdplock(fdp); - fdinsert(fdp, indx, cloexec, fp); + fdinsert(fdp, indx, fdflags, fp); fdpunlock(fdp); FRELE(fp, p); return (0); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index e964ea49dde..bd6e329afa1 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -106,6 +106,7 @@ /* defined by POSIX Issue 7 */ #define O_CLOEXEC 0x10000 /* atomically set FD_CLOEXEC */ #define O_DIRECTORY 0x20000 /* fail if not a directory */ +#define O_CLOFORK 0x40000 /* atomically set FD_CLOFORK */ #ifdef _KERNEL /* @@ -158,9 +159,13 @@ #if __BSD_VISIBLE #define F_ISATTY 11 /* used by isatty(3) */ #endif +#if __POSIX_VISIBLE >= 202405 +#define F_DUPFD_CLOFORK 12 /* duplicate with FD_CLOFORK set */ +#endif /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */ +#define FD_CLOFORK 2 /* close-on-fork flag */ /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ #define F_RDLCK 1 /* shared or read lock */ diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 50bc7734a02..38857d98fd7 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -115,6 +115,7 @@ struct filedesc0 { */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ #define UF_PLEDGED 0x02 /* open after pledge(2) */ +#define UF_FOCLOSE 0x04 /* auto-close on fork */ /* * Flags on the file descriptor table. 
diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 4fd50d29274..4e4987ca255 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -79,6 +79,7 @@ typedef __sa_family_t sa_family_t; /* sockaddr address family type */ #define SOCK_NONBLOCK_INHERIT 0x2000 /* inherit O_NONBLOCK from listener */ #endif #define SOCK_DNS 0x1000 /* set SS_DNS */ +#define SOCK_CLOFORK 0x0800 /* set FD_CLOFORK */ #endif /* __BSD_VISIBLE */ /* @@ -511,6 +512,7 @@ struct timespec; #define MSG_NOSIGNAL 0x400 /* do not send SIGPIPE */ #define MSG_CMSG_CLOEXEC 0x800 /* set FD_CLOEXEC on received fds */ #define MSG_WAITFORONE 0x1000 /* nonblocking but wait for one msg */ +#define MSG_CMSG_CLOFORK 0x2000 /* set FD_CLOFORK on received fds */ /* * Header for ancillary data objects in msg_control buffer. diff --git a/usr.bin/fstat/fstat.c b/usr.bin/fstat/fstat.c index a74d3a6e916..acd1ffe26cc 100644 --- a/usr.bin/fstat/fstat.c +++ b/usr.bin/fstat/fstat.c @@ -482,6 +482,8 @@ vtrans(struct kinfo_file *kf) strlcat(rwep, "w", sizeof rwep); if (kf->fd_ofileflags & UF_EXCLOSE) strlcat(rwep, "e", sizeof rwep); + if (kf->fd_ofileflags & UF_FOCLOSE) + strlcat(rwep, "f", sizeof rwep); if (kf->fd_ofileflags & UF_PLEDGED) strlcat(rwep, "p", sizeof rwep); printf(" %4s", rwep); -- 2.49.0