Index | Thread | Search

From:
Ricardo Branco <rbranco@suse.de>
Subject:
Re: [PATCH]: Add POSIX O_CLOFORK flag
To:
Philip Guenther <guenther@gmail.com>
Cc:
tech@openbsd.org
Date:
Sun, 22 Jun 2025 19:30:52 +0200

Download raw body.

Thread
  • Ricardo Branco:

    [PATCH]: Add POSIX O_CLOFORK flag

  • On 6/22/25 2:35 AM, Philip Guenther wrote:
    > On Sat, Jun 21, 2025 at 4:44 PM Philip Guenther <guenther@gmail.com> wrote:
    >> On Sat, Jun 21, 2025 at 4:04 PM Ricardo Branco <rbranco@suse.de> wrote:
    >>> This initial patch adds support for POSIX O_CLOFORK (close-on-fork) flag.
    >>>
    >>> If there's interest, I can update manpages and fill the TODO list in the PR:
    >>> https://github.com/openbsd/src/pull/46
    >>>
    >>> I uploaded the full test-suite from Illumos adapted to OpenBSD there.
    >>>
    >>> Work also being done to add this flag on:
    >>>
    >>> - FreeBSD: https://github.com/freebsd/freebsd-src/pull/1698
    >>> - DragonflyBSD: https://github.com/DragonFlyBSD/DragonFlyBSD/pull/28
    >>>
    >>> The discussion for adding this flag was done in the FreeBSD PR.
    >> Nope.  I implemented this myself last summer, but after Damien Miller
    >> suggested that OpenSSH would want to clear the flag on inherited fds we
    >> decided the specified behavior of O_CLOFORK being inherited across
    >> exec is insecure, unnecessary for purpose, and kinda insane.  I opened
    >> a ticket with austin group:
    >>      https://austingroupbugs.net/view.php?id=1851
    >>
    >> Geoff Clare was going to reach out to other implementations to get
    >> feedback but nothing has happened since.  <shrug>
    >>
    >> Maybe we should say that more than 10 months was sufficient for
    >> austin-group to address a potential security issue, in which case I'll
    >> rebase my diff, but with clearing the flag on exec because WTH were
    >> they thinking.
    > Rebased diff, with cleared-on-exec behavior, attached, in case you
    > want to play with it, Ricardo.
    > Regress tests would be wonderful :)
    >
    > Philip
    I modified your code a bit and tested this patch with a minimal tweak
    to the Illumos testsuite which contains hundreds of tests and it works!
    
    If you're ok with it, I'll extend the current O_CLOEXEC regression
    tests to also cover O_CLOFORK.
    
    Didn't check the manpages.  I dunno how we should document the
    deviation from POSIX.
    
    I uploaded this gist to github containing the diff to the oclo tests:
    https://gist.github.com/ricardobranco777/2d073c9f025eec6a87ce27bbbf60ac99
    
    Apply to the C files from:
    https://github.com/illumos/illumos-gate/tree/master/usr/src/test/os-tests/tests/oclo
    
    Best,
    Ricardo
    From 2bd21db4e48d499abaac013009cda6d0769e0049 Mon Sep 17 00:00:00 2001
    From: Ricardo Branco <rbranco@suse.de>
    Date: Sun, 22 Jun 2025 12:06:02 +0200
    Subject: [PATCH] kern: Add support for POSIX O_CLOFORK flag
    
    Co-authored-by: Philip Guenther <guenther@gmail.com>
    ---
     lib/libc/gen/opendir.c    |   4 +-
     lib/libc/gen/shm_open.3   |   5 +-
     lib/libc/gen/shm_open.c   |   5 +-
     lib/libc/stdlib/mkstemp.c |   3 +-
     lib/libc/stdlib/mktemp.3  |  10 ++--
     lib/libc/sys/accept.2     |  24 ++++-----
     lib/libc/sys/dup.2        |  27 +++++-----
     lib/libc/sys/execve.2     |  16 ++++--
     lib/libc/sys/fcntl.2      |  47 ++++++++++++++---
     lib/libc/sys/open.2       |   5 ++
     lib/libc/sys/pipe.2       |  22 ++++----
     lib/libc/sys/socket.2     |   9 ++--
     lib/libc/sys/socketpair.2 |   9 ++--
     lib/libc/sys/w_fcntl.c    |   1 +
     sys/kern/kern_descrip.c   |  43 ++++++++++-----
     sys/kern/kern_exec.c      | 108 ++++++++++++++++++--------------------
     sys/kern/sys_pipe.c       |  11 ++--
     sys/kern/uipc_syscalls.c  |  29 +++++-----
     sys/kern/uipc_usrreq.c    |   2 +
     sys/kern/vfs_syscalls.c   |  24 +++++----
     sys/sys/fcntl.h           |  25 ++++++---
     sys/sys/filedesc.h        |   3 +-
     sys/sys/socket.h          |  10 +++-
     usr.bin/fstat/fstat.c     |   2 +
     24 files changed, 266 insertions(+), 178 deletions(-)
    
    diff --git a/lib/libc/gen/opendir.c b/lib/libc/gen/opendir.c
    index ef198924efb..0f09ac875e4 100644
    --- a/lib/libc/gen/opendir.c
    +++ b/lib/libc/gen/opendir.c
    @@ -82,7 +82,9 @@ fdopendir(int fd)
     		 * POSIX doesn't require fdopendir() to set
     		 * FD_CLOEXEC, so it's okay for this to fail.
     		 */
    -		(void)fcntl(fd, F_SETFD, FD_CLOEXEC);
    +		flags = fcntl(fd, F_GETFD);
    +		if (flags != -1 && (flags & FD_CLOEXEC) == 0)
    +			(void)fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
     	}
     	return (dirp);
     }
    diff --git a/lib/libc/gen/shm_open.3 b/lib/libc/gen/shm_open.3
    index 02e3c3aba65..1bbfedc82db 100644
    --- a/lib/libc/gen/shm_open.3
    +++ b/lib/libc/gen/shm_open.3
    @@ -45,7 +45,7 @@ and must include at least
     or
     .Dv O_RDWR
     and may also include a combination of
    -.Dv O_CREAT , O_EXCL , O_CLOEXEC , O_NOFOLLOW ,
    +.Dv O_CREAT , O_EXCL , O_CLOEXEC , O_CLOFORK , O_NOFOLLOW ,
     or
     .Dv O_TRUNC .
     This implementation forces the
    @@ -82,7 +82,8 @@ and
     appear in
     .St -p1003.1-2001 .
     Using
    -.Dv O_CLOEXEC
    +.Dv O_CLOEXEC ,
    +.Dv O_CLOFORK ,
     or
     .Dv O_NOFOLLOW
     with
    diff --git a/lib/libc/gen/shm_open.c b/lib/libc/gen/shm_open.c
    index 106c7e2261d..c32494d2903 100644
    --- a/lib/libc/gen/shm_open.c
    +++ b/lib/libc/gen/shm_open.c
    @@ -31,8 +31,9 @@
     /* "/tmp/" + sha256 + ".shm" */
     #define SHM_PATH_SIZE (5 + SHA256_DIGEST_STRING_LENGTH + 4)
     
    -/* O_CLOEXEC and O_NOFOLLOW are extensions to POSIX */
    -#define OK_FLAGS	(O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC | O_NOFOLLOW)
    +/* O_CLOEXEC, O_CLOFORK, and O_NOFOLLOW are extensions to POSIX */
    +#define OK_FLAGS \
    +	(O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC | O_CLOFORK | O_NOFOLLOW)
     
     static void
     makeshmpath(const char *origpath, char *shmpath, size_t len)
    diff --git a/lib/libc/stdlib/mkstemp.c b/lib/libc/stdlib/mkstemp.c
    index 75a9d27d1a5..fde2d6e06e8 100644
    --- a/lib/libc/stdlib/mkstemp.c
    +++ b/lib/libc/stdlib/mkstemp.c
    @@ -20,7 +20,8 @@
     #include <fcntl.h>
     #include <stdlib.h>
     
    -#define MKOSTEMP_FLAGS	(O_APPEND | O_CLOEXEC | O_DSYNC | O_RSYNC | O_SYNC)
    +#define MKOSTEMP_FLAGS \
    +	(O_APPEND | O_CLOEXEC | O_CLOFORK | O_DSYNC | O_RSYNC | O_SYNC)
     
     static int
     mkstemp_cb(const char *path, int flags)
    diff --git a/lib/libc/stdlib/mktemp.3 b/lib/libc/stdlib/mktemp.3
    index 83b7c9eb301..bd63f08e5da 100644
    --- a/lib/libc/stdlib/mktemp.3
    +++ b/lib/libc/stdlib/mktemp.3
    @@ -119,6 +119,8 @@ system call:
     Append on each write.
     .It Dv O_CLOEXEC
     Set the close-on-exec flag on the new file descriptor.
    +.It Dv O_CLOFORK
    +Set the close-on-fork flag on the new file descriptor.
     .It Dv O_SYNC
     Perform synchronous I/O operations.
     .El
    @@ -345,18 +347,16 @@ function.
     .Xr tmpnam 3
     .Sh STANDARDS
     The
    -.Fn mkdtemp
    +.Fn mkdtemp ,
    +.Fn mkostemp ,
     and
     .Fn mkstemp
     functions conform to the
    -.St -p1003.1-2008
    +.St -p1003.1-2024
     specification.
     The ability to specify more than six
     .Em X Ns s
     is an extension to that standard.
    -The
    -.Fn mkostemp
    -function is expected to conform to a future revision of that standard.
     .Pp
     The
     .Fn mktemp
    diff --git a/lib/libc/sys/accept.2 b/lib/libc/sys/accept.2
    index 7272841b794..04df643b4ad 100644
    --- a/lib/libc/sys/accept.2
    +++ b/lib/libc/sys/accept.2
    @@ -58,19 +58,18 @@ call extracts the first connection request on the queue of pending
     connections, creates a new socket with the same non-blocking I/O mode as
     .Fa s ,
     and allocates a new file descriptor for the socket with the
    -close-on-exec flag clear.
    +close-on-exec and close-on-fork flags clear.
     .Pp
     The
     .Fn accept4
    -system call is similar, however the non-blocking I/O mode of the
    -new socket is determined by the
    -.Dv SOCK_NONBLOCK
    -flag in the
    -.Fa flags
    -argument and the close-on-exec flag on the new file descriptor is
    +system call is similar, however the non-blocking I/O mode,
    +close-on-exec flag,
    +and close-on-fork flag on the new file descriptor are
     determined by the
    -.Dv SOCK_CLOEXEC
    -flag in the
    +.Dv SOCK_NONBLOCK , SOCK_CLOEXEC ,
    +and
    +.Dv SOCK_CLOFORK
    +flags, respectively, in the
     .Fa flags
     argument.
     .Pp
    @@ -204,11 +203,10 @@ is invalid.
     .Sh STANDARDS
     The
     .Fn accept
    -function conforms to
    -.St -p1003.1-2008 .
    -The
    +and
     .Fn accept4
    -function is expected to conform to a future revision of that standard.
    +functions conform to
    +.St -p1003.1-2024 .
     .Sh HISTORY
     The
     .Fn accept
    diff --git a/lib/libc/sys/dup.2 b/lib/libc/sys/dup.2
    index 948f1082a80..8ed985e5d8a 100644
    --- a/lib/libc/sys/dup.2
    +++ b/lib/libc/sys/dup.2
    @@ -86,7 +86,8 @@ object reference to the file must be obtained by issuing an
     additional
     .Xr open 2
     call.
    -The close-on-exec flag on the new file descriptor is unset.
    +The close-on-exec and close-on-fork flags on the new file descriptor
    +are unset.
     .Pp
     In
     .Fn dup2 ,
    @@ -101,18 +102,21 @@ When
     equals
     .Fa oldd ,
     .Fn dup2
    -just returns without affecting the close-on-exec flag.
    +just returns without affecting the close-on-exec or close-on-fork flags.
     .Pp
     In
     .Fn dup3 ,
    -both the value of the new descriptor and the close-on-exec flag on
    -the new file descriptor are specified:
    +the value of the new descriptor and the close-on-exec and close-on-fork
    +flags on
    +the new file descriptor are all specified:
     .Fa newd
     specifies the value and the
     .Dv O_CLOEXEC
    -bit in
    +and
    +.Dv O_CLOFORK
    +bits in
     .Fa flags
    -specifies the close-on-exec flag.
    +specify the close-on-exec and close-on-fork flags, respectively.
     Unlike
     .Fn dup2 ,
     if
    @@ -192,14 +196,13 @@ is invalid.
     .Xr socketpair 2 ,
     .Xr getdtablesize 3
     .Sh STANDARDS
    -.Fn dup
    -and
    -.Fn dup2
    -conform to
    -.St -p1003.1-2008 .
     The
    +.Fn dup ,
    +.Fn dup2 ,
    +and
     .Fn dup3
    -function is expected to conform to a future revision of that standard.
    +functions conform to
    +.St -p1003.1-2024 .
     .Sh HISTORY
     The
     .Fn dup
    diff --git a/lib/libc/sys/execve.2 b/lib/libc/sys/execve.2
    index 0d5c1b03f23..54b49996180 100644
    --- a/lib/libc/sys/execve.2
    +++ b/lib/libc/sys/execve.2
    @@ -108,9 +108,10 @@ flag is set (see
     .Xr close 2
     and
     .Xr fcntl 2 ) .
    -Descriptors that remain open are unaffected by
    -.Fn execve .
    -In the case of a new setuid or setgid executable being executed, if
    +Other descriptors remain open after
    +.Fn execve ,
    +however the close-on-fork flag is cleared.
    +If
     file descriptors 0, 1, or 2 (representing stdin, stdout, and stderr)
     are currently unallocated, these descriptors will be opened to point to
     some system file like
    @@ -329,6 +330,15 @@ The
     .Fn execve
     function first appeared in
     .At v7 .
    +.Pp
    +In
    +.Ox 2.4 ,
    +.Fn execve
    +started ensuring that file descriptors 0, 1, and 2 are open when
    +starting a setuid or setgid process.
    +In
    +.Ox 7.8
    +that was extended to all processes.
     .Sh CAVEATS
     If a program is
     .Em setuid
    diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2
    index d45896800e3..3a48f3ff6e5 100644
    --- a/lib/libc/sys/fcntl.2
    +++ b/lib/libc/sys/fcntl.2
    @@ -95,22 +95,47 @@ flag associated with the new file descriptor is set, so the file descriptor
     is closed when
     .Xr execve 2
     is called.
    +.It Dv F_DUPFD_CLOFORK
    +Like
    +.Dv F_DUPFD ,
    +but the
    +.Dv FD_CLOFORK
    +flag associated with the new file descriptor is set, so the file descriptor
    +is closed in the child process when
    +.Xr fork 2
    +or
    +.Xr vfork 2
    +is called.
     .It Dv F_GETFD
    -Get the close-on-exec flag associated with the file descriptor
    +Get the close-on-exec and close-on-fork flags associated with the
    +file descriptor
     .Fa fd
     as
    -.Dv FD_CLOEXEC .
    +.Dv FD_CLOEXEC
    +and
    +.Dv FD_CLOFORK .
     If the returned value ANDed with
     .Dv FD_CLOEXEC
     is 0,
     the file will remain open across
     .Fn exec ,
     otherwise the file will be closed upon execution of
    -.Fn exec
    +.Fn exec ;
    +if the returned value ANDed with
    +.Dv FD_CLOFORK
    +is 0,
    +the file will remain open across
    +.Fn fork
    +and
    +.Fn vfork ,
    +otherwise the file will be closed in the child process upon execution of
    +.Fn fork
    +or
    +.Fn vfork
     .Fa ( arg
     is ignored).
     .It Dv F_SETFD
    -Set the close-on-exec flag associated with
    +Set the close-on-exec and close-on-fork flags associated with
     .Fa fd
     to
     .Fa arg ,
    @@ -118,8 +143,10 @@ where
     .Fa arg
     (interpreted as an
     .Vt int )
    -is either 0 or
    -.Dv FD_CLOEXEC ,
    +is the bitwise OR of zero or more of
    +.Dv FD_CLOEXEC
    +and
    +.Dv FD_CLOFORK ,
     as described above.
     .It Dv F_GETFL
     Get file status flags associated with the file descriptor
    @@ -392,8 +419,14 @@ as follows:
     A new file descriptor.
     .It Dv F_DUPFD_CLOEXEC
     A new file descriptor.
    +.It Dv F_DUPFD_CLOFORK
    +A new file descriptor.
     .It Dv F_GETFD
    -Value of flag (only the low-order bit is defined).
    +Value of file descriptor flags (only the
    +.Dv FD_CLOEXEC
    +and
    +.Dv FD_CLOFORK
    +bits are defined).
     .It Dv F_GETFL
     Value of flags.
     .It Dv F_GETOWN
    diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2
    index c8e056bbd92..69489681811 100644
    --- a/lib/libc/sys/open.2
    +++ b/lib/libc/sys/open.2
    @@ -109,6 +109,11 @@ Set
     .Dv FD_CLOEXEC
     (the close-on-exec flag)
     on the new file descriptor.
    +.It Dv O_CLOFORK
    +Set
    +.Dv FD_CLOFORK
    +(the close-on-fork flag)
    +on the new file descriptor.
     .It Dv O_DIRECTORY
     Error if
     .Fa path
    diff --git a/lib/libc/sys/pipe.2 b/lib/libc/sys/pipe.2
    index ca7a2d708a5..79b03c4bd6f 100644
    --- a/lib/libc/sys/pipe.2
    +++ b/lib/libc/sys/pipe.2
    @@ -79,15 +79,14 @@ The
     .Fn pipe2
     function is identical to
     .Fn pipe
    -except that the non-blocking I/O mode on both ends of the pipe is
    +except that the non-blocking I/O mode,
    +close-on-exec flag,
    +and close-on-fork flag are
     determined by the
    -.Dv O_NONBLOCK
    -flag in the
    -.Fa flags
    -argument and the close-on-exec flag on both the new file descriptors
    -is determined by the
    -.Dv O_CLOEXEC
    -flag in the
    +.Dv O_NONBLOCK , O_CLOEXEC ,
    +and
    +.Dv O_CLOFORK
    +flags, respectively, in the
     .Fa flags
     argument.
     .Sh RETURN VALUES
    @@ -125,11 +124,10 @@ is invalid.
     .Sh STANDARDS
     The
     .Fn pipe
    -function conforms to
    -.St -p1003.1-2008 .
    -The
    +and
     .Fn pipe2
    -function is expected to conform to a future revision of that standard.
    +functions conform to
    +.St -p1003.1-2024 .
     .Pp
     As an extension, the pipe provided is actually capable of moving
     data bidirectionally.
    diff --git a/lib/libc/sys/socket.2 b/lib/libc/sys/socket.2
    index 89848869c92..ce7fc774ecf 100644
    --- a/lib/libc/sys/socket.2
    +++ b/lib/libc/sys/socket.2
    @@ -103,6 +103,8 @@ argument:
     .Bl -tag -width "SOCK_NONBLOCKX" -offset indent -compact
     .It SOCK_CLOEXEC
     Set close-on-exec flag on the new descriptor.
    +.It SOCK_CLOFORK
    +Set close-on-fork flag on the new descriptor.
     .It SOCK_NONBLOCK
     Set non-blocking I/O mode on the new socket.
     .It SOCK_DNS
    @@ -282,12 +284,7 @@ is denied.
     The
     .Fn socket
     function conforms to
    -.St -p1003.1-2008 .
    -The
    -.Dv SOCK_CLOEXEC
    -and
    -.Dv SOCK_NONBLOCK
    -flags are expected to conform to a future revision of that standard.
    +.St -p1003.1-2024 .
     .Pp
     The
     .Dv SOCK_DNS
    diff --git a/lib/libc/sys/socketpair.2 b/lib/libc/sys/socketpair.2
    index 28225c556d7..c6908d0f609 100644
    --- a/lib/libc/sys/socketpair.2
    +++ b/lib/libc/sys/socketpair.2
    @@ -75,6 +75,8 @@ argument:
     .Bl -tag -width "SOCK_NONBLOCKX" -offset indent -compact
     .It SOCK_CLOEXEC
     Set close-on-exec flag on both the new descriptors.
    +.It SOCK_CLOFORK
    +Set close-on-fork flag on both the new descriptors.
     .It SOCK_NONBLOCK
     Set non-blocking I/O mode on both the new sockets.
     .El
    @@ -113,12 +115,7 @@ process address space.
     The
     .Fn socketpair
     function conforms to
    -.St -p1003.1-2008 .
    -The
    -.Dv SOCK_CLOEXEC
    -and
    -.Dv SOCK_NONBLOCK
    -flags are expected to conform to a future revision of that standard.
    +.St -p1003.1-2024 .
     .Sh HISTORY
     The
     .Fn socketpair
    diff --git a/lib/libc/sys/w_fcntl.c b/lib/libc/sys/w_fcntl.c
    index c30367ad32c..4b7e7c67bc6 100644
    --- a/lib/libc/sys/w_fcntl.c
    +++ b/lib/libc/sys/w_fcntl.c
    @@ -29,6 +29,7 @@ fcntl(int fd, int cmd, ...)
     	switch (cmd) {
     	case F_DUPFD:
     	case F_DUPFD_CLOEXEC:
    +	case F_DUPFD_CLOFORK:
     	case F_SETFD:
     	case F_SETFL:
     	case F_SETOWN:
    diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
    index 3e57566b820..a3b0f099d96 100644
    --- a/sys/kern/kern_descrip.c
    +++ b/sys/kern/kern_descrip.c
    @@ -80,6 +80,7 @@ int dodup3(struct proc *, int, int, int, register_t *);
     
     #define DUPF_CLOEXEC	0x01
     #define DUPF_DUP2	0x02
    +#define DUPF_CLOFORK	0x04
     
     struct pool file_pool;
     struct pool fdesc_pool;
    @@ -336,7 +337,7 @@ sys_dup3(struct proc *p, void *v, register_t *retval)
     
     	if (SCARG(uap, from) == SCARG(uap, to))
     		return (EINVAL);
    -	if (SCARG(uap, flags) & ~O_CLOEXEC)
    +	if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK))
     		return (EINVAL);
     	return (dodup3(p, SCARG(uap, from), SCARG(uap, to),
     	    SCARG(uap, flags), retval));
    @@ -388,6 +389,8 @@ restart:
     	dupflags = DUPF_DUP2;
     	if (flags & O_CLOEXEC)
     		dupflags |= DUPF_CLOEXEC;
    +	if (flags & O_CLOFORK)
    +		dupflags |= DUPF_CLOFORK;
     
     	/* No need for FRELE(), finishdup() uses current ref. */
     	return (finishdup(p, fp, old, new, retval, dupflags));
    @@ -423,6 +426,7 @@ restart:
     
     	case F_DUPFD:
     	case F_DUPFD_CLOEXEC:
    +	case F_DUPFD_CLOFORK:
     		newmin = (long)SCARG(uap, arg);
     		if ((u_int)newmin >= lim_cur(RLIMIT_NOFILE) ||
     		    (u_int)newmin >= atomic_load_int(&maxfiles)) {
    @@ -444,6 +448,8 @@ restart:
     
     			if (SCARG(uap, cmd) == F_DUPFD_CLOEXEC)
     				dupflags |= DUPF_CLOEXEC;
    +			if (SCARG(uap, cmd) == F_DUPFD_CLOFORK)
    +				dupflags |= DUPF_CLOFORK;
     
     			/* No need for FRELE(), finishdup() uses current ref. */
     			error = finishdup(p, fp, fd, i, retval, dupflags);
    @@ -452,16 +458,17 @@ restart:
     
     	case F_GETFD:
     		fdplock(fdp);
    -		*retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
    +		*retval = (fdp->fd_ofileflags[fd] & UF_EXCLOSE   ? FD_CLOEXEC : 0)
    +			| (fdp->fd_ofileflags[fd] & UF_FORKCLOSE ? FD_CLOFORK : 0);
     		fdpunlock(fdp);
     		break;
     
     	case F_SETFD:
     		fdplock(fdp);
    -		if ((long)SCARG(uap, arg) & 1)
    -			fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
    -		else
    -			fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
    +		fdp->fd_ofileflags[fd] =
    +		    (fdp->fd_ofileflags[fd] & ~(UF_EXCLOSE | UF_FORKCLOSE)) |
    +		    ((SCARG(uap, arg) & FD_CLOEXEC) ? UF_EXCLOSE : 0) |
    +		    ((SCARG(uap, arg) & FD_CLOFORK) ? UF_FORKCLOSE : 0);
     		fdpunlock(fdp);
     		break;
     
    @@ -667,9 +674,12 @@ finishdup(struct proc *p, struct file *fp, int old, int new,
     	fdp->fd_ofiles[new] = fp;
     	mtx_leave(&fdp->fd_fplock);
     
    -	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
    +	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &
    +	    ~(UF_EXCLOSE | UF_FORKCLOSE);
     	if (dupflags & DUPF_CLOEXEC)
     		fdp->fd_ofileflags[new] |= UF_EXCLOSE;
    +	if (dupflags & DUPF_CLOFORK)
    +		fdp->fd_ofileflags[new] |= UF_FORKCLOSE;
     	*retval = new;
     
     	if (oldfp != NULL) {
    @@ -711,7 +721,7 @@ fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
     	fdp->fd_ofiles[fd] = fp;
     	mtx_leave(&fdp->fd_fplock);
     
    -	fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
    +	fdp->fd_ofileflags[fd] |= (flags & (UF_EXCLOSE | UF_FORKCLOSE));
     }
     
     void
    @@ -1141,7 +1151,9 @@ fdcopy(struct process *pr)
     		struct file *fp = fdp->fd_ofiles[i];
     
     		if (fp != NULL) {
    +			int fileflags = fdp->fd_ofileflags[i];
     			/*
    +			 * If the UF_FORKCLOSE flag is set, skip the fd.
     			 * XXX Gruesome hack. If count gets too high, fail
     			 * to copy an fd, since fdcopy()'s callers do not
     			 * permit it to indicate failure yet.
    @@ -1150,6 +1162,7 @@ fdcopy(struct process *pr)
     			 * their internal consistency, so close them here.
     			 */
     			if (fp->f_count >= FDUP_MAX_COUNT ||
    +			    (fileflags & UF_FORKCLOSE) ||
     			    fp->f_type == DTYPE_KQUEUE) {
     				if (i < newfdp->fd_freefile)
     					newfdp->fd_freefile = i;
    @@ -1158,7 +1171,7 @@ fdcopy(struct process *pr)
     
     			FREF(fp);
     			newfdp->fd_ofiles[i] = fp;
    -			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
    +			newfdp->fd_ofileflags[i] = fileflags;
     			fd_used(newfdp, i);
     		}
     	}
    @@ -1407,24 +1420,26 @@ dupfdopen(struct proc *p, int indx, int mode)
     	fdp->fd_ofiles[indx] = wfp;
     	mtx_leave(&fdp->fd_fplock);
     
    -	fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) |
    -	    (fdp->fd_ofileflags[dupfd] & ~UF_EXCLOSE);
    +	fdp->fd_ofileflags[indx] =
    +	    (fdp->fd_ofileflags[indx] & (UF_EXCLOSE | UF_FORKCLOSE)) |
    +	    (fdp->fd_ofileflags[dupfd] & ~(UF_EXCLOSE | UF_FORKCLOSE));
     
     	return (0);
     }
     
     /*
    - * Close any files on exec?
    + * Doing an exec, so handle fd flags: do close-on-exec and clear
    + * pledged and close-on-fork
      */
     void
    -fdcloseexec(struct proc *p)
    +fdprepforexec(struct proc *p)
     {
     	struct filedesc *fdp = p->p_fd;
     	int fd;
     
     	fdplock(fdp);
     	for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
    -		fdp->fd_ofileflags[fd] &= ~UF_PLEDGED;
    +		fdp->fd_ofileflags[fd] &= ~(UF_PLEDGED | UF_FORKCLOSE);
     		if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) {
     			/* fdrelease() unlocks fdp. */
     			(void) fdrelease(p, fd);
    diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
    index 86e5b078bfd..dab621ac3ca 100644
    --- a/sys/kern/kern_exec.c
    +++ b/sys/kern/kern_exec.c
    @@ -272,6 +272,7 @@ sys_execve(struct proc *p, void *v, register_t *retval)
     	struct ps_strings arginfo;
     	struct vmspace *vm = p->p_vmspace;
     	struct vnode *otvp;
    +	int i;
     
     	/*
     	 * Get other threads to stop, if contested return ERESTART,
    @@ -531,7 +532,7 @@ sys_execve(struct proc *p, void *v, register_t *retval)
     	}
     
     	stopprofclock(pr);	/* stop profiling */
    -	fdcloseexec(p);		/* handle close on exec */
    +	fdprepforexec(p);	/* handle close on exec and close on fork */
     	execsigs(p);		/* reset caught signals */
     	TCB_SET(p, NULL);	/* reset the TCB address */
     	pr->ps_kbind_addr = 0;	/* reset the kbind bits */
    @@ -601,8 +602,6 @@ sys_execve(struct proc *p, void *v, register_t *retval)
     	 * MNT_NOEXEC has already been used to disable s[ug]id.
     	 */
     	if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) {
    -		int i;
    -
     		atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC);
     
     #ifdef KTRACE
    @@ -618,66 +617,63 @@ sys_execve(struct proc *p, void *v, register_t *retval)
     			cred->cr_uid = attr.va_uid;
     		if (attr.va_mode & VSGID)
     			cred->cr_gid = attr.va_gid;
    +	} else
    +		atomic_clearbits_int(&pr->ps_flags, PS_SUGID);
    +
    +	/*
    +	 * A few caveats apply to stdin, stdout, and stderr.
    +	 */
    +	fdplock(p->p_fd);
    +	for (i = 0; i < 3; i++) {
    +		struct file *fp = NULL;
     
     		/*
    -		 * For set[ug]id processes, a few caveats apply to
    -		 * stdin, stdout, and stderr.
    +		 * NOTE - This will never return NULL because of immature fds
    +		 * since only kernel-threads share the file descriptor table.
     		 */
    -		error = 0;
    -		fdplock(p->p_fd);
    -		for (i = 0; i < 3; i++) {
    -			struct file *fp = NULL;
    -
    -			/*
    -			 * NOTE - This will never return NULL because of
    -			 * immature fds. The file descriptor table is not
    -			 * shared because we're suid.
    -			 */
    -			fp = fd_getfile(p->p_fd, i);
    -
    -			/*
    -			 * Ensure that stdin, stdout, and stderr are already
    -			 * allocated.  We do not want userland to accidentally
    -			 * allocate descriptors in this range which has implied
    -			 * meaning to libc.
    -			 */
    -			if (fp == NULL) {
    -				short flags = FREAD | (i == 0 ? 0 : FWRITE);
    -				struct vnode *vp;
    -				int indx;
    -
    -				if ((error = falloc(p, &fp, &indx)) != 0)
    -					break;
    +		fp = fd_getfile(p->p_fd, i);
    +
    +		/*
    +		 * Ensure that stdin, stdout, and stderr are already
    +		 * allocated.  We do not want userland to accidentally
    +		 * allocate descriptors in this range which has implied
    +		 * meaning to libc.
    +		 */
    +		if (fp == NULL) {
    +			short flags = FREAD | (i == 0 ? 0 : FWRITE);
    +			struct vnode *vp;
    +			int indx;
    +
    +			if ((error = falloc(p, &fp, &indx)) != 0)
    +				break;
     #ifdef DIAGNOSTIC
    -				if (indx != i)
    -					panic("sys_execve: falloc indx != i");
    +			if (indx != i)
    +				panic("sys_execve: falloc indx != i");
     #endif
    -				if ((error = cdevvp(getnulldev(), &vp)) != 0) {
    -					fdremove(p->p_fd, indx);
    -					closef(fp, p);
    -					break;
    -				}
    -				if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) {
    -					fdremove(p->p_fd, indx);
    -					closef(fp, p);
    -					vrele(vp);
    -					break;
    -				}
    -				if (flags & FWRITE)
    -					vp->v_writecount++;
    -				fp->f_flag = flags;
    -				fp->f_type = DTYPE_VNODE;
    -				fp->f_ops = &vnops;
    -				fp->f_data = (caddr_t)vp;
    -				fdinsert(p->p_fd, indx, 0, fp);
    +			if ((error = cdevvp(getnulldev(), &vp)) != 0) {
    +				fdremove(p->p_fd, indx);
    +				closef(fp, p);
    +				break;
    +			}
    +			if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) {
    +				fdremove(p->p_fd, indx);
    +				closef(fp, p);
    +				vrele(vp);
    +				break;
     			}
    -			FRELE(fp, p);
    +			if (flags & FWRITE)
    +				vp->v_writecount++;
    +			fp->f_flag = flags;
    +			fp->f_type = DTYPE_VNODE;
    +			fp->f_ops = &vnops;
    +			fp->f_data = (caddr_t)vp;
    +			fdinsert(p->p_fd, indx, 0, fp);
     		}
    -		fdpunlock(p->p_fd);
    -		if (error)
    -			goto exec_abort;
    -	} else
    -		atomic_clearbits_int(&pr->ps_flags, PS_SUGID);
    +		FRELE(fp, p);
    +	}
    +	fdpunlock(p->p_fd);
    +	if (error)
    +		goto exec_abort;
     
     	/*
     	 * Reset the saved ugids and update the process's copy of the
    diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
    index 12254a052da..c2c119dd5f3 100644
    --- a/sys/kern/sys_pipe.c
    +++ b/sys/kern/sys_pipe.c
    @@ -162,7 +162,7 @@ sys_pipe2(struct proc *p, void *v, register_t *retval)
     		syscallarg(int) flags;
     	} */ *uap = v;
     
    -	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
    +	if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK | FNONBLOCK))
     		return (EINVAL);
     
     	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
    @@ -175,9 +175,10 @@ dopipe(struct proc *p, int *ufds, int flags)
     	struct file *rf, *wf;
     	struct pipe_pair *pp;
     	struct pipe *rpipe, *wpipe = NULL;
    -	int fds[2], cloexec, error;
    +	int fds[2], fdflags, error;
     
    -	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
    +	fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0)
    +	    | ((flags & O_CLOFORK) ? UF_FORKCLOSE : 0);
     
     	pp = pipe_pair_create();
     	if (pp == NULL)
    @@ -203,8 +204,8 @@ dopipe(struct proc *p, int *ufds, int flags)
     	wf->f_data = wpipe;
     	wf->f_ops = &pipeops;
     
    -	fdinsert(fdp, fds[0], cloexec, rf);
    -	fdinsert(fdp, fds[1], cloexec, wf);
    +	fdinsert(fdp, fds[0], fdflags, rf);
    +	fdinsert(fdp, fds[1], fdflags, wf);
     
     	error = copyout(fds, ufds, sizeof(fds));
     	if (error == 0) {
    diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
    index 7a93c571a29..21f3c7794b1 100644
    --- a/sys/kern/uipc_syscalls.c
    +++ b/sys/kern/uipc_syscalls.c
    @@ -81,7 +81,7 @@ sys_socket(struct proc *p, void *v, register_t *retval)
     	struct file *fp;
     	int type = SCARG(uap, type);
     	int domain = SCARG(uap, domain);
    -	int fd, cloexec, nonblock, fflag, error;
    +	int fd, fdflags, nonblock, fflag, error;
     	unsigned int ss = 0;
     
     	if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6))
    @@ -93,8 +93,9 @@ sys_socket(struct proc *p, void *v, register_t *retval)
     	if (error)
     		return (error);
     
    -	type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS);
    -	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
    +	type &= ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK | SOCK_DNS);
    +	fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0)
    +	    | ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FORKCLOSE : 0);
     	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
     	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
     
    @@ -113,7 +114,7 @@ sys_socket(struct proc *p, void *v, register_t *retval)
     		fp->f_ops = &socketops;
     		so->so_state |= ss;
     		fp->f_data = so;
    -		fdinsert(fdp, fd, cloexec, fp);
    +		fdinsert(fdp, fd, fdflags, fp);
     		fdpunlock(fdp);
     		FRELE(fp, p);
     		*retval = fd;
    @@ -240,7 +241,7 @@ sys_accept4(struct proc *p, void *v, register_t *retval)
     		syscallarg(socklen_t *) int flags;
     	} */ *uap = v;
     
    -	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
    +	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK))
     		return (EINVAL);
     
     	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
    @@ -257,9 +258,10 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
     	socklen_t namelen;
     	int error, tmpfd;
     	struct socket *head, *so;
    -	int cloexec, nflag;
    +	int fdflags, nflag;
     
    -	cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
    +	fdflags = ((flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0)
    +	    | ((flags & SOCK_CLOFORK) ? UF_FORKCLOSE : 0);
     
     	if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
     		return (error);
    @@ -346,7 +348,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
     	}
     
     	fdplock(fdp);
    -	fdinsert(fdp, tmpfd, cloexec, fp);
    +	fdinsert(fdp, tmpfd, fdflags, fp);
     	fdpunlock(fdp);
     	FRELE(fp, p);
     	*retval = tmpfd;
    @@ -457,10 +459,11 @@ sys_socketpair(struct proc *p, void *v, register_t *retval)
     	struct filedesc *fdp = p->p_fd;
     	struct file *fp1 = NULL, *fp2 = NULL;
     	struct socket *so1, *so2;
    -	int type, cloexec, nonblock, fflag, error, sv[2];
    +	int type, fdflags, nonblock, fflag, error, sv[2];
     
    -	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
    -	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
    +	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK);
    +	fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0)
    +	    | ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FORKCLOSE : 0);
     	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
     	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
     
    @@ -498,8 +501,8 @@ sys_socketpair(struct proc *p, void *v, register_t *retval)
     	fp2->f_data = so2;
     	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int));
     	if (error == 0) {
    -		fdinsert(fdp, sv[0], cloexec, fp1);
    -		fdinsert(fdp, sv[1], cloexec, fp2);
    +		fdinsert(fdp, sv[0], fdflags, fp1);
    +		fdinsert(fdp, sv[1], fdflags, fp2);
     		fdpunlock(fdp);
     #ifdef KTRACE
     		if (KTRPOINT(p, KTR_STRUCT))
    diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
    index f50a040d1e8..3bb90b33b4c 100644
    --- a/sys/kern/uipc_usrreq.c
    +++ b/sys/kern/uipc_usrreq.c
    @@ -1146,6 +1146,8 @@ restart:
     		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
     		if (flags & MSG_CMSG_CLOEXEC)
     			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;
    +		if (flags & MSG_CMSG_CLOFORK)
    +			fdp->fd_ofileflags[fds[i]] |= UF_FORKCLOSE;
     
     		rp++;
     	}
    diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
    index 1f5731712a0..7a6ff816bb6 100644
    --- a/sys/kern/vfs_syscalls.c
    +++ b/sys/kern/vfs_syscalls.c
    @@ -1086,7 +1086,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
     	struct file *fp;
     	struct vnode *vp;
     	struct vattr vattr;
    -	int flags, cloexec, cmode;
    +	int flags, fdflags, cmode;
     	int type, indx, error, localtrunc = 0;
     	struct flock lf;
     	struct nameidata nd;
    @@ -1099,7 +1099,8 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
     			return (error);
     	}
     
    -	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
    +	fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0)
    +	    | ((oflags & O_CLOFORK) ? UF_FORKCLOSE : 0);
     
     	fdplock(fdp);
     	if ((error = falloc(p, &fp, &indx)) != 0) {
    @@ -1200,7 +1201,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
     	KERNEL_UNLOCK();
     	*retval = indx;
     	fdplock(fdp);
    -	fdinsert(fdp, indx, cloexec, fp);
    +	fdinsert(fdp, indx, fdflags, fp);
     	fdpunlock(fdp);
     	FRELE(fp, p);
     	return (error);
    @@ -1224,7 +1225,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval)
     	struct file *fp;
     	struct vnode *vp;
     	int oflags = SCARG(uap, flags);
    -	int flags, cloexec, cmode;
    +	int flags, fdflags, cmode;
     	int indx, error;
     	unsigned int i;
     	struct nameidata nd;
    @@ -1232,9 +1233,11 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval)
     	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
     
     	/* most flags are hardwired */
    -	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
    +	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW |
    +	    (oflags & (O_CLOEXEC | O_CLOFORK));
     
    -	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
    +	fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0)
    +	    | ((oflags & O_CLOFORK) ? UF_FORKCLOSE : 0);
     
     	fdplock(fdp);
     	if ((error = falloc(p, &fp, &indx)) != 0) {
    @@ -1270,7 +1273,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval)
     	VOP_UNLOCK(vp);
     	*retval = indx;
     	fdplock(fdp);
    -	fdinsert(fdp, indx, cloexec, fp);
    +	fdinsert(fdp, indx, fdflags, fp);
     	fdpunlock(fdp);
     	FRELE(fp, p);
     
    @@ -1352,7 +1355,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval)
     	struct vnode *vp = NULL;
     	struct mount *mp;
     	struct ucred *cred = p->p_ucred;
    -	int flags, cloexec;
    +	int flags, fdflags;
     	int type, indx, error=0;
     	struct flock lf;
     	struct vattr va;
    @@ -1370,7 +1373,8 @@ sys_fhopen(struct proc *p, void *v, register_t *retval)
     	if ((flags & O_CREAT))
     		return (EINVAL);
     
    -	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
    +	fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0)
    +	    | ((flags & O_CLOFORK) ? UF_FORKCLOSE : 0);
     
     	fdplock(fdp);
     	if ((error = falloc(p, &fp, &indx)) != 0) {
    @@ -1456,7 +1460,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval)
     	VOP_UNLOCK(vp);
     	*retval = indx;
     	fdplock(fdp);
    -	fdinsert(fdp, indx, cloexec, fp);
    +	fdinsert(fdp, indx, fdflags, fp);
     	fdpunlock(fdp);
     	FRELE(fp, p);
     	return (0);
    diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
    index e964ea49dde..33a7f32bc6c 100644
    --- a/sys/sys/fcntl.h
    +++ b/sys/sys/fcntl.h
    @@ -83,22 +83,24 @@
     #define	O_EXLOCK	0x0020		/* open with exclusive file lock */
     #define	O_ASYNC		0x0040		/* signal pgrp when data ready */
     #define	O_FSYNC		0x0080		/* backwards compatibility */
    -#define	O_NOFOLLOW	0x0100		/* if path is a symlink, don't follow */
     #endif
     #if __POSIX_VISIBLE >= 199309 || __XPG_VISIBLE >= 420
     #define	O_SYNC		0x0080		/* synchronous writes */
    -#endif
    -#define	O_CREAT		0x0200		/* create if nonexistent */
    -#define	O_TRUNC		0x0400		/* truncate to zero length */
    -#define	O_EXCL		0x0800		/* error if already exists */
    -
     /*
    - * POSIX 1003.1 specifies a higher granularity for synchronous operations
    + * POSIX 1003.1 permits a higher granularity for synchronous operations
      * than we support.  Since synchronicity is all or nothing in OpenBSD
      * we just define these to be the same as O_SYNC.
      */
     #define	O_DSYNC		O_SYNC		/* synchronous data writes */
     #define	O_RSYNC		O_SYNC		/* synchronous reads */
    +#endif
    +
    +/* defined by POSIX Issue 7 */
    +#define	O_NOFOLLOW	0x0100		/* if path is a symlink, don't follow */
    +
    +#define	O_CREAT		0x0200		/* create if nonexistent */
    +#define	O_TRUNC		0x0400		/* truncate to zero length */
    +#define	O_EXCL		0x0800		/* error if already exists */
     
     /* defined by POSIX 1003.1; BSD default, this bit is not required */
     #define	O_NOCTTY	0x8000		/* don't assign controlling terminal */
    @@ -107,6 +109,9 @@
     #define	O_CLOEXEC	0x10000		/* atomically set FD_CLOEXEC */
     #define	O_DIRECTORY	0x20000		/* fail if not a directory */
     
    +/* defined by POSIX Issue 8 */
    +#define	O_CLOFORK	0x40000		/* atomically set FD_CLOFORK */
    +
     #ifdef _KERNEL
     /*
      * convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE.
    @@ -158,9 +163,15 @@
     #if __BSD_VISIBLE
     #define F_ISATTY	11		/* used by isatty(3) */
     #endif
    +#if __POSIX_VISIBLE >= 202405
    +#define	F_DUPFD_CLOFORK	12		/* duplicate with FD_CLOFORK set */
    +#endif
     
     /* file descriptor flags (F_GETFD, F_SETFD) */
     #define	FD_CLOEXEC	1		/* close-on-exec flag */
    +#if __POSIX_VISIBLE >= 202405
    +#define	FD_CLOFORK	4		/* close-on-fork flag */
    +#endif
     
     /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */
     #define	F_RDLCK		1		/* shared or read lock */
    diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
    index 50bc7734a02..6302cbeb793 100644
    --- a/sys/sys/filedesc.h
    +++ b/sys/sys/filedesc.h
    @@ -115,6 +115,7 @@ struct filedesc0 {
      */
     #define	UF_EXCLOSE 	0x01		/* auto-close on exec */
     #define	UF_PLEDGED 	0x02		/* open after pledge(2) */
    +#define	UF_FORKCLOSE 	0x04		/* auto-close on fork */
     
     /*
      * Flags on the file descriptor table.
    @@ -143,7 +144,7 @@ void	fdfree(struct proc *p);
     int	fdrelease(struct proc *p, int);
     void	fdinsert(struct filedesc *, int, int, struct file *);
     void	fdremove(struct filedesc *, int);
    -void	fdcloseexec(struct proc *);
    +void	fdprepforexec(struct proc *);
     struct file *fd_iterfile(struct file *, struct proc *);
     struct file *fd_getfile(struct filedesc *, int);
     struct file *fd_getfile_mode(struct filedesc *, int, int);
    diff --git a/sys/sys/socket.h b/sys/sys/socket.h
    index 4fd50d29274..e0635bd3656 100644
    --- a/sys/sys/socket.h
    +++ b/sys/sys/socket.h
    @@ -72,14 +72,19 @@ typedef	__sa_family_t	sa_family_t;	/* sockaddr address family type */
     /*
      * Socket creation flags
      */
    -#if __BSD_VISIBLE
    +#if __POSIX_VISIBLE >= 202405 || __BSD_VISIBLE
     #define	SOCK_CLOEXEC		0x8000	/* set FD_CLOEXEC */
     #define	SOCK_NONBLOCK		0x4000	/* set O_NONBLOCK */
    +#endif
    +#if __BSD_VISIBLE
     #ifdef _KERNEL
     #define	SOCK_NONBLOCK_INHERIT	0x2000	/* inherit O_NONBLOCK from listener */
     #endif
     #define	SOCK_DNS		0x1000	/* set SS_DNS */
     #endif /* __BSD_VISIBLE */
    +#if __POSIX_VISIBLE >= 202405
    +#define	SOCK_CLOFORK		0x0800	/* set FD_CLOFORK */
    +#endif
     
     /*
      * Option flags per-socket.
    @@ -511,6 +516,7 @@ struct timespec;
     #define	MSG_NOSIGNAL		0x400	/* do not send SIGPIPE */
     #define	MSG_CMSG_CLOEXEC	0x800	/* set FD_CLOEXEC on received fds */
     #define	MSG_WAITFORONE		0x1000	/* nonblocking but wait for one msg */
    +#define	MSG_CMSG_CLOFORK	0x2000	/* set FD_CLOFORK on received fds */
     
     /*
      * Header for ancillary data objects in msg_control buffer.
    @@ -586,7 +592,7 @@ int	sockatmark(int);
     int	socket(int, int, int);
     int	socketpair(int, int, int, int *);
     
    -#if __BSD_VISIBLE
    +#if __POSIX_VISIBLE >= 202405 || __BSD_VISIBLE
     int	accept4(int, struct sockaddr *__restrict, socklen_t *__restrict, int);
     #endif
     
    diff --git a/usr.bin/fstat/fstat.c b/usr.bin/fstat/fstat.c
    index a74d3a6e916..e4dbc1638c5 100644
    --- a/usr.bin/fstat/fstat.c
    +++ b/usr.bin/fstat/fstat.c
    @@ -482,6 +482,8 @@ vtrans(struct kinfo_file *kf)
     		strlcat(rwep, "w", sizeof rwep);
     	if (kf->fd_ofileflags & UF_EXCLOSE)
     		strlcat(rwep, "e", sizeof rwep);
    +	if (kf->fd_ofileflags & UF_FORKCLOSE)
    +		strlcat(rwep, "f", sizeof rwep);
     	if (kf->fd_ofileflags & UF_PLEDGED)
     		strlcat(rwep, "p", sizeof rwep);
     	printf(" %4s", rwep);
    -- 
    2.49.0
    
    
  • Ricardo Branco:

    [PATCH]: Add POSIX O_CLOFORK flag