Index | Thread | Search

From:
Ricardo Branco <rbranco@suse.de>
Subject:
Re: [PATCH]: Add POSIX O_CLOFORK flag
To:
Theo Buehler <tb@theobuehler.org>
Cc:
Philip Guenther <guenther@gmail.com>, tech@openbsd.org
Date:
Mon, 30 Jun 2025 19:54:19 +0200

Download raw body.

Thread
The attached tarball is the OpenBSD port for the latest illumos-os-tests
containing the previously sent patches.

For completeness, I'm also attaching the two commits against OpenBSD-src.

Best,
Ricardo

On 6/30/25 9:23 AM, Ricardo Branco wrote:
> Illumos updated their implementation and also the test code.
>
> https://github.com/illumos/illumos-gate/commit/b3ff81dc6673bee7f291d9d66a832cb3e1004f49 
>
>
> Attached patch to the tests.  This is now simplified and we just call
> fork() instead of simulating Solaris forkx as there's no need for that.
>
> These tests pass.  I think we're good to go.
>
> Best,
> Ricardo
>
> On 6/25/25 12:09 PM, Theo Buehler wrote:
>>> How can I do this?  Extending current tests is not worth it, imo.
>> That would look something like this.
>>
>> Attached is a tarball for a new port, illumos-os-tests.tgz, to be
>> extracted in /usr/ports/devel. It bundles the CDDL and all the stuff
>> beneath os-tests in illumos-gate with the patches from your gist applied
>> into a port installing these files in /usr/local/share/illumos-os-tests.
>>
>> I also attached a package, illumos-os-tests-20250625.tgz, which you can
>> install as root with
>>
>> # env TRUSTED_PKG_PATH=/path/to/tgz pkg_add illumos-os-tests
>>
>> so you don't need to fiddle with the ports tree yourself.
>>
>> This part of the kern_descrip.c diff from your
>> 2bd21db4e48d499abaac013009cda6d0769e0049 doesn't compile:
>>
>>       case F_SETFD:
>>           fdplock(fdp);
>> -        if ((long)SCARG(uap, arg) & 1)
>> -            fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
>> -        else
>> -            fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
>> +        fdp->fd_ofileflags[fd] =
>> +            (fdp->fd_ofileflags[fd] & ~(UF_EXCLOSE | UF_FORKCLOSE)) |
>> +            ((SCARG(uap, arg) & FD_CLOEXEC) ? UF_EXCLOSE : 0) |
>> +            ((SCARG(uap, arg) & FD_CLOFORK) ? UF_FORKCLOSE : 0);
>>           fdpunlock(fdp);
>>           break;
>>
>> /sys/kern/kern_descrip.c:470:25: error: invalid operands to binary 
>> expression ('void *' and 'int')
>>    470 |                     ((SCARG(uap, arg) & FD_CLOEXEC) ? 
>> UF_EXCLOSE : 0) |
>>        |                       ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
>>
>> I assume the two SCARG should be (long)SCARG to match what was removed.
>>
>>> These tests are more than enough... Still waiting for a response if
>>> they can relicense them to BSD though.
>> Finally, here's a diff for the regress tests. With the (long)SCARG
>> change applied to your diff and with the illumos-os-tests package
>> installed, these tests compile and pass on my amd64 laptop. If the
>> package isn't installed, you get
>>
>> ===> illumos
>> ===> illumos/oclo
>> package illumos-os-tests is required for this regress
>> SKIPPED
>>
>> commit db54671d22ce4adf21cb47b7201bb3633526d24f
>> Author: Theo Buehler <tb@openbsd.org>
>> Date:   Wed Jun 25 11:10:19 2025 +0200
>>
>>      Hook illumos-os-tests for oclo to libc regress
>>           Based on work by Ricardo Branco
>>           Change-Id: I6a6a6964af35f8d5c9eaeb3606b26b88b2ff2d19
>>
>> diff --git a/regress/lib/libc/Makefile b/regress/lib/libc/Makefile
>> index 59d043c62fe..8bdc7809717 100644
>> --- a/regress/lib/libc/Makefile
>> +++ b/regress/lib/libc/Makefile
>> @@ -11,7 +11,7 @@ SUBDIR+= ffs fmemopen fnmatch fpclassify fread
>>   SUBDIR+= gcvt getaddrinfo getcap getopt getopt_long glob
>>   SUBDIR+= hash
>>   SUBDIR+= hsearch
>> -SUBDIR+= ieeefp ifnameindex
>> +SUBDIR+= ieeefp ifnameindex illumos
>>   SUBDIR+= ldexp locale longjmp
>>   SUBDIR+= malloc mkstemp modf
>>   SUBDIR+= netdb
>> diff --git a/regress/lib/libc/illumos/Makefile 
>> b/regress/lib/libc/illumos/Makefile
>> new file mode 100644
>> index 00000000000..7fdceb00349
>> --- /dev/null
>> +++ b/regress/lib/libc/illumos/Makefile
>> @@ -0,0 +1,7 @@
>> +#    $OpenBSD$
>> +
>> +SUBDIR += oclo
>> +
>> +install:
>> +
>> +.include <bsd.subdir.mk>
>> diff --git a/regress/lib/libc/illumos/Makefile.inc 
>> b/regress/lib/libc/illumos/Makefile.inc
>> new file mode 100644
>> index 00000000000..574d9f3d970
>> --- /dev/null
>> +++ b/regress/lib/libc/illumos/Makefile.inc
>> @@ -0,0 +1,7 @@
>> +ILLUMOS_OS_TESTDIR = /usr/local/share/illumos-os-tests
>> +
>> +.if !exists(${ILLUMOS_OS_TESTDIR})
>> +regress:
>> +    @echo package illumos-os-tests is required for this regress
>> +    @echo SKIPPED
>> +.endif
>> diff --git a/regress/lib/libc/illumos/oclo/Makefile 
>> b/regress/lib/libc/illumos/oclo/Makefile
>> new file mode 100644
>> index 00000000000..d808c54ab2a
>> --- /dev/null
>> +++ b/regress/lib/libc/illumos/oclo/Makefile
>> @@ -0,0 +1,14 @@
>> +#    $OpenBSD$
>> +
>> +.if exists(/usr/local/share/illumos-os-tests)
>> +
>> +PROGS =        oclo
>> +PROGS +=    oclo_errors
>> +PROGS +=    ocloexec_verify
>> +
>> +LDADD_ocloexec_verify = -lkvm
>> +
>> +.PATH: /usr/local/share/illumos-os-tests/tests/oclo
>> +.endif
>> +
>> +.include <bsd.regress.mk>
From 7710b3b4eac62d8c301e368a6818f6997ce1eccc Mon Sep 17 00:00:00 2001
From: Ricardo Branco <rbranco@suse.de>
Date: Sun, 22 Jun 2025 12:06:02 +0200
Subject: [PATCH 1/2] kern: Add support for POSIX O_CLOFORK flag

Co-authored-by: Philip Guenther <guenther@gmail.com>
---
 lib/libc/gen/opendir.c    |   4 +-
 lib/libc/gen/shm_open.3   |   5 +-
 lib/libc/gen/shm_open.c   |   5 +-
 lib/libc/stdlib/mkstemp.c |   3 +-
 lib/libc/stdlib/mktemp.3  |  10 ++--
 lib/libc/sys/accept.2     |  24 ++++-----
 lib/libc/sys/dup.2        |  27 +++++-----
 lib/libc/sys/execve.2     |  16 ++++--
 lib/libc/sys/fcntl.2      |  47 ++++++++++++++---
 lib/libc/sys/open.2       |   5 ++
 lib/libc/sys/pipe.2       |  22 ++++----
 lib/libc/sys/socket.2     |   9 ++--
 lib/libc/sys/socketpair.2 |   9 ++--
 lib/libc/sys/w_fcntl.c    |   1 +
 sys/kern/kern_descrip.c   |  43 ++++++++++-----
 sys/kern/kern_exec.c      | 108 ++++++++++++++++++--------------------
 sys/kern/sys_pipe.c       |  11 ++--
 sys/kern/uipc_syscalls.c  |  29 +++++-----
 sys/kern/uipc_usrreq.c    |   2 +
 sys/kern/vfs_syscalls.c   |  24 +++++----
 sys/sys/fcntl.h           |  25 ++++++---
 sys/sys/filedesc.h        |   3 +-
 sys/sys/socket.h          |  10 +++-
 usr.bin/fstat/fstat.c     |   2 +
 24 files changed, 266 insertions(+), 178 deletions(-)

diff --git a/lib/libc/gen/opendir.c b/lib/libc/gen/opendir.c
index ef198924efb..0f09ac875e4 100644
--- a/lib/libc/gen/opendir.c
+++ b/lib/libc/gen/opendir.c
@@ -82,7 +82,9 @@ fdopendir(int fd)
 		 * POSIX doesn't require fdopendir() to set
 		 * FD_CLOEXEC, so it's okay for this to fail.
 		 */
-		(void)fcntl(fd, F_SETFD, FD_CLOEXEC);
+		flags = fcntl(fd, F_GETFD);
+		if (flags != -1 && (flags & FD_CLOEXEC) == 0)
+			(void)fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 	}
 	return (dirp);
 }
diff --git a/lib/libc/gen/shm_open.3 b/lib/libc/gen/shm_open.3
index 02e3c3aba65..1bbfedc82db 100644
--- a/lib/libc/gen/shm_open.3
+++ b/lib/libc/gen/shm_open.3
@@ -45,7 +45,7 @@ and must include at least
 or
 .Dv O_RDWR
 and may also include a combination of
-.Dv O_CREAT , O_EXCL , O_CLOEXEC , O_NOFOLLOW ,
+.Dv O_CREAT , O_EXCL , O_CLOEXEC , O_CLOFORK , O_NOFOLLOW ,
 or
 .Dv O_TRUNC .
 This implementation forces the
@@ -82,7 +82,8 @@ and
 appear in
 .St -p1003.1-2001 .
 Using
-.Dv O_CLOEXEC
+.Dv O_CLOEXEC ,
+.Dv O_CLOFORK ,
 or
 .Dv O_NOFOLLOW
 with
diff --git a/lib/libc/gen/shm_open.c b/lib/libc/gen/shm_open.c
index 106c7e2261d..c32494d2903 100644
--- a/lib/libc/gen/shm_open.c
+++ b/lib/libc/gen/shm_open.c
@@ -31,8 +31,9 @@
 /* "/tmp/" + sha256 + ".shm" */
 #define SHM_PATH_SIZE (5 + SHA256_DIGEST_STRING_LENGTH + 4)
 
-/* O_CLOEXEC and O_NOFOLLOW are extensions to POSIX */
-#define OK_FLAGS	(O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC | O_NOFOLLOW)
+/* O_CLOEXEC, O_CLOFORK, and O_NOFOLLOW are extensions to POSIX */
+#define OK_FLAGS \
+	(O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC | O_CLOFORK | O_NOFOLLOW)
 
 static void
 makeshmpath(const char *origpath, char *shmpath, size_t len)
diff --git a/lib/libc/stdlib/mkstemp.c b/lib/libc/stdlib/mkstemp.c
index 75a9d27d1a5..fde2d6e06e8 100644
--- a/lib/libc/stdlib/mkstemp.c
+++ b/lib/libc/stdlib/mkstemp.c
@@ -20,7 +20,8 @@
 #include <fcntl.h>
 #include <stdlib.h>
 
-#define MKOSTEMP_FLAGS	(O_APPEND | O_CLOEXEC | O_DSYNC | O_RSYNC | O_SYNC)
+#define MKOSTEMP_FLAGS \
+	(O_APPEND | O_CLOEXEC | O_CLOFORK | O_DSYNC | O_RSYNC | O_SYNC)
 
 static int
 mkstemp_cb(const char *path, int flags)
diff --git a/lib/libc/stdlib/mktemp.3 b/lib/libc/stdlib/mktemp.3
index 83b7c9eb301..bd63f08e5da 100644
--- a/lib/libc/stdlib/mktemp.3
+++ b/lib/libc/stdlib/mktemp.3
@@ -119,6 +119,8 @@ system call:
 Append on each write.
 .It Dv O_CLOEXEC
 Set the close-on-exec flag on the new file descriptor.
+.It Dv O_CLOFORK
+Set the close-on-fork flag on the new file descriptor.
 .It Dv O_SYNC
 Perform synchronous I/O operations.
 .El
@@ -345,18 +347,16 @@ function.
 .Xr tmpnam 3
 .Sh STANDARDS
 The
-.Fn mkdtemp
+.Fn mkdtemp ,
+.Fn mkostemp ,
 and
 .Fn mkstemp
 functions conform to the
-.St -p1003.1-2008
+.St -p1003.1-2024
 specification.
 The ability to specify more than six
 .Em X Ns s
 is an extension to that standard.
-The
-.Fn mkostemp
-function is expected to conform to a future revision of that standard.
 .Pp
 The
 .Fn mktemp
diff --git a/lib/libc/sys/accept.2 b/lib/libc/sys/accept.2
index 7272841b794..04df643b4ad 100644
--- a/lib/libc/sys/accept.2
+++ b/lib/libc/sys/accept.2
@@ -58,19 +58,18 @@ call extracts the first connection request on the queue of pending
 connections, creates a new socket with the same non-blocking I/O mode as
 .Fa s ,
 and allocates a new file descriptor for the socket with the
-close-on-exec flag clear.
+close-on-exec and close-on-fork flags clear.
 .Pp
 The
 .Fn accept4
-system call is similar, however the non-blocking I/O mode of the
-new socket is determined by the
-.Dv SOCK_NONBLOCK
-flag in the
-.Fa flags
-argument and the close-on-exec flag on the new file descriptor is
+system call is similar, however the non-blocking I/O mode,
+close-on-exec flag,
+and close-on-fork flag on the new file descriptor are
 determined by the
-.Dv SOCK_CLOEXEC
-flag in the
+.Dv SOCK_NONBLOCK , SOCK_CLOEXEC ,
+and
+.Dv SOCK_CLOFORK
+flags, respectively, in the
 .Fa flags
 argument.
 .Pp
@@ -204,11 +203,10 @@ is invalid.
 .Sh STANDARDS
 The
 .Fn accept
-function conforms to
-.St -p1003.1-2008 .
-The
+and
 .Fn accept4
-function is expected to conform to a future revision of that standard.
+functions conform to
+.St -p1003.1-2024 .
 .Sh HISTORY
 The
 .Fn accept
diff --git a/lib/libc/sys/dup.2 b/lib/libc/sys/dup.2
index 948f1082a80..8ed985e5d8a 100644
--- a/lib/libc/sys/dup.2
+++ b/lib/libc/sys/dup.2
@@ -86,7 +86,8 @@ object reference to the file must be obtained by issuing an
 additional
 .Xr open 2
 call.
-The close-on-exec flag on the new file descriptor is unset.
+The close-on-exec and close-on-fork flags on the new file descriptor
+are unset.
 .Pp
 In
 .Fn dup2 ,
@@ -101,18 +102,21 @@ When
 equals
 .Fa oldd ,
 .Fn dup2
-just returns without affecting the close-on-exec flag.
+just returns without affecting the close-on-exec or close-on-fork flags.
 .Pp
 In
 .Fn dup3 ,
-both the value of the new descriptor and the close-on-exec flag on
-the new file descriptor are specified:
+the value of the new descriptor and the close-on-exec and close-on-fork
+flags on
+the new file descriptor are all specified:
 .Fa newd
 specifies the value and the
 .Dv O_CLOEXEC
-bit in
+and
+.Dv O_CLOFORK
+bits in
 .Fa flags
-specifies the close-on-exec flag.
+specify the close-on-exec and close-on-fork flags, respectively.
 Unlike
 .Fn dup2 ,
 if
@@ -192,14 +196,13 @@ is invalid.
 .Xr socketpair 2 ,
 .Xr getdtablesize 3
 .Sh STANDARDS
-.Fn dup
-and
-.Fn dup2
-conform to
-.St -p1003.1-2008 .
 The
+.Fn dup ,
+.Fn dup2 ,
+and
 .Fn dup3
-function is expected to conform to a future revision of that standard.
+functions conform to
+.St -p1003.1-2024 .
 .Sh HISTORY
 The
 .Fn dup
diff --git a/lib/libc/sys/execve.2 b/lib/libc/sys/execve.2
index 0d5c1b03f23..54b49996180 100644
--- a/lib/libc/sys/execve.2
+++ b/lib/libc/sys/execve.2
@@ -108,9 +108,10 @@ flag is set (see
 .Xr close 2
 and
 .Xr fcntl 2 ) .
-Descriptors that remain open are unaffected by
-.Fn execve .
-In the case of a new setuid or setgid executable being executed, if
+Other descriptors remain open after
+.Fn execve ,
+however the close-on-fork flag is cleared.
+If
 file descriptors 0, 1, or 2 (representing stdin, stdout, and stderr)
 are currently unallocated, these descriptors will be opened to point to
 some system file like
@@ -329,6 +330,15 @@ The
 .Fn execve
 function first appeared in
 .At v7 .
+.Pp
+In
+.Ox 2.4 ,
+.Fn execve
+started ensuring that file descriptors 0, 1, and 2 are open when
+starting a setuid or setgid process.
+In
+.Ox 7.8
+that was extended to all processes.
 .Sh CAVEATS
 If a program is
 .Em setuid
diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2
index d45896800e3..3a48f3ff6e5 100644
--- a/lib/libc/sys/fcntl.2
+++ b/lib/libc/sys/fcntl.2
@@ -95,22 +95,47 @@ flag associated with the new file descriptor is set, so the file descriptor
 is closed when
 .Xr execve 2
 is called.
+.It Dv F_DUPFD_CLOFORK
+Like
+.Dv F_DUPFD ,
+but the
+.Dv FD_CLOFORK
+flag associated with the new file descriptor is set, so the file descriptor
+is closed in the child process when
+.Xr fork 2
+or
+.Xr vfork 2
+is called.
 .It Dv F_GETFD
-Get the close-on-exec flag associated with the file descriptor
+Get the close-on-exec and close-on-fork flags associated with the
+file descriptor
 .Fa fd
 as
-.Dv FD_CLOEXEC .
+.Dv FD_CLOEXEC
+and
+.Dv FD_CLOFORK .
 If the returned value ANDed with
 .Dv FD_CLOEXEC
 is 0,
 the file will remain open across
 .Fn exec ,
 otherwise the file will be closed upon execution of
-.Fn exec
+.Fn exec ;
+if the returned value ANDed with
+.Dv FD_CLOFORK
+is 0,
+the file will remain open across
+.Fn fork
+and
+.Fn vfork ,
+otherwise the file will be closed upon execution of
+.Fn fork
+or
+.Fn vfork
 .Fa ( arg
 is ignored).
 .It Dv F_SETFD
-Set the close-on-exec flag associated with
+Set the close-on-exec and close-on-fork flags associated with
 .Fa fd
 to
 .Fa arg ,
@@ -118,8 +143,10 @@ where
 .Fa arg
 (interpreted as an
 .Vt int )
-is either 0 or
-.Dv FD_CLOEXEC ,
+is the bitwise OR of zero or more of
+.Dv FD_CLOEXEC
+and
+.Dv FD_CLOFORK ,
 as described above.
 .It Dv F_GETFL
 Get file status flags associated with the file descriptor
@@ -392,8 +419,14 @@ as follows:
 A new file descriptor.
 .It Dv F_DUPFD_CLOEXEC
 A new file descriptor.
+.It Dv F_DUPFD_CLOFORK
+A new file descriptor.
 .It Dv F_GETFD
-Value of flag (only the low-order bit is defined).
+Value of file descriptor flags (only the
+.Dv FD_CLOEXEC
+and
+.Dv FD_CLOFORK
+bits are defined).
 .It Dv F_GETFL
 Value of flags.
 .It Dv F_GETOWN
diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2
index c8e056bbd92..69489681811 100644
--- a/lib/libc/sys/open.2
+++ b/lib/libc/sys/open.2
@@ -109,6 +109,11 @@ Set
 .Dv FD_CLOEXEC
 (the close-on-exec flag)
 on the new file descriptor.
+.It Dv O_CLOFORK
+Set
+.Dv FD_CLOFORK
+(the close-on-fork flag)
+on the new file descriptor.
 .It Dv O_DIRECTORY
 Error if
 .Fa path
diff --git a/lib/libc/sys/pipe.2 b/lib/libc/sys/pipe.2
index ca7a2d708a5..79b03c4bd6f 100644
--- a/lib/libc/sys/pipe.2
+++ b/lib/libc/sys/pipe.2
@@ -79,15 +79,14 @@ The
 .Fn pipe2
 function is identical to
 .Fn pipe
-except that the non-blocking I/O mode on both ends of the pipe is
+except that the non-blocking I/O mode,
+close-on-exec flag,
+and close-on-fork flag are
 determined by the
-.Dv O_NONBLOCK
-flag in the
-.Fa flags
-argument and the close-on-exec flag on both the new file descriptors
-is determined by the
-.Dv O_CLOEXEC
-flag in the
+.Dv O_NONBLOCK , O_CLOEXEC ,
+and
+.Dv O_CLOFORK
+flags, respectively, in the
 .Fa flags
 argument.
 .Sh RETURN VALUES
@@ -125,11 +124,10 @@ is invalid.
 .Sh STANDARDS
 The
 .Fn pipe
-function conforms to
-.St -p1003.1-2008 .
-The
+and
 .Fn pipe2
-function is expected to conform to a future revision of that standard.
+functions conform to
+.St -p1003.1-2024 .
 .Pp
 As an extension, the pipe provided is actually capable of moving
 data bidirectionally.
diff --git a/lib/libc/sys/socket.2 b/lib/libc/sys/socket.2
index 89848869c92..ce7fc774ecf 100644
--- a/lib/libc/sys/socket.2
+++ b/lib/libc/sys/socket.2
@@ -103,6 +103,8 @@ argument:
 .Bl -tag -width "SOCK_NONBLOCKX" -offset indent -compact
 .It SOCK_CLOEXEC
 Set close-on-exec flag on the new descriptor.
+.It SOCK_CLOFORK
+Set close-on-fork flag on the new descriptor.
 .It SOCK_NONBLOCK
 Set non-blocking I/O mode on the new socket.
 .It SOCK_DNS
@@ -282,12 +284,7 @@ is denied.
 The
 .Fn socket
 function conforms to
-.St -p1003.1-2008 .
-The
-.Dv SOCK_CLOEXEC
-and
-.Dv SOCK_NONBLOCK
-flags are expected to conform to a future revision of that standard.
+.St -p1003.1-2024 .
 .Pp
 The
 .Dv SOCK_DNS
diff --git a/lib/libc/sys/socketpair.2 b/lib/libc/sys/socketpair.2
index 28225c556d7..c6908d0f609 100644
--- a/lib/libc/sys/socketpair.2
+++ b/lib/libc/sys/socketpair.2
@@ -75,6 +75,8 @@ argument:
 .Bl -tag -width "SOCK_NONBLOCKX" -offset indent -compact
 .It SOCK_CLOEXEC
 Set close-on-exec flag on both the new descriptors.
+.It SOCK_CLOFORK
+Set close-on-fork flag on both the new descriptors.
 .It SOCK_NONBLOCK
 Set non-blocking I/O mode on both the new sockets.
 .El
@@ -113,12 +115,7 @@ process address space.
 The
 .Fn socketpair
 function conforms to
-.St -p1003.1-2008 .
-The
-.Dv SOCK_CLOEXEC
-and
-.Dv SOCK_NONBLOCK
-flags are expected to conform to a future revision of that standard.
+.St -p1003.1-2024 .
 .Sh HISTORY
 The
 .Fn socketpair
diff --git a/lib/libc/sys/w_fcntl.c b/lib/libc/sys/w_fcntl.c
index c30367ad32c..4b7e7c67bc6 100644
--- a/lib/libc/sys/w_fcntl.c
+++ b/lib/libc/sys/w_fcntl.c
@@ -29,6 +29,7 @@ fcntl(int fd, int cmd, ...)
 	switch (cmd) {
 	case F_DUPFD:
 	case F_DUPFD_CLOEXEC:
+	case F_DUPFD_CLOFORK:
 	case F_SETFD:
 	case F_SETFL:
 	case F_SETOWN:
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 3e57566b820..d138d6fcdd8 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -80,6 +80,7 @@ int dodup3(struct proc *, int, int, int, register_t *);
 
 #define DUPF_CLOEXEC	0x01
 #define DUPF_DUP2	0x02
+#define DUPF_CLOFORK	0x04
 
 struct pool file_pool;
 struct pool fdesc_pool;
@@ -336,7 +337,7 @@ sys_dup3(struct proc *p, void *v, register_t *retval)
 
 	if (SCARG(uap, from) == SCARG(uap, to))
 		return (EINVAL);
-	if (SCARG(uap, flags) & ~O_CLOEXEC)
+	if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK))
 		return (EINVAL);
 	return (dodup3(p, SCARG(uap, from), SCARG(uap, to),
 	    SCARG(uap, flags), retval));
@@ -388,6 +389,8 @@ restart:
 	dupflags = DUPF_DUP2;
 	if (flags & O_CLOEXEC)
 		dupflags |= DUPF_CLOEXEC;
+	if (flags & O_CLOFORK)
+		dupflags |= DUPF_CLOFORK;
 
 	/* No need for FRELE(), finishdup() uses current ref. */
 	return (finishdup(p, fp, old, new, retval, dupflags));
@@ -423,6 +426,7 @@ restart:
 
 	case F_DUPFD:
 	case F_DUPFD_CLOEXEC:
+	case F_DUPFD_CLOFORK:
 		newmin = (long)SCARG(uap, arg);
 		if ((u_int)newmin >= lim_cur(RLIMIT_NOFILE) ||
 		    (u_int)newmin >= atomic_load_int(&maxfiles)) {
@@ -444,6 +448,8 @@ restart:
 
 			if (SCARG(uap, cmd) == F_DUPFD_CLOEXEC)
 				dupflags |= DUPF_CLOEXEC;
+			if (SCARG(uap, cmd) == F_DUPFD_CLOFORK)
+				dupflags |= DUPF_CLOFORK;
 
 			/* No need for FRELE(), finishdup() uses current ref. */
 			error = finishdup(p, fp, fd, i, retval, dupflags);
@@ -452,16 +458,17 @@ restart:
 
 	case F_GETFD:
 		fdplock(fdp);
-		*retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
+		*retval = (fdp->fd_ofileflags[fd] & UF_EXCLOSE   ? FD_CLOEXEC : 0)
+			| (fdp->fd_ofileflags[fd] & UF_FORKCLOSE ? FD_CLOFORK : 0);
 		fdpunlock(fdp);
 		break;
 
 	case F_SETFD:
 		fdplock(fdp);
-		if ((long)SCARG(uap, arg) & 1)
-			fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
-		else
-			fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
+		fdp->fd_ofileflags[fd] =
+		    (fdp->fd_ofileflags[fd] & ~(UF_EXCLOSE | UF_FORKCLOSE)) |
+		    (((long)SCARG(uap, arg) & FD_CLOEXEC) ? UF_EXCLOSE : 0) |
+		    (((long)SCARG(uap, arg) & FD_CLOFORK) ? UF_FORKCLOSE : 0);
 		fdpunlock(fdp);
 		break;
 
@@ -667,9 +674,12 @@ finishdup(struct proc *p, struct file *fp, int old, int new,
 	fdp->fd_ofiles[new] = fp;
 	mtx_leave(&fdp->fd_fplock);
 
-	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
+	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &
+	    ~(UF_EXCLOSE | UF_FORKCLOSE);
 	if (dupflags & DUPF_CLOEXEC)
 		fdp->fd_ofileflags[new] |= UF_EXCLOSE;
+	if (dupflags & DUPF_CLOFORK)
+		fdp->fd_ofileflags[new] |= UF_FORKCLOSE;
 	*retval = new;
 
 	if (oldfp != NULL) {
@@ -711,7 +721,7 @@ fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
 	fdp->fd_ofiles[fd] = fp;
 	mtx_leave(&fdp->fd_fplock);
 
-	fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
+	fdp->fd_ofileflags[fd] |= (flags & (UF_EXCLOSE | UF_FORKCLOSE));
 }
 
 void
@@ -1141,7 +1151,9 @@ fdcopy(struct process *pr)
 		struct file *fp = fdp->fd_ofiles[i];
 
 		if (fp != NULL) {
+			int fileflags = fdp->fd_ofileflags[i];
 			/*
+			 * If the UF_FORKCLOSE flag is set, skip the fd.
 			 * XXX Gruesome hack. If count gets too high, fail
 			 * to copy an fd, since fdcopy()'s callers do not
 			 * permit it to indicate failure yet.
@@ -1150,6 +1162,7 @@ fdcopy(struct process *pr)
 			 * their internal consistency, so close them here.
 			 */
 			if (fp->f_count >= FDUP_MAX_COUNT ||
+			    (fileflags & UF_FORKCLOSE) ||
 			    fp->f_type == DTYPE_KQUEUE) {
 				if (i < newfdp->fd_freefile)
 					newfdp->fd_freefile = i;
@@ -1158,7 +1171,7 @@ fdcopy(struct process *pr)
 
 			FREF(fp);
 			newfdp->fd_ofiles[i] = fp;
-			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
+			newfdp->fd_ofileflags[i] = fileflags;
 			fd_used(newfdp, i);
 		}
 	}
@@ -1407,24 +1420,26 @@ dupfdopen(struct proc *p, int indx, int mode)
 	fdp->fd_ofiles[indx] = wfp;
 	mtx_leave(&fdp->fd_fplock);
 
-	fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) |
-	    (fdp->fd_ofileflags[dupfd] & ~UF_EXCLOSE);
+	fdp->fd_ofileflags[indx] =
+	    (fdp->fd_ofileflags[indx] & (UF_EXCLOSE | UF_FORKCLOSE)) |
+	    (fdp->fd_ofileflags[dupfd] & ~(UF_EXCLOSE | UF_FORKCLOSE));
 
 	return (0);
 }
 
 /*
- * Close any files on exec?
+ * Doing an exec, so handle fd flags: do close-on-exec and clear
+ * pledged and close-on-fork
  */
 void
-fdcloseexec(struct proc *p)
+fdprepforexec(struct proc *p)
 {
 	struct filedesc *fdp = p->p_fd;
 	int fd;
 
 	fdplock(fdp);
 	for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
-		fdp->fd_ofileflags[fd] &= ~UF_PLEDGED;
+		fdp->fd_ofileflags[fd] &= ~(UF_PLEDGED | UF_FORKCLOSE);
 		if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) {
 			/* fdrelease() unlocks fdp. */
 			(void) fdrelease(p, fd);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 86e5b078bfd..dab621ac3ca 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -272,6 +272,7 @@ sys_execve(struct proc *p, void *v, register_t *retval)
 	struct ps_strings arginfo;
 	struct vmspace *vm = p->p_vmspace;
 	struct vnode *otvp;
+	int i;
 
 	/*
 	 * Get other threads to stop, if contested return ERESTART,
@@ -531,7 +532,7 @@ sys_execve(struct proc *p, void *v, register_t *retval)
 	}
 
 	stopprofclock(pr);	/* stop profiling */
-	fdcloseexec(p);		/* handle close on exec */
+	fdprepforexec(p);	/* handle close on exec and close on fork */
 	execsigs(p);		/* reset caught signals */
 	TCB_SET(p, NULL);	/* reset the TCB address */
 	pr->ps_kbind_addr = 0;	/* reset the kbind bits */
@@ -601,8 +602,6 @@ sys_execve(struct proc *p, void *v, register_t *retval)
 	 * MNT_NOEXEC has already been used to disable s[ug]id.
 	 */
 	if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) {
-		int i;
-
 		atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC);
 
 #ifdef KTRACE
@@ -618,66 +617,63 @@ sys_execve(struct proc *p, void *v, register_t *retval)
 			cred->cr_uid = attr.va_uid;
 		if (attr.va_mode & VSGID)
 			cred->cr_gid = attr.va_gid;
+	} else
+		atomic_clearbits_int(&pr->ps_flags, PS_SUGID);
+
+	/*
+	 * A few caveats apply to stdin, stdout, and stderr.
+	 */
+	fdplock(p->p_fd);
+	for (i = 0; i < 3; i++) {
+		struct file *fp = NULL;
 
 		/*
-		 * For set[ug]id processes, a few caveats apply to
-		 * stdin, stdout, and stderr.
+		 * NOTE - This will never return NULL because of immature fds
+		 * since only kernel-threads share the file descriptor table.
 		 */
-		error = 0;
-		fdplock(p->p_fd);
-		for (i = 0; i < 3; i++) {
-			struct file *fp = NULL;
-
-			/*
-			 * NOTE - This will never return NULL because of
-			 * immature fds. The file descriptor table is not
-			 * shared because we're suid.
-			 */
-			fp = fd_getfile(p->p_fd, i);
-
-			/*
-			 * Ensure that stdin, stdout, and stderr are already
-			 * allocated.  We do not want userland to accidentally
-			 * allocate descriptors in this range which has implied
-			 * meaning to libc.
-			 */
-			if (fp == NULL) {
-				short flags = FREAD | (i == 0 ? 0 : FWRITE);
-				struct vnode *vp;
-				int indx;
-
-				if ((error = falloc(p, &fp, &indx)) != 0)
-					break;
+		fp = fd_getfile(p->p_fd, i);
+
+		/*
+		 * Ensure that stdin, stdout, and stderr are already
+		 * allocated.  We do not want userland to accidentally
+		 * allocate descriptors in this range which has implied
+		 * meaning to libc.
+		 */
+		if (fp == NULL) {
+			short flags = FREAD | (i == 0 ? 0 : FWRITE);
+			struct vnode *vp;
+			int indx;
+
+			if ((error = falloc(p, &fp, &indx)) != 0)
+				break;
 #ifdef DIAGNOSTIC
-				if (indx != i)
-					panic("sys_execve: falloc indx != i");
+			if (indx != i)
+				panic("sys_execve: falloc indx != i");
 #endif
-				if ((error = cdevvp(getnulldev(), &vp)) != 0) {
-					fdremove(p->p_fd, indx);
-					closef(fp, p);
-					break;
-				}
-				if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) {
-					fdremove(p->p_fd, indx);
-					closef(fp, p);
-					vrele(vp);
-					break;
-				}
-				if (flags & FWRITE)
-					vp->v_writecount++;
-				fp->f_flag = flags;
-				fp->f_type = DTYPE_VNODE;
-				fp->f_ops = &vnops;
-				fp->f_data = (caddr_t)vp;
-				fdinsert(p->p_fd, indx, 0, fp);
+			if ((error = cdevvp(getnulldev(), &vp)) != 0) {
+				fdremove(p->p_fd, indx);
+				closef(fp, p);
+				break;
+			}
+			if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) {
+				fdremove(p->p_fd, indx);
+				closef(fp, p);
+				vrele(vp);
+				break;
 			}
-			FRELE(fp, p);
+			if (flags & FWRITE)
+				vp->v_writecount++;
+			fp->f_flag = flags;
+			fp->f_type = DTYPE_VNODE;
+			fp->f_ops = &vnops;
+			fp->f_data = (caddr_t)vp;
+			fdinsert(p->p_fd, indx, 0, fp);
 		}
-		fdpunlock(p->p_fd);
-		if (error)
-			goto exec_abort;
-	} else
-		atomic_clearbits_int(&pr->ps_flags, PS_SUGID);
+		FRELE(fp, p);
+	}
+	fdpunlock(p->p_fd);
+	if (error)
+		goto exec_abort;
 
 	/*
 	 * Reset the saved ugids and update the process's copy of the
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 12254a052da..c2c119dd5f3 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -162,7 +162,7 @@ sys_pipe2(struct proc *p, void *v, register_t *retval)
 		syscallarg(int) flags;
 	} */ *uap = v;
 
-	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
+	if (SCARG(uap, flags) & ~(O_CLOEXEC | O_CLOFORK | FNONBLOCK))
 		return (EINVAL);
 
 	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
@@ -175,9 +175,10 @@ dopipe(struct proc *p, int *ufds, int flags)
 	struct file *rf, *wf;
 	struct pipe_pair *pp;
 	struct pipe *rpipe, *wpipe = NULL;
-	int fds[2], cloexec, error;
+	int fds[2], fdflags, error;
 
-	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
+	fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0)
+	    | ((flags & O_CLOFORK) ? UF_FORKCLOSE : 0);
 
 	pp = pipe_pair_create();
 	if (pp == NULL)
@@ -203,8 +204,8 @@ dopipe(struct proc *p, int *ufds, int flags)
 	wf->f_data = wpipe;
 	wf->f_ops = &pipeops;
 
-	fdinsert(fdp, fds[0], cloexec, rf);
-	fdinsert(fdp, fds[1], cloexec, wf);
+	fdinsert(fdp, fds[0], fdflags, rf);
+	fdinsert(fdp, fds[1], fdflags, wf);
 
 	error = copyout(fds, ufds, sizeof(fds));
 	if (error == 0) {
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 7a93c571a29..21f3c7794b1 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -81,7 +81,7 @@ sys_socket(struct proc *p, void *v, register_t *retval)
 	struct file *fp;
 	int type = SCARG(uap, type);
 	int domain = SCARG(uap, domain);
-	int fd, cloexec, nonblock, fflag, error;
+	int fd, fdflags, nonblock, fflag, error;
 	unsigned int ss = 0;
 
 	if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6))
@@ -93,8 +93,9 @@ sys_socket(struct proc *p, void *v, register_t *retval)
 	if (error)
 		return (error);
 
-	type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS);
-	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
+	type &= ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK | SOCK_DNS);
+	fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0)
+	    | ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FORKCLOSE : 0);
 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
 
@@ -113,7 +114,7 @@ sys_socket(struct proc *p, void *v, register_t *retval)
 		fp->f_ops = &socketops;
 		so->so_state |= ss;
 		fp->f_data = so;
-		fdinsert(fdp, fd, cloexec, fp);
+		fdinsert(fdp, fd, fdflags, fp);
 		fdpunlock(fdp);
 		FRELE(fp, p);
 		*retval = fd;
@@ -240,7 +241,7 @@ sys_accept4(struct proc *p, void *v, register_t *retval)
 		syscallarg(socklen_t *) int flags;
 	} */ *uap = v;
 
-	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK))
 		return (EINVAL);
 
 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
@@ -257,9 +258,10 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
 	socklen_t namelen;
 	int error, tmpfd;
 	struct socket *head, *so;
-	int cloexec, nflag;
+	int fdflags, nflag;
 
-	cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
+	fdflags = ((flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0)
+	    | ((flags & SOCK_CLOFORK) ? UF_FORKCLOSE : 0);
 
 	if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
 		return (error);
@@ -346,7 +348,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
 	}
 
 	fdplock(fdp);
-	fdinsert(fdp, tmpfd, cloexec, fp);
+	fdinsert(fdp, tmpfd, fdflags, fp);
 	fdpunlock(fdp);
 	FRELE(fp, p);
 	*retval = tmpfd;
@@ -457,10 +459,11 @@ sys_socketpair(struct proc *p, void *v, register_t *retval)
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp1 = NULL, *fp2 = NULL;
 	struct socket *so1, *so2;
-	int type, cloexec, nonblock, fflag, error, sv[2];
+	int type, fdflags, nonblock, fflag, error, sv[2];
 
-	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
-	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
+	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK);
+	fdflags = ((SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0)
+	    | ((SCARG(uap, type) & SOCK_CLOFORK) ? UF_FORKCLOSE : 0);
 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
 
@@ -498,8 +501,8 @@ sys_socketpair(struct proc *p, void *v, register_t *retval)
 	fp2->f_data = so2;
 	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int));
 	if (error == 0) {
-		fdinsert(fdp, sv[0], cloexec, fp1);
-		fdinsert(fdp, sv[1], cloexec, fp2);
+		fdinsert(fdp, sv[0], fdflags, fp1);
+		fdinsert(fdp, sv[1], fdflags, fp2);
 		fdpunlock(fdp);
 #ifdef KTRACE
 		if (KTRPOINT(p, KTR_STRUCT))
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index f50a040d1e8..3bb90b33b4c 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1146,6 +1146,8 @@ restart:
 		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
 		if (flags & MSG_CMSG_CLOEXEC)
 			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;
+		if (flags & MSG_CMSG_CLOFORK)
+			fdp->fd_ofileflags[fds[i]] |= UF_FORKCLOSE;
 
 		rp++;
 	}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 1f5731712a0..7a6ff816bb6 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1086,7 +1086,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
 	struct file *fp;
 	struct vnode *vp;
 	struct vattr vattr;
-	int flags, cloexec, cmode;
+	int flags, fdflags, cmode;
 	int type, indx, error, localtrunc = 0;
 	struct flock lf;
 	struct nameidata nd;
@@ -1099,7 +1099,8 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
 			return (error);
 	}
 
-	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
+	fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0)
+	    | ((oflags & O_CLOFORK) ? UF_FORKCLOSE : 0);
 
 	fdplock(fdp);
 	if ((error = falloc(p, &fp, &indx)) != 0) {
@@ -1200,7 +1201,7 @@ doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
 	KERNEL_UNLOCK();
 	*retval = indx;
 	fdplock(fdp);
-	fdinsert(fdp, indx, cloexec, fp);
+	fdinsert(fdp, indx, fdflags, fp);
 	fdpunlock(fdp);
 	FRELE(fp, p);
 	return (error);
@@ -1224,7 +1225,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval)
 	struct file *fp;
 	struct vnode *vp;
 	int oflags = SCARG(uap, flags);
-	int flags, cloexec, cmode;
+	int flags, fdflags, cmode;
 	int indx, error;
 	unsigned int i;
 	struct nameidata nd;
@@ -1232,9 +1233,11 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval)
 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
 
 	/* most flags are hardwired */
-	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
+	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW |
+	    (oflags & (O_CLOEXEC | O_CLOFORK));
 
-	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
+	fdflags = ((oflags & O_CLOEXEC) ? UF_EXCLOSE : 0)
+	    | ((oflags & O_CLOFORK) ? UF_FORKCLOSE : 0);
 
 	fdplock(fdp);
 	if ((error = falloc(p, &fp, &indx)) != 0) {
@@ -1270,7 +1273,7 @@ sys___tmpfd(struct proc *p, void *v, register_t *retval)
 	VOP_UNLOCK(vp);
 	*retval = indx;
 	fdplock(fdp);
-	fdinsert(fdp, indx, cloexec, fp);
+	fdinsert(fdp, indx, fdflags, fp);
 	fdpunlock(fdp);
 	FRELE(fp, p);
 
@@ -1352,7 +1355,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval)
 	struct vnode *vp = NULL;
 	struct mount *mp;
 	struct ucred *cred = p->p_ucred;
-	int flags, cloexec;
+	int flags, fdflags;
 	int type, indx, error=0;
 	struct flock lf;
 	struct vattr va;
@@ -1370,7 +1373,8 @@ sys_fhopen(struct proc *p, void *v, register_t *retval)
 	if ((flags & O_CREAT))
 		return (EINVAL);
 
-	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
+	fdflags = ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0)
+	    | ((flags & O_CLOFORK) ? UF_FORKCLOSE : 0);
 
 	fdplock(fdp);
 	if ((error = falloc(p, &fp, &indx)) != 0) {
@@ -1456,7 +1460,7 @@ sys_fhopen(struct proc *p, void *v, register_t *retval)
 	VOP_UNLOCK(vp);
 	*retval = indx;
 	fdplock(fdp);
-	fdinsert(fdp, indx, cloexec, fp);
+	fdinsert(fdp, indx, fdflags, fp);
 	fdpunlock(fdp);
 	FRELE(fp, p);
 	return (0);
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index e964ea49dde..33a7f32bc6c 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -83,22 +83,24 @@
 #define	O_EXLOCK	0x0020		/* open with exclusive file lock */
 #define	O_ASYNC		0x0040		/* signal pgrp when data ready */
 #define	O_FSYNC		0x0080		/* backwards compatibility */
-#define	O_NOFOLLOW	0x0100		/* if path is a symlink, don't follow */
 #endif
 #if __POSIX_VISIBLE >= 199309 || __XPG_VISIBLE >= 420
 #define	O_SYNC		0x0080		/* synchronous writes */
-#endif
-#define	O_CREAT		0x0200		/* create if nonexistent */
-#define	O_TRUNC		0x0400		/* truncate to zero length */
-#define	O_EXCL		0x0800		/* error if already exists */
-
 /*
- * POSIX 1003.1 specifies a higher granularity for synchronous operations
+ * POSIX 1003.1 permits a higher granularity for synchronous operations
  * than we support.  Since synchronicity is all or nothing in OpenBSD
  * we just define these to be the same as O_SYNC.
  */
 #define	O_DSYNC		O_SYNC		/* synchronous data writes */
 #define	O_RSYNC		O_SYNC		/* synchronous reads */
+#endif
+
+/* defined by POSIX Issue 7 */
+#define	O_NOFOLLOW	0x0100		/* if path is a symlink, don't follow */
+
+#define	O_CREAT		0x0200		/* create if nonexistent */
+#define	O_TRUNC		0x0400		/* truncate to zero length */
+#define	O_EXCL		0x0800		/* error if already exists */
 
 /* defined by POSIX 1003.1; BSD default, this bit is not required */
 #define	O_NOCTTY	0x8000		/* don't assign controlling terminal */
@@ -107,6 +109,9 @@
 #define	O_CLOEXEC	0x10000		/* atomically set FD_CLOEXEC */
 #define	O_DIRECTORY	0x20000		/* fail if not a directory */
 
+/* defined by POSIX Issue 8 */
+#define	O_CLOFORK	0x40000		/* atomically set FD_CLOFORK */
+
 #ifdef _KERNEL
 /*
  * convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE.
@@ -158,9 +163,15 @@
 #if __BSD_VISIBLE
 #define F_ISATTY	11		/* used by isatty(3) */
 #endif
+#if __POSIX_VISIBLE >= 202405
+#define	F_DUPFD_CLOFORK	12		/* duplicate with FD_CLOFORK set */
+#endif
 
 /* file descriptor flags (F_GETFD, F_SETFD) */
 #define	FD_CLOEXEC	1		/* close-on-exec flag */
+#if __POSIX_VISIBLE >= 202405
+#define	FD_CLOFORK	4		/* close-on-fork flag */
+#endif
 
 /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */
 #define	F_RDLCK		1		/* shared or read lock */
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index 50bc7734a02..6302cbeb793 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -115,6 +115,7 @@ struct filedesc0 {
  */
 #define	UF_EXCLOSE 	0x01		/* auto-close on exec */
 #define	UF_PLEDGED 	0x02		/* open after pledge(2) */
+#define	UF_FORKCLOSE 	0x04		/* auto-close on fork */
 
 /*
  * Flags on the file descriptor table.
@@ -143,7 +144,7 @@ void	fdfree(struct proc *p);
 int	fdrelease(struct proc *p, int);
 void	fdinsert(struct filedesc *, int, int, struct file *);
 void	fdremove(struct filedesc *, int);
-void	fdcloseexec(struct proc *);
+void	fdprepforexec(struct proc *);
 struct file *fd_iterfile(struct file *, struct proc *);
 struct file *fd_getfile(struct filedesc *, int);
 struct file *fd_getfile_mode(struct filedesc *, int, int);
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
index 4fd50d29274..e0635bd3656 100644
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -72,14 +72,19 @@ typedef	__sa_family_t	sa_family_t;	/* sockaddr address family type */
 /*
  * Socket creation flags
  */
-#if __BSD_VISIBLE
+#if __POSIX_VISIBLE >= 202405 || __BSD_VISIBLE
 #define	SOCK_CLOEXEC		0x8000	/* set FD_CLOEXEC */
 #define	SOCK_NONBLOCK		0x4000	/* set O_NONBLOCK */
+#endif
+#if __BSD_VISIBLE
 #ifdef _KERNEL
 #define	SOCK_NONBLOCK_INHERIT	0x2000	/* inherit O_NONBLOCK from listener */
 #endif
 #define	SOCK_DNS		0x1000	/* set SS_DNS */
 #endif /* __BSD_VISIBLE */
+#if __POSIX_VISIBLE >= 202405
+#define	SOCK_CLOFORK		0x0800	/* set FD_CLOFORK */
+#endif
 
 /*
  * Option flags per-socket.
@@ -511,6 +516,7 @@ struct timespec;
 #define	MSG_NOSIGNAL		0x400	/* do not send SIGPIPE */
 #define	MSG_CMSG_CLOEXEC	0x800	/* set FD_CLOEXEC on received fds */
 #define	MSG_WAITFORONE		0x1000	/* nonblocking but wait for one msg */
+#define	MSG_CMSG_CLOFORK	0x2000	/* set FD_CLOFORK on received fds */
 
 /*
  * Header for ancillary data objects in msg_control buffer.
@@ -586,7 +592,7 @@ int	sockatmark(int);
 int	socket(int, int, int);
 int	socketpair(int, int, int, int *);
 
-#if __BSD_VISIBLE
+#if __POSIX_VISIBLE >= 202405 || __BSD_VISIBLE
 int	accept4(int, struct sockaddr *__restrict, socklen_t *__restrict, int);
 #endif
 
diff --git a/usr.bin/fstat/fstat.c b/usr.bin/fstat/fstat.c
index a74d3a6e916..e4dbc1638c5 100644
--- a/usr.bin/fstat/fstat.c
+++ b/usr.bin/fstat/fstat.c
@@ -482,6 +482,8 @@ vtrans(struct kinfo_file *kf)
 		strlcat(rwep, "w", sizeof rwep);
 	if (kf->fd_ofileflags & UF_EXCLOSE)
 		strlcat(rwep, "e", sizeof rwep);
+	if (kf->fd_ofileflags & UF_FORKCLOSE)
+		strlcat(rwep, "f", sizeof rwep);
 	if (kf->fd_ofileflags & UF_PLEDGED)
 		strlcat(rwep, "p", sizeof rwep);
 	printf(" %4s", rwep);
-- 
2.49.0

From 740c517dfedd264ebf352a710b2de359f551ed2b Mon Sep 17 00:00:00 2001
From: Ricardo Branco <rbranco@suse.de>
Date: Wed, 25 Jun 2025 13:02:51 +0200
Subject: [PATCH 2/2] Hook illumos-os-tests for oclo to libc regress

Co-authored-by: Theo Buehler <tb@openbsd.org>
---
 regress/lib/libc/Makefile              |  2 +-
 regress/lib/libc/illumos/Makefile      |  7 +++++++
 regress/lib/libc/illumos/Makefile.inc  |  7 +++++++
 regress/lib/libc/illumos/oclo/Makefile | 14 ++++++++++++++
 4 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 regress/lib/libc/illumos/Makefile
 create mode 100644 regress/lib/libc/illumos/Makefile.inc
 create mode 100644 regress/lib/libc/illumos/oclo/Makefile

diff --git a/regress/lib/libc/Makefile b/regress/lib/libc/Makefile
index 59d043c62fe..8bdc7809717 100644
--- a/regress/lib/libc/Makefile
+++ b/regress/lib/libc/Makefile
@@ -11,7 +11,7 @@ SUBDIR+= ffs fmemopen fnmatch fpclassify fread
 SUBDIR+= gcvt getaddrinfo getcap getopt getopt_long glob
 SUBDIR+= hash
 SUBDIR+= hsearch
-SUBDIR+= ieeefp ifnameindex
+SUBDIR+= ieeefp ifnameindex illumos
 SUBDIR+= ldexp locale longjmp
 SUBDIR+= malloc mkstemp modf
 SUBDIR+= netdb
diff --git a/regress/lib/libc/illumos/Makefile b/regress/lib/libc/illumos/Makefile
new file mode 100644
index 00000000000..7fdceb00349
--- /dev/null
+++ b/regress/lib/libc/illumos/Makefile
@@ -0,0 +1,7 @@
+#	$OpenBSD$
+
+SUBDIR += oclo
+
+install:
+
+.include <bsd.subdir.mk>
diff --git a/regress/lib/libc/illumos/Makefile.inc b/regress/lib/libc/illumos/Makefile.inc
new file mode 100644
index 00000000000..574d9f3d970
--- /dev/null
+++ b/regress/lib/libc/illumos/Makefile.inc
@@ -0,0 +1,7 @@
+ILLUMOS_OS_TESTDIR = /usr/local/share/illumos-os-tests
+
+.if !exists(${ILLUMOS_OS_TESTDIR})
+regress:
+	@echo package illumos-os-tests is required for this regress
+	@echo SKIPPED
+.endif
diff --git a/regress/lib/libc/illumos/oclo/Makefile b/regress/lib/libc/illumos/oclo/Makefile
new file mode 100644
index 00000000000..d808c54ab2a
--- /dev/null
+++ b/regress/lib/libc/illumos/oclo/Makefile
@@ -0,0 +1,14 @@
+#	$OpenBSD$
+
+.if exists(/usr/local/share/illumos-os-tests)
+
+PROGS =		oclo
+PROGS +=	oclo_errors
+PROGS +=	ocloexec_verify
+
+LDADD_ocloexec_verify = -lkvm
+
+.PATH: /usr/local/share/illumos-os-tests/tests/oclo
+.endif
+
+.include <bsd.regress.mk>
-- 
2.49.0