Index | Thread | Search

From:
Job Snijders <job@bsd.nl>
Subject:
pax: add write_opt=nouidgid option for tar/ustar formats
To:
tech@openbsd.org
Date:
Mon, 5 Jan 2026 21:38:28 +0000

Download raw body.

Thread
For certain archive production operations, when writing archives, I
consider it very helpful to normalize (zeroize) some aspects of the
tar/ustar header on the fly - to avoid having to do "chown -R ..." as an
extra step beforehand, especially when dealing with massive file system
hierarchies.

On other systems, for example Debian, I use the '-M' option to zeroize the
user ID and group ID and achieve the desired normalization, for example:

	$ ls -1 | pax -w -x ustar -M dist -f ./archive.tar

Their pax man page lists a wide range of normalization options via -M:
https://github.com/MirBSD/mircpio/blob/master/pax.1#L680-L730
That pax utility seems to be a fork of 'our' pax implementation.
However, I'm not so sure it would be worth our while to try to pull in
all the permutations their -M option offers.

I thought that instead of implementing (a subset of) '-M', a smaller
diff along the lines of the existing 'write_opt=nodir' option would be
better.

OK? Thoughts?

Index: pax.1
===================================================================
RCS file: /cvs/src/bin/pax/pax.1,v
diff -u -p -r1.80 pax.1
--- pax.1	30 Nov 2024 06:59:12 -0000	1.80
+++ pax.1	5 Jan 2026 20:51:38 -0000
@@ -456,7 +456,7 @@ In general,
 take the form:
 .Ar name Ns = Ns Ar value .
 .Pp
-The following options are available for the
+The following normalization options are available for the
 .Cm ustar
 and old
 .Bx
@@ -466,6 +466,8 @@ formats:
 .Bl -tag -width Ds -compact
 .It Cm write_opt=nodir
 When writing archives, omit the storage of directories.
+.It Cm write_opt=nouidgid
+When writing archives, do not preserve the user ID and group ID.
 .El
 .It Fl P
 Do not follow symbolic links, perform a physical file system traversal.
Index: tar.c
===================================================================
RCS file: /cvs/src/bin/pax/tar.c,v
diff -u -p -r1.87 tar.c
--- tar.c	6 Jul 2025 19:25:51 -0000	1.87
+++ tar.c	5 Jan 2026 20:51:38 -0000
@@ -86,6 +86,7 @@ static gid_t gid_warn;
  */
 
 int tar_nodir;				/* do not write dirs under old tar */
+int tar_nouidgid;			/* do not preserve ownership */
 char *gnu_name_string;			/* GNU ././@LongLink hackery name */
 char *gnu_link_string;			/* GNU ././@LongLink hackery link */
 
@@ -360,26 +361,33 @@ tar_opt(void)
 	OPLIST *opt;
 
 	while ((opt = opt_next()) != NULL) {
-		if (strcmp(opt->name, TAR_OPTION) ||
-		    strcmp(opt->value, TAR_NODIR)) {
-			paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
-			    opt->name, opt->value);
-			paxwarn(1,"%s=%s is the only supported tar format option",
-			    TAR_OPTION, TAR_NODIR);
-			return(-1);
-		}
-
 		/*
-		 * we only support one option, and only when writing
+		 * Only two options are supported, and only when writing.
 		 */
+		if (strcmp(opt->name, TAR_OPTION) != 0) {
+			paxwarn(1, "Unknown tar format -o option/value pair "
+			    "%s=%s", opt->name, opt->value);
+			return -1;
+		}
+		if (strcmp(opt->value, TAR_NODIR) != 0 &&
+		    strcmp(opt->value, TAR_NOUIDGID) != 0) {
+			paxwarn(1, "Unknown tar format -o option/value pair"
+			    "%s=%s", opt->name, opt->value);
+			return -1;
+		}
 		if ((act != APPND) && (act != ARCHIVE)) {
 			paxwarn(1, "%s=%s is only supported when writing.",
 			    opt->name, opt->value);
-			return(-1);
+			return -1;
 		}
-		tar_nodir = 1;
+
+		if (strcmp(opt->value, TAR_NODIR) == 0)
+			tar_nodir = 1;
+		if (strcmp(opt->value, TAR_NOUIDGID) == 0)
+			tar_nouidgid = 1;
 	}
-	return(0);
+
+	return 0;
 }
 
 
@@ -643,12 +651,17 @@ tar_wr(ARCHD *arcn)
 	/*
 	 * copy those fields that are independent of the type
 	 */
-	if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
-	    ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime,
-		sizeof(hd->mtime), 1) ||
-	    ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
-	    ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0))
+	if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0))
+		goto out;
+	if (ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime,
+	    sizeof(hd->mtime), 1))
 		goto out;
+	if (!tar_nouidgid) {
+		if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0))
+			goto out;
+		if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0))
+			goto out;
+	}
 
 	/*
 	 * calculate and add the checksum, then write the header. A return of
@@ -1280,7 +1293,10 @@ wr_ustar_or_pax(ARCHD *arcn, int ustar)
 	 * set the remaining fields. Some versions want all 16 bits of mode
 	 * we better humor them (they really do not meet spec though)....
 	 */
-	if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) {
+	if (tar_nouidgid) {
+		if (ul_oct(0, hd->uid, sizeof(hd->uid), 3))
+			goto out;
+	} else if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) {
 		if (uid_nobody == 0) {
 			if (uid_from_user("nobody", &uid_nobody) == -1)
 				goto out;
@@ -1294,7 +1310,10 @@ wr_ustar_or_pax(ARCHD *arcn, int ustar)
 		if (ul_oct(uid_nobody, hd->uid, sizeof(hd->uid), 3))
 			goto out;
 	}
-	if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) {
+	if (tar_nouidgid) {
+		if (ul_oct(0, hd->gid, sizeof(hd->gid), 3))
+			goto out;
+	} else if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) {
 		if (gid_nobody == 0) {
 			if (gid_from_group("nobody", &gid_nobody) == -1)
 				goto out;
@@ -1338,7 +1357,7 @@ wr_ustar_or_pax(ARCHD *arcn, int ustar)
 #endif
 	if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3))
 		goto out;
-	if (!Nflag) {
+	if (!Nflag && !tar_nouidgid) {
 		if ((name = user_from_uid(arcn->sb.st_uid, 1)) != NULL)
 			strncpy(hd->uname, name, sizeof(hd->uname));
 		if ((name = group_from_gid(arcn->sb.st_gid, 1)) != NULL)
Index: tar.h
===================================================================
RCS file: /cvs/src/bin/pax/tar.h,v
diff -u -p -r1.9 tar.h
--- tar.h	8 Jan 2014 06:43:34 -0000	1.9
+++ tar.h	5 Jan 2026 20:51:38 -0000
@@ -118,6 +118,7 @@ typedef struct {
  */
 #define TAR_NODIR	"nodir"
 #define TAR_OPTION	"write_opt"
+#define TAR_NOUIDGID	"nouidgid"
 
 /*
  * default device names