Index | Thread | Search

From:
Jeremie Courreges-Anglas <jca@wxcvbn.org>
Subject:
tar(1) write format selection
To:
tech@openbsd.org
Cc:
millert@openbsd.org
Date:
Mon, 15 Apr 2024 19:48:22 +0200

Download raw body.

Thread
One drawback of switching tar(1) to the 'pax' write format by default
is that tar(1) gives the user no generic way to select a format
(-o and -O suck).  The diff below adds -F for that.  GNU tar uses
-H (already taken here) and FreeBSD/NetBSD tar use --format, but we
don't want long options.

-o handling becomes slightly brittle, but I have plans to
butcher^Wimprove that option anyway.

Input and reviews welcome.  ok?


Index: options.c
===================================================================
RCS file: /cvs/src/bin/pax/options.c,v
diff -u -p -r1.108 options.c
--- options.c	15 Apr 2024 17:33:10 -0000	1.108
+++ options.c	15 Apr 2024 17:44:05 -0000
@@ -725,9 +725,10 @@ static void
 tar_options(int argc, char **argv)
 {
 	int c;
-	int Oflag = 0;
 	int nincfiles = 0;
 	int incfiles_max = 0;
+	unsigned int i;
+	unsigned int format = F_TAR;
 	struct incfile {
 		char *file;
 		char *dir;
@@ -743,7 +744,7 @@ tar_options(int argc, char **argv)
 	 * process option flags
 	 */
 	while ((c = getoldopt(argc, argv,
-	    "b:cef:hjmopqruts:vwxzBC:HI:LNOPXZ014578")) != -1) {
+	    "b:cef:hjmopqruts:vwxzBC:F:HI:LNOPXZ014578")) != -1) {
 		switch (c) {
 		case 'b':
 			/*
@@ -792,10 +793,10 @@ tar_options(int argc, char **argv)
 			pmtime = 0;
 			break;
 		case 'O':
-			Oflag = 1;
+			format = F_OTAR;
 			break;
 		case 'o':
-			Oflag = 2;
+			format = F_OTAR;
 			tar_nodir = 1;
 			break;
 		case 'p':
@@ -868,6 +869,24 @@ tar_options(int argc, char **argv)
 			havechd++;
 			chdname = optarg;
 			break;
+		case 'F':
+			for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i)
+				if (fsub[i].name != NULL &&
+				    strcmp(fsub[i].name, optarg) == 0)
+					break;
+			if (i < sizeof(fsub)/sizeof(FSUB)) {
+				format = i;
+				break;
+			}
+			paxwarn(1, "Unknown -F format: %s", optarg);
+			(void)fputs("tar: Known -F formats are:", stderr);
+			for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i)
+				if (fsub[i].name != NULL)
+					(void)fprintf(stderr, " %s",
+					    fsub[i].name);
+			(void)fputs("\n\n", stderr);
+			tar_usage();
+			break;
 		case 'H':
 			/*
 			 * follow command line symlinks only
@@ -1042,7 +1061,7 @@ tar_options(int argc, char **argv)
 		break;
 	case ARCHIVE:
 	case APPND:
-		frmt = &(fsub[Oflag ? F_OTAR : F_TAR]);
+		frmt = &fsub[format];
 
 		if (chdname != NULL) {	/* initial chdir() */
 			if (ftree_add(chdname, 1) < 0)
@@ -1704,11 +1723,12 @@ void
 tar_usage(void)
 {
 	(void)fputs(
-	    "usage: tar {crtux}[014578befHhjLmNOoPpqsvwXZz]\n"
-	    "           [blocking-factor | archive | replstr] [-C directory] [-I file]\n"
-	    "           [file ...]\n"
+	    "usage: tar {crtux}[014578beFfHhjLmNOoPpqsvwXZz]\n"
+	    "           [blocking-factor | format | archive | replstr]\n"
+	    "           [-C directory] [-I file] [file ...]\n"
 	    "       tar {-crtux} [-014578eHhjLmNOoPpqvwXZz] [-b blocking-factor]\n"
-	    "           [-C directory] [-f archive] [-I file] [-s replstr] [file ...]\n",
+	    "           [-C directory] [-F format] [-f archive] [-I file]\n"
+	    "           [-s replstr] [file ...]\n",
 	    stderr);
 	exit(1);
 }
Index: tar.1
===================================================================
RCS file: /cvs/src/bin/pax/tar.1,v
diff -u -p -r1.65 tar.1
--- tar.1	3 Aug 2023 18:17:54 -0000	1.65
+++ tar.1	15 Apr 2024 17:44:05 -0000
@@ -32,10 +32,10 @@
 .Sh SYNOPSIS
 .Nm tar
 .Sm off
-.No { Cm crtux No } Op Cm 014578befHhjLmNOoPpqsvwXZz
+.No { Cm crtux No } Op Cm 014578beFfHhjLmNOoPpqsvwXZz
 .Sm on
 .Bk -words
-.Op Ar blocking-factor | archive | replstr
+.Op Ar blocking-factor | format | archive | replstr
 .Op Fl C Ar directory
 .Op Fl I Ar file
 .Op Ar
@@ -46,6 +46,7 @@
 .Op Fl 014578eHhjLmNOoPpqvwXZz
 .Op Fl b Ar blocking-factor
 .Op Fl C Ar directory
+.Op Fl F Ar format
 .Op Fl f Ar archive
 .Op Fl I Ar file
 .Op Fl s Ar replstr
@@ -141,6 +142,77 @@ the specified directory; when creating, 
 from the directory.
 .It Fl e
 Stop after the first error.
+.It Fl F Ar format
+Specify the output archive format, with the default format being
+.Cm ustar .
+.Nm
+currently supports the following formats:
+.Bl -tag -width "sv4cpio"
+.It Cm bcpio
+The old binary cpio format.
+The default blocksize for this format is 5120 bytes.
+This format is not very portable and should not be used when other formats
+are available.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm cpio
+The extended cpio interchange format specified in the
+.St -p1003.2
+standard.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm sv4cpio
+The System V release 4 cpio.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm sv4crc
+The System V release 4 cpio with file CRC checksums.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm tar
+The old
+.Bx
+tar format as found in
+.Bx 4.3 .
+The default blocksize for this format is 10240 bytes.
+Pathnames stored by this format must be 100 characters or less in length.
+Only regular files, hard links, soft links, and directories
+will be archived (other file types are not supported).
+For backwards compatibility with even older tar formats, see the
+description for
+.Fl o .
+.It Cm ustar
+The extended tar interchange format specified in the
+.St -p1003.2
+standard.
+The default blocksize for this format is 10240 bytes.
+Filenames stored by this format must be 100 characters or less in length;
+the total pathname must be 256 characters or less.
+.It Cm pax
+The pax interchange format specified in the
+.St -p1003.1-2001
+standard.
+The default blocksize for this format is 5120 bytes.
+.El
+.Pp
+.Nm
+will detect and report any file that it is unable to store or extract
+as the result of any specific archive format restrictions.
+The individual archive formats may impose additional restrictions on use.
+Typical archive format restrictions include (but are not limited to):
+file pathname length, file size, link pathname length, and the type of the
+file.
 .It Fl f Ar archive
 Read from or write to
 .Ar archive .

-- 
jca