From: Jeremie Courreges-Anglas
Subject: tar(1) write format selection
To: tech@openbsd.org
Cc: millert@openbsd.org
Date: Mon, 15 Apr 2024 19:48:22 +0200

One drawback of switching tar(1) to use 'pax' write format by default is
that tar(1) doesn't give the user a generic way to specify a format (-o
and -O suck). So the diff below uses -F for that. GNU tar uses -H
(already used) and FreeBSD/NetBSD tar use --format and we don't want
long options.

-o handling becomes slightly brittle but I have plans to
butcher^Wimprove that option anyway.

Input and reviews welcome. ok?


Index: options.c
===================================================================
RCS file: /cvs/src/bin/pax/options.c,v
diff -u -p -r1.108 options.c
--- options.c	15 Apr 2024 17:33:10 -0000	1.108
+++ options.c	15 Apr 2024 17:44:05 -0000
@@ -725,9 +725,10 @@ static void
 tar_options(int argc, char **argv)
 {
 	int c;
-	int Oflag = 0;
 	int nincfiles = 0;
 	int incfiles_max = 0;
+	unsigned int i;
+	unsigned int format = F_TAR;
 	struct incfile {
 		char *file;
 		char *dir;
@@ -743,7 +744,7 @@ tar_options(int argc, char **argv)
 	 * process option flags
 	 */
 	while ((c = getoldopt(argc, argv,
-	    "b:cef:hjmopqruts:vwxzBC:HI:LNOPXZ014578")) != -1) {
+	    "b:cef:hjmopqruts:vwxzBC:F:HI:LNOPXZ014578")) != -1) {
 		switch (c) {
 		case 'b':
 			/*
@@ -792,10 +793,10 @@ tar_options(int argc, char **argv)
 			pmtime = 0;
 			break;
 		case 'O':
-			Oflag = 1;
+			format = F_OTAR;
 			break;
 		case 'o':
-			Oflag = 2;
+			format = F_OTAR;
 			tar_nodir = 1;
 			break;
 		case 'p':
@@ -868,6 +869,24 @@ tar_options(int argc, char **argv)
 			havechd++;
 			chdname = optarg;
 			break;
+		case 'F':
+			for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i)
+				if (fsub[i].name != NULL &&
+				    strcmp(fsub[i].name, optarg) == 0)
+					break;
+			if (i < sizeof(fsub)/sizeof(FSUB)) {
+				format = i;
+				break;
+			}
+			paxwarn(1, "Unknown -F format: %s", optarg);
+			(void)fputs("tar: Known -F formats are:", stderr);
+			for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i)
+				if (fsub[i].name != NULL)
+					(void)fprintf(stderr, " %s",
+					    fsub[i].name);
+			(void)fputs("\n\n", stderr);
+			tar_usage();
+			break;
 		case 'H':
 			/*
 			 * follow command line symlinks only
@@ -1042,7 +1061,7 @@ tar_options(int argc, char **argv)
 		break;
 	case ARCHIVE:
 	case APPND:
-		frmt = &(fsub[Oflag ? F_OTAR : F_TAR]);
+		frmt = &fsub[format];
 
 		if (chdname != NULL) {	/* initial chdir() */
 			if (ftree_add(chdname, 1) < 0)
@@ -1704,11 +1723,12 @@ void
 tar_usage(void)
 {
 	(void)fputs(
-	    "usage: tar {crtux}[014578befHhjLmNOoPpqsvwXZz]\n"
-	    "           [blocking-factor | archive | replstr] [-C directory] [-I file]\n"
-	    "           [file ...]\n"
+	    "usage: tar {crtux}[014578beFfHhjLmNOoPpqsvwXZz]\n"
+	    "           [blocking-factor | format | archive | replstr]\n"
+	    "           [-C directory] [-I file] [file ...]\n"
 	    "       tar {-crtux} [-014578eHhjLmNOoPpqvwXZz] [-b blocking-factor]\n"
-	    "           [-C directory] [-f archive] [-I file] [-s replstr] [file ...]\n",
+	    "           [-C directory] [-F format] [-f archive] [-I file]\n"
+	    "           [-s replstr] [file ...]\n",
 	    stderr);
 	exit(1);
 }
Index: tar.1
===================================================================
RCS file: /cvs/src/bin/pax/tar.1,v
diff -u -p -r1.65 tar.1
--- tar.1	3 Aug 2023 18:17:54 -0000	1.65
+++ tar.1	15 Apr 2024 17:44:05 -0000
@@ -32,10 +32,10 @@
 .Sh SYNOPSIS
 .Nm tar
 .Sm off
-.No { Cm crtux No } Op Cm 014578befHhjLmNOoPpqsvwXZz
+.No { Cm crtux No } Op Cm 014578beFfHhjLmNOoPpqsvwXZz
 .Sm on
 .Bk -words
-.Op Ar blocking-factor | archive | replstr
+.Op Ar blocking-factor | format | archive | replstr
 .Op Fl C Ar directory
 .Op Fl I Ar file
 .Op Ar
@@ -46,6 +46,7 @@
 .Op Fl 014578eHhjLmNOoPpqvwXZz
 .Op Fl b Ar blocking-factor
 .Op Fl C Ar directory
+.Op Fl F Ar format
 .Op Fl f Ar archive
 .Op Fl I Ar file
 .Op Fl s Ar replstr
@@ -141,6 +142,77 @@ the specified directory; when creating,
 from the directory.
 .It Fl e
 Stop after the first error.
+.It Fl F Ar format
+Specify the output archive format, with the default format being
+.Cm ustar .
+.Nm
+currently supports the following formats:
+.Bl -tag -width "sv4cpio"
+.It Cm bcpio
+The old binary cpio format.
+The default blocksize for this format is 5120 bytes.
+This format is not very portable and should not be used when other formats
+are available.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm cpio
+The extended cpio interchange format specified in the
+.St -p1003.2
+standard.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm sv4cpio
+The System V release 4 cpio.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm sv4crc
+The System V release 4 cpio with file CRC checksums.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm tar
+The old
+.Bx
+tar format as found in
+.Bx 4.3 .
+The default blocksize for this format is 10240 bytes.
+Pathnames stored by this format must be 100 characters or less in length.
+Only regular files, hard links, soft links, and directories
+will be archived (other file system types are not supported).
+For backwards compatibility with even older tar formats, see the
+description for
+.Fl o .
+.It Cm ustar
+The extended tar interchange format specified in the
+.St -p1003.2
+standard.
+The default blocksize for this format is 10240 bytes.
+Filenames stored by this format must be 100 characters or less in length;
+the total pathname must be 256 characters or less.
+.It Cm pax
+The pax interchange format specified in the
+.St -p1003.1-2001
+standard.
+The default blocksize for this format is 5120 bytes.
+.El
+.Pp
+.Nm
+will detect and report any file that it is unable to store or extract
+as the result of any specific archive format restrictions.
+The individual archive formats may impose additional restrictions on use.
+Typical archive format restrictions include (but are not limited to):
+file pathname length, file size, link pathname length, and the type of the
+file.
 .It Fl f Ar archive
 Read from or write to
 .Ar archive .


-- 
jca