Index | Thread | Search

From:
Jeremie Courreges-Anglas <jca@wxcvbn.org>
Subject:
pax -o delete=pattern (was: Re: pax format options)
To:
tech@openbsd.org
Cc:
millert@openbsd.org
Date:
Wed, 17 Apr 2024 18:53:28 +0200

Download raw body.

Thread
  • Jeremie Courreges-Anglas:

    pax format options

    • Jeremie Courreges-Anglas:

      pax -o delete=pattern (was: Re: pax format options)

    • Todd C. Miller:

      pax format options

On Wed, Apr 17, 2024 at 12:57:34PM +0200, Jeremie Courreges-Anglas wrote:
> 
> pax -o write_opt=nodir and tar -o were designed for the old tar
> format, and zhuk@ extended it to also cover the ustar format.  IMO it
> makes no sense to keep on supporting it for the pax format.
> 
> Also, the pax format ought to support a lot more bells and whistles,
> so let's have an option handler specific for it, even if currently
> empty.

Here's an example of a possible pax-specific option: -o
delete=pattern.  Right now it is most useful if you want to strip the
atime/mtime headers that tend to bloat uncompressed pax archives:
pax -o delete=?time.

Thoughts?  ok?


Index: tar.c
===================================================================
--- tar.c.orig	2024-04-17 17:00:26.759086951 +0100
+++ tar.c	2024-04-17 17:00:30.499086757 +0100
@@ -39,6 +39,7 @@
 #include <sys/stat.h>
 #include <ctype.h>
 #include <errno.h>
+#include <fnmatch.h>
 #include <grp.h>
 #include <libgen.h>
 #include <limits.h>
@@ -52,6 +53,11 @@
 #include "extern.h"
 #include "tar.h"
 
+
+/*
+ * Definitions used for pax (extended headers) support
+ */
+
 SLIST_HEAD(xheader, xheader_record);
 struct xheader_record {
 	SLIST_ENTRY(xheader_record)	 entry;
@@ -62,6 +68,13 @@
 /* shortest possible extended record: "5 a=\n" */
 #define MINXHDRSZ	5
 
+SLIST_HEAD(xheader_filters, xheader_filter);
+struct xheader_filter {	/* one record-name pattern to filter out */
+	SLIST_ENTRY(xheader_filter)	 entry;	/* list linkage */
+	char				*match;	/* fnmatch(3) pattern */
+};
+struct xheader_filters			 xhdr_filters;	/* filter list head */
+
 /*
  * Routines for reading, writing and header identify of various versions of tar
  */
@@ -912,6 +925,52 @@
 
 #ifndef SMALL
+/*
+ * Register a pattern given with -o delete=pattern.  The pattern is copied
+ * and inserted at the head of xhdr_filters.
+ * Return:
+ *	0 on success, -1 if memory allocation fails; on failure the list is
+ *	left unchanged and nothing stays allocated.
+ */
 static int
+xheader_add_filter(const char *match)
+{
+	struct xheader_filter	*filter;
+
+	filter = calloc(1, sizeof(*filter));
+	if (filter == NULL)
+		return -1;
+	filter->match = strdup(match);
+	if (filter->match == NULL) {
+		/* Do not leak the list node when the copy fails. */
+		free(filter);
+		return -1;
+	}
+
+	SLIST_INSERT_HEAD(&xhdr_filters, filter, entry);
+
+	return 0;
+}
+
+/*
+ * Check a keyword against every pattern in xhdr_filters using fnmatch(3).
+ * Return:
+ *	1 if at least one pattern matches, 0 otherwise (including when no
+ *	pattern has been registered).
+ */
+static int
+xheader_is_filtered(const char *headername)
+{
+	struct xheader_filter	*filter;
+
+	SLIST_FOREACH(filter, &xhdr_filters, entry) {
+		if (fnmatch(filter->match, headername, 0) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int
 xheader_add(struct xheader *xhdr, const char *keyword,
     const char *value)
 {
@@ -919,6 +962,9 @@
 	int reclen, tmplen;
 	char *s;
 
+	if (xheader_is_filtered(keyword))
+		return 0;
+
 	tmplen = MINXHDRSZ;
 	do {
 		reclen = tmplen;
@@ -951,6 +997,9 @@
 	int reclen, tmplen;
 	char *s;
 
+	if (xheader_is_filtered(keyword))
+		return 0;
+
 	tmplen = MINXHDRSZ;
 	do {
 		reclen = tmplen;
@@ -984,6 +1033,9 @@
 	char frac[sizeof(".111222333")] = "";
 	char *s;
 
+	if (xheader_is_filtered(keyword))
+		return 0;
+
 	/* Only write subsecond part if non-zero */
 	if (value->tv_nsec != 0) {
 		int n;
@@ -1460,10 +1512,13 @@
 	OPLIST *opt;
 
 	while ((opt = opt_next()) != NULL) {
-		if (1) {
+		if (!strcmp(opt->name, "delete")) {
+			if (xheader_add_filter(opt->value) == -1)
+				return -1;
+		} else {
 			paxwarn(1, "Unknown pax format -o option/value pair %s=%s",
 			    opt->name, opt->value);
-			return(-1);
+			return -1;
 		}
 	}
 	return 0;
Index: pax.1
===================================================================
--- pax.1.orig	2024-04-17 17:00:16.289096238 +0100
+++ pax.1	2024-04-17 17:48:23.389161836 +0100
@@ -467,6 +467,20 @@
 .It Cm write_opt=nodir
 When writing archives, omit the storage of directories.
 .El
+.Pp
+The following options are available for the
+.Cm pax
+format:
+.Pp
+.Bl -tag -width Ds -compact
+.It Cm delete=pattern
+When writing archives, omit pax extended header records whose keyword matches
+.Ar pattern .
+.Ar pattern
+may be a string that contains shell globbing characters.
+See
+.Xr fnmatch 3 .
+.El
 .It Fl P
 Do not follow symbolic links, perform a physical file system traversal.
 This is the default mode.
@@ -874,6 +888,9 @@
 .St -p1003.1-2001
 standard.
 The default blocksize for this format is 5120 bytes.
+Behavior can be tuned using
+.Fl o
+options.
 .El
 .Pp
 .Nm

-- 
jca