Download raw body.
pax(1): new -o keyword framework, listopt, global exthdrs, and stricter invalid-path handling
Hello everyone,
I have attached a patch to this email that modernizes pax/tar to match
POSIX.1-2024 and adds a comprehensive -o keyword framework. It supports
global and per-file pax keywords (KEY=VALUE and KEY:=VALUE), deletion
filters (delete=pattern), configurable names for extended-header files
(exthdr.name= and globexthdr.name= with % placeholders), and new toggles
like linkdata (store hard-link contents) and times (always emit
atime/mtime). The -o parser is rebuilt to handle "=", ":=",
comma-separated lists, and backslash escaping, with last-value-wins
semantics. When global keywords are present, a single global pax
extended header (typeflag 'g') is written once per archive.
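On read, a new invalid path/link policy is introduced via
invalid=bypass|write|rename|utf-8|binary; entries marked for bypass are
drained cleanly. With invalid=rename, an over-long path is flagged for
renaming, while an entry with an over-long link target is skipped because
the target cannot be renamed. On write,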
hard links can optionally carry data, and timestamp emission is more
consistent. A new listopt=FORMAT feature customizes verbose listings
(-v) using printf-like conversions with pax-aware fields, including %T
(strftime time), %M (symbolic mode), %D (device numbers or numeric
keyword), %F (composed path), and %L ("path -> linktarget"). The man
page documents all new options and claims POSIX.1-2024 compliance.
Internally, the change adds keyword/storage structs (PAXKEY, PAXOPKV,
PAXDEL), extends ARCHD with xattr/gattr/invalid, enhances wr_xheader()
for global headers and naming templates, resets per-member state between
entries, and includes small correctness/robustness fixes.
Best regards,
David.
Index: bin/pax/tar.c
===================================================================
RCS file: /cvs/src/bin/pax/tar.c,v
diff -u -r1.87 tar.c
--- bin/pax/tar.c 6 Jul 2025 19:25:51 -0000 1.87
+++ bin/pax/tar.c 12 Oct 2025 07:53:27 -0000
@@ -39,6 +39,7 @@
#include <sys/stat.h>
#include <ctype.h>
#include <errno.h>
+#include <fnmatch.h>
#include <grp.h>
#include <libgen.h>
#include <limits.h>
@@ -46,6 +47,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <strings.h>
#include <unistd.h>
#include "pax.h"
@@ -59,6 +61,22 @@
char *record;
};
+/*
+ * Command-line controlled pax state shared across parsing and I/O.
+ * Everything here is file-local; the pax_option_*() functions below are
+ * the accessors.
+ */
+static PAXKEY *pax_global_xattr;	/* global keyword store (see pax_store_kv) */
+static int pax_opt_linkdata;	/* -o linkdata given */
+static int pax_opt_times;	/* -o times given */
+static enum pax_invalid_action pax_opt_invalid = PAX_INVALID_BYPASS;	/* -o invalid= action */
+static char *pax_opt_exthdr_name;	/* -o exthdr.name= template, or NULL */
+static char *pax_opt_globexthdr_name;	/* -o globexthdr.name= template, or NULL */
+static PAXDEL *pax_opt_delete_list;	/* -o delete= patterns, in option order */
+static PAXOPKV *pax_opt_keywords_global;	/* keywords not given with := */
+static PAXOPKV *pax_opt_keywords_local;	/* keywords given with := */
+static int pax_keywords_prepared;	/* set by pax_prepare_user_keywords() */
+#ifndef SMALL
+static int pax_global_written;	/* global header step already done */
+static unsigned int pax_global_seq = 1;	/* next sequence number passed to wr_xheader() */
+#endif
+
/* shortest possible extended record: "5 a=\n" */
#define MINXHDRSZ 5
@@ -73,8 +91,19 @@
static int ull_oct(unsigned long long, char *, int, int);
static int rd_xheader(ARCHD *, int, off_t);
#ifndef SMALL
-static int wr_xheader(char *, HD_USTAR *, struct xheader *);
+static int wr_xheader(const char *, HD_USTAR *, struct xheader *, int,
+ const char *, unsigned int);
#endif
+static int pax_store_kv(PAXKEY **, const char *, const char *);
+static void pax_apply_global(ARCHD *);
+static void pax_global_free(void);
+static int pax_keyword_deleted(const char *);
+static void pax_prepare_user_keywords(void);
+static void pax_apply_local_option_keywords(ARCHD *);
+static int pax_format_xhdr_name(char *, size_t, const char *, const char *,
+ unsigned int);
+static void pax_option_apply_local_xhdr(struct xheader *);
+static int pax_write_global_header(void);
static uid_t uid_nobody;
static uid_t uid_warn;
@@ -112,6 +141,7 @@
off_t
tar_endrd(void)
{
+ pax_global_free();
return NULLCNT * BLKMULT;
}
@@ -656,7 +686,7 @@
* to be written
*/
if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
- sizeof(hd->chksum), 3))
+ sizeof(hd->chksum), 3))
goto out;
if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0 ||
wr_skip(BLKMULT - sizeof(HD_TAR)) < 0) {
@@ -735,8 +765,11 @@
if (ustar_id(buf, BLKMULT) < 0)
return(-1);
+ pax_prepare_user_keywords();
+
reset:
memset(arcn, 0, sizeof(*arcn));
+ pax_apply_global(arcn);
arcn->org_name = arcn->name;
arcn->sb.st_nlink = 1;
arcn->sb.st_size = (off_t)-1;
@@ -881,6 +914,10 @@
*/
arcn->sb.st_mode |= S_IFREG;
arcn->sb.st_nlink = 2;
+ if (arcn->sb.st_size > 0) {
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ arcn->skip = arcn->sb.st_size;
+ }
}
break;
case LONGLINKTYPE:
@@ -908,6 +945,8 @@
arcn->sb.st_mode |= S_IFREG;
break;
}
+
+ pax_apply_local_option_keywords(arcn);
return(0);
}
@@ -920,6 +959,9 @@
int reclen, tmplen;
char *s;
+ if (pax_keyword_deleted(keyword))
+ return 0;
+
tmplen = MINXHDRSZ;
do {
reclen = tmplen;
@@ -952,6 +994,9 @@
int reclen, tmplen;
char *s;
+ if (pax_keyword_deleted(keyword))
+ return 0;
+
tmplen = MINXHDRSZ;
do {
reclen = tmplen;
@@ -985,6 +1030,9 @@
char frac[sizeof(".111222333")] = "";
char *s;
+ if (pax_keyword_deleted(keyword))
+ return 0;
+
/* Only write subsecond part if non-zero */
if (value->tv_nsec != 0) {
int n;
@@ -1038,7 +1086,8 @@
}
static int
-wr_xheader(char *fname, HD_USTAR *fhd, struct xheader *xhdr)
+wr_xheader(const char *fname, HD_USTAR *fhd, struct xheader *xhdr,
+ int global, const char *override_name, unsigned int seq)
{
char hdblk[sizeof(HD_USTAR)];
HD_USTAR *hd;
@@ -1052,41 +1101,66 @@
memset(hdblk, 0, sizeof(hdblk));
hd = (HD_USTAR *)hdblk;
- hd->typeflag = XHDRTYPE;
+ hd->typeflag = global ? GHDRTYPE : XHDRTYPE;
strncpy(hd->magic, TMAGIC, TMAGLEN);
strncpy(hd->version, TVERSION, TVERSLEN);
if (ul_oct(size, hd->size, sizeof(hd->size), 3))
goto out;
- /*
- * Best effort attempt at providing a useful file name for
- * implementations that don't support pax format. Don't bother
- * with truncation if the resulting file name doesn't fit.
- * XXX dirname/basename portability (check return value?)
- */
- (void)snprintf(buf, sizeof(buf), "%s/PaxHeaders.%ld/%s",
- dirname(fname), (long)getpid(), basename(fname));
+ if (global) {
+ const char *fmt = override_name != NULL ? override_name :
+ pax_option_globexthdr_name();
+ if (fmt != NULL) {
+ if (pax_format_xhdr_name(buf, sizeof(buf), fmt,
+ fname ? fname : "", seq) == -1)
+ goto out;
+ } else {
+ const char *tmpdir = getenv("TMPDIR");
+ if (tmpdir == NULL || *tmpdir == '\0')
+ tmpdir = "/tmp";
+ (void)snprintf(buf, sizeof(buf), "%s/GlobalHead.%ld.%u",
+ tmpdir, (long)getpid(), seq);
+ }
+ } else {
+ const char *fmt = override_name != NULL ? override_name :
+ pax_option_exthdr_name();
+ if (fmt != NULL) {
+ if (pax_format_xhdr_name(buf, sizeof(buf), fmt,
+ fname ? fname : "", 0) == -1)
+ goto out;
+ } else if (fname != NULL) {
+ char *opath = NULL, *odirbuf = NULL;
+ const char *obase = fname;
+ const char *odir = ".";
+
+ if ((opath = strdup(fname)) != NULL)
+ obase = basename(opath);
+ if ((odirbuf = strdup(fname)) != NULL)
+ odir = dirname(odirbuf);
+ (void)snprintf(buf, sizeof(buf), "%s/PaxHeaders.%ld/%s",
+ odir ? odir : ".", (long)getpid(), obase);
+ free(opath);
+ free(odirbuf);
+ } else {
+ (void)strlcpy(buf, "PaxHeaders", sizeof(buf));
+ }
+ }
fieldcpy(hd->name, sizeof(hd->name), buf, sizeof(buf));
- /*
- * Inherit mode, mtime and owner from the file the headers are for.
- * This will only be extracted as an actual file by implementations
- * that don't support pax format.
- */
- memcpy(hd->mode, fhd->mode, sizeof(hd->mode));
- memcpy(hd->mtime, fhd->mtime, sizeof(hd->mtime));
- memcpy(hd->uid, fhd->uid, sizeof(hd->uid));
- memcpy(hd->gid, fhd->gid, sizeof(hd->gid));
+ if (fhd != NULL) {
+ memcpy(hd->mode, fhd->mode, sizeof(hd->mode));
+ memcpy(hd->mtime, fhd->mtime, sizeof(hd->mtime));
+ memcpy(hd->uid, fhd->uid, sizeof(hd->uid));
+ memcpy(hd->gid, fhd->gid, sizeof(hd->gid));
+ }
if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
- sizeof(hd->chksum), 3))
+ sizeof(hd->chksum), 3))
goto out;
- /* write out extended header */
if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0 ||
wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0)
goto err;
- /* write out extended header records */
SLIST_FOREACH(rec, xhdr, entry)
if (wr_rdbuf(rec->record, rec->reclen) < 0)
goto err;
@@ -1097,19 +1171,507 @@
return 0;
out:
- /*
- * header field is out of range
- */
- paxwarn(1, "Pax header field is too small for %s", fname);
+ paxwarn(1, "Pax header field is too small for %s",
+ fname ? fname : "pax header");
return 1;
err:
- paxwarn(1,"Could not write pax extended header for %s", fname);
+ paxwarn(1,"Could not write pax extended header for %s",
+ fname ? fname : "pax header");
return -1;
}
#endif
static int
+pax_store_kv(PAXKEY **head, const char *keyword, const char *value)
+{
+	/*
+	 * Set, replace or delete one keyword in the list rooted at *head.
+	 * An empty value deletes the keyword (deleting a missing keyword is
+	 * not an error).  New keywords are linked in at the head of the list.
+	 * Returns 0 on success, -1 on a NULL argument or allocation failure;
+	 * on failure the list is left as it was.
+	 */
+	PAXKEY **linkp, *node;
+	char *copy;
+
+	if (head == NULL || keyword == NULL || value == NULL)
+		return -1;
+
+	/* Find the entry, remembering the link that points at it. */
+	linkp = head;
+	while ((node = *linkp) != NULL && strcmp(node->name, keyword) != 0)
+		linkp = &node->next;
+
+	if (*value == '\0') {
+		if (node != NULL) {
+			*linkp = node->next;
+			free(node->name);
+			free(node->value);
+			free(node);
+		}
+		return 0;
+	}
+
+	if (node != NULL) {
+		/* Duplicate first so the old value survives a failed strdup. */
+		if ((copy = strdup(value)) == NULL)
+			return -1;
+		free(node->value);
+		node->value = copy;
+		return 0;
+	}
+
+	node = calloc(1, sizeof(*node));
+	if (node == NULL)
+		return -1;
+	if ((node->name = strdup(keyword)) != NULL)
+		node->value = strdup(value);
+	if (node->name == NULL || node->value == NULL) {
+		free(node->name);
+		free(node);
+		return -1;
+	}
+	node->next = *head;
+	*head = node;
+	return 0;
+}
+
+/*
+ * Point arcn->gattr at the current head of the global keyword store.
+ * A NULL arcn is ignored.  The pointer is shared, not copied.
+ */
+static void
+pax_apply_global(ARCHD *arcn)
+{
+ if (arcn != NULL)
+ arcn->gattr = pax_global_xattr;
+}
+
+/*
+ * Hand the global keyword store to pax_kv_free() and forget that the -o
+ * keywords were ever copied into it.
+ *
+ * pax_keywords_prepared only means "the -o KEY=VALUE keywords are already
+ * in pax_global_xattr".  That stops being true once the store is released,
+ * so the flag is cleared here; otherwise pax_prepare_user_keywords() would
+ * return early for every archive read after this call and the user's
+ * global keywords would be silently missing.
+ * NOTE(review): pax_kv_free() is defined elsewhere; this assumes it empties
+ * the list it is given.
+ */
+static void
+pax_global_free(void)
+{
+	pax_kv_free(&pax_global_xattr);
+	pax_keywords_prepared = 0;
+}
+
+/*
+ * Return 1 if keyword matches any recorded delete= pattern (fnmatch(3)
+ * with no flags), 0 otherwise.
+ */
+static int
+pax_keyword_deleted(const char *keyword)
+{
+	const PAXDEL *pat = pax_option_delete();
+
+	while (pat != NULL) {
+		if (fnmatch(pat->pattern, keyword, 0) == 0)
+			return 1;
+		pat = pat->next;
+	}
+	return 0;
+}
+
+/*
+ * Copy the global -o keywords into the global keyword store, skipping any
+ * that match a delete= pattern.  Runs its body only once: later calls
+ * return immediately while pax_keywords_prepared is set.  A keyword that
+ * cannot be stored is reported with paxwarn() and skipped.
+ */
+static void
+pax_prepare_user_keywords(void)
+{
+	const PAXOPKV *opt;
+
+	if (pax_keywords_prepared)
+		return;
+
+	opt = pax_option_keywords(OPT_ASSIGN_EQ);
+	while (opt != NULL) {
+		if (!pax_keyword_deleted(opt->name) &&
+		    pax_store_kv(&pax_global_xattr, opt->name,
+		    opt->value) == -1)
+			paxwarn(1, "Unable to apply global keyword %s",
+			    opt->name);
+		opt = opt->next;
+	}
+	pax_keywords_prepared = 1;
+}
+
+/*
+ * Store every per-file (:=) -o keyword that is not matched by a delete=
+ * pattern into arcn->xattr.  Failures are reported with paxwarn() and the
+ * remaining keywords are still processed.
+ */
+static void
+pax_apply_local_option_keywords(ARCHD *arcn)
+{
+	const PAXOPKV *opt;
+
+	opt = pax_option_keywords(OPT_ASSIGN_COLON);
+	while (opt != NULL) {
+		if (!pax_keyword_deleted(opt->name) &&
+		    pax_store_kv(&arcn->xattr, opt->name, opt->value) == -1)
+			paxwarn(1, "Unable to apply per-file keyword %s",
+			    opt->name);
+		opt = opt->next;
+	}
+}
+
+#ifndef SMALL
+/*
+ * Add every per-file (:=) -o keyword that is not matched by a delete=
+ * pattern to the extended header record list xhdr.  A NULL xhdr is
+ * ignored; a keyword that cannot be added is reported and skipped.
+ */
+static void
+pax_option_apply_local_xhdr(struct xheader *xhdr)
+{
+	const PAXOPKV *opt;
+
+	if (xhdr == NULL)
+		return;
+
+	opt = pax_option_keywords(OPT_ASSIGN_COLON);
+	while (opt != NULL) {
+		if (!pax_keyword_deleted(opt->name) &&
+		    xheader_add(xhdr, opt->name, opt->value) == -1)
+			paxwarn(1, "Unable to write per-file keyword %s",
+			    opt->name);
+		opt = opt->next;
+	}
+}
+
+/*
+ * Write the typeflag 'g' global extended header holding the global -o
+ * keywords, unless that has already been done.
+ *
+ * Keywords matching a delete= pattern are left out; if none remain no
+ * header is written but the step is still marked done.  Returns 0 when
+ * nothing had to be written or the write succeeded, -1 when a record could
+ * not be queued or wr_xheader() failed with a negative value, and
+ * otherwise passes wr_xheader()'s positive result through (the step is
+ * marked done in that case too).
+ */
+static int
+pax_write_global_header(void)
+{
+	struct xheader records = SLIST_HEAD_INITIALIZER(records);
+	const PAXOPKV *opt;
+	HD_USTAR blank;
+	int queued = 0;
+	int rc;
+
+	if (pax_global_written)
+		return 0;
+
+	/* wr_xheader() copies mode/mtime/uid/gid from this all-zero header. */
+	memset(&blank, 0, sizeof(blank));
+
+	opt = pax_option_keywords(OPT_ASSIGN_EQ);
+	for (; opt != NULL; opt = opt->next) {
+		if (pax_keyword_deleted(opt->name))
+			continue;
+		if (xheader_add(&records, opt->name, opt->value) == -1) {
+			xheader_free(&records);
+			return -1;
+		}
+		queued = 1;
+	}
+
+	if (queued) {
+		rc = wr_xheader(NULL, &blank, &records, 1, NULL,
+		    pax_global_seq++);
+		xheader_free(&records);
+		if (rc < 0)
+			return -1;
+	} else {
+		xheader_free(&records);
+		rc = 0;
+	}
+	pax_global_written = 1;
+	return rc;
+}
+#endif
+
+/* Record the linkdata toggle; any non-zero enable is stored as 1. */
+void
+pax_option_set_linkdata(int enable)
+{
+ pax_opt_linkdata = (enable != 0);
+}
+
+/*
+ * Remember whether to unconditionally emit atime/mtime keywords.
+ * Any non-zero enable is stored as 1.
+ */
+void
+pax_option_set_times(int enable)
+{
+ pax_opt_times = (enable != 0);
+}
+
+/*
+ * Select the invalid= action from its textual name, compared without
+ * regard to case: bypass, write, rename, utf-8 or binary.
+ * Returns 0 on success; -1 (leaving the current action untouched) if value
+ * is NULL or not one of those names.
+ */
+int
+pax_option_set_invalid(const char *value)
+{
+	static const struct {
+		const char *name;
+		enum pax_invalid_action action;
+	} actions[] = {
+		{ "bypass",	PAX_INVALID_BYPASS },
+		{ "write",	PAX_INVALID_WRITE },
+		{ "rename",	PAX_INVALID_RENAME },
+		{ "utf-8",	PAX_INVALID_UTF8 },
+		{ "binary",	PAX_INVALID_BINARY },
+	};
+	size_t i;
+
+	if (value == NULL)
+		return -1;
+	for (i = 0; i < sizeof(actions) / sizeof(actions[0]); i++) {
+		if (strcasecmp(value, actions[i].name) == 0) {
+			pax_opt_invalid = actions[i].action;
+			return 0;
+		}
+	}
+	return -1;
+}
+
+/*
+ * Replace the heap string at *dst with a private copy of value.
+ * Returns 0 on success.  Returns -1 and leaves *dst unchanged if value is
+ * NULL or the copy cannot be allocated.
+ */
+static int
+pax_option_store_string(char **dst, const char *value)
+{
+	char *copy;
+
+	if (value == NULL || (copy = strdup(value)) == NULL)
+		return -1;
+	free(*dst);
+	*dst = copy;
+	return 0;
+}
+
+/*
+ * Remember a copy of the exthdr.name= template.  Returns 0 on success,
+ * -1 if value is NULL or cannot be copied (the old template is kept).
+ */
+int
+pax_option_set_exthdr_name(const char *value)
+{
+ return pax_option_store_string(&pax_opt_exthdr_name, value);
+}
+
+/*
+ * Remember a copy of the globexthdr.name= template.  Returns 0 on success,
+ * -1 if value is NULL or cannot be copied (the old template is kept).
+ */
+int
+pax_option_set_globexthdr_name(const char *value)
+{
+ return pax_option_store_string(&pax_opt_globexthdr_name, value);
+}
+
+/*
+ * Append a copy of pattern to the delete= list, keeping the patterns in
+ * the order they were given.  Returns 0 on success, -1 if pattern is NULL
+ * or memory cannot be allocated (the list is then unchanged).
+ */
+int
+pax_option_add_delete(const char *pattern)
+{
+	PAXDEL *entry, **tailp;
+
+	if (pattern == NULL)
+		return -1;
+
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL)
+		return -1;
+	entry->pattern = strdup(pattern);
+	if (entry->pattern == NULL) {
+		free(entry);
+		return -1;
+	}
+	entry->next = NULL;
+
+	/* Walk to the terminating link and hook the new entry in there. */
+	tailp = &pax_opt_delete_list;
+	while (*tailp != NULL)
+		tailp = &(*tailp)->next;
+	*tailp = entry;
+	return 0;
+}
+
+/*
+ * Append a new name/value/assign entry to the end of the list rooted at
+ * *head.  name and value are copied.  Returns 0 on success or -1 on
+ * allocation failure, in which case the list is unchanged.
+ */
+static int
+pax_option_add_keyword_internal(PAXOPKV **head, const char *name,
+    const char *value, int assign)
+{
+	PAXOPKV *entry, **tailp;
+
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL)
+		return -1;
+	entry->name = strdup(name);
+	if (entry->name == NULL) {
+		free(entry);
+		return -1;
+	}
+	entry->value = strdup(value);
+	if (entry->value == NULL) {
+		free(entry->name);
+		free(entry);
+		return -1;
+	}
+	entry->assign = assign;
+	entry->next = NULL;
+
+	/* Walk to the terminating link and hook the new entry in there. */
+	tailp = head;
+	while (*tailp != NULL)
+		tailp = &(*tailp)->next;
+	*tailp = entry;
+	return 0;
+}
+
+/*
+ * Record a user keyword from -o.  Keywords given with OPT_ASSIGN_COLON go
+ * on the per-file list, everything else on the global list.
+ * Returns 0 on success, -1 if name or value is NULL or memory runs out.
+ */
+int
+pax_option_add_keyword(const char *name, const char *value, int assign)
+{
+	PAXOPKV **list;
+
+	if (name == NULL || value == NULL)
+		return -1;
+
+	list = (assign == OPT_ASSIGN_COLON) ?
+	    &pax_opt_keywords_local : &pax_opt_keywords_global;
+	return pax_option_add_keyword_internal(list, name, value, assign);
+}
+
+/* Return the head of the delete= pattern list (NULL when empty). */
+const PAXDEL *
+pax_option_delete(void)
+{
+ return pax_opt_delete_list;
+}
+
+/*
+ * Return the head of a user keyword list: the per-file list for
+ * OPT_ASSIGN_COLON, the global list for any other assign value.
+ */
+const PAXOPKV *
+pax_option_keywords(int assign)
+{
+	return (assign == OPT_ASSIGN_COLON) ?
+	    pax_opt_keywords_local : pax_opt_keywords_global;
+}
+
+/* Return the linkdata toggle (0 or 1). */
+int
+pax_option_linkdata(void)
+{
+ return pax_opt_linkdata;
+}
+
+/* Return the times toggle (0 or 1). */
+int
+pax_option_times(void)
+{
+ return pax_opt_times;
+}
+
+/* Return the exthdr.name= template, or NULL if none was set. */
+const char *
+pax_option_exthdr_name(void)
+{
+ return pax_opt_exthdr_name;
+}
+
+/* Return the globexthdr.name= template, or NULL if none was set. */
+const char *
+pax_option_globexthdr_name(void)
+{
+ return pax_opt_globexthdr_name;
+}
+
+/*
+ * Return the current invalid= action.  It is PAX_INVALID_BYPASS until
+ * pax_option_set_invalid() selects something else.
+ */
+enum pax_invalid_action
+pax_option_invalid(void)
+{
+ return pax_opt_invalid;
+}
+
+/*
+ * Reset the global-header bookkeeping: the header counts as not yet
+ * written and the sequence number restarts at 1.  Compiled to an empty
+ * function when SMALL is defined.
+ * NOTE(review): pax_keywords_prepared and the option lists themselves are
+ * not reset here - confirm that is intended for a new invocation.
+ */
+void
+pax_option_reset_session(void)
+{
+#ifndef SMALL
+ pax_global_written = 0;
+ pax_global_seq = 1;
+#endif
+}
+
+/*
+ * Render a user-supplied template for extended header filenames into buf.
+ *
+ * Conversions:
+ *	%d	directory part of path ("." when path is NULL or empty)
+ *	%f	last component of path (nothing when path is NULL)
+ *	%p	process ID
+ *	%n	sequence number; 0 is rendered as 1
+ *	%%	a literal '%'
+ * Any other "%x" produces x itself, and a lone '%' ending the template is
+ * dropped.
+ *
+ * Returns 0 with buf NUL-terminated.  Returns -1 if fmt or buf is NULL,
+ * bufsz is 0, the result does not fit in bufsz bytes, or the working
+ * copies of path cannot be allocated.  In the last two cases buf is set to
+ * the empty string so callers never see a partial, unterminated name.
+ */
+static int
+pax_format_xhdr_name(char *buf, size_t bufsz, const char *fmt,
+    const char *path, unsigned int seq)
+{
+	char *path_copy = NULL, *dir_copy = NULL;
+	const char *dir = ".";
+	const char *file = path;
+	const char *ins;
+	char tmp[32];
+	char conv;
+	size_t used = 0, inslen;
+	int ret = -1;
+
+	if (fmt == NULL || buf == NULL || bufsz == 0)
+		return -1;
+	buf[0] = '\0';
+
+	if (path != NULL && *path != '\0') {
+		/*
+		 * basename(3) and dirname(3) may modify their argument, so
+		 * each gets its own copy.  Fail instead of quietly expanding
+		 * %f to the whole path when a copy cannot be made.
+		 */
+		path_copy = strdup(path);
+		dir_copy = strdup(path);
+		if (path_copy == NULL || dir_copy == NULL)
+			goto done;
+		file = basename(path_copy);
+		dir = dirname(dir_copy);
+	}
+
+	while (*fmt != '\0') {
+		if (*fmt != '%') {
+			tmp[0] = *fmt++;
+			tmp[1] = '\0';
+			ins = tmp;
+		} else {
+			conv = *++fmt;
+			if (conv == '\0')
+				break;
+			fmt++;
+			switch (conv) {
+			case 'd':
+				ins = dir;
+				break;
+			case 'f':
+				ins = file;
+				break;
+			case 'p':
+				(void)snprintf(tmp, sizeof(tmp), "%ld",
+				    (long)getpid());
+				ins = tmp;
+				break;
+			case 'n':
+				(void)snprintf(tmp, sizeof(tmp), "%u",
+				    seq == 0 ? 1U : seq);
+				ins = tmp;
+				break;
+			default:
+				/* Covers "%%" as well as unknown letters. */
+				tmp[0] = conv;
+				tmp[1] = '\0';
+				ins = tmp;
+				break;
+			}
+			if (ins == NULL)
+				ins = "";
+		}
+		inslen = strlen(ins);
+		/* Always keep one byte back for the terminating NUL. */
+		if (inslen >= bufsz - used)
+			goto done;
+		memcpy(buf + used, ins, inslen);
+		used += inslen;
+	}
+	buf[used] = '\0';
+	ret = 0;
+
+done:
+	if (ret != 0)
+		buf[0] = '\0';
+	free(path_copy);
+	free(dir_copy);
+	return ret;
+}
+
+/*
+ * Apply the invalid= policy to a pathname taken from an extended header.
+ *
+ * bypass: warn, mark the member to be skipped and return -1.
+ * rename: warn, set arcn->invalid to PAX_INVALID_RENAME and return 0.
+ * Any other action: return 0 without touching arcn.
+ */
+int
+pax_handle_invalid_path(ARCHD *arcn, const char *keyword, const char *value)
+{
+	const enum pax_invalid_action policy = pax_option_invalid();
+
+	if (policy == PAX_INVALID_BYPASS) {
+		paxwarn(1, "Skipping entry with invalid %s \"%s\"",
+		    keyword, value);
+		pax_mark_skip(arcn);
+		return -1;
+	}
+	if (policy == PAX_INVALID_RENAME) {
+		paxwarn(0, "Invalid %s \"%s\"; requesting rename", keyword,
+		    value);
+		arcn->invalid = PAX_INVALID_RENAME;
+	}
+	return 0;
+}
+
+/*
+ * Apply the invalid= policy to a link target taken from an extended
+ * header.
+ *
+ * bypass and rename both warn, mark the member to be skipped and return
+ * -1; a link target cannot be renamed, so rename degrades to a skip.
+ * Any other action returns 0 without touching arcn.
+ */
+int
+pax_handle_invalid_link(ARCHD *arcn, const char *keyword, const char *value)
+{
+	const enum pax_invalid_action policy = pax_option_invalid();
+
+	if (policy == PAX_INVALID_BYPASS)
+		paxwarn(1, "Skipping link with invalid %s \"%s\"", keyword,
+		    value);
+	else if (policy == PAX_INVALID_RENAME)
+		paxwarn(1, "Cannot rename invalid link target \"%s\"; skipping",
+		    value);
+	else
+		return 0;
+
+	pax_mark_skip(arcn);
+	return -1;
+}
+
+/*
+ * Flag an archive member as one to skip by setting arcn->invalid to
+ * PAX_INVALID_SKIP.  A NULL arcn is ignored.
+ * NOTE(review): the code that reads this flag is not part of this hunk.
+ */
+void
+pax_mark_skip(ARCHD *arcn)
+{
+ if (arcn != NULL)
+ arcn->invalid = PAX_INVALID_SKIP;
+}
+
+static int
wr_ustar_or_pax(ARCHD *arcn, int ustar)
{
HD_USTAR *hd;
@@ -1119,6 +1681,15 @@
struct xheader xhdr = SLIST_HEAD_INITIALIZER(xhdr);
#endif
int bad_mtime;
+ int write_data = 0;
+#ifndef SMALL
+ /* Ensure any pending global keywords are written once per archive. */
+ if (!ustar) {
+ int gres = pax_write_global_header();
+ if (gres < 0)
+ return 1;
+ }
+#endif
/*
* check for those file system types ustar cannot store
@@ -1234,15 +1805,27 @@
case PAX_SLK:
case PAX_HLK:
case PAX_HRG:
- if (arcn->type == PAX_SLK)
- hd->typeflag = SYMTYPE;
- else
- hd->typeflag = LNKTYPE;
fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
sizeof(arcn->ln_name));
- if (ul_oct(0, hd->size, sizeof(hd->size), 3))
- goto out;
- break;
+ if (arcn->type == PAX_SLK) {
+ hd->typeflag = SYMTYPE;
+ if (ul_oct(0, hd->size, sizeof(hd->size), 3))
+ goto out;
+ } else {
+ hd->typeflag = LNKTYPE;
+ /* Optional pax extension: store file data along with the link. */
+ if (!ustar && pax_option_linkdata()) {
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ if (ull_oct(arcn->sb.st_size, hd->size,
+ sizeof(hd->size), 3))
+ goto out;
+ write_data = 1;
+ } else {
+ if (ul_oct(0, hd->size, sizeof(hd->size), 3))
+ goto out;
+ }
+ }
+ break;
case PAX_REG:
case PAX_CTG:
default:
@@ -1254,12 +1837,12 @@
else
hd->typeflag = REGTYPE;
arcn->pad = TAR_PAD(arcn->sb.st_size);
- if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
- if (ustar) {
- paxwarn(1, "File is too long for ustar %s",
- arcn->org_name);
- return(1);
- }
+ if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
+ if (ustar) {
+ paxwarn(1, "File is too long for ustar %s",
+ arcn->org_name);
+ return(1);
+ }
#ifndef SMALL
else if (xheader_add_ull(&xhdr, "size",
arcn->sb.st_size) == -1) {
@@ -1270,6 +1853,7 @@
}
#endif
}
+ write_data = 1;
break;
}
@@ -1320,14 +1904,18 @@
*
* ctime isn't specified by POSIX so omit it.
*/
- if (xheader_add_ts(&xhdr, "atime", &arcn->sb.st_atim) == -1) {
- paxwarn(1, "Couldn't preserve %s in pax format for %s",
- "atime", arcn->org_name);
- xheader_free(&xhdr);
- return (1);
+ if (pax_option_times()) {
+ if (xheader_add_ts(&xhdr, "atime",
+ &arcn->sb.st_atim) == -1) {
+ paxwarn(1,
+ "Couldn't preserve %s in pax format for %s",
+ "atime", arcn->org_name);
+ xheader_free(&xhdr);
+ return (1);
+ }
}
- if ((bad_mtime || arcn->sb.st_mtime < 0 ||
- arcn->sb.st_mtim.tv_nsec != 0) &&
+ if ((pax_option_times() || bad_mtime || arcn->sb.st_mtime < 0 ||
+ arcn->sb.st_mtim.tv_nsec != 0) &&
xheader_add_ts(&xhdr, "mtime", &arcn->sb.st_mtim) == -1) {
paxwarn(1, "Couldn't preserve %s in pax format for %s",
"mtime", arcn->org_name);
@@ -1346,11 +1934,13 @@
}
#ifndef SMALL
+ pax_option_apply_local_xhdr(&xhdr);
+
/* write out a pax extended header if needed */
if (!SLIST_EMPTY(&xhdr)) {
int ret;
- ret = wr_xheader(arcn->name, hd, &xhdr);
+ ret = wr_xheader(arcn->name, hd, &xhdr, 0, NULL, 0);
xheader_free(&xhdr);
if (ret)
return(ret);
@@ -1363,7 +1953,7 @@
* needs to be written
*/
if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
- sizeof(hd->chksum), 3))
+ sizeof(hd->chksum), 3))
goto out;
if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0 ||
wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0) {
@@ -1371,9 +1961,7 @@
arcn->org_name);
return(-1);
}
- if (PAX_IS_REG(arcn->type))
- return(0);
- return(1);
+ return (write_data ? 0 : 1);
out:
#ifndef SMALL
@@ -1475,11 +2063,66 @@
OPLIST *opt;
while ((opt = opt_next()) != NULL) {
- if (1) {
- paxwarn(1, "Unknown pax format -o option/value pair %s=%s",
- opt->name, opt->value);
- return(-1);
+ if (strcmp(opt->name, "delete") == 0) {
+ if (pax_option_add_delete(opt->value) < 0) {
+ paxwarn(1, "Unable to record delete pattern %s",
+ opt->value);
+ free(opt->name);
+ free(opt->value);
+ free(opt);
+ return (-1);
+ }
+ } else if (strcmp(opt->name, "exthdr.name") == 0) {
+ if (pax_option_set_exthdr_name(opt->value) < 0) {
+ paxwarn(1, "Unable to set exthdr.name to %s",
+ opt->value);
+ free(opt->name);
+ free(opt->value);
+ free(opt);
+ return (-1);
+ }
+ } else if (strcmp(opt->name, "globexthdr.name") == 0) {
+ if (pax_option_set_globexthdr_name(opt->value) < 0) {
+ paxwarn(1, "Unable to set globexthdr.name to %s",
+ opt->value);
+ free(opt->name);
+ free(opt->value);
+ free(opt);
+ return (-1);
+ }
+ } else if (strcmp(opt->name, "invalid") == 0) {
+ if (pax_option_set_invalid(opt->value) < 0) {
+ paxwarn(1, "Unknown invalid action %s", opt->value);
+ free(opt->name);
+ free(opt->value);
+ free(opt);
+ return (-1);
+ }
+ } else if (strcmp(opt->name, "linkdata") == 0) {
+ pax_option_set_linkdata(1);
+ } else if (strcmp(opt->name, "times") == 0) {
+ pax_option_set_times(1);
+ } else if (opt->assign != OPT_ASSIGN_NONE) {
+ if (pax_option_add_keyword(opt->name, opt->value,
+ opt->assign) < 0) {
+ paxwarn(1,
+ "Unable to record pax keyword %s=%s",
+ opt->name, opt->value);
+ free(opt->name);
+ free(opt->value);
+ free(opt);
+ return (-1);
+ }
+ } else {
+ paxwarn(1, "Unknown pax format -o option %s", opt->name);
+ free(opt->name);
+ free(opt->value);
+ free(opt);
+ return (-1);
}
+ free(opt->name);
+ free(opt->value);
+ free(opt);
}
return 0;
}
@@ -1695,13 +2338,33 @@
break;
}
*p++ = nextp[-1] = '\0';
+ if (pax_keyword_deleted(keyword)) {
+ p = nextp;
+ continue;
+ }
+ if (pax_store_kv(global ? &pax_global_xattr : &arcn->xattr,
+ keyword, p) == -1) {
+ paxwarn(1, "Unable to store extended header keyword %s",
+ keyword);
+ ret = -1;
+ break;
+ }
if (!global) {
if (!strcmp(keyword, "path")) {
- arcn->nlen = strlcpy(arcn->name, p,
+ size_t len = strlcpy(arcn->name, p,
sizeof(arcn->name));
+ arcn->nlen = MINIMUM(len, sizeof(arcn->name) - 1);
+ if (len >= sizeof(arcn->name))
+ (void)pax_handle_invalid_path(arcn,
+ keyword, p);
} else if (!strcmp(keyword, "linkpath")) {
- arcn->ln_nlen = strlcpy(arcn->ln_name, p,
+ size_t len = strlcpy(arcn->ln_name, p,
sizeof(arcn->ln_name));
+ arcn->ln_nlen = MINIMUM(len,
+ sizeof(arcn->ln_name) - 1);
+ if (len >= sizeof(arcn->ln_name))
+ (void)pax_handle_invalid_link(arcn,
+ keyword, p);
} else if (!strcmp(keyword, "mtime")) {
ret = rd_time(&arcn->sb.st_mtim, keyword, p);
if (ret < 0)
Index: bin/pax/gen_subs.c
===================================================================
RCS file: /cvs/src/bin/pax/gen_subs.c,v
diff -u -r1.34 gen_subs.c
--- bin/pax/gen_subs.c 27 Apr 2024 19:49:42 -0000 1.34
+++ bin/pax/gen_subs.c 12 Oct 2025 07:53:40 -0000
@@ -36,7 +36,9 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <ctype.h>
#include <grp.h>
+#include <limits.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -53,6 +55,334 @@
* a collection of general purpose subroutines used by pax
*/
+/*
+ * Accumulated format string for -o listopt=.  Grown by listopt_append(),
+ * released by listopt_reset(); NULL until the first non-empty chunk.
+ */
+static char *listopt_format;
+static size_t listopt_len;	/* strlen(listopt_format), 0 when NULL */
+
+/*
+ * Scratch context tracking dynamically duplicated strings, so they can
+ * all be released together by listopt_ctx_free().
+ */
+struct listopt_ctx {
+ char **allocated;	/* array of strdup'ed strings owned by the context */
+ size_t count;	/* entries of allocated[] in use */
+ size_t cap;	/* entries of allocated[] available */
+};
+
+/*
+ * Parsed elements for a single custom listopt conversion, as filled in by
+ * listopt_parse_spec().  Every array is a NUL-terminated string.
+ */
+struct listopt_spec {
+ char flags[16];	/* flag characters out of "-+ #0'" */
+ char width[16];	/* field width digits */
+ char precision[16];	/* '.' followed by the precision digits */
+ char length[8];	/* length modifier letters out of "hljztL" */
+ char keyword[128];	/* text found between '(' and ')' */
+ char subfmt[128];	/* for %T: the part of keyword after '=' */
+ char conv;	/* conversion character */
+};
+
+/* Start ctx out empty: no string array and zero count/capacity. */
+static void
+listopt_ctx_init(struct listopt_ctx *ctx)
+{
+ ctx->allocated = NULL;
+ ctx->count = ctx->cap = 0;
+}
+
+/*
+ * Free every string recorded in ctx and the array holding them, leaving
+ * ctx empty and reusable.  A NULL ctx is ignored.
+ */
+static void
+listopt_ctx_free(struct listopt_ctx *ctx)
+{
+	if (ctx == NULL)
+		return;
+
+	while (ctx->count > 0)
+		free(ctx->allocated[--ctx->count]);
+	free(ctx->allocated);
+	ctx->allocated = NULL;
+	ctx->cap = 0;
+}
+
+/*
+ * Duplicate str (NULL counts as "") and record the copy in ctx so that
+ * listopt_ctx_free() releases it later.  Returns the copy, or the constant
+ * "" when memory cannot be allocated.
+ */
+static const char *
+listopt_store(struct listopt_ctx *ctx, const char *str)
+{
+	char *copy, **grown;
+	size_t want;
+
+	copy = strdup(str != NULL ? str : "");
+	if (copy == NULL)
+		return "";
+
+	/* Full (or never allocated): start at 8 slots, then double. */
+	if (ctx->count == ctx->cap) {
+		want = (ctx->cap == 0) ? 8 : ctx->cap * 2;
+		grown = reallocarray(ctx->allocated, want, sizeof(*grown));
+		if (grown == NULL) {
+			free(copy);
+			return "";
+		}
+		ctx->allocated = grown;
+		ctx->cap = want;
+	}
+	ctx->allocated[ctx->count] = copy;
+	ctx->count++;
+	return copy;
+}
+
+/*
+ * Append c to the NUL-terminated string held in dst (dstsz bytes in all).
+ * The character is silently dropped when it no longer fits.
+ */
+static void
+listopt_spec_putc(char *dst, size_t dstsz, char c)
+{
+	size_t used = strlen(dst);
+
+	if (used + 1 < dstsz) {
+		dst[used] = c;
+		dst[used + 1] = '\0';
+	}
+}
+
+/*
+ * Break down the single conversion that starts at the '%' in fmt,
+ * recording printf-style flags, width, precision, length modifiers, an
+ * optional "(keyword)" and the conversion character in spec.
+ *
+ * A "(keyword)" may appear anywhere before the conversion character and
+ * restarts flag parsing; an over-long keyword is truncated.  For the 'T'
+ * conversion a keyword of the form "name=subformat" is split at the first
+ * '=': the name stays in spec->keyword, the rest goes to spec->subfmt.
+ *
+ * Returns 0 if fmt does not start with '%' (spec untouched), -1 if the
+ * string ends before a conversion character is found (*endp untouched),
+ * and 1 on success with *endp pointing just past the conversion.
+ */
+static int
+listopt_parse_spec(const char *fmt, struct listopt_spec *spec,
+    const char **endp)
+{
+	enum { PARSE_FLAGS, PARSE_WIDTH, PARSE_PRECISION, PARSE_LENGTH } state;
+	const char *p = fmt;
+	const char *kw;
+	size_t kwlen;
+	char *eq;
+
+	if (*p != '%')
+		return 0;
+	memset(spec, 0, sizeof(*spec));
+	state = PARSE_FLAGS;
+
+	/* Each pass either consumes one element or advances the state. */
+	for (p++; *p != '\0' && spec->conv == '\0'; ) {
+		if (*p == '(') {
+			kw = ++p;
+			while (*p != '\0' && *p != ')')
+				p++;
+			kwlen = (size_t)(p - kw);
+			if (kwlen >= sizeof(spec->keyword))
+				kwlen = sizeof(spec->keyword) - 1;
+			memcpy(spec->keyword, kw, kwlen);
+			spec->keyword[kwlen] = '\0';
+			if (*p == ')')
+				p++;
+			state = PARSE_FLAGS;
+			continue;
+		}
+
+		switch (state) {
+		case PARSE_FLAGS:
+			if (strchr("-+ #0'", *p) != NULL)
+				listopt_spec_putc(spec->flags,
+				    sizeof(spec->flags), *p++);
+			else
+				state = PARSE_WIDTH;
+			break;
+		case PARSE_WIDTH:
+			if (isdigit((unsigned char)*p)) {
+				listopt_spec_putc(spec->width,
+				    sizeof(spec->width), *p++);
+			} else if (*p == '.') {
+				/* The '.' itself is kept in precision. */
+				listopt_spec_putc(spec->precision,
+				    sizeof(spec->precision), *p++);
+				state = PARSE_PRECISION;
+			} else {
+				state = PARSE_LENGTH;
+			}
+			break;
+		case PARSE_PRECISION:
+			if (isdigit((unsigned char)*p))
+				listopt_spec_putc(spec->precision,
+				    sizeof(spec->precision), *p++);
+			else
+				state = PARSE_LENGTH;
+			break;
+		case PARSE_LENGTH:
+			if (strchr("hljztL", *p) == NULL) {
+				/* First non-modifier is the conversion. */
+				spec->conv = *p++;
+				break;
+			}
+			listopt_spec_putc(spec->length, sizeof(spec->length),
+			    *p++);
+			/* Support double h/l modifiers */
+			if ((spec->length[0] == 'h' ||
+			    spec->length[0] == 'l') &&
+			    spec->length[1] == '\0' &&
+			    *p == spec->length[0])
+				listopt_spec_putc(spec->length,
+				    sizeof(spec->length), *p++);
+			break;
+		}
+	}
+
+	if (spec->conv == '\0')
+		return -1;
+	if (spec->conv == 'T' && spec->keyword[0] != '\0' &&
+	    (eq = strchr(spec->keyword, '=')) != NULL) {
+		strlcpy(spec->subfmt, eq + 1, sizeof(spec->subfmt));
+		*eq = '\0';
+	}
+	*endp = p;
+	return 1;
+}
+
+/*
+ * Return the string value of keyword for the member arcn.
+ *
+ * A NULL or empty keyword and "path" give arcn->name; "linkpath" gives
+ * arcn->ln_name; "uname"/"gname" are looked up from the member's uid/gid;
+ * "name" and "dirname" are the last component and the directory part of
+ * arcn->name, stored in ctx.  Anything else is looked up with
+ * pax_kv_lookup().  Never returns NULL: missing values come back as "".
+ */
+static const char *
+listopt_keyword_string(struct listopt_ctx *ctx, ARCHD *arcn,
+    const char *keyword)
+{
+	const char *val;
+	char *copy;
+	int want_base;
+
+	if (keyword == NULL || *keyword == '\0' ||
+	    strcmp(keyword, "path") == 0)
+		return arcn->name;
+	if (strcmp(keyword, "linkpath") == 0)
+		return arcn->ln_name;
+
+	if (strcmp(keyword, "uname") == 0)
+		val = user_from_uid(arcn->sb.st_uid, 0);
+	else if (strcmp(keyword, "gname") == 0)
+		val = group_from_gid(arcn->sb.st_gid, 0);
+	else if ((want_base = (strcmp(keyword, "name") == 0)) ||
+	    strcmp(keyword, "dirname") == 0) {
+		/* basename/dirname may modify their argument: use a copy. */
+		copy = strdup(arcn->name);
+		if (copy == NULL)
+			return "";
+		val = listopt_store(ctx,
+		    want_base ? basename(copy) : dirname(copy));
+		free(copy);
+		return val;
+	} else
+		val = pax_kv_lookup(arcn, keyword);
+
+	return (val != NULL) ? val : "";
+}
+
+/*
+ * Fetch keyword as a timestamp into *ts.
+ *
+ * A NULL or empty keyword and "mtime" use the member's mtime; "atime" and
+ * "ctime" use the matching stat fields.  Any other keyword is looked up
+ * with pax_kv_lookup() and parsed as decimal seconds with an optional
+ * ".fraction", of which at most nine digits are used.
+ * Returns 0 on success, -1 if the keyword has no value or does not start
+ * with a number.  ctx is not used.
+ */
+static int
+listopt_keyword_time(struct listopt_ctx *ctx, ARCHD *arcn,
+    const char *keyword, struct timespec *ts)
+{
+	const char *text;
+	char *cur;
+	long nsec;
+	int digits;
+
+	if (keyword == NULL || *keyword == '\0' ||
+	    strcmp(keyword, "mtime") == 0) {
+		*ts = arcn->sb.st_mtim;
+		return 0;
+	}
+	if (strcmp(keyword, "atime") == 0) {
+		*ts = arcn->sb.st_atim;
+		return 0;
+	}
+	if (strcmp(keyword, "ctime") == 0) {
+		*ts = arcn->sb.st_ctim;
+		return 0;
+	}
+
+	text = pax_kv_lookup(arcn, keyword);
+	if (text == NULL || *text == '\0')
+		return -1;
+
+	ts->tv_sec = strtoll(text, &cur, 10);
+	ts->tv_nsec = 0;
+	if (cur == text)
+		return -1;
+	if (*cur != '.')
+		return 0;
+
+	/*
+	 * Scale to nanoseconds: nine decimal places, taking digits from the
+	 * input while they last and padding with zeros afterwards.
+	 */
+	cur++;
+	nsec = 0;
+	for (digits = 0; digits < 9; digits++) {
+		nsec *= 10;
+		if (isdigit((unsigned char)*cur))
+			nsec += *cur++ - '0';
+	}
+	ts->tv_nsec = nsec;
+	return 0;
+}
+
+/*
+ * Fetch keyword as a signed number into *out.
+ *
+ * "uid", "gid", "nlink" and "mode" come from the member's stat data; any
+ * other keyword is looked up with pax_kv_lookup() and parsed as a decimal
+ * integer.  Returns 0 on success, -1 if keyword is NULL, has no value, or
+ * its value does not start with a number.
+ */
+static int
+listopt_keyword_sll(ARCHD *arcn, const char *keyword, long long *out)
+{
+	const char *text;
+	char *stop;
+
+	if (keyword == NULL)
+		return -1;
+
+	if (strcmp(keyword, "uid") == 0)
+		*out = arcn->sb.st_uid;
+	else if (strcmp(keyword, "gid") == 0)
+		*out = arcn->sb.st_gid;
+	else if (strcmp(keyword, "nlink") == 0)
+		*out = arcn->sb.st_nlink;
+	else if (strcmp(keyword, "mode") == 0)
+		*out = arcn->sb.st_mode;
+	else {
+		text = pax_kv_lookup(arcn, keyword);
+		if (text == NULL)
+			return -1;
+		*out = strtoll(text, &stop, 10);
+		if (stop == text)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Fetch keyword as an unsigned number into *out.
+ *
+ * "size" comes from the member's stat data and "devmajor"/"devminor" from
+ * its st_rdev; any other keyword is looked up with pax_kv_lookup() and
+ * parsed as a decimal integer.  Returns 0 on success, -1 if keyword is
+ * NULL, has no value, or its value does not start with a number.
+ */
+static int
+listopt_keyword_ull(ARCHD *arcn, const char *keyword,
+    unsigned long long *out)
+{
+	const char *text;
+	char *stop;
+
+	if (keyword == NULL)
+		return -1;
+
+	if (strcmp(keyword, "size") == 0)
+		*out = arcn->sb.st_size;
+	else if (strcmp(keyword, "devmajor") == 0)
+		*out = MAJOR(arcn->sb.st_rdev);
+	else if (strcmp(keyword, "devminor") == 0)
+		*out = MINOR(arcn->sb.st_rdev);
+	else {
+		text = pax_kv_lookup(arcn, keyword);
+		if (text == NULL)
+			return -1;
+		*out = strtoull(text, &stop, 10);
+		if (stop == text)
+			return -1;
+	}
+	return 0;
+}
+
/*
* constants used by ls_list() when printing out archive members
*/
@@ -82,6 +412,13 @@
term = zeroflag ? '\0' : '\n'; /* path termination character */
+ if (vflag && listopt_get() != NULL) {
+ listopt_output(arcn, fp);
+ (void)fputc(term, fp);
+ (void)fflush(fp);
+ return;
+ }
+
/*
* if not verbose, just print the file name
*/
@@ -183,6 +520,248 @@
} else {
(void)fputs(str, fp);
}
+}
+
+/*
+ * listopt_append()
+ *	Concatenate chunk onto the accumulated listopt format string, growing
+ *	the buffer with realloc().  A NULL or empty chunk is a no-op.
+ * Return:
+ *	0 on success, -1 if the new length would overflow or memory cannot be
+ *	obtained (the existing format is left untouched in that case).
+ */
+int
+listopt_append(const char *chunk)
+{
+	size_t chunklen, newlen;
+	char *grown;
+
+	if (chunk == NULL || (chunklen = strlen(chunk)) == 0)
+		return 0;
+	/* listopt_len + chunklen + 1 (for the NUL) must be representable */
+	if (chunklen >= SIZE_MAX - listopt_len)
+		return -1;
+	newlen = listopt_len + chunklen;
+	if ((grown = realloc(listopt_format, newlen + 1)) == NULL)
+		return -1;
+	memcpy(grown + listopt_len, chunk, chunklen);
+	grown[newlen] = '\0';
+	listopt_format = grown;
+	listopt_len = newlen;
+	return 0;
+}
+
+/*
+ * listopt_get()
+ *	Return the format string accumulated by listopt_append(); this is
+ *	NULL once listopt_reset() has been called.
+ */
+const char *
+listopt_get(void)
+{
+	const char *current = listopt_format;
+
+	return current;
+}
+
+/*
+ * listopt_reset()
+ *	Discard the accumulated listopt format string and set its recorded
+ *	length back to zero.
+ */
+void
+listopt_reset(void)
+{
+	char *stale = listopt_format;
+
+	listopt_format = NULL;
+	listopt_len = 0;
+	free(stale);
+}
+
+/*
+ * listopt_out_fmt()
+ *	Assemble the printf conversion "%<flags><width><precision><suffix>"
+ *	for spec in buf.  If the pieces do not fit in len bytes, fall back to
+ *	the bare "%<suffix>" so that fprintf() is never handed a truncated,
+ *	malformed conversion.
+ */
+static void
+listopt_out_fmt(char *buf, size_t len, const struct listopt_spec *spec,
+    const char *suffix)
+{
+	int n;
+
+	n = snprintf(buf, len, "%%%s%s%s%s", spec->flags, spec->width,
+	    spec->precision, suffix);
+	if (n < 0 || (size_t)n >= len)
+		(void)snprintf(buf, len, "%%%s", suffix);
+}
+
+/*
+ * listopt_out_str()
+ *	Print str to fp as a string conversion honouring the flags, width and
+ *	precision recorded in spec.  A NULL str is printed as the empty
+ *	string rather than being passed to "%s".
+ */
+static void
+listopt_out_str(FILE *fp, const struct listopt_spec *spec, const char *str)
+{
+	char fmtbuf[64];
+
+	listopt_out_fmt(fmtbuf, sizeof(fmtbuf), spec, "s");
+	(void)fprintf(fp, fmtbuf, str != NULL ? str : "");
+}
+
+/*
+ * listopt_output()
+ *	Emit a single verbose listing line for arcn on fp, driven by the
+ *	format string returned by listopt_get().  Ordinary characters and
+ *	"%%" are copied through; each conversion accepted by
+ *	listopt_parse_spec() is expanded from the member's data.  Text that
+ *	does not parse as a conversion is copied literally.  The caller is
+ *	responsible for the line terminator.
+ */
+static void
+listopt_output(ARCHD *arcn, FILE *fp)
+{
+	const char *fmt = listopt_get();
+	struct listopt_ctx ctx;
+	struct listopt_spec spec;
+	struct timespec ts;
+	struct tm tm;
+	const char *next;
+	const char *str;
+	char *list, *cursor, *token;
+	long long sval;
+	unsigned long long uval;
+	int nparts;
+	char ch;
+	char numconv[4];
+	char modebuf[12];
+	char fmtbuf[64];
+	char outbuf[PATH_MAX * 2];
+
+	if (fmt == NULL || *fmt == '\0')
+		return;
+	listopt_ctx_init(&ctx);
+	while (*fmt != '\0') {
+		if (*fmt != '%') {
+			(void)fputc(*fmt++, fp);
+			continue;
+		}
+		if (fmt[1] == '%') {
+			fmt += 2;
+			(void)fputc('%', fp);
+			continue;
+		}
+		/* not a usable conversion: copy the byte and keep scanning */
+		if (listopt_parse_spec(fmt, &spec, &next) <= 0) {
+			(void)fputc(*fmt++, fp);
+			continue;
+		}
+		fmt = next;
+
+		switch (spec.conv) {
+		case 's':
+			str = listopt_keyword_string(&ctx, arcn,
+			    spec.keyword[0] ? spec.keyword : "path");
+			listopt_out_str(fp, &spec, str);
+			break;
+		case 'c':
+			str = listopt_keyword_string(&ctx, arcn,
+			    spec.keyword[0] ? spec.keyword : "path");
+			/* a missing or empty value prints as a blank */
+			ch = (str != NULL && *str != '\0') ? *str : ' ';
+			listopt_out_fmt(fmtbuf, sizeof(fmtbuf), &spec, "c");
+			(void)fprintf(fp, fmtbuf, ch);
+			break;
+		case 'd':
+		case 'i':
+			sval = 0;
+			if (listopt_keyword_sll(arcn, spec.keyword, &sval) != 0)
+				sval = 0;
+			(void)snprintf(numconv, sizeof(numconv), "ll%c",
+			    spec.conv);
+			listopt_out_fmt(fmtbuf, sizeof(fmtbuf), &spec, numconv);
+			(void)fprintf(fp, fmtbuf, sval);
+			break;
+		case 'o':
+		case 'u':
+		case 'x':
+		case 'X':
+			uval = 0;
+			if (listopt_keyword_ull(arcn, spec.keyword, &uval) != 0)
+				uval = 0;
+			(void)snprintf(numconv, sizeof(numconv), "ll%c",
+			    spec.conv);
+			listopt_out_fmt(fmtbuf, sizeof(fmtbuf), &spec, numconv);
+			(void)fprintf(fp, fmtbuf, uval);
+			break;
+		case 'T':
+			/* nothing is printed when the time cannot be formatted */
+			if (listopt_keyword_time(&ctx, arcn,
+			    spec.keyword[0] ? spec.keyword : "mtime", &ts) == 0 &&
+			    localtime_r(&ts.tv_sec, &tm) != NULL &&
+			    strftime(outbuf, sizeof(outbuf),
+			    spec.subfmt[0] ? spec.subfmt : "%b %e %H:%M %Y",
+			    &tm) > 0)
+				listopt_out_str(fp, &spec, outbuf);
+			break;
+		case 'M':
+			strmode(arcn->sb.st_mode, modebuf);
+			listopt_out_str(fp, &spec, modebuf);
+			break;
+		case 'D':
+			str = NULL;
+			if (S_ISCHR(arcn->sb.st_mode) ||
+			    S_ISBLK(arcn->sb.st_mode)) {
+				(void)snprintf(outbuf, sizeof(outbuf), "%lu,%lu",
+				    (u_long)MAJOR(arcn->sb.st_rdev),
+				    (u_long)MINOR(arcn->sb.st_rdev));
+				str = outbuf;
+			} else if (spec.keyword[0]) {
+				uval = 0;
+				if (listopt_keyword_ull(arcn, spec.keyword,
+				    &uval) == 0) {
+					(void)snprintf(outbuf, sizeof(outbuf),
+					    "%llu", uval);
+					str = outbuf;
+				}
+			}
+			listopt_out_str(fp, &spec, str);
+			break;
+		case 'F':
+			if (!spec.keyword[0]) {
+				listopt_out_str(fp, &spec, arcn->name);
+				break;
+			}
+			/*
+			 * Join the values of a comma-separated keyword list
+			 * with '/'.  Components that are missing (NULL or
+			 * empty) are skipped, so they neither reach strlcat()
+			 * nor contribute a separator.
+			 */
+			outbuf[0] = '\0';
+			if ((list = strdup(spec.keyword)) != NULL) {
+				cursor = list;
+				nparts = 0;
+				while ((token = strsep(&cursor, ",")) != NULL) {
+					str = listopt_keyword_string(&ctx, arcn,
+					    token);
+					if (str == NULL || *str == '\0')
+						continue;
+					if (nparts++ > 0)
+						(void)strlcat(outbuf, "/",
+						    sizeof(outbuf));
+					(void)strlcat(outbuf, str,
+					    sizeof(outbuf));
+				}
+				free(list);
+			}
+			listopt_out_str(fp, &spec, outbuf);
+			break;
+		case 'L':
+			if (arcn->type == PAX_SLK)
+				(void)snprintf(outbuf, sizeof(outbuf),
+				    "%s -> %s", arcn->name, arcn->ln_name);
+			else
+				(void)strlcpy(outbuf, arcn->name,
+				    sizeof(outbuf));
+			listopt_out_str(fp, &spec, outbuf);
+			break;
+		default:
+			(void)fputc(spec.conv, fp);
+			break;
+		}
+	}
+	listopt_ctx_free(&ctx);
+}
+
+/*
+ * pax_kv_free()
+ *	Release every entry of the keyword list at *head (name, value and the
+ *	node itself) and leave *head set to NULL.  A NULL head is ignored.
+ */
+void
+pax_kv_free(PAXKEY **head)
+{
+	PAXKEY *node, *following;
+
+	if (head == NULL)
+		return;
+	for (node = *head; node != NULL; node = following) {
+		following = node->next;
+		free(node->name);
+		free(node->value);
+		free(node);
+	}
+	*head = NULL;
+}
+
+/*
+ * pax_kv_lookup()
+ *	Find the value stored for key, searching the member's own keyword
+ *	list (xattr) before the global list (gattr), so that a per-file
+ *	entry takes precedence.
+ * Return:
+ *	the stored value, or NULL when arcn or key is NULL or key is absent
+ *	from both lists.
+ */
+const char *
+pax_kv_lookup(const ARCHD *arcn, const char *key)
+{
+	const PAXKEY *lists[2];
+	const PAXKEY *kv;
+	size_t i;
+
+	if (arcn == NULL || key == NULL)
+		return NULL;
+	lists[0] = arcn->xattr;
+	lists[1] = arcn->gattr;
+	for (i = 0; i < 2; i++) {
+		for (kv = lists[i]; kv != NULL; kv = kv->next) {
+			if (strcmp(kv->name, key) == 0)
+				return kv->value;
+		}
+	}
+	return NULL;
}
/*
Index: bin/pax/options.c
===================================================================
RCS file: /cvs/src/bin/pax/options.c,v
diff -u -r1.115 options.c
--- bin/pax/options.c 10 May 2024 20:28:31 -0000 1.115
+++ bin/pax/options.c 12 Oct 2025 07:53:49 -0000
@@ -36,6 +36,7 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <paths.h>
@@ -51,6 +52,7 @@
static int bad_opt(void);
static int opt_add(const char *);
+static char *opt_parse_value(const char **, int);
/*
* argv[0] names. Used for tar and cpio emulation
*/
@@ -139,6 +141,8 @@
static void printflg(unsigned int);
static off_t str_offt(char *);
static char *get_line(FILE *fp);
+static char *opt_parse_value(const char **, int);
+static void opt_common(void);
static void pax_options(int, char **);
static void pax_usage(void);
static void tar_options(int, char **);
@@ -683,6 +687,8 @@
if (!(flg & XF) && (act == ARCHIVE))
frmt = &(fsub[DEFLT]);
+ opt_common();
+
/*
* process the args as they are interpreted by the operation mode
*/
@@ -984,6 +990,8 @@
if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0)
exit(0);
+ opt_common();
+
/*
* process the args as they are interpreted by the operation mode
*/
@@ -1408,6 +1416,8 @@
argc -= optind;
argv += optind;
+ opt_common();
+
/*
* process the args as they are interpreted by the operation mode
*/
@@ -1522,6 +1532,54 @@
return(0);
}
+/*
+ * opt_value_putc()
+ *	Append c to the growing value buffer, doubling its capacity when
+ *	needed so that one byte always remains for the terminating NUL.
+ * Return:
+ *	0 on success, -1 if realloc() fails (the buffer is left intact for
+ *	the caller to free).
+ */
+static int
+opt_value_putc(char **bufp, size_t *lenp, size_t *capp, char c)
+{
+	char *tmp;
+
+	if (*lenp + 1 >= *capp) {
+		if ((tmp = realloc(*bufp, *capp * 2)) == NULL)
+			return -1;
+		*bufp = tmp;
+		*capp *= 2;
+	}
+	(*bufp)[(*lenp)++] = c;
+	return 0;
+}
+
+/*
+ * opt_parse_value()
+ *	Copy one -o option value starting at *srcp into a freshly allocated
+ *	string.  A backslash makes the following byte literal (the backslash
+ *	itself is dropped); a backslash at the very end of the input is kept.
+ *	Unless consume_rest is set, an unescaped ',' ends the value and is
+ *	left unconsumed for the caller.
+ * Return:
+ *	the value (to be released with free()) with *srcp advanced to the
+ *	terminating ',' or NUL, or NULL on allocation failure, in which case
+ *	*srcp is not modified.
+ */
+static char *
+opt_parse_value(const char **srcp, int consume_rest)
+{
+	const char *src = *srcp;
+	size_t len = 0, cap = 64;
+	char *buf;
+
+	if ((buf = malloc(cap)) == NULL)
+		return NULL;
+
+	while (*src != '\0') {
+		if (*src == '\\' && src[1] != '\0')
+			src++;		/* skip the backslash, copy next byte */
+		else if (!consume_rest && *src == ',')
+			break;
+		if (opt_value_putc(&buf, &len, &cap, *src) != 0) {
+			free(buf);
+			return NULL;
+		}
+		src++;
+	}
+	buf[len] = '\0';
+	/* the scan stops only on ',' or NUL, so no whitespace can follow here */
+	*srcp = src;
+	return buf;
+}
+
/*
* opt_add()
* breaks the value supplied to -o into a option name and value. options
@@ -1534,58 +1592,130 @@
static int
opt_add(const char *str)
{
+ const char *src;
OPLIST *opt;
- char *frpt;
- char *pt;
- char *endpt;
- char *dstr;
+ char *name, *value;
+ int assign;
- if ((str == NULL) || (*str == '\0')) {
- paxwarn(0, "Invalid option name");
- return(-1);
+ if (str == NULL || *str == '\0') {
+ paxwarn(0, "Invalid option string");
+ return (-1);
}
- if ((dstr = strdup(str)) == NULL) {
- paxwarn(0, "Unable to allocate space for option list");
- return(-1);
- }
- frpt = endpt = dstr;
- /*
- * break into name and values pieces and stuff each one into a
- * OPLIST structure. When we know the format, the format specific
- * option function will go through this list
- */
- while ((frpt != NULL) && (*frpt != '\0')) {
- if ((endpt = strchr(frpt, ',')) != NULL)
- *endpt = '\0';
- if ((pt = strchr(frpt, '=')) == NULL) {
- paxwarn(0, "Invalid options format");
- free(dstr);
- return(-1);
+ /*
+ * Split the string into comma-separated entries, each a keyword
+ * optionally followed by "=" or ":=" and a value, and append one
+ * OPLIST node per entry. Every allocation failure is reported with
+ * paxwarn() before returning -1, as the code being replaced did.
+ */
+ src = str;
+ while (*src != '\0') {
+ while (isspace((unsigned char)*src))
+ src++;
+ if (*src == '\0')
+ break;
+ if (*src == ',') {
+ src++;
+ continue;
}
- if ((opt = malloc(sizeof(OPLIST))) == NULL) {
- paxwarn(0, "Unable to allocate space for option list");
- free(dstr);
- return(-1);
+
+ const char *key_start = src;
+ while (*src != '\0' && *src != '=' && *src != ':' &&
+ *src != ',' && !isspace((unsigned char)*src))
+ src++;
+ const char *key_end = src;
+ while (key_end > key_start &&
+ isspace((unsigned char)key_end[-1]))
+ key_end--;
+ if (key_end == key_start) {
+ paxwarn(0, "Invalid option name");
+ return (-1);
}
- dstr = NULL; /* parts of string going onto the OPLIST */
- *pt++ = '\0';
- opt->name = frpt;
- opt->value = pt;
+ if ((name = strndup(key_start, key_end - key_start)) == NULL) {
+ paxwarn(0, "Unable to allocate space for option list");
+ return (-1);
+ }
+
+ while (isspace((unsigned char)*src))
+ src++;
+ assign = OPT_ASSIGN_NONE;
+ if (*src == ':' && src[1] == '=') {
+ assign = OPT_ASSIGN_COLON;
+ src += 2;
+ } else if (*src == '=') {
+ assign = OPT_ASSIGN_EQ;
+ src++;
+ } else if (*src == ':') {
+ /* NOTE(review): a lone ':' is accepted as '='; confirm this is intended */
+ assign = OPT_ASSIGN_EQ;
+ src++;
+ }
+ while (isspace((unsigned char)*src))
+ src++;
+
+ if (assign != OPT_ASSIGN_NONE) {
+ /* a listopt value runs to the end of the string, commas included */
+ int consume_rest = (strcmp(name, "listopt") == 0);
+ value = opt_parse_value(&src, consume_rest);
+ if (value == NULL) {
+ paxwarn(0, "Unable to allocate space for option list");
+ free(name);
+ return (-1);
+ }
+ if (!consume_rest && *src == ',')
+ src++;
+ } else {
+ /* keyword given without a value: record an empty string */
+ value = strdup("");
+ if (value == NULL) {
+ paxwarn(0, "Unable to allocate space for option list");
+ free(name);
+ return (-1);
+ }
+ if (*src == ',')
+ src++;
+ }
+ while (isspace((unsigned char)*src))
+ src++;
+
+ if ((opt = malloc(sizeof(*opt))) == NULL) {
+ paxwarn(0, "Unable to allocate space for option list");
+ free(name);
+ free(value);
+ return (-1);
+ }
+ opt->name = name;
+ opt->value = value;
+ opt->assign = assign;
+ opt->handled = 0;
opt->fow = NULL;
- if (endpt != NULL)
- frpt = endpt + 1;
- else
- frpt = NULL;
- if (ophead == NULL) {
+ if (ophead == NULL)
optail = ophead = opt;
+ else {
+ optail->fow = opt;
+ optail = opt;
+ }
+ /* listopt swallowed the remainder of the string; nothing left to parse */
+ if (assign != OPT_ASSIGN_NONE && strcmp(name, "listopt") == 0)
+ break;
+ }
+ return 0;
+}
+
+/*
+ * opt_common()
+ * Handle the -o entries that are not specific to one archive format.
+ * Every "listopt" entry is removed from the option list and its value
+ * appended to the list format with listopt_append(); optail is then
+ * recomputed and pax_option_reset_session() is called.
+ */
+static void
+opt_common(void)
+{
+ OPLIST **prev, *opt, *next;
+
+ /* prev always points at the link that currently leads to opt */
+ prev = &ophead;
+ while ((opt = *prev) != NULL) {
+ next = opt->fow;
+ if (strcmp(opt->name, "listopt") == 0) {
+ if (listopt_append(opt->value) < 0) {
+ paxwarn(1, "Unable to record listopt format");
+ pax_usage();
+ }
+ /* unlink the consumed entry and release it */
+ *prev = next;
+ free(opt->name);
+ free(opt->value);
+ free(opt);
continue;
}
- optail->fow = opt;
- optail = opt;
+ prev = &opt->fow;
}
- free(dstr);
- return(0);
+
+ /* the old tail may have been removed above; walk to the new last entry */
+ optail = ophead;
+ if (optail != NULL) {
+ while (optail->fow != NULL)
+ optail = optail->fow;
+ }
+ /* Global keyword state is per-invocation, reset before parsing formats. */
+ pax_option_reset_session();
}
/*
Index: bin/pax/pax.h
===================================================================
RCS file: /cvs/src/bin/pax/pax.h,v
diff -u -r1.29 pax.h
--- bin/pax/pax.h 12 Sep 2017 17:11:11 -0000 1.29
+++ bin/pax/pax.h 12 Oct 2025 07:54:08 -0000
@@ -98,6 +98,41 @@
* may be required if and when the supporting operating system removes all
* restrictions on the length of pathnames it will resolve.
*/
+/*
+ * Linked list entry used to retain pax extended header keywords.
+ */
+typedef struct paxkey {
+ char *name; /* keyword name; released by pax_kv_free() */
+ char *value; /* keyword value; released by pax_kv_free() */
+ struct paxkey *next; /* next entry, NULL at the end of the list */
+} PAXKEY;
+
+/* Records command-line supplied pax keywords and their assignment style. */
+typedef struct paxopkv {
+ char *name;
+ char *value;
+ int assign; /* presumably one of the OPT_ASSIGN_* values -- TODO confirm */
+ struct paxopkv *next;
+} PAXOPKV;
+
+/* Tracks delete= pattern expressions supplied via -o options. */
+typedef struct paxdel {
+ char *pattern; /* pattern text given after delete= */
+ struct paxdel *next;
+} PAXDEL;
+
+/*
+ * Policy for handling invalid names, one enumerator per action accepted
+ * by -o invalid=action; returned by pax_option_invalid().
+ */
+enum pax_invalid_action {
+ PAX_INVALID_BYPASS,
+ PAX_INVALID_WRITE,
+ PAX_INVALID_RENAME,
+ PAX_INVALID_UTF8,
+ PAX_INVALID_BINARY
+};
+
+/*
+ * Per-member state stored in ARCHD.invalid.
+ *
+ * NOTE(review): PAX_INVALID_RENAME is also an enumerator of
+ * enum pax_invalid_action above. Once the macro below is defined, every
+ * later use of that name expands to the literal 2; this only works because
+ * the enumerator (third in the list) also has the value 2. The values 0
+ * and 1 likewise overlap PAX_INVALID_BYPASS and PAX_INVALID_WRITE. The two
+ * sets should get distinct prefixes so the policy and the per-member state
+ * cannot be confused.
+ */
+#define PAX_INVALID_NONE 0
+#define PAX_INVALID_SKIP 1
+#define PAX_INVALID_RENAME 2
+
typedef struct {
int nlen; /* file name length */
char name[PAXPATHLEN+1]; /* file name */
@@ -125,6 +160,9 @@
#define PAX_CTG 10 /* high performance file */
#define PAX_GLL 11 /* GNU long symlink */
#define PAX_GLF 12 /* GNU long file */
+ PAXKEY *xattr; /* file specific pax keywords */
+ const PAXKEY *gattr; /* global pax keywords in effect */
+ int invalid; /* invalid handling state */
} ARCHD;
#define PAX_IS_REG(type) ((type) == PAX_REG || (type) == PAX_CTG)
@@ -236,8 +274,14 @@
typedef struct oplist {
char *name; /* option variable name e.g. name= */
char *value; /* value for option variable */
+ int assign; /* how the value was given: one of OPT_ASSIGN_* */
+ int handled; /* option consumed by generic parser */
struct oplist *fow; /* next option */
} OPLIST;
+
+/* Values for OPLIST.assign, as set by opt_add(). */
+#define OPT_ASSIGN_NONE 0 /* bare keyword, no value supplied */
+#define OPT_ASSIGN_EQ 1 /* name=value (opt_add() also maps a lone ':' here) */
+#define OPT_ASSIGN_COLON 2 /* name:=value */
/*
* General Macros
Index: bin/pax/ar_subs.c
===================================================================
RCS file: /cvs/src/bin/pax/ar_subs.c,v
diff -u -r1.53 ar_subs.c
--- bin/pax/ar_subs.c 14 Jul 2024 14:32:02 -0000 1.53
+++ bin/pax/ar_subs.c 12 Oct 2025 07:54:16 -0000
@@ -92,6 +92,12 @@
* step through the archive until the format says it is done
*/
while (next_head(arcn) == 0) {
+ /* Skip archive members rejected by invalid= policy. */
+ if (arcn->invalid == PAX_INVALID_SKIP) {
+ if (rd_skip(arcn->skip + arcn->pad) == 1)
+ break;
+ continue;
+ }
if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
/*
* we need to read, to get the real filename
@@ -244,6 +250,12 @@
* says it is done
*/
while (next_head(arcn) == 0) {
+ /* Honor invalid=bypass by skipping unwanted members outright. */
+ if (arcn->invalid == PAX_INVALID_SKIP) {
+ if (rd_skip(arcn->skip + arcn->pad) == 1)
+ break;
+ continue;
+ }
if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
/*
* we need to read, to get the real filename
@@ -685,6 +697,12 @@
* step through the archive until the format says it is done
*/
while (next_head(arcn) == 0) {
+ /* Entries flagged for bypass are consumed without further work. */
+ if (arcn->invalid == PAX_INVALID_SKIP) {
+ if (rd_skip(arcn->skip + arcn->pad) == 1)
+ break;
+ continue;
+ }
/*
* check if this file meets user specified options.
*/
@@ -1024,6 +1042,14 @@
int in_resync = 0; /* set when we are in resync mode */
int cnt = 0; /* counter for trailer function */
int first = 1; /* on 1st read, EOF isn't premature. */
+
+ /*
+ * Clear out any per-file extended header state left from the
+ * previous archive member before we reuse the structure.
+ */
+ pax_kv_free(&arcn->xattr);
+ arcn->gattr = NULL;
+ arcn->invalid = PAX_INVALID_NONE;
/*
* set up initial conditions, we want a whole frmt->hsz block as we
Index: bin/pax/extern.h
===================================================================
RCS file: /cvs/src/bin/pax/extern.h,v
diff -u -r1.64 extern.h
--- bin/pax/extern.h 17 Apr 2024 18:12:12 -0000 1.64
+++ bin/pax/extern.h 12 Oct 2025 07:54:24 -0000
@@ -152,11 +152,34 @@
void ls_list(ARCHD *, time_t, FILE *);
void ls_tty(ARCHD *);
void safe_print(const char *, FILE *);
+int listopt_append(const char *);
+const char *listopt_get(void);
+void listopt_reset(void);
+void pax_option_set_linkdata(int);
+void pax_option_set_times(int);
+int pax_option_add_delete(const char *);
+int pax_option_set_invalid(const char *);
+int pax_option_set_exthdr_name(const char *);
+int pax_option_set_globexthdr_name(const char *);
+int pax_option_add_keyword(const char *, const char *, int);
+const PAXDEL *pax_option_delete(void);
+const PAXOPKV *pax_option_keywords(int);
+int pax_option_linkdata(void);
+int pax_option_times(void);
+const char *pax_option_exthdr_name(void);
+const char *pax_option_globexthdr_name(void);
+enum pax_invalid_action pax_option_invalid(void);
+void pax_option_reset_session(void);
u_long asc_ul(char *, int, int);
+int pax_handle_invalid_path(ARCHD *, const char *, const char *);
+int pax_handle_invalid_link(ARCHD *, const char *, const char *);
+void pax_mark_skip(ARCHD *);
int ul_asc(u_long, char *, int, int);
unsigned long long asc_ull(char *, int, int);
int ull_asc(unsigned long long, char *, int, int);
size_t fieldcpy(char *, size_t, const char *, size_t);
+void pax_kv_free(PAXKEY **);
+const char *pax_kv_lookup(const ARCHD *, const char *);
/*
* getoldopt.c
Index: bin/pax/pax.1
===================================================================
RCS file: /cvs/src/bin/pax/pax.1,v
diff -u -r1.80 pax.1
--- bin/pax/pax.1 30 Nov 2024 06:59:12 -0000 1.80
+++ bin/pax/pax.1 12 Oct 2025 07:54:32 -0000
@@ -453,8 +453,13 @@
.Fl x .
In general,
.Ar options
-take the form:
-.Ar name Ns = Ns Ar value .
+take the form
+.Ar name Ns = Ns Ar value
+or
+.Ar name Ns := Ns Ar value .
+Multiple keywords can be separated with commas.
+Backslash can be used to escape a literal comma or backslash inside a value.
+When the same keyword appears more than once, the last value wins.
.Pp
The following options are available for the
.Cm ustar
@@ -467,6 +472,74 @@
.It Cm write_opt=nodir
When writing archives, omit the storage of directories.
.El
+.Pp
+When the selected archive format is
+.Cm pax ,
+the following keywords are also understood:
+.Bl -tag -width "globexthdr.name=string"
+.It Cm delete Ns = Ns Ar pattern
+Suppress extended header keywords whose name matches
+.Ar pattern .
+.It Cm exthdr.name Ns = Ns Ar string
+Replace the default name used to store per-file extended attributes.
+The template may include
+.Ql %d
+(directory portion of the path),
+.Ql %f
+(final pathname component),
+.Ql %p
+(process ID), and
+.Ql %%
+for a literal percent sign.
+.It Cm globexthdr.name Ns = Ns Ar string
+Select the name written for
+.Ql typeflag g
+global extended headers.
+The template may include
+.Ql %n
+(the sequence number starting at 1),
+.Ql %p ,
+and
+.Ql %% .
+.It Cm invalid Ns = Ns Ar action
+Choose how pathnames that cannot be represented locally are handled when
+reading archives.
+The
+.Ar action
+may be one of
+.Cm bypass ,
+.Cm write ,
+.Cm rename ,
+.Cm UTF-8 ,
+or
+.Cm binary .
+.It Cm linkdata
+Always store the file data for hard links instead of sharing the contents
+with another archive member.
+.It Cm listopt Ns = Ns Ar format
+Customise the verbose listing produced in list mode with
+.Fl v .
+See
+.Sx List Output Formatting
+for the supported conversions.
+Multiple
+.Fl o Cm listopt
+options are concatenated in order.
+.It Cm times
+Force the emission of
+.Ql atime
+and
+.Ql mtime
+extended header records for each file when writing or copying.
+.It Ar keyword Ns = Ns Ar value
+Write
+.Ar keyword
+as a global extended header record when archiving, or override the value
+read from the archive when extracting.
+.It Ar keyword Ns := Ns Ar value
+Write a per-file extended header record with the specified value, or override
+the value for the current file when reading.
+.El
.It Fl P
Do not follow symbolic links, perform a physical file system traversal.
This is the default mode.
@@ -789,6 +862,63 @@
archive member.
The trailing newline
is not buffered and is written only after the file has been read or written.
+.Ss List Output Formatting
+The
+.Fl o Cm listopt Ns = Ns Ar format
+keyword alters the verbose list produced with
+.Fl v
+in list mode.
+The format string follows the rules of
+.Xr printf 3
+with the addition that conversions may be prefixed by
+.Pq Em keyword
+to select the field that supplies the argument.
+Unless noted below the standard flags, field width and precision are honoured.
+Keywords refer to archive header fields, including any values supplied by pax
+extended headers or by explicit
+.Fl o
+options.
+.Pp
+The following conversions extend the behaviour of
+.Xr printf 3 :
+.Bl -tag -width Ds
+.It %s , %c , %d , %i , %o , %u , %x , %X
+Use the selected keyword (defaulting to
+.Ql path )
+and print it with the requested conversion.
+.It %T
+Formats a time value.
+If the keyword is omitted
+.Ql mtime
+is used.
+A keyword of the form
+.Ql keyword=subformat
+selects the time field and the
+.Xr strftime 3
+format string to apply (default:
+.Ql %b %e %H:%M %Y ) .
+.It %M
+Prints the symbolic file mode string as produced by
+.Xr strmode 3 .
+.It %D
+Prints the device numbers for block or character special files
+as two comma-separated integers.
+If a keyword is supplied the numeric value associated with that keyword
+is printed instead.
+.It %F
+Prints a pathname composed from a comma-separated list of keywords.
+Missing components are skipped.
+When no list is supplied the stored pathname is used.
+.It %L
+Prints a symbolic link in the form
+.Dq path -> target .
+Non-links fall back to
+.Ql %F .
+.El
+.Pp
+Multiple
+.Fl o Cm listopt
+options append to the overall format string in the order they appear.
.It Fl w
Write files to the standard output
in the specified archive format.
@@ -1080,14 +1210,8 @@
The
.Nm
utility is compliant with the
-.St -p1003.1-2008
-specification,
-except that the
-.Cm pax
-archive format is only partially supported,
-and the
-.Cm listopt
-keyword is unsupported.
+.St -p1003.1-2024
+specification.
.Pp
The flags
.Op Fl 0BDEGjOPTUYZz ,
pax(1): new -o keyword framework, listopt, global exthdrs, and stricter invalid-path handling