Index | Thread | Search

From:
David Uhden Collado <daviduhden@gmail.com>
Subject:
pax(1): new -o keyword framework, listopt, global exthdrs, and stricter invalid-path handling
To:
tech@openbsd.org
Date:
Sun, 12 Oct 2025 10:28:35 +0200

Download raw body.

Thread
Hello everyone,

I have attached a patch to this email that modernizes pax/tar to match 
POSIX.1-2024 and adds a comprehensive -o keyword framework. It supports 
global and per-file pax keywords (KEY=VALUE and KEY:=VALUE), deletion 
filters (delete=pattern), configurable names for extended-header files 
(exthdr.name= and globexthdr.name= with % placeholders), and new toggles 
like linkdata (store hard-link contents) and times (always emit 
atime/mtime). The -o parser is rebuilt to handle "=", ":=", 
comma-separated lists, and backslash escaping, with last-value-wins 
semantics. When global keywords are present, a single global pax 
extended header (typeflag 'g') is written once per archive.

On read, a new invalid path/link policy is introduced via 
invalid=bypass|write|rename|utf-8|binary; entries marked for bypass are 
drained cleanly, and rename is requested where applicable. On write, 
hard links can optionally carry data, and timestamp emission is more 
consistent. A new listopt=FORMAT feature customizes verbose listings 
(-v) using printf-like conversions with pax-aware fields, including %T 
(strftime time), %M (symbolic mode), %D (device numbers or numeric 
keyword), %F (composed path), and %L ("path -> linktarget"). The man 
page documents all new options and claims POSIX.1-2024 compliance. 
Internally, the change adds keyword/storage structs (PAXKEY, PAXOPKV, 
PAXDEL), extends ARCHD with xattr/gattr/invalid fields, enhances 
wr_xheader() for global headers and naming templates, resets per-member 
state between entries, and includes small correctness/robustness fixes.

Best regards,
David.
Index: bin/pax/tar.c
===================================================================
RCS file: /cvs/src/bin/pax/tar.c,v
diff -u -r1.87 tar.c
--- bin/pax/tar.c	6 Jul 2025 19:25:51 -0000	1.87
+++ bin/pax/tar.c	12 Oct 2025 07:53:27 -0000
@@ -39,6 +39,7 @@
 #include <sys/stat.h>
 #include <ctype.h>
 #include <errno.h>
+#include <fnmatch.h>
 #include <grp.h>
 #include <libgen.h>
 #include <limits.h>
@@ -46,6 +47,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <strings.h>
 #include <unistd.h>
 
 #include "pax.h"
@@ -59,6 +61,22 @@
 	char				*record;
 };
 
+/* Command-line (-o) controlled pax state shared across parsing and I/O. */
+static PAXKEY *pax_global_xattr;	/* global keyword list used on read */
+static int pax_opt_linkdata;	/* set by pax_option_set_linkdata() */
+static int pax_opt_times;	/* set by pax_option_set_times() */
+static enum pax_invalid_action pax_opt_invalid = PAX_INVALID_BYPASS;
+static char *pax_opt_exthdr_name;	/* exthdr.name template, or NULL */
+static char *pax_opt_globexthdr_name;	/* globexthdr.name template, or NULL */
+static PAXDEL *pax_opt_delete_list;	/* delete= patterns, in order given */
+static PAXOPKV *pax_opt_keywords_global;	/* not OPT_ASSIGN_COLON */
+static PAXOPKV *pax_opt_keywords_local;	/* OPT_ASSIGN_COLON keywords */
+static int pax_keywords_prepared;	/* pax_prepare_user_keywords() ran */
+#ifndef SMALL
+static int pax_global_written;	/* 'g' header written or not needed */
+static unsigned int pax_global_seq = 1;	/* seq passed to wr_xheader() */
+#endif
+
 /* shortest possible extended record: "5 a=\n" */
 #define MINXHDRSZ	5
 
@@ -73,8 +91,19 @@
 static int ull_oct(unsigned long long, char *, int, int);
 static int rd_xheader(ARCHD *, int, off_t);
 #ifndef SMALL
-static int wr_xheader(char *, HD_USTAR *, struct xheader *);
+static int wr_xheader(const char *, HD_USTAR *, struct xheader *, int,
+    const char *, unsigned int);
+static void pax_option_apply_local_xhdr(struct xheader *);
+static int pax_write_global_header(void);
 #endif
+static int pax_store_kv(PAXKEY **, const char *, const char *);
+static void pax_apply_global(ARCHD *);
+static void pax_global_free(void);
+static int pax_keyword_deleted(const char *);
+static void pax_prepare_user_keywords(void);
+static void pax_apply_local_option_keywords(ARCHD *);
+static int pax_format_xhdr_name(char *, size_t, const char *, const char *,
+    unsigned int);
 
 static uid_t uid_nobody;
 static uid_t uid_warn;
@@ -112,6 +141,7 @@
 off_t
 tar_endrd(void)
 {
+	pax_global_free();
 	return NULLCNT * BLKMULT;
 }
 
@@ -656,7 +686,7 @@
 	 * to be written
 	 */
 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
-	    sizeof(hd->chksum), 3))
+	   sizeof(hd->chksum), 3))
 		goto out;
 	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0 ||
 	    wr_skip(BLKMULT - sizeof(HD_TAR)) < 0) {
@@ -735,8 +765,11 @@
 	if (ustar_id(buf, BLKMULT) < 0)
 		return(-1);
 
+	pax_prepare_user_keywords();
+
 reset:
 	memset(arcn, 0, sizeof(*arcn));
+	pax_apply_global(arcn);
 	arcn->org_name = arcn->name;
 	arcn->sb.st_nlink = 1;
 	arcn->sb.st_size = (off_t)-1;
@@ -881,6 +914,10 @@
 			 */
 			arcn->sb.st_mode |= S_IFREG;
 			arcn->sb.st_nlink = 2;
+			if (arcn->sb.st_size > 0) {
+				arcn->pad = TAR_PAD(arcn->sb.st_size);
+				arcn->skip = arcn->sb.st_size;
+			}
 		}
 		break;
 	case LONGLINKTYPE:
@@ -908,6 +945,8 @@
 		arcn->sb.st_mode |= S_IFREG;
 		break;
 	}
+
+	pax_apply_local_option_keywords(arcn);
 	return(0);
 }
 
@@ -920,6 +959,9 @@
 	int reclen, tmplen;
 	char *s;
 
+	if (pax_keyword_deleted(keyword))
+		return 0;
+
 	tmplen = MINXHDRSZ;
 	do {
 		reclen = tmplen;
@@ -952,6 +994,9 @@
 	int reclen, tmplen;
 	char *s;
 
+	if (pax_keyword_deleted(keyword))
+		return 0;
+
 	tmplen = MINXHDRSZ;
 	do {
 		reclen = tmplen;
@@ -985,6 +1030,9 @@
 	char frac[sizeof(".111222333")] = "";
 	char *s;
 
+	if (pax_keyword_deleted(keyword))
+		return 0;
+
 	/* Only write subsecond part if non-zero */
 	if (value->tv_nsec != 0) {
 		int n;
@@ -1038,7 +1086,8 @@
 }
 
 static int
-wr_xheader(char *fname, HD_USTAR *fhd, struct xheader *xhdr)
+wr_xheader(const char *fname, HD_USTAR *fhd, struct xheader *xhdr,
+    int global, const char *override_name, unsigned int seq)
 {
 	char hdblk[sizeof(HD_USTAR)];
 	HD_USTAR *hd;
@@ -1052,41 +1101,66 @@
 
 	memset(hdblk, 0, sizeof(hdblk));
 	hd = (HD_USTAR *)hdblk;
-	hd->typeflag = XHDRTYPE;
+	hd->typeflag = global ? GHDRTYPE : XHDRTYPE;
 	strncpy(hd->magic, TMAGIC, TMAGLEN);
 	strncpy(hd->version, TVERSION, TVERSLEN);
 	if (ul_oct(size, hd->size, sizeof(hd->size), 3))
 		goto out;
 
-	/*
-	 * Best effort attempt at providing a useful file name for
-	 * implementations that don't support pax format. Don't bother
-	 * with truncation if the resulting file name doesn't fit.
-	 * XXX dirname/basename portability (check return value?)
-	 */
-	(void)snprintf(buf, sizeof(buf), "%s/PaxHeaders.%ld/%s",
-	    dirname(fname), (long)getpid(), basename(fname));
+	if (global) {
+		const char *fmt = override_name != NULL ? override_name :
+		    pax_option_globexthdr_name();
+		if (fmt != NULL) {
+			if (pax_format_xhdr_name(buf, sizeof(buf), fmt,
+			    fname ? fname : "", seq) == -1)
+				goto out;
+		} else {
+			const char *tmpdir = getenv("TMPDIR");
+			if (tmpdir == NULL || *tmpdir == '\0')
+				tmpdir = "/tmp";
+			(void)snprintf(buf, sizeof(buf), "%s/GlobalHead.%ld.%u",
+			    tmpdir, (long)getpid(), seq);
+		}
+	} else {
+		const char *fmt = override_name != NULL ? override_name :
+		    pax_option_exthdr_name();
+		if (fmt != NULL) {
+			if (pax_format_xhdr_name(buf, sizeof(buf), fmt,
+			    fname ? fname : "", 0) == -1)
+				goto out;
+		} else if (fname != NULL) {
+			char *opath = NULL, *odirbuf = NULL;
+			const char *obase = fname;
+			const char *odir = ".";
+
+			if ((opath = strdup(fname)) != NULL)
+				obase = basename(opath);
+			if ((odirbuf = strdup(fname)) != NULL)
+				odir = dirname(odirbuf);
+			(void)snprintf(buf, sizeof(buf), "%s/PaxHeaders.%ld/%s",
+			    odir ? odir : ".", (long)getpid(), obase);
+			free(opath);
+			free(odirbuf);
+		} else {
+			(void)strlcpy(buf, "PaxHeaders", sizeof(buf));
+		}
+	}
 	fieldcpy(hd->name, sizeof(hd->name), buf, sizeof(buf));
 
-	/*
-	 * Inherit mode, mtime and owner from the file the headers are for.
-	 * This will only be extracted as an actual file by implementations
-	 * that don't support pax format.
-	 */
-	memcpy(hd->mode, fhd->mode, sizeof(hd->mode));
-	memcpy(hd->mtime, fhd->mtime, sizeof(hd->mtime));
-	memcpy(hd->uid, fhd->uid, sizeof(hd->uid));
-	memcpy(hd->gid, fhd->gid, sizeof(hd->gid));
+	if (fhd != NULL) {
+		memcpy(hd->mode, fhd->mode, sizeof(hd->mode));
+		memcpy(hd->mtime, fhd->mtime, sizeof(hd->mtime));
+		memcpy(hd->uid, fhd->uid, sizeof(hd->uid));
+		memcpy(hd->gid, fhd->gid, sizeof(hd->gid));
+	}
 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
-	   sizeof(hd->chksum), 3))
+   sizeof(hd->chksum), 3))
 		goto out;
 
-	/* write out extended header */
 	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0 ||
 	    wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0)
 		goto err;
 
-	/* write out extended header records */
 	SLIST_FOREACH(rec, xhdr, entry)
 		if (wr_rdbuf(rec->record, rec->reclen) < 0)
 			goto err;
@@ -1097,19 +1171,507 @@
 	return 0;
 
 out:
-	/*
-	 * header field is out of range
-	 */
-	paxwarn(1, "Pax header field is too small for %s", fname);
+	paxwarn(1, "Pax header field is too small for %s",
+	    fname ? fname : "pax header");
 	return 1;
 
 err:
-	paxwarn(1,"Could not write pax extended header for %s", fname);
+	paxwarn(1,"Could not write pax extended header for %s",
+	    fname ? fname : "pax header");
 	return -1;
 }
 #endif
 
 static int
+pax_store_kv(PAXKEY **head, const char *keyword, const char *value)
+{	/* Set, replace or (empty value) delete keyword in the list at *head. */
+	PAXKEY **curp, *kv;
+	char *dup;
+
+	if (head == NULL || keyword == NULL || value == NULL)
+		return -1;
+
+	for (curp = head; (kv = *curp) != NULL; curp = &kv->next) {
+		if (strcmp(kv->name, keyword) == 0)
+			break;	/* kv is the match, *curp the link to it */
+	}
+
+	if (value[0] == '\0') {	/* empty value: drop any existing entry */
+		if (kv != NULL) {
+			*curp = kv->next;
+			free(kv->name);
+			free(kv->value);
+			free(kv);
+		}
+		return 0;
+	}
+
+	if (kv == NULL) {	/* not present yet: push a new node at the head */
+		if ((kv = calloc(1, sizeof(*kv))) == NULL)
+			return -1;
+		if ((kv->name = strdup(keyword)) == NULL) {
+			free(kv);
+			return -1;
+		}
+		if ((kv->value = strdup(value)) == NULL) {
+			free(kv->name);
+			free(kv);
+			return -1;
+		}
+		kv->next = *head;
+		*head = kv;
+		return 0;
+	}
+
+	dup = strdup(value);	/* copy first so a failure keeps the old value */
+	if (dup == NULL)
+		return -1;
+	free(kv->value);
+	kv->value = dup;
+	return 0;
+}
+
+static void
+pax_apply_global(ARCHD *arcn)
+{	/* Point the member at the shared global keyword list (no copy). */
+	if (arcn != NULL)
+		arcn->gattr = pax_global_xattr;
+}
+
+static void
+pax_global_free(void)
+{	/* Hand the global keyword list to pax_kv_free(); used by tar_endrd(). */
+	pax_kv_free(&pax_global_xattr);	/* NOTE(review): pax_keywords_prepared stays set -- confirm */
+}
+
+static int
+pax_keyword_deleted(const char *keyword)
+{	/* Return 1 if keyword matches any recorded delete= pattern, else 0. */
+	const PAXDEL *del;
+
+	for (del = pax_option_delete(); del != NULL; del = del->next)
+		if (fnmatch(del->pattern, keyword, 0) == 0)
+			return 1;
+	return 0;
+}
+
+static void
+pax_prepare_user_keywords(void)
+{	/* Seed pax_global_xattr from the global -o keyword list, only once. */
+	const PAXOPKV *kv;
+
+	if (pax_keywords_prepared)
+		return;
+	for (kv = pax_option_keywords(OPT_ASSIGN_EQ); kv != NULL;
+	    kv = kv->next) {
+		if (pax_keyword_deleted(kv->name))
+			continue;	/* suppressed by a delete= pattern */
+		if (pax_store_kv(&pax_global_xattr, kv->name, kv->value) == -1)
+			paxwarn(1, "Unable to apply global keyword %s", kv->name);
+	}
+	pax_keywords_prepared = 1;	/* set even if some keywords failed */
+}
+
+static void
+pax_apply_local_option_keywords(ARCHD *arcn)
+{	/* Store every OPT_ASSIGN_COLON keyword in this member's xattr list. */
+	const PAXOPKV *kv;
+
+	for (kv = pax_option_keywords(OPT_ASSIGN_COLON); kv != NULL;
+	    kv = kv->next) {
+		if (pax_keyword_deleted(kv->name))
+			continue;	/* suppressed by a delete= pattern */
+		if (pax_store_kv(&arcn->xattr, kv->name, kv->value) == -1)
+			paxwarn(1, "Unable to apply per-file keyword %s",
+			    kv->name);
+	}
+}
+
+#ifndef SMALL
+/* Append each OPT_ASSIGN_COLON keyword not matched by delete= to xhdr. */
+static void
+pax_option_apply_local_xhdr(struct xheader *xhdr)
+{
+	const PAXOPKV *kv;
+
+	if (xhdr == NULL)
+		return;
+	for (kv = pax_option_keywords(OPT_ASSIGN_COLON); kv != NULL;
+	    kv = kv->next) {
+		if (pax_keyword_deleted(kv->name))
+			continue;
+		if (xheader_add(xhdr, kv->name, kv->value) == -1)
+			paxwarn(1, "Unable to write per-file keyword %s",
+			    kv->name);	/* warn and keep going */
+	}
+}
+
+/*
+ * Emit the global -o keywords as one typeflag 'g' header, at most once until
+ * pax_global_written is cleared.  Returns wr_xheader()'s 0/1, or -1 on error.
+ */
+static int
+pax_write_global_header(void)
+{
+	const PAXOPKV *kv;
+	struct xheader xhdr = SLIST_HEAD_INITIALIZER(xhdr);
+	HD_USTAR dummy;
+	int ret;
+
+	if (pax_global_written)
+		return 0;
+	memset(&dummy, 0, sizeof(dummy));
+	for (kv = pax_option_keywords(OPT_ASSIGN_EQ); kv != NULL;
+	    kv = kv->next) {
+		if (pax_keyword_deleted(kv->name))
+			continue;
+		if (xheader_add(&xhdr, kv->name, kv->value) == -1) {
+			xheader_free(&xhdr);
+			return -1;
+		}
+	}
+	if (SLIST_EMPTY(&xhdr)) {	/* nothing to write: no 'g' header */
+		pax_global_written = 1;
+		return 0;
+	}
+	ret = wr_xheader(NULL, &dummy, &xhdr, 1, NULL, pax_global_seq++);
+	xheader_free(&xhdr);
+	if (ret < 0)
+		return -1;
+	/* 0 (written) or 1 (header field overflow): do not try again. */
+	pax_global_written = 1;
+	return ret;
+}
+#endif
+
+void
+pax_option_set_linkdata(int enable)
+{
+	pax_opt_linkdata = (enable != 0);	/* stored as 0 or 1 */
+}
+
+/* Record whether atime/mtime keywords are emitted unconditionally. */
+void
+pax_option_set_times(int enable)
+{
+	pax_opt_times = (enable != 0);	/* stored as 0 or 1 */
+}
+
+int
+pax_option_set_invalid(const char *value)
+{	/* Map an invalid= action name (any case) to pax_opt_invalid. */
+	if (value == NULL)
+		return -1;
+	if (strcasecmp(value, "bypass") == 0)
+		pax_opt_invalid = PAX_INVALID_BYPASS;
+	else if (strcasecmp(value, "write") == 0)
+		pax_opt_invalid = PAX_INVALID_WRITE;
+	else if (strcasecmp(value, "rename") == 0)
+		pax_opt_invalid = PAX_INVALID_RENAME;
+	else if (strcasecmp(value, "utf-8") == 0)
+		pax_opt_invalid = PAX_INVALID_UTF8;
+	else if (strcasecmp(value, "binary") == 0)
+		pax_opt_invalid = PAX_INVALID_BINARY;
+	else
+		return -1;	/* unknown name: previous setting is kept */
+	return 0;
+}
+
+static int
+pax_option_store_string(char **dst, const char *value)
+{	/* Replace *dst with a copy of value; -1 on NULL value or strdup failure. */
+	char *dup;
+
+	if (value == NULL)
+		return -1;
+	dup = strdup(value);
+	if (dup == NULL)
+		return -1;	/* *dst is left untouched */
+	free(*dst);
+	*dst = dup;
+	return 0;
+}
+
+int
+pax_option_set_exthdr_name(const char *value)
+{	/* Store a copy of the exthdr.name template; 0 on success, -1 on error. */
+	return pax_option_store_string(&pax_opt_exthdr_name, value);
+}
+
+int
+pax_option_set_globexthdr_name(const char *value)
+{	/* Store a copy of the globexthdr.name template; 0 on success, -1 on error. */
+	return pax_option_store_string(&pax_opt_globexthdr_name, value);
+}
+
+int
+pax_option_add_delete(const char *pattern)
+{	/* Append a copy of pattern to the delete list; -1 on NULL or ENOMEM. */
+	PAXDEL *node, *cur;
+
+	if (pattern == NULL)
+		return -1;
+	if ((node = malloc(sizeof(*node))) == NULL)
+		return -1;
+	if ((node->pattern = strdup(pattern)) == NULL) {
+		free(node);
+		return -1;
+	}
+	node->next = NULL;
+	if (pax_opt_delete_list == NULL)
+		pax_opt_delete_list = node;
+	else {
+		cur = pax_opt_delete_list;	/* walk to the tail to keep order */
+		while (cur->next != NULL)
+			cur = cur->next;
+		cur->next = node;
+	}
+	return 0;
+}
+
+static int
+pax_option_add_keyword_internal(PAXOPKV **head, const char *name,
+    const char *value, int assign)
+{	/* Append a copied name/value node to the list at *head; -1 on ENOMEM. */
+	PAXOPKV *node, *cur;
+
+	if ((node = malloc(sizeof(*node))) == NULL)
+		return -1;
+	if ((node->name = strdup(name)) == NULL) {
+		free(node);
+		return -1;
+	}
+	if ((node->value = strdup(value)) == NULL) {
+		free(node->name);
+		free(node);
+		return -1;
+	}
+	node->assign = assign;
+	node->next = NULL;
+	if (*head == NULL)
+		*head = node;
+	else {
+		cur = *head;	/* walk to the tail to keep command-line order */
+		while (cur->next != NULL)
+			cur = cur->next;
+		cur->next = node;
+	}
+	return 0;
+}
+
+int
+pax_option_add_keyword(const char *name, const char *value, int assign)
+{	/* Record a keyword on the local (OPT_ASSIGN_COLON) or global list. */
+	if (name == NULL || value == NULL)
+		return -1;
+	if (assign == OPT_ASSIGN_COLON)
+		return pax_option_add_keyword_internal(&pax_opt_keywords_local,
+		    name, value, assign);
+	return pax_option_add_keyword_internal(&pax_opt_keywords_global,
+	    name, value, assign);	/* every other assign value is global */
+}
+
+const PAXDEL *
+pax_option_delete(void)
+{
+	return pax_opt_delete_list;	/* NULL if no pattern was recorded */
+}
+
+const PAXOPKV *
+pax_option_keywords(int assign)
+{	/* Return the list head matching assign; either may be NULL. */
+	if (assign == OPT_ASSIGN_COLON)
+		return pax_opt_keywords_local;
+	return pax_opt_keywords_global;	/* any other assign value */
+}
+
+int
+pax_option_linkdata(void)
+{
+	return pax_opt_linkdata;	/* 0 or 1, see pax_option_set_linkdata() */
+}
+
+int
+pax_option_times(void)
+{
+	return pax_opt_times;	/* 0 or 1, see pax_option_set_times() */
+}
+
+const char *
+pax_option_exthdr_name(void)
+{
+	return pax_opt_exthdr_name;	/* NULL until a template is stored */
+}
+
+const char *
+pax_option_globexthdr_name(void)
+{
+	return pax_opt_globexthdr_name;	/* NULL until a template is stored */
+}
+
+enum pax_invalid_action
+pax_option_invalid(void)
+{
+	return pax_opt_invalid;	/* PAX_INVALID_BYPASS unless changed */
+}
+
+/* Re-arm global header emission; does nothing when built with SMALL. */
+void
+pax_option_reset_session(void)
+{
+#ifndef SMALL
+	pax_global_written = 0;	/* allow a new 'g' header to be written */
+	pax_global_seq = 1;	/* restart the header sequence number */
+#endif
+}
+
+/*
+ * Expand an extended-header name template into buf.  %d and %f are
+ * dirname(path) and basename(path), %p the pid, %n seq (1 if seq is 0),
+ * %% a literal '%'; any other %c yields c and a trailing '%' is dropped.
+ * Returns 0 on success, -1 on bad arguments or if buf is too small.
+ */
+static int
+pax_format_xhdr_name(char *buf, size_t bufsz, const char *fmt,
+    const char *path, unsigned int seq)
+{
+	char *path_copy = NULL, *dir_copy = NULL, *bp;
+	const char *dir = ".", *file = path, *ins;
+	char tmp[32], to_insert;
+	size_t inslen, remaining = bufsz;
+
+	if (fmt == NULL || buf == NULL || bufsz == 0)
+		return -1;
+
+	if (path != NULL && *path != '\0') {
+		if ((path_copy = strdup(path)) != NULL)
+			file = basename(path_copy);
+		if ((dir_copy = strdup(path)) != NULL)
+			dir = dirname(dir_copy);
+	}
+
+	buf[0] = '\0';
+	bp = buf;
+	while (*fmt != '\0') {
+		if (*fmt != '%') {
+			if (remaining <= 1)
+				goto toolong;
+			*bp++ = *fmt++;
+			remaining--;
+			continue;
+		}
+		fmt++;
+		to_insert = *fmt;
+		if (to_insert == '\0')
+			break;
+		fmt++;
+		switch (to_insert) {
+		case 'd':
+			ins = dir;
+			break;
+		case 'f':
+			ins = file;
+			break;
+		case 'p':
+			snprintf(tmp, sizeof(tmp), "%ld", (long)getpid());
+			ins = tmp;
+			break;
+		case 'n':
+			snprintf(tmp, sizeof(tmp), "%u",
+			    seq == 0 ? 1U : seq);
+			ins = tmp;
+			break;
+		case '%':
+			tmp[0] = '%';
+			tmp[1] = '\0';
+			ins = tmp;
+			break;
+		default:
+			tmp[0] = to_insert;
+			tmp[1] = '\0';
+			ins = tmp;
+			break;
+		}
+		/* %f leaves ins NULL when the caller passed a NULL path. */
+		if (ins == NULL)
+			ins = "";
+		inslen = strlen(ins);
+		if (inslen >= remaining)
+			goto toolong;
+		memcpy(bp, ins, inslen);
+		bp += inslen;
+		remaining -= inslen;
+	}
+	*bp = '\0';
+	free(path_copy);
+	free(dir_copy);
+	return 0;
+
+toolong:
+	free(path_copy);
+	free(dir_copy);
+	return -1;
+}
+
+/* Decide what to do with an invalid pathname encountered during read. */
+int
+pax_handle_invalid_path(ARCHD *arcn, const char *keyword, const char *value)
+{
+	enum pax_invalid_action act = pax_option_invalid();
+
+	switch (act) {
+	case PAX_INVALID_BYPASS:
+		paxwarn(1, "Skipping entry with invalid %s \"%s\"",
+		    keyword, value);
+		pax_mark_skip(arcn);	/* arcn->invalid = PAX_INVALID_SKIP */
+		return -1;
+	case PAX_INVALID_WRITE:
+	case PAX_INVALID_UTF8:
+	case PAX_INVALID_BINARY:
+		return 0;	/* accepted as is, without a warning */
+	case PAX_INVALID_RENAME:
+		paxwarn(0, "Invalid %s \"%s\"; requesting rename", keyword,
+		    value);
+		arcn->invalid = PAX_INVALID_RENAME;	/* only flagged here */
+		return 0;
+	}
+	return 0;	/* any other action value is accepted as well */
+}
+
+/* Apply the invalid= policy to link targets pulled from extended headers. */
+int
+pax_handle_invalid_link(ARCHD *arcn, const char *keyword, const char *value)
+{
+	enum pax_invalid_action act = pax_option_invalid();
+
+	switch (act) {
+	case PAX_INVALID_BYPASS:
+		paxwarn(1, "Skipping link with invalid %s \"%s\"", keyword,
+		    value);
+		pax_mark_skip(arcn);	/* arcn->invalid = PAX_INVALID_SKIP */
+		return -1;
+	case PAX_INVALID_WRITE:
+	case PAX_INVALID_UTF8:
+	case PAX_INVALID_BINARY:
+		return 0;	/* accepted as is, without a warning */
+	case PAX_INVALID_RENAME:
+		paxwarn(1, "Cannot rename invalid link target \"%s\"; skipping",
+		    value);
+		pax_mark_skip(arcn);	/* rename is handled like bypass */
+		return -1;
+	}
+	return 0;	/* any other action value is accepted as well */
+}
+
+/* Flag the member as PAX_INVALID_SKIP; a NULL arcn is ignored. */
+void
+pax_mark_skip(ARCHD *arcn)
+{
+	if (arcn != NULL)
+		arcn->invalid = PAX_INVALID_SKIP;
+}
+
+static int
 wr_ustar_or_pax(ARCHD *arcn, int ustar)
 {
 	HD_USTAR *hd;
@@ -1119,6 +1681,15 @@
 	struct xheader xhdr = SLIST_HEAD_INITIALIZER(xhdr);
 #endif
 	int bad_mtime;
+	int write_data = 0;
+#ifndef SMALL
+	/* Ensure any pending global keywords are written once per archive. */
+	if (!ustar) {
+		int gres = pax_write_global_header();
+		if (gres < 0)
+			return 1;
+	}
+#endif
 
 	/*
 	 * check for those file system types ustar cannot store
@@ -1234,15 +1805,27 @@
 	case PAX_SLK:
 	case PAX_HLK:
 	case PAX_HRG:
-		if (arcn->type == PAX_SLK)
-			hd->typeflag = SYMTYPE;
-		else
-			hd->typeflag = LNKTYPE;
 		fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
 		    sizeof(arcn->ln_name));
-		if (ul_oct(0, hd->size, sizeof(hd->size), 3))
-			goto out;
-		break;
+		if (arcn->type == PAX_SLK) {
+			hd->typeflag = SYMTYPE;
+			if (ul_oct(0, hd->size, sizeof(hd->size), 3))
+				goto out;
+		} else {
+			hd->typeflag = LNKTYPE;
+			/* Optional pax extension: store file data along with the link. */
+			if (!ustar && pax_option_linkdata()) {
+				arcn->pad = TAR_PAD(arcn->sb.st_size);
+				if (ull_oct(arcn->sb.st_size, hd->size,
+				    sizeof(hd->size), 3))
+					goto out;
+				write_data = 1;
+		} else {
+			if (ul_oct(0, hd->size, sizeof(hd->size), 3))
+				goto out;
+		}
+	}
+	break;
 	case PAX_REG:
 	case PAX_CTG:
 	default:
@@ -1254,12 +1837,12 @@
 		else
 			hd->typeflag = REGTYPE;
 		arcn->pad = TAR_PAD(arcn->sb.st_size);
-		if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
-			if (ustar) {
-				paxwarn(1, "File is too long for ustar %s",
-				    arcn->org_name);
-				return(1);
-			}
+	if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
+		if (ustar) {
+			paxwarn(1, "File is too long for ustar %s",
+			    arcn->org_name);
+			return(1);
+		}
 #ifndef SMALL
 			else if (xheader_add_ull(&xhdr, "size",
 			    arcn->sb.st_size) == -1) {
@@ -1270,6 +1853,7 @@
 			}
 #endif
 		}
+		write_data = 1;
 		break;
 	}
 
@@ -1320,14 +1904,18 @@
 		 *
 		 * ctime isn't specified by POSIX so omit it.
 		 */
-		if (xheader_add_ts(&xhdr, "atime", &arcn->sb.st_atim) == -1) {
-			paxwarn(1, "Couldn't preserve %s in pax format for %s",
-			    "atime", arcn->org_name);
-			xheader_free(&xhdr);
-			return (1);
+		if (pax_option_times()) {
+			if (xheader_add_ts(&xhdr, "atime",
+			    &arcn->sb.st_atim) == -1) {
+				paxwarn(1,
+				    "Couldn't preserve %s in pax format for %s",
+				    "atime", arcn->org_name);
+				xheader_free(&xhdr);
+				return (1);
+			}
 		}
-		if ((bad_mtime || arcn->sb.st_mtime < 0 ||
-			arcn->sb.st_mtim.tv_nsec != 0) &&
+		if ((pax_option_times() || bad_mtime || arcn->sb.st_mtime < 0 ||
+		    arcn->sb.st_mtim.tv_nsec != 0) &&
 		    xheader_add_ts(&xhdr, "mtime", &arcn->sb.st_mtim) == -1) {
 			paxwarn(1, "Couldn't preserve %s in pax format for %s",
 			    "mtime", arcn->org_name);
@@ -1346,11 +1934,13 @@
 	}
 
 #ifndef SMALL
+	pax_option_apply_local_xhdr(&xhdr);
+
 	/* write out a pax extended header if needed */
 	if (!SLIST_EMPTY(&xhdr)) {
 		int ret;
 
-		ret = wr_xheader(arcn->name, hd, &xhdr);
+		ret = wr_xheader(arcn->name, hd, &xhdr, 0, NULL, 0);
 		xheader_free(&xhdr);
 		if (ret)
 			return(ret);
@@ -1363,7 +1953,7 @@
 	 * needs to be written
 	 */
 	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
-	   sizeof(hd->chksum), 3))
+		sizeof(hd->chksum), 3))
 		goto out;
 	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0 ||
 	    wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0) {
@@ -1371,9 +1961,7 @@
 		    arcn->org_name);
 		return(-1);
 	}
-	if (PAX_IS_REG(arcn->type))
-		return(0);
-	return(1);
+	return (write_data ? 0 : 1);
 
     out:
 #ifndef SMALL
@@ -1475,11 +2063,66 @@
 	OPLIST *opt;
 
 	while ((opt = opt_next()) != NULL) {
-		if (1) {
-			paxwarn(1, "Unknown pax format -o option/value pair %s=%s",
-			    opt->name, opt->value);
-			return(-1);
+		if (strcmp(opt->name, "delete") == 0) {
+			if (pax_option_add_delete(opt->value) < 0) {
+				paxwarn(1, "Unable to record delete pattern %s",
+				    opt->value);
+				free(opt->name);
+				free(opt->value);
+				free(opt);
+				return (-1);
+			}
+		} else if (strcmp(opt->name, "exthdr.name") == 0) {
+			if (pax_option_set_exthdr_name(opt->value) < 0) {
+				paxwarn(1, "Unable to set exthdr.name to %s",
+				    opt->value);
+				free(opt->name);
+				free(opt->value);
+				free(opt);
+				return (-1);
+			}
+		} else if (strcmp(opt->name, "globexthdr.name") == 0) {
+			if (pax_option_set_globexthdr_name(opt->value) < 0) {
+				paxwarn(1, "Unable to set globexthdr.name to %s",
+				    opt->value);
+				free(opt->name);
+				free(opt->value);
+				free(opt);
+				return (-1);
+			}
+		} else if (strcmp(opt->name, "invalid") == 0) {
+			if (pax_option_set_invalid(opt->value) < 0) {
+				paxwarn(1, "Unknown invalid action %s", opt->value);
+				free(opt->name);
+				free(opt->value);
+				free(opt);
+				return (-1);
+			}
+		} else if (strcmp(opt->name, "linkdata") == 0) {
+			pax_option_set_linkdata(1);
+		} else if (strcmp(opt->name, "times") == 0) {
+			pax_option_set_times(1);
+		} else if (opt->assign != OPT_ASSIGN_NONE) {
+			if (pax_option_add_keyword(opt->name, opt->value,
+			    opt->assign) < 0) {
+				paxwarn(1,
+				    "Unable to record pax keyword %s=%s",
+				    opt->name, opt->value);
+				free(opt->name);
+				free(opt->value);
+				free(opt);
+				return (-1);
+			}
+		} else {
+			paxwarn(1, "Unknown pax format -o option %s", opt->name);
+			free(opt->name);
+			free(opt->value);
+			free(opt);
+			return (-1);
 		}
+		free(opt->name);
+		free(opt->value);
+		free(opt);
 	}
 	return 0;
 }
@@ -1695,13 +2338,33 @@
 			break;
 		}
 		*p++ = nextp[-1] = '\0';
+		if (pax_keyword_deleted(keyword)) {
+			p = nextp;
+			continue;
+		}
+		if (pax_store_kv(global ? &pax_global_xattr : &arcn->xattr,
+		    keyword, p) == -1) {
+			paxwarn(1, "Unable to store extended header keyword %s",
+			    keyword);
+			ret = -1;
+			break;
+		}
 		if (!global) {
 			if (!strcmp(keyword, "path")) {
-				arcn->nlen = strlcpy(arcn->name, p,
+				size_t len = strlcpy(arcn->name, p,
 				    sizeof(arcn->name));
+				arcn->nlen = MINIMUM(len, sizeof(arcn->name) - 1);
+				if (len >= sizeof(arcn->name))
+					(void)pax_handle_invalid_path(arcn,
+					    keyword, p);
 			} else if (!strcmp(keyword, "linkpath")) {
-				arcn->ln_nlen = strlcpy(arcn->ln_name, p,
+				size_t len = strlcpy(arcn->ln_name, p,
 				    sizeof(arcn->ln_name));
+				arcn->ln_nlen = MINIMUM(len,
+				    sizeof(arcn->ln_name) - 1);
+				if (len >= sizeof(arcn->ln_name))
+					(void)pax_handle_invalid_link(arcn,
+					    keyword, p);
 			} else if (!strcmp(keyword, "mtime")) {
 				ret = rd_time(&arcn->sb.st_mtim, keyword, p);
 				if (ret < 0)
Index: bin/pax/gen_subs.c
===================================================================
RCS file: /cvs/src/bin/pax/gen_subs.c,v
diff -u -r1.34 gen_subs.c
--- bin/pax/gen_subs.c	27 Apr 2024 19:49:42 -0000	1.34
+++ bin/pax/gen_subs.c	12 Oct 2025 07:53:40 -0000
@@ -36,7 +36,9 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <ctype.h>
 #include <grp.h>
+#include <limits.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -53,6 +55,334 @@
  * a collection of general purpose subroutines used by pax
  */
 
+/* Accumulated format string for -o listopt=. */
+static char *listopt_format;	/* NUL-terminated; NULL until first append */
+static size_t listopt_len;	/* length of listopt_format without the NUL */
+
+/* Scratch context tracking dynamically duplicated strings. */
+struct listopt_ctx {
+	char **allocated;	/* strings owned by the context */
+	size_t count;		/* entries in use */
+	size_t cap;		/* entries allocated */
+};
+
+/* Parsed elements for a single custom listopt conversion. */
+struct listopt_spec {
+	char flags[16];		/* any of "-+ #0'" */
+	char width[16];		/* decimal digits */
+	char precision[16];	/* '.' followed by digits */
+	char length[8];		/* any of "hljztL" */
+	char keyword[128];	/* text between '(' and ')' */
+	char subfmt[128];	/* for %T: keyword text after '=' */
+	char conv;		/* conversion character */
+};
+
+static void
+listopt_ctx_init(struct listopt_ctx *ctx)
+{	/* Start with an empty context: no array, no entries. */
+	ctx->allocated = NULL;
+	ctx->count = ctx->cap = 0;
+}
+
+static void
+listopt_ctx_free(struct listopt_ctx *ctx)
+{	/* Free every stored string and the array; ctx is reusable afterwards. */
+	size_t i;
+
+	if (ctx == NULL)
+		return;
+	for (i = 0; i < ctx->count; i++)
+		free(ctx->allocated[i]);
+	free(ctx->allocated);
+	ctx->allocated = NULL;
+	ctx->count = ctx->cap = 0;
+}
+
+static const char *
+listopt_store(struct listopt_ctx *ctx, const char *str)
+{	/* Return a copy of str owned by ctx, or "" if memory runs out. */
+	char *dup;
+	char **tmp;
+
+	if (str == NULL)
+		str = "";
+	dup = strdup(str);
+	if (dup == NULL)
+		return "";
+	if (ctx->count == ctx->cap) {
+		size_t newcap = ctx->cap ? ctx->cap * 2 : 8;	/* 8, 16, 32, ... */
+		tmp = reallocarray(ctx->allocated, newcap, sizeof(*tmp));
+		if (tmp == NULL) {
+			free(dup);
+			return "";
+		}
+		ctx->allocated = tmp;
+		ctx->cap = newcap;
+	}
+	ctx->allocated[ctx->count++] = dup;
+	return dup;
+}
+
+/* Break down a single % conversion, recording printf modifiers and keyword. */
+static int
+listopt_parse_spec(const char *fmt, struct listopt_spec *spec,
+    const char **endp)
+{
+	const char *p = fmt;
+	enum { PARSE_FLAGS, PARSE_WIDTH, PARSE_PRECISION, PARSE_LENGTH } state;
+
+	if (*p != '%')
+		return 0;	/* not a conversion; spec and *endp untouched */
+	memset(spec, 0, sizeof(*spec));
+	p++;
+	state = PARSE_FLAGS;
+	while (*p != '\0') {
+		if (*p == '(') {	/* "(keyword)" is accepted in any state */
+			const char *start = ++p;
+			size_t len;
+
+			while (*p != '\0' && *p != ')')
+				p++;
+			len = p - start;
+			if (len >= sizeof(spec->keyword))
+				len = sizeof(spec->keyword) - 1;	/* truncate */
+			memcpy(spec->keyword, start, len);
+			spec->keyword[len] = '\0';
+			if (*p == ')')
+				p++;
+			state = PARSE_FLAGS;
+			continue;
+		}
+		switch (state) {
+		case PARSE_FLAGS:
+			if (strchr("-+ #0'", *p) != NULL) {
+				size_t fl = strlen(spec->flags);
+				if (fl + 1 < sizeof(spec->flags)) {
+					spec->flags[fl] = *p;
+					spec->flags[fl + 1] = '\0';
+				}
+				p++;
+				continue;
+			}
+			state = PARSE_WIDTH;
+			continue;
+		case PARSE_WIDTH:
+			if (isdigit((unsigned char)*p)) {
+				size_t wl = strlen(spec->width);
+				if (wl + 1 < sizeof(spec->width)) {
+					spec->width[wl] = *p;
+					spec->width[wl + 1] = '\0';
+				}
+				p++;
+				continue;
+			}
+			if (*p == '.') {	/* the '.' is kept in precision */
+				size_t pl = strlen(spec->precision);
+				if (pl + 1 < sizeof(spec->precision)) {
+					spec->precision[pl] = '.';
+					spec->precision[pl + 1] = '\0';
+				}
+				p++;
+				state = PARSE_PRECISION;
+				continue;
+			}
+			state = PARSE_LENGTH;
+			continue;
+		case PARSE_PRECISION:
+			if (isdigit((unsigned char)*p)) {
+				size_t pl = strlen(spec->precision);
+				if (pl + 1 < sizeof(spec->precision)) {
+					spec->precision[pl] = *p;
+					spec->precision[pl + 1] = '\0';
+				}
+				p++;
+				continue;
+			}
+			state = PARSE_LENGTH;
+			continue;
+		case PARSE_LENGTH:
+			if (strchr("hljztL", *p) != NULL) {
+				size_t ll = strlen(spec->length);
+				if (ll + 1 < sizeof(spec->length)) {
+					spec->length[ll] = *p;
+					spec->length[ll + 1] = '\0';
+				}
+				p++;
+				/* Support double h/l modifiers */
+				if ((spec->length[0] == 'h' || spec->length[0] == 'l') &&
+				    spec->length[1] == '\0' && (*p == spec->length[0])) {
+					if (strlen(spec->length) + 1 < sizeof(spec->length)) {
+						size_t l2 = strlen(spec->length);
+						spec->length[l2] = *p;
+						spec->length[l2 + 1] = '\0';
+					}
+					p++;
+				}
+				continue;
+			}
+			spec->conv = *p++;	/* first non-modifier character */
+			goto done;
+		}
+	}
+	done:
+	if (spec->conv == '\0')
+		return -1;	/* input ended first; *endp is not set */
+	if (spec->keyword[0] != '\0' && spec->conv == 'T') {
+		char *eq = strchr(spec->keyword, '=');
+		if (eq != NULL) {	/* split "keyword=subformat" */
+			strlcpy(spec->subfmt, eq + 1, sizeof(spec->subfmt));
+			*eq = '\0';
+		}
+	}
+	*endp = p;	/* first character after the conversion */
+	return 1;
+}
+
+/* Retrieve a keyword value as a string, allocating stable storage as needed. */
+static const char *
+listopt_keyword_string(struct listopt_ctx *ctx, ARCHD *arcn,
+    const char *keyword)
+{
+	const char *val;
+	char *dup;
+
+	if (keyword == NULL || *keyword == '\0' ||
+	    strcmp(keyword, "path") == 0)
+		return arcn->name;	/* default when no keyword is given */
+	if (strcmp(keyword, "linkpath") == 0)
+		return arcn->ln_name;
+	if (strcmp(keyword, "uname") == 0) {
+		val = user_from_uid(arcn->sb.st_uid, 0);
+		return val ? val : "";
+	}
+	if (strcmp(keyword, "gname") == 0) {
+		val = group_from_gid(arcn->sb.st_gid, 0);
+		return val ? val : "";
+	}
+	if (strcmp(keyword, "name") == 0) {
+		if ((dup = strdup(arcn->name)) == NULL)
+			return "";
+		val = listopt_store(ctx, basename(dup));	/* copy kept in ctx */
+		free(dup);
+		return val;
+	}
+	if (strcmp(keyword, "dirname") == 0) {
+		if ((dup = strdup(arcn->name)) == NULL)
+			return "";
+		val = listopt_store(ctx, dirname(dup));	/* copy kept in ctx */
+		free(dup);
+		return val;
+	}
+	val = pax_kv_lookup(arcn, keyword);	/* any other keyword */
+	return val ? val : "";
+}
+
+/* Interpret a keyword as a timespec; returns 0, or -1 if it cannot be parsed. */
+static int
+listopt_keyword_time(struct listopt_ctx *ctx, ARCHD *arcn,
+    const char *keyword, struct timespec *ts)
+{
+	const char *val;
+	char *end;	/* NOTE: ctx is not used in this function */
+
+	if (keyword == NULL || *keyword == '\0' ||
+	    strcmp(keyword, "mtime") == 0) {
+		*ts = arcn->sb.st_mtim;	/* default when no keyword is given */
+		return 0;
+	}
+	if (strcmp(keyword, "atime") == 0) {
+		*ts = arcn->sb.st_atim;
+		return 0;
+	}
+	if (strcmp(keyword, "ctime") == 0) {
+		*ts = arcn->sb.st_ctim;
+		return 0;
+	}
+	val = pax_kv_lookup(arcn, keyword);
+	if (val == NULL || *val == '\0')
+		return -1;
+	ts->tv_sec = strtoll(val, &end, 10);
+	ts->tv_nsec = 0;
+	if (end == val)
+		return -1;	/* no digits at all */
+	if (*end == '.') {
+		long nsec = 0;
+		int digits = 0;
+		for (end++; *end && isdigit((unsigned char)*end) && digits < 9;
+		    end++, digits++)
+			nsec = nsec * 10 + (*end - '0');
+		for (; digits < 9; digits++)
+			nsec *= 10;	/* scale a short fraction to nanoseconds */
+		ts->tv_nsec = nsec;
+	}
+	return 0;
+}
+
+/* Fetch a keyword as a signed number into *out; 0 on success, -1 otherwise. */
+static int
+listopt_keyword_sll(ARCHD *arcn, const char *keyword, long long *out)
+{
+	const char *val;
+	char *end;
+
+	if (keyword == NULL)
+		return -1;
+	if (strcmp(keyword, "uid") == 0) {
+		*out = arcn->sb.st_uid;
+		return 0;
+	}
+	if (strcmp(keyword, "gid") == 0) {
+		*out = arcn->sb.st_gid;
+		return 0;
+	}
+	if (strcmp(keyword, "nlink") == 0) {
+		*out = arcn->sb.st_nlink;
+		return 0;
+	}
+	if (strcmp(keyword, "mode") == 0) {
+		*out = arcn->sb.st_mode;
+		return 0;
+	}
+	val = pax_kv_lookup(arcn, keyword);	/* any other keyword */
+	if (val == NULL)
+		return -1;
+	*out = strtoll(val, &end, 10);
+	if (end == val)
+		return -1;	/* no digits; *out has already been written */
+	return 0;
+}
+
+/* Fetch a keyword as an unsigned number into *out; 0 on success, else -1. */
+static int
+listopt_keyword_ull(ARCHD *arcn, const char *keyword,
+    unsigned long long *out)
+{
+	const char *val;
+	char *end;
+
+	if (keyword == NULL)
+		return -1;
+	if (strcmp(keyword, "size") == 0) {
+		*out = arcn->sb.st_size;
+		return 0;
+	}
+	if (strcmp(keyword, "devmajor") == 0) {
+		*out = MAJOR(arcn->sb.st_rdev);
+		return 0;
+	}
+	if (strcmp(keyword, "devminor") == 0) {
+		*out = MINOR(arcn->sb.st_rdev);
+		return 0;
+	}
+	val = pax_kv_lookup(arcn, keyword);	/* any other keyword */
+	if (val == NULL)
+		return -1;
+	*out = strtoull(val, &end, 10);
+	if (end == val)
+		return -1;	/* no digits; *out has already been written */
+	return 0;
+}
+
 /*
  * constants used by ls_list() when printing out archive members
  */
@@ -82,6 +412,13 @@
 
 	term = zeroflag ? '\0' : '\n';	/* path termination character */
 
+	if (vflag && listopt_get() != NULL) {
+		listopt_output(arcn, fp);
+		(void)fputc(term, fp);
+		(void)fflush(fp);
+		return;
+	}
+
 	/*
 	 * if not verbose, just print the file name
 	 */
@@ -183,6 +520,248 @@
 	} else {
 		(void)fputs(str, fp);
 	}
+}
+
+/* Append chunk to the accumulated listopt format; 0 on success, -1 on error. */
+int
+listopt_append(const char *chunk)
+{
+	char *tmp;
+	size_t add;
+
+	if (chunk == NULL)
+		return 0;		/* nothing to add is not an error */
+	add = strlen(chunk);
+	if (add == 0)
+		return 0;
+	if (SIZE_MAX - listopt_len <= add)	/* len + add + 1 would wrap */
+		return -1;
+	tmp = realloc(listopt_format, listopt_len + add + 1);
+	if (tmp == NULL)
+		return -1;		/* existing format is left untouched */
+	listopt_format = tmp;
+	memcpy(listopt_format + listopt_len, chunk, add);
+	listopt_len += add;		/* length excludes the terminator */
+	listopt_format[listopt_len] = '\0';
+	return 0;
+}
+
+const char *
+listopt_get(void)
+{
+	return listopt_format;	/* text built by listopt_append(); NULL after listopt_reset() */
+}
+
+/* Release the accumulated listopt format; listopt_get() then returns NULL. */
+void
+listopt_reset(void)
+{
+	free(listopt_format);
+	listopt_format = NULL;
+	listopt_len = 0;	/* keep the length in step with the buffer */
+}
+
+/*
+ * Expand the custom listopt format for one archive member and write it to fp.
+ */
+static void
+listopt_output(ARCHD *arcn, FILE *fp)
+{
+	const char *fmt = listopt_get();
+	struct listopt_ctx ctx;
+	struct listopt_spec spec;
+	const char *next;
+	char fmtbuf[64];		/* printf format rebuilt per conversion */
+	char outbuf[PATH_MAX * 2];	/* scratch text for %T, %D, %F and %L */
+
+	if (fmt == NULL || *fmt == '\0')
+		return;
+	listopt_ctx_init(&ctx);
+	while (*fmt != '\0') {
+		if (*fmt != '%') {
+			(void)fputc(*fmt++, fp);	/* literal text */
+			continue;
+		}
+		if (fmt[1] == '%') {
+			fmt += 2;
+			(void)fputc('%', fp);
+			continue;
+		}
+		if (listopt_parse_spec(fmt, &spec, &next) <= 0) {
+			(void)fputc(*fmt++, fp);	/* bad spec: echo the '%' */
+			continue;
+		}
+		fmt = next;
+		switch (spec.conv) {
+		case 's':	/* string keyword, "path" when none is given */
+		{
+			const char *str = listopt_keyword_string(&ctx, arcn,
+			    spec.keyword[0] ? spec.keyword : "path");
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s",
+			    spec.flags, spec.width, spec.precision, "s");
+			(void)fprintf(fp, fmtbuf, str ? str : "");
+			break;
+		}
+		case 'c':	/* first character of the string keyword */
+		{
+			const char *str = listopt_keyword_string(&ctx, arcn,
+			    spec.keyword[0] ? spec.keyword : "path");
+			char ch = (str && *str) ? *str : ' ';
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%c",
+			    spec.flags, spec.width, spec.precision, 'c');
+			(void)fprintf(fp, fmtbuf, ch);
+			break;
+		}
+		case 'd':	/* signed numeric keyword, 0 when unavailable */
+		case 'i':
+		{
+			long long val = 0;
+			if (listopt_keyword_sll(arcn, spec.keyword, &val) != 0)
+				val = 0;
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%sll%c",
+			    spec.flags, spec.width, spec.precision, spec.conv);
+			(void)fprintf(fp, fmtbuf, val);
+			break;
+		}
+		case 'o':	/* unsigned numeric keyword, 0 when unavailable */
+		case 'u':
+		case 'x':
+		case 'X':
+		{
+			unsigned long long val = 0;
+			if (listopt_keyword_ull(arcn, spec.keyword, &val) != 0)
+				val = 0;
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%sll%c",
+			    spec.flags, spec.width, spec.precision, spec.conv);
+			(void)fprintf(fp, fmtbuf, val);
+			break;
+		}
+		case 'T':	/* strftime() text; nothing is printed on failure */
+		{
+			struct timespec ts;
+			struct tm tm;
+			const char *key = spec.keyword[0] ? spec.keyword : "mtime";
+			const char *tfmt = spec.subfmt[0] ? spec.subfmt :
+			    "%b %e %H:%M %Y";
+			if (listopt_keyword_time(&ctx, arcn, key, &ts) == 0 &&
+			    localtime_r(&ts.tv_sec, &tm) != NULL &&
+			    strftime(outbuf, sizeof(outbuf), tfmt, &tm) > 0) {
+				snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s",
+				    spec.flags, spec.width, spec.precision, "s");
+				(void)fprintf(fp, fmtbuf, outbuf);
+			}
+			break;
+		}
+		case 'M':	/* symbolic mode string from strmode() */
+		{
+			char modebuf[12];
+			strmode(arcn->sb.st_mode, modebuf);
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s",
+			    spec.flags, spec.width, spec.precision, "s");
+			(void)fprintf(fp, fmtbuf, modebuf);
+			break;
+		}
+		case 'D':	/* "major,minor" for devices, else numeric keyword */
+		{
+			const char *use = NULL;
+			if (S_ISCHR(arcn->sb.st_mode) || S_ISBLK(arcn->sb.st_mode)) {
+				snprintf(outbuf, sizeof(outbuf), "%lu,%lu",
+				    (u_long)MAJOR(arcn->sb.st_rdev),
+				    (u_long)MINOR(arcn->sb.st_rdev));
+				use = outbuf;
+			} else if (spec.keyword[0]) {
+				unsigned long long val = 0;
+				if (listopt_keyword_ull(arcn, spec.keyword, &val) == 0) {
+					snprintf(outbuf, sizeof(outbuf), "%llu", val);
+					use = outbuf;
+				}
+			}
+			if (use == NULL)
+				use = "";
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s",
+			    spec.flags, spec.width, spec.precision, "s");
+			(void)fprintf(fp, fmtbuf, use);
+			break;
+		}
+		case 'F':	/* path joined from a comma-separated keyword list */
+		{
+			const char *out = NULL;
+			if (!spec.keyword[0])
+				out = arcn->name;
+			else {
+				char *tmp = strdup(spec.keyword);
+				char *save = tmp;	/* strsep() advances tmp */
+				outbuf[0] = '\0';
+				if (tmp != NULL) {
+					char *token;
+					while ((token = strsep(&tmp, ",")) != NULL) {
+						const char *part =
+						    listopt_keyword_string(&ctx, arcn, token);
+						/* Missing components are skipped. */
+						if (part == NULL || *part == '\0')
+							continue;
+						if (outbuf[0] != '\0')
+							strlcat(outbuf, "/", sizeof(outbuf));
+						strlcat(outbuf, part, sizeof(outbuf));
+					}
+					free(save);
+					out = outbuf;
+				}
+			}
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s",
+			    spec.flags, spec.width, spec.precision, "s");
+			(void)fprintf(fp, fmtbuf, out ? out : "");
+			break;
+		}
+		case 'L':	/* "name -> target" for symlinks, else the name */
+		{
+			if (arcn->type == PAX_SLK)
+				snprintf(outbuf, sizeof(outbuf), "%s -> %s",
+				    arcn->name, arcn->ln_name);
+			else
+				strlcpy(outbuf, arcn->name, sizeof(outbuf));
+			snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s",
+			    spec.flags, spec.width, spec.precision, "s");
+			(void)fprintf(fp, fmtbuf, outbuf);
+			break;
+		}
+		default:	/* unknown conversion: print its letter */
+			(void)fputc(spec.conv, fp);
+			break;
+		}
+	}
+	listopt_ctx_free(&ctx);
+}
+
+void
+pax_kv_free(PAXKEY **head)
+{
+	PAXKEY *cur;
+
+	if (head == NULL)
+		return;
+	while ((cur = *head) != NULL) {	/* pop each node; *head ends up NULL */
+		*head = cur->next;
+		free(cur->name);
+		free(cur->value);
+		free(cur);
+	}
+}
+
+const char *
+pax_kv_lookup(const ARCHD *arcn, const char *key)
+{
+	const PAXKEY *kv;
+
+	if (arcn == NULL || key == NULL)
+		return NULL;
+	for (kv = arcn->xattr; kv != NULL; kv = kv->next)	/* per-file first */
+		if (strcmp(kv->name, key) == 0)
+			return kv->value;
+	for (kv = arcn->gattr; kv != NULL; kv = kv->next)	/* then global */
+		if (strcmp(kv->name, key) == 0)
+			return kv->value;
+	return NULL;		/* key is in neither list */
 }
 
 /*
Index: bin/pax/options.c
===================================================================
RCS file: /cvs/src/bin/pax/options.c,v
diff -u -r1.115 options.c
--- bin/pax/options.c	10 May 2024 20:28:31 -0000	1.115
+++ bin/pax/options.c	12 Oct 2025 07:53:49 -0000
@@ -36,6 +36,7 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <ctype.h>
 #include <errno.h>
 #include <limits.h>
 #include <paths.h>
@@ -51,6 +52,7 @@
 
 static int bad_opt(void);
 static int opt_add(const char *);
+static char *opt_parse_value(const char **, int);
 /*
  * argv[0] names. Used for tar and cpio emulation
  */
@@ -139,6 +141,8 @@
 static void printflg(unsigned int);
 static off_t str_offt(char *);
 static char *get_line(FILE *fp);
+static char *opt_parse_value(const char **, int);
+static void opt_common(void);
 static void pax_options(int, char **);
 static void pax_usage(void);
 static void tar_options(int, char **);
@@ -683,6 +687,8 @@
 	if (!(flg & XF) && (act == ARCHIVE))
 		frmt = &(fsub[DEFLT]);
 
+	opt_common();
+
 	/*
 	 * process the args as they are interpreted by the operation mode
 	 */
@@ -984,6 +990,8 @@
 	if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0)
 		exit(0);
 
+	opt_common();
+
 	/*
 	 * process the args as they are interpreted by the operation mode
 	 */
@@ -1408,6 +1416,8 @@
 	argc -= optind;
 	argv += optind;
 
+	opt_common();
+
 	/*
 	 * process the args as they are interpreted by the operation mode
 	 */
@@ -1522,6 +1532,54 @@
 	return(0);
 }
 
+static char *
+opt_parse_value(const char **srcp, int consume_rest)
+{
+	const char *src;
+	char *buf, *tmp;
+	size_t len, cap;
+
+	src = *srcp;
+	cap = 64;		/* starting capacity, doubled when full */
+	len = 0;
+	if ((buf = malloc(cap)) == NULL)
+		return NULL;
+
+	while (*src != '\0') {
+		if (*src == '\\' && src[1] != '\0') {	/* escaped character */
+			src++;		/* drop the backslash itself */
+			if (len + 1 >= cap) {
+				tmp = realloc(buf, cap * 2);
+				if (tmp == NULL) {
+					free(buf);
+					return NULL;
+				}
+				buf = tmp;
+				cap *= 2;
+			}
+			buf[len++] = *src++;	/* copied verbatim, even a ',' */
+			continue;
+		}
+		if (!consume_rest && *src == ',')	/* unescaped ',' ends value */
+			break;
+		if (len + 1 >= cap) {	/* keep one byte for the terminator */
+			tmp = realloc(buf, cap * 2);
+			if (tmp == NULL) {
+				free(buf);
+				return NULL;
+			}
+			buf = tmp;
+			cap *= 2;
+		}
+		buf[len++] = *src++;
+	}
+	buf[len] = '\0';
+	while (isspace((unsigned char)*src))	/* NOTE(review): src is at ',' or NUL here, so this cannot advance */
+		src++;
+	*srcp = src;		/* caller resumes at the ',' or the NUL */
+	return buf;		/* malloc'd copy of the value */
+}
+
 /*
  * opt_add()
  *	breaks the value supplied to -o into a option name and value. options
@@ -1534,58 +1592,130 @@
 static int
 opt_add(const char *str)
 {
+	const char *src;
 	OPLIST *opt;
-	char *frpt;
-	char *pt;
-	char *endpt;
-	char *dstr;
+	char *name, *value;
+	int assign;
 
-	if ((str == NULL) || (*str == '\0')) {
-		paxwarn(0, "Invalid option name");
-		return(-1);
+	if (str == NULL || *str == '\0') {
+		paxwarn(0, "Invalid option string");
+		return (-1);
 	}
-	if ((dstr = strdup(str)) == NULL) {
-		paxwarn(0, "Unable to allocate space for option list");
-		return(-1);
-	}
-	frpt = endpt = dstr;
 
-	/*
-	 * break into name and values pieces and stuff each one into a
-	 * OPLIST structure. When we know the format, the format specific
-	 * option function will go through this list
-	 */
-	while ((frpt != NULL) && (*frpt != '\0')) {
-		if ((endpt = strchr(frpt, ',')) != NULL)
-			*endpt = '\0';
-		if ((pt = strchr(frpt, '=')) == NULL) {
-			paxwarn(0, "Invalid options format");
-			free(dstr);
-			return(-1);
+	src = str;
+	while (*src != '\0') {
+		while (isspace((unsigned char)*src))
+			src++;
+		if (*src == '\0')
+			break;
+		if (*src == ',') {	/* empty list items are tolerated */
+			src++;
+			continue;
 		}
-		if ((opt = malloc(sizeof(OPLIST))) == NULL) {
-			paxwarn(0, "Unable to allocate space for option list");
-			free(dstr);
-			return(-1);
+
+		const char *key_start = src;	/* name runs to '=', ':', ',' or space */
+		while (*src != '\0' && *src != '=' && *src != ':' &&
+		    *src != ',' && !isspace((unsigned char)*src))
+			src++;
+		const char *key_end = src;	/* NOTE(review): the scan stops at the first space, so the trim below cannot run */
+		while (key_end > key_start &&
+		    isspace((unsigned char)key_end[-1]))
+			key_end--;
+		if (key_end == key_start) {
+			paxwarn(0, "Invalid option name");
+			return (-1);
 		}
-		dstr = NULL;	/* parts of string going onto the OPLIST */
-		*pt++ = '\0';
-		opt->name = frpt;
-		opt->value = pt;
+		if ((name = strndup(key_start, key_end - key_start)) == NULL)
+			return (-1);	/* NOTE(review): no paxwarn() on allocation failure */
+
+		while (isspace((unsigned char)*src))
+			src++;
+		assign = OPT_ASSIGN_NONE;	/* bare keyword unless an operator follows */
+		if (*src == ':' && src[1] == '=') {
+			assign = OPT_ASSIGN_COLON;
+			src += 2;
+		} else if (*src == '=') {
+			assign = OPT_ASSIGN_EQ;
+			src++;
+		} else if (*src == ':') {	/* NOTE(review): a lone ':' is accepted as '=' */
+			assign = OPT_ASSIGN_EQ;
+			src++;
+		}
+		while (isspace((unsigned char)*src))
+			src++;
+
+		if (assign != OPT_ASSIGN_NONE) {
+			int consume_rest = (strcmp(name, "listopt") == 0);	/* listopt keeps its commas */
+			value = opt_parse_value(&src, consume_rest);
+			if (value == NULL) {
+				free(name);
+				return (-1);
+			}
+			if (!consume_rest && *src == ',')
+				src++;
+		} else {
+			value = strdup("");	/* bare keywords carry an empty value */
+			if (value == NULL) {
+				free(name);
+				return (-1);
+			}
+			if (*src == ',')
+				src++;
+		}
+		while (isspace((unsigned char)*src))
+			src++;
+
+		if ((opt = malloc(sizeof(*opt))) == NULL) {
+			free(name);
+			free(value);
+			return (-1);
+		}
+		opt->name = name;	/* the list entry now owns both strings */
+		opt->value = value;
+		opt->assign = assign;
+		opt->handled = 0;
 		opt->fow = NULL;
-		if (endpt != NULL)
-			frpt = endpt + 1;
-		else
-			frpt = NULL;
-		if (ophead == NULL) {
+		if (ophead == NULL)
 			optail = ophead = opt;
+		else {
+			optail->fow = opt;
+			optail = opt;
+		}
+		if (assign != OPT_ASSIGN_NONE && strcmp(name, "listopt") == 0)
+			break;	/* listopt consumed the remainder of str */
+	}
+	return 0;
+}
+
+static void
+opt_common(void)
+{
+	OPLIST **prev, *opt, *next;
+
+	prev = &ophead;		/* address of the link that leads to opt */
+	while ((opt = *prev) != NULL) {
+		next = opt->fow;
+		if (strcmp(opt->name, "listopt") == 0) {
+			if (listopt_append(opt->value) < 0) {
+				paxwarn(1, "Unable to record listopt format");
+				pax_usage();
+			}
+			*prev = next;	/* unlink the entry before freeing it */
+			free(opt->name);
+			free(opt->value);
+			free(opt);
 			continue;
 		}
-		optail->fow = opt;
-		optail = opt;
+		prev = &opt->fow;	/* entry kept: step to its forward link */
 	}
-	free(dstr);
-	return(0);
+
+	optail = ophead;	/* removals may have dropped the old tail */
+	if (optail != NULL) {
+		while (optail->fow != NULL)
+			optail = optail->fow;
+	}
+	/* Global keyword state is per-invocation, reset before parsing formats. */
+	pax_option_reset_session();
 }
 
 /*
Index: bin/pax/pax.h
===================================================================
RCS file: /cvs/src/bin/pax/pax.h,v
diff -u -r1.29 pax.h
--- bin/pax/pax.h	12 Sep 2017 17:11:11 -0000	1.29
+++ bin/pax/pax.h	12 Oct 2025 07:54:08 -0000
@@ -98,6 +98,41 @@
  * may be required if and when the supporting operating system removes all
  * restrictions on the length of pathnames it will resolve.
  */
+/*
+ * Linked list entry used to retain pax extended header keywords.
+ */
+typedef struct paxkey {
+	char		*name;		/* keyword; released by pax_kv_free() */
+	char		*value;		/* its value; released by pax_kv_free() */
+	struct paxkey	*next;		/* next entry, NULL ends the list */
+} PAXKEY;
+
+/* Records command-line supplied pax keywords and their assignment style. */
+typedef struct paxopkv {
+	char		*name;		/* keyword name */
+	char		*value;		/* keyword value */
+	int		 assign;	/* presumably an OPT_ASSIGN_* value -- TODO confirm */
+	struct paxopkv	*next;		/* next record */
+} PAXOPKV;
+
+/* Tracks delete= pattern expressions supplied via -o options. */
+typedef struct paxdel {
+	char		*pattern;	/* pattern text from delete=pattern */
+	struct paxdel	*next;		/* next pattern */
+} PAXDEL;
+
+enum pax_invalid_action {	/* policy type returned by pax_option_invalid() */
+	PAX_INVALID_BYPASS,	/* presumably invalid=bypass -- confirm */
+	PAX_INVALID_WRITE,	/* presumably invalid=write -- confirm */
+	PAX_INVALID_RENAME,	/* presumably invalid=rename -- confirm */
+	PAX_INVALID_UTF8,	/* presumably invalid=UTF-8 -- confirm */
+	PAX_INVALID_BINARY	/* presumably invalid=binary -- confirm */
+};
+
+#define	PAX_INVALID_NONE	0	/* ARCHD.invalid value stored by next_head() */
+#define	PAX_INVALID_SKIP	1	/* ARCHD.invalid: read loops rd_skip() the member */
+#define	PAX_INVALID_RENAME	2	/* NOTE(review): redefines the enumerator of the same name above (also 2) */
+
 typedef struct {
 	int nlen;			/* file name length */
 	char name[PAXPATHLEN+1];	/* file name */
@@ -125,6 +160,9 @@
 #define PAX_CTG		10		/* high performance file */
 #define PAX_GLL		11		/* GNU long symlink */
 #define PAX_GLF		12		/* GNU long file */
+	PAXKEY *xattr;			/* file specific pax keywords */
+	const PAXKEY *gattr;		/* global pax keywords in effect */
+	int invalid;			/* invalid handling state */
 } ARCHD;
 
 #define PAX_IS_REG(type)	((type) == PAX_REG || (type) == PAX_CTG)
@@ -236,8 +274,14 @@
 typedef struct oplist {
 	char		*name;		/* option variable name e.g. name= */
 	char		*value;		/* value for option variable */
+	int		 assign;	/* OPT_ASSIGN_NONE, OPT_ASSIGN_EQ or OPT_ASSIGN_COLON */
+	int		 handled;	/* option consumed by generic parser; opt_add() stores 0 */
 	struct oplist	*fow;		/* next option */
 } OPLIST;
+
+#define OPT_ASSIGN_NONE	0
+#define OPT_ASSIGN_EQ	1
+#define OPT_ASSIGN_COLON	2
 
 /*
  * General Macros
Index: bin/pax/ar_subs.c
===================================================================
RCS file: /cvs/src/bin/pax/ar_subs.c,v
diff -u -r1.53 ar_subs.c
--- bin/pax/ar_subs.c	14 Jul 2024 14:32:02 -0000	1.53
+++ bin/pax/ar_subs.c	12 Oct 2025 07:54:16 -0000
@@ -92,6 +92,12 @@
 	 * step through the archive until the format says it is done
 	 */
 	while (next_head(arcn) == 0) {
+		/* Skip archive members rejected by invalid= policy. */
+		if (arcn->invalid == PAX_INVALID_SKIP) {
+			if (rd_skip(arcn->skip + arcn->pad) == 1)
+				break;
+			continue;
+		}
 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
 			/*
 			 * we need to read, to get the real filename
@@ -244,6 +250,12 @@
 	 * says it is done
 	 */
 	while (next_head(arcn) == 0) {
+		/* Honor invalid=bypass by skipping unwanted members outright. */
+		if (arcn->invalid == PAX_INVALID_SKIP) {
+			if (rd_skip(arcn->skip + arcn->pad) == 1)
+				break;
+			continue;
+		}
 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
 			/*
 			 * we need to read, to get the real filename
@@ -685,6 +697,12 @@
 	 * step through the archive until the format says it is done
 	 */
 	while (next_head(arcn) == 0) {
+		/* Entries flagged for bypass are consumed without further work. */
+		if (arcn->invalid == PAX_INVALID_SKIP) {
+			if (rd_skip(arcn->skip + arcn->pad) == 1)
+				break;
+			continue;
+		}
 		/*
 		 * check if this file meets user specified options.
 		 */
@@ -1024,6 +1042,14 @@
 	int in_resync = 0;		/* set when we are in resync mode */
 	int cnt = 0;			/* counter for trailer function */
 	int first = 1;			/* on 1st read, EOF isn't premature. */
+
+	/*
+	 * Clear out any per-file extended header state left from the
+	 * previous archive member before we reuse the structure.
+	 */
+	pax_kv_free(&arcn->xattr);
+	arcn->gattr = NULL;
+	arcn->invalid = PAX_INVALID_NONE;
 
 	/*
 	 * set up initial conditions, we want a whole frmt->hsz block as we
Index: bin/pax/extern.h
===================================================================
RCS file: /cvs/src/bin/pax/extern.h,v
diff -u -r1.64 extern.h
--- bin/pax/extern.h	17 Apr 2024 18:12:12 -0000	1.64
+++ bin/pax/extern.h	12 Oct 2025 07:54:24 -0000
@@ -152,11 +152,34 @@
 void ls_list(ARCHD *, time_t, FILE *);
 void ls_tty(ARCHD *);
 void safe_print(const char *, FILE *);
+int listopt_append(const char *);
+const char *listopt_get(void);
+void listopt_reset(void);
+void pax_option_set_linkdata(int);
+void pax_option_set_times(int);
+int pax_option_add_delete(const char *);
+int pax_option_set_invalid(const char *);
+int pax_option_set_exthdr_name(const char *);
+int pax_option_set_globexthdr_name(const char *);
+int pax_option_add_keyword(const char *, const char *, int);
+const PAXDEL *pax_option_delete(void);
+const PAXOPKV *pax_option_keywords(int);
+int pax_option_linkdata(void);
+int pax_option_times(void);
+const char *pax_option_exthdr_name(void);
+const char *pax_option_globexthdr_name(void);
+enum pax_invalid_action pax_option_invalid(void);
+void pax_option_reset_session(void);
 u_long asc_ul(char *, int, int);
+int pax_handle_invalid_path(ARCHD *, const char *, const char *);
+int pax_handle_invalid_link(ARCHD *, const char *, const char *);
+void pax_mark_skip(ARCHD *);
 int ul_asc(u_long, char *, int, int);
 unsigned long long asc_ull(char *, int, int);
 int ull_asc(unsigned long long, char *, int, int);
 size_t fieldcpy(char *, size_t, const char *, size_t);
+void pax_kv_free(PAXKEY **);
+const char *pax_kv_lookup(const ARCHD *, const char *);
 
 /*
  * getoldopt.c
Index: bin/pax/pax.1
===================================================================
RCS file: /cvs/src/bin/pax/pax.1,v
diff -u -r1.80 pax.1
--- bin/pax/pax.1	30 Nov 2024 06:59:12 -0000	1.80
+++ bin/pax/pax.1	12 Oct 2025 07:54:32 -0000
@@ -453,8 +453,13 @@
 .Fl x .
 In general,
 .Ar options
-take the form:
-.Ar name Ns = Ns Ar value .
+take the form
+.Ar name Ns = Ns Ar value
+or
+.Ar name Ns := Ns Ar value .
+Multiple keywords can be separated with commas.
+Backslash can be used to escape a literal comma or backslash inside a value.
+When the same keyword appears more than once, the last value wins.
 .Pp
 The following options are available for the
 .Cm ustar
@@ -467,6 +472,74 @@
 .It Cm write_opt=nodir
 When writing archives, omit the storage of directories.
 .El
+.Pp
+When the selected archive format is
+.Cm pax ,
+the following keywords are also understood:
+.Bl -tag -width "globexthdr.name=string"
+.It Cm delete Ns = Ns Ar pattern
+Suppress extended header keywords whose name matches
+.Ar pattern .
+.It Cm exthdr.name Ns = Ns Ar string
+Replace the default name used to store per-file extended attributes.
+The template may include
+.Ql %d
+(directory portion of the path),
+.Ql %f
+(final pathname component),
+.Ql %p
+(process ID), and
+.Ql %%
+for a literal percent sign.
+.It Cm globexthdr.name Ns = Ns Ar string
+Select the name written for
+.Ql typeflag g
+global extended headers.
+The template may include
+.Ql %n
+(the sequence number starting at 1),
+.Ql %p ,
+and
+.Ql %% .
+.It Cm invalid Ns = Ns Ar action
+Choose how pathnames that cannot be represented locally are handled when
+reading archives.
+The
+.Ar action
+may be one of
+.Cm bypass ,
+.Cm write ,
+.Cm rename ,
+.Cm UTF-8 ,
+or
+.Cm binary .
+.It Cm linkdata
+Always store the file data for hard links instead of sharing the contents
+with another archive member.
+.It Cm listopt Ns = Ns Ar format
+Customise the verbose listing produced in list mode with
+.Fl v .
+See
+.Sx List Output Formatting
+for the supported conversions.
+Multiple
+.Fl o Cm listopt
+options are concatenated in order.
+.It Cm times
+Force the emission of
+.Ql atime
+and
+.Ql mtime
+extended header records for each file when writing or copying.
+.It Ar keyword Ns = Ns Ar value
+Write
+.Ar keyword
+as a global extended header record when archiving, or override the value
+read from the archive when extracting.
+.It Ar keyword Ns := Ns Ar value
+Write a per-file extended header record with the specified value, or override
+the value for the current file when reading.
+.El
 .It Fl P
 Do not follow symbolic links, perform a physical file system traversal.
 This is the default mode.
@@ -789,6 +862,63 @@
 archive member.
 The trailing newline
 is not buffered and is written only after the file has been read or written.
+.Ss List Output Formatting
+The
+.Fl o Cm listopt Ns = Ns Ar format
+keyword alters the verbose list produced with
+.Fl v
+in list mode.
+The format string follows the rules of
+.Xr printf 3
+with the addition that conversions may be prefixed by
+.Pq Em keyword
+to select the field that supplies the argument.
+Unless noted below, the standard flags, field width and precision are honoured.
+Keywords refer to archive header fields, including any values supplied by pax
+extended headers or by explicit
+.Fl o
+options.
+.Pp
+The following conversions extend the behaviour of
+.Xr printf 3 :
+.Bl -tag -width Ds
+.It %s , %c , %d , %i , %o , %u , %x , %X
+Print the selected keyword with the requested conversion.
+When no keyword is given, %s and %c use
+.Ql path .
+.It %T
+Formats a time value.
+If the keyword is omitted
+.Ql mtime
+is used.
+A keyword of the form
+.Ql keyword=subformat
+selects the time field and the
+.Xr strftime 3
+format string to apply (default:
+.Ql %b %e %H:%M %Y ) .
+.It %M
+Prints the symbolic file mode string as produced by
+.Xr strmode 3 .
+.It %D
+Prints the device numbers for block or character special files
+as two comma-separated integers.
+For any other file type, the numeric value of the supplied keyword
+(if one is given) is printed instead.
+.It %F
+Prints a pathname composed from a comma-separated list of keywords.
+Missing components are skipped.
+When no list is supplied the stored pathname is used.
+.It %L
+Prints a symbolic link in the form
+.Dq path -> target .
+Non-links fall back to
+.Ql %F .
+.El
+.Pp
+Multiple
+.Fl o Cm listopt
+options append to the overall format string in the order they appear.
 .It Fl w
 Write files to the standard output
 in the specified archive format.
@@ -1080,14 +1210,8 @@
 The
 .Nm
 utility is compliant with the
-.St -p1003.1-2008
-specification,
-except that the
-.Cm pax
-archive format is only partially supported,
-and the
-.Cm listopt
-keyword is unsupported.
+.St -p1003.1-2024
+specification.
 .Pp
 The flags
 .Op Fl 0BDEGjOPTUYZz ,