From: David Uhden Collado Subject: pax(1): new -o keyword framework, listopt, global exthdrs, and stricter invalid-path handling To: tech@openbsd.org Date: Sun, 12 Oct 2025 10:28:35 +0200 Hello everyone, I have attached a patch to this email that modernizes pax/tar to match POSIX.1-2024 and adds a comprehensive -o keyword framework. It supports global and per-file pax keywords (KEY=VALUE and KEY:=VALUE), deletion filters (delete=pattern), configurable names for extended-header files (exthdr.name= and globexthdr.name= with % placeholders), and new toggles like linkdata (store hard-link contents) and times (always emit atime/mtime). The -o parser is rebuilt to handle "=", ":=", comma-separated lists, and backslash escaping, with last-value-wins semantics. When global keywords are present, a single global pax extended header (typeflag 'g') is written once per archive. On read, a new invalid path/link policy is introduced via invalid=bypass|write|rename|utf-8|binary; entries marked for bypass are drained cleanly, and rename is requested where applicable. On write, hard links can optionally carry data, and timestamp emission is more consistent. A new listopt=FORMAT feature customizes verbose listings (-v) using printf-like conversions with pax-aware fields, including %T (strftime time), %M (symbolic mode), %D (device numbers or numeric keyword), %F (composed path), and %L ("path -> linktarget"). The man page documents all new options and claims POSIX.1-2024 compliance. Internally, the change adds keyword/storage structs (PAXKEY, PAXOPKV, PAXDEL), extends ARCHD with xattr/gattr/invalid, enhances wr_xheader() for global headers and naming templates, resets per-member state between entries, and includes small correctness/robustness fixes. Best regards, David. 
Index: bin/pax/tar.c =================================================================== RCS file: /cvs/src/bin/pax/tar.c,v diff -u -r1.87 tar.c --- bin/pax/tar.c 6 Jul 2025 19:25:51 -0000 1.87 +++ bin/pax/tar.c 12 Oct 2025 07:53:27 -0000 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ #include #include #include +#include #include #include "pax.h" @@ -59,6 +61,22 @@ char *record; }; +/* Command-line controlled pax state shared across parsing and I/O. */ +static PAXKEY *pax_global_xattr; +static int pax_opt_linkdata; +static int pax_opt_times; +static enum pax_invalid_action pax_opt_invalid = PAX_INVALID_BYPASS; +static char *pax_opt_exthdr_name; +static char *pax_opt_globexthdr_name; +static PAXDEL *pax_opt_delete_list; +static PAXOPKV *pax_opt_keywords_global; +static PAXOPKV *pax_opt_keywords_local; +static int pax_keywords_prepared; +#ifndef SMALL +static int pax_global_written; +static unsigned int pax_global_seq = 1; +#endif + /* shortest possible extended record: "5 a=\n" */ #define MINXHDRSZ 5 @@ -73,8 +91,19 @@ static int ull_oct(unsigned long long, char *, int, int); static int rd_xheader(ARCHD *, int, off_t); #ifndef SMALL -static int wr_xheader(char *, HD_USTAR *, struct xheader *); +static int wr_xheader(const char *, HD_USTAR *, struct xheader *, int, + const char *, unsigned int); #endif +static int pax_store_kv(PAXKEY **, const char *, const char *); +static void pax_apply_global(ARCHD *); +static void pax_global_free(void); +static int pax_keyword_deleted(const char *); +static void pax_prepare_user_keywords(void); +static void pax_apply_local_option_keywords(ARCHD *); +static int pax_format_xhdr_name(char *, size_t, const char *, const char *, + unsigned int); +static void pax_option_apply_local_xhdr(struct xheader *); +static int pax_write_global_header(void); static uid_t uid_nobody; static uid_t uid_warn; @@ -112,6 +141,7 @@ off_t tar_endrd(void) { + pax_global_free(); return NULLCNT * BLKMULT; 
} @@ -656,7 +686,7 @@ * to be written */ if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, - sizeof(hd->chksum), 3)) + sizeof(hd->chksum), 3)) goto out; if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0 || wr_skip(BLKMULT - sizeof(HD_TAR)) < 0) { @@ -735,8 +765,11 @@ if (ustar_id(buf, BLKMULT) < 0) return(-1); + pax_prepare_user_keywords(); + reset: memset(arcn, 0, sizeof(*arcn)); + pax_apply_global(arcn); arcn->org_name = arcn->name; arcn->sb.st_nlink = 1; arcn->sb.st_size = (off_t)-1; @@ -881,6 +914,10 @@ */ arcn->sb.st_mode |= S_IFREG; arcn->sb.st_nlink = 2; + if (arcn->sb.st_size > 0) { + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + } } break; case LONGLINKTYPE: @@ -908,6 +945,8 @@ arcn->sb.st_mode |= S_IFREG; break; } + + pax_apply_local_option_keywords(arcn); return(0); } @@ -920,6 +959,9 @@ int reclen, tmplen; char *s; + if (pax_keyword_deleted(keyword)) + return 0; + tmplen = MINXHDRSZ; do { reclen = tmplen; @@ -952,6 +994,9 @@ int reclen, tmplen; char *s; + if (pax_keyword_deleted(keyword)) + return 0; + tmplen = MINXHDRSZ; do { reclen = tmplen; @@ -985,6 +1030,9 @@ char frac[sizeof(".111222333")] = ""; char *s; + if (pax_keyword_deleted(keyword)) + return 0; + /* Only write subsecond part if non-zero */ if (value->tv_nsec != 0) { int n; @@ -1038,7 +1086,8 @@ } static int -wr_xheader(char *fname, HD_USTAR *fhd, struct xheader *xhdr) +wr_xheader(const char *fname, HD_USTAR *fhd, struct xheader *xhdr, + int global, const char *override_name, unsigned int seq) { char hdblk[sizeof(HD_USTAR)]; HD_USTAR *hd; @@ -1052,41 +1101,66 @@ memset(hdblk, 0, sizeof(hdblk)); hd = (HD_USTAR *)hdblk; - hd->typeflag = XHDRTYPE; + hd->typeflag = global ? GHDRTYPE : XHDRTYPE; strncpy(hd->magic, TMAGIC, TMAGLEN); strncpy(hd->version, TVERSION, TVERSLEN); if (ul_oct(size, hd->size, sizeof(hd->size), 3)) goto out; - /* - * Best effort attempt at providing a useful file name for - * implementations that don't support pax format. 
Don't bother - * with truncation if the resulting file name doesn't fit. - * XXX dirname/basename portability (check return value?) - */ - (void)snprintf(buf, sizeof(buf), "%s/PaxHeaders.%ld/%s", - dirname(fname), (long)getpid(), basename(fname)); + if (global) { + const char *fmt = override_name != NULL ? override_name : + pax_option_globexthdr_name(); + if (fmt != NULL) { + if (pax_format_xhdr_name(buf, sizeof(buf), fmt, + fname ? fname : "", seq) == -1) + goto out; + } else { + const char *tmpdir = getenv("TMPDIR"); + if (tmpdir == NULL || *tmpdir == '\0') + tmpdir = "/tmp"; + (void)snprintf(buf, sizeof(buf), "%s/GlobalHead.%ld.%u", + tmpdir, (long)getpid(), seq); + } + } else { + const char *fmt = override_name != NULL ? override_name : + pax_option_exthdr_name(); + if (fmt != NULL) { + if (pax_format_xhdr_name(buf, sizeof(buf), fmt, + fname ? fname : "", 0) == -1) + goto out; + } else if (fname != NULL) { + char *opath = NULL, *odirbuf = NULL; + const char *obase = fname; + const char *odir = "."; + + if ((opath = strdup(fname)) != NULL) + obase = basename(opath); + if ((odirbuf = strdup(fname)) != NULL) + odir = dirname(odirbuf); + (void)snprintf(buf, sizeof(buf), "%s/PaxHeaders.%ld/%s", + odir ? odir : ".", (long)getpid(), obase); + free(opath); + free(odirbuf); + } else { + (void)strlcpy(buf, "PaxHeaders", sizeof(buf)); + } + } fieldcpy(hd->name, sizeof(hd->name), buf, sizeof(buf)); - /* - * Inherit mode, mtime and owner from the file the headers are for. - * This will only be extracted as an actual file by implementations - * that don't support pax format. 
- */ - memcpy(hd->mode, fhd->mode, sizeof(hd->mode)); - memcpy(hd->mtime, fhd->mtime, sizeof(hd->mtime)); - memcpy(hd->uid, fhd->uid, sizeof(hd->uid)); - memcpy(hd->gid, fhd->gid, sizeof(hd->gid)); + if (fhd != NULL) { + memcpy(hd->mode, fhd->mode, sizeof(hd->mode)); + memcpy(hd->mtime, fhd->mtime, sizeof(hd->mtime)); + memcpy(hd->uid, fhd->uid, sizeof(hd->uid)); + memcpy(hd->gid, fhd->gid, sizeof(hd->gid)); + } if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, - sizeof(hd->chksum), 3)) + sizeof(hd->chksum), 3)) goto out; - /* write out extended header */ if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0 || wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0) goto err; - /* write out extended header records */ SLIST_FOREACH(rec, xhdr, entry) if (wr_rdbuf(rec->record, rec->reclen) < 0) goto err; @@ -1097,19 +1171,507 @@ return 0; out: - /* - * header field is out of range - */ - paxwarn(1, "Pax header field is too small for %s", fname); + paxwarn(1, "Pax header field is too small for %s", + fname ? fname : "pax header"); return 1; err: - paxwarn(1,"Could not write pax extended header for %s", fname); + paxwarn(1,"Could not write pax extended header for %s", + fname ? 
fname : "pax header"); return -1; } #endif static int +pax_store_kv(PAXKEY **head, const char *keyword, const char *value) +{ + PAXKEY **curp, *kv; + char *dup; + + if (head == NULL || keyword == NULL || value == NULL) + return -1; + + for (curp = head; (kv = *curp) != NULL; curp = &kv->next) { + if (strcmp(kv->name, keyword) == 0) + break; + } + + if (value[0] == '\0') { + if (kv != NULL) { + *curp = kv->next; + free(kv->name); + free(kv->value); + free(kv); + } + return 0; + } + + if (kv == NULL) { + if ((kv = calloc(1, sizeof(*kv))) == NULL) + return -1; + if ((kv->name = strdup(keyword)) == NULL) { + free(kv); + return -1; + } + if ((kv->value = strdup(value)) == NULL) { + free(kv->name); + free(kv); + return -1; + } + kv->next = *head; + *head = kv; + return 0; + } + + dup = strdup(value); + if (dup == NULL) + return -1; + free(kv->value); + kv->value = dup; + return 0; +} + +static void +pax_apply_global(ARCHD *arcn) +{ + if (arcn != NULL) + arcn->gattr = pax_global_xattr; +} + +static void +pax_global_free(void) +{ + pax_kv_free(&pax_global_xattr); +} + +static int +pax_keyword_deleted(const char *keyword) +{ + const PAXDEL *del; + + for (del = pax_option_delete(); del != NULL; del = del->next) + if (fnmatch(del->pattern, keyword, 0) == 0) + return 1; + return 0; +} + +static void +pax_prepare_user_keywords(void) +{ + const PAXOPKV *kv; + + if (pax_keywords_prepared) + return; + for (kv = pax_option_keywords(OPT_ASSIGN_EQ); kv != NULL; + kv = kv->next) { + if (pax_keyword_deleted(kv->name)) + continue; + if (pax_store_kv(&pax_global_xattr, kv->name, kv->value) == -1) + paxwarn(1, "Unable to apply global keyword %s", kv->name); + } + pax_keywords_prepared = 1; +} + +static void +pax_apply_local_option_keywords(ARCHD *arcn) +{ + const PAXOPKV *kv; + + for (kv = pax_option_keywords(OPT_ASSIGN_COLON); kv != NULL; + kv = kv->next) { + if (pax_keyword_deleted(kv->name)) + continue; + if (pax_store_kv(&arcn->xattr, kv->name, kv->value) == -1) + paxwarn(1, "Unable 
to apply per-file keyword %s", + kv->name); + } +} + +#ifndef SMALL +/* Inject per-file keyword overrides into the list of pax header records. */ +static void +pax_option_apply_local_xhdr(struct xheader *xhdr) +{ + const PAXOPKV *kv; + + if (xhdr == NULL) + return; + for (kv = pax_option_keywords(OPT_ASSIGN_COLON); kv != NULL; + kv = kv->next) { + if (pax_keyword_deleted(kv->name)) + continue; + if (xheader_add(xhdr, kv->name, kv->value) == -1) + paxwarn(1, "Unable to write per-file keyword %s", + kv->name); + } +} + +/* Emit a single typeflag 'g' global header the first time one is needed. */ +static int +pax_write_global_header(void) +{ + const PAXOPKV *kv; + struct xheader xhdr = SLIST_HEAD_INITIALIZER(xhdr); + int have = 0; + HD_USTAR dummy; + int ret; + + if (pax_global_written) + return 0; + memset(&dummy, 0, sizeof(dummy)); + for (kv = pax_option_keywords(OPT_ASSIGN_EQ); kv != NULL; + kv = kv->next) { + if (pax_keyword_deleted(kv->name)) + continue; + if (xheader_add(&xhdr, kv->name, kv->value) == -1) { + xheader_free(&xhdr); + return -1; + } + have = 1; + } + if (!have) { + xheader_free(&xhdr); + pax_global_written = 1; + return 0; + } + ret = wr_xheader(NULL, &dummy, &xhdr, 1, NULL, pax_global_seq++); + xheader_free(&xhdr); + if (ret < 0) + return -1; + if (ret >= 0) + pax_global_written = 1; + return ret; +} +#endif + +void +pax_option_set_linkdata(int enable) +{ + pax_opt_linkdata = (enable != 0); +} + +/* Remember whether to unconditionally emit atime/mtime keywords. 
*/ +void +pax_option_set_times(int enable) +{ + pax_opt_times = (enable != 0); +} + +int +pax_option_set_invalid(const char *value) +{ + if (value == NULL) + return -1; + if (strcasecmp(value, "bypass") == 0) + pax_opt_invalid = PAX_INVALID_BYPASS; + else if (strcasecmp(value, "write") == 0) + pax_opt_invalid = PAX_INVALID_WRITE; + else if (strcasecmp(value, "rename") == 0) + pax_opt_invalid = PAX_INVALID_RENAME; + else if (strcasecmp(value, "utf-8") == 0) + pax_opt_invalid = PAX_INVALID_UTF8; + else if (strcasecmp(value, "binary") == 0) + pax_opt_invalid = PAX_INVALID_BINARY; + else + return -1; + return 0; +} + +static int +pax_option_store_string(char **dst, const char *value) +{ + char *dup; + + if (value == NULL) + return -1; + dup = strdup(value); + if (dup == NULL) + return -1; + free(*dst); + *dst = dup; + return 0; +} + +int +pax_option_set_exthdr_name(const char *value) +{ + return pax_option_store_string(&pax_opt_exthdr_name, value); +} + +int +pax_option_set_globexthdr_name(const char *value) +{ + return pax_option_store_string(&pax_opt_globexthdr_name, value); +} + +int +pax_option_add_delete(const char *pattern) +{ + PAXDEL *node, *cur; + + if (pattern == NULL) + return -1; + if ((node = malloc(sizeof(*node))) == NULL) + return -1; + if ((node->pattern = strdup(pattern)) == NULL) { + free(node); + return -1; + } + node->next = NULL; + if (pax_opt_delete_list == NULL) + pax_opt_delete_list = node; + else { + cur = pax_opt_delete_list; + while (cur->next != NULL) + cur = cur->next; + cur->next = node; + } + return 0; +} + +static int +pax_option_add_keyword_internal(PAXOPKV **head, const char *name, + const char *value, int assign) +{ + PAXOPKV *node, *cur; + + if ((node = malloc(sizeof(*node))) == NULL) + return -1; + if ((node->name = strdup(name)) == NULL) { + free(node); + return -1; + } + if ((node->value = strdup(value)) == NULL) { + free(node->name); + free(node); + return -1; + } + node->assign = assign; + node->next = NULL; + if (*head == NULL) 
+ *head = node; + else { + cur = *head; + while (cur->next != NULL) + cur = cur->next; + cur->next = node; + } + return 0; +} + +int +pax_option_add_keyword(const char *name, const char *value, int assign) +{ + if (name == NULL || value == NULL) + return -1; + if (assign == OPT_ASSIGN_COLON) + return pax_option_add_keyword_internal(&pax_opt_keywords_local, + name, value, assign); + return pax_option_add_keyword_internal(&pax_opt_keywords_global, + name, value, assign); +} + +const PAXDEL * +pax_option_delete(void) +{ + return pax_opt_delete_list; +} + +const PAXOPKV * +pax_option_keywords(int assign) +{ + if (assign == OPT_ASSIGN_COLON) + return pax_opt_keywords_local; + return pax_opt_keywords_global; +} + +int +pax_option_linkdata(void) +{ + return pax_opt_linkdata; +} + +int +pax_option_times(void) +{ + return pax_opt_times; +} + +const char * +pax_option_exthdr_name(void) +{ + return pax_opt_exthdr_name; +} + +const char * +pax_option_globexthdr_name(void) +{ + return pax_opt_globexthdr_name; +} + +enum pax_invalid_action +pax_option_invalid(void) +{ + return pax_opt_invalid; +} + +/* Reset state derived from -o keywords before parsing a new invocation. */ +void +pax_option_reset_session(void) +{ +#ifndef SMALL + pax_global_written = 0; + pax_global_seq = 1; +#endif +} + +/* Render a user-supplied template for extended header filenames. 
*/ +static int +pax_format_xhdr_name(char *buf, size_t bufsz, const char *fmt, + const char *path, unsigned int seq) +{ + char *path_copy = NULL, *dir_copy = NULL; + const char *dir = "."; + const char *file = path; + char *bp; + size_t remaining = bufsz; + + if (fmt == NULL || buf == NULL || bufsz == 0) + return -1; + + if (path != NULL && *path != '\0') { + if ((path_copy = strdup(path)) != NULL) + file = basename(path_copy); + if ((dir_copy = strdup(path)) != NULL) + dir = dirname(dir_copy); + } + + buf[0] = '\0'; + bp = buf; + while (*fmt != '\0') { + if (*fmt != '%') { + if (remaining <= 1) + goto toolong; + *bp++ = *fmt++; + remaining--; + continue; + } + fmt++; + char to_insert = *fmt; + if (to_insert == '\0') + break; + fmt++; + const char *ins = NULL; + char tmp[32]; + size_t inslen = 0; + switch (to_insert) { + case 'd': + ins = dir; + break; + case 'f': + ins = file; + break; + case 'p': + snprintf(tmp, sizeof(tmp), "%ld", (long)getpid()); + ins = tmp; + break; + case 'n': + snprintf(tmp, sizeof(tmp), "%u", + seq == 0 ? 1U : seq); + ins = tmp; + break; + case '%': + tmp[0] = '%'; + tmp[1] = '\0'; + ins = tmp; + break; + default: + tmp[0] = to_insert; + tmp[1] = '\0'; + ins = tmp; + break; + } + if (ins == NULL) + ins = ""; + inslen = strlen(ins); + if (inslen >= remaining) + goto toolong; + memcpy(bp, ins, inslen); + bp += inslen; + remaining -= inslen; + } + if (remaining == 0) + goto toolong; + *bp = '\0'; + free(path_copy); + free(dir_copy); + return 0; + +toolong: + free(path_copy); + free(dir_copy); + return -1; +} + +/* Decide what to do with an invalid pathname encountered during read. 
*/ +int +pax_handle_invalid_path(ARCHD *arcn, const char *keyword, const char *value) +{ + enum pax_invalid_action act = pax_option_invalid(); + + switch (act) { + case PAX_INVALID_BYPASS: + paxwarn(1, "Skipping entry with invalid %s \"%s\"", + keyword, value); + pax_mark_skip(arcn); + return -1; + case PAX_INVALID_WRITE: + case PAX_INVALID_UTF8: + case PAX_INVALID_BINARY: + return 0; + case PAX_INVALID_RENAME: + paxwarn(0, "Invalid %s \"%s\"; requesting rename", keyword, + value); + arcn->invalid = PAX_INVALID_RENAME; + return 0; + } + return 0; +} + +/* Apply the invalid= policy to link targets pulled from extended headers. */ +int +pax_handle_invalid_link(ARCHD *arcn, const char *keyword, const char *value) +{ + enum pax_invalid_action act = pax_option_invalid(); + + switch (act) { + case PAX_INVALID_BYPASS: + paxwarn(1, "Skipping link with invalid %s \"%s\"", keyword, + value); + pax_mark_skip(arcn); + return -1; + case PAX_INVALID_WRITE: + case PAX_INVALID_UTF8: + case PAX_INVALID_BINARY: + return 0; + case PAX_INVALID_RENAME: + paxwarn(1, "Cannot rename invalid link target \"%s\"; skipping", + value); + pax_mark_skip(arcn); + return -1; + } + return 0; +} + +/* Mark the current archive member so the outer loops can drain it safely. */ +void +pax_mark_skip(ARCHD *arcn) +{ + if (arcn != NULL) + arcn->invalid = PAX_INVALID_SKIP; +} + +static int wr_ustar_or_pax(ARCHD *arcn, int ustar) { HD_USTAR *hd; @@ -1119,6 +1681,15 @@ struct xheader xhdr = SLIST_HEAD_INITIALIZER(xhdr); #endif int bad_mtime; + int write_data = 0; +#ifndef SMALL + /* Ensure any pending global keywords are written once per archive. 
*/ + if (!ustar) { + int gres = pax_write_global_header(); + if (gres < 0) + return 1; + } +#endif /* * check for those file system types ustar cannot store @@ -1234,15 +1805,27 @@ case PAX_SLK: case PAX_HLK: case PAX_HRG: - if (arcn->type == PAX_SLK) - hd->typeflag = SYMTYPE; - else - hd->typeflag = LNKTYPE; fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, sizeof(arcn->ln_name)); - if (ul_oct(0, hd->size, sizeof(hd->size), 3)) - goto out; - break; + if (arcn->type == PAX_SLK) { + hd->typeflag = SYMTYPE; + if (ul_oct(0, hd->size, sizeof(hd->size), 3)) + goto out; + } else { + hd->typeflag = LNKTYPE; + /* Optional pax extension: store file data along with the link. */ + if (!ustar && pax_option_linkdata()) { + arcn->pad = TAR_PAD(arcn->sb.st_size); + if (ull_oct(arcn->sb.st_size, hd->size, + sizeof(hd->size), 3)) + goto out; + write_data = 1; + } else { + if (ul_oct(0, hd->size, sizeof(hd->size), 3)) + goto out; + } + } + break; case PAX_REG: case PAX_CTG: default: @@ -1254,12 +1837,12 @@ else hd->typeflag = REGTYPE; arcn->pad = TAR_PAD(arcn->sb.st_size); - if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) { - if (ustar) { - paxwarn(1, "File is too long for ustar %s", - arcn->org_name); - return(1); - } + if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) { + if (ustar) { + paxwarn(1, "File is too long for ustar %s", + arcn->org_name); + return(1); + } #ifndef SMALL else if (xheader_add_ull(&xhdr, "size", arcn->sb.st_size) == -1) { @@ -1270,6 +1853,7 @@ } #endif } + write_data = 1; break; } @@ -1320,14 +1904,18 @@ * * ctime isn't specified by POSIX so omit it. 
*/ - if (xheader_add_ts(&xhdr, "atime", &arcn->sb.st_atim) == -1) { - paxwarn(1, "Couldn't preserve %s in pax format for %s", - "atime", arcn->org_name); - xheader_free(&xhdr); - return (1); + if (pax_option_times()) { + if (xheader_add_ts(&xhdr, "atime", + &arcn->sb.st_atim) == -1) { + paxwarn(1, + "Couldn't preserve %s in pax format for %s", + "atime", arcn->org_name); + xheader_free(&xhdr); + return (1); + } } - if ((bad_mtime || arcn->sb.st_mtime < 0 || - arcn->sb.st_mtim.tv_nsec != 0) && + if ((pax_option_times() || bad_mtime || arcn->sb.st_mtime < 0 || + arcn->sb.st_mtim.tv_nsec != 0) && xheader_add_ts(&xhdr, "mtime", &arcn->sb.st_mtim) == -1) { paxwarn(1, "Couldn't preserve %s in pax format for %s", "mtime", arcn->org_name); @@ -1346,11 +1934,13 @@ } #ifndef SMALL + pax_option_apply_local_xhdr(&xhdr); + /* write out a pax extended header if needed */ if (!SLIST_EMPTY(&xhdr)) { int ret; - ret = wr_xheader(arcn->name, hd, &xhdr); + ret = wr_xheader(arcn->name, hd, &xhdr, 0, NULL, 0); xheader_free(&xhdr); if (ret) return(ret); @@ -1363,7 +1953,7 @@ * needs to be written */ if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, - sizeof(hd->chksum), 3)) + sizeof(hd->chksum), 3)) goto out; if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0 || wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0) { @@ -1371,9 +1961,7 @@ arcn->org_name); return(-1); } - if (PAX_IS_REG(arcn->type)) - return(0); - return(1); + return (write_data ? 
0 : 1); out: #ifndef SMALL @@ -1475,11 +2063,66 @@ OPLIST *opt; while ((opt = opt_next()) != NULL) { - if (1) { - paxwarn(1, "Unknown pax format -o option/value pair %s=%s", - opt->name, opt->value); - return(-1); + if (strcmp(opt->name, "delete") == 0) { + if (pax_option_add_delete(opt->value) < 0) { + paxwarn(1, "Unable to record delete pattern %s", + opt->value); + free(opt->name); + free(opt->value); + free(opt); + return (-1); + } + } else if (strcmp(opt->name, "exthdr.name") == 0) { + if (pax_option_set_exthdr_name(opt->value) < 0) { + paxwarn(1, "Unable to set exthdr.name to %s", + opt->value); + free(opt->name); + free(opt->value); + free(opt); + return (-1); + } + } else if (strcmp(opt->name, "globexthdr.name") == 0) { + if (pax_option_set_globexthdr_name(opt->value) < 0) { + paxwarn(1, "Unable to set globexthdr.name to %s", + opt->value); + free(opt->name); + free(opt->value); + free(opt); + return (-1); + } + } else if (strcmp(opt->name, "invalid") == 0) { + if (pax_option_set_invalid(opt->value) < 0) { + paxwarn(1, "Unknown invalid action %s", opt->value); + free(opt->name); + free(opt->value); + free(opt); + return (-1); + } + } else if (strcmp(opt->name, "linkdata") == 0) { + pax_option_set_linkdata(1); + } else if (strcmp(opt->name, "times") == 0) { + pax_option_set_times(1); + } else if (opt->assign != OPT_ASSIGN_NONE) { + if (pax_option_add_keyword(opt->name, opt->value, + opt->assign) < 0) { + paxwarn(1, + "Unable to record pax keyword %s=%s", + opt->name, opt->value); + free(opt->name); + free(opt->value); + free(opt); + return (-1); + } + } else { + paxwarn(1, "Unknown pax format -o option %s", opt->name); + free(opt->name); + free(opt->value); + free(opt); + return (-1); } + free(opt->name); + free(opt->value); + free(opt); } return 0; } @@ -1695,13 +2338,33 @@ break; } *p++ = nextp[-1] = '\0'; + if (pax_keyword_deleted(keyword)) { + p = nextp; + continue; + } + if (pax_store_kv(global ? 
&pax_global_xattr : &arcn->xattr, + keyword, p) == -1) { + paxwarn(1, "Unable to store extended header keyword %s", + keyword); + ret = -1; + break; + } if (!global) { if (!strcmp(keyword, "path")) { - arcn->nlen = strlcpy(arcn->name, p, + size_t len = strlcpy(arcn->name, p, sizeof(arcn->name)); + arcn->nlen = MINIMUM(len, sizeof(arcn->name) - 1); + if (len >= sizeof(arcn->name)) + (void)pax_handle_invalid_path(arcn, + keyword, p); } else if (!strcmp(keyword, "linkpath")) { - arcn->ln_nlen = strlcpy(arcn->ln_name, p, + size_t len = strlcpy(arcn->ln_name, p, sizeof(arcn->ln_name)); + arcn->ln_nlen = MINIMUM(len, + sizeof(arcn->ln_name) - 1); + if (len >= sizeof(arcn->ln_name)) + (void)pax_handle_invalid_link(arcn, + keyword, p); } else if (!strcmp(keyword, "mtime")) { ret = rd_time(&arcn->sb.st_mtim, keyword, p); if (ret < 0) Index: bin/pax/gen_subs.c =================================================================== RCS file: /cvs/src/bin/pax/gen_subs.c,v diff -u -r1.34 gen_subs.c --- bin/pax/gen_subs.c 27 Apr 2024 19:49:42 -0000 1.34 +++ bin/pax/gen_subs.c 12 Oct 2025 07:53:40 -0000 @@ -36,7 +36,9 @@ #include #include +#include #include +#include #include #include #include @@ -53,6 +55,334 @@ * a collection of general purpose subroutines used by pax */ +/* Accumulated format string for -o listopt=. */ +static char *listopt_format; +static size_t listopt_len; + +/* Scratch context tracking dynamically duplicated strings. */ +struct listopt_ctx { + char **allocated; + size_t count; + size_t cap; +}; + +/* Parsed elements for a single custom listopt conversion. 
*/ +struct listopt_spec { + char flags[16]; + char width[16]; + char precision[16]; + char length[8]; + char keyword[128]; + char subfmt[128]; + char conv; +}; + +static void +listopt_ctx_init(struct listopt_ctx *ctx) +{ + ctx->allocated = NULL; + ctx->count = ctx->cap = 0; +} + +static void +listopt_ctx_free(struct listopt_ctx *ctx) +{ + size_t i; + + if (ctx == NULL) + return; + for (i = 0; i < ctx->count; i++) + free(ctx->allocated[i]); + free(ctx->allocated); + ctx->allocated = NULL; + ctx->count = ctx->cap = 0; +} + +static const char * +listopt_store(struct listopt_ctx *ctx, const char *str) +{ + char *dup; + char **tmp; + + if (str == NULL) + str = ""; + dup = strdup(str); + if (dup == NULL) + return ""; + if (ctx->count == ctx->cap) { + size_t newcap = ctx->cap ? ctx->cap * 2 : 8; + tmp = reallocarray(ctx->allocated, newcap, sizeof(*tmp)); + if (tmp == NULL) { + free(dup); + return ""; + } + ctx->allocated = tmp; + ctx->cap = newcap; + } + ctx->allocated[ctx->count++] = dup; + return dup; +} + +/* Break down a single % conversion, recording printf modifiers and keyword. 
*/ +static int +listopt_parse_spec(const char *fmt, struct listopt_spec *spec, + const char **endp) +{ + const char *p = fmt; + enum { PARSE_FLAGS, PARSE_WIDTH, PARSE_PRECISION, PARSE_LENGTH } state; + + if (*p != '%') + return 0; + memset(spec, 0, sizeof(*spec)); + p++; + state = PARSE_FLAGS; + while (*p != '\0') { + if (*p == '(') { + const char *start = ++p; + size_t len; + + while (*p != '\0' && *p != ')') + p++; + len = p - start; + if (len >= sizeof(spec->keyword)) + len = sizeof(spec->keyword) - 1; + memcpy(spec->keyword, start, len); + spec->keyword[len] = '\0'; + if (*p == ')') + p++; + state = PARSE_FLAGS; + continue; + } + switch (state) { + case PARSE_FLAGS: + if (strchr("-+ #0'", *p) != NULL) { + size_t fl = strlen(spec->flags); + if (fl + 1 < sizeof(spec->flags)) { + spec->flags[fl] = *p; + spec->flags[fl + 1] = '\0'; + } + p++; + continue; + } + state = PARSE_WIDTH; + continue; + case PARSE_WIDTH: + if (isdigit((unsigned char)*p)) { + size_t wl = strlen(spec->width); + if (wl + 1 < sizeof(spec->width)) { + spec->width[wl] = *p; + spec->width[wl + 1] = '\0'; + } + p++; + continue; + } + if (*p == '.') { + size_t pl = strlen(spec->precision); + if (pl + 1 < sizeof(spec->precision)) { + spec->precision[pl] = '.'; + spec->precision[pl + 1] = '\0'; + } + p++; + state = PARSE_PRECISION; + continue; + } + state = PARSE_LENGTH; + continue; + case PARSE_PRECISION: + if (isdigit((unsigned char)*p)) { + size_t pl = strlen(spec->precision); + if (pl + 1 < sizeof(spec->precision)) { + spec->precision[pl] = *p; + spec->precision[pl + 1] = '\0'; + } + p++; + continue; + } + state = PARSE_LENGTH; + continue; + case PARSE_LENGTH: + if (strchr("hljztL", *p) != NULL) { + size_t ll = strlen(spec->length); + if (ll + 1 < sizeof(spec->length)) { + spec->length[ll] = *p; + spec->length[ll + 1] = '\0'; + } + p++; + /* Support double h/l modifiers */ + if ((spec->length[0] == 'h' || spec->length[0] == 'l') && + spec->length[1] == '\0' && (*p == spec->length[0])) { + if 
(strlen(spec->length) + 1 < sizeof(spec->length)) { + size_t l2 = strlen(spec->length); + spec->length[l2] = *p; + spec->length[l2 + 1] = '\0'; + } + p++; + } + continue; + } + spec->conv = *p++; + goto done; + } + } + done: + if (spec->conv == '\0') + return -1; + if (spec->keyword[0] != '\0' && spec->conv == 'T') { + char *eq = strchr(spec->keyword, '='); + if (eq != NULL) { + strlcpy(spec->subfmt, eq + 1, sizeof(spec->subfmt)); + *eq = '\0'; + } + } + *endp = p; + return 1; +} + +/* Retrieve a keyword value as a string, allocating stable storage as needed. */ +static const char * +listopt_keyword_string(struct listopt_ctx *ctx, ARCHD *arcn, + const char *keyword) +{ + const char *val; + char *dup; + + if (keyword == NULL || *keyword == '\0' || + strcmp(keyword, "path") == 0) + return arcn->name; + if (strcmp(keyword, "linkpath") == 0) + return arcn->ln_name; + if (strcmp(keyword, "uname") == 0) { + val = user_from_uid(arcn->sb.st_uid, 0); + return val ? val : ""; + } + if (strcmp(keyword, "gname") == 0) { + val = group_from_gid(arcn->sb.st_gid, 0); + return val ? val : ""; + } + if (strcmp(keyword, "name") == 0) { + if ((dup = strdup(arcn->name)) == NULL) + return ""; + val = listopt_store(ctx, basename(dup)); + free(dup); + return val; + } + if (strcmp(keyword, "dirname") == 0) { + if ((dup = strdup(arcn->name)) == NULL) + return ""; + val = listopt_store(ctx, dirname(dup)); + free(dup); + return val; + } + val = pax_kv_lookup(arcn, keyword); + return val ? val : ""; +} + +/* Interpret a keyword as a timespec, falling back to archive defaults. 
*/ +static int +listopt_keyword_time(struct listopt_ctx *ctx, ARCHD *arcn, + const char *keyword, struct timespec *ts) +{ + const char *val; + char *end; + + if (keyword == NULL || *keyword == '\0' || + strcmp(keyword, "mtime") == 0) { + *ts = arcn->sb.st_mtim; + return 0; + } + if (strcmp(keyword, "atime") == 0) { + *ts = arcn->sb.st_atim; + return 0; + } + if (strcmp(keyword, "ctime") == 0) { + *ts = arcn->sb.st_ctim; + return 0; + } + val = pax_kv_lookup(arcn, keyword); + if (val == NULL || *val == '\0') + return -1; + ts->tv_sec = strtoll(val, &end, 10); + ts->tv_nsec = 0; + if (end == val) + return -1; + if (*end == '.') { + long nsec = 0; + int digits = 0; + for (end++; *end && isdigit((unsigned char)*end) && digits < 9; + end++, digits++) + nsec = nsec * 10 + (*end - '0'); + for (; digits < 9; digits++) + nsec *= 10; + ts->tv_nsec = nsec; + } + return 0; +} + +/* Parse signed numeric keywords, allowing overrides from extended headers. */ +static int +listopt_keyword_sll(ARCHD *arcn, const char *keyword, long long *out) +{ + const char *val; + char *end; + + if (keyword == NULL) + return -1; + if (strcmp(keyword, "uid") == 0) { + *out = arcn->sb.st_uid; + return 0; + } + if (strcmp(keyword, "gid") == 0) { + *out = arcn->sb.st_gid; + return 0; + } + if (strcmp(keyword, "nlink") == 0) { + *out = arcn->sb.st_nlink; + return 0; + } + if (strcmp(keyword, "mode") == 0) { + *out = arcn->sb.st_mode; + return 0; + } + val = pax_kv_lookup(arcn, keyword); + if (val == NULL) + return -1; + *out = strtoll(val, &end, 10); + if (end == val) + return -1; + return 0; +} + +/* Parse unsigned numeric keywords, falling back to header values. 
*/ +static int +listopt_keyword_ull(ARCHD *arcn, const char *keyword, + unsigned long long *out) +{ + const char *val; + char *end; + + if (keyword == NULL) + return -1; + if (strcmp(keyword, "size") == 0) { + *out = arcn->sb.st_size; + return 0; + } + if (strcmp(keyword, "devmajor") == 0) { + *out = MAJOR(arcn->sb.st_rdev); + return 0; + } + if (strcmp(keyword, "devminor") == 0) { + *out = MINOR(arcn->sb.st_rdev); + return 0; + } + val = pax_kv_lookup(arcn, keyword); + if (val == NULL) + return -1; + *out = strtoull(val, &end, 10); + if (end == val) + return -1; + return 0; +} + /* * constants used by ls_list() when printing out archive members */ @@ -82,6 +412,13 @@ term = zeroflag ? '\0' : '\n'; /* path termination character */ + if (vflag && listopt_get() != NULL) { + listopt_output(arcn, fp); + (void)fputc(term, fp); + (void)fflush(fp); + return; + } + /* * if not verbose, just print the file name */ @@ -183,6 +520,248 @@ } else { (void)fputs(str, fp); } +} + +/* Append a new fragment to the aggregated custom listopt format string. */ +int +listopt_append(const char *chunk) +{ + char *tmp; + size_t add; + + if (chunk == NULL) + return 0; + add = strlen(chunk); + if (add == 0) + return 0; + if (SIZE_MAX - listopt_len <= add) + return -1; + tmp = realloc(listopt_format, listopt_len + add + 1); + if (tmp == NULL) + return -1; + listopt_format = tmp; + memcpy(listopt_format + listopt_len, chunk, add); + listopt_len += add; + listopt_format[listopt_len] = '\0'; + return 0; +} + +const char * +listopt_get(void) +{ + return listopt_format; +} + +/* Reset cached list formatting between separate pax invocations. */ +void +listopt_reset(void) +{ + free(listopt_format); + listopt_format = NULL; + listopt_len = 0; +} + +/* Emit a single verbose listing line obeying the custom listopt format. 
*/ +static void +listopt_output(ARCHD *arcn, FILE *fp) +{ + const char *fmt = listopt_get(); + struct listopt_ctx ctx; + struct listopt_spec spec; + const char *next; + char fmtbuf[64]; + char outbuf[PATH_MAX * 2]; + + if (fmt == NULL || *fmt == '\0') + return; + listopt_ctx_init(&ctx); + while (*fmt != '\0') { + if (*fmt != '%') { + (void)fputc(*fmt++, fp); + continue; + } + if (fmt[1] == '%') { + fmt += 2; + (void)fputc('%', fp); + continue; + } + if (listopt_parse_spec(fmt, &spec, &next) <= 0) { + (void)fputc(*fmt++, fp); + continue; + } + fmt = next; + switch (spec.conv) { + case 's': + { + const char *str = listopt_keyword_string(&ctx, arcn, + spec.keyword[0] ? spec.keyword : "path"); + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s", + spec.flags, spec.width, spec.precision, "s"); + (void)fprintf(fp, fmtbuf, str); + break; + } + case 'c': + { + const char *str = listopt_keyword_string(&ctx, arcn, + spec.keyword[0] ? spec.keyword : "path"); + char ch = (str && *str) ? *str : ' '; + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%c", + spec.flags, spec.width, spec.precision, 'c'); + (void)fprintf(fp, fmtbuf, ch); + break; + } + case 'd': + case 'i': + { + long long val = 0; + if (listopt_keyword_sll(arcn, spec.keyword, &val) != 0) + val = 0; + const char *length = "ll"; + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s%c", + spec.flags, spec.width, spec.precision, length, spec.conv); + (void)fprintf(fp, fmtbuf, val); + break; + } + case 'o': + case 'u': + case 'x': + case 'X': + { + unsigned long long val = 0; + if (listopt_keyword_ull(arcn, spec.keyword, &val) != 0) + val = 0; + const char *length = "ll"; + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s%c", + spec.flags, spec.width, spec.precision, length, spec.conv); + (void)fprintf(fp, fmtbuf, val); + break; + } + case 'T': + { + struct timespec ts; + struct tm tm; + const char *key = spec.keyword[0] ? spec.keyword : "mtime"; + const char *tfmt = spec.subfmt[0] ? 
spec.subfmt : + "%b %e %H:%M %Y"; + if (listopt_keyword_time(&ctx, arcn, key, &ts) == 0 && + localtime_r(&ts.tv_sec, &tm) != NULL && + strftime(outbuf, sizeof(outbuf), tfmt, &tm) > 0) { + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s", + spec.flags, spec.width, spec.precision, "s"); + (void)fprintf(fp, fmtbuf, outbuf); + } + break; + } + case 'M': + { + char modebuf[12]; + strmode(arcn->sb.st_mode, modebuf); + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s", + spec.flags, spec.width, spec.precision, "s"); + (void)fprintf(fp, fmtbuf, modebuf); + break; + } + case 'D': + { + const char *use = NULL; + if (S_ISCHR(arcn->sb.st_mode) || S_ISBLK(arcn->sb.st_mode)) { + snprintf(outbuf, sizeof(outbuf), "%lu,%lu", + (u_long)MAJOR(arcn->sb.st_rdev), + (u_long)MINOR(arcn->sb.st_rdev)); + use = outbuf; + } else if (spec.keyword[0]) { + unsigned long long val = 0; + if (listopt_keyword_ull(arcn, spec.keyword, &val) == 0) { + snprintf(outbuf, sizeof(outbuf), "%llu", val); + use = outbuf; + } + } + if (use == NULL) + use = ""; + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s", + spec.flags, spec.width, spec.precision, "s"); + (void)fprintf(fp, fmtbuf, use); + break; + } + case 'F': + { + const char *out = NULL; + if (!spec.keyword[0]) + out = arcn->name; + else { + char *tmp = strdup(spec.keyword); + char *save = tmp; + outbuf[0] = '\0'; + if (tmp != NULL) { + char *token; + int first = 1; + while ((token = strsep(&tmp, ",")) != NULL) { + const char *part = + listopt_keyword_string(&ctx, arcn, token); + if (!first) + strlcat(outbuf, "/", + sizeof(outbuf)); + strlcat(outbuf, part, sizeof(outbuf)); + first = 0; + } + free(save); + out = outbuf; + } + } + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s", + spec.flags, spec.width, spec.precision, "s"); + (void)fprintf(fp, fmtbuf, out ? 
out : ""); + break; + } + case 'L': + { + if (arcn->type == PAX_SLK) + snprintf(outbuf, sizeof(outbuf), "%s -> %s", + arcn->name, arcn->ln_name); + else + strlcpy(outbuf, arcn->name, sizeof(outbuf)); + snprintf(fmtbuf, sizeof(fmtbuf), "%%%s%s%s%s", + spec.flags, spec.width, spec.precision, "s"); + (void)fprintf(fp, fmtbuf, outbuf); + break; + } + default: + (void)fputc(spec.conv, fp); + break; + } + } + listopt_ctx_free(&ctx); +} + +void +pax_kv_free(PAXKEY **head) +{ + PAXKEY *cur; + + if (head == NULL) + return; + while ((cur = *head) != NULL) { + *head = cur->next; + free(cur->name); + free(cur->value); + free(cur); + } +} + +const char * +pax_kv_lookup(const ARCHD *arcn, const char *key) +{ + const PAXKEY *kv; + + if (arcn == NULL || key == NULL) + return NULL; + for (kv = arcn->xattr; kv != NULL; kv = kv->next) + if (strcmp(kv->name, key) == 0) + return kv->value; + for (kv = arcn->gattr; kv != NULL; kv = kv->next) + if (strcmp(kv->name, key) == 0) + return kv->value; + return NULL; } /* Index: bin/pax/options.c =================================================================== RCS file: /cvs/src/bin/pax/options.c,v diff -u -r1.115 options.c --- bin/pax/options.c 10 May 2024 20:28:31 -0000 1.115 +++ bin/pax/options.c 12 Oct 2025 07:53:49 -0000 @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -51,6 +52,7 @@ static int bad_opt(void); static int opt_add(const char *); +static char *opt_parse_value(const char **, int); /* * argv[0] names. 
Used for tar and cpio emulation */ @@ -139,6 +141,8 @@ static void printflg(unsigned int); static off_t str_offt(char *); static char *get_line(FILE *fp); +static char *opt_parse_value(const char **, int); +static void opt_common(void); static void pax_options(int, char **); static void pax_usage(void); static void tar_options(int, char **); @@ -683,6 +687,8 @@ if (!(flg & XF) && (act == ARCHIVE)) frmt = &(fsub[DEFLT]); + opt_common(); + /* * process the args as they are interpreted by the operation mode */ @@ -984,6 +990,8 @@ if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0) exit(0); + opt_common(); + /* * process the args as they are interpreted by the operation mode */ @@ -1408,6 +1416,8 @@ argc -= optind; argv += optind; + opt_common(); + /* * process the args as they are interpreted by the operation mode */ @@ -1522,6 +1532,54 @@ return(0); } +static char * +opt_parse_value(const char **srcp, int consume_rest) +{ + const char *src; + char *buf, *tmp; + size_t len, cap; + + src = *srcp; + cap = 64; + len = 0; + if ((buf = malloc(cap)) == NULL) + return NULL; + + while (*src != '\0') { + if (*src == '\\' && src[1] != '\0') { + src++; + if (len + 1 >= cap) { + tmp = realloc(buf, cap * 2); + if (tmp == NULL) { + free(buf); + return NULL; + } + buf = tmp; + cap *= 2; + } + buf[len++] = *src++; + continue; + } + if (!consume_rest && *src == ',') + break; + if (len + 1 >= cap) { + tmp = realloc(buf, cap * 2); + if (tmp == NULL) { + free(buf); + return NULL; + } + buf = tmp; + cap *= 2; + } + buf[len++] = *src++; + } + buf[len] = '\0'; + while (isspace((unsigned char)*src)) + src++; + *srcp = src; + return buf; +} + /* * opt_add() * breaks the value supplied to -o into a option name and value. 
options @@ -1534,58 +1592,130 @@ static int opt_add(const char *str) { + const char *src; OPLIST *opt; - char *frpt; - char *pt; - char *endpt; - char *dstr; + char *name, *value; + int assign; - if ((str == NULL) || (*str == '\0')) { - paxwarn(0, "Invalid option name"); - return(-1); + if (str == NULL || *str == '\0') { + paxwarn(0, "Invalid option string"); + return (-1); } - if ((dstr = strdup(str)) == NULL) { - paxwarn(0, "Unable to allocate space for option list"); - return(-1); - } - frpt = endpt = dstr; - /* - * break into name and values pieces and stuff each one into a - * OPLIST structure. When we know the format, the format specific - * option function will go through this list - */ - while ((frpt != NULL) && (*frpt != '\0')) { - if ((endpt = strchr(frpt, ',')) != NULL) - *endpt = '\0'; - if ((pt = strchr(frpt, '=')) == NULL) { - paxwarn(0, "Invalid options format"); - free(dstr); - return(-1); + src = str; + while (*src != '\0') { + while (isspace((unsigned char)*src)) + src++; + if (*src == '\0') + break; + if (*src == ',') { + src++; + continue; } - if ((opt = malloc(sizeof(OPLIST))) == NULL) { - paxwarn(0, "Unable to allocate space for option list"); - free(dstr); - return(-1); + + const char *key_start = src; + while (*src != '\0' && *src != '=' && *src != ':' && + *src != ',' && !isspace((unsigned char)*src)) + src++; + const char *key_end = src; + while (key_end > key_start && + isspace((unsigned char)key_end[-1])) + key_end--; + if (key_end == key_start) { + paxwarn(0, "Invalid option name"); + return (-1); } - dstr = NULL; /* parts of string going onto the OPLIST */ - *pt++ = '\0'; - opt->name = frpt; - opt->value = pt; + if ((name = strndup(key_start, key_end - key_start)) == NULL) + return (-1); + + while (isspace((unsigned char)*src)) + src++; + assign = OPT_ASSIGN_NONE; + if (*src == ':' && src[1] == '=') { + assign = OPT_ASSIGN_COLON; + src += 2; + } else if (*src == '=') { + assign = OPT_ASSIGN_EQ; + src++; + } else if (*src == ':') { + 
assign = OPT_ASSIGN_EQ; + src++; + } + while (isspace((unsigned char)*src)) + src++; + + if (assign != OPT_ASSIGN_NONE) { + int consume_rest = (strcmp(name, "listopt") == 0); + value = opt_parse_value(&src, consume_rest); + if (value == NULL) { + free(name); + return (-1); + } + if (!consume_rest && *src == ',') + src++; + } else { + value = strdup(""); + if (value == NULL) { + free(name); + return (-1); + } + if (*src == ',') + src++; + } + while (isspace((unsigned char)*src)) + src++; + + if ((opt = malloc(sizeof(*opt))) == NULL) { + free(name); + free(value); + return (-1); + } + opt->name = name; + opt->value = value; + opt->assign = assign; + opt->handled = 0; opt->fow = NULL; - if (endpt != NULL) - frpt = endpt + 1; - else - frpt = NULL; - if (ophead == NULL) { + if (ophead == NULL) optail = ophead = opt; + else { + optail->fow = opt; + optail = opt; + } + if (assign != OPT_ASSIGN_NONE && strcmp(name, "listopt") == 0) + break; + } + return 0; +} + +static void +opt_common(void) +{ + OPLIST **prev, *opt, *next; + + prev = &ophead; + while ((opt = *prev) != NULL) { + next = opt->fow; + if (strcmp(opt->name, "listopt") == 0) { + if (listopt_append(opt->value) < 0) { + paxwarn(1, "Unable to record listopt format"); + pax_usage(); + } + *prev = next; + free(opt->name); + free(opt->value); + free(opt); continue; } - optail->fow = opt; - optail = opt; + prev = &opt->fow; } - free(dstr); - return(0); + + optail = ophead; + if (optail != NULL) { + while (optail->fow != NULL) + optail = optail->fow; + } + /* Global keyword state is per-invocation, reset before parsing formats. 
*/ + pax_option_reset_session(); } /* Index: bin/pax/pax.h =================================================================== RCS file: /cvs/src/bin/pax/pax.h,v diff -u -r1.29 pax.h --- bin/pax/pax.h 12 Sep 2017 17:11:11 -0000 1.29 +++ bin/pax/pax.h 12 Oct 2025 07:54:08 -0000 @@ -98,6 +98,41 @@ * may be required if and when the supporting operating system removes all * restrictions on the length of pathnames it will resolve. */ +/* + * Linked list entry used to retain pax extended header keywords. + */ +typedef struct paxkey { + char *name; + char *value; + struct paxkey *next; +} PAXKEY; + +/* Records command-line supplied pax keywords and their assignment style. */ +typedef struct paxopkv { + char *name; + char *value; + int assign; + struct paxopkv *next; +} PAXOPKV; + +/* Tracks delete= pattern expressions supplied via -o options. */ +typedef struct paxdel { + char *pattern; + struct paxdel *next; +} PAXDEL; + +enum pax_invalid_action { + PAX_INVALID_BYPASS, + PAX_INVALID_WRITE, + PAX_INVALID_RENAME, + PAX_INVALID_UTF8, + PAX_INVALID_BINARY +}; + +#define PAX_INVALID_NONE 0 +#define PAX_INVALID_SKIP 1 +#define PAX_INVALID_RENAME 2 + typedef struct { int nlen; /* file name length */ char name[PAXPATHLEN+1]; /* file name */ @@ -125,6 +160,9 @@ #define PAX_CTG 10 /* high performance file */ #define PAX_GLL 11 /* GNU long symlink */ #define PAX_GLF 12 /* GNU long file */ + PAXKEY *xattr; /* file specific pax keywords */ + const PAXKEY *gattr; /* global pax keywords in effect */ + int invalid; /* invalid handling state */ } ARCHD; #define PAX_IS_REG(type) ((type) == PAX_REG || (type) == PAX_CTG) @@ -236,8 +274,14 @@ typedef struct oplist { char *name; /* option variable name e.g. 
name= */ char *value; /* value for option variable */ + int assign; /* assignment type: '=' or ':=' */ + int handled; /* option consumed by generic parser */ struct oplist *fow; /* next option */ } OPLIST; + +#define OPT_ASSIGN_NONE 0 +#define OPT_ASSIGN_EQ 1 +#define OPT_ASSIGN_COLON 2 /* * General Macros Index: bin/pax/ar_subs.c =================================================================== RCS file: /cvs/src/bin/pax/ar_subs.c,v diff -u -r1.53 ar_subs.c --- bin/pax/ar_subs.c 14 Jul 2024 14:32:02 -0000 1.53 +++ bin/pax/ar_subs.c 12 Oct 2025 07:54:16 -0000 @@ -92,6 +92,12 @@ * step through the archive until the format says it is done */ while (next_head(arcn) == 0) { + /* Skip archive members rejected by invalid= policy. */ + if (arcn->invalid == PAX_INVALID_SKIP) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { /* * we need to read, to get the real filename @@ -244,6 +250,12 @@ * says it is done */ while (next_head(arcn) == 0) { + /* Honor invalid=bypass by skipping unwanted members outright. */ + if (arcn->invalid == PAX_INVALID_SKIP) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { /* * we need to read, to get the real filename @@ -685,6 +697,12 @@ * step through the archive until the format says it is done */ while (next_head(arcn) == 0) { + /* Entries flagged for bypass are consumed without further work. */ + if (arcn->invalid == PAX_INVALID_SKIP) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } /* * check if this file meets user specified options. */ @@ -1024,6 +1042,14 @@ int in_resync = 0; /* set when we are in resync mode */ int cnt = 0; /* counter for trailer function */ int first = 1; /* on 1st read, EOF isn't premature. */ + + /* + * Clear out any per-file extended header state left from the + * previous archive member before we reuse the structure. 
+ */ + pax_kv_free(&arcn->xattr); + arcn->gattr = NULL; + arcn->invalid = PAX_INVALID_NONE; /* * set up initial conditions, we want a whole frmt->hsz block as we Index: bin/pax/extern.h =================================================================== RCS file: /cvs/src/bin/pax/extern.h,v diff -u -r1.64 extern.h --- bin/pax/extern.h 17 Apr 2024 18:12:12 -0000 1.64 +++ bin/pax/extern.h 12 Oct 2025 07:54:24 -0000 @@ -152,11 +152,34 @@ void ls_list(ARCHD *, time_t, FILE *); void ls_tty(ARCHD *); void safe_print(const char *, FILE *); +int listopt_append(const char *); +const char *listopt_get(void); +void listopt_reset(void); +void pax_option_set_linkdata(int); +void pax_option_set_times(int); +int pax_option_add_delete(const char *); +int pax_option_set_invalid(const char *); +int pax_option_set_exthdr_name(const char *); +int pax_option_set_globexthdr_name(const char *); +int pax_option_add_keyword(const char *, const char *, int); +const PAXDEL *pax_option_delete(void); +const PAXOPKV *pax_option_keywords(int); +int pax_option_linkdata(void); +int pax_option_times(void); +const char *pax_option_exthdr_name(void); +const char *pax_option_globexthdr_name(void); +enum pax_invalid_action pax_option_invalid(void); +void pax_option_reset_session(void); u_long asc_ul(char *, int, int); +int pax_handle_invalid_path(ARCHD *, const char *, const char *); +int pax_handle_invalid_link(ARCHD *, const char *, const char *); +void pax_mark_skip(ARCHD *); int ul_asc(u_long, char *, int, int); unsigned long long asc_ull(char *, int, int); int ull_asc(unsigned long long, char *, int, int); size_t fieldcpy(char *, size_t, const char *, size_t); +void pax_kv_free(PAXKEY **); +const char *pax_kv_lookup(const ARCHD *, const char *); /* * getoldopt.c Index: bin/pax/pax.1 =================================================================== RCS file: /cvs/src/bin/pax/pax.1,v diff -u -r1.80 pax.1 --- bin/pax/pax.1 30 Nov 2024 06:59:12 -0000 1.80 +++ bin/pax/pax.1 12 Oct 2025 07:54:32 -0000 
@@ -453,8 +453,13 @@ .Fl x . In general, .Ar options -take the form: -.Ar name Ns = Ns Ar value . +take the form +.Ar name Ns = Ns Ar value +or +.Ar name Ns := Ns Ar value . +Multiple keywords can be separated with commas. +Backslash can be used to escape a literal comma or backslash inside a value. +When the same keyword appears more than once, the last value wins. .Pp The following options are available for the .Cm ustar @@ -467,6 +472,74 @@ .It Cm write_opt=nodir When writing archives, omit the storage of directories. .El +.Pp +When the selected archive format is +.Cm pax , +the following keywords are also understood: +.Bl -tag -width "globexthdr.name=string" +.It Cm delete Ns = Ns Ar pattern +Suppress extended header keywords whose name matches +.Ar pattern . +.It Cm exthdr.name Ns = Ns Ar string +Replace the default name given to the per-file extended header blocks. +The template may include +.Ql %d +(directory portion of the path), +.Ql %f +(final pathname component), +.Ql %p +(process ID), and +.Ql %% +for a literal percent sign. +.It Cm globexthdr.name Ns = Ns Ar string +Select the name written for +.Ql typeflag g +global extended headers. +The template may include +.Ql %n +(the sequence number starting at 1), +.Ql %p , +and +.Ql %% . +.It Cm invalid Ns = Ns Ar action +Choose how pathnames that cannot be represented locally are handled when +reading archives. +The +.Ar action +may be one of +.Cm bypass , +.Cm write , +.Cm rename , +.Cm UTF-8 , +or +.Cm binary . +.It Cm linkdata +Always store the file data for hard links instead of sharing the contents +with another archive member. +.It Cm listopt Ns = Ns Ar format +Customise the verbose listing produced in list mode with +.Fl v . +See +.Sx List Output Formatting +for the supported conversions. +Multiple +.Fl o Cm listopt +options are concatenated in order. +.It Cm times +Force the emission of +.Ql atime +and +.Ql mtime +extended header records for each file when writing or copying.
+.It Ar keyword Ns = Ns Ar value +Write +.Ar keyword +as a global extended header record when archiving, or override the value +read from the archive when extracting. +.It Ar keyword Ns := Ns Ar value +Write a per-file extended header record with the specified value, or override +the value for the current file when reading. +.El .It Fl P Do not follow symbolic links, perform a physical file system traversal. This is the default mode. @@ -789,6 +862,63 @@ archive member. The trailing newline is not buffered and is written only after the file has been read or written. +.Ss List Output Formatting +The +.Fl o Cm listopt Ns = Ns Ar format +keyword alters the verbose list produced with +.Fl v +in list mode. +The format string follows the rules of +.Xr printf 3 +with the addition that conversions may be prefixed by +.Pq Em keyword +to select the field that supplies the argument. +Unless noted below the standard flags, field width and precision are honoured. +Keywords refer to archive header fields, including any values supplied by pax +extended headers or by explicit +.Fl o +options. +.Pp +The following conversions extend the behaviour of +.Xr printf 3 : +.Bl -tag -width Ds +.It %s , %c , %d , %i , %o , %u , %x , %X +Use the selected keyword (defaulting to +.Ql path ) +and print it with the requested conversion. +.It %T +Formats a time value. +If the keyword is omitted +.Ql mtime +is used. +A keyword of the form +.Ql keyword=subformat +selects the time field and the +.Xr strftime 3 +format string to apply (default: +.Ql %b %e %H:%M %Y ) . +.It %M +Prints the symbolic file mode string as produced by +.Xr strmode 3 . +.It %D +Prints the device numbers for block or character special files +as two comma-separated integers. +For other file types, the numeric value of the supplied keyword, if any, +is printed instead. +.It %F +Prints a pathname composed from a comma-separated list of keywords. +Missing components are skipped. +When no list is supplied the stored pathname is used.
+.It %L +Prints a symbolic link in the form +.Dq path -> target . +Non-links fall back to +.Ql %F . +.El +.Pp +Multiple +.Fl o Cm listopt +options append to the overall format string in the order they appear. .It Fl w Write files to the standard output in the specified archive format. @@ -1080,14 +1210,8 @@ The .Nm utility is compliant with the -.St -p1003.1-2008 -specification, -except that the -.Cm pax -archive format is only partially supported, -and the -.Cm listopt -keyword is unsupported. +.St -p1003.1-2024 +specification. .Pp The flags .Op Fl 0BDEGjOPTUYZz ,