Index | Thread | Search

From:
"Andy Bradford" <amb-sendok-1733150637.fobiohaalpfpamkpmnic@bradfords.org>
Subject:
Re: Implement -E for rs(1)
To:
tech@openbsd.org
Date:
3 Oct 2024 08:43:56 -0600

Download raw body.

Thread
[bump]

Thus said Stefan Sperling on Fri, 15 Mar 2024 10:36:21 +0100:

> > Updated patch:
> 
> Looks good to me.  But I would like to get feedback  from at least one
> additional developer before committing it.

Thanks for taking the time to look at this. Are there any other takers
who would like to provide feedback?

For those who may not have the earlier emails in this thread:

https://marc.info/?t=170070038400001&r=1&w=2

Again, here is the latest diff:



Index: rs.c
===================================================================
RCS file: /cvs/src/usr.bin/rs/rs.c,v
retrieving revision 1.30
diff -u -p -r1.30 rs.c
--- rs.c	3 Dec 2015 12:23:15 -0000	1.30
+++ rs.c	15 Mar 2024 03:26:52 -0000
@@ -81,7 +81,7 @@ int	propgutter;
 char	isep = ' ', osep = ' ';
 int	owidth = 80, gutter = 2;
 
-int	  mbsavis(char **, const char *);
+int	  mbsavis(char **, const char *, int *, int);
 
 void	  usage(void);
 void	  getargs(int, char *[]);
@@ -120,10 +120,12 @@ void
 getfile(void)
 {
 	const char delim[2] = { isep, '\0' };
-	char *p;
+	char *p, *f;
 	struct entry *ep;
 	int multisep = (flags & ONEISEPONLY ? 0 : 1);
 	int nullpad = flags & NULLPAD;
+	int oneperchar = flags & ONEPERCHAR;
+	int byte_len = 0;
 	struct entry *padto;
 
 	curline = NULL;
@@ -139,6 +141,8 @@ getfile(void)
 		flags |= ONEPERLINE;
 	if (flags & ONEPERLINE)
 		icols = 1;
+	else if (oneperchar)
+		icols = mbsavis(&f, curline, NULL, 0);
 	else				/* count cols on first line */
 		for (p = curline; *p != '\0'; p++) {
 			if (*p == isep && multisep)
@@ -151,7 +155,7 @@ getfile(void)
 	p = curline;
 	do {
 		if (flags & ONEPERLINE) {
-			ep->w = mbsavis(&ep->s, curline);
+			ep->w = mbsavis(&ep->s, curline, NULL, 0);
 			if (maxwidth < ep->w)
 				maxwidth = ep->w;
 			INCR(ep);		/* prepare for next entry */
@@ -160,14 +164,18 @@ getfile(void)
 		}
 		p = curline;
 		while (p != NULL && *p != '\0') {
-			if (*p == isep) {
+			if (oneperchar) {
+				ep->w = mbsavis(&ep->s, p, &byte_len, 1);
+				p += byte_len;
+			} else if (*p == isep) {
 				p++;
 				if (multisep)
 					continue;
 				ep->s = "";	/* empty column */
 				ep->w = 0;
 			} else
-				ep->w = mbsavis(&ep->s, strsep(&p, delim));
+				ep->w = mbsavis(&ep->s, strsep(&p, delim),
+				    NULL, 0);
 			if (maxwidth < ep->w)
 				maxwidth = ep->w;
 			INCR(ep);		/* prepare for next entry */
Index: utf8.c
===================================================================
RCS file: /cvs/src/usr.bin/rs/utf8.c,v
retrieving revision 1.1
diff -u -p -r1.1 utf8.c
--- utf8.c	3 Dec 2015 12:23:15 -0000	1.1
+++ utf8.c	15 Mar 2024 03:26:52 -0000
@@ -20,7 +20,7 @@
 #include <wchar.h>
 
 int
-mbsavis(char** outp, const char *mbs)
+mbsavis(char** outp, const char *mbs, int *bytes_used, int nchars)
 {
 	const char *src;  /* Iterate mbs. */
 	char	 *dst;  /* Iterate *outp. */
@@ -28,33 +28,49 @@ mbsavis(char** outp, const char *mbs)
 	int	  total_width;  /* Display width of the whole string. */
 	int	  width;  /* Display width of a single Unicode char. */
 	int	  len;  /* Length in bytes of UTF-8 encoded string. */
+	int	  count; /* Count of chars read. */
+	int	  nbytes; /* Number of bytes to increment in loop */
 
 	len = strlen(mbs);
 	if ((*outp = malloc(len + 1)) == NULL)
 		err(1, NULL);
 
 	if (MB_CUR_MAX == 1) {
+		len = (nchars && nchars < len) ? nchars : len;
 		memcpy(*outp, mbs, len + 1);
+		(*outp)[len] = '\0';
+		if (bytes_used != NULL)
+			*bytes_used = len;
 		return len;
 	}
 
 	src = mbs;
 	dst = *outp;
 	total_width = 0;
+	count = 0;
+	nbytes = 0;
+	if (bytes_used != NULL)
+		*bytes_used = 0;
 	while (*src != '\0') {
 		if ((len = mbtowc(&wc, src, MB_CUR_MAX)) == -1) {
 			total_width++;
 			*dst++ = '?';
 			src++;
+			nbytes = 1;
 		} else if ((width = wcwidth(wc)) == -1) {
 			total_width++;
 			*dst++ = '?';
 			src += len;
+			nbytes = len;
 		} else {
 			total_width += width;
+			nbytes = len;
 			while (len-- > 0)
 				*dst++ = *src++;
 		}
+		if (bytes_used != NULL)
+			*bytes_used += nbytes;
+		if (nchars > 0 && ++count >= nchars) break;
 	}
 	*dst = '\0';
 	return total_width;