Index | Thread | Search

From:
Kirill A. Korinsky <kirill@korins.ky>
Subject:
Re: bin/ksh: fix emacs mode UTF-8 display logic
To:
tech@openbsd.org
Date:
Sun, 25 Jan 2026 17:27:24 +0100

Download raw body.

Thread
On Thu, 22 Jan 2026 17:00:59 +0100,
"Kirill A. Korinsky" <kirill@korins.ky> wrote:
> 
> tech@,
> 
> Here is the second diff, which addresses the discovered issue with the UTF-8
> display logic when resizing the terminal, but only for emacs mode.
> 
> The emacs line editor counts bytes when deciding what fits on screen and
> can stop in the middle of a UTF-8 sequence. This leaves partial UTF-8
> sequences on output, misplaces the "<" indicator, and can desync cursor
> movement after a resize.
> 
> Track display columns instead of bytes by skipping UTF-8 continuation
> bytes when positioning and redrawing. Add x_zotcp() to emit buffer bytes
> without splitting UTF-8 sequences, and use it in cursor motion and
> transpose paths.
> 

Here is an updated version which also includes tests.

Ok?

diff --git bin/ksh/emacs.c bin/ksh/emacs.c
index 43223bc06b2..107dd0e575f 100644
--- bin/ksh/emacs.c
+++ bin/ksh/emacs.c
@@ -130,6 +130,7 @@ static int	x_size_str(char *);
 static int	x_size(int);
 static void	x_zots(char *);
 static void	x_zotc(int);
+static void	x_zotcp(char *);
 static void	x_load_hist(char **);
 static int	x_search(char *, int, int);
 static int	x_match(char *, char *);
@@ -643,16 +644,17 @@ x_fword(void)
 static void
 x_goto(char *cp)
 {
-	if (cp < xbp || cp >= (xbp + x_displen)) {
+	x_lastcp();
+	if (cp < xbp || cp > xlp) {
 		/* we are heading off screen */
 		xcp = cp;
 		x_adjust();
 	} else if (cp < xcp) {		/* move back */
 		while (cp < xcp)
-			x_bs((unsigned char)*--xcp);
+			x_bs(*--xcp);
 	} else if (cp > xcp) {		/* move forward */
 		while (cp > xcp)
-			x_zotc((unsigned char)*xcp++);
+			x_zotcp(xcp++);
 	}
 }
 
@@ -671,7 +673,7 @@ x_size_str(char *cp)
 {
 	int size = 0;
 	while (*cp)
-		size += x_size(*cp++);
+		size += x_size((unsigned char)*cp++);
 	return size;
 }
 
@@ -699,7 +701,7 @@ x_zots(char *str)
 	}
 	x_lastcp();
 	while (*str && str < xlp && adj == x_adj_done)
-		x_zotc(*str++);
+		x_zotcp(str++);
 }
 
 static void
@@ -715,6 +717,39 @@ x_zotc(int c)
 		x_e_putc(c);
 }
 
+static void
+x_zotcp(char *p)
+{
+	unsigned char uc = (unsigned char)*p;	/* the byte to emit */
+
+	if (uc == '\t') {
+		/*  Kludge, tabs are always four spaces.  */
+		x_e_puts("    ");
+		return;
+	}
+	if (isu8cont(uc)) {	/* UTF-8 continuation byte */
+		if (x_col <= xx_cols) {
+			x_putc(uc);	/* written without incrementing x_col */
+		}
+		if (x_adj_ok && !isu8cont((unsigned char)p[1]) &&
+		    (x_col < 0 || x_col >= (xx_cols - 2)))
+			x_adjust();	/* only when p[1] is not a continuation byte */
+		return;
+	}
+	if (iscntrl(uc)) {
+		x_e_putc('^');	/* control byte: '^' followed by UNCTRL(uc) */
+		x_e_putc(UNCTRL(uc));
+	} else {
+		if (x_col < xx_cols) {
+			x_putc(uc);
+			x_col++;	/* non-continuation byte advances x_col by one */
+		}
+		if (x_adj_ok && !isu8cont((unsigned char)p[1]) &&
+		    (x_col < 0 || x_col >= (xx_cols - 2)))
+			x_adjust();	/* only when p[1] is not a continuation byte */
+	}
+}
+
 static int
 x_mv_back(int c)
 {
@@ -1034,7 +1069,7 @@ x_clear_screen(int c)
 static void
 x_redraw(int limit)
 {
-	int	i, j, truncate = 0;
+	int	i, j, truncate = 0, dcols;
 	char	*cp;
 
 	x_adj_ok = 0;
@@ -1074,10 +1109,12 @@ x_redraw(int limit)
 	if (xbp != xbuf || xep > xlp)
 		limit = xx_cols;
 	if (limit >= 0) {
-		if (xep > xlp)
-			i = 0;			/* we fill the line */
-		else
-			i = limit - (xlp - xbp);
+		dcols = 0;
+		for (cp = xbp; cp < xlp; cp++)
+			dcols += x_size((unsigned char)*cp);
+		i = limit - dcols;
+		if (i < 0)
+			i = 0;
 
 		for (j = 0; j < i && x_col < (xx_cols - 2); j++)
 			x_e_putc(' ');
@@ -1134,8 +1171,8 @@ x_transpose(int c)
 		 */
 		x_bs(xcp[-1]);
 		x_bs(xcp[-2]);
-		x_zotc(xcp[-1]);
-		x_zotc(xcp[-2]);
+		x_zotcp(&xcp[-1]);
+		x_zotcp(&xcp[-2]);
 		tmp = xcp[-1];
 		xcp[-1] = xcp[-2];
 		xcp[-2] = tmp;
@@ -1144,8 +1181,8 @@ x_transpose(int c)
 		 * cursor, move cursor position along one.
 		 */
 		x_bs(xcp[-1]);
-		x_zotc(xcp[0]);
-		x_zotc(xcp[-1]);
+		x_zotcp(&xcp[0]);
+		x_zotcp(&xcp[-1]);
 		tmp = xcp[-1];
 		xcp[-1] = xcp[0];
 		xcp[0] = tmp;
@@ -1806,12 +1843,22 @@ do_complete(int flags,	/* XCF_{COMMAND,FILE,COMMAND_FILE} */
 static void
 x_adjust(void)
 {
+	char *cp;
+	int col;
+
 	x_adj_done++;			/* flag the fact that we were called. */
 	/*
 	 * we had a problem if the prompt length > xx_cols / 2
 	 */
-	if ((xbp = xcp - (x_displen / 2)) < xbuf)
-		xbp = xbuf;
+	col = x_displen / 2;
+	cp = xcp;
+	while (cp > xbuf && col > 0) {
+		cp--;
+		while (cp > xbuf && isu8cont(*cp))
+			cp--;
+		col -= x_size((unsigned char)*cp);
+	}
+	xbp = cp;
 	xlp_valid = false;
 	x_redraw(xx_cols);
 	x_flush();
@@ -2165,6 +2212,8 @@ x_lastcp(void)
 		for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp++)
 			i += x_size((unsigned char)*rcp);
 		xlp = rcp;
+		while (xlp < xep && isu8cont(*xlp))
+			xlp++;
 	}
 	xlp_valid = true;
 	return (xlp);
diff --git regress/bin/ksh/edit/emacs.sh regress/bin/ksh/edit/emacs.sh
index 6ae4184e928..736d54f0616 100644
--- regress/bin/ksh/edit/emacs.sh
+++ regress/bin/ksh/edit/emacs.sh
@@ -76,6 +76,38 @@ testseq "aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccc
 testseq "aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc\0001\000RS81" \
 	" # aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc\rbbbbbbbbbbbbccccccccccccccccccccccccc                                         <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r # aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r\033[J\r # aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
 
+# ASCII window indicator at left edge
+testseq "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \
+	" # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                         <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 window indicator at left edge
+testseq "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \
+	" # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\r\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                         <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# ASCII redraw at right edge
+PS1='012345678901234567890123456789012345678901234567890123456789'
+testseq "#aaaaaaaaaaaaaaaaab\0033#" "012345678901234567890123456789012345678901234567890123456789#aaaaaaaaaaaaaaaaa\raaaaaaaaa                                                                     <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bb\r012345678901234567890123456789012345678901234567890123456789aaaaaaaaaaaaaaaaab \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 redraw at right edge
+testseq "#aaaaaaaaaaaaaaaaa\0303\0266\0033#" "012345678901234567890123456789012345678901234567890123456789#aaaaaaaaaaaaaaaaa\raaaaaaaaa                                                                     <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\0303\0266\r012345678901234567890123456789012345678901234567890123456789aaaaaaaaaaaaaaaaa\0303\0266 \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+PS1=' # '
+
+# ASCII window indicator at right edge
+testseq "\003342ab\003397b\0001" \
+	" # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                         <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                       <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 window indicator at right edge
+testseq "\003342a\0303\0266\003397b\0001" \
+	" # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                         <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                       <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# ASCII window indicator on both sides
+testseq "\003342ab\003397b\0001\003379\0006" \
+	" # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                         <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                       <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb*\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 window indicator on both sides
+testseq "\003342a\0303\0266\003397b\0001\003379\0006" \
+	" # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                         <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb                                       <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\ra\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb*\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
 # insertion of incomplete UTF-8
 testseq "z\0002\0302\0006" " # z\b\0302z\bz"
 testseq "z\0002\0377\0006" " # z\b\0377z\bz"
-- 
2.52.0