Download raw body.
bin/ksh: fix emacs mode UTF-8 display logic
On Thu, 22 Jan 2026 17:00:59 +0100,
"Kirill A. Korinsky" <kirill@korins.ky> wrote:
>
> tech@,
>
> Here is the second diff, which addresses the discovered issue with the UTF-8
> display logic when resizing the terminal, but only for emacs mode.
>
> The emacs line editor counts bytes when deciding what fits on screen and
> can stop in the middle of a UTF-8 sequence. This leaves partial UTF-8
> sequences on output, misplaces the "<" indicator, and can desync cursor
> movement after a resize.
>
> Track display columns instead of bytes by skipping UTF-8 continuation
> bytes when positioning and redrawing. Add x_zotcp() to emit buffer bytes
> without splitting UTF-8 sequences, and use it in cursor motion and
> transpose paths.
>
Here is an updated version which also includes tests.
Ok?
diff --git bin/ksh/emacs.c bin/ksh/emacs.c
index 43223bc06b2..107dd0e575f 100644
--- bin/ksh/emacs.c
+++ bin/ksh/emacs.c
@@ -130,6 +130,7 @@ static int x_size_str(char *);
static int x_size(int);
static void x_zots(char *);
static void x_zotc(int);
+static void x_zotcp(char *);
static void x_load_hist(char **);
static int x_search(char *, int, int);
static int x_match(char *, char *);
@@ -643,16 +644,17 @@ x_fword(void)
static void
x_goto(char *cp)
{
- if (cp < xbp || cp >= (xbp + x_displen)) {
+ x_lastcp();
+ if (cp < xbp || cp > xlp) {
/* we are heading off screen */
xcp = cp;
x_adjust();
} else if (cp < xcp) { /* move back */
while (cp < xcp)
- x_bs((unsigned char)*--xcp);
+ x_bs(*--xcp);
} else if (cp > xcp) { /* move forward */
while (cp > xcp)
- x_zotc((unsigned char)*xcp++);
+ x_zotcp(xcp++);
}
}
@@ -671,7 +673,7 @@ x_size_str(char *cp)
{
int size = 0;
while (*cp)
- size += x_size(*cp++);
+ size += x_size((unsigned char)*cp++);
return size;
}
@@ -699,7 +701,7 @@ x_zots(char *str)
}
x_lastcp();
while (*str && str < xlp && adj == x_adj_done)
- x_zotc(*str++);
+ x_zotcp(str++);
}
static void
@@ -715,6 +717,39 @@ x_zotc(int c)
x_e_putc(c);
}
+static void
+x_zotcp(char *p)
+{
+ unsigned char uc = (unsigned char)*p;
+
+ if (uc == '\t') {
+ /* Kludge, tabs are always four spaces. */
+ x_e_puts(" ");
+ return;
+ }
+ if (isu8cont(uc)) {
+ if (x_col <= xx_cols) {
+ x_putc(uc);
+ }
+ if (x_adj_ok && !isu8cont((unsigned char)p[1]) &&
+ (x_col < 0 || x_col >= (xx_cols - 2)))
+ x_adjust();
+ return;
+ }
+ if (iscntrl(uc)) {
+ x_e_putc('^');
+ x_e_putc(UNCTRL(uc));
+ } else {
+ if (x_col < xx_cols) {
+ x_putc(uc);
+ x_col++;
+ }
+ if (x_adj_ok && !isu8cont((unsigned char)p[1]) &&
+ (x_col < 0 || x_col >= (xx_cols - 2)))
+ x_adjust();
+ }
+}
+
static int
x_mv_back(int c)
{
@@ -1034,7 +1069,7 @@ x_clear_screen(int c)
static void
x_redraw(int limit)
{
- int i, j, truncate = 0;
+ int i, j, truncate = 0, dcols;
char *cp;
x_adj_ok = 0;
@@ -1074,10 +1109,12 @@ x_redraw(int limit)
if (xbp != xbuf || xep > xlp)
limit = xx_cols;
if (limit >= 0) {
- if (xep > xlp)
- i = 0; /* we fill the line */
- else
- i = limit - (xlp - xbp);
+ dcols = 0;
+ for (cp = xbp; cp < xlp; cp++)
+ dcols += x_size((unsigned char)*cp);
+ i = limit - dcols;
+ if (i < 0)
+ i = 0;
for (j = 0; j < i && x_col < (xx_cols - 2); j++)
x_e_putc(' ');
@@ -1134,8 +1171,8 @@ x_transpose(int c)
*/
x_bs(xcp[-1]);
x_bs(xcp[-2]);
- x_zotc(xcp[-1]);
- x_zotc(xcp[-2]);
+ x_zotcp(&xcp[-1]);
+ x_zotcp(&xcp[-2]);
tmp = xcp[-1];
xcp[-1] = xcp[-2];
xcp[-2] = tmp;
@@ -1144,8 +1181,8 @@ x_transpose(int c)
* cursor, move cursor position along one.
*/
x_bs(xcp[-1]);
- x_zotc(xcp[0]);
- x_zotc(xcp[-1]);
+ x_zotcp(&xcp[0]);
+ x_zotcp(&xcp[-1]);
tmp = xcp[-1];
xcp[-1] = xcp[0];
xcp[0] = tmp;
@@ -1806,12 +1843,22 @@ do_complete(int flags, /* XCF_{COMMAND,FILE,COMMAND_FILE} */
static void
x_adjust(void)
{
+ char *cp;
+ int col;
+
x_adj_done++; /* flag the fact that we were called. */
/*
* we had a problem if the prompt length > xx_cols / 2
*/
- if ((xbp = xcp - (x_displen / 2)) < xbuf)
- xbp = xbuf;
+ col = x_displen / 2;
+ cp = xcp;
+ while (cp > xbuf && col > 0) {
+ cp--;
+ while (cp > xbuf && isu8cont(*cp))
+ cp--;
+ col -= x_size((unsigned char)*cp);
+ }
+ xbp = cp;
xlp_valid = false;
x_redraw(xx_cols);
x_flush();
@@ -2165,6 +2212,8 @@ x_lastcp(void)
for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp++)
i += x_size((unsigned char)*rcp);
xlp = rcp;
+ while (xlp < xep && isu8cont(*xlp))
+ xlp++;
}
xlp_valid = true;
return (xlp);
diff --git regress/bin/ksh/edit/emacs.sh regress/bin/ksh/edit/emacs.sh
index 6ae4184e928..736d54f0616 100644
--- regress/bin/ksh/edit/emacs.sh
+++ regress/bin/ksh/edit/emacs.sh
@@ -76,6 +76,38 @@ testseq "aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccc
testseq "aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc\0001\000RS81" \
" # aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc\rbbbbbbbbbbbbccccccccccccccccccccccccc <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r # aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r\033[J\r # aaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccc \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+# ASCII window indicator at left edge
+testseq "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \
+ " # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 window indicator at left edge
+testseq "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \
+ " # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\r\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# ASCII redraw at right edge
+PS1='012345678901234567890123456789012345678901234567890123456789'
+testseq "#aaaaaaaaaaaaaaaaab\0033#" "012345678901234567890123456789012345678901234567890123456789#aaaaaaaaaaaaaaaaa\raaaaaaaaa <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bb\r012345678901234567890123456789012345678901234567890123456789aaaaaaaaaaaaaaaaab \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 redraw at right edge
+testseq "#aaaaaaaaaaaaaaaaa\0303\0266\0033#" "012345678901234567890123456789012345678901234567890123456789#aaaaaaaaaaaaaaaaa\raaaaaaaaa <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\0303\0266\r012345678901234567890123456789012345678901234567890123456789aaaaaaaaaaaaaaaaa\0303\0266 \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+PS1=' # '
+
+# ASCII window indicator at right edge
+testseq "\003342ab\003397b\0001" \
+ " # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 window indicator at right edge
+testseq "\003342a\0303\0266\003397b\0001" \
+ " # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# ASCII window indicator on both sides
+testseq "\003342ab\003397b\0001\003379\0006" \
+ " # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb*\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
+# UTF-8 window indicator on both sides
+testseq "\003342a\0303\0266\003397b\0001\003379\0006" \
+ " # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\raaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\rbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb <\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bbbbbbbbbbbbbbbbbbbbbbbbb\r # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\ra\0303\0266bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb*\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+
# insertion of incomplete UTF-8
testseq "z\0002\0302\0006" " # z\b\0302z\bz"
testseq "z\0002\0377\0006" " # z\b\0377z\bz"
--
2.52.0
bin/ksh: fix emacs mode UTF-8 display logic