Index | Thread | Search

From:
Estevan Castilho <tevo@riajuu.net>
Subject:
[PATCH] ksh: add support for $'...' strings
To:
tech@openbsd.org
Date:
Sat, 29 Mar 2025 16:49:26 +0000

Download raw body.

Thread
  • Estevan Castilho:

    [PATCH] ksh: add support for $'...' strings

Hello,

Some shells implement the $'...' syntax (i.e. "dollar-single-quoted"
strings), which behave like '...' but expand C-style escapes (like \n
or \xFF) as if the resulting characters had been inserted inline. This
behaviour was accepted into POSIX as of POSIX 2024; the patch below adds
support for it to our ksh.

Thanks,

--
Tevo

diff --git bin/ksh/lex.c bin/ksh/lex.c
index b6d4279e6..abbe0b495 100644
--- bin/ksh/lex.c
+++ bin/ksh/lex.c
@@ -31,6 +31,7 @@
 #define SPATTERN 11		/* parsing *(...|...) pattern (*+?@!) */
 #define STBRACE 12		/* parsing ${..[#%]..} */
 #define	SBRACEQ	13		/* inside "${}" */
+#define SDSQUOTE 14		/* inside $'' */
 
 /* Structure to keep track of the lexing state and the various pieces of info
  * needed for each particular state.
@@ -80,6 +81,7 @@ static void	readhere(struct ioword *);
 static int	getsc__(void);
 static void	getsc_line(Source *);
 static int	getsc_bn(void);
+static char	get_dsquote_escape(void);
 static char	*get_brace_var(XString *, char *);
 static int	arraysub(char **);
 static const char *ungetsc(int);
@@ -322,6 +324,19 @@ yylex(int cf)
 				*wp++ = OQUOTE;
 				PUSH_STATE(SDQUOTE);
 				break;
+			case '$':
+				c2 = getsc();
+				if (c2 == '\'') {
+					*wp++ = OQUOTE;
+					/* balanced by the decrement done when
+					 * SDSQUOTE sees the closing quote
+					 */
+					ignore_backslash_newline++;
+					PUSH_STATE(SDSQUOTE);
+					break;
+				}
+				ungetsc(c2);
+				/* FALLTHROUGH */
 			default:
 				goto Subst;
 			}
@@ -471,6 +482,19 @@ yylex(int cf)
 				goto Subst;
 			break;
 
+		case SDSQUOTE: /* $' .. ' */
+			if (c == '\'') {
+				POP_STATE();
+				*wp++ = CQUOTE;
+				ignore_backslash_newline--;
+			} else {
+				if (c == '\\') c = get_dsquote_escape();
+				*wp++ = QCHAR;
+				*wp++ = c;
+			}
+
+			break;
+
 		case SCSPAREN: /* $( .. ) */
 			/* todo: deal with $(...) quoting properly
 			 * kludge to partly fake quoting inside $(..): doesn't
@@ -1511,6 +1535,70 @@ promptlen(const char *cp, const char **spp)
 	return dopprompt(cp, 0, spp, 0);
 }
 
+/* Read the escape sequence that follows a backslash inside a $'...' string
+ * and return the byte it stands for.  The caller has already consumed the
+ * backslash; a character read here that turns out not to belong to the
+ * escape is pushed back with ungetsc().
+ */
+static char
+get_dsquote_escape(void)
+{
+	unsigned int ret = 0;
+	int c, i;
+
+	switch (c = getsc()) {
+	case 'a': return '\a';
+	case 'b': return '\b';
+	case 'e': return '\033';	/* ESC; '\e' is not standard C */
+	case 'f': return '\f';
+	case 'n': return '\n';
+	case 'r': return '\r';
+	case 't': return '\t';
+	case 'v': return '\v';
+	case '"': return '"';
+	case '\\': return '\\';
+	case '\'': return '\'';
+
+	case 'c': /* control char */
+		c = getsc();
+		/* a backslash has to be escaped itself, i.e. \c\\ */
+		if (c == '\\')
+			c = get_dsquote_escape();
+		/* check for DEL before masking; the mask folds case too */
+		return c == '?' ? 0x7f : (c & 0x1f);
+
+	case 'x': /* hex, at most two digits */
+		for (i = 0; i < 2; i++) {
+			c = getsc();
+			if (!isxdigit((unsigned char)c)) {
+				ungetsc(c);
+				break;
+			}
+			ret *= 16;
+			if (isdigit((unsigned char)c))
+				ret += c - '0';
+			else
+				ret += (c | 0x20) - 'a' + 10;
+		}
+		return (char)ret;
+
+	default: /* octal, at most three digits */
+		if (c < '0' || c > '7') {
+			/* unknown escape: keep the backslash literally
+			 * instead of putting a NUL byte into the word
+			 */
+			ungetsc(c);
+			return '\\';
+		}
+		for (i = 0; i < 3 && c >= '0' && c <= '7'; i++) {
+			ret = ret * 8 + (c - '0');
+			c = getsc();
+		}
+		ungetsc(c);
+		return (char)ret;
+	}
+}
+
 /* Read the variable part of a ${...} expression (ie, up to but not including
  * the :[-+?=#%] or close-brace.
  */
diff --git bin/ksh/sh.h bin/ksh/sh.h
index bffb374e0..760c54399 100644
--- bin/ksh/sh.h
+++ bin/ksh/sh.h
@@ -287,6 +287,11 @@ extern	short ctypes [];
 #define	digit(c)	isdigit((unsigned char)(c))
 #define	letnum(c)	(ctype(c, C_ALPHA) || isdigit((unsigned char)(c)))
 
+/* Upper-case a letter and leave every other character untouched; the
+ * argument is evaluated exactly once.
+ */
+#define upper(c)	toupper((unsigned char)(c))
+
 extern int ifs0;	/* for "$*" */
 
 /* Argument parsing for built-in commands and getopts command */