Download raw body.
[PATCH] ksh: add support for $'...' strings
Hello,
Some shells implement the $'...' syntax (i.e. "dollar-single-quoted"
strings), which behaves like '...' but expands C-style escapes (like \n
or \xFF) as if they were inserted inline. This behaviour has been
accepted into POSIX as of POSIX 2024; the patch below adds support for it
to our ksh.
Thanks,
--
Tevo
diff --git bin/ksh/lex.c bin/ksh/lex.c
index b6d4279e6..abbe0b495 100644
--- bin/ksh/lex.c
+++ bin/ksh/lex.c
@@ -31,6 +31,7 @@
#define SPATTERN 11 /* parsing *(...|...) pattern (*+?@!) */
#define STBRACE 12 /* parsing ${..[#%]..} */
#define SBRACEQ 13 /* inside "${}" */
+#define SDSQUOTE 14 /* inside $'' */
/* Structure to keep track of the lexing state and the various pieces of info
* needed for each particular state.
@@ -80,6 +81,7 @@ static void readhere(struct ioword *);
static int getsc__(void);
static void getsc_line(Source *);
static int getsc_bn(void);
+static char get_dsquote_escape(void);
static char *get_brace_var(XString *, char *);
static int arraysub(char **);
static const char *ungetsc(int);
@@ -322,6 +324,15 @@ yylex(int cf)
*wp++ = OQUOTE;
PUSH_STATE(SDQUOTE);
break;
+			case '$':	/* $' opens a dollar-single-quoted string */
+				if ((c2 = getsc()) == '\'') {
+					*wp++ = OQUOTE;
+					ignore_backslash_newline++;	/* paired with the -- in case SDSQUOTE */
+					PUSH_STATE(SDSQUOTE);
+					break;
+				}
+				ungetsc(c2);	/* not $': let Subst re-read the character */
+				/* FALLTHROUGH */
default:
goto Subst;
}
@@ -471,6 +482,19 @@ yylex(int cf)
goto Subst;
break;
+ case SDSQUOTE: /* $' .. ' */
+ if (c == '\'') { /* unescaped quote closes the string */
+ POP_STATE();
+ *wp++ = CQUOTE;
+ ignore_backslash_newline--; /* NOTE(review): must pair with a ++ where $' is opened -- verify */
+ } else {
+ if (c == '\\') c = get_dsquote_escape(); /* replace the escape by the byte it denotes */
+ *wp++ = QCHAR; /* every byte inside $'...' is emitted as a quoted char */
+ *wp++ = c;
+ }
+
+ break;
+
case SCSPAREN: /* $( .. ) */
/* todo: deal with $(...) quoting properly
* kludge to partly fake quoting inside $(..): doesn't
@@ -1511,6 +1535,63 @@ promptlen(const char *cp, const char **spp)
return dopprompt(cp, 0, spp, 0);
}
+/* Decode one escape inside a dollar-single-quoted string; the backslash has
+ * already been consumed. Returns the resulting byte. An unknown escape yields
+ * the backslash itself and leaves the next character to be read as text.
+ */
+static char
+get_dsquote_escape(void) {
+	int c;
+	unsigned char ret = 0;
+
+	switch (c = getsc()) {
+	case 'a': ret = '\a'; break;
+	case 'b': ret = '\b'; break;
+	case 'e': ret = '\033'; break;	/* ESC; '\e' is not standard C */
+	case 'f': ret = '\f'; break;
+	case 'n': ret = '\n'; break;
+	case 'r': ret = '\r'; break;
+	case 't': ret = '\t'; break;
+	case 'v': ret = '\v'; break;
+	case '"': ret = '"'; break;
+	case '\\': ret = '\\'; break;
+	case '\'': ret = '\''; break;
+	case 'c': /* control char */
+		c = getsc();
+		if (c == '\\') c = get_dsquote_escape();
+		/* test for '?' before masking; the mask also folds letter case */
+		ret = c == '?' ? 0x7f : (c & 0x1f);
+		break;
+	case 'x': /* hex, at most two digits */
+		for (int i = 0; i < 2; i++) {
+			c = getsc();
+			if (!isxdigit((unsigned char)c)) {
+				ungetsc(c);
+				break;
+			}
+			/* only letters need case folding; digits convert directly */
+			if (digit(c))
+				ret = ret * 16 + (c - '0');
+			else
+				ret = ret * 16 + (upper(c) - 'A' + 10);
+		}
+		break;
+	default: /* octal, at most three digits */
+		if (c < '0' || c > '7') {
+			ungetsc(c);
+			return '\\';
+		}
+		for (int i = 0; i < 3 && c >= '0' && c <= '7'; i++) {
+			ret = ret * 8 + (c - '0');
+			c = getsc();
+		}
+		ungetsc(c);
+	}
+
+	return (char)ret;
+}
+
/* Read the variable part of a ${...} expression (ie, up to but not including
* the :[-+?=#%] or close-brace.
*/
diff --git bin/ksh/sh.h bin/ksh/sh.h
index bffb374e0..760c54399 100644
--- bin/ksh/sh.h
+++ bin/ksh/sh.h
@@ -287,6 +287,8 @@ extern short ctypes [];
#define digit(c) isdigit((unsigned char)(c))
#define letnum(c) (ctype(c, C_ALPHA) || isdigit((unsigned char)(c)))
+#define upper(c)	toupper((unsigned char)(c))	/* non-letters pass through unchanged; c is evaluated once */
+
extern int ifs0; /* for "$*" */
/* Argument parsing for built-in commands and getopts command */
[PATCH] ksh: add support for $'...' strings