/*
 * Extracted from patch 7f2f8b35e6b8ea67fe620f3363b5eaa06f55a222 to nasmlib.c
 * (H. Peter Anvin, Sun, 1 Jun 2008 16:07:48 -0700):
 *
 *   qstring: add nasm_unquote() supporting `...`
 *
 *   Add a nasm_unquote() function supporting the intended `...` syntax.
 *
 * In the patch these definitions are inserted between nasm_quote() and
 * nasm_strcat().
 */

/*
 * Append the UTF-8 encoding of code point v at q and return the
 * pointer just past the last byte written.  The historical 5- and
 * 6-byte forms are generated for values above 0x1fffff, so any
 * non-negative int32_t is encodable (at most 6 bytes are written).
 * A negative v writes nothing and returns q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    if (v < 0) {
        /* Impossible - do nothing */
    } else if (v <= 0x7f) {
        *q++ = (char)v;
    } else if (v <= 0x000007ff) {
        *q++ = (char)(0xc0 | (v >> 6));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x0000ffff) {
        *q++ = (char)(0xe0 | (v >> 12));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x001fffff) {
        *q++ = (char)(0xf0 | (v >> 18));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x03ffffff) {
        *q++ = (char)(0xf8 | (v >> 24));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else {
        *q++ = (char)(0xfc | (v >> 30));
        *q++ = (char)(0x80 | ((v >> 24) & 63));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    }
    return q;
}

/* Value (0..15) of the hexadecimal digit c, or -1 if c is not one. */
static int hex_digit_value(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}

/*
 * Emit an accumulated \u / \U value.  Values that do not fit in a
 * non-negative int32_t cannot be encoded by emit_utf8() and produce
 * no output.
 */
static char *emit_ucs(char *q, uint32_t v)
{
    return v <= INT32_MAX ? emit_utf8(q, (int32_t)v) : q;
}

/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded nulls, so the
 * returned length rather than strlen() is authoritative).
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 *
 * Three input forms are recognized:
 *   '...' or "..."  - the delimiters are stripped, contents are literal;
 *   `...`           - the delimiters are stripped and C-style backslash
 *                     escapes are expanded: \a \b \e \f \n \r \t \v,
 *                     up to three octal digits, \x or \X with up to two
 *                     hex digits, \u with up to four and \U with up to
 *                     eight hex digits (emitted as UTF-8).  Any other
 *                     escaped character stands for itself.  \x, \u or \U
 *                     without any digit yields the letter itself.
 *   anything else   - returned untouched, with its strlen().
 *
 * The result is always NUL-terminated at the returned length.
 */
size_t nasm_unquote(char *str)
{
    size_t ln;
    char bq, eq;
    char *p, *q, *ep;
    char c;
    char escc = 0;              /* the character following the backslash */
    int d;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;               /* digits consumed for the current escape */
    int maxdig = 0;             /* digit limit for the current \u or \U */
    uint32_t nval = 0;          /* unsigned: shifting can never overflow */

    bq = str[0];
    if (!bq)
        return 0;
    ln = strlen(str);
    eq = str[ln-1];

    /*
     * ln >= 2 guards against a lone quote character, for which bq and
     * eq are the same byte and ln-2 would wrap around.
     */
    if ((bq == '\'' || bq == '\"') && bq == eq && ln >= 2) {
        /* '...' or "..." string */
        memmove(str, str+1, ln-2);
        str[ln-2] = '\0';
        return ln-2;
    }

    /*
     * NOTE(review): a backquote at *either* end selects this path, and
     * the first and last bytes are then dropped unconditionally.  Kept
     * as in the original; confirm whether both ends should be required.
     */
    if (bq == '`' || eq == '`') {
        /* `...` string */
        q = str;
        p = str+1;
        ep = str+ln-1;
        state = st_start;

        while (p < ep) {
            c = *p++;
            switch (state) {
            case st_start:
                if (c == '\\')
                    state = st_backslash;
                else
                    *q++ = c;
                break;

            case st_backslash:
                state = st_start;
                escc = c;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    maxdig = 4;
                    ndig = 0;
                    nval = 0;
                    break;
                case 'U':
                    state = st_ucs;
                    maxdig = 8;
                    ndig = 0;
                    nval = 0;
                    break;
                case 'v':
                    *q++ = 11;
                    break;      /* must not fall into the \x handling */
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 0;
                    nval = 0;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 1;
                    nval = (uint32_t)(c - '0');
                    break;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (uint32_t)(c - '0');
                    if (++ndig >= 3) {
                        *q++ = (char)(unsigned char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = (char)(unsigned char)nval;
                    state = st_start;
                }
                break;

            case st_hex:
                d = hex_digit_value(c);
                if (d >= 0) {
                    nval = (nval << 4) + (uint32_t)d;
                    if (++ndig >= 2) {
                        *q++ = (char)(unsigned char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = ndig ? (char)(unsigned char)nval : escc;
                    state = st_start;
                }
                break;

            case st_ucs:
                d = hex_digit_value(c);
                if (d >= 0) {
                    nval = (nval << 4) + (uint32_t)d;
                    if (++ndig >= maxdig) {
                        q = emit_ucs(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (ndig)
                        q = emit_ucs(q, nval);
                    else
                        *q++ = escc;
                    state = st_start;
                }
                break;
            }
        }

        /*
         * The closing quote may arrive while a numeric escape is still
         * collecting digits; emit what has been gathered so that e.g.
         * a trailing \0 is not silently lost.  A trailing lone backslash
         * produces nothing.
         */
        switch (state) {
        case st_oct:
            *q++ = (char)(unsigned char)nval;
            break;
        case st_hex:
            *q++ = ndig ? (char)(unsigned char)nval : escc;
            break;
        case st_ucs:
            if (ndig)
                q = emit_ucs(q, nval);
            else
                *q++ = escc;
            break;
        case st_start:
        case st_backslash:
        default:
            break;
        }

        *q = '\0';
        return (size_t)(q - str);
    }

    /* Otherwise, just return the input... */
    return ln;
}