1 /* ----------------------------------------------------------------------- *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
/*
 * Convert a single digit character to its numeric value:
 * '0'..'9' -> 0..9, 'A'..'Z' -> 10.., 'a'..'z' -> 10..
 *
 * The caller is expected to have validated that c is a digit in the
 * radix being parsed; no range checking is done here.
 *
 * This is a function rather than a function-like macro so that the
 * argument is evaluated exactly once (an argument such as *p++ would
 * otherwise be expanded, and evaluated, more than once).
 */
static int numvalue(int c)
{
    if (c >= 'a')
        return c - 'a' + 10;
    if (c >= 'A')
        return c - 'A' + 10;
    return c - '0';
}
/*
 * Build a quoted copy of the len-byte buffer at str in newly allocated
 * memory (nasm_malloc).
 *
 * A first pass over the input decides whether the plain '...' or "..."
 * forms are usable (sq_ok / dq_ok) and accumulates qlen, the length the
 * contents would need in the `...` form.  If either plain form is
 * usable the input is copied verbatim between two quote characters;
 * otherwise a second pass writes the escaped `...` form.
 *
 * NOTE(review): the initialisation of ep (presumably str + len), the
 * remaining escape cases and the return statement (presumably nstr) are
 * not visible in this excerpt -- confirm against the full source.
 */
47 char *nasm_quote(char *str, size_t len)
49 char c, c1, *p, *q, *nstr, *ep;
56 qlen = 0; /* Length if we need `...` quotes */
/* Pass 1: classify every input byte and size the escaped form. */
57 for (p = str; p < ep; p++) {
/* Anything outside ' '..'~' rules out both plain quote forms. */
73 if (c < ' ' || c > '~') {
74 sq_ok = dq_ok = false;
/* Peek at the following byte; 0 stands in for "end of input". */
87 c1 = (p+1 < ep) ? p[1] : 0;
/*
 * If an octal digit follows, the escape is forced to its widest value
 * (presumably so that a short octal escape cannot absorb the next
 * character as one of its own digits).
 */
88 if (c1 >= '0' && c1 <= '7')
89 uc = 0377; /* Must use the full form */
106 if (sq_ok || dq_ok) {
107 /* Use '...' or "..." */
/* Room for the contents plus two quote characters and one more byte. */
108 nstr = nasm_malloc(len+3);
/* Single quotes are preferred whenever they are usable. */
109 nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"';
111 memcpy(nstr+1, str, len);
113 /* Need to use `...` quoted syntax */
114 nstr = nasm_malloc(qlen+3);
/* Pass 2: emit the escaped form, mirroring the decisions of pass 1. */
117 for (p = str; p < ep; p++) {
158 if (c < ' ' || c > '~') {
159 c1 = (p+1 < ep) ? p[1] : 0;
160 if (c1 >= '0' && c1 <= '7')
161 uc = 0377; /* Must use the full form */
/* Octal digits of the byte: bits 7-6, then bits 5-3, then bits 2-0. */
166 *q++ = ((unsigned char)c >> 6) + '0';
168 *q++ = (((unsigned char)c >> 3) & 7) + '0';
169 *q++ = ((unsigned char)c & 7) + '0';
/* Cross-check: the bytes written must match the size computed in pass 1. */
179 nasm_assert((size_t)(q-nstr) == qlen+3);
/*
 * Store the code point v at q as a UTF-8 style multi-byte sequence,
 * advancing q past every byte written.
 *
 * The encoding length is selected by the magnitude of v:
 *   <= 0x7f        (body not visible here; presumably a single byte)
 *   <= 0x7ff       2 bytes, lead byte 0xc0
 *   <= 0xffff      3 bytes, lead byte 0xe0
 *   <= 0x1fffff    4 bytes, lead byte 0xf0
 *   <= 0x3ffffff   5 bytes, lead byte 0xf8
 *   otherwise      6 bytes, lead byte 0xfc
 * Every continuation byte is 0x80 plus six payload bits.  The five- and
 * six-byte forms go beyond the four-byte maximum of current UTF-8
 * (RFC 3629).
 *
 * NOTE(review): the condition guarding the "Impossible" branch
 * (presumably v < 0) and the return statement (presumably the advanced
 * q, as the callers assign the result back to q) are not visible in
 * this excerpt -- confirm against the full source.
 */
184 static char *emit_utf8(char *q, int32_t v)
187 /* Impossible - do nothing */
188 } else if (v <= 0x7f) {
190 } else if (v <= 0x000007ff) {
191 *q++ = 0xc0 | (v >> 6);
192 *q++ = 0x80 | (v & 63);
193 } else if (v <= 0x0000ffff) {
194 *q++ = 0xe0 | (v >> 12);
195 *q++ = 0x80 | ((v >> 6) & 63);
196 *q++ = 0x80 | (v & 63);
197 } else if (v <= 0x001fffff) {
198 *q++ = 0xf0 | (v >> 18);
199 *q++ = 0x80 | ((v >> 12) & 63);
200 *q++ = 0x80 | ((v >> 6) & 63);
201 *q++ = 0x80 | (v & 63);
202 } else if (v <= 0x03ffffff) {
203 *q++ = 0xf8 | (v >> 24);
204 *q++ = 0x80 | ((v >> 18) & 63);
205 *q++ = 0x80 | ((v >> 12) & 63);
206 *q++ = 0x80 | ((v >> 6) & 63);
207 *q++ = 0x80 | (v & 63);
/* Largest form: everything above 0x03ffffff takes six bytes. */
209 *q++ = 0xfc | (v >> 30);
210 *q++ = 0x80 | ((v >> 24) & 63);
211 *q++ = 0x80 | ((v >> 18) & 63);
212 *q++ = 0x80 | ((v >> 12) & 63);
213 *q++ = 0x80 | ((v >> 6) & 63);
214 *q++ = 0x80 | (v & 63);
220 * Do an *in-place* dequoting of the specified string, returning the
221 * resulting length (the result may contain embedded nulls.)
223 * In-place replacement is possible since the unquoted length is always
224 * shorter than or equal to the quoted length.
226 * *ep points to the final quote, or to the null if improperly quoted.
/*
 * Visible structure of the dequoting routine:
 *  - for '...' and "..." strings the input is scanned up to the
 *    matching quote bq or the terminating NUL;
 *  - otherwise a state machine decodes escape sequences, reading
 *    through p and writing through q into the same buffer;
 *  - input that is not a quoted string at all is left untouched.
 *
 * NOTE(review): the declarations, the state and case labels and the
 * return statement are not visible in this excerpt; the comments below
 * describe only what the visible lines establish.
 */
228 size_t nasm_unquote(char *str, char **ep)
253 /* '...' or "..." string */
254 while ((c = *p) && c != bq) {
271 state = st_backslash;
284 escp = p; /* Beginning of argument sequence */
333 ndig = 2; /* Up to two more digits */
/* Octal escape: fold each further octal digit into nval, 3 bits each. */
343 if (c >= '0' && c <= '7') {
344 nval = (nval << 3) + (c - '0');
350 p--; /* Process this character again */
/* Hex escape: fold each hex digit into nval, 4 bits each. */
357 if ((c >= '0' && c <= '9') ||
358 (c >= 'A' && c <= 'F') ||
359 (c >= 'a' && c <= 'f')) {
360 nval = (nval << 4) + numvalue(c);
366 p--; /* Process this character again */
/*
 * Store nval only if p moved past escp, i.e. at least one digit was
 * consumed; otherwise store the character just before the argument
 * (presumably the escape letter itself -- confirm).
 */
367 *q++ = (p > escp) ? nval : escp[-1];
/* Hex-digit escape whose accumulated value is written out via emit_utf8(). */
373 if ((c >= '0' && c <= '9') ||
374 (c >= 'A' && c <= 'F') ||
375 (c >= 'a' && c <= 'f')) {
376 nval = (nval << 4) + numvalue(c);
378 q = emit_utf8(q, nval);
382 p--; /* Process this character again */
384 q = emit_utf8(q, nval);
/*
 * NOTE(review): the two stores below repeat the patterns used above;
 * presumably they flush an escape that was still pending when the input
 * ended -- confirm against the full source.
 */
400 *q++ = (p > escp) ? nval : escp[-1];
404 q = emit_utf8(q, nval);
413 /* Not a quoted string, just return the input... */
/* Both the read and the write pointer are placed on the terminating NUL. */
414 p = q = strchr(str, '\0');
424 * Find the end of a quoted string; returns the pointer to the terminating
425 * character (either the ending quote or the null character, if unterminated.)
/*
 * Three outcomes are visible below:
 *  - '...' or "..." string: scan from str+1 up to the matching quote bq
 *    or the terminating NUL;
 *  - `...` string: run a small state machine so that a backquote
 *    following a backslash is not taken as the end of the string;
 *  - anything else: return str unchanged.
 *
 * NOTE(review): the assignment of bq (presumably the first character of
 * str) and most of the state machine are not visible in this excerpt --
 * confirm against the full source.
 */
427 char *nasm_skip_string(char *str)
438 if (bq == '\'' || bq == '\"') {
439 /* '...' or "..." string */
440 for (p = str+1; *p && *p != bq; p++)
443 } else if (bq == '`') {
453 state = st_backslash;
456 return p-1; /* Found the end */
464 * Note: for the purpose of finding the end of the string,
465 * all successor states to st_backslash are functionally
466 * equivalent to st_start, since either a backslash or
467 * a backquote will force a return to the st_start state.
473 return p; /* Unterminated string... */
475 return str; /* Not a string... */