1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free
4 Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Shorthand for looking up the translation of MSGID through gettext.  */
38 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged; no gettext call is made at this point.  */
39 #define N_(msgid) msgid
42 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
43 other macros are defined only for documentation and to satisfy C syntax. */
/* Single-byte stand-in for the conversion state type: a plain int.  */
48 # define mbstate_t int
/* Copy one byte from S into *PWC.  The result is 1 for a non-null
   byte and 0 for a null byte; N and PS are not used.  */
49 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
/* Classify WC with isprint after narrowing it to unsigned char.  */
50 # define iswprint(wc) isprint ((unsigned char) (wc))
/* When no mbsinit is available, treat every state as the initial one.  */
54 #if !defined mbsinit && !HAVE_MBSINIT
55 # define mbsinit(ps) 1
/* Largest size_t value, obtained by converting -1 to size_t.  */
59 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int.  */
62 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Quoting options: a basic style plus a set of extra characters that
   are to be quoted regardless of the style.  */
64 struct quoting_options
66 /* Basic quoting style. */
67 enum quoting_style style;
69 /* Quote the characters indicated by this bit vector even if the
70 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value, packed INT_BITS bits to each
   array element.  */
71 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
74 /* Names of quoting styles. */
75 char const *const quoting_style_args[] =
87 /* Correspondences to quoting style names. */
/* NOTE(review): presumably entry K of this array is the style named
   by entry K of quoting_style_args -- confirm against the complete
   initializers of both arrays.  */
88 enum quoting_style const quoting_style_vals[] =
90 literal_quoting_style,
92 shell_always_quoting_style,
99 /* The default quoting options. */
/* Functions in this file that accept a null options pointer use this
   object instead.  It has static storage and no initializer, so all
   of its fields start out as zero.  */
100 static struct quoting_options default_quoting_options;
102 /* Allocate a new set of quoting options, with contents initially identical
103 to O if O is not null, or to the default if O is null.
104 It is the caller's responsibility to free the result. */
105 struct quoting_options *
106 clone_quoting_options (struct quoting_options *o)
/* The source of the copy is O when it is non-null, otherwise the
   file-wide defaults.  */
109 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
115 /* Get the value of O's quoting style. If O is null, use the default. */
117 get_quoting_style (struct quoting_options *o)
/* Only reads the style field; neither O nor the defaults are changed.  */
119 return (o ? o : &default_quoting_options)->style;
122 /* In O (or in the default if O is null),
123 set the value of the quoting style to S. */
125 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* With a null O this writes to the file-wide defaults, so it affects
   every later call that falls back to them.  */
127 (o ? o : &default_quoting_options)->style = s;
130 /* In O (or in the default if O is null),
131 set the value of the quoting options for character C to I.
132 Return the old value. Currently, the only values defined for I are
133 0 (the default) and 1 (which means to quote the character even if
134 it would not otherwise be quoted). */
136 set_char_quoting (struct quoting_options *o, char c, int i)
/* Convert to unsigned char so that the element index and the bit
   position computed below are never negative.  */
138 unsigned char uc = c;
140 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Bit position of UC within its array element.  */
141 int shift = uc % INT_BITS;
/* Current setting of that bit, before any change.  */
142 int r = (*p >> shift) & 1;
/* Flip the bit only when the requested value (the low bit of I)
   differs from the current one.
   NOTE(review): the left operand of the shift is a signed int, so
   when SHIFT equals INT_BITS - 1 a 1 is shifted into the sign bit,
   which is undefined behavior in C99 and later.  Making R unsigned
   would avoid that -- confirm and fix.  */
143 *p ^= ((i & 1) ^ r) << shift;
147 /* MSGID approximates a quotation mark. Return its translation if it
148 has one; otherwise, return either it or "\"", depending on S. */
150 gettext_quote (char const *msgid, enum quoting_style s)
152 char const *translation = _(msgid);
/* Pointer comparison, not a string comparison: the test is whether
   the lookup handed back MSGID itself, which is taken to mean that
   no translation exists.  */
153 if (translation == msgid && s == clocale_quoting_style)
158 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
159 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
160 non-quoting-style part of O to control quoting.
161 Terminate the output with a null character, and return the written
162 size of the output, not counting the terminating null.
163 If BUFFERSIZE is too small to store the output string, return the
164 value that would have been returned had BUFFERSIZE been large enough.
165 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
167 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
168 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
169 style specified by O, and O may not be null. */
172 quotearg_buffer_restyled (char *buffer, size_t buffersize,
173 char const *arg, size_t argsize,
174 enum quoting_style quoting_style,
175 struct quoting_options const *o)
/* Quote text that is written after the argument (see the end of the
   function) and its length; both start out empty.  */
179 char const *quote_string = 0;
180 size_t quote_string_len = 0;
181 bool backslash_escapes = false;
/* True when every character occupies a single byte in the current
   locale.  */
182 bool unibyte_locale = MB_CUR_MAX == 1;
187 if (len < buffersize) \
193 switch (quoting_style)
195 case c_quoting_style:
197 backslash_escapes = true;
199 quote_string_len = 1;
202 case escape_quoting_style:
203 backslash_escapes = true;
206 case locale_quoting_style:
207 case clocale_quoting_style:
210 Get translations for open and closing quotation marks.
212 The message catalog should translate "`" to a left
213 quotation mark suitable for the locale, and similarly for
214 "'". If the catalog has no translation,
215 locale_quoting_style quotes `like this', and
216 clocale_quoting_style quotes "like this".
218 For example, an American English Unicode locale should
219 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
220 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
221 MARK). A British English Unicode locale should instead
222 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
223 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
225 If you don't know what to put here, please see
226 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
227 and use glyphs suitable for your language. */
229 char const *left = gettext_quote (N_("`"), quoting_style);
230 char const *right = gettext_quote (N_("'"), quoting_style);
231 for (quote_string = left; *quote_string; quote_string++)
232 STORE (*quote_string);
233 backslash_escapes = true;
234 quote_string = right;
235 quote_string_len = strlen (quote_string);
239 case shell_always_quoting_style:
242 quote_string_len = 1;
/* Walk ARG one byte at a time.  The loop ends at the terminating
   null when ARGSIZE is SIZE_MAX, and after ARGSIZE bytes otherwise.  */
249 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
/* Check whether the closing quote text occurs in ARG at offset I,
   without comparing past ARGSIZE.  */
254 if (backslash_escapes
256 && i + quote_string_len <= argsize
257 && memcmp (arg + i, quote_string, quote_string_len) == 0)
264 if (backslash_escapes)
274 switch (quoting_style)
276 case shell_quoting_style:
277 goto use_shell_always_quoting_style;
279 case c_quoting_style:
280 if (i + 2 < argsize && arg[i + 1] == '?')
284 case '(': case ')': case '-': case '/':
285 case '<': case '=': case '>':
286 /* Escape the second '?' in what would otherwise be
305 case '\a': esc = 'a'; goto c_escape;
306 case '\b': esc = 'b'; goto c_escape;
307 case '\f': esc = 'f'; goto c_escape;
308 case '\n': esc = 'n'; goto c_and_shell_escape;
309 case '\r': esc = 'r'; goto c_and_shell_escape;
310 case '\t': esc = 't'; goto c_and_shell_escape;
311 case '\v': esc = 'v'; goto c_escape;
312 case '\\': esc = c; goto c_and_shell_escape;
315 if (quoting_style == shell_quoting_style)
316 goto use_shell_always_quoting_style;
318 if (backslash_escapes)
325 case '{': case '}': /* sometimes special if isolated */
326 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
334 case '!': /* special in bash */
335 case '"': case '$': case '&':
336 case '(': case ')': case '*': case ';':
338 case '=': /* sometimes special in 0th or (with "set -k") later args */
340 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
342 /* A shell special character. In theory, '$' and '`' could
343 be the first bytes of multibyte characters, which means
344 we should check them with mbrtowc, but in practice this
345 doesn't happen so it's not worth worrying about. */
346 if (quoting_style == shell_quoting_style)
347 goto use_shell_always_quoting_style;
351 switch (quoting_style)
353 case shell_quoting_style:
354 goto use_shell_always_quoting_style;
356 case shell_always_quoting_style:
367 case '%': case '+': case ',': case '-': case '.': case '/':
368 case '0': case '1': case '2': case '3': case '4': case '5':
369 case '6': case '7': case '8': case '9': case ':':
370 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
371 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
372 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
373 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
374 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
375 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
376 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
377 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
378 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
379 /* These characters don't cause problems, no matter what the
380 quoting style is. They cannot start multibyte sequences. */
384 /* If we have a multibyte sequence, copy it until we reach
385 its end, find an error, or come back to the initial shift
386 state. For C-like styles, if the sequence has
387 unprintable characters, escape the whole sequence, since
388 we can't easily escape single characters within it. */
390 /* Length of multibyte sequence found so far. */
398 printable = isprint (c) != 0;
403 memset (&mbstate, 0, sizeof mbstate);
/* The multibyte scan needs a real byte count, so the SIZE_MAX
   sentinel is replaced by the actual string length here.  */
407 if (argsize == SIZE_MAX)
408 argsize = strlen (arg);
413 size_t bytes = mbrtowc (&w, &arg[i + m],
414 argsize - (i + m), &mbstate);
417 else if (bytes == (size_t) -1)
422 else if (bytes == (size_t) -2)
425 while (i + m < argsize && arg[i + m])
431 /* Work around a bug with older shells that "see" a '\'
432 that is really the 2nd byte of a multibyte character.
433 In practice the problem is limited to ASCII
434 chars >= '@' that are shell special chars. */
435 if ('[' == 0x5b && quoting_style == shell_quoting_style)
438 for (j = 1; j < bytes; j++)
439 switch (arg[i + m + j])
441 case '[': case '\\': case '^':
443 goto use_shell_always_quoting_style;
455 while (! mbsinit (&mbstate));
458 if (1 < m || (backslash_escapes && ! printable))
460 /* Output a multibyte sequence, or an escaped
461 unprintable unibyte character. */
466 if (backslash_escapes && ! printable)
/* Octal digits for the upper bits of the byte: first bits 6 and up,
   then bits 3 to 5.  */
469 STORE ('0' + (c >> 6));
470 STORE ('0' + ((c >> 3) & 7));
/* Test the bit for C in the extra-quoting vector of O.
   NOTE(review): the constant 1 is a signed int, so when
   C % INT_BITS equals INT_BITS - 1 the shift moves a 1 into the sign
   bit, which is undefined behavior in C99 and later.  An unsigned
   constant would avoid that -- confirm and fix.  */
484 if (! (backslash_escapes
485 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* NOTE(review): presumably evaluated after the loop, in which case
   I == 0 means that ARG was empty; such an argument is redone in
   shell_always_quoting_style -- confirm the placement.  */
495 if (i == 0 && quoting_style == shell_quoting_style)
496 goto use_shell_always_quoting_style;
499 for (; *quote_string; quote_string++)
500 STORE (*quote_string);
502 if (len < buffersize)
/* Every visible jump to this label is taken while the style is
   shell_quoting_style.  The whole conversion is then redone from the
   start of ARG with shell_always_quoting_style.  */
506 use_shell_always_quoting_style:
507 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
508 shell_always_quoting_style, o);
511 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
512 argument ARG (of size ARGSIZE), using O to control quoting.
513 If O is null, use the default.
514 Terminate the output with a null character, and return the written
515 size of the output, not counting the terminating null.
516 If BUFFERSIZE is too small to store the output string, return the
517 value that would have been returned had BUFFERSIZE been large enough.
518 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
521 quotearg_buffer (char *buffer, size_t buffersize,
522 char const *arg, size_t argsize,
523 struct quoting_options const *o)
/* Substitute the file-wide defaults for a null O.  */
525 struct quoting_options const *p = o ? o : &default_quoting_options;
/* Delegate the quoting itself; R receives the length reported by
   quotearg_buffer_restyled.  */
527 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
533 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
534 allocated storage containing the quoted string. */
536 quotearg_alloc (char const *arg, size_t argsize,
537 struct quoting_options const *o)
/* First pass: with a zero-size buffer nothing is stored, and the
   call only reports the length of the quoted output.  One more byte
   is added for the terminating null.  */
540 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
541 char *buf = xcharalloc (bufsize);
/* Second pass: produce the quoted text in the new buffer.  */
542 quotearg_buffer (buf, bufsize, arg, argsize, o);
547 /* A storage slot with size and pointer to a value. */
554 /* Preallocate a slot 0 buffer, so that the caller can always quote
555 one small component of a "memory exhausted" message in slot 0. */
556 static char slot0[256];
/* Number of entries in slotvec; slot 0 exists from the start.  */
557 static unsigned int nslots = 1;
/* The built-in vector: a single slot backed by slot0.  */
558 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* The vector currently in use; it starts as the built-in one and is
   replaced when more slots are needed.  */
559 static struct slotvec *slotvec = &slotvec0;
/* NOTE(review): the statements below belong to a routine that resets
   the slot storage; its header is not visible here.  Slots 1 and up
   are visited in a loop.  */
564 struct slotvec *sv = slotvec;
566 for (i = 1; i < nslots; i++)
/* Slot 0 is handled separately: it needs work only when its buffer is
   no longer the static slot0, after which slotvec0 is pointed back at
   slot0.  */
568 if (sv[0].val != slot0)
571 slotvec0.size = sizeof slot0;
572 slotvec0.val = slot0;
582 /* Use storage slot N to return a quoted version of argument ARG.
583 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
584 null-terminated string.
585 OPTIONS specifies the quoting options.
586 The returned value points to static storage that can be
587 reused by the next call to this function with the same value of N.
588 N must be nonnegative. N is deliberately declared with type "int"
589 to allow for future extensions (using negative values). */
591 quotearg_n_options (int n, char const *arg, size_t argsize,
592 struct quoting_options const *options)
597 struct slotvec *sv = slotvec;
604 /* FIXME: technically, the type of n1 should be `unsigned int',
605 but that evokes an unsuppressible warning from gcc-4.0.1 and
606 older. If gcc ever provides an option to suppress that warning,
607 revert to the original type, so that the test in xalloc_oversized
608 is once again performed only at compile time. */
/* The built-in slotvec0 is a static object, so it is never handed to
   xrealloc; NULL is passed in its place below.  */
610 bool preallocated = (sv == &slotvec0);
/* NOTE(review): presumably guards against overflow of
   N1 * sizeof *SV in the reallocation below -- confirm.  */
612 if (xalloc_oversized (n1, sizeof *sv))
615 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero the slots that were just added, from index NSLOTS up to N1.  */
618 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
/* First attempt: quote into the buffer that slot N already owns.
   QSIZE is the length quotearg_buffer reports, which is the full
   length even when the buffer is too small.  */
623 size_t size = sv[n].size;
624 char *val = sv[n].val;
625 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
/* NOTE(review): presumably reached only when the output did not fit;
   the guarding test is not visible here.  The slot is resized to
   exactly QSIZE + 1 bytes and the quoting is repeated into the new
   buffer.  */
629 sv[n].size = size = qsize + 1;
632 sv[n].val = val = xcharalloc (size);
633 quotearg_buffer (val, size, arg, argsize, options);
/* Quote ARG into storage slot N using the default quoting options.
   SIZE_MAX is passed as the size, so ARG is treated as a
   null-terminated string.  */
642 quotearg_n (int n, char const *arg)
644 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Like quotearg_n (0, ARG): the result lives in storage slot 0.  */
648 quotearg (char const *arg)
650 return quotearg_n (0, arg);
653 /* Return quoting options for STYLE, with no extra quoting. */
654 static struct quoting_options
655 quoting_options_from_style (enum quoting_style style)
657 struct quoting_options o;
/* Clear the whole bit vector so that no character is forced to be
   quoted beyond what the style itself requires.  */
659 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
/* Quote the null-terminated string ARG into storage slot N, using
   options built from style S by quoting_options_from_style.  */
664 quotearg_n_style (int n, enum quoting_style s, char const *arg)
666 struct quoting_options const o = quoting_options_from_style (s);
667 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Like quotearg_n_style, except that the size of ARG is given
   explicitly by ARGSIZE and passed on unchanged.  */
671 quotearg_n_style_mem (int n, enum quoting_style s,
672 char const *arg, size_t argsize)
674 struct quoting_options const o = quoting_options_from_style (s);
675 return quotearg_n_options (n, arg, argsize, &o);
/* Like quotearg_n_style (0, S, ARG): the result lives in storage
   slot 0.  */
679 quotearg_style (enum quoting_style s, char const *arg)
681 return quotearg_n_style (0, s, arg);
/* Quote the null-terminated string ARG into storage slot 0 with the
   default options, additionally marking the character CH as one that
   must be quoted.  */
685 quotearg_char (char const *arg, char ch)
687 struct quoting_options options;
/* Work on a private copy so that the file-wide defaults stay
   unchanged.  */
688 options = default_quoting_options;
689 set_char_quoting (&options, ch, 1);
690 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Like quotearg_char with a colon as the additional character to
   quote.  */
694 quotearg_colon (char const *arg)
696 return quotearg_char (arg, ':');