1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998-2002, 2004-2013 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Disable GCC's "-Wsuggest-attribute=pure" diagnostic when the compiler
   is GCC 4.6 or newer, as tested by the #if condition below.  */
20 /* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that
21 the quoting_options_from_style function might be candidate for
23 #if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
24 # pragma GCC diagnostic ignored "-Wsuggest-attribute=pure"
33 #include "c-strcaseeq.h"
34 #include "localcharset.h"
/* Helper macros used throughout this file:
   _(msgid)   passes MSGID to gettext.
   N_(msgid)  expands to MSGID unchanged.
   SIZE_MAX   (size_t) -1; the indented "# define" suggests a fallback
              inside a conditional that is not visible here.
   INT_BITS   number of bits in an int, computed from sizeof and CHAR_BIT.  */
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
50 # define SIZE_MAX ((size_t) -1)
53 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* A complete set of quoting parameters: the basic style, a per-character
   bit vector of bytes that must always be quoted (one bit for each
   unsigned char value, packed INT_BITS per array element), and the
   delimiter strings used by custom_quoting_style.  */
55 struct quoting_options
57 /* Basic quoting style. */
58 enum quoting_style style;
60 /* Additional flags. Bitwise combination of enum quoting_flags. */
63 /* Quote the characters indicated by this bit vector even if the
64 quoting style would not normally require them to be quoted. */
65 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
67 /* The left quote for custom_quoting_style. */
68 char const *left_quote;
70 /* The right quote for custom_quoting_style. */
71 char const *right_quote;
/* Table of quoting style names as constant strings.  The initializer
   is not visible in this listing.  */
74 /* Names of quoting styles. */
75 char const *const quoting_style_args[] =
/* Table of enum quoting_style values matching the style names.
   NOTE(review): presumably index-aligned with quoting_style_args;
   only some of the entries are visible in this listing.  */
88 /* Correspondences to quoting style names. */
89 enum quoting_style const quoting_style_vals[] =
91 literal_quoting_style,
93 shell_always_quoting_style,
95 c_maybe_quoting_style,
101 /* The default quoting options. */
/* File-local option set used whenever a caller passes a null
   struct quoting_options pointer to the functions in this file.  */
102 static struct quoting_options default_quoting_options;
104 /* Allocate a new set of quoting options, with contents initially identical
105 to O if O is not null, or to the default if O is null.
106 It is the caller's responsibility to free the result. */
/* The copy is made with xmemdup from either O or
   default_quoting_options.  */
107 struct quoting_options *
108 clone_quoting_options (struct quoting_options *o)
111 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
117 /* Get the value of O's quoting style. If O is null, use the default. */
/* Returns the style member of O, or of default_quoting_options when
   O is null.  Nothing is modified.  */
119 get_quoting_style (struct quoting_options *o)
121 return (o ? o : &default_quoting_options)->style;
124 /* In O (or in the default if O is null),
125 set the value of the quoting style to S. */
/* Only the style member is assigned; the other members of the chosen
   option set are left as they were.  */
127 set_quoting_style (struct quoting_options *o, enum quoting_style s)
129 (o ? o : &default_quoting_options)->style = s;
132 /* In O (or in the default if O is null),
133 set the value of the quoting options for character C to I.
134 Return the old value. Currently, the only values defined for I are
135 0 (the default) and 1 (which means to quote the character even if
136 it would not otherwise be quoted). */
/* C is converted to unsigned char and used as a bit index into the
   quote_these_too vector: element uc / INT_BITS, bit uc % INT_BITS.
   Only the low bit of I is used.  */
138 set_char_quoting (struct quoting_options *o, char c, int i)
140 unsigned char uc = c;
142 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
143 int shift = uc % INT_BITS;
144 int r = (*p >> shift) & 1;
/* Flip the bit only when the requested value differs from the old one.
   The operand is converted to unsigned int before shifting because SHIFT
   can be INT_BITS - 1 (for example when C is '?'), and shifting a signed
   1 into the sign bit is undefined behavior.  */
145 *p ^= (unsigned int) ((i & 1) ^ r) << shift;
149 /* In O (or in the default if O is null),
150 set the value of the quoting options flag to I, which can be a
151 bitwise combination of enum quoting_flags, or 0 for default
152 behavior. Return the old value. */
/* A null O is redirected to default_quoting_options before the flags
   are accessed.  */
154 set_quoting_flags (struct quoting_options *o, int i)
158 o = &default_quoting_options;
/* In O (or in the default if O is null), select custom_quoting_style
   and record LEFT_QUOTE and RIGHT_QUOTE as its delimiters.  The style
   is set before the delimiters are checked for null.
   NOTE(review): the action taken when either delimiter is null is on
   a line not visible in this listing.  */
165 set_custom_quoting (struct quoting_options *o,
166 char const *left_quote, char const *right_quote)
169 o = &default_quoting_options;
170 o->style = custom_quoting_style;
171 if (!left_quote || !right_quote)
173 o->left_quote = left_quote;
174 o->right_quote = right_quote;
177 /* Return quoting options for STYLE, with no extra quoting. */
/* The result starts from an all-zero option set with null custom
   delimiters.  custom_quoting_style is tested for explicitly.
   NOTE(review): the statement executed for custom_quoting_style is not
   visible in this listing.  */
178 static struct quoting_options /* NOT PURE!! */
179 quoting_options_from_style (enum quoting_style style)
181 struct quoting_options o = { 0, 0, { 0 }, NULL, NULL };
182 if (style == custom_quoting_style)
188 /* MSGID approximates a quotation mark. Return its translation if it
189 has one; otherwise, return either it or "\"", depending on S.
191 S is either clocale_quoting_style or locale_quoting_style. */
/* When gettext returns MSGID itself (no translation), the locale's
   character set decides the result: UTF-8 and GB18030 locales get the
   encoded bytes of U+2018 when MSGID starts with '`' and of U+2019
   otherwise; every other character set gets "\"" for
   clocale_quoting_style and "'" for any other S.  */
193 gettext_quote (char const *msgid, enum quoting_style s)
195 char const *translation = _(msgid);
196 char const *locale_code;
198 if (translation != msgid)
201 /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019.
202 Here is a list of other locales that include U+2018 and U+2019:
204 ISO-8859-7 0xA1 KOI8-T 0x91
205 CP869 0x8B CP874 0x91
206 CP932 0x81 0x65 CP936 0xA1 0xAE
207 CP949 0xA1 0xAE CP950 0xA1 0xA5
208 CP1250 0x91 CP1251 0x91
209 CP1252 0x91 CP1253 0x91
210 CP1254 0x91 CP1255 0x91
211 CP1256 0x91 CP1257 0x91
212 EUC-JP 0xA1 0xC6 EUC-KR 0xA1 0xAE
213 EUC-TW 0xA1 0xE4 BIG5 0xA1 0xA5
214 BIG5-HKSCS 0xA1 0xA5 EUC-CN 0xA1 0xAE
215 GBK 0xA1 0xAE Georgian-PS 0x91
218 None of these is still in wide use; using iconv is overkill. */
219 locale_code = locale_charset ();
220 if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0))
221 return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99";
/* GB18030 encodes U+2018 as 0xA1 0xAE and U+2019 as 0xA1 0xAF.  Both
   bytes of each quote must be written as hexadecimal escapes; "\a"
   would be the alert character, not byte 0xAE.  */
222 if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0))
223 return msgid[0] == '`' ? "\xa1\xae": "\xa1\xaf";
225 return (s == clocale_quoting_style ? "\"" : "'");
228 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
229 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
230 QUOTE_THESE_TOO to control quoting.
231 Terminate the output with a null character, and return the written
232 size of the output, not counting the terminating null.
233 If BUFFERSIZE is too small to store the output string, return the
234 value that would have been returned had BUFFERSIZE been large enough.
235 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
237 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
238 ARGSIZE, O), except it breaks O into its component pieces and is
239 not careful about errno. */
/* LEFT_QUOTE and RIGHT_QUOTE are used as given only for
   custom_quoting_style; for locale_quoting_style and
   clocale_quoting_style they are replaced by the results of
   gettext_quote.  When outer quotes are being elided and a byte that
   needs quoting is found, control jumps to force_outer_quoting_style,
   which redoes the whole conversion through a recursive call with
   QA_ELIDE_OUTER_QUOTES cleared and a null QUOTE_THESE_TOO.  */
242 quotearg_buffer_restyled (char *buffer, size_t buffersize,
243 char const *arg, size_t argsize,
244 enum quoting_style quoting_style, int flags,
245 unsigned int const *quote_these_too,
246 char const *left_quote,
247 char const *right_quote)
251 char const *quote_string = 0;
252 size_t quote_string_len = 0;
253 bool backslash_escapes = false;
254 bool unibyte_locale = MB_CUR_MAX == 1;
255 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
260 if (len < buffersize) \
266 switch (quoting_style)
268 case c_maybe_quoting_style:
269 quoting_style = c_quoting_style;
270 elide_outer_quotes = true;
272 case c_quoting_style:
273 if (!elide_outer_quotes)
275 backslash_escapes = true;
277 quote_string_len = 1;
280 case escape_quoting_style:
281 backslash_escapes = true;
282 elide_outer_quotes = false;
285 case locale_quoting_style:
286 case clocale_quoting_style:
287 case custom_quoting_style:
289 if (quoting_style != custom_quoting_style)
292 Get translations for open and closing quotation marks.
293 The message catalog should translate "`" to a left
294 quotation mark suitable for the locale, and similarly for
295 "'". For example, a French Unicode locale should translate
296 these to U+00AB (LEFT-POINTING DOUBLE ANGLE
297 QUOTATION MARK), and U+00BB (RIGHT-POINTING DOUBLE ANGLE
298 QUOTATION MARK), respectively.
300 If the catalog has no translation, we will try to
301 use Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and
302 Unicode U+2019 (RIGHT SINGLE QUOTATION MARK). If the
303 current locale is not Unicode, locale_quoting_style
304 will quote 'like this', and clocale_quoting_style will
305 quote "like this". You should always include translations
306 for "`" and "'" even if U+2018 and U+2019 are appropriate
309 If you don't know what to put here, please see
310 <http://en.wikipedia.org/wiki/Quotation_marks_in_other_languages>
311 and use glyphs suitable for your language. */
312 left_quote = gettext_quote (N_("`"), quoting_style);
313 right_quote = gettext_quote (N_("'"), quoting_style);
315 if (!elide_outer_quotes)
316 for (quote_string = left_quote; *quote_string; quote_string++)
317 STORE (*quote_string);
318 backslash_escapes = true;
319 quote_string = right_quote;
320 quote_string_len = strlen (quote_string);
324 case shell_quoting_style:
325 quoting_style = shell_always_quoting_style;
326 elide_outer_quotes = true;
328 case shell_always_quoting_style:
329 if (!elide_outer_quotes)
332 quote_string_len = 1;
335 case literal_quoting_style:
336 elide_outer_quotes = false;
/* Walk ARG until its terminating null (ARGSIZE == SIZE_MAX) or until
   ARGSIZE bytes have been consumed.  */
343 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
347 bool is_right_quote = false;
349 if (backslash_escapes
351 && (i + quote_string_len
352 <= (argsize == SIZE_MAX && 1 < quote_string_len
353 /* Use strlen only if we must: when argsize is SIZE_MAX,
354 and when the quote string is more than 1 byte long.
355 If we do call strlen, save the result. */
356 ? (argsize = strlen (arg)) : argsize))
357 && memcmp (arg + i, quote_string, quote_string_len) == 0)
359 if (elide_outer_quotes)
360 goto force_outer_quoting_style;
361 is_right_quote = true;
368 if (backslash_escapes)
370 if (elide_outer_quotes)
371 goto force_outer_quoting_style;
373 /* If quote_string were to begin with digits, we'd need to
374 test for the end of the arg as well. However, it's
375 hard to imagine any locale that would use digits in
376 quotes, and set_custom_quoting is documented not to
378 if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
384 /* We don't have to worry that this last '0' will be
385 backslash-escaped because, again, quote_string should
386 not start with it and because quote_these_too is
387 documented as not accepting it. */
389 else if (flags & QA_ELIDE_NULL_BYTES)
394 switch (quoting_style)
396 case shell_always_quoting_style:
397 if (elide_outer_quotes)
398 goto force_outer_quoting_style;
401 case c_quoting_style:
402 if ((flags & QA_SPLIT_TRIGRAPHS)
403 && i + 2 < argsize && arg[i + 1] == '?')
407 case '(': case ')': case '-': case '/':
408 case '<': case '=': case '>':
409 /* Escape the second '?' in what would otherwise be
411 if (elide_outer_quotes)
412 goto force_outer_quoting_style;
431 case '\a': esc = 'a'; goto c_escape;
432 case '\b': esc = 'b'; goto c_escape;
433 case '\f': esc = 'f'; goto c_escape;
434 case '\n': esc = 'n'; goto c_and_shell_escape;
435 case '\r': esc = 'r'; goto c_and_shell_escape;
436 case '\t': esc = 't'; goto c_and_shell_escape;
437 case '\v': esc = 'v'; goto c_escape;
439 /* No need to escape the escape if we are trying to elide
440 outer quotes and nothing else is problematic. */
441 if (backslash_escapes && elide_outer_quotes && quote_string_len)
445 if (quoting_style == shell_always_quoting_style
446 && elide_outer_quotes)
447 goto force_outer_quoting_style;
450 if (backslash_escapes)
457 case '{': case '}': /* sometimes special if isolated */
458 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
466 case '!': /* special in bash */
467 case '"': case '$': case '&':
468 case '(': case ')': case '*': case ';':
470 case '=': /* sometimes special in 0th or (with "set -k") later args */
472 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
474 /* A shell special character. In theory, '$' and '`' could
475 be the first bytes of multibyte characters, which means
476 we should check them with mbrtowc, but in practice this
477 doesn't happen so it's not worth worrying about. */
478 if (quoting_style == shell_always_quoting_style
479 && elide_outer_quotes)
480 goto force_outer_quoting_style;
484 if (quoting_style == shell_always_quoting_style)
486 if (elide_outer_quotes)
487 goto force_outer_quoting_style;
494 case '%': case '+': case ',': case '-': case '.': case '/':
495 case '0': case '1': case '2': case '3': case '4': case '5':
496 case '6': case '7': case '8': case '9': case ':':
497 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
498 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
499 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
500 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
501 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
502 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
503 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
504 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
505 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
506 /* These characters don't cause problems, no matter what the
507 quoting style is. They cannot start multibyte sequences.
508 A digit or a special letter would cause trouble if it
509 appeared at the beginning of quote_string because we'd then
510 escape by prepending a backslash. However, it's hard to
511 imagine any locale that would use digits or letters as
512 quotes, and set_custom_quoting is documented not to accept
513 them. Also, a digit or a special letter would cause
514 trouble if it appeared in quote_these_too, but that's also
515 documented as not accepting them. */
519 /* If we have a multibyte sequence, copy it until we reach
520 its end, find an error, or come back to the initial shift
521 state. For C-like styles, if the sequence has
522 unprintable characters, escape the whole sequence, since
523 we can't easily escape single characters within it. */
525 /* Length of multibyte sequence found so far. */
533 printable = isprint (c) != 0;
538 memset (&mbstate, 0, sizeof mbstate);
542 if (argsize == SIZE_MAX)
543 argsize = strlen (arg);
548 size_t bytes = mbrtowc (&w, &arg[i + m],
549 argsize - (i + m), &mbstate);
552 else if (bytes == (size_t) -1)
557 else if (bytes == (size_t) -2)
560 while (i + m < argsize && arg[i + m])
566 /* Work around a bug with older shells that "see" a '\'
567 that is really the 2nd byte of a multibyte character.
568 In practice the problem is limited to ASCII
569 chars >= '@' that are shell special chars. */
570 if ('[' == 0x5b && elide_outer_quotes
571 && quoting_style == shell_always_quoting_style)
574 for (j = 1; j < bytes; j++)
575 switch (arg[i + m + j])
577 case '[': case '\\': case '^':
579 goto force_outer_quoting_style;
591 while (! mbsinit (&mbstate));
594 if (1 < m || (backslash_escapes && ! printable))
596 /* Output a multibyte sequence, or an escaped
597 unprintable unibyte character. */
602 if (backslash_escapes && ! printable)
604 if (elide_outer_quotes)
605 goto force_outer_quoting_style;
607 STORE ('0' + (c >> 6));
608 STORE ('0' + ((c >> 3) & 7));
611 else if (is_right_quote)
614 is_right_quote = false;
/* Test the caller-requested bit for byte C in QUOTE_THESE_TOO.  The mask
   is built from an unsigned 1 because the bit index can be INT_BITS - 1,
   and shifting a signed 1 into the sign bit is undefined behavior.  */
627 if (! ((backslash_escapes || elide_outer_quotes)
629 && quote_these_too[c / INT_BITS] & (1u << (c % INT_BITS)))
634 if (elide_outer_quotes)
635 goto force_outer_quoting_style;
/* An empty result under shell quoting with elided outer quotes is redone
   with the outer quotes kept.  */
642 if (len == 0 && quoting_style == shell_always_quoting_style
643 && elide_outer_quotes)
644 goto force_outer_quoting_style;
646 if (quote_string && !elide_outer_quotes)
647 for (; *quote_string; quote_string++)
648 STORE (*quote_string);
650 if (len < buffersize)
654 force_outer_quoting_style:
655 /* Don't reuse quote_these_too, since the addition of outer quotes
656 sufficiently quotes the specified characters. */
657 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
659 flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
660 left_quote, right_quote);
/* Public entry point that quotes ARG into BUFFER: the option set
   (O, or default_quoting_options when O is null) is unpacked into its
   style, flags, quote_these_too vector and custom delimiters, and those
   are passed to quotearg_buffer_restyled.  */
663 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
664 argument ARG (of size ARGSIZE), using O to control quoting.
665 If O is null, use the default.
666 Terminate the output with a null character, and return the written
667 size of the output, not counting the terminating null.
668 If BUFFERSIZE is too small to store the output string, return the
669 value that would have been returned had BUFFERSIZE been large enough.
670 If ARGSIZE is SIZE_MAX, use the string length of the argument for
673 quotearg_buffer (char *buffer, size_t buffersize,
674 char const *arg, size_t argsize,
675 struct quoting_options const *o)
677 struct quoting_options const *p = o ? o : &default_quoting_options;
679 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
680 p->style, p->flags, p->quote_these_too,
681 p->left_quote, p->right_quote);
686 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
/* No output size is requested: the SIZE argument passed on is NULL.  */
688 quotearg_alloc (char const *arg, size_t argsize,
689 struct quoting_options const *o)
691 return quotearg_alloc_mem (arg, argsize, NULL, o);
/* Two-pass allocation: quotearg_buffer_restyled is first called with a
   null buffer of size 0 to compute the space required, then a buffer
   obtained from xcharalloc is filled by a second call with the same
   arguments.  O defaults to default_quoting_options when null, and
   QA_ELIDE_NULL_BYTES is added to the flags when SIZE is null.  */
694 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
695 allocated storage containing the quoted string, and store the
696 resulting size into *SIZE, if non-NULL. The result can contain
697 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
698 NULL, and set_quoting_flags has not set the null byte elision
701 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
702 struct quoting_options const *o)
704 struct quoting_options const *p = o ? o : &default_quoting_options;
706 /* Elide embedded null bytes if we can't return a size. */
707 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
708 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
709 flags, p->quote_these_too,
712 char *buf = xcharalloc (bufsize);
713 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
715 p->left_quote, p->right_quote);
/* File-local state for the numbered result slots: a 256-byte static
   buffer for slot 0, the current slot count (initially 1), a one-element
   slot vector describing that static buffer, and the pointer to the
   slot vector currently in use (initially the static one).  */
722 /* A storage slot with size and pointer to a value. */
729 /* Preallocate a slot 0 buffer, so that the caller can always quote
730 one small component of a "memory exhausted" message in slot 0. */
731 static char slot0[256];
732 static unsigned int nslots = 1;
733 static struct slotvec slotvec0 = {sizeof slot0, slot0};
734 static struct slotvec *slotvec = &slotvec0;
/* NOTE(review): the function header for these statements is not visible
   in this listing.  From the visible code this is the slot-storage
   cleanup: it visits slots 1 through nslots - 1 and restores slot 0 to
   the preallocated slot0 buffer.  */
739 struct slotvec *sv = slotvec;
741 for (i = 1; i < nslots; i++)
/* Slot 0 points somewhere other than the static slot0 buffer:
   reset slotvec0 to describe slot0 again.  */
743 if (sv[0].val != slot0)
746 slotvec0.size = sizeof slot0;
747 slotvec0.val = slot0;
757 /* Use storage slot N to return a quoted version of argument ARG.
758 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
759 null-terminated string.
760 OPTIONS specifies the quoting options.
761 The returned value points to static storage that can be
762 reused by the next call to this function with the same value of N.
763 N must be nonnegative. N is deliberately declared with type "int"
764 to allow for future extensions (using negative values). */
/* OPTIONS is dereferenced unconditionally, so it must not be null.
   QA_ELIDE_NULL_BYTES is always added to the flags.  */
766 quotearg_n_options (int n, char const *arg, size_t argsize,
767 struct quoting_options const *options)
772 struct slotvec *sv = slotvec;
779 /* FIXME: technically, the type of n1 should be 'unsigned int',
780 but that evokes an unsuppressible warning from gcc-4.0.1 and
781 older. If gcc ever provides an option to suppress that warning,
782 revert to the original type, so that the test in xalloc_oversized
783 is once again performed only at compile time. */
/* True while the slot vector in use is still the static slotvec0.  */
785 bool preallocated = (sv == &slotvec0);
787 if (xalloc_oversized (n1, sizeof *sv))
/* Resize the vector to n1 entries.  The static slotvec0 is never handed
   to xrealloc; NULL is passed instead so fresh storage is allocated.  */
790 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero the entries added beyond the old slot count.  */
793 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
798 size_t size = sv[n].size;
799 char *val = sv[n].val;
800 /* Elide embedded null bytes since we don't return a size. */
801 int flags = options->flags | QA_ELIDE_NULL_BYTES;
/* First pass: quote into the slot's current buffer and obtain the
   length the quoted result requires.  */
802 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
803 options->style, flags,
804 options->quote_these_too,
806 options->right_quote);
/* Enlarge the slot to the required length plus the terminating null,
   then quote again into the new buffer.  */
810 sv[n].size = size = qsize + 1;
813 sv[n].val = val = xcharalloc (size);
814 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
815 flags, options->quote_these_too,
817 options->right_quote);
/* Quote the null-terminated string ARG into slot N using
   default_quoting_options.  */
826 quotearg_n (int n, char const *arg)
828 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the ARGSIZE bytes at ARG into slot N using
   default_quoting_options.  */
832 quotearg_n_mem (int n, char const *arg, size_t argsize)
834 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
/* Same as quotearg_n with slot 0.  */
838 quotearg (char const *arg)
840 return quotearg_n (0, arg);
/* Same as quotearg_n_mem with slot 0.  */
844 quotearg_mem (char const *arg, size_t argsize)
846 return quotearg_n_mem (0, arg, argsize);
/* Quote the null-terminated string ARG into slot N using the options
   that quoting_options_from_style builds for style S.  */
850 quotearg_n_style (int n, enum quoting_style s, char const *arg)
852 struct quoting_options const o = quoting_options_from_style (s);
853 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Quote the ARGSIZE bytes at ARG into slot N using the options that
   quoting_options_from_style builds for style S.  */
857 quotearg_n_style_mem (int n, enum quoting_style s,
858 char const *arg, size_t argsize)
860 struct quoting_options const o = quoting_options_from_style (s);
861 return quotearg_n_options (n, arg, argsize, &o);
/* Same as quotearg_n_style with slot 0.  */
865 quotearg_style (enum quoting_style s, char const *arg)
867 return quotearg_n_style (0, s, arg);
/* Same as quotearg_n_style_mem with slot 0.  */
871 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
873 return quotearg_n_style_mem (0, s, arg, argsize);
/* Quote the ARGSIZE bytes at ARG into slot 0 using a local copy of
   default_quoting_options in which character CH is additionally marked
   for quoting.  The defaults themselves are not modified.  */
877 quotearg_char_mem (char const *arg, size_t argsize, char ch)
879 struct quoting_options options;
880 options = default_quoting_options;
881 set_char_quoting (&options, ch, 1);
882 return quotearg_n_options (0, arg, argsize, &options);
/* Same as quotearg_char_mem for a null-terminated ARG (ARGSIZE is
   SIZE_MAX).  */
886 quotearg_char (char const *arg, char ch)
888 return quotearg_char_mem (arg, SIZE_MAX, ch);
/* Same as quotearg_char with ':' as the extra character to quote.  */
892 quotearg_colon (char const *arg)
894 return quotearg_char (arg, ':');
/* Same as quotearg_char_mem with ':' as the extra character to quote.  */
898 quotearg_colon_mem (char const *arg, size_t argsize)
900 return quotearg_char_mem (arg, argsize, ':');
/* Quote ARG into slot N with caller-supplied LEFT_QUOTE and RIGHT_QUOTE
   by forwarding to quotearg_n_custom_mem.
   NOTE(review): the final (size) argument of the call is on a line not
   visible in this listing.  */
904 quotearg_n_custom (int n, char const *left_quote,
905 char const *right_quote, char const *arg)
907 return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
/* Quote the ARGSIZE bytes at ARG into slot N using a local copy of
   default_quoting_options that set_custom_quoting has switched to
   custom_quoting_style with LEFT_QUOTE and RIGHT_QUOTE as delimiters.  */
912 quotearg_n_custom_mem (int n, char const *left_quote,
913 char const *right_quote,
914 char const *arg, size_t argsize)
916 struct quoting_options o = default_quoting_options;
917 set_custom_quoting (&o, left_quote, right_quote);
918 return quotearg_n_options (n, arg, argsize, &o);
/* Same as quotearg_n_custom with slot 0.  */
922 quotearg_custom (char const *left_quote, char const *right_quote,
925 return quotearg_n_custom (0, left_quote, right_quote, arg);
/* Same as quotearg_n_custom_mem with slot 0.
   NOTE(review): the final argument of the call is on a line not visible
   in this listing.  */
929 quotearg_custom_mem (char const *left_quote, char const *right_quote,
930 char const *arg, size_t argsize)
932 return quotearg_n_custom_mem (0, left_quote, right_quote, arg,
937 /* The quoting option used by the functions of quote.h. */
/* Non-static object whose first initializer selects
   locale_quoting_style; the remaining initializers are not visible in
   this listing.  */
938 struct quoting_options quote_quoting_options =
940 locale_quoting_style,
/* Quote the ARGSIZE bytes at ARG into slot N using
   quote_quoting_options.  */
947 quote_n_mem (int n, char const *arg, size_t argsize)
949 return quotearg_n_options (n, arg, argsize, &quote_quoting_options);
/* Same as quote_n_mem with slot 0.  */
953 quote_mem (char const *arg, size_t argsize)
955 return quote_n_mem (0, arg, argsize);
/* Same as quote_n_mem for a null-terminated ARG (ARGSIZE is SIZE_MAX).  */
959 quote_n (int n, char const *arg)
961 return quote_n_mem (n, arg, SIZE_MAX);
/* Same as quote_n with slot 0.  */
965 quote (char const *arg)
967 return quote_n (0, arg);