From 6568b173db1c98f39a53eadd9b09e0a0e5c11920 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 10 Nov 2010 20:34:52 -0800 Subject: [PATCH] csplit: do not rely on undefined behavior in printf formats * doc/coreutils.texi (csplit invocation): Say that %d and %i are aliases for %u. * src/csplit.c (FLAG_THOUSANDS, FLAG_ALTERNATIVE): New constants. (get_format_flags): Now take char const * and int * and return size_t. It now stores info about the flags instead of merely scanning them. Also, it handles '0' correctly. Drop support for the undocumented '+' and ' ' flags since the value is unsigned. Add support for the (undocumented) "'" flag. All uses changed. (get_format_width, get_format_prec): Remove. (check_format_conv_type): Renamed from get_format_conv_type, with a different signature. It now converts the format to one that is compatible with unsigned int, and checks flags. All uses changed. (max_out): Have snprintf compute the number of bytes needed rather than attempting to do it ourselves (which doesn't work portably with outlandish formats such as %4294967296d). (check_format_conv_type, main): Check for overflow in size calculations. Don't assume size_t fits in unsigned int. * tests/misc/csplit: Check for proper handling of flags, with %0#6.3x. Coreutils 8.6 mishandles this somewhat-weird example. --- doc/coreutils.texi | 3 +- src/csplit.c | 141 ++++++++++++++++++++++++----------------------------- tests/misc/csplit | 15 ++++++ 3 files changed, 81 insertions(+), 78 deletions(-) diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 8dfb069..ce56b0e 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3082,7 +3082,8 @@ specified, the suffix string must include exactly one @code{printf(3)}-style conversion specification, possibly including format specification flags, a field width, a precision specifications, or all of these kinds of modifiers. The format letter must convert a -binary integer argument to readable form; thus, only @samp{d}, @samp{i}, +binary unsigned integer argument to readable form. The format letters +@samp{d} and @samp{i} are aliases for @samp{u}, and the @samp{u}, @samp{o}, @samp{x}, and @samp{X} conversions are allowed. The entire @var{suffix} is given (with the current output file number) to @code{sprintf(3)} to form the file name suffixes for each of the diff --git a/src/csplit.c b/src/csplit.c index 948a795..531e492 100644 --- a/src/csplit.c +++ b/src/csplit.c @@ -1181,81 +1181,64 @@ parse_patterns (int argc, int start, char **argv) } } -static unsigned int -get_format_flags (char **format_ptr) + + +/* Names for the printf format flags ' and #. These can be ORed together. */ +enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 }; + +/* Scan the printf format flags in FORMAT, storing info about the + flags into *FLAGS_PTR. Return the number of flags found. */ +static size_t +get_format_flags (char const *format, int *flags_ptr) { - unsigned int count = 0; + int flags = 0; - for (; **format_ptr; (*format_ptr)++) + for (size_t count = 0; ; count++) { - switch (**format_ptr) + switch (format[count]) { case '-': + case '0': break; - case '+': - case ' ': - count |= 1; + case '\'': + flags |= FLAG_THOUSANDS; break; case '#': - count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */ + flags |= FLAG_ALTERNATIVE; break; default: + *flags_ptr = flags; return count; } } - return count; -} - -static size_t -get_format_width (char **format_ptr) -{ - unsigned long int val = 0; - - if (ISDIGIT (**format_ptr) - && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK - || SIZE_MAX < val)) - error (EXIT_FAILURE, 0, _("invalid format width")); - - /* Allow for enough octal digits to represent the value of UINT_MAX, - even if the field width is less than that. */ - return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3); -} - -static size_t -get_format_prec (char **format_ptr) -{ - if (**format_ptr != '.') - return 0; - (*format_ptr)++; - - if (! ISDIGIT (**format_ptr)) - return 0; - else - { - unsigned long int val; - if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK - || SIZE_MAX < val) - error (EXIT_FAILURE, 0, _("invalid format precision")); - return val; - } } +/* Check that the printf format conversion specifier *FORMAT is valid + and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i', + since the format will be used with an unsigned value. */ static void -get_format_conv_type (char **format_ptr) +check_format_conv_type (char *format, int flags) { - unsigned char ch = *(*format_ptr)++; + unsigned char ch = *format; + int compatible_flags = FLAG_THOUSANDS; switch (ch) { case 'd': case 'i': - case 'o': + *format = 'u'; + break; + case 'u': + break; + + case 'o': case 'x': case 'X': + compatible_flags = FLAG_ALTERNATIVE; break; case 0: @@ -1270,45 +1253,46 @@ get_format_conv_type (char **format_ptr) error (EXIT_FAILURE, 0, _("invalid conversion specifier in suffix: \\%.3o"), ch); } + + if (flags & ~ compatible_flags) + error (EXIT_FAILURE, 0, + _("invalid flags in conversion specification: %%%c%c"), + (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch); } +/* Return the maximum number of bytes that can be generated by + applying FORMAT to an unsigned int value. If the format is + invalid, diagnose the problem and exit. */ static size_t max_out (char *format) { - size_t out_count = 0; bool percent = false; - while (*format) - { - if (*format++ != '%') - out_count++; - else if (*format == '%') - { - format++; - out_count++; - } - else - { - if (percent) - error (EXIT_FAILURE, 0, - _("too many %% conversion specifications in suffix")); - percent = true; - out_count += get_format_flags (&format); - { - size_t width = get_format_width (&format); - size_t prec = get_format_prec (&format); - - out_count += MAX (width, prec); - } - get_format_conv_type (&format); - } - } + for (char *f = format; *f; f++) + if (*f == '%' && *++f != '%') + { + if (percent) + error (EXIT_FAILURE, 0, + _("too many %% conversion specifications in suffix")); + percent = true; + unsigned int flags; + f += get_format_flags (f, &flags); + while (ISDIGIT (*f)) + f++; + if (*f == '.') + while (ISDIGIT (*++f)) + continue; + check_format_conv_type (f, flags); + } if (! percent) error (EXIT_FAILURE, 0, _("missing %% conversion specification in suffix")); - return out_count; + int maxlen = snprintf (NULL, 0, format, UINT_MAX); + if (! (0 <= maxlen && maxlen <= SIZE_MAX)) + xalloc_die (); + return maxlen; } int @@ -1349,7 +1333,7 @@ main (int argc, char **argv) case 'n': if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK - || val > INT_MAX) + || MIN (INT_MAX, SIZE_MAX) < val) error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg); digits = val; break; @@ -1380,11 +1364,14 @@ main (int argc, char **argv) usage (EXIT_FAILURE); } - unsigned int max_digit_string_len + size_t prefix_len = strlen (prefix); + size_t max_digit_string_len = (suffix ? max_out (suffix) : MAX (INT_STRLEN_BOUND (unsigned int), digits)); - filename_space = xmalloc (strlen (prefix) + max_digit_string_len + 1); + if (SIZE_MAX - 1 - prefix_len < max_digit_string_len) + xalloc_die (); + filename_space = xmalloc (prefix_len + max_digit_string_len + 1); set_input_file (argv[optind++]); diff --git a/tests/misc/csplit b/tests/misc/csplit index bef7224..f365da3 100755 --- a/tests/misc/csplit +++ b/tests/misc/csplit @@ -67,6 +67,21 @@ EOF compare err experr || fail=1 rm -f in out exp err experr +# `echo | csplit -b '%0#6.3x' - 1' incorrectly warned about the format +# up through coreutils 8.6. +echo > in +csplit -b '%0#6.3x' in 1 > out 2> err || fail=1 +cat < exp +0 +1 +EOF +compare out exp || fail=1 +touch experr +compare err experr || fail=1 +compare 'xx 000' experr || fail=1 +compare 'xx 0x001' in || fail=1 +rm -f in out exp err experr xx* + # make sure `csplit FILE 0' fails. echo > in csplit in 0 > out 2> err && fail=1 -- 2.7.4