From 62e1d5259df82155ae52201678093381a35d898e Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 8 Jul 2004 14:01:49 +0000 Subject: [PATCH] printf cleanup, to avoid undefined behavior, to add support for formats that Bash supports, and to support wide integers like Bash does. (UNSPECIFIED): Remove. All uses now replaced by booleans, so that we don't reserve any values for precision or width (like Bash). (STRTOX): Use prototype, not K&R-style definition. (vstrtoimax): Renamed from xstrtol (to avoid confusion with xstrtol in ../lib), with type change to intmax_t. All uses changed. (vstrtoumax): Renamed from xstrtoul, with type change to uintmax_t. All uses changed. (vstrtod): Renamed from xstrtod. All uses changed. (print_direc): Use boolean arg instead of special value to indicate a missing precision or width. LENGTH no longer includes length modifiers or conversion character. New arg CONVERSION now specifies conversion character. Use intmax_t-width formatting for integers (like Bash). Add support for C99 %a, %A, %F (like Bash). Add support for field width with %c (POSIX requires this). Add a FIXME for lack of support for field width and precision for %b. Add support for '\'', '0' flags. Check for invalid combinations of flags, field width, precision, and conversion, to prevent use of undefined behavior. Allow multiple length modifiers, for formats like "%lld" (like Bash). Add support for C99 'j', 't', 'z' length modifiers (like Bash). In error message, output entire invalid conversion specification, instead of merely outputting % followed by the conversion char. --- src/printf.c | 268 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 172 insertions(+), 96 deletions(-) diff --git a/src/printf.c b/src/printf.c index bbd80edc5..7d0f68b10 100644 --- a/src/printf.c +++ b/src/printf.c @@ -67,9 +67,6 @@ (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0') #define octtobin(c) ((c) - '0') -/* A value for field_width or precision that indicates it was not specified. */ -#define UNSPECIFIED INT_MIN - /* The value to return to the calling program. */ static int exit_status; @@ -162,8 +159,7 @@ verify (const char *s, const char *end) #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \ static TYPE \ -FUNC_NAME (s) \ - const char *s; \ +FUNC_NAME (char const *s) \ { \ char *end; \ TYPE val; \ @@ -187,9 +183,9 @@ FUNC_NAME (s) \ return val; \ } \ -STRTOX (unsigned long int, xstrtoul, (strtoul (s, &end, 0))) -STRTOX (long int, xstrtol, (strtol (s, &end, 0))) -STRTOX (double, xstrtod, (c_strtod (s, &end))) +STRTOX (intmax_t, vstrtoimax, (strtoimax (s, &end, 0))) +STRTOX (uintmax_t, vstrtoumax, (strtoumax (s, &end, 0))) +STRTOX (double, vstrtod, (c_strtod (s, &end))) /* Output a single-character \ escape. */ @@ -317,97 +313,138 @@ print_esc_string (const char *str) putchar (*str); } -/* Output a % directive. START is the start of the directive, - LENGTH is its length, and ARGUMENT is its argument. - If FIELD_WIDTH or PRECISION is UNSPECIFIED, they are args for - '*' values in those fields. */ +/* Evaluate a printf conversion specification. START is the start of + the directive, LENGTH is its length, and CONVERSION specifies the + type of conversion. LENGTH does not include any length modifier or + the conversion specifier itself. FIELD_WIDTH and PRECISION are the + field width and precision for '*' values, if HAVE_FIELD_WIDTH and + HAVE_PRECISION are true, respectively. ARGUMENT is the argument to + be formatted. */ static void -print_direc (const char *start, size_t length, int field_width, - int precision, const char *argument) +print_direc (const char *start, size_t length, char conversion, + bool have_field_width, int field_width, + bool have_precision, int precision, + char const *argument) { char *p; /* Null-terminated copy of % directive. */ - p = xmalloc ((unsigned) (length + 1)); - strncpy (p, start, length); - p[length] = 0; - - switch (p[length - 1]) + /* Create a null-terminated copy of the % directive, with an + intmax_t-wide length modifier substituted for any existing + integer length modifier. */ + { + char *q; + size_t length_modifier_len; + + switch (conversion) + { + case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': + length_modifier_len = sizeof PRIdMAX - 2; + break; + + default: + length_modifier_len = 0; + break; + } + + p = xmalloc (length + length_modifier_len + 2); + q = mempcpy (p, start, length); + q = mempcpy (q, PRIdMAX, length_modifier_len); + *q++ = conversion; + *q = '\0'; + } + + switch (conversion) { case 'd': case 'i': - if (field_width == UNSPECIFIED) - { - if (precision == UNSPECIFIED) - printf (p, xstrtol (argument)); - else - printf (p, precision, xstrtol (argument)); - } - else - { - if (precision == UNSPECIFIED) - printf (p, field_width, xstrtol (argument)); - else - printf (p, field_width, precision, xstrtol (argument)); - } + { + intmax_t arg = vstrtoimax (argument); + if (!have_field_width) + { + if (!have_precision) + printf (p, arg); + else + printf (p, precision, arg); + } + else + { + if (!have_precision) + printf (p, field_width, arg); + else + printf (p, field_width, precision, arg); + } + } break; case 'o': case 'u': case 'x': case 'X': - if (field_width == UNSPECIFIED) - { - if (precision == UNSPECIFIED) - printf (p, xstrtoul (argument)); - else - printf (p, precision, xstrtoul (argument)); - } - else - { - if (precision == UNSPECIFIED) - printf (p, field_width, xstrtoul (argument)); - else - printf (p, field_width, precision, xstrtoul (argument)); - } + { + uintmax_t arg = vstrtoumax (argument); + if (!have_field_width) + { + if (!have_precision) + printf (p, arg); + else + printf (p, precision, arg); + } + else + { + if (!have_precision) + printf (p, field_width, arg); + else + printf (p, field_width, precision, arg); + } + } break; - case 'f': + case 'a': + case 'A': case 'e': case 'E': + case 'f': + case 'F': case 'g': case 'G': - if (field_width == UNSPECIFIED) - { - if (precision == UNSPECIFIED) - printf (p, xstrtod (argument)); - else - printf (p, precision, xstrtod (argument)); - } - else - { - if (precision == UNSPECIFIED) - printf (p, field_width, xstrtod (argument)); - else - printf (p, field_width, precision, xstrtod (argument)); - } + { + double arg = vstrtod (argument); + if (!have_field_width) + { + if (!have_precision) + printf (p, arg); + else + printf (p, precision, arg); + } + else + { + if (!have_precision) + printf (p, field_width, arg); + else + printf (p, field_width, precision, arg); + } + } break; case 'c': - printf (p, *argument); + if (!have_field_width) + printf (p, *argument); + else + printf (p, field_width, *argument); break; case 's': - if (field_width == UNSPECIFIED) + if (!have_field_width) { - if (precision == UNSPECIFIED) + if (!have_precision) printf (p, argument); else printf (p, precision, argument); } else { - if (precision == UNSPECIFIED) + if (!have_precision) printf (p, field_width, argument); else printf (p, field_width, precision, argument); @@ -429,8 +466,11 @@ print_formatted (const char *format, int argc, char **argv) const char *f; /* Pointer into `format'. */ const char *direc_start; /* Start of % directive. */ size_t direc_length; /* Length of % directive. */ - int field_width; /* Arg to first '*', or UNSPECIFIED if none. */ - int precision; /* Arg to second '*', or UNSPECIFIED if none. */ + bool have_field_width; /* True if FIELD_WIDTH is valid. */ + int field_width = 0; /* Arg to first '*'. */ + bool have_precision; /* True if PRECISION is valid. */ + int precision = 0; /* Arg to second '*'. */ + char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */ for (f = format; *f; ++f) { @@ -439,7 +479,7 @@ print_formatted (const char *format, int argc, char **argv) case '%': direc_start = f++; direc_length = 1; - field_width = precision = UNSPECIFIED; + have_field_width = have_precision = false; if (*f == '%') { putchar ('%'); @@ -447,6 +487,8 @@ print_formatted (const char *format, int argc, char **argv) } if (*f == 'b') { + /* FIXME: Field width and precision are not supported + for %b, even though POSIX requires it. */ if (argc > 0) { print_esc_string (*argv); @@ -455,19 +497,42 @@ print_formatted (const char *format, int argc, char **argv) } break; } - while (*f == ' ' || *f == '#' || *f == '+' || *f == '-') - { - ++f; - ++direc_length; - } + + memset (ok, 0, sizeof ok); + ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] = + ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] = + ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1; + + for (;; f++, direc_length++) + switch (*f) + { + case '\'': + ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = + ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0; + break; + case '-': case '+': case ' ': + break; + case '#': + ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0; + break; + case '0': + ok['c'] = ok['s'] = 0; + break; + default: + goto no_more_flag_characters; + } + no_more_flag_characters:; + if (*f == '*') { ++f; ++direc_length; if (argc > 0) { - field_width = xstrtoul (*argv); - if (field_width == UNSPECIFIED) + intmax_t width = vstrtoimax (*argv); + if (INT_MIN <= width && width <= INT_MAX) + field_width = width; + else error (EXIT_FAILURE, 0, _("invalid field width: %s"), *argv); ++argv; @@ -475,6 +540,7 @@ print_formatted (const char *format, int argc, char **argv) } else field_width = 0; + have_field_width = true; } else while (ISDIGIT (*f)) @@ -486,21 +552,32 @@ print_formatted (const char *format, int argc, char **argv) { ++f; ++direc_length; + ok['c'] = 0; if (*f == '*') { ++f; ++direc_length; if (argc > 0) { - precision = xstrtoul (*argv); - if (precision == UNSPECIFIED) + intmax_t prec = vstrtoimax (*argv); + if (prec < 0) + { + /* A negative precision is taken as if the + precision were omitted, so -1 is safe + here even if prec < INT_MIN. */ + precision = -1; + } + else if (INT_MAX < prec) error (EXIT_FAILURE, 0, _("invalid precision: %s"), *argv); + else + precision = prec; ++argv; --argc; } else precision = 0; + have_precision = true; } else while (ISDIGIT (*f)) @@ -509,24 +586,23 @@ print_formatted (const char *format, int argc, char **argv) ++direc_length; } } - if (*f == 'l' || *f == 'L' || *f == 'h') - { - ++f; - ++direc_length; - } - if (! (*f && strchr ("diouxXfeEgGcs", *f))) - error (EXIT_FAILURE, 0, _("%%%c: invalid directive"), *f); - ++direc_length; - if (argc > 0) - { - print_direc (direc_start, direc_length, field_width, - precision, *argv); - ++argv; - --argc; - } - else - print_direc (direc_start, direc_length, field_width, - precision, ""); + + while (*f == 'l' || *f == 'L' || *f == 'h' + || *f == 'j' || *f == 't' || *f == 'z') + ++f; + + { + unsigned char conversion = *f; + if (! ok[conversion]) + error (EXIT_FAILURE, 0, + _("%.*s: invalid conversion specification"), + (int) (f + 1 - direc_start), direc_start); + } + + print_direc (direc_start, direc_length, *f, + have_field_width, field_width, + have_precision, precision, + (argc <= 0 ? "" : (argc--, *argv++))); break; case '\\': -- 2.34.1