//===----------------------------------------------------------------------===//
//
// Scanf implementation for use in *Sanitizer interceptors.
+// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
+// with a few common GNU extensions.
//
//===----------------------------------------------------------------------===//
#include <stdarg.h>
#ifdef _WIN32
#define va_copy(dst, src) ((dst) = (src))
-#endif // _WIN32
+#endif // _WIN32
-struct ScanfSpec {
- char c;
- unsigned size;
+struct ScanfDirective {
+ int argIdx; // argument index, or -1 of not specified ("%n$")
+ bool suppressed; // suppress assignment ("*")
+ int fieldWidth;
+ bool allocate; // allocate space ("m")
+ char lengthModifier[2];
+ char convSpecifier;
};
-// One-letter specs.
-static const ScanfSpec scanf_specs[] = {
- {'s', 1}, // FIXME: This is incorrect, we should check the actual number
- // of bytes written to the string.
- {'c', sizeof(char)},
- {'p', sizeof(void *)},
- {'e', sizeof(float)},
- {'E', sizeof(float)},
- {'a', sizeof(float)},
- {'f', sizeof(float)},
- {'g', sizeof(float)},
- {'d', sizeof(int)},
- {'i', sizeof(int)},
- {'o', sizeof(int)},
- {'u', sizeof(int)},
- {'x', sizeof(int)},
- {'X', sizeof(int)},
- {'n', sizeof(int)},
- {'t', sizeof(PTRDIFF_T)},
- {'z', sizeof(SIZE_T)},
- {'j', sizeof(INTMAX_T)},
- {'h', sizeof(short)}
-};
-
-static const unsigned scanf_specs_cnt =
- sizeof(scanf_specs) / sizeof(scanf_specs[0]);
-
-// %ll?, %L?, %q? specs
-static const ScanfSpec scanf_llspecs[] = {
- {'e', sizeof(long double)},
- {'f', sizeof(long double)},
- {'g', sizeof(long double)},
- {'d', sizeof(long long)},
- {'i', sizeof(long long)},
- {'o', sizeof(long long)},
- {'u', sizeof(long long)},
- {'x', sizeof(long long)}
-};
-
-static const unsigned scanf_llspecs_cnt =
- sizeof(scanf_llspecs) / sizeof(scanf_llspecs[0]);
-
-// %l? specs
-static const ScanfSpec scanf_lspecs[] = {
- {'e', sizeof(double)},
- {'f', sizeof(double)},
- {'g', sizeof(double)},
- {'d', sizeof(long)},
- {'i', sizeof(long)},
- {'o', sizeof(long)},
- {'u', sizeof(long)},
- {'x', sizeof(long)},
- {'X', sizeof(long)},
-};
-
-static const unsigned scanf_lspecs_cnt =
- sizeof(scanf_lspecs) / sizeof(scanf_lspecs[0]);
-
-static unsigned match_spec(const struct ScanfSpec *spec, unsigned n, char c) {
- for (unsigned i = 0; i < n; ++i)
- if (spec[i].c == c)
- return spec[i].size;
- return 0;
+static const char *parse_number(const char *p, int *out) {
+ *out = internal_atoll(p);
+ while (*p >= '0' && *p <= '9')
+ ++p;
+ return p;
}
-static void scanf_common(void *ctx, const char *format, va_list ap_const) {
- va_list aq;
- va_copy(aq, ap_const);
+static bool char_is_one_of(char c, const char *s) {
+ return !!internal_strchr(s, c);
+}
- const char *p = format;
- unsigned size;
+// Parse scanf format string. If a valid directive in encountered, it is
+// returned in dir. This function returns the pointer to the first
+// unprocessed character, or 0 in case of error.
+// In case of the end-of-string, a pointer to the closing \0 is returned.
+static const char *scanf_parse_next(const char *p, ScanfDirective *dir) {
+ internal_memset(dir, 0, sizeof(*dir));
+ dir->argIdx = -1;
while (*p) {
if (*p != '%') {
continue;
}
++p;
- if (*p == '*' || *p == '%' || *p == '\0') {
- // FIXME: Bailing out for (p == "*") is wrong, we should parse the
- // directive to the end.
- if (*p != '\0')
- ++p;
+ // %%
+ if (*p == '%') {
+ ++p;
continue;
}
-
- unsigned field_width = 0;
- if (*p >= '0' && *p <= '9') {
- field_width = internal_atoll(p);
- while (*p >= '0' && *p <= '9')
- p++;
+ if (*p == '\0') {
+ return 0;
}
- if (field_width > 0) {
- // +1 for the \0 at the end.
- if (*p == 's')
- field_width++;
- if (*p == 's' || *p == 'c') {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void*), field_width);
- ++p;
- continue;
+ // %n$
+ if (*p >= '0' && *p <= '9') {
+ int number;
+ const char *q = parse_number(p, &number);
+ if (*q == '$') {
+ dir->argIdx = number;
+ p = q + 1;
}
+ // Otherwise, do not change p. This will be re-parsed later as the field
+ // width.
}
-
- if (*p == '[') {
- // Search for the closing bracket. It is ignored if it goes right after
- // the opening bracket or after ^.
- p++;
- if (*p == ']') {
- p++;
- } else if (*p == '^' && *(p+1) == ']') {
- p += 2;
- }
- while (*p != ']')
- p++;
- // +1 for the \0 at the end.
- field_width++;
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void*), field_width);
- continue;
+ // *
+ if (*p == '*') {
+ dir->suppressed = true;
+ ++p;
}
-
- if (*p == 'L' || *p == 'q') {
+ // Field width.
+ if (*p >= '0' && *p <= '9') {
+ p = parse_number(p, &dir->fieldWidth);
+ if (dir->fieldWidth <= 0)
+ return 0;
+ }
+ // m
+ if (*p == 'm') {
+ dir->allocate = true;
++p;
- size = match_spec(scanf_llspecs, scanf_llspecs_cnt, *p);
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
- continue;
}
-
- if (*p == 'l') {
+ // Length modifier.
+ if (char_is_one_of(*p, "jztLq")) {
+ dir->lengthModifier[0] = *p;
+ ++p;
+ } else if (*p == 'h') {
+ dir->lengthModifier[0] = 'h';
+ ++p;
+ if (*p == 'h') {
+ dir->lengthModifier[1] = 'h';
+ ++p;
+ }
+ } else if (*p == 'l') {
+ dir->lengthModifier[0] = 'l';
++p;
if (*p == 'l') {
+ dir->lengthModifier[1] = 'l';
++p;
- size = match_spec(scanf_llspecs, scanf_llspecs_cnt, *p);
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
- continue;
- } else {
- size = match_spec(scanf_lspecs, scanf_lspecs_cnt, *p);
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
- continue;
}
}
+ // Conversion specifier.
+ dir->convSpecifier = *p++;
+ // Consume %[...] expression.
+ if (dir->convSpecifier == '[') {
+ if (*p == '^')
+ ++p;
+ if (*p == ']')
+ ++p;
+ while (*p && *p != ']')
+ ++p;
+ if (*p == 0)
+ return 0; // unexpected end of string
+ // Consume the closing ']'.
+ ++p;
+ }
+ break;
+ }
+ return p;
+}
+
+// Returns true if the character is an integer conversion specifier.
+static bool scanf_is_integer_conv(char c) {
+ return char_is_one_of(c, "diouxXn");
+}
+
+// Returns true if the character is an floating point conversion specifier.
+static bool scanf_is_float_conv(char c) {
+ return char_is_one_of(c, "AeEfFgG");
+ // NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore,
+ // unsupported.
+}
+
+// Returns string output character size for string-like conversions,
+// or 0 if the conversion is invalid.
+static int scanf_get_char_size(ScanfDirective *dir) {
+ if (char_is_one_of(dir->convSpecifier, "CS")) {
+ // wchar_t
+ return 0;
+ }
+
+ if (char_is_one_of(dir->convSpecifier, "cs[")) {
+ if (dir->lengthModifier[0] == 'l')
+ // wchar_t
+ return 0;
+ else if (dir->lengthModifier[0] == 0)
+ return sizeof(char);
+ else
+ return 0;
+ }
+
+ return 0;
+}
+
+enum ScanfStoreSize {
+ // Store size not known in advance; can be calculated as strlen() of the
+ // destination buffer.
+ SSS_STRLEN = -1,
+ // Invalid conversion specifier.
+ SSS_INVALID = 0
+};
+
+// Returns the store size of a scanf directive (if >0), or a value of
+// ScanfStoreSize.
+static int scanf_get_store_size(ScanfDirective *dir) {
+ if (scanf_is_integer_conv(dir->convSpecifier)) {
+ switch (dir->lengthModifier[0]) {
+ case 'h':
+ return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
+ case 'l':
+ return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
+ case 'L':
+ return sizeof(long long);
+ case 'j':
+ return sizeof(INTMAX_T);
+ case 'z':
+ return sizeof(SIZE_T);
+ case 't':
+ return sizeof(PTRDIFF_T);
+ case 0:
+ return sizeof(int);
+ default:
+ return SSS_INVALID;
+ }
+ }
- if (*p == 'h' && *(p + 1) == 'h') {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), sizeof(char));
- p += 2;
- continue;
+ if (scanf_is_float_conv(dir->convSpecifier)) {
+ switch (dir->lengthModifier[0]) {
+ case 'L':
+ case 'q':
+ return sizeof(long double);
+ case 'l':
+ return dir->lengthModifier[1] == 'l' ? sizeof(long double)
+ : sizeof(double);
+ case 0:
+ return sizeof(float);
+ default:
+ return SSS_INVALID;
}
+ }
- size = match_spec(scanf_specs, scanf_specs_cnt, *p);
- if (size) {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size);
- ++p;
+ if (char_is_one_of(dir->convSpecifier, "sS[")) {
+ unsigned charSize = scanf_get_char_size(dir);
+ if (charSize == 0)
+ return SSS_INVALID;
+ if (dir->fieldWidth == 0)
+ return SSS_STRLEN;
+ return (dir->fieldWidth + 1) * charSize;
+ }
+
+ if (char_is_one_of(dir->convSpecifier, "cC")) {
+ unsigned charSize = scanf_get_char_size(dir);
+ if (charSize == 0)
+ return SSS_INVALID;
+ if (dir->fieldWidth == 0)
+ return charSize;
+ return dir->fieldWidth * charSize;
+ }
+
+ if (dir->convSpecifier == 'p') {
+ if (dir->lengthModifier[1] != 0)
+ return SSS_INVALID;
+ return sizeof(void *);
+ }
+
+ return SSS_INVALID;
+}
+
+// Common part of *scanf interceptors.
+// Process format string and va_list, and report all store ranges.
+static void scanf_common(void *ctx, const char *format, va_list ap_const) {
+ va_list aq;
+ va_copy(aq, ap_const);
+
+ const char *p = format;
+
+ while (p) {
+ ScanfDirective dir;
+ p = scanf_parse_next(p, &dir);
+ if (!p)
+ break;
+ if (dir.convSpecifier == 0) {
+ // This can only happen at the end of the format string.
+ CHECK_EQ(*p, 0);
+ break;
+ }
+ // Here the directive is valid. Do what it says.
+ if (dir.argIdx != -1) {
+ // Unsupported.
+ break;
+ }
+ if (dir.suppressed)
+ continue;
+ if (dir.allocate) {
+ // Unsupported;
continue;
}
+
+ int size = scanf_get_store_size(&dir);
+ if (size == SSS_INVALID)
+ break;
+ void *p = va_arg(aq, void *);
+ if (size == SSS_STRLEN) {
+ size = internal_strlen((const char *)p) + 1;
+ }
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, size);
}
- va_end(aq);
}
va_end(ap);
}
+static const char scanf_buf[] = "Test string.";
+static size_t scanf_buf_size = sizeof(scanf_buf);
+
static void testScanf(const char *format, unsigned n, ...) {
std::vector<unsigned> scanf_sizes;
// 16 args should be enough.
testScanf2((void *)&scanf_sizes, format,
- (void*)0, (void*)0, (void*)0, (void*)0,
- (void*)0, (void*)0, (void*)0, (void*)0,
- (void*)0, (void*)0, (void*)0, (void*)0,
- (void*)0, (void*)0, (void*)0, (void*)0);
+ scanf_buf, scanf_buf, scanf_buf, scanf_buf,
+ scanf_buf, scanf_buf, scanf_buf, scanf_buf,
+ scanf_buf, scanf_buf, scanf_buf, scanf_buf,
+ scanf_buf, scanf_buf, scanf_buf, scanf_buf);
ASSERT_EQ(n, scanf_sizes.size()) <<
"Unexpected number of format arguments: '" << format << "'";
va_list ap;
testScanf("%*d", 0);
testScanf("%4d%8f%c", 3, I, F, C);
- testScanf("%s%d", 2, 1, I);
- testScanf("%[abc]", 1, 1);
+ testScanf("%s%d", 2, scanf_buf_size, I);
+ testScanf("%[abc]", 1, scanf_buf_size);
testScanf("%4[bcdef]", 1, 5);
- testScanf("%[]]", 1, 1);
+ testScanf("%[]]", 1, scanf_buf_size);
testScanf("%8[^]%d0-9-]%c", 2, 9, C);
testScanf("%*[^:]%n:%d:%1[ ]%n", 4, I, I, 2, I);
+
+ testScanf("%*d%u", 1, I);
+
+ // Unsupported stuff.
+ testScanf("%a[", 0);
+ testScanf("%as", 0);
+ testScanf("%aS", 0);
+ testScanf("%a13S", 0);
+ testScanf("%alS", 0);
+
+ testScanf("%5$d", 0);
+ testScanf("%md", 0);
+ testScanf("%m10s", 0);
}