From 28e312cbf098c05af9a1865805274cf7ab470dfd Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 3 Nov 2022 15:21:30 -0700 Subject: [PATCH] [libc][obvious] fix printf failing to stop on %\0 Previously, the printf parser would treat "%\0" as a conversion with the name "\0", and advance past the null byte causing a buffer overflow. This patch corrects that in both printf and scanf. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D137367 --- libc/src/stdio/printf_core/parser.cpp | 11 +++++++++-- libc/src/stdio/scanf_core/parser.cpp | 9 ++++++++- libc/test/src/stdio/printf_core/parser_test.cpp | 13 +++++++++++++ libc/test/src/stdio/scanf_core/parser_test.cpp | 13 +++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp index 38d2e1e..6a27c1c 100644 --- a/libc/src/stdio/printf_core/parser.cpp +++ b/libc/src/stdio/printf_core/parser.cpp @@ -151,7 +151,11 @@ FormatSection Parser::get_next_section() { section.has_conv = false; break; } - ++cur_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the cur_pos. + if (str[cur_pos] != '\0') + ++cur_pos; + } else { // raw section section.has_conv = false; @@ -372,7 +376,10 @@ Parser::TypeDesc Parser::get_type_desc(size_t index) { if (conv_index == index) return conv_size; } - ++local_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the local_pos. + if (str[local_pos] != '\0') + ++local_pos; } // If there is no size for the requested index, then just guess that it's an diff --git a/libc/src/stdio/scanf_core/parser.cpp b/libc/src/stdio/scanf_core/parser.cpp index 31dd118..76e658e 100644 --- a/libc/src/stdio/scanf_core/parser.cpp +++ b/libc/src/stdio/scanf_core/parser.cpp @@ -74,7 +74,14 @@ FormatSection Parser::get_next_section() { section.output_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); } - ++cur_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the cur_pos and we should not count this has having a + // conversion. + if (str[cur_pos] != '\0') { + ++cur_pos; + } else { + section.has_conv = false; + } // If the format is a bracketed one, then we need to parse out the insides // of the brackets. diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp index 0684ebc..3ae8bf4 100644 --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -102,6 +102,19 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArg) { ASSERT_PFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcPrintfParserTest, EvalBadArg) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%\0abc"; + int arg1 = 12345; + evaluate(format_arr, str, arg1); + + __llvm_libc::printf_core::FormatSection expected; + expected.has_conv = false; + expected.raw_string = {str, 1}; + + ASSERT_PFORMAT_EQ(expected, format_arr[0]); +} + TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFlags) { __llvm_libc::printf_core::FormatSection format_arr[10]; const char *str = "%+-0 #d"; diff --git a/libc/test/src/stdio/scanf_core/parser_test.cpp b/libc/test/src/stdio/scanf_core/parser_test.cpp index e2ed4b0..3d2c081 100644 --- a/libc/test/src/stdio/scanf_core/parser_test.cpp +++ b/libc/test/src/stdio/scanf_core/parser_test.cpp @@ -103,6 +103,19 @@ TEST(LlvmLibcScanfParserTest, EvalOneArg) { ASSERT_SFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcScanfParserTest, EvalBadArg) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%\0abc"; + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = false; + expected.raw_string = {str, 1}; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + TEST(LlvmLibcScanfParserTest, EvalOneArgWithFlag) { __llvm_libc::scanf_core::FormatSection format_arr[10]; const char *str = "%*d"; -- 2.7.4