[libc] add scanf string converters
authorMichael Jones <michaelrj@google.com>
Thu, 3 Nov 2022 21:22:34 +0000 (14:22 -0700)
committerMichael Jones <michaelrj@google.com>
Mon, 7 Nov 2022 21:49:01 +0000 (13:49 -0800)
This patch adds the basic conversion facilities to scanf as well as unit
tests for them. It also adds scanf_main which will be used for the
eventual scanf entrypoints.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D137376

12 files changed:
libc/src/stdio/scanf_core/CMakeLists.txt
libc/src/stdio/scanf_core/converter.cpp [new file with mode: 0644]
libc/src/stdio/scanf_core/converter.h [new file with mode: 0644]
libc/src/stdio/scanf_core/core_structs.h
libc/src/stdio/scanf_core/reader.cpp
libc/src/stdio/scanf_core/reader.h
libc/src/stdio/scanf_core/scanf_main.cpp [new file with mode: 0644]
libc/src/stdio/scanf_core/scanf_main.h [new file with mode: 0644]
libc/src/stdio/scanf_core/string_converter.cpp [new file with mode: 0644]
libc/src/stdio/scanf_core/string_converter.h [new file with mode: 0644]
libc/test/src/stdio/scanf_core/CMakeLists.txt
libc/test/src/stdio/scanf_core/converter_test.cpp [new file with mode: 0644]

index 91cf5e2..940e9f0 100644 (file)
@@ -31,6 +31,20 @@ if(NOT (TARGET libc.src.__support.File.file))
 endif()
 
 add_object_library(
+  scanf_main
+  SRCS
+    scanf_main.cpp
+  HDRS
+    scanf_main.h
+  DEPENDS
+    .parser
+    .reader
+    .converter
+    .core_structs
+    libc.src.__support.arg_list
+)
+
+add_object_library(
   string_reader
   SRCS
     string_reader.cpp
@@ -58,3 +72,20 @@ add_object_library(
     .string_reader
     .file_reader
 )
+
+add_object_library(
+  converter
+  SRCS
+    converter.cpp
+    string_converter.cpp
+  HDRS
+    converter.h
+    string_converter.h
+  DEPENDS
+    .reader
+    .core_structs
+    libc.src.__support.ctype_utils
+    libc.src.__support.CPP.bitset
+    libc.src.__support.CPP.string_view
+    libc.src.__support.CPP.limits
+)
diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
new file mode 100644 (file)
index 0000000..3cfa875
--- /dev/null
@@ -0,0 +1,98 @@
+//===-- Format specifier converter implementation for scanf ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/converter.h"
+
+#include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include "src/stdio/scanf_core/string_converter.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Dispatches on to_conv.conv_name to the matching conversion routine and
+// returns that routine's status code. Conversion names without a handler fall
+// back to matching the section's raw text against the input.
+int convert(Reader *reader, const FormatSection &to_conv) {
+  int ret_val = 0;
+  switch (to_conv.conv_name) {
+  case '%':
+    // A '%' conversion matches a literal '%' in the input.
+    return raw_match(reader, "%");
+  case 's':
+    // %s consumes any leading whitespace before reading the string itself.
+    ret_val = raw_match(reader, " ");
+    if (ret_val != READ_OK)
+      return ret_val;
+    return convert_string(reader, to_conv);
+  case 'c':
+  case '[':
+    // Unlike %s, %c and %[ start reading without skipping whitespace.
+    return convert_string(reader, to_conv);
+    // NOTE(review): the cases below are commented out; presumably they are
+    // placeholders for conversions added by later patches.
+    //   case 'd':
+    //   case 'i':
+    //   case 'u':
+    //   case 'o':
+    //   case 'x':
+    //   case 'X':
+    //     ret_val = raw_match(reader, " ");
+    //     if (ret_val != READ_OK)
+    //       return ret_val;
+    //     return convert_int(reader, to_conv);
+    // #ifndef LLVM_LIBC_SCANF_DISABLE_FLOAT
+    //   case 'f':
+    //   case 'F':
+    //   case 'e':
+    //   case 'E':
+    //   case 'a':
+    //   case 'A':
+    //   case 'g':
+    //   case 'G':
+    //     ret_val = raw_match(reader, " ");
+    //     if (ret_val != READ_OK)
+    //       return ret_val;
+    //     return convert_float(reader, to_conv);
+    // #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
+    // #ifndef LLVM_LIBC_SCANF_DISABLE_WRITE_INT
+    //   case 'n':
+    //     return convert_write_int(reader, to_conv);
+    // #endif // LLVM_LIBC_SCANF_DISABLE_WRITE_INT
+    //   case 'p':
+    //     ret_val = raw_match(reader, " ");
+    //     if (ret_val != READ_OK)
+    //       return ret_val;
+    //     return convert_pointer(reader, to_conv);
+  default:
+    // No handler for this conversion name: match its raw text literally.
+    return raw_match(reader, to_conv.raw_string);
+  }
+  // Not reached: every path through the switch above returns.
+  return -1;
+}
+
+// Matches raw_string against the input one pattern character at a time. A
+// whitespace character in raw_string consumes zero or more whitespace
+// characters from the input; any other character must match exactly. An empty
+// raw_string succeeds without consuming anything.
+int raw_match(Reader *reader, cpp::string_view raw_string) {
+  char next = reader->getc();
+  int result = READ_OK;
+  const size_t len = raw_string.size();
+  for (size_t pos = 0; pos < len && result == READ_OK; ++pos) {
+    const char expected = raw_string[pos];
+    if (internal::isspace(expected)) {
+      while (internal::isspace(next))
+        next = reader->getc();
+    } else if (expected == next) {
+      next = reader->getc();
+    } else {
+      result = MATCHING_FAILURE;
+    }
+  }
+  // One character past the match has always been read, so put it back.
+  reader->ungetc(next);
+  return result;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/converter.h b/libc/src/stdio/scanf_core/converter.h
new file mode 100644 (file)
index 0000000..cd91ff6
--- /dev/null
@@ -0,0 +1,33 @@
+//===-- Format specifier converter for scanf -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
+
+#include "src/__support/CPP/string_view.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// convert will call the conversion function that matches the FormatSection's
+// conversion name. That function reads characters from the reader and stores
+// any converted result through the section's output pointer.
+int convert(Reader *reader, const FormatSection &to_conv);
+
+// raw_match takes a raw string and matches it to the characters obtained from
+// the reader.
+int raw_match(Reader *reader, cpp::string_view raw_string);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
index 213a5e1..7f331db 100644 (file)
@@ -78,7 +78,7 @@ struct FormatSection {
 
 enum ErrorCodes : int {
   // This is the value to be returned by conversions when no error has occurred.
-  WRITE_OK = 0,
+  READ_OK = 0,
   // These are the scanf return values for when an error has occurred. They are
   // all negative, and should be distinct.
   FILE_READ_ERROR = -1,
index 23dcbd4..0d8d5a3 100644 (file)
@@ -13,6 +13,7 @@ namespace __llvm_libc {
 namespace scanf_core {
 
 char Reader::getc() {
+  ++cur_chars_read;
   if (reader_type == ReaderType::String) {
     return string_reader->get_char();
   } else {
@@ -21,6 +22,7 @@ char Reader::getc() {
 }
 
 void Reader::ungetc(char c) {
+  --cur_chars_read;
   if (reader_type == ReaderType::String) {
     // The string reader ignores the char c passed to unget since it doesn't
     // need to place anything back into a buffer, and modifying the source
index 4d6ed06..4ca25cc 100644 (file)
@@ -26,6 +26,8 @@ class Reader final {
 
   const ReaderType reader_type;
 
+  size_t cur_chars_read = 0;
+
 public:
   Reader(StringReader *init_string_reader)
       : string_reader(init_string_reader), reader_type(ReaderType::String) {}
@@ -41,6 +43,8 @@ public:
   // This moves the input back by one character, placing c into the buffer if
   // this is a file reader, else c is ignored.
   void ungetc(char c);
+
+  // Returns the net number of characters consumed so far: each getc adds one
+  // and each ungetc removes one.
+  size_t chars_read() const { return cur_chars_read; }
 };
 
 } // namespace scanf_core
diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
new file mode 100644 (file)
index 0000000..fcf7af2
--- /dev/null
@@ -0,0 +1,47 @@
+//===-- Starting point for scanf --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/scanf_main.h"
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/converter.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/parser.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Runs the scanf loop: takes sections of the format string str from the parser
+// one at a time and applies each to reader, stopping when a section fails or
+// the parser returns a section with an empty raw_string.
+//
+// Returns the number of conversions that assigned a value, or -1 if a failure
+// occurred before any conversion completed.
+int scanf_main(Reader *reader, const char *__restrict str,
+               internal::ArgList &args) {
+  Parser parser(str, args);
+  int ret_val = READ_OK;
+  int conversions = 0;
+  // Tracks whether any conversion has completed, including ones whose
+  // assignment was suppressed and so are not counted in conversions.
+  bool conversion_completed = false;
+  for (FormatSection cur_section = parser.get_next_section();
+       !cur_section.raw_string.empty() && ret_val == READ_OK;
+       cur_section = parser.get_next_section()) {
+    if (cur_section.has_conv) {
+      ret_val = convert(reader, cur_section);
+      // A '%' conversion only matches a literal '%': nothing is converted or
+      // assigned, so it never counts.
+      if (ret_val == READ_OK && cur_section.conv_name != '%') {
+        conversion_completed = true;
+        // The result is the number of values assigned, so conversions with
+        // the NO_WRITE flag set are not counted.
+        if ((cur_section.flags & NO_WRITE) == 0)
+          ++conversions;
+      }
+    } else {
+      ret_val = raw_match(reader, cur_section.raw_string);
+    }
+  }
+
+  if (!conversion_completed && ret_val != READ_OK) {
+    // This is intended to be converted to EOF in the client call to avoid
+    // including stdio.h in this internal file.
+    return -1;
+  }
+  return conversions;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/scanf_main.h b/libc/src/stdio/scanf_core/scanf_main.h
new file mode 100644 (file)
index 0000000..d1db46b
--- /dev/null
@@ -0,0 +1,26 @@
+//===-- Starting point for scanf --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+int scanf_main(Reader *reader, const char *__restrict str,
+               internal::ArgList &args);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
diff --git a/libc/src/stdio/scanf_core/string_converter.cpp b/libc/src/stdio/scanf_core/string_converter.cpp
new file mode 100644 (file)
index 0000000..bdbb5c8
--- /dev/null
@@ -0,0 +1,76 @@
+//===-- String type specifier converters for scanf --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/string_converter.h"
+
+#include "src/__support/CPP/limits.h"
+#include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Performs a %s, %c, or %[ conversion, reading from reader.
+//
+// Unless the NO_WRITE flag is set, each matched character is stored through
+// to_conv.output_ptr; %s and %[ results are null terminated, %c results are
+// not. Returns READ_OK if at least one character was matched, and
+// MATCHING_FAILURE otherwise.
+int convert_string(Reader *reader, const FormatSection &to_conv) {
+  // %s "Matches a sequence of non-white-space characters"
+
+  // %c "Matches a sequence of characters of exactly the number specified by the
+  // field width (1 if no field width is present in the directive)"
+
+  // %[ "Matches a nonempty sequence of characters from a set of expected
+  // characters (the scanset)."
+  size_t max_width = 0;
+  if (to_conv.max_width > 0) {
+    max_width = to_conv.max_width;
+  } else {
+    if (to_conv.conv_name == 'c') {
+      max_width = 1;
+    } else {
+      max_width = cpp::numeric_limits<size_t>::max();
+    }
+  }
+
+  char *output = reinterpret_cast<char *>(to_conv.output_ptr);
+
+  char cur_char = reader->getc();
+  size_t i = 0;
+  for (; i < max_width && cur_char != '\0'; ++i) {
+    // The scanset is indexed by character value, so the lookup must use the
+    // unsigned value of the char. Where plain char is signed, a byte >= 0x80
+    // would otherwise be converted to a huge out-of-range index.
+    const size_t set_index = static_cast<unsigned char>(cur_char);
+    // If this is %s and we've hit a space, or if this is %[] and we've found
+    // something not in the scanset.
+    if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) ||
+        (to_conv.conv_name == '[' && !to_conv.scan_set.test(set_index))) {
+      break;
+    }
+    // if the NO_WRITE flag is not set, write to the output.
+    if ((to_conv.flags & NO_WRITE) == 0)
+      output[i] = cur_char;
+    cur_char = reader->getc();
+  }
+
+  // We always read one more character than will be used, so we have to put the
+  // last one back.
+  reader->ungetc(cur_char);
+
+  // If this is %s or %[]
+  if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) {
+    // Always null terminate the string. This may cause a write to the
+    // (max_width + 1) byte, which is correct. The max width describes the max
+    // number of characters read from the input string, and doesn't necessarily
+    // correspond to the output.
+    output[i] = '\0';
+  }
+
+  if (i == 0)
+    return MATCHING_FAILURE;
+  return READ_OK;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/string_converter.h b/libc/src/stdio/scanf_core/string_converter.h
new file mode 100644 (file)
index 0000000..4113f5c
--- /dev/null
@@ -0,0 +1,25 @@
+//===-- String type specifier converters for scanf --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
+
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+int convert_string(Reader *reader, const FormatSection &to_conv);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
index fa4878a..db20335 100644 (file)
@@ -30,3 +30,16 @@ add_libc_unittest(
     libc.src.stdio.scanf_core.string_reader
     libc.src.__support.CPP.string_view
 )
+
+add_libc_unittest(
+  converter_test
+  SUITE
+    libc_stdio_unittests
+  SRCS
+    converter_test.cpp
+  DEPENDS
+    libc.src.stdio.scanf_core.reader
+    libc.src.stdio.scanf_core.string_reader
+    libc.src.stdio.scanf_core.converter
+    libc.src.__support.CPP.string_view
+)
diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp
new file mode 100644 (file)
index 0000000..d90af34
--- /dev/null
@@ -0,0 +1,295 @@
+//===-- Unittests for the basic scanf converters --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/string_view.h"
+#include "src/stdio/scanf_core/converter.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+#include "src/stdio/scanf_core/string_reader.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
+  namespace sc = __llvm_libc::scanf_core;
+  const int read_ok = static_cast<int>(sc::READ_OK);
+  const int matching_failure = static_cast<int>(sc::MATCHING_FAILURE);
+
+  const char *input = "abcdef";
+  sc::StringReader string_source(input);
+  sc::Reader reader(&string_source);
+
+  // "abc" matches the first three characters.
+  ASSERT_EQ(sc::raw_match(&reader, "abc"), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+
+  // An empty pattern matches trivially and consumes nothing.
+  ASSERT_EQ(sc::raw_match(&reader, ""), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+
+  // A space in the pattern may match zero spaces, so nothing is consumed.
+  ASSERT_EQ(sc::raw_match(&reader, " "), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+
+  // "d" matches and consumes one more character.
+  ASSERT_EQ(sc::raw_match(&reader, "d"), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(4));
+
+  // "z" does not match the next character, so nothing is consumed.
+  ASSERT_EQ(sc::raw_match(&reader, "z"), matching_failure);
+  ASSERT_EQ(reader.chars_read(), size_t(4));
+
+  // "efgh" runs out of input after "ef": it fails, but "ef" stays consumed.
+  ASSERT_EQ(sc::raw_match(&reader, "efgh"), matching_failure);
+  ASSERT_EQ(reader.chars_read(), size_t(6));
+}
+
+TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
+  namespace sc = __llvm_libc::scanf_core;
+  const int read_ok = static_cast<int>(sc::READ_OK);
+  const int matching_failure = static_cast<int>(sc::MATCHING_FAILURE);
+
+  const char *input = " a \t\n b   cd";
+  sc::StringReader string_source(input);
+  sc::Reader reader(&string_source);
+
+  // The pattern "a" has nothing that can absorb the leading space in the
+  // input, so the match fails and nothing is consumed.
+  ASSERT_EQ(sc::raw_match(&reader, "a"), matching_failure);
+  ASSERT_EQ(reader.chars_read(), size_t(0));
+
+  // Each whitespace character in the pattern matches zero or more whitespace
+  // characters in the input, so "  \t\n  " consumes just the single space.
+  ASSERT_EQ(sc::raw_match(&reader, "  \t\n  "), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(1));
+
+  // In "ab" the 'a' matches, but then 'b' is compared against whitespace and
+  // fails. Only the 'a' is consumed.
+  ASSERT_EQ(sc::raw_match(&reader, "ab"), matching_failure);
+  ASSERT_EQ(reader.chars_read(), size_t(2));
+
+  // The spaces in "  b" absorb all the whitespace before the 'b', which then
+  // matches.
+  ASSERT_EQ(sc::raw_match(&reader, "  b"), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(7));
+
+  // A tab in the pattern consumes the run of spaces, stopping at the 'c'.
+  ASSERT_EQ(sc::raw_match(&reader, "\t"), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(10));
+
+  // In "c d" the pattern space matches zero input characters, so both letters
+  // are matched and consumed.
+  ASSERT_EQ(sc::raw_match(&reader, "c d"), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(12));
+}
+
+TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
+  namespace sc = __llvm_libc::scanf_core;
+  const int read_ok = static_cast<int>(sc::READ_OK);
+
+  const char *input = "abcDEF123 654LKJihg";
+  char buffer[20];
+  sc::StringReader string_source(input);
+  sc::Reader reader(&string_source);
+
+  sc::FormatSection section;
+  section.has_conv = true;
+  section.conv_name = 's';
+  section.output_ptr = buffer;
+
+  // The first %s reads up to, but not including, the space.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(9));
+  ASSERT_STREQ(buffer, "abcDEF123");
+
+  // The second %s skips the leading space, then reads the rest of the input.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+  ASSERT_STREQ(buffer, "654LKJihg");
+}
+
+TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
+  namespace sc = __llvm_libc::scanf_core;
+  const int read_ok = static_cast<int>(sc::READ_OK);
+
+  const char *input = "abcDEF123 654LKJihg";
+  sc::StringReader string_source(input);
+  sc::Reader reader(&string_source);
+
+  // No output pointer is provided: with NO_WRITE set nothing is stored.
+  sc::FormatSection section;
+  section.has_conv = true;
+  section.conv_name = 's';
+  section.flags = sc::NO_WRITE;
+
+  // The input is still consumed up to the space.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(9));
+
+  // The leading space is skipped and the rest of the input is consumed.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+}
+
+TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
+  namespace sc = __llvm_libc::scanf_core;
+  const int read_ok = static_cast<int>(sc::READ_OK);
+
+  const char *input = "abcDEF123 654LKJihg";
+  // A width of 5 needs 6 bytes of output: 5 characters plus the '\0'.
+  char buffer[6];
+  sc::StringReader string_source(input);
+  sc::Reader reader(&string_source);
+
+  sc::FormatSection section;
+  section.has_conv = true;
+  section.conv_name = 's';
+  section.max_width = 5;
+  section.output_ptr = buffer;
+
+  // The width limit stops the read after 5 characters.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(5));
+  ASSERT_STREQ(buffer, "abcDE");
+
+  // The space stops this read before the width limit is reached.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(9));
+  ASSERT_STREQ(buffer, "F123");
+
+  // The leading space is skipped, then 5 more characters are read.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(15));
+  ASSERT_STREQ(buffer, "654LK");
+
+  // The end of the input stops this read before the width limit is reached.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+  ASSERT_STREQ(buffer, "Jihg");
+}
+
+TEST(LlvmLibcScanfConverterTest, CharsConv) {
+  namespace sc = __llvm_libc::scanf_core;
+  namespace cpp = __llvm_libc::cpp;
+  const int read_ok = static_cast<int>(sc::READ_OK);
+
+  const char *input = "abcDEF123 654LKJihg MNOpqr&*(";
+  char buffer[20];
+  sc::StringReader string_source(input);
+  sc::Reader reader(&string_source);
+
+  sc::FormatSection section;
+  section.has_conv = true;
+  section.conv_name = 'c';
+  section.output_ptr = buffer;
+
+  // With no width given, %c reads exactly one character per conversion.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(1));
+  ASSERT_EQ(buffer[0], 'a');
+
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(2));
+  ASSERT_EQ(buffer[0], 'b');
+
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(3));
+  ASSERT_EQ(buffer[0], 'c');
+
+  // From here on, read 8 characters per conversion.
+  section.max_width = 8;
+  cpp::string_view first_eight(buffer, 8);
+
+  // A space in the middle of the input does not end a %c conversion.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(11));
+  ASSERT_EQ(first_eight, cpp::string_view("DEF123 6", 8));
+
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(19));
+  ASSERT_EQ(first_eight, cpp::string_view("54LKJihg", 8));
+
+  // A leading space is not skipped either; it is read like any other char.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(27));
+  ASSERT_EQ(first_eight, cpp::string_view(" MNOpqr&", 8));
+
+  // The null byte at the end of the input does stop %c, after 2 characters.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(29));
+  ASSERT_EQ(cpp::string_view(buffer, 2), cpp::string_view("*(", 2));
+}
+
+TEST(LlvmLibcScanfConverterTest, ScansetConv) {
+  namespace sc = __llvm_libc::scanf_core;
+  namespace cpp = __llvm_libc::cpp;
+  const int read_ok = static_cast<int>(sc::READ_OK);
+  const int matching_failure = static_cast<int>(sc::MATCHING_FAILURE);
+
+  const char *input = "abcDEF[123] 654LKJihg";
+  char buffer[20];
+  sc::StringReader string_source(input);
+  sc::Reader reader(&string_source);
+
+  sc::FormatSection section;
+  section.has_conv = true;
+  section.conv_name = '[';
+  section.output_ptr = buffer;
+
+  // First set: a-c, D-F, 1-6, and the two bracket characters.
+  cpp::bitset<256> letters_digits_brackets;
+  letters_digits_brackets.set_range('a', 'c');
+  letters_digits_brackets.set_range('D', 'F');
+  letters_digits_brackets.set_range('1', '6');
+  letters_digits_brackets.set('[');
+  letters_digits_brackets.set(']');
+
+  section.scan_set = letters_digits_brackets;
+
+  // Everything up to the space is in the set.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(11));
+  ASSERT_EQ(cpp::string_view(buffer, 11),
+            cpp::string_view("abcDEF[123]", 11));
+
+  // %[ does not skip leading spaces. The space is not in the set, so this
+  // fails without consuming anything; had it skipped, it would read "654".
+  ASSERT_EQ(sc::convert(&reader, section), matching_failure);
+  ASSERT_EQ(reader.chars_read(), size_t(11));
+
+  // Second set: every character except a-g.
+  cpp::bitset<256> all_but_a_to_g;
+  all_but_a_to_g.set_range('a', 'g');
+  all_but_a_to_g.flip();
+  section.scan_set = all_but_a_to_g;
+
+  section.max_width = 5;
+
+  // The space is in this set, and the width limit stops the read at 5.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(16));
+  ASSERT_EQ(cpp::string_view(buffer, 5), cpp::string_view(" 654L", 5));
+
+  // The 'g' is outside the set and stops this read after 4 characters.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(20));
+  ASSERT_EQ(cpp::string_view(buffer, 4), cpp::string_view("KJih", 4));
+
+  // Third set: just 'g' and '\0'.
+  cpp::bitset<256> g_and_null;
+  g_and_null.set('g');
+  g_and_null.set('\0');
+  section.scan_set = g_and_null;
+
+  // The read still stops at '\0' even though '\0' is a member of the set.
+  ASSERT_EQ(sc::convert(&reader, section), read_ok);
+  ASSERT_EQ(reader.chars_read(), size_t(21));
+  ASSERT_EQ(cpp::string_view(buffer, 1), cpp::string_view("g", 1));
+}