From 74da5e6c082edff19981892328d724a8e02ffcd9 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj@google.com>
Date: Thu, 1 Sep 2022 15:19:35 -0700
Subject: [PATCH] [libc] add result class to strtointeger

This is a class intended to improve errno handling for internal
functions by allowing functions to return their result and error status
instead of setting errno. This specific class will be used for
strtointeger and (in a followup patch) strtofloat.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D133163
---
 libc/src/__support/CMakeLists.txt                 |  7 +++++
 libc/src/__support/high_precision_decimal.h       |  2 +-
 libc/src/__support/str_to_float.h                 | 17 +++++++++---
 libc/src/__support/str_to_integer.h               | 31 ++++++++++-----------
 libc/src/__support/str_to_num_result.h            | 33 +++++++++++++++++++++++
 libc/src/inttypes/strtoimax.cpp                   |  9 ++++++-
 libc/src/inttypes/strtoumax.cpp                   |  9 ++++++-
 libc/src/stdio/printf_core/parser.cpp             | 23 +++++++---------
 libc/src/stdio/scanf_core/parser.cpp              | 16 +++++------
 libc/src/stdlib/atoi.cpp                          |  6 ++++-
 libc/src/stdlib/atol.cpp                          |  6 ++++-
 libc/src/stdlib/atoll.cpp                         |  6 ++++-
 libc/src/stdlib/strtol.cpp                        |  9 ++++++-
 libc/src/stdlib/strtoll.cpp                       |  9 ++++++-
 libc/src/stdlib/strtoul.cpp                       |  9 ++++++-
 libc/src/stdlib/strtoull.cpp                      |  9 ++++++-
 utils/bazel/llvm-project-overlay/libc/BUILD.bazel |  9 +++++++
 17 files changed, 159 insertions(+), 51 deletions(-)
 create mode 100644 libc/src/__support/str_to_num_result.h
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 970fa3c..e72437b 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -37,11 +37,18 @@ add_header_library(
 )
 
 add_header_library(
+  str_to_num_result
+  HDRS
+    str_to_num_result.h
+)
+
+add_header_library(
   str_to_integer
   HDRS
     str_to_integer.h
   DEPENDS
     .ctype_utils
+    .str_to_num_result
     libc.include.errno
     libc.src.errno.errno
     libc.src.__support.CPP.limits
diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h
index d3f3f02..dba561c 100644
--- a/libc/src/__support/high_precision_decimal.h
+++ b/libc/src/__support/high_precision_decimal.h
@@ -319,7 +319,7 @@ public:
     if ((*numString | 32) == 'e') {
       ++numString;
       if (isdigit(*numString) || *numString == '+' || *numString == '-') {
-        int32_t add_to_exp = strtointeger<int32_t>(numString, nullptr, 10);
+        int32_t add_to_exp = strtointeger<int32_t>(numString, 10);
         if (add_to_exp > 100000) {
           add_to_exp = 100000;
         } else if (add_to_exp < -100000) {
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index 24a16c6..1579ac8 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -817,7 +817,10 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
     if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) {
       ++src;
       char *temp_str_end;
-      int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10);
+      auto result = strtointeger<int32_t>(src, 10);
+      // TODO: If error, return with error.
+      temp_str_end = const_cast<char *>(src + result.parsed_len);
+      int32_t add_to_exponent = result.value;
       if (add_to_exponent > 100000)
         add_to_exponent = 100000;
       else if (add_to_exponent < -100000)
@@ -911,7 +914,10 @@ static inline bool hexadecimal_string_to_float(
     if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) {
       ++src;
       char *temp_str_end;
-      int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10);
+      auto result = strtointeger<int32_t>(src, 10);
+      // TODO: If error, return error.
+      temp_str_end = const_cast<char *>(src + result.parsed_len);
+      int32_t add_to_exponent = result.value;
       if (add_to_exponent > 100000)
         add_to_exponent = 100000;
       else if (add_to_exponent < -100000)
@@ -1001,8 +1007,11 @@ static inline T strtofloatingpoint(const char *__restrict src,
             // more than is required by the specification, which says for the
             // input type "NAN(n-char-sequence)" that "the meaning of
             // the n-char sequence is implementation-defined."
-            nan_mantissa = static_cast<BitsType>(
-                strtointeger<uint64_t>(left_paren + 1, &temp_src, 0));
+
+            auto result = strtointeger<uint64_t>(left_paren + 1, 0);
+            // TODO: If error, return error
+            temp_src = const_cast<char *>(left_paren + 1 + result.parsed_len);
+            nan_mantissa = result.value;
             if (*temp_src != ')')
               nan_mantissa = 0;
           }
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index 3f3349d..269d432 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -11,6 +11,7 @@
 
 #include "src/__support/CPP/limits.h"
 #include "src/__support/ctype_utils.h"
+#include "src/__support/str_to_num_result.h"
 #include <errno.h>
 #include <limits.h>
 
@@ -63,19 +64,19 @@ static inline int infer_base(const char *__restrict *__restrict src) {
   }
 }
 
-// Takes a pointer to a string, a pointer to a string pointer, and the base to
-// convert to. This function is used as the backend for all of the string to int
-// functions.
+// Takes a pointer to a string and the base to convert to. This function is used
+// as the backend for all of the string to int functions.
 template <class T>
-static inline T strtointeger(const char *__restrict src,
-                             char **__restrict str_end, int base) {
+static inline StrToNumResult<T> strtointeger(const char *__restrict src,
+                                             int base) {
   unsigned long long result = 0;
   bool is_number = false;
   const char *original_src = src;
+  int error_val = 0;
 
   if (base < 0 || base == 1 || base > 36) {
-    errno = EINVAL;
-    return 0;
+    error_val = EINVAL;
+    return {0, 0, error_val};
   }
 
   src = first_non_whitespace(src);
@@ -113,35 +114,35 @@ static inline T strtointeger(const char *__restrict src,
     // the result cannot change, but we still need to advance src to the end of
     // the number.
     if (result == abs_max) {
-      errno = ERANGE;
+      error_val = ERANGE;
       continue;
     }
 
     if (result > abs_max_div_by_base) {
       result = abs_max;
-      errno = ERANGE;
+      error_val = ERANGE;
     } else {
       result = result * base;
     }
     if (result > abs_max - cur_digit) {
       result = abs_max;
-      errno = ERANGE;
+      error_val = ERANGE;
     } else {
       result = result + cur_digit;
     }
   }
 
-  if (str_end != nullptr)
-    *str_end = const_cast<char *>(is_number ? src : original_src);
+  ptrdiff_t str_len = is_number ? (src - original_src) : 0;
 
   if (result == abs_max) {
     if (is_positive || IS_UNSIGNED)
-      return cpp::numeric_limits<T>::max();
+      return {cpp::numeric_limits<T>::max(), str_len, error_val};
     else // T is signed and there is a negative overflow
-      return cpp::numeric_limits<T>::min();
+      return {cpp::numeric_limits<T>::min(), str_len, error_val};
   }
 
-  return is_positive ? static_cast<T>(result) : -static_cast<T>(result);
+  return {is_positive ? static_cast<T>(result) : -static_cast<T>(result),
+          str_len, error_val};
 }
 
 } // namespace internal
diff --git a/libc/src/__support/str_to_num_result.h b/libc/src/__support/str_to_num_result.h
new file mode 100644
index 0000000..cd0e24f
--- /dev/null
+++ b/libc/src/__support/str_to_num_result.h
@@ -0,0 +1,33 @@
+//===-- A data structure for str_to_number to return ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SUPPORT_STR_TO_NUM_RESULT_H
+#define LLVM_LIBC_SUPPORT_STR_TO_NUM_RESULT_H
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+template <typename T> struct StrToNumResult {
+  T value;
+  int error;
+  ptrdiff_t parsed_len;
+
+  constexpr StrToNumResult(T value) : value(value), error(0), parsed_len(0) {}
+  constexpr StrToNumResult(T value, ptrdiff_t parsed_len)
+      : value(value), error(0), parsed_len(parsed_len) {}
+  constexpr StrToNumResult(T value, ptrdiff_t parsed_len, int error)
+      : value(value), error(error), parsed_len(parsed_len) {}
+
+  constexpr bool has_error() { return error != 0; }
+
+  constexpr operator T() { return value; }
+};
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SUPPORT_STR_TO_NUM_RESULT_H
diff --git a/libc/src/inttypes/strtoimax.cpp b/libc/src/inttypes/strtoimax.cpp
index ef5e84e..3fa2266 100644
--- a/libc/src/inttypes/strtoimax.cpp
+++ b/libc/src/inttypes/strtoimax.cpp
@@ -15,7 +15,14 @@ namespace __llvm_libc {
 LLVM_LIBC_FUNCTION(intmax_t, strtoimax,
                    (const char *__restrict str, char **__restrict str_end,
                     int base)) {
-  return internal::strtointeger<intmax_t>(str, str_end, base);
+  auto result = internal::strtointeger<intmax_t>(str, base);
+  if (result.has_error())
+    errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(str + result.parsed_len);
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/inttypes/strtoumax.cpp b/libc/src/inttypes/strtoumax.cpp
index edf8b65..6c1ecb6 100644
--- a/libc/src/inttypes/strtoumax.cpp
+++ b/libc/src/inttypes/strtoumax.cpp
@@ -15,7 +15,14 @@ namespace __llvm_libc {
 LLVM_LIBC_FUNCTION(uintmax_t, strtoumax,
                    (const char *__restrict str, char **__restrict str_end,
                     int base)) {
-  return internal::strtointeger<uintmax_t>(str, str_end, base);
+  auto result = internal::strtointeger<uintmax_t>(str, base);
+  if (result.has_error())
+    errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(str + result.parsed_len);
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp
index 6a27c1c..6a2db74 100644
--- a/libc/src/stdio/printf_core/parser.cpp
+++ b/libc/src/stdio/printf_core/parser.cpp
@@ -50,10 +50,9 @@ FormatSection Parser::get_next_section() {
 
       section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
     } else if (internal::isdigit(str[cur_pos])) {
-      char *int_end;
-      section.min_width =
-          internal::strtointeger<int>(str + cur_pos, &int_end, 10);
-      cur_pos = int_end - str;
+      auto result = internal::strtointeger<int>(str + cur_pos, 10);
+      section.min_width = result.value;
+      cur_pos = cur_pos + result.parsed_len;
     }
     if (section.min_width < 0) {
       section.min_width = -section.min_width;
@@ -73,10 +72,9 @@ FormatSection Parser::get_next_section() {
         section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
 
       } else if (internal::isdigit(str[cur_pos])) {
-        char *int_end;
-        section.precision =
-            internal::strtointeger<int>(str + cur_pos, &int_end, 10);
-        cur_pos = int_end - str;
+        auto result = internal::strtointeger<int>(str + cur_pos, 10);
+        section.precision = result.value;
+        cur_pos = cur_pos + result.parsed_len;
       }
     }
 
@@ -238,12 +236,11 @@ LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
 
 size_t Parser::parse_index(size_t *local_pos) {
   if (internal::isdigit(str[*local_pos])) {
-    char *int_end;
-    size_t index =
-        internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
-    if (int_end[0] != '$')
+    auto result = internal::strtointeger<int>(str + *local_pos, 10);
+    size_t index = result.value;
+    if (str[*local_pos + result.parsed_len] != '$')
       return 0;
-    *local_pos = 1 + int_end - str;
+    *local_pos = 1 + result.parsed_len + *local_pos;
     return index;
   }
   return 0;
diff --git a/libc/src/stdio/scanf_core/parser.cpp b/libc/src/stdio/scanf_core/parser.cpp
index 76e658e..767f80a 100644
--- a/libc/src/stdio/scanf_core/parser.cpp
+++ b/libc/src/stdio/scanf_core/parser.cpp
@@ -50,10 +50,9 @@ FormatSection Parser::get_next_section() {
     // handle width
     section.max_width = -1;
     if (internal::isdigit(str[cur_pos])) {
-      char *int_end;
-      section.max_width =
-          internal::strtointeger<int>(str + cur_pos, &int_end, 10);
-      cur_pos = int_end - str;
+      auto result = internal::strtointeger<int>(str + cur_pos, 10);
+      section.max_width = result.value;
+      cur_pos = cur_pos + result.parsed_len;
     }
 
     // TODO(michaelrj): add posix allocate flag support.
@@ -196,12 +195,11 @@ LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
 
 size_t Parser::parse_index(size_t *local_pos) {
   if (internal::isdigit(str[*local_pos])) {
-    char *int_end;
-    size_t index =
-        internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
-    if (int_end[0] != '$')
+    auto result = internal::strtointeger<int>(str + *local_pos, 10);
+    size_t index = result.value;
+    if (str[*local_pos + result.parsed_len] != '$')
       return 0;
-    *local_pos = 1 + int_end - str;
+    *local_pos = 1 + result.parsed_len + *local_pos;
     return index;
   }
   return 0;
diff --git a/libc/src/stdlib/atoi.cpp b/libc/src/stdlib/atoi.cpp
index 37cfab1..f61e365 100644
--- a/libc/src/stdlib/atoi.cpp
+++ b/libc/src/stdlib/atoi.cpp
@@ -13,7 +13,11 @@
 namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(int, atoi, (const char *str)) {
-  return internal::strtointeger<int>(str, nullptr, 10);
+  auto result = internal::strtointeger<int>(str, 10);
+  if (result.has_error())
+    errno = result.error;
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdlib/atol.cpp b/libc/src/stdlib/atol.cpp
index 6a1da4c..713ccfa 100644
--- a/libc/src/stdlib/atol.cpp
+++ b/libc/src/stdlib/atol.cpp
@@ -13,7 +13,11 @@
 namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(long, atol, (const char *str)) {
-  return internal::strtointeger<long>(str, nullptr, 10);
+  auto result = internal::strtointeger<int>(str, 10);
+  if (result.has_error())
+    errno = result.error;
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdlib/atoll.cpp b/libc/src/stdlib/atoll.cpp
index ffa8105..cfa8edf 100644
--- a/libc/src/stdlib/atoll.cpp
+++ b/libc/src/stdlib/atoll.cpp
@@ -13,7 +13,11 @@
 namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(long long, atoll, (const char *str)) {
-  return internal::strtointeger<long long>(str, nullptr, 10);
+  auto result = internal::strtointeger<long long>(str, 10);
+  if (result.has_error())
+    errno = result.error;
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdlib/strtol.cpp b/libc/src/stdlib/strtol.cpp
index 33038b5..9564f5d 100644
--- a/libc/src/stdlib/strtol.cpp
+++ b/libc/src/stdlib/strtol.cpp
@@ -15,7 +15,14 @@ namespace __llvm_libc {
 LLVM_LIBC_FUNCTION(long, strtol,
                    (const char *__restrict str, char **__restrict str_end,
                     int base)) {
-  return internal::strtointeger<long>(str, str_end, base);
+  auto result = internal::strtointeger<long>(str, base);
+  if (result.has_error())
+    errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(str + result.parsed_len);
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdlib/strtoll.cpp b/libc/src/stdlib/strtoll.cpp
index e2f0aac..c1aa94e 100644
--- a/libc/src/stdlib/strtoll.cpp
+++ b/libc/src/stdlib/strtoll.cpp
@@ -15,7 +15,14 @@ namespace __llvm_libc {
 LLVM_LIBC_FUNCTION(long long, strtoll,
                    (const char *__restrict str, char **__restrict str_end,
                     int base)) {
-  return internal::strtointeger<long long>(str, str_end, base);
+  auto result = internal::strtointeger<long long>(str, base);
+  if (result.has_error())
+    errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(str + result.parsed_len);
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdlib/strtoul.cpp b/libc/src/stdlib/strtoul.cpp
index 0069679..d0accb54 100644
--- a/libc/src/stdlib/strtoul.cpp
+++ b/libc/src/stdlib/strtoul.cpp
@@ -15,7 +15,14 @@ namespace __llvm_libc {
 LLVM_LIBC_FUNCTION(unsigned long, strtoul,
                    (const char *__restrict str, char **__restrict str_end,
                     int base)) {
-  return internal::strtointeger<unsigned long>(str, str_end, base);
+  auto result = internal::strtointeger<unsigned long>(str, base);
+  if (result.has_error())
+    errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(str + result.parsed_len);
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdlib/strtoull.cpp b/libc/src/stdlib/strtoull.cpp
index db6c838..26f0821 100644
--- a/libc/src/stdlib/strtoull.cpp
+++ b/libc/src/stdlib/strtoull.cpp
@@ -15,7 +15,14 @@ namespace __llvm_libc {
 LLVM_LIBC_FUNCTION(unsigned long long, strtoull,
                    (const char *__restrict str, char **__restrict str_end,
                     int base)) {
-  return internal::strtointeger<unsigned long long>(str, str_end, base);
+  auto result = internal::strtointeger<unsigned long long>(str, base);
+  if (result.has_error())
+    errno = result.error;
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(str + result.parsed_len);
+
+  return result;
 }
 
 } // namespace __llvm_libc
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 5cfc002..3565c0d 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -202,6 +202,14 @@ cc_library(
 )
 
 cc_library(
+    name = "__support_str_to_num_result",
+    hdrs = ["src/__support/str_to_num_result.h"],
+    deps = [
+        ":libc_root",
+    ],
+)
+
+cc_library(
     name = "__support_integer_operations",
     hdrs = ["src/__support/integer_operations.h"],
     deps = [":__support_cpp_type_traits"],
@@ -218,6 +226,7 @@ cc_library(
     deps = [
         ":__support_cpp_limits",
         ":__support_ctype_utils",
+        ":__support_str_to_num_result",
     ],
 )
 
-- 
2.7.4