From 2348d593aeb57d9310873651986ea753acb29826 Mon Sep 17 00:00:00 2001
From: peter klausler <pklausler@nvidia.com>
Date: Fri, 18 Oct 2019 16:51:29 -0700
Subject: [PATCH] [flang] Add software uint128_t (debugging incomplete)

Original-commit: flang-compiler/f18@5be270e604965626ece73ca376ec450986cc5ae1
Reviewed-on: https://github.com/flang-compiler/f18/pull/785
Tree-same-pre-rewrite: false
---
 flang/lib/common/uint128.h                   | 253 +++++++++++++++++++++++++++
 flang/lib/common/unsigned-const-division.h   |  11 +-
 flang/lib/decimal/big-radix-floating-point.h |   9 +-
 flang/lib/decimal/binary-floating-point.h    |  14 +-
 flang/lib/decimal/decimal-to-binary.cc       |   4 +-
 5 files changed, 269 insertions(+), 22 deletions(-)
 create mode 100644 flang/lib/common/uint128.h
diff --git a/flang/lib/common/uint128.h b/flang/lib/common/uint128.h
new file mode 100644
index 0000000..3974982
--- /dev/null
+++ b/flang/lib/common/uint128.h
@@ -0,0 +1,253 @@
+// Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Portable 128-bit unsigned integer arithmetic
+
+#ifndef FORTRAN_COMMON_UINT128_H_
+#define FORTRAN_COMMON_UINT128_H_
+
+#define AVOID_NATIVE_UINT128 1  // for testing purposes: nonzero selects UnsignedInt128 over __uint128_t
+
+#include "leading-zero-bit-count.h"
+#include <cstdint>
+#include <type_traits>
+
+namespace Fortran::common {
+
+class UnsignedInt128 {
+public:
+  constexpr UnsignedInt128() {}  // zero, via the default member initializers
+  constexpr UnsignedInt128(std::uint64_t n) : low_{n} {}  // zero-extends n
+  constexpr UnsignedInt128(std::int64_t n) : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(n<0)} {}  // sign-extends n
+  constexpr UnsignedInt128(int n) : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(n<0)} {}  // sign-extends n
+  constexpr UnsignedInt128(const UnsignedInt128 &) = default;
+  constexpr UnsignedInt128(UnsignedInt128 &&) = default;
+  constexpr UnsignedInt128 &operator=(const UnsignedInt128 &) = default;
+  constexpr UnsignedInt128 &operator=(UnsignedInt128 &&) = default;
+
+  constexpr UnsignedInt128 operator+() const { return *this; }  // identity
+  constexpr UnsignedInt128 operator~() const { return UnsignedInt128{~high_, ~low_}; }
+  constexpr UnsignedInt128 operator-() const { return ~*this + 1; }  // two's-complement negation
+  constexpr bool operator!() const { return (high_ | low_) == 0; }  // true only for zero
+  constexpr explicit operator bool() const { return (high_ | low_) != 0; }
+  constexpr explicit operator std::uint64_t() const { return low_; }  // keeps only the low word
+  constexpr explicit operator int() const { return static_cast<int>(low_); }
+
+  constexpr std::uint64_t high() const { return high_; }  // most significant 64 bits
+  constexpr std::uint64_t low() const { return low_; }  // least significant 64 bits
+
+  // Increment and decrement.  All four forms return by value, so the prefix
+  // forms yield a copy of the updated value rather than a reference to *this.
+  // Prefix increment: adds one, then returns the updated value.
+  constexpr UnsignedInt128 operator++(/*prefix*/) { return *this += 1; }
+  // Postfix increment: adds one, but returns the value held beforehand.
+  constexpr UnsignedInt128 operator++(int /*postfix*/) {
+    UnsignedInt128 previous{*this};
+    *this += 1;
+    return previous;
+  }
+  // Prefix decrement: subtracts one, then returns the updated value.
+  constexpr UnsignedInt128 operator--(/*prefix*/) { return *this -= 1; }
+  // Postfix decrement: subtracts one, but returns the value held beforehand.
+  constexpr UnsignedInt128 operator--(int /*postfix*/) {
+    UnsignedInt128 previous{*this};
+    *this -= 1;
+    return previous;
+  }
+
+  constexpr UnsignedInt128 operator&(UnsignedInt128 that) const {
+    return UnsignedInt128{that.high_ & high_, that.low_ & low_};  // AND of each half
+  }
+  constexpr UnsignedInt128 operator|(UnsignedInt128 that) const {
+    return UnsignedInt128{that.high_ | high_, that.low_ | low_};  // OR of each half
+  }
+  constexpr UnsignedInt128 operator^(UnsignedInt128 that) const {
+    return UnsignedInt128{that.high_ ^ high_, that.low_ ^ low_};  // XOR of each half
+  }
+
+  // Logical left shift by `that` bits.  Counts of 128 or more yield zero, and
+  // a count of zero yields the value unchanged.
+  constexpr UnsignedInt128 operator<<(UnsignedInt128 that) const {
+    if (that >= 128) {
+      return {};
+    } else if (that == 0) {
+      return *this;  // the general case would evaluate low_ >> 64, which is UB
+    }
+    std::uint64_t n{that.low_};
+    return n >= 64 ? UnsignedInt128{low_ << (n - 64), 0}
+                   : UnsignedInt128{(high_ << n) | (low_ >> (64 - n)), low_ << n};
+  }
+  // Logical right shift by `that` bits.  Counts of 128 or more yield zero, and
+  // a count of zero yields the value unchanged.
+  constexpr UnsignedInt128 operator>>(UnsignedInt128 that) const {
+    if (that >= 128) {
+      return {};
+    } else if (that == 0) {
+      return *this;  // the general case would evaluate high_ << 64, which is UB
+    }
+    std::uint64_t n{that.low_};
+    return n >= 64 ? UnsignedInt128{0, high_ >> (n - 64)}
+                   : UnsignedInt128{high_ >> n, (high_ << (64 - n)) | (low_ >> n)};
+  }
+
+  // Sum modulo 2^128: a carry out of the low words is added to the high words.
+  constexpr UnsignedInt128 operator+(UnsignedInt128 that) const {
+    const std::uint64_t lowSum{low_ + that.low_};
+    const std::uint64_t carry{lowSum < low_ ? 1u : 0u};  // unsigned wraparound means a carry
+    return {high_ + that.high_ + carry, lowSum};
+  }
+  // Difference modulo 2^128, formed by adding the negation of the subtrahend.
+  constexpr UnsignedInt128 operator-(UnsignedInt128 that) const { return *this + -that; }
+
+  constexpr UnsignedInt128 operator*(UnsignedInt128 that) const {  // product, truncated to 128 bits
+    std::uint64_t mask32{0xffffffff};  // selects the low 32 bits of a word
+    if (high_ == 0 && that.high_ == 0) {  // both operands fit in 64 bits: two 32-bit pieces each
+      std::uint64_t x0{low_ & mask32}, x1{low_ >> 32};
+      std::uint64_t y0{that.low_ & mask32}, y1{that.low_ >> 32};
+      UnsignedInt128 x0y0{x0 * y0}, x0y1{x0 * y1};
+      UnsignedInt128 x1y0{x1 * y0}, x1y1{x1 * y1};
+      return x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64);  // weight each partial product by its position
+    } else {  // general case: four 32-bit pieces per operand
+      std::uint64_t x0{low_ & mask32}, x1{low_ >> 32}, x2{high_ & mask32}, x3{high_ >> 32};
+      std::uint64_t y0{that.low_ & mask32}, y1{that.low_ >> 32}, y2{that.high_ & mask32}, y3{that.high_ >> 32};
+      UnsignedInt128 x0y0{x0 * y0}, x0y1{x0 * y1}, x0y2{x0 * y2}, x0y3{x0 * y3};
+      UnsignedInt128 x1y0{x1 * y0}, x1y1{x1 * y1}, x1y2{x1 * y2};
+      UnsignedInt128 x2y0{x2 * y0}, x2y1{x2 * y1};
+      UnsignedInt128 x3y0{x3 * y0};  // products whose weight would be 2^128 or more are never formed
+      return x0y0 + ((x0y1 + x1y0) << 32) + ((x0y2 + x1y1 + x2y0) << 64) + ((x0y3 + x1y2 + x2y1 + x3y0) << 96);
+    }
+  }
+
+  // Unsigned quotient by shift-and-subtract long division, one bit per pass.
+  // A zero divisor is not checked for.
+  constexpr UnsignedInt128 operator/(UnsignedInt128 that) const {
+    const int skip{high_ == 0 ? 64 + LeadingZeroBitCount(low_) : LeadingZeroBitCount(high_)};
+    UnsignedInt128 dividend{*this};
+    dividend <<= skip;  // discard the dividend's leading zero bits
+    UnsignedInt128 partial{};
+    UnsignedInt128 result{};
+    for (int bit{skip}; bit < 128; ++bit) {
+      partial <<= 1;
+      partial.low_ |= dividend.high_ >> 63;  // bring in the next dividend bit
+      dividend <<= 1;
+      result <<= 1;
+      if (partial >= that) {
+        ++result;  // the divisor fits at this position
+        partial -= that;
+      }
+    }
+    return result;
+  }
+
+  // Unsigned remainder by shift-and-subtract long division, one bit per pass.
+  // A zero divisor is not checked for.
+  constexpr UnsignedInt128 operator%(UnsignedInt128 that) const {
+    const int skip{high_ == 0 ? 64 + LeadingZeroBitCount(low_) : LeadingZeroBitCount(high_)};
+    UnsignedInt128 dividend{*this};
+    dividend <<= skip;  // discard the dividend's leading zero bits
+    UnsignedInt128 partial{};
+    for (int bit{skip}; bit < 128; ++bit) {
+      partial <<= 1;
+      partial.low_ |= dividend.high_ >> 63;  // bring in the next dividend bit
+      dividend <<= 1;
+      if (partial >= that) {
+        partial -= that;
+      }
+    }
+    return partial;  // what is left after the last bit is the remainder
+  }
+
+  // Ordering is decided by the high words; the low words break ties.
+  constexpr bool operator<(UnsignedInt128 that) const {
+    if (high_ != that.high_) {
+      return high_ < that.high_;
+    }
+    return low_ < that.low_;
+  }
+  constexpr bool operator<=(UnsignedInt128 that) const { return !(that < *this); }
+  // Two values are equal only when both 64-bit words match.
+  constexpr bool operator==(UnsignedInt128 that) const {
+    return high_ == that.high_ && low_ == that.low_;
+  }
+  constexpr bool operator!=(UnsignedInt128 that) const {
+    return !(*this == that);
+  }
+  // The remaining relations are expressed in terms of operator<.
+  constexpr bool operator>=(UnsignedInt128 that) const { return !(*this < that); }
+  constexpr bool operator>(UnsignedInt128 that) const { return that < *this; }
+
+  constexpr UnsignedInt128 &operator&=(const UnsignedInt128 &that) {  // compound forms: compute, store, return *this
+    const UnsignedInt128 updated{*this & that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator|=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this | that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator^=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this ^ that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator<<=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this << that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator>>=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this >> that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator+=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this + that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator-=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this - that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator*=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this * that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator/=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this / that};
+    return *this = updated;
+  }
+  constexpr UnsignedInt128 &operator%=(const UnsignedInt128 &that) {
+    const UnsignedInt128 updated{*this % that};
+    return *this = updated;
+  }
+
+private:
+  constexpr UnsignedInt128(std::uint64_t hi, std::uint64_t lo) : low_{lo}, high_{hi} {}  // note: high word comes first
+  static constexpr std::uint64_t topBit{std::uint64_t{1} << 63};  // most significant bit of a 64-bit word
+  std::uint64_t low_{0}, high_{0};  // least and most significant 64 bits of the value
+};
+
+#if (defined __GNUC__ || defined __clang__) && defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128
+using uint128_t = __uint128_t;  // compiler-provided 128-bit type
+#else
+using uint128_t = UnsignedInt128;  // software implementation defined in this header
+#endif
+
+template<int BITS> struct HostUnsignedIntTypeHelper {  // maps a bit count to a host unsigned integer type
+  using type = std::conditional_t<(BITS <= 8), std::uint8_t,  // widths are tried narrowest first
+      std::conditional_t<(BITS <= 16), std::uint16_t,
+          std::conditional_t<(BITS <= 32), std::uint32_t,
+              std::conditional_t<(BITS <= 64), std::uint64_t, uint128_t>>>>;  // any BITS > 64 selects uint128_t
+};
+template<int BITS>
+using HostUnsignedIntType = typename HostUnsignedIntTypeHelper<BITS>::type;  // shorthand for the helper's ::type
+
+}
+#endif
diff --git a/flang/lib/common/unsigned-const-division.h b/flang/lib/common/unsigned-const-division.h
index 31d288c..115d010 100644
--- a/flang/lib/common/unsigned-const-division.h
+++ b/flang/lib/common/unsigned-const-division.h
@@ -22,6 +22,7 @@
 
 #include "bit-population-count.h"
 #include "leading-zero-bit-count.h"
+#include "uint128.h"
 #include <cinttypes>
 #include <type_traits>
 
@@ -35,7 +36,7 @@ private:
   static_assert(std::is_unsigned_v<type>);
   static const int bits{static_cast<int>(8 * sizeof(type))};
   static_assert(bits <= 64);
-  using Big = std::conditional_t<(bits <= 32), std::uint64_t, __uint128_t>;
+  using Big = std::conditional_t<(bits <= 32), std::uint64_t, uint128_t>;
 
 public:
   static constexpr FixedPointReciprocal For(type n) {
@@ -50,7 +51,7 @@ public:
   }
 
   constexpr type Divide(type n) const {
-    return (static_cast<Big>(reciprocal_) * n) >> shift_;
+    return static_cast<type>((static_cast<Big>(reciprocal_) * n) >> shift_);
   }
 
 private:
@@ -65,12 +66,12 @@ static_assert(FixedPointReciprocal<std::uint32_t>::For(5).Divide(2000000000u) ==
 static_assert(FixedPointReciprocal<std::uint64_t>::For(10).Divide(
                   10000000000000000u) == 1000000000000000u);
 
-template<typename UINT, UINT DENOM>
+template<typename UINT, std::uint64_t DENOM>
 inline constexpr UINT DivideUnsignedBy(UINT n) {
-  if constexpr (!std::is_same_v<UINT, __uint128_t>) {
+  if constexpr (!std::is_same_v<UINT, uint128_t>) {
     return FixedPointReciprocal<UINT>::For(DENOM).Divide(n);
   } else {
-    return n / DENOM;
+    return n / static_cast<UINT>(DENOM);
   }
 }
 }
diff --git a/flang/lib/decimal/big-radix-floating-point.h b/flang/lib/decimal/big-radix-floating-point.h
index dde8fcd..45658b1 100644
--- a/flang/lib/decimal/big-radix-floating-point.h
+++ b/flang/lib/decimal/big-radix-floating-point.h
@@ -31,6 +31,7 @@
 #include "decimal.h"
 #include "../common/bit-population-count.h"
 #include "../common/leading-zero-bit-count.h"
+#include "../common/uint128.h"
 #include "../common/unsigned-const-division.h"
 #include <cinttypes>
 #include <limits>
@@ -54,7 +55,7 @@ private:
   static constexpr std::uint64_t uint64Radix{TenToThe(log10Radix)};
   static constexpr int minDigitBits{
       64 - common::LeadingZeroBitCount(uint64Radix)};
-  using Digit = HostUnsignedIntType<minDigitBits>;
+  using Digit = common::HostUnsignedIntType<minDigitBits>;
   static constexpr Digit radix{uint64Radix};
   static_assert(radix < std::numeric_limits<Digit>::max() / 1000,
       "radix is somehow too big");
@@ -146,11 +147,11 @@ private:
   // Returns any remainder.
   template<typename UINT> UINT SetTo(UINT n) {
     static_assert(
-        std::is_same_v<UINT, __uint128_t> || std::is_unsigned_v<UINT>);
+        std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>);
     SetToZero();
     while (n != 0) {
       auto q{common::DivideUnsignedBy<UINT, 10>(n)};
-      if (n != 10 * q) {
+      if (n != q * 10) {
         break;
       }
       ++exponent_;
@@ -164,7 +165,7 @@ private:
     } else {
       while (n != 0 && digits_ < digitLimit_) {
         auto q{common::DivideUnsignedBy<UINT, radix>(n)};
-        digit_[digits_++] = n - radix * q;
+        digit_[digits_++] = static_cast<Digit>(n - q * radix);
         n = q;
       }
       return n;
diff --git a/flang/lib/decimal/binary-floating-point.h b/flang/lib/decimal/binary-floating-point.h
index b7ad8ef..4eb7a7b 100644
--- a/flang/lib/decimal/binary-floating-point.h
+++ b/flang/lib/decimal/binary-floating-point.h
@@ -18,6 +18,7 @@
 // Access and manipulate the fields of an IEEE-754 binary
 // floating-point value via a generalized template.
 
+#include "../common/uint128.h"
 #include <cinttypes>
 #include <climits>
 #include <cstring>
@@ -25,15 +26,6 @@
 
 namespace Fortran::decimal {
 
-template<int BITS> struct HostUnsignedIntTypeHelper {
-  using type = std::conditional_t<(BITS <= 8), std::uint8_t,
-      std::conditional_t<(BITS <= 16), std::uint16_t,
-          std::conditional_t<(BITS <= 32), std::uint32_t,
-              std::conditional_t<(BITS <= 64), std::uint64_t, __uint128_t>>>>;
-};
-template<int BITS>
-using HostUnsignedIntType = typename HostUnsignedIntTypeHelper<BITS>::type;
-
 static constexpr int BitsForPrecision(int prec) {
   switch (prec) {
   case 8: return 16;
@@ -52,7 +44,7 @@ static constexpr std::int64_t ScaledLogBaseTenOfTwo{301029995664};
 template<int PRECISION> struct BinaryFloatingPointNumber {
   static constexpr int precision{PRECISION};
   static constexpr int bits{BitsForPrecision(precision)};
-  using RawType = HostUnsignedIntType<bits>;
+  using RawType = common::HostUnsignedIntType<bits>;
   static_assert(CHAR_BIT * sizeof(RawType) >= bits);
   static constexpr bool implicitMSB{precision != 64 /*x87*/};
   static constexpr int significandBits{precision - implicitMSB};
@@ -107,7 +99,7 @@ template<int PRECISION> struct BinaryFloatingPointNumber {
     return BiasedExponent() == maxExponent - 1 &&
         Significand() == significandMask;
   }
-  constexpr bool IsNegative() const { return (raw >> (bits - 1)) & 1; }
+  constexpr bool IsNegative() const { return ((raw >> (bits - 1)) & 1) != 0; }
 
   constexpr void Negate() { raw ^= RawType{1} << (bits - 1); }
 
diff --git a/flang/lib/decimal/decimal-to-binary.cc b/flang/lib/decimal/decimal-to-binary.cc
index 5f25b23..98d9f93 100644
--- a/flang/lib/decimal/decimal-to-binary.cc
+++ b/flang/lib/decimal/decimal-to-binary.cc
@@ -151,7 +151,7 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
 template<int PREC> class IntermediateFloat {
 public:
   static constexpr int precision{PREC};
-  using IntType = HostUnsignedIntType<precision>;
+  using IntType = common::HostUnsignedIntType<precision>;
   static constexpr IntType topBit{IntType{1} << (precision - 1)};
   static constexpr IntType mask{topBit + (topBit - 1)};
 
@@ -227,7 +227,7 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
   // The value is nonzero; normalize it.
   while (fraction < topBit && expo > 1) {
     --expo;
-    fraction = 2 * fraction + (guard >> (guardBits - 2));
+    fraction = fraction * 2 + (guard >> (guardBits - 2));
     guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
   }
   // Apply rounding
-- 
2.7.4