[release/3.0] Updating Math.Round and MathF.Round to be IEEE compliant so that the...

author Tanner Gooding <tagoo@outlook.com>

Wed, 7 Aug 2019 16:15:58 +0000 (09:15 -0700)

committer William Godbe <wigodbe@microsoft.com>

Wed, 7 Aug 2019 16:15:58 +0000 (09:15 -0700)
author Tanner Gooding <tagoo@outlook.com>
Wed, 7 Aug 2019 16:15:58 +0000 (09:15 -0700)
committer William Godbe <wigodbe@microsoft.com>
Wed, 7 Aug 2019 16:15:58 +0000 (09:15 -0700)
diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT

index 831105f..a5c4a66 100644 (file)
--- a/THIRD-PARTY-NOTICES.TXT
+++ b/THIRD-PARTY-NOTICES.TXT
@@ -281,3 +281,45 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+License notice for Berkeley SoftFloat Release 3e
+------------------------------------------------
+
+https://github.com/ucb-bar/berkeley-softfloat-3
+https://github.com/ucb-bar/berkeley-softfloat-3/blob/master/COPYING.txt
+
+License for Berkeley SoftFloat Release 3e
+
+John R. Hauser
+2018 January 20
+
+The following applies to the whole of SoftFloat Release 3e as well as to
+each source file individually.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions, and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/System.Private.CoreLib/shared/System/Double.cs b/src/System.Private.CoreLib/shared/System/Double.cs

index 806add0..1a15c8e 100644 (file)
--- a/src/System.Private.CoreLib/shared/System/Double.cs
+++ b/src/System.Private.CoreLib/shared/System/Double.cs
@@ -12,6 +12,7 @@
  **
  ===========================================================*/
  
+using System.Diagnostics;
  using System.Globalization;
  using System.Runtime.CompilerServices;
  using System.Runtime.InteropServices;
@@ -44,6 +45,29 @@ namespace System
          // We use this explicit definition to avoid the confusion between 0.0 and -0.0.
          internal const double NegativeZero = -0.0;
  
+        //
+        // Constants for manipulating the private bit-representation
+        //
+
+        internal const ulong SignMask = 0x8000_0000_0000_0000;
+        internal const int SignShift = 63;
+        internal const int ShiftedSignMask = (int)(SignMask >> SignShift);
+
+        internal const ulong ExponentMask = 0x7FF0_0000_0000_0000;
+        internal const int ExponentShift = 52;
+        internal const int ShiftedExponentMask = (int)(ExponentMask >> ExponentShift);
+
+        internal const ulong SignificandMask = 0x000F_FFFF_FFFF_FFFF;
+
+        internal const byte MinSign = 0;
+        internal const byte MaxSign = 1;
+
+        internal const ushort MinExponent = 0x0000;
+        internal const ushort MaxExponent = 0x07FF;
+
+        internal const ulong MinSignificand = 0x0000_0000_0000_0000;
+        internal const ulong MaxSignificand = 0x000F_FFFF_FFFF_FFFF;
+
          /// <summary>Determines whether the specified value is finite (zero, subnormal, or normal).</summary>
          [NonVersionable]
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -115,6 +139,16 @@ namespace System
              return (bits < 0x7FF0000000000000) && (bits != 0) && ((bits & 0x7FF0000000000000) == 0);
          }
  
+        internal static int ExtractExponentFromBits(ulong bits)
+        {
+            return (int)(bits >> ExponentShift) & ShiftedExponentMask;
+        }
+
+        internal static ulong ExtractSignificandFromBits(ulong bits)
+        {
+            return bits & SignificandMask;
+        }
+
          // Compares this object to another object, returning an instance of System.Relation.
          // Null is considered less than any instance.
          //
diff --git a/src/System.Private.CoreLib/shared/System/Math.cs b/src/System.Private.CoreLib/shared/System/Math.cs

index 014589c..8e6779b 100644 (file)
--- a/src/System.Private.CoreLib/shared/System/Math.cs
+++ b/src/System.Private.CoreLib/shared/System/Math.cs
@@ -2,6 +2,10 @@
  // The .NET Foundation licenses this file to you under the MIT license.
  // See the LICENSE file in the project root for more information.
  
+// ===================================================================================================
+// Portions of the code implemented below are based on the 'Berkeley SoftFloat Release 3e' algorithms.
+// ===================================================================================================
+
  /*============================================================
  **
  **
@@ -15,7 +19,6 @@
  
  using System.Diagnostics;
  using System.Diagnostics.CodeAnalysis;
-using System.Runtime;
  using System.Runtime.CompilerServices;
  using System.Runtime.Versioning;
  
@@ -802,29 +805,70 @@ namespace System
          public static double Round(double a)
          {
              // ************************************************************************************
-            // IMPORTANT: Do not change this implementation without also updating Math.Round(double),
+            // IMPORTANT: Do not change this implementation without also updating MathF.Round(float),
              //            FloatingPointUtils::round(double), and FloatingPointUtils::round(float)
              // ************************************************************************************
  
-            // If the number has no fractional part do nothing
-            // This shortcut is necessary to workaround precision loss in borderline cases on some platforms
+            // This is based on the 'Berkeley SoftFloat Release 3e' algorithm
+            // This only includes the roundToNearestTiesToEven code paths
+
+            ulong bits = (ulong)BitConverter.DoubleToInt64Bits(a);
+            int exponent = double.ExtractExponentFromBits(bits);
+
+            if (exponent <= 0x03FE)
+            {
+                if ((bits << 1) == 0)
+                {
+                    // Exactly +/- zero should return the original value
+                    return a;
+                }
+
+                // Any value less than or equal to 0.5 will always round to exactly zero
+                // and any value greater than 0.5 will always round to exactly one. However,
+                // we need to preserve the original sign for IEEE compliance.
+
+                double result = ((exponent == 0x03FE) && (double.ExtractSignificandFromBits(bits) != 0)) ? 1.0 : 0.0;
+                return CopySign(result, a);
+            }
  
-            if (a == (double)((long)a))
+            if (exponent >= 0x0433)
              {
+                // Any value greater than or equal to 2^52 cannot have a fractional part,
+                // So it will always round to exactly itself.
+
                  return a;
              }
  
-            // We had a number that was equally close to 2 integers.
-            // We need to return the even one.
+            // The absolute value should be greater than or equal to 1.0 and less than 2^52
+            Debug.Assert((0x03FF <= exponent) && (exponent <= 0x0432));
+
+            // Determine the last bit that represents the integral portion of the value
+            // and the bits representing the fractional portion
+
+            ulong lastBitMask = 1UL << (0x0433 - exponent);
+            ulong roundBitsMask = lastBitMask - 1;
  
-            double flrTempVal = Floor(a + 0.5);
+            // Increment the first fractional bit, which represents the midpoint between
+            // two integral values in the current window.
  
-            if ((a == (Floor(a) + 0.5)) && (FMod(flrTempVal, 2.0) != 0))
+            bits += lastBitMask >> 1;
+
+            if ((bits & roundBitsMask) == 0)
              {
-                flrTempVal -= 1.0;
+                // If that overflowed and the rest of the fractional bits are zero
+                // then we were exactly x.5 and we want to round to the even result
+
+                bits &= ~lastBitMask;
+            }
+            else
+            {
+                // Otherwise, we just want to strip the fractional bits off, truncating
+                // to the current integer value.
+
+                bits &= ~roundBitsMask;
              }
  
-            return CopySign(flrTempVal, a);
+            return BitConverter.Int64BitsToDouble((long)bits);
          }
  
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
diff --git a/src/System.Private.CoreLib/shared/System/MathF.cs b/src/System.Private.CoreLib/shared/System/MathF.cs

index da710a1..48d032d 100644 (file)
--- a/src/System.Private.CoreLib/shared/System/MathF.cs
+++ b/src/System.Private.CoreLib/shared/System/MathF.cs
@@ -2,6 +2,10 @@
  // The .NET Foundation licenses this file to you under the MIT license.
  // See the LICENSE file in the project root for more information.
  
+// ===================================================================================================
+// Portions of the code implemented below are based on the 'Berkeley SoftFloat Release 3e' algorithms.
+// ===================================================================================================
+
  /*============================================================
  **
  ** Purpose: Some single-precision floating-point math operations
@@ -10,6 +14,7 @@
  
  //This class contains only static members and doesn't require serialization.
  
+using System.Diagnostics;
  using System.Runtime.CompilerServices;
  
  namespace System
@@ -245,29 +250,70 @@ namespace System
          public static float Round(float x)
          {
              // ************************************************************************************
-            // IMPORTANT: Do not change this implementation without also updating Math.Round(double),
+            // IMPORTANT: Do not change this implementation without also updating MathF.Round(float),
              //            FloatingPointUtils::round(double), and FloatingPointUtils::round(float)
              // ************************************************************************************
-    
-            // If the number has no fractional part do nothing
-            // This shortcut is necessary to workaround precision loss in borderline cases on some platforms
  
-            if (x == (float)((int)x))
+            // This is based on the 'Berkeley SoftFloat Release 3e' algorithm
+            // This only includes the roundToNearestTiesToEven code paths
+
+            uint bits = (uint)BitConverter.SingleToInt32Bits(x);
+            int exponent = float.ExtractExponentFromBits(bits);
+
+            if (exponent <= 0x7E)
+            {
+                if ((bits << 1) == 0)
+                {
+                    // Exactly +/- zero should return the original value
+                    return x;
+                }
+
+                // Any value less than or equal to 0.5 will always round to exactly zero
+                // and any value greater than 0.5 will always round to exactly one. However,
+                // we need to preserve the original sign for IEEE compliance.
+
+                float result = ((exponent == 0x7E) && (float.ExtractSignificandFromBits(bits) != 0)) ? 1.0f : 0.0f;
+                return CopySign(result, x);
+            }
+
+            if (exponent >= 0x96)
              {
+                // Any value greater than or equal to 2^23 cannot have a fractional part,
+                // So it will always round to exactly itself.
+
                  return x;
              }
  
-            // We had a number that was equally close to 2 integers.
-            // We need to return the even one.
+            // The absolute value should be greater than or equal to 1.0 and less than 2^23
+            Debug.Assert((0x7F <= exponent) && (exponent <= 0x95));
+
+            // Determine the last bit that represents the integral portion of the value
+            // and the bits representing the fractional portion
+
+            uint lastBitMask = 1U << (0x96 - exponent);
+            uint roundBitsMask = lastBitMask - 1;
  
-            float flrTempVal = Floor(x + 0.5f);
+            // Increment the first fractional bit, which represents the midpoint between
+            // two integral values in the current window.
  
-            if ((x == (Floor(x) + 0.5f)) && (FMod(flrTempVal, 2.0f) != 0))
+            bits += lastBitMask >> 1;
+
+            if ((bits & roundBitsMask) == 0)
              {
-                flrTempVal -= 1.0f;
+                // If that overflowed and the rest of the fractional bits are zero
+                // then we were exactly x.5 and we want to round to the even result
+
+                bits &= ~lastBitMask;
+            }
+            else
+            {
+                // Otherwise, we just want to strip the fractional bits off, truncating
+                // to the current integer value.
+
+                bits &= ~roundBitsMask;
              }
  
-            return CopySign(flrTempVal, x);
+            return BitConverter.Int32BitsToSingle((int)bits);
          }
  
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
diff --git a/src/System.Private.CoreLib/shared/System/Single.cs b/src/System.Private.CoreLib/shared/System/Single.cs

index 590091b..d31518c 100644 (file)
--- a/src/System.Private.CoreLib/shared/System/Single.cs
+++ b/src/System.Private.CoreLib/shared/System/Single.cs
@@ -40,6 +40,29 @@ namespace System
          // We use this explicit definition to avoid the confusion between 0.0 and -0.0.
          internal const float NegativeZero = (float)-0.0;
  
+        //
+        // Constants for manipulating the private bit-representation
+        //
+
+        internal const uint SignMask = 0x8000_0000;
+        internal const int SignShift = 31;
+        internal const int ShiftedSignMask = (int)(SignMask >> SignShift);
+
+        internal const uint ExponentMask = 0x7F80_0000;
+        internal const int ExponentShift = 23;
+        internal const int ShiftedExponentMask = (int)(ExponentMask >> ExponentShift);
+
+        internal const uint SignificandMask = 0x007F_FFFF;
+
+        internal const byte MinSign = 0;
+        internal const byte MaxSign = 1;
+
+        internal const byte MinExponent = 0x00;
+        internal const byte MaxExponent = 0xFF;
+
+        internal const uint MinSignificand = 0x0000_0000;
+        internal const uint MaxSignificand = 0x007F_FFFF;
+
          /// <summary>Determines whether the specified value is finite (zero, subnormal, or normal).</summary>
          [NonVersionable]
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -111,6 +134,16 @@ namespace System
              return (bits < 0x7F800000) && (bits != 0) && ((bits & 0x7F800000) == 0);
          }
  
+        internal static int ExtractExponentFromBits(uint bits)
+        {
+            return (int)(bits >> ExponentShift) & ShiftedExponentMask;
+        }
+
+        internal static uint ExtractSignificandFromBits(uint bits)
+        {
+            return bits & SignificandMask;
+        }
+
          // Compares this object to another object, returning an integer that
          // indicates the relationship.
          // Returns a value less than zero if this  object
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp

index 60f5cee..de56d18 100644 (file)
--- a/src/jit/utils.cpp
+++ b/src/jit/utils.cpp
@@ -2,6 +2,10 @@
  // The .NET Foundation licenses this file to you under the MIT license.
  // See the LICENSE file in the project root for more information.
  
+// ===================================================================================================
+// Portions of the code implemented below are based on the 'Berkeley SoftFloat Release 3e' algorithms.
+// ===================================================================================================
+
  /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  XX                                                                           XX
@@ -1891,25 +1895,66 @@ double FloatingPointUtils::round(double x)
      //            MathF.Round(float), and FloatingPointUtils::round(float)
      // ************************************************************************************
  
-    // If the number has no fractional part do nothing
-    // This shortcut is necessary to workaround precision loss in borderline cases on some platforms
+    // This is based on the 'Berkeley SoftFloat Release 3e' algorithm
+    // This only includes the roundToNearestTiesToEven code paths
+
+    uint64_t bits     = *reinterpret_cast<uint64_t*>(&x);
+    int32_t  exponent = (int32_t)(bits >> 52) & 0x07FF;
+
+    if (exponent <= 0x03FE)
+    {
+        if ((bits << 1) == 0)
+        {
+            // Exactly +/- zero should return the original value
+            return x;
+        }
+
+        // Any value less than or equal to 0.5 will always round to exactly zero
+        // and any value greater than 0.5 will always round to exactly one. However,
+        // we need to preserve the original sign for IEEE compliance.
+
+        double result = ((exponent == 0x03FE) && ((bits & UI64(0x000FFFFFFFFFFFFF)) != 0)) ? 1.0 : 0.0;
+        return _copysign(result, x);
+    }
  
-    if (x == (double)((INT64)x))
+    if (exponent >= 0x0433)
      {
+        // Any value greater than or equal to 2^52 cannot have a fractional part,
+        // So it will always round to exactly itself.
+
          return x;
      }
  
-    // We had a number that was equally close to 2 integers.
-    // We need to return the even one.
+    // The absolute value should be greater than or equal to 1.0 and less than 2^52
+    assert((0x03FF <= exponent) && (exponent <= 0x0432));
+
+    // Determine the last bit that represents the integral portion of the value
+    // and the bits representing the fractional portion
+
+    uint64_t lastBitMask   = UI64(1) << (0x0433 - exponent);
+    uint64_t roundBitsMask = lastBitMask - 1;
  
-    double flrTempVal = floor(x + 0.5);
+    // Increment the first fractional bit, which represents the midpoint between
+    // two integral values in the current window.
  
-    if ((x == (floor(x) + 0.5)) && (fmod(flrTempVal, 2.0) != 0))
+    bits += lastBitMask >> 1;
+
+    if ((bits & roundBitsMask) == 0)
      {
-        flrTempVal -= 1.0;
+        // If that overflowed and the rest of the fractional bits are zero
+        // then we were exactly x.5 and we want to round to the even result
+
+        bits &= ~lastBitMask;
+    }
+    else
+    {
+        // Otherwise, we just want to strip the fractional bits off, truncating
+        // to the current integer value.
+
+        bits &= ~roundBitsMask;
      }
  
-    return _copysign(flrTempVal, x);
+    return *reinterpret_cast<double*>(&bits);
  }
  
  // Windows x86 and Windows ARM/ARM64 may not define _copysignf() but they do define _copysign().
@@ -1933,25 +1978,65 @@ float FloatingPointUtils::round(float x)
      //            Math.Round(double), and FloatingPointUtils::round(double)
      // ************************************************************************************
  
-    // If the number has no fractional part do nothing
-    // This shortcut is necessary to workaround precision loss in borderline cases on some platforms
+    // This is based on the 'Berkeley SoftFloat Release 3e' algorithm
  
-    if (x == (float)((INT32)x))
+    uint32_t bits     = *reinterpret_cast<uint32_t*>(&x);
+    int32_t  exponent = (int32_t)(bits >> 23) & 0xFF;
+
+    if (exponent <= 0x7E)
      {
+        if ((bits << 1) == 0)
+        {
+            // Exactly +/- zero should return the original value
+            return x;
+        }
+
+        // Any value less than or equal to 0.5 will always round to exactly zero
+        // and any value greater than 0.5 will always round to exactly one. However,
+        // we need to preserve the original sign for IEEE compliance.
+
+        float result = ((exponent == 0x7E) && ((bits & 0x007FFFFF) != 0)) ? 1.0f : 0.0f;
+        return _copysignf(result, x);
+    }
+
+    if (exponent >= 0x96)
+    {
+        // Any value greater than or equal to 2^52 cannot have a fractional part,
+        // So it will always round to exactly itself.
+
          return x;
      }
  
-    // We had a number that was equally close to 2 integers.
-    // We need to return the even one.
+    // The absolute value should be greater than or equal to 1.0 and less than 2^52
+    assert((0x7F <= exponent) && (exponent <= 0x95));
  
-    float flrTempVal = floorf(x + 0.5f);
+    // Determine the last bit that represents the integral portion of the value
+    // and the bits representing the fractional portion
  
-    if ((x == (floorf(x) + 0.5f)) && (fmodf(flrTempVal, 2.0f) != 0))
+    uint32_t lastBitMask   = 1U << (0x96 - exponent);
+    uint32_t roundBitsMask = lastBitMask - 1;
+
+    // Increment the first fractional bit, which represents the midpoint between
+    // two integral values in the current window.
+
+    bits += lastBitMask >> 1;
+
+    if ((bits & roundBitsMask) == 0)
+    {
+        // If that overflowed and the rest of the fractional bits are zero
+        // then we were exactly x.5 and we want to round to the even result
+
+        bits &= ~lastBitMask;
+    }
+    else
      {
-        flrTempVal -= 1.0f;
+        // Otherwise, we just want to strip the fractional bits off, truncating
+        // to the current integer value.
+
+        bits &= ~roundBitsMask;
      }
  
-    return _copysignf(flrTempVal, x);
+    return *reinterpret_cast<float*>(&bits);
  }
  
  namespace MagicDivide
author	Tanner Gooding <tagoo@outlook.com>
	Wed, 7 Aug 2019 16:15:58 +0000 (09:15 -0700)
committer	William Godbe <wigodbe@microsoft.com>
	Wed, 7 Aug 2019 16:15:58 +0000 (09:15 -0700)
THIRD-PARTY-NOTICES.TXT		patch \| blob \| history
src/System.Private.CoreLib/shared/System/Double.cs		patch \| blob \| history
src/System.Private.CoreLib/shared/System/Math.cs		patch \| blob \| history
src/System.Private.CoreLib/shared/System/MathF.cs		patch \| blob \| history
src/System.Private.CoreLib/shared/System/Single.cs		patch \| blob \| history
src/jit/utils.cpp		patch \| blob \| history