Handle the remaining known issues (modulo snan) with the double/single parsing logic...
author: Tanner Gooding <tagoo@outlook.com>
Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)
committer: GitHub <noreply@github.com>
Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)
* Fixing the double/single parser to return negative zero for `-0`

* Updating the Utf8Parser to allow exponents of arbitrary length

* Updating the double/single parser to support case-insensitive and signed infinity/nan

* Ensure TryParseAsSpecialFloatingPoint handles the case where we have an empty source

* Fixing the number parser to allow `-0` for decimal and to not parse '++infini'

Commit migrated from https://github.com/dotnet/coreclr/commit/1ebc1a9deb24bdb82a0a28afb2f6e853bfa0bf8f

src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs
src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs
src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs
src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs

index fa8bdc7..5ed385a 100644 (file)
@@ -64,13 +64,8 @@ namespace System.Buffers.Text
                 return false;
             }
 
-            // More compat with .NET behavior - whether or not a 0 keeps the negative sign depends on whether it an "integer" 0 or a "fractional" 0
-            if (number.Digits[0] == 0 && number.Scale == 0)
-            {
-                number.IsNegative = false;
-            }
-
             value = default;
+
             if (!Number.TryNumberToDecimal(ref number, ref value))
             {
                 value = default;
index e5e9e24..f68945c 100644 (file)
@@ -2,6 +2,8 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Buffers.Binary;
+
 namespace System.Buffers.Text
 {
     public static partial class Utf8Parser
@@ -112,32 +114,66 @@ namespace System.Buffers.Text
         // Assuming the text doesn't look like a normal floating point, we attempt to parse it as one of the special floating point values.
         //
         private static bool TryParseAsSpecialFloatingPoint<T>(ReadOnlySpan<byte> source, T positiveInfinity, T negativeInfinity, T nan, out T value, out int bytesConsumed)
-        {
-            if (source.Length >= 8 &&
-                source[0] == 'I' && source[1] == 'n' && source[2] == 'f' && source[3] == 'i' &&
-                source[4] == 'n' && source[5] == 'i' && source[6] == 't' && source[7] == 'y')
-            {
-                value = positiveInfinity;
-                bytesConsumed = 8;
-                return true;
-            }
+        {            
+            int srcIndex = 0;
+            int remaining = source.Length;
+            bool isNegative = false;
 
-            if (source.Length >= 9 &&
-                source[0] == Utf8Constants.Minus &&
-                source[1] == 'I' && source[2] == 'n' && source[3] == 'f' && source[4] == 'i' &&
-                source[5] == 'n' && source[6] == 'i' && source[7] == 't' && source[8] == 'y')
+            // We need at least 4 characters to process a sign
+            if (remaining >= 4)
             {
-                value = negativeInfinity;
-                bytesConsumed = 9;
-                return true;
+                byte c = source[srcIndex];
+
+                switch (c)
+                {
+                    case Utf8Constants.Minus:
+                    {
+                        isNegative = true;
+                        goto case Utf8Constants.Plus;
+                    }
+
+                    case Utf8Constants.Plus:
+                    {
+                        srcIndex++;
+                        remaining--;
+                        break;
+                    }
+                }
             }
 
-            if (source.Length >= 3 &&
-                source[0] == 'N' && source[1] == 'a' && source[2] == 'N')
+            // We can efficiently do an ASCII case-insensitive comparison by xor'ing each byte with
+            // the expected lowercase character and validating that the result is either 0 or exactly
+            // 0x20 (which is the delta between lowercase and uppercase ASCII characters).
+
+            if (remaining >= 3)
             {
-                value = nan;
-                bytesConsumed = 3;
-                return true;
+                if ((((source[srcIndex] ^ (byte)('n')) & ~0x20) == 0) &&
+                    (((source[srcIndex + 1] ^ (byte)('a')) & ~0x20) == 0) &&
+                    (((source[srcIndex + 2] ^ (byte)('n')) & ~0x20) == 0))
+                {
+                    value = nan;
+                    bytesConsumed = 3 + srcIndex;
+                    return true;
+                }
+
+                if (remaining >= 8)
+                {
+                    const int infi = 0x69666E69;
+                    int diff = (BinaryPrimitives.ReadInt32LittleEndian(source.Slice(srcIndex)) ^ infi);
+
+                    if ((diff & ~0x20202020) == 0)
+                    {
+                        const int nity = 0x7974696E;
+                        diff = (BinaryPrimitives.ReadInt32LittleEndian(source.Slice(srcIndex + 4)) ^ nity);
+
+                        if ((diff & ~0x20202020) == 0)
+                        {
+                            value = isNegative ? negativeInfinity : positiveInfinity;
+                            bytesConsumed = 8 + srcIndex;
+                            return true;
+                        }
+                    }
+                }
             }
 
             value = default;
index f39d99f..daa1da6 100644 (file)
@@ -72,7 +72,6 @@ namespace System.Buffers.Text
 
             if (srcIndex == source.Length)
             {
-                number.IsNegative = false;
                 bytesConsumed = srcIndex;
                 number.CheckConsistency();
                 return true;
@@ -206,11 +205,6 @@ namespace System.Buffers.Text
 
             if ((c & ~0x20u) != 'E')
             {
-                if ((digits[0] == 0) && (numDigitsAfterDecimal == 0))
-                {
-                    number.IsNegative = false;
-                }
-
                 digits[dstIndex] = 0;
                 number.DigitsCount = dstIndex;
                 bytesConsumed = srcIndex;
@@ -258,12 +252,37 @@ namespace System.Buffers.Text
                     break;
             }
 
-            if (!Utf8Parser.TryParseUInt32D(source.Slice(srcIndex), out uint absoluteExponent, out int bytesConsumedByExponent))
+            // If the next character isn't a digit, an exponent wasn't specified
+            if ((byte)(c - (byte)('0')) > 9)
             {
                 bytesConsumed = 0;
                 return false;
             }
 
+            if (!TryParseUInt32D(source.Slice(srcIndex), out uint absoluteExponent, out int bytesConsumedByExponent))
+            {
+                // Since we found at least one digit, we know that any failure to parse means we had an
+                // exponent that was larger than uint.MaxValue, and we can just eat characters until the end
+                absoluteExponent = uint.MaxValue;
+
+                // This also means that we know there were at least 10 characters, so we can "eat" those
+                // and continue eating digits from there
+                srcIndex += 10;
+
+                while (srcIndex != source.Length)
+                {
+                    c = source[srcIndex];
+                    int value = (byte)(c - (byte)('0'));
+
+                    if (value > 9)
+                    {
+                        break;
+                    }
+
+                    srcIndex++;
+                }
+            }
+
             srcIndex += bytesConsumedByExponent;
 
             if (exponentIsNegative)
index d0c42cc..fe83189 100644 (file)
@@ -463,7 +463,7 @@ namespace System
                         {
                             number.Scale = 0;
                         }
-                        if ((state & StateDecimal) == 0)
+                        if ((number.Kind == NumberBufferKind.Integer) && (state & StateDecimal) == 0)
                         {
                             number.IsNegative = false;
                         }
@@ -1693,6 +1693,10 @@ namespace System
             {
                 ReadOnlySpan<char> valueTrim = value.Trim();
 
+                // This code would be simpler if we only had the concept of `InfinitySymbol`, but
+                // we don't so we'll check the existing cases first and then handle `PositiveSign` +
+                // `PositiveInfinitySymbol` and `PositiveSign/NegativeSign` + `NaNSymbol` last.
+
                 if (valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
                 {
                     result = double.PositiveInfinity;
@@ -1705,6 +1709,29 @@ namespace System
                 {
                     result = double.NaN;
                 }
+                else if (valueTrim.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase))
+                {
+                    valueTrim = valueTrim.Slice(info.PositiveSign.Length);
+
+                    if (valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
+                    {
+                        result = double.PositiveInfinity;
+                    }
+                    else if (valueTrim.EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                    {
+                        result = double.NaN;
+                    }
+                    else
+                    {
+                        result = 0;
+                        return false;
+                    }
+                }
+                else if (valueTrim.StartsWith(info.NegativeSign, StringComparison.OrdinalIgnoreCase) &&
+                        valueTrim.Slice(info.NegativeSign.Length).EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                {
+                    result = double.NaN;
+                }
                 else
                 {
                     result = 0;
@@ -1728,6 +1755,14 @@ namespace System
             {
                 ReadOnlySpan<char> valueTrim = value.Trim();
 
+                // This code would be simpler if we only had the concept of `InfinitySymbol`, but
+                // we don't so we'll check the existing cases first and then handle `PositiveSign` +
+                // `PositiveInfinitySymbol` and `PositiveSign/NegativeSign` + `NaNSymbol` last.
+                //
+                // Additionally, since some cultures ("wo") actually define `PositiveInfinitySymbol`
+                // to include `PositiveSign`, we need to check whether `PositiveInfinitySymbol` fits
+                // that case so that we don't start parsing things like `++infini`.
+
                 if (valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
                 {
                     result = float.PositiveInfinity;
@@ -1740,6 +1775,30 @@ namespace System
                 {
                     result = float.NaN;
                 }
+                else if (valueTrim.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase))
+                {
+                    valueTrim = valueTrim.Slice(info.PositiveSign.Length);
+
+                    if (!info.PositiveInfinitySymbol.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase) && valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
+                    {
+                        result = float.PositiveInfinity;
+                    }
+                    else if (!info.NaNSymbol.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase) && valueTrim.EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                    {
+                        result = float.NaN;
+                    }
+                    else
+                    {
+                        result = 0;
+                        return false;
+                    }
+                }
+                else if (valueTrim.StartsWith(info.NegativeSign, StringComparison.OrdinalIgnoreCase) &&
+                         !info.NaNSymbol.StartsWith(info.NegativeSign, StringComparison.OrdinalIgnoreCase) &&
+                         valueTrim.Slice(info.NegativeSign.Length).EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                {
+                    result = float.NaN;
+                }
                 else
                 {
                     result = 0;