Handle the remaining known issues (modulo snan) with the double/single parsing logic...

author Tanner Gooding <tagoo@outlook.com>

Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)

committer GitHub <noreply@github.com>

Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)
author Tanner Gooding <tagoo@outlook.com>
Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)
committer GitHub <noreply@github.com>
Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs

index fa8bdc7..5ed385a 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs
@@ -64,13 +64,8 @@ namespace System.Buffers.Text
                  return false;
              }
  
-            // More compat with .NET behavior - whether or not a 0 keeps the negative sign depends on whether it an "integer" 0 or a "fractional" 0
-            if (number.Digits[0] == 0 && number.Scale == 0)
-            {
-                number.IsNegative = false;
-            }
-
              value = default;
+
              if (!Number.TryNumberToDecimal(ref number, ref value))
              {
                  value = default;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs

index e5e9e24..f68945c 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs
@@ -2,6 +2,8 @@
  // The .NET Foundation licenses this file to you under the MIT license.
  // See the LICENSE file in the project root for more information.
  
+using System.Buffers.Binary;
+
  namespace System.Buffers.Text
  {
      public static partial class Utf8Parser
@@ -112,32 +114,66 @@ namespace System.Buffers.Text
          // Assuming the text doesn't look like a normal floating point, we attempt to parse it as one the special floating point values.
          //
          private static bool TryParseAsSpecialFloatingPoint<T>(ReadOnlySpan<byte> source, T positiveInfinity, T negativeInfinity, T nan, out T value, out int bytesConsumed)
-        {
-            if (source.Length >= 8 &&
-                source[0] == 'I' && source[1] == 'n' && source[2] == 'f' && source[3] == 'i' &&
-                source[4] == 'n' && source[5] == 'i' && source[6] == 't' && source[7] == 'y')
-            {
-                value = positiveInfinity;
-                bytesConsumed = 8;
-                return true;
-            }
+        {            
+            int srcIndex = 0;
+            int remaining = source.Length;
+            bool isNegative = false;
  
-            if (source.Length >= 9 &&
-                source[0] == Utf8Constants.Minus &&
-                source[1] == 'I' && source[2] == 'n' && source[3] == 'f' && source[4] == 'i' &&
-                source[5] == 'n' && source[6] == 'i' && source[7] == 't' && source[8] == 'y')
+            // We need at least 4 characters to process a sign (the sign itself plus the 3 characters of the shortest special value, NaN)
+            if (remaining >= 4)
              {
-                value = negativeInfinity;
-                bytesConsumed = 9;
-                return true;
+                byte c = source[srcIndex];
+
+                switch (c)
+                {
+                    case Utf8Constants.Minus:
+                    {
+                        isNegative = true;
+                        goto case Utf8Constants.Plus;
+                    }
+
+                    case Utf8Constants.Plus:
+                    {
+                        srcIndex++;
+                        remaining--;
+                        break;
+                    }
+                }
              }
  
-            if (source.Length >= 3 &&
-                source[0] == 'N' && source[1] == 'a' && source[2] == 'N')
+            // We can efficiently do an ASCII case-insensitive comparison by xor'ing with the
+            // expected lowercase character and validating that the result is either 0 or exactly
+            // 0x20 (which is the delta between lowercase and uppercase ASCII characters).
+
+            if (remaining >= 3)
              {
-                value = nan;
-                bytesConsumed = 3;
-                return true;
+                if ((((source[srcIndex] ^ (byte)('n')) & ~0x20) == 0) &&
+                    (((source[srcIndex + 1] ^ (byte)('a')) & ~0x20) == 0) &&
+                    (((source[srcIndex + 2] ^ (byte)('n')) & ~0x20) == 0))
+                {
+                    value = nan;
+                    bytesConsumed = 3 + srcIndex;
+                    return true;
+                }
+
+                if (remaining >= 8)
+                {
+                    const int infi = 0x69666E69;
+                    int diff = (BinaryPrimitives.ReadInt32LittleEndian(source.Slice(srcIndex)) ^ infi);
+
+                    if ((diff & ~0x20202020) == 0)
+                    {
+                        const int nity = 0x7974696E;
+                        diff = (BinaryPrimitives.ReadInt32LittleEndian(source.Slice(srcIndex + 4)) ^ nity);
+
+                        if ((diff & ~0x20202020) == 0)
+                        {
+                            value = isNegative ? negativeInfinity : positiveInfinity;
+                            bytesConsumed = 8 + srcIndex;
+                            return true;
+                        }
+                    }
+                }
              }
  
              value = default;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs

index f39d99f..daa1da6 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs
@@ -72,7 +72,6 @@ namespace System.Buffers.Text
  
              if (srcIndex == source.Length)
              {
-                number.IsNegative = false;
                  bytesConsumed = srcIndex;
                  number.CheckConsistency();
                  return true;
@@ -206,11 +205,6 @@ namespace System.Buffers.Text
  
              if ((c & ~0x20u) != 'E')
              {
-                if ((digits[0] == 0) && (numDigitsAfterDecimal == 0))
-                {
-                    number.IsNegative = false;
-                }
-
                  digits[dstIndex] = 0;
                  number.DigitsCount = dstIndex;
                  bytesConsumed = srcIndex;
@@ -258,12 +252,37 @@ namespace System.Buffers.Text
                      break;
              }
  
-            if (!Utf8Parser.TryParseUInt32D(source.Slice(srcIndex), out uint absoluteExponent, out int bytesConsumedByExponent))
+            // If the next character isn't a digit, an exponent wasn't specified
+            if ((byte)(c - (byte)('0')) > 9)
              {
                  bytesConsumed = 0;
                  return false;
              }
  
+            if (!TryParseUInt32D(source.Slice(srcIndex), out uint absoluteExponent, out int bytesConsumedByExponent))
+            {
+                // Since we found at least one digit, we know that any failure to parse means we had an
+                // exponent that was larger than uint.MaxValue, and we can just eat characters until the end
+                absoluteExponent = uint.MaxValue;
+
+                // This also means that we know there were at least 10 characters and we can "eat" those, and
+                // continue eating digits from there
+                srcIndex += 10;
+
+                while (srcIndex != source.Length)
+                {
+                    c = source[srcIndex];
+                    int value = (byte)(c - (byte)('0'));
+
+                    if (value > 9)
+                    {
+                        break;
+                    }
+
+                    srcIndex++;
+                }
+            }
+
              srcIndex += bytesConsumedByExponent;
  
              if (exponentIsNegative)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs b/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs

index d0c42cc..fe83189 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs
@@ -463,7 +463,7 @@ namespace System
                          {
                              number.Scale = 0;
                          }
-                        if ((state & StateDecimal) == 0)
+                        if ((number.Kind == NumberBufferKind.Integer) && (state & StateDecimal) == 0)
                          {
                              number.IsNegative = false;
                          }
@@ -1693,6 +1693,10 @@ namespace System
              {
                  ReadOnlySpan<char> valueTrim = value.Trim();
  
+                // This code would be simpler if we only had the concept of `InfinitySymbol`, but
+                // we don't so we'll check the existing cases first and then handle `PositiveSign` +
+                // `PositiveInfinitySymbol` and `PositiveSign/NegativeSign` + `NaNSymbol` last.
+
                  if (valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
                  {
                      result = double.PositiveInfinity;
@@ -1705,6 +1709,29 @@ namespace System
                  {
                      result = double.NaN;
                  }
+                else if (valueTrim.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase))
+                {
+                    valueTrim = valueTrim.Slice(info.PositiveSign.Length);
+
+                    if (valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
+                    {
+                        result = double.PositiveInfinity;
+                    }
+                    else if (valueTrim.EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                    {
+                        result = double.NaN;
+                    }
+                    else
+                    {
+                        result = 0;
+                        return false;
+                    }
+                }
+                else if (valueTrim.StartsWith(info.NegativeSign, StringComparison.OrdinalIgnoreCase) &&
+                        valueTrim.Slice(info.NegativeSign.Length).EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                {
+                    result = double.NaN;
+                }
                  else
                  {
                      result = 0;
@@ -1728,6 +1755,14 @@ namespace System
              {
                  ReadOnlySpan<char> valueTrim = value.Trim();
  
+                // This code would be simpler if we only had the concept of `InfinitySymbol`, but
+                // we don't so we'll check the existing cases first and then handle `PositiveSign` +
+                // `PositiveInfinitySymbol` and `PositiveSign/NegativeSign` + `NaNSymbol` last.
+                //
+                // Additionally, since some cultures ("wo") actually define `PositiveInfinitySymbol`
+                // to include `PositiveSign`, we need to check whether `PositiveInfinitySymbol` fits
+                // that case so that we don't start parsing things like `++infini`.
+
                  if (valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
                  {
                      result = float.PositiveInfinity;
@@ -1740,6 +1775,30 @@ namespace System
                  {
                      result = float.NaN;
                  }
+                else if (valueTrim.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase))
+                {
+                    valueTrim = valueTrim.Slice(info.PositiveSign.Length);
+
+                    if (!info.PositiveInfinitySymbol.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase) && valueTrim.EqualsOrdinalIgnoreCase(info.PositiveInfinitySymbol))
+                    {
+                        result = float.PositiveInfinity;
+                    }
+                    else if (!info.NaNSymbol.StartsWith(info.PositiveSign, StringComparison.OrdinalIgnoreCase) && valueTrim.EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                    {
+                        result = float.NaN;
+                    }
+                    else
+                    {
+                        result = 0;
+                        return false;
+                    }
+                }
+                else if (valueTrim.StartsWith(info.NegativeSign, StringComparison.OrdinalIgnoreCase) &&
+                         !info.NaNSymbol.StartsWith(info.NegativeSign, StringComparison.OrdinalIgnoreCase) &&
+                         valueTrim.Slice(info.NegativeSign.Length).EqualsOrdinalIgnoreCase(info.NaNSymbol))
+                {
+                    result = float.NaN;
+                }
                  else
                  {
                      result = 0;
author	Tanner Gooding <tagoo@outlook.com>
	Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)
committer	GitHub <noreply@github.com>
	Mon, 26 Nov 2018 19:01:04 +0000 (11:01 -0800)
src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs		patch \| blob \| history