From ce0a2617d40bc217b8e0a2137e340afe585afcb1 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 16 Jul 2018 06:59:55 -0700 Subject: [PATCH] Improve throughput of UInt32/UInt64 integer parsing (#18930) As was recently done for Int32/Int64, ports the Utf8Parser approach to parsing to UInt32/UIn64.{Try}Parse, specifically for NumberStyles.Integer (the default). Also fixes an issue discovered in the previous Int32/Int64 changes, where if the input both has an overflow and has a formatting error (e.g. Int32.Parse("12345678910blah"), we would end up throwing whichever error was hit first, which is a change from .NET Core 2.1 and netfx. The FormatException needs to be preferred over the OverflowException, which just means we can't bail early when overflow is detected. --- .../shared/System/Number.Parsing.cs | 475 ++++++++++++++++++--- 1 file changed, 419 insertions(+), 56 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/Number.Parsing.cs b/src/System.Private.CoreLib/shared/System/Number.Parsing.cs index 7af9113..0fe2c08 100644 --- a/src/System.Private.CoreLib/shared/System/Number.Parsing.cs +++ b/src/System.Private.CoreLib/shared/System/Number.Parsing.cs @@ -307,7 +307,8 @@ namespace System if ((style & ~NumberStyles.Integer) == 0) { // Optimized path for the common case of anything that's allowed for integer style. - if (!TryParseInt32IntegerStyle(s, style, info, out i, out bool overflow)) + bool overflow = false; + if (!TryParseInt32IntegerStyle(s, style, info, out i, ref overflow)) { throw overflow ? new OverflowException(SR.Overflow_Int32) : @@ -343,7 +344,8 @@ namespace System if ((options & ~NumberStyles.Integer) == 0) { // Optimized path for the common case of anything that's allowed for integer style. - if (!TryParseInt64IntegerStyle(value, options, numfmt, out i, out bool overflow)) + bool overflow = false; + if (!TryParseInt64IntegerStyle(value, options, numfmt, out i, ref overflow)) { throw overflow ? new OverflowException(SR.Overflow_Int64) : @@ -374,9 +376,22 @@ namespace System internal static unsafe uint ParseUInt32(ReadOnlySpan value, NumberStyles options, NumberFormatInfo numfmt) { - NumberBuffer number = default; uint i = 0; + if ((options & ~NumberStyles.Integer) == 0) + { + // Optimized path for the common case of anything that's allowed for integer style. + bool overflow = false; + if (!TryParseUInt32IntegerStyle(value, options, numfmt, out i, ref overflow)) + { + throw overflow ? + new OverflowException(SR.Overflow_UInt32) : + (Exception)new FormatException(SR.Format_InvalidString); + } + return i; + } + + NumberBuffer number = default; StringToNumber(value, options, ref number, numfmt, false); if ((options & NumberStyles.AllowHexSpecifier) != 0) @@ -399,9 +414,22 @@ namespace System internal static unsafe ulong ParseUInt64(ReadOnlySpan value, NumberStyles options, NumberFormatInfo numfmt) { - NumberBuffer number = default; ulong i = 0; + if ((options & ~NumberStyles.Integer) == 0) + { + // Optimized path for the common case of anything that's allowed for integer style. + bool overflow = false; + if (!TryParseUInt64IntegerStyle(value, options, numfmt, out i, ref overflow)) + { + throw overflow ? + new OverflowException(SR.Overflow_UInt64) : + (Exception)new FormatException(SR.Format_InvalidString); + } + return i; + } + + NumberBuffer number = default; StringToNumber(value, options, ref number, numfmt, false); if ((options & NumberStyles.AllowHexSpecifier) != 0) { @@ -637,7 +665,8 @@ namespace System if ((style & ~NumberStyles.Integer) == 0) { // Optimized path for the common case of anything that's allowed for integer style. - return TryParseInt32IntegerStyle(s, style, info, out result, out _); + bool overflow = false; + return TryParseInt32IntegerStyle(s, style, info, out result, ref overflow); } if (!TryStringToNumber(s, style, ref number, info, false)) @@ -663,13 +692,14 @@ namespace System } /// Parses int limited to styles that make up NumberStyles.Integer. - private static bool TryParseInt32IntegerStyle(ReadOnlySpan source, NumberStyles styles, NumberFormatInfo info, out int result, out bool overflow) + private static bool TryParseInt32IntegerStyle(ReadOnlySpan source, NumberStyles styles, NumberFormatInfo info, out int result, ref bool failureIsOverflow) { - overflow = false; - Debug.Assert((styles & ~NumberStyles.Integer) == 0); + Debug.Assert((styles & ~NumberStyles.Integer) == 0, "Only handles subsets of Integer format"); + Debug.Assert(!failureIsOverflow, $"failureIsOverflow should have been initialized to false"); if ((uint)source.Length < 1) goto FalseExit; + bool overflow = false; int sign = 1; int index = 0; int num = source[0]; @@ -738,61 +768,69 @@ namespace System do { index++; - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; num = source[index]; } while (num == '0'); - if (!IsDigit(num)) goto DoneButRemainingChars; + if (!IsDigit(num)) goto HasTrailingChars; } - // Parse most digits, up to the potential for overflow, which can't - // happen until after 9 digits. + // Parse most digits, up to the potential for overflow, which can't happen until after 9 digits. answer = num - '0'; // first digit index++; for (int i = 0; i < 8; i++) // next 8 digits can't overflow { - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; num = source[index]; - if (!IsDigit(num)) goto DoneButRemainingChars; + if (!IsDigit(num)) goto HasTrailingChars; index++; answer = 10 * answer + num - '0'; } // Potential overflow now processing the 10th digit. - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; num = source[index]; - if (!IsDigit(num)) goto DoneButRemainingChars; + if (!IsDigit(num)) goto HasTrailingChars; index++; if (answer > int.MaxValue / 10) { overflow = true; - goto FalseExit; } answer = answer * 10 + num - '0'; if ((uint)answer > (uint)int.MaxValue + (-1 * sign + 1) / 2) { overflow = true; - goto FalseExit; } + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + // At this point, we're either overflowing or hitting a formatting error. + // Format errors take precedence for compatibility. num = source[index]; - if (!IsDigit(num)) goto DoneButRemainingChars; - - // Anything more than 10 digits is definitely overflow. - overflow = true; + while (IsDigit(num)) + { + overflow = true; + index++; + if ((uint)index >= (uint)source.Length) + goto DoneAtEndButPotentialOverflow; + num = source[index]; + } + goto HasTrailingChars; } FalseExit: // parsing failed result = 0; return false; - DoneAtEnd: // we've successfully parsed up to the end of the span + DoneAtEndButPotentialOverflow: + if (overflow) + { + failureIsOverflow = true; + goto FalseExit; + } result = answer * sign; return true; - DoneButRemainingChars: // we've successfully parsed, but there are still remaining characters in the span - // Skip past trailing whitespace, then past trailing zeros, and - // if anything else remains, fail. + HasTrailingChars: // we've successfully parsed, but there are still remaining characters in the span + // Skip past trailing whitespace, then past trailing zeros, and if anything else remains, fail. if (IsWhite(num)) { if ((styles & NumberStyles.AllowTrailingWhite) == 0) goto FalseExit; @@ -800,22 +838,24 @@ namespace System { if (!IsWhite(source[index])) break; } - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; } if (!TrailingZeros(source, index)) goto FalseExit; - goto DoneAtEnd; + goto DoneAtEndButPotentialOverflow; } /// Parses long inputs limited to styles that make up NumberStyles.Integer. - private static bool TryParseInt64IntegerStyle(ReadOnlySpan source, NumberStyles styles, NumberFormatInfo info, out long result, out bool overflow) + private static bool TryParseInt64IntegerStyle( + ReadOnlySpan source, NumberStyles styles, NumberFormatInfo info, out long result, ref bool failureIsOverflow) { - overflow = false; - Debug.Assert((styles & ~NumberStyles.Integer) == 0); + Debug.Assert((styles & ~NumberStyles.Integer) == 0, "Only handles subsets of Integer format"); + Debug.Assert(!failureIsOverflow, $"failureIsOverflow should have been initialized to false"); if ((uint)source.Length < 1) goto FalseExit; + bool overflow = false; int sign = 1; int index = 0; int num = source[0]; @@ -884,62 +924,68 @@ namespace System do { index++; - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; num = source[index]; } while (num == '0'); - if (!IsDigit(num)) goto DoneButRemainingChars; + if (!IsDigit(num)) goto HasTrailingChars; } - // Parse most digits, up to the potential for overflow, which can't - // happen until after 18 digits. + // Parse most digits, up to the potential for overflow, which can't happen until after 18 digits. answer = num - '0'; // first digit index++; - for (int i = 0; i < 17; i++) // next 17 digits can't overflow { - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; num = source[index]; - if (!IsDigit(num)) goto DoneButRemainingChars; + if (!IsDigit(num)) goto HasTrailingChars; index++; answer = 10 * answer + num - '0'; } // Potential overflow now processing the 19th digit. - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; num = source[index]; - if (!IsDigit(num)) goto DoneButRemainingChars; + if (!IsDigit(num)) goto HasTrailingChars; index++; if (answer > long.MaxValue / 10) { overflow = true; - goto FalseExit; } answer = answer * 10 + num - '0'; if ((ulong)answer > (ulong)long.MaxValue + (ulong)((-1 * sign + 1) / 2)) // + sign => 0, - sign => 1 { overflow = true; - goto FalseExit; } + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + // At this point, we're either overflowing or hitting a formatting error. + // Format errors take precedence for compatibility. num = source[index]; - if (!IsDigit(num)) goto DoneButRemainingChars; - - // Anything more than 19 digits is definitely overflow. - overflow = true; + while (IsDigit(num)) + { + overflow = true; + index++; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + } + goto HasTrailingChars; } FalseExit: // parsing failed result = 0; return false; - DoneAtEnd: // we've successfully parsed up to the end of the span + DoneAtEndButPotentialOverflow: + if (overflow) + { + failureIsOverflow = true; + goto FalseExit; + } result = answer * sign; return true; - DoneButRemainingChars: // we've successfully parsed, but there are still remaining characters in the span - // Skip past trailing whitespace, then past trailing zeros, and - // if anything else remains, fail. + HasTrailingChars: // we've successfully parsed, but there are still remaining characters in the span + // Skip past trailing whitespace, then past trailing zeros, and if anything else remains, fail. if (IsWhite(num)) { if ((styles & NumberStyles.AllowTrailingWhite) == 0) goto FalseExit; @@ -947,12 +993,12 @@ namespace System { if (!IsWhite(source[index])) break; } - if ((uint)index >= (uint)source.Length) goto DoneAtEnd; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; } if (!TrailingZeros(source, index)) goto FalseExit; - goto DoneAtEnd; + goto DoneAtEndButPotentialOverflow; } internal static unsafe bool TryParseInt64(ReadOnlySpan s, NumberStyles style, NumberFormatInfo info, out long result) @@ -962,7 +1008,8 @@ namespace System if ((style & ~NumberStyles.Integer) == 0) { // Optimized path for the common case of anything that's allowed for integer style. - return TryParseInt64IntegerStyle(s, style, info, out result, out _); + bool overflow = false; + return TryParseInt64IntegerStyle(s, style, info, out result, ref overflow); } NumberBuffer number = default; @@ -993,6 +1040,13 @@ namespace System NumberBuffer number = default; result = 0; + if ((style & ~NumberStyles.Integer) == 0) + { + // Optimized path for the common case of anything that's allowed for integer style. + bool overflow = false; + return TryParseUInt32IntegerStyle(s, style, info, out result, ref overflow); + } + if (!TryStringToNumber(s, style, ref number, info, false)) { return false; @@ -1015,11 +1069,169 @@ namespace System return true; } + /// Parses uint limited to styles that make up NumberStyles.Integer. + private static bool TryParseUInt32IntegerStyle( + ReadOnlySpan source, NumberStyles styles, NumberFormatInfo info, out uint result, ref bool failureIsOverflow) + { + Debug.Assert((styles & ~NumberStyles.Integer) == 0, "Only handles subsets of Integer format"); + Debug.Assert(!failureIsOverflow, $"failureIsOverflow should have been initialized to false"); + + if ((uint)source.Length < 1) goto FalseExit; + + bool overflow = false; + bool hasNegativeSign = false; + int index = 0; + int num = source[0]; + + // Skip past any whitespace at the beginning. + if ((styles & NumberStyles.AllowLeadingWhite) != 0 && IsWhite(num)) + { + index++; + while (true) + { + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + if (!IsWhite(num)) break; + index++; + } + } + + // Parse leading sign. + if ((styles & NumberStyles.AllowLeadingSign) != 0) + { + string positiveSign = info.PositiveSign, negativeSign = info.NegativeSign; + + if (positiveSign == "+" && negativeSign == "-") + { + if (num == '+') + { + index++; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + else if (num == '-') + { + hasNegativeSign = true; + index++; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + } + else + { + source = source.Slice(index); + index = 0; + if (!string.IsNullOrEmpty(positiveSign) && source.StartsWith(positiveSign)) + { + index += positiveSign.Length; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + else if (!string.IsNullOrEmpty(negativeSign) && source.StartsWith(negativeSign)) + { + hasNegativeSign = true; + index += negativeSign.Length; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + } + } + + int answer = 0; + + if (IsDigit(num)) + { + // Skip past leading zeros. + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + } while (num == '0'); + if (!IsDigit(num)) goto HasTrailingChars; + } + + // Parse most digits, up to the potential for overflow, which can't happen until after 9 digits. + answer = num - '0'; // first digit + index++; + for (int i = 0; i < 8; i++) // next 8 digits can't overflow + { + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + if (!IsDigit(num)) goto HasTrailingChars; + index++; + answer = 10 * answer + num - '0'; + } + + // Potential overflow now processing the 10th digit. + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + if (!IsDigit(num)) goto HasTrailingChars; + index++; + if ((uint)answer > uint.MaxValue / 10 || ((uint)answer == uint.MaxValue / 10 && num > '5')) + { + overflow = true; + } + answer = answer * 10 + num - '0'; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + + // At this point, we're either overflowing or hitting a formatting error. + // Format errors take precedence for compatibility. + num = source[index]; + while (IsDigit(num)) + { + overflow = true; + index++; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + } + goto HasTrailingChars; + } + + FalseExit: // parsing failed + result = 0; + return false; + + DoneAtEndButPotentialOverflow: + if (overflow || (hasNegativeSign && answer != 0)) + { + failureIsOverflow = true; + goto FalseExit; + } + result = (uint)answer; + return true; + + HasTrailingChars: // we've successfully parsed, but there are still remaining characters in the span + // Skip past trailing whitespace, then past trailing zeros, and if anything else remains, fail. + if (IsWhite(num)) + { + if ((styles & NumberStyles.AllowTrailingWhite) == 0) goto FalseExit; + for (index++; index < source.Length; index++) + { + if (!IsWhite(source[index])) break; + } + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + } + + if (!TrailingZeros(source, index)) goto FalseExit; + + goto DoneAtEndButPotentialOverflow; + } + internal static unsafe bool TryParseUInt64(ReadOnlySpan s, NumberStyles style, NumberFormatInfo info, out ulong result) { NumberBuffer number = default; result = 0; + if ((style & ~NumberStyles.Integer) == 0) + { + // Optimized path for the common case of anything that's allowed for integer style. + bool overflow = false; + return TryParseUInt64IntegerStyle(s, style, info, out result, ref overflow); + } + if (!TryStringToNumber(s, style, ref number, info, false)) { return false; @@ -1042,6 +1254,157 @@ namespace System return true; } + /// Parses ulong limited to styles that make up NumberStyles.Integer. + private static bool TryParseUInt64IntegerStyle( + ReadOnlySpan source, NumberStyles styles, NumberFormatInfo info, out ulong result, ref bool failureIsOverflow) + { + Debug.Assert((styles & ~NumberStyles.Integer) == 0, "Only handles subsets of Integer format"); + Debug.Assert(!failureIsOverflow, $"failureIsOverflow should have been initialized to false"); + + if ((uint)source.Length < 1) goto FalseExit; + + bool overflow = false; + bool hasNegativeSign = false; + int index = 0; + int num = source[0]; + + // Skip past any whitespace at the beginning. + if ((styles & NumberStyles.AllowLeadingWhite) != 0 && IsWhite(num)) + { + index++; + while (true) + { + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + if (!IsWhite(num)) break; + index++; + } + } + + // Parse leading sign. + if ((styles & NumberStyles.AllowLeadingSign) != 0) + { + string positiveSign = info.PositiveSign, negativeSign = info.NegativeSign; + + if (positiveSign == "+" && negativeSign == "-") + { + if (num == '+') + { + index++; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + else if (num == '-') + { + hasNegativeSign = true; + index++; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + } + else + { + source = source.Slice(index); + index = 0; + if (!string.IsNullOrEmpty(positiveSign) && source.StartsWith(positiveSign)) + { + index += positiveSign.Length; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + else if (!string.IsNullOrEmpty(negativeSign) && source.StartsWith(negativeSign)) + { + hasNegativeSign = true; + index += negativeSign.Length; + if ((uint)index >= (uint)source.Length) goto FalseExit; + num = source[index]; + } + } + } + + long answer = 0; + + if (IsDigit(num)) + { + // Skip past leading zeros. + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + } while (num == '0'); + if (!IsDigit(num)) goto HasTrailingChars; + } + + // Parse most digits, up to the potential for overflow, which can't happen until after 19 digits. + answer = num - '0'; // first digit + index++; + for (int i = 0; i < 18; i++) // next 18 digits can't overflow + { + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + if (!IsDigit(num)) goto HasTrailingChars; + index++; + answer = 10 * answer + num - '0'; + } + + // Potential overflow now processing the 20th digit. + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + if (!IsDigit(num)) goto HasTrailingChars; + index++; + if ((ulong)answer > ulong.MaxValue / 10 || ((ulong)answer == ulong.MaxValue / 10 && num > '5')) + { + overflow = true; + } + answer = answer * 10 + num - '0'; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + + // At this point, we're either overflowing or hitting a formatting error. + // Format errors take precedence for compatibility. + num = source[index]; + while (IsDigit(num)) + { + overflow = true; + index++; + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + num = source[index]; + } + goto HasTrailingChars; + } + + FalseExit: // parsing failed + result = 0; + return false; + + DoneAtEndButPotentialOverflow: + if (overflow || (hasNegativeSign && answer != 0)) + { + failureIsOverflow = true; + goto FalseExit; + } + result = (ulong)answer; + return true; + + HasTrailingChars: // we've successfully parsed, but there are still remaining characters in the span + // Skip past trailing whitespace, then past trailing zeros, and if anything else remains, fail. + if (IsWhite(num)) + { + if ((styles & NumberStyles.AllowTrailingWhite) == 0) goto FalseExit; + for (index++; index < source.Length; index++) + { + if (!IsWhite(source[index])) break; + } + if ((uint)index >= (uint)source.Length) goto DoneAtEndButPotentialOverflow; + } + + if (!TrailingZeros(source, index)) goto FalseExit; + + goto DoneAtEndButPotentialOverflow; + } + internal static unsafe decimal ParseDecimal(ReadOnlySpan value, NumberStyles options, NumberFormatInfo numfmt) { NumberBuffer number = default; @@ -1265,7 +1628,7 @@ namespace System return null; } - private static bool IsWhite(int ch) => ch == 0x20 || (ch >= 0x09 && ch <= 0x0D); + private static bool IsWhite(int ch) => ch == 0x20 || ((uint)(ch - 0x09) <= (0x0D - 0x09)); private static bool IsDigit(int ch) => ((uint)ch - '0') <= 9; -- 2.7.4