From d49a76c364bccc10a95f11b233f58a5d2c5667ab Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 4 Jul 2018 21:38:05 -0400 Subject: [PATCH] Improve DateTime{Offset}.ParseExact{Multiple} performance for RFC1123 ("r") (dotnet/coreclr#18771) * Improve DateTime{Offset}.ParseExact{Multiple} performance for RFC1123 ("r") Significantly improves the performance by porting and adapting the Utf8Parser code from corefx. This optimizes for the (default) case of a DateTimeStyles.None; specifying any other style falls back to the normal parsing support, as that requires handling things such as arbitrary whitespace anywhere in the string. * Address PR feedback Commit migrated from https://github.com/dotnet/coreclr/commit/9f9a37d04460559c3f91927f1297c8195411a0a3 --- .../src/System/Globalization/DateTimeParse.cs | 234 +++++++++++++++++++-- 1 file changed, 222 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeParse.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeParse.cs index 682b9e3..d9c7664 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeParse.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeParse.cs @@ -3789,13 +3789,7 @@ new DS[] { DS.ERROR, DS.TX_NNN, DS.TX_NNN, DS.TX_NNN, DS.ERROR, DS.ERROR, break; case 'r': case 'R': // RFC 1123 Standard. (in Universal time) - parseInfo.calendar = GregorianCalendar.GetDefaultInstance(); - dtfi = DateTimeFormatInfo.InvariantInfo; - - if ((result.flags & ParseFlags.CaptureOffset) != 0) - { - result.flags |= ParseFlags.Rfc1123Pattern; - } + ConfigureFormatR(ref dtfi, ref parseInfo, ref result); break; case 's': // Sortable format (in local time) dtfi = DateTimeFormatInfo.InvariantInfo; @@ -3829,10 +3823,16 @@ new DS[] { DS.ERROR, DS.TX_NNN, DS.TX_NNN, DS.TX_NNN, DS.ERROR, DS.ERROR, return (DateTimeFormat.GetRealFormat(format, dtfi)); } - - - - + private static void ConfigureFormatR(ref DateTimeFormatInfo dtfi, ref ParsingInfo parseInfo, ref DateTimeResult result) + { + parseInfo.calendar = GregorianCalendar.GetDefaultInstance(); + dtfi = DateTimeFormatInfo.InvariantInfo; + if ((result.flags & ParseFlags.CaptureOffset) != 0) + { + result.flags |= ParseFlags.Rfc1123Pattern; + } + } + // Given a specified format character, parse and update the parsing result. // private static bool ParseByFormat( @@ -4443,12 +4443,27 @@ new DS[] { DS.ERROR, DS.TX_NNN, DS.TX_NNN, DS.TX_NNN, DS.ERROR, DS.ERROR, if (formatParam.Length == 1) { - if (((result.flags & ParseFlags.CaptureOffset) != 0) && formatParam[0] == 'U') + char formatParamChar = formatParam[0]; + + // Fast-paths for common and important formats/configurations. + if (styles == DateTimeStyles.None) + { + switch (formatParamChar) + { + case 'R': + case 'r': + ConfigureFormatR(ref dtfi, ref parseInfo, ref result); + return ParseFormatR(s, ref parseInfo, ref result); + } + } + + if (((result.flags & ParseFlags.CaptureOffset) != 0) && formatParamChar == 'U') { // The 'U' format is not allowed for DateTimeOffset result.SetBadFormatSpecifierFailure(formatParam); return false; } + formatParam = ExpandPredefinedFormat(formatParam, ref dtfi, ref parseInfo, ref result); } @@ -4621,6 +4636,201 @@ new DS[] { DS.ERROR, DS.TX_NNN, DS.TX_NNN, DS.TX_NNN, DS.ERROR, DS.ERROR, return true; } + private static bool ParseFormatR(ReadOnlySpan source, ref ParsingInfo parseInfo, ref DateTimeResult result) + { + // Example: + // Tue, 03 Jan 2017 08:08:05 GMT + + // The format is exactly 29 characters. + if ((uint)source.Length != 29) + { + result.SetBadDateTimeFailure(); + return false; + } + + // Parse the three-letter day of week. Any casing is valid. + DayOfWeek dayOfWeek; + { + uint dow0 = source[0], dow1 = source[1], dow2 = source[2], comma = source[3]; + + if ((dow0 | dow1 | dow2 | comma) > 0x7F) + { + result.SetBadDateTimeFailure(); + return false; + } + + uint dowString = (dow0 << 24) | (dow1 << 16) | (dow2 << 8) | comma | 0x20202000; + switch (dowString) + { + case 0x73756E2c /* 'sun,' */: dayOfWeek = DayOfWeek.Sunday; break; + case 0x6d6f6e2c /* 'mon,' */: dayOfWeek = DayOfWeek.Monday; break; + case 0x7475652c /* 'tue,' */: dayOfWeek = DayOfWeek.Tuesday; break; + case 0x7765642c /* 'wed,' */: dayOfWeek = DayOfWeek.Wednesday; break; + case 0x7468752c /* 'thu,' */: dayOfWeek = DayOfWeek.Thursday; break; + case 0x6672692c /* 'fri,' */: dayOfWeek = DayOfWeek.Friday; break; + case 0x7361742c /* 'sat,' */: dayOfWeek = DayOfWeek.Saturday; break; + default: + result.SetBadDateTimeFailure(); + return false; + } + } + + if (source[4] != ' ') + { + result.SetBadDateTimeFailure(); + return false; + } + + // Parse the two digit day. + int day; + { + uint digit1 = (uint)(source[5] - '0'), digit2 = (uint)(source[6] - '0'); + + if (digit1 > 9 || digit2 > 9) + { + result.SetBadDateTimeFailure(); + return false; + } + + day = (int)(digit1*10 + digit2); + } + + if (source[7] != ' ') + { + result.SetBadDateTimeFailure(); + return false; + } + + // Parse the three letter month (followed by a space). Any casing is valid. + int month; + { + uint m0 = source[8], m1 = source[9], m2 = source[10], space = source[11]; + + if ((m0 | m1 | m2 | space) > 0x7F) + { + result.SetBadDateTimeFailure(); + return false; + } + + switch ((m0 << 24) | (m1 << 16) | (m2 << 8) | space | 0x20202000) + { + case 0x6a616e20 /* 'jan ' */ : month = 1; break; + case 0x66656220 /* 'feb ' */ : month = 2; break; + case 0x6d617220 /* 'mar ' */ : month = 3; break; + case 0x61707220 /* 'apr ' */ : month = 4; break; + case 0x6d617920 /* 'may ' */ : month = 5; break; + case 0x6a756e20 /* 'jun ' */ : month = 6; break; + case 0x6a756c20 /* 'jul ' */ : month = 7; break; + case 0x61756720 /* 'aug ' */ : month = 8; break; + case 0x73657020 /* 'sep ' */ : month = 9; break; + case 0x6f637420 /* 'oct ' */ : month = 10; break; + case 0x6e6f7620 /* 'nov ' */ : month = 11; break; + case 0x64656320 /* 'dec ' */ : month = 12; break; + default: + result.SetBadDateTimeFailure(); + return false; + } + } + + // Parse the four-digit year. + int year; + { + uint y1 = (uint)(source[12] - '0'), y2 = (uint)(source[13] - '0'), y3 = (uint)(source[14] - '0'), y4 = (uint)(source[15] - '0'); + + if (y1 > 9 || y2 > 9 || y3 > 9 || y4 > 9) + { + result.SetBadDateTimeFailure(); + return false; + } + + year = (int)(y1*1000 + y2*100 + y3*10 + y4); + } + + if (source[16] != ' ') + { + result.SetBadDateTimeFailure(); + return false; + } + + // Parse the two digit hour. + int hour; + { + uint h1 = (uint)(source[17] - '0'), h2 = (uint)(source[18] - '0'); + + if (h1 > 9 || h2 > 9) + { + result.SetBadDateTimeFailure(); + return false; + } + + hour = (int)(h1*10 + h2); + } + + if (source[19] != ':') + { + result.SetBadDateTimeFailure(); + return false; + } + + // Parse the two-digit minute. + int minute; + { + uint m1 = (uint)(source[20] - '0'); + uint m2 = (uint)(source[21] - '0'); + + if (m1 > 9 || m2 > 9) + { + result.SetBadDateTimeFailure(); + return false; + } + + minute = (int)(m1*10 + m2); + } + + if (source[22] != ':') + { + result.SetBadDateTimeFailure(); + return false; + } + + // Parse the two-digit second. + int second; + { + uint s1 = (uint)(source[23] - '0'), s2 = (uint)(source[24] - '0'); + + if (s1 > 9 || s2 > 9) + { + result.SetBadDateTimeFailure(); + return false; + } + + second = (int)(s1*10 + s2); + } + + // Parse " GMT". It must be upper case. + if (source[25] != ' ' || source[26] != 'G' || source[27] != 'M' || source[28] != 'T') + { + result.SetBadDateTimeFailure(); + return false; + } + + // Validate that the parsed date is valid according to the calendar. + if (!parseInfo.calendar.TryToDateTime(year, month, day, hour, minute, second, 0, 0, out result.parsedDate)) + { + result.SetFailure(ParseFailureKind.FormatBadDateTimeCalendar, nameof(SR.Format_BadDateTimeCalendar)); + return false; + } + + // And validate that the parsed day of week matches what the calendar said it should be. + if (dayOfWeek != result.parsedDate.DayOfWeek) + { + result.SetFailure(ParseFailureKind.FormatWithOriginalDateTime, nameof(SR.Format_BadDayOfWeek)); + return false; + } + + return true; + } + private static Exception GetDateTimeParseException(ref DateTimeResult result) { switch (result.failure) -- 2.7.4