From 27cd821920a69ea6456284654e8d2b5c24de6b15 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Mon, 4 Jun 2018 13:09:29 -0700 Subject: [PATCH] Make Encoding.Unicode.GetString faster on 64-bit platforms (#18263) UnicodeEncoding was taking the fast path only when both source and destination were 8-byte aligned. That was not the case for Unicode.GetString. The fix is to just check the source alignment as the fast path precondition, and use an unaligned write for the destination. Also removed the alternative fast path that is not needed anymore, tweaked local variable assignments for better register allocation, and fixed a performance issue introduced by the recent big-endian portability fix. --- .../shared/System/Text/UnicodeEncoding.cs | 156 +++++++-------------- 1 file changed, 51 insertions(+), 105 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/Text/UnicodeEncoding.cs b/src/System.Private.CoreLib/shared/System/Text/UnicodeEncoding.cs index b654838..8386c49 100644 --- a/src/System.Private.CoreLib/shared/System/Text/UnicodeEncoding.cs +++ b/src/System.Private.CoreLib/shared/System/Text/UnicodeEncoding.cs @@ -6,11 +6,17 @@ // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused. // +// This define can be used to turn off the fast loops. Useful for finding whether +// the problem is fastloop-specific. 
+#define FASTLOOP + using System; using System.Globalization; using System.Diagnostics; using System.Runtime.InteropServices; +using Internal.Runtime.CompilerServices; + namespace System.Text { public class UnicodeEncoding : Encoding @@ -23,17 +29,14 @@ namespace System.Text private static readonly byte[] s_bigEndianPreamble = new byte[2] { 0xfe, 0xff }; private static readonly byte[] s_littleEndianPreamble = new byte[2] { 0xff, 0xfe }; - internal bool isThrowException = false; + private bool isThrowException = false; - internal bool bigEndian = false; - internal bool byteOrderMark = true; + private bool bigEndian = false; + private bool byteOrderMark = true; // Unicode version 2.0 character size in bytes public const int CharSize = 2; - // endianness-based bit pattern mask. - static readonly ulong highLowPatternMask = ((ulong) 0xd800d800d800d800 | (BitConverter.IsLittleEndian ? (ulong) 0x0400000004000000 : (ulong) 0x0000040000000400)); - public UnicodeEncoding() : this(false, true) { @@ -374,11 +377,6 @@ namespace System.Text bool wasHereBefore = false; - // Need -1 to check 2 at a time. If we have an even #, longChars will go - // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longChars - // will go from longEnd - 1 long to longEnd. (Might not get to use this) - ulong* longEnd = (ulong*)(charEnd - 3); - // For fallback we may need a fallback buffer EncoderFallbackBuffer fallbackBuffer = null; char* charsForFallback; @@ -412,16 +410,21 @@ namespace System.Text if (ch == 0) { // No fallback, maybe we can do it fast -#if !NO_FAST_UNICODE_LOOP +#if FASTLOOP // If endianess is backwards then each pair of bytes would be backwards. - if ( (bigEndian ^ BitConverter.IsLittleEndian) && - -#if BIT64 // 64 bit CPU needs to be long aligned for this to work. 
- charLeftOver == 0 && (unchecked((long)chars) & 7) == 0) + if ( (bigEndian ^ BitConverter.IsLittleEndian) && +#if BIT64 + (unchecked((long)chars) & 7) == 0 && #else - charLeftOver == 0 && (unchecked((int)chars) & 3) == 0) + (unchecked((int)chars) & 3) == 0 && #endif + charLeftOver == 0) { + // Need -1 to check 2 at a time. If we have an even #, longChars will go + // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longChars + // will go from longEnd - 1 long to longEnd. (Might not get to use this) + ulong* longEnd = (ulong*)(charEnd - 3); + // Need new char* so we can check 4 at a time ulong* longChars = (ulong*)chars; @@ -452,7 +455,8 @@ namespace System.Text // If they happen to be high/low/high/low, we may as well continue. Check the next // bit to see if its set (low) or not (high) in the right pattern - if (((0xfc00fc00fc00fc00 & *longChars) ^ highLowPatternMask) != 0) + if ((0xfc00fc00fc00fc00 & *longChars) != + (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00)) { // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high // was hoped for or the 0x0400 bit wasn't set where a low was hoped for. @@ -475,7 +479,7 @@ namespace System.Text if (chars >= charEnd) break; } -#endif // !NO_FAST_UNICODE_LOOP +#endif // FASTLOOP // No fallback, just get next char ch = *chars; @@ -695,14 +699,14 @@ namespace System.Text if (ch == 0) { // No fallback, maybe we can do it fast -#if !NO_FAST_UNICODE_LOOP +#if FASTLOOP // If endianess is backwards then each pair of bytes would be backwards. 
if ( (bigEndian ^ BitConverter.IsLittleEndian) && -#if BIT64 // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned - (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 && +#if BIT64 + (unchecked((long)chars) & 7) == 0 && #else - (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 && -#endif // BIT64 + (unchecked((int)chars) & 3) == 0 && +#endif charLeftOver == 0) { // Need -1 to check 2 at a time. If we have an even #, longChars will go @@ -744,7 +748,8 @@ namespace System.Text // If they happen to be high/low/high/low, we may as well continue. Check the next // bit to see if its set (low) or not (high) in the right pattern - if (((0xfc00fc00fc00fc00 & *longChars) ^ highLowPatternMask) != 0) + if ((0xfc00fc00fc00fc00 & *longChars) != + (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00)) { // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high // was hoped for or the 0x0400 bit wasn't set where a low was hoped for. @@ -759,7 +764,7 @@ namespace System.Text // else all < 0x8000 so we can use them // We can use these 4 chars. - *longBytes = *longChars; + Unsafe.WriteUnaligned(longBytes, *longChars); longChars++; longBytes++; } @@ -770,62 +775,7 @@ namespace System.Text if (chars >= charEnd) break; } - // Not aligned, but maybe we can still be somewhat faster - // Also somehow this optimizes the above loop? It seems to cause something above - // to get enregistered, but I haven't figured out how to make that happen without this loop. 
- else if ((charLeftOver == 0) && - (bigEndian ^ BitConverter.IsLittleEndian) && - -#if BIT64 - (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) && // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time -#else - (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) && // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time -#endif // BIT64 - (unchecked((int)(bytes)) & 1) == 0) - { - // # to use - long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ? - (byteEnd - bytes) >> 1 : charEnd - chars; - - // Need new char* - char* charOut = ((char*)bytes); // a char* for our output - char* tempEnd = chars + iCount - 1; // Our end pointer - - while (chars < tempEnd) - { - if (*chars >= (char)0xd800 && *chars <= (char)0xdfff) - { - // break for fallback for low surrogate - if (*chars >= 0xdc00) - break; - - // break if next one's not a low surrogate (will do fallback) - if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff) - break; - - // They both exist, use them - } - // If 2nd char is surrogate & this one isn't then only add one - else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff) - { - *charOut = *chars; - charOut++; - chars++; - continue; - } - - *charOut = *chars; - *(charOut + 1) = *(chars + 1); - charOut += 2; - chars += 2; - } - - bytes = (byte*)charOut; - - if (chars >= charEnd) - break; - } -#endif // !NO_FAST_UNICODE_LOOP +#endif // FASTLOOP // No fallback, just get next char ch = *chars; @@ -1054,9 +1004,6 @@ namespace System.Text encoder == null || !encoder._throwOnOverflow, "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting"); - // We used to copy it fast, but this doesn't check for surrogates - // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount); - return (int)(bytes - byteStart); } @@ -1077,11 +1024,6 @@ namespace System.Text // Start by assuming same # of chars as bytes int charCount = 
count >> 1; - // Need -1 to check 2 at a time. If we have an even #, longBytes will go - // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longBytes - // will go from longEnd - 1 long to longEnd. (Might not get to use this) - ulong* longEnd = (ulong*)(byteEnd - 7); - // For fallback we may need a fallback buffer DecoderFallbackBuffer fallbackBuffer = null; @@ -1110,15 +1052,20 @@ namespace System.Text { // If we're aligned then maybe we can do it fast // That'll hurt if we're unaligned because we'll always test but never be aligned -#if !NO_FAST_UNICODE_LOOP +#if FASTLOOP if ((bigEndian ^ BitConverter.IsLittleEndian) && -#if BIT64 // win64 has to be long aligned +#if BIT64 (unchecked((long)bytes) & 7) == 0 && #else (unchecked((int)bytes) & 3) == 0 && #endif // BIT64 lastByte == -1 && lastChar == 0) { + // Need -1 to check 2 at a time. If we have an even #, longBytes will go + // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longBytes + // will go from longEnd - 1 long to longEnd. (Might not get to use this) + ulong* longEnd = (ulong*)(byteEnd - 7); + // Need new char* so we can check 4 at a time ulong* longBytes = (ulong*)bytes; @@ -1149,7 +1096,8 @@ namespace System.Text // If they happen to be high/low/high/low, we may as well continue. Check the next // bit to see if its set (low) or not (high) in the right pattern - if (((0xfc00fc00fc00fc00 & *longBytes) ^ highLowPatternMask) != 0) + if ((0xfc00fc00fc00fc00 & *longBytes) != + (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00)) { // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high // was hoped for or the 0x0400 bit wasn't set where a low was hoped for. 
@@ -1172,7 +1120,7 @@ namespace System.Text if (bytes >= byteEnd) break; } -#endif // !NO_FAST_UNICODE_LOOP +#endif // FASTLOOP // Get 1st byte if (lastByte < 0) @@ -1425,13 +1373,13 @@ namespace System.Text { // If we're aligned then maybe we can do it fast // That'll hurt if we're unaligned because we'll always test but never be aligned -#if !NO_FAST_UNICODE_LOOP +#if FASTLOOP if ((bigEndian ^ BitConverter.IsLittleEndian) && -#if BIT64 // win64 has to be long aligned - (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 && +#if BIT64 + (unchecked((long)chars) & 7) == 0 && #else - (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 && -#endif // BIT64 + (unchecked((int)chars) & 3) == 0 && +#endif lastByte == -1 && lastChar == 0) { // Need -1 to check 2 at a time. If we have an even #, longChars will go @@ -1473,7 +1421,8 @@ namespace System.Text // If they happen to be high/low/high/low, we may as well continue. Check the next // bit to see if its set (low) or not (high) in the right pattern - if (((0xfc00fc00fc00fc00 & *longBytes) ^ highLowPatternMask) != 0) + if ((0xfc00fc00fc00fc00 & *longBytes) != + (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00)) { // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high // was hoped for or the 0x0400 bit wasn't set where a low was hoped for. @@ -1488,7 +1437,7 @@ namespace System.Text // else all < 0x8000 so we can use them // We can use these 4 chars. 
- *longChars = *longBytes; + Unsafe.WriteUnaligned(longChars, *longBytes); longBytes++; longChars++; } @@ -1499,7 +1448,7 @@ namespace System.Text if (bytes >= byteEnd) break; } -#endif // !NO_FAST_UNICODE_LOOP +#endif // FASTLOOP // Get 1st byte if (lastByte < 0) @@ -1808,9 +1757,6 @@ namespace System.Text decoder.lastByte = lastByte; } - // Used to do this the old way - // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount); - // Shouldn't have anything in fallback buffer for GetChars // (don't have to check _throwOnOverflow for count or chars) Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, -- 2.7.4