From: Jan Kotas Date: Mon, 13 Mar 2017 04:06:26 +0000 (-0700) Subject: Improve encoding performance (dotnet/coreclr#10124) X-Git-Tag: submit/tizen/20210909.063632~11030^2~7763 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=37fb7a4769f2e917c00ea63a62946103f0588d11;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Improve encoding performance (dotnet/coreclr#10124) Commit migrated from https://github.com/dotnet/coreclr/commit/425969612573ac02e9cddd8ac87126fcbe4efba0 --- diff --git a/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs b/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs index 307e489..5d140f5 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs @@ -158,6 +158,7 @@ namespace System.Text // For fallback we may need a fallback buffer, we know we aren't default fallback. EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; if (encoder != null) { @@ -227,7 +228,9 @@ namespace System.Text fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false); // This will fallback a pair if *chars is a low surrogate - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; } // Now we may have fallback char[] already from the encoder @@ -260,7 +263,9 @@ namespace System.Text } // Get Fallback - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; continue; } @@ -292,6 +297,7 @@ namespace System.Text // For fallback we may need a fallback buffer, we know we aren't default fallback. EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; // prepare our end char* charEnd = chars + charCount; @@ -407,7 +413,9 @@ namespace System.Text // Since left over char was a surrogate, it'll have to be fallen back. // Get Fallback // This will fallback a pair if *chars is a low surrogate - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; } // Now we may have fallback char[] already from the encoder @@ -440,7 +448,9 @@ namespace System.Text } // Get Fallback - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; // Go ahead & continue (& do the fallback) continue; @@ -580,6 +590,7 @@ namespace System.Text // Only need decoder fallback buffer if not using ? fallback. // ASCII doesn't do best fit, so don't have to check for it, find out which decoder fallback we're using DecoderReplacementFallback fallback = null; + char* charsForFallback; if (decoder == null) fallback = this.DecoderFallback as DecoderReplacementFallback; @@ -651,7 +662,11 @@ namespace System.Text byteBuffer[0] = b; // Note that chars won't get updated unless this succeeds - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { // May or may not throw, but we didn't get this byte Debug.Assert(bytes > byteStart || chars == charStart, diff --git a/src/coreclr/src/mscorlib/src/System/Text/Latin1Encoding.cs b/src/coreclr/src/mscorlib/src/System/Text/Latin1Encoding.cs index 569d045..7742f7c 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/Latin1Encoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/Latin1Encoding.cs @@ -114,6 +114,7 @@ namespace System.Text // For fallback we may need a fallback buffer, we know we aren't default fallback. EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; // We may have a left over character from last time, try and process it. if (charLeftOver > 0) @@ -127,7 +128,9 @@ namespace System.Text // Since left over char was a surrogate, it'll have to be fallen back. // Get Fallback // This will fallback a pair if *chars is a low surrogate - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; } // Now we may have fallback char[] already from the encoder @@ -160,7 +163,9 @@ namespace System.Text } // Get Fallback - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; continue; } @@ -274,6 +279,7 @@ namespace System.Text // For fallback we may need a fallback buffer, we know we aren't default fallback, create & init it EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; // We may have a left over character from last time, try and process it. if (charLeftOver > 0) @@ -288,7 +294,10 @@ namespace System.Text // Since left over char was a surrogate, it'll have to be fallen back. // Get Fallback // This will fallback a pair if *chars is a low surrogate - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; + if (fallbackBuffer.Remaining > byteEnd - bytes) { // Throw it, if we don't have enough for this we never will @@ -326,7 +335,9 @@ namespace System.Text } // Get Fallback - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; // Make sure we have enough room. Each fallback char will be 1 output char // (or else cause a recursion exception) diff --git a/src/coreclr/src/mscorlib/src/System/Text/UTF32Encoding.cs b/src/coreclr/src/mscorlib/src/System/Text/UTF32Encoding.cs index f5b3d10..d74653c 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/UTF32Encoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/UTF32Encoding.cs @@ -193,6 +193,8 @@ namespace System.Text // For fallback we may need a fallback buffer EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; + if (encoder != null) { highSurrogate = encoder.charLeftOver; @@ -250,7 +252,9 @@ namespace System.Text chars--; // Do the fallback - fallbackBuffer.InternalFallback(highSurrogate, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback); + chars = charsForFallback; // We're going to fallback the old high surrogate. highSurrogate = '\0'; @@ -271,7 +275,9 @@ namespace System.Text if (Char.IsLowSurrogate(ch)) { // We have a leading low surrogate, do the fallback - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; // Try again with fallback buffer continue; @@ -285,7 +291,10 @@ namespace System.Text if ((encoder == null || encoder.MustFlush) && highSurrogate > 0) { // We have to do the fallback for the lonely high surrogate - fallbackBuffer.InternalFallback(highSurrogate, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback); + chars = charsForFallback; + highSurrogate = (char)0; goto TryAgain; } @@ -321,6 +330,8 @@ namespace System.Text // For fallback we may need a fallback buffer EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; + if (encoder != null) { highSurrogate = encoder.charLeftOver; @@ -412,7 +423,9 @@ namespace System.Text chars--; // Do the fallback - fallbackBuffer.InternalFallback(highSurrogate, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback); + chars = charsForFallback; // We're going to fallback the old high surrogate. highSurrogate = '\0'; @@ -433,7 +446,9 @@ namespace System.Text if (Char.IsLowSurrogate(ch)) { // We have a leading low surrogate, do the fallback - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; // Try again with fallback buffer continue; @@ -476,7 +491,10 @@ namespace System.Text if ((encoder == null || encoder.MustFlush) && highSurrogate > 0) { // We have to do the fallback for the lonely high surrogate - fallbackBuffer.InternalFallback(highSurrogate, ref chars); + charsForFallback = chars; + fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback); + chars = charsForFallback; + highSurrogate = (char)0; goto TryAgain; } @@ -663,6 +681,7 @@ namespace System.Text // For fallback we may need a fallback buffer DecoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; // See if there's anything in our decoder if (decoder != null) @@ -729,8 +748,13 @@ namespace System.Text } // Chars won't be updated unless this works. - if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars)) + charsForFallback = chars; + bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { + // Couldn't fallback, throw or wait til next time // We either read enough bytes for bytes-=4 to work, or we're // going to throw in ThrowCharsOverflow because chars == charStart @@ -813,7 +837,11 @@ namespace System.Text } } - if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars)) + charsForFallback = chars; + bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { // Couldn't fallback. fallbackBuffer.InternalReset(); diff --git a/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs b/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs index b2cc462..8be2e4a 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs @@ -204,6 +204,8 @@ namespace System.Text // For fallback we may need a fallback buffer. // We wait to initialize it though in case we don't have any broken input unicode EncoderFallbackBuffer fallbackBuffer = null; + char* pSrcForFallback; + char* pSrc = chars; char* pEnd = pSrc + count; @@ -369,7 +371,9 @@ namespace System.Text // Do our fallback. Actually we already know its a mixed up surrogate, // so the ref pSrc isn't gonna do anything. - fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrc); + pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrcForFallback); + pSrc = pSrcForFallback; // Ignore it if we don't throw (we had preallocated this ch) byteCount--; @@ -615,6 +619,8 @@ namespace System.Text // For fallback we may need a fallback buffer. // We wait to initialize it though in case we don't have any broken input unicode EncoderFallbackBuffer fallbackBuffer = null; + char* pSrcForFallback; + char* pSrc = chars; byte* pTarget = bytes; @@ -764,7 +770,9 @@ namespace System.Text // Do our fallback. Actually we already know its a mixed up surrogate, // so the ref pSrc isn't gonna do anything. - fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrc); + pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrcForFallback); + pSrc = pSrcForFallback; // Ignore it if we don't throw ch = 0; @@ -1528,6 +1536,8 @@ namespace System.Text int ch = 0; DecoderFallbackBuffer fallback = null; + byte* pSrcForFallback; + char* pTargetForFallback; if (baseDecoder != null) { UTF8Decoder decoder = (UTF8Decoder)baseDecoder; @@ -1639,7 +1649,13 @@ namespace System.Text fallback.InternalInitialize(bytes, pAllocatedBufferEnd); } // This'll back us up the appropriate # of bytes if we didn't get anywhere - if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget)) + pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered + pTargetForFallback = pTarget; // Avoid passing pTarget by reference to allow it to be enregistered + bool fallbackResult = FallbackInvalidByteSequence(ref pSrcForFallback, ch, fallback, ref pTargetForFallback); + pSrc = pSrcForFallback; + pTarget = pTargetForFallback; + + if (!fallbackResult) { // Ran out of buffer space // Need to throw an exception? @@ -2041,7 +2057,13 @@ namespace System.Text } // This'll back us up the appropriate # of bytes if we didn't get anywhere - if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget)) + pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered + pTargetForFallback = pTarget; // Avoid passing pTarget by reference to allow it to be enregistered + bool fallbackResult = FallbackInvalidByteSequence(ref pSrcForFallback, ch, fallback, ref pTargetForFallback); + pSrc = pSrcForFallback; + pTarget = pTargetForFallback; + + if (!fallbackResult) { Debug.Assert(pSrc >= bytes || pTarget == chars, "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing"); diff --git a/src/coreclr/src/mscorlib/src/System/Text/UnicodeEncoding.cs b/src/coreclr/src/mscorlib/src/System/Text/UnicodeEncoding.cs index fb667ba..b15bced 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/UnicodeEncoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/UnicodeEncoding.cs @@ -204,6 +204,7 @@ namespace System.Text // For fallback we may need a fallback buffer EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; if (encoder != null) { @@ -349,7 +350,9 @@ namespace System.Text fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false); } - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; // Now no high surrogate left over charLeftOver = (char)0; @@ -381,7 +384,9 @@ namespace System.Text // Set our internal fallback interesting things. fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false); } - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; continue; } @@ -412,7 +417,9 @@ namespace System.Text // Set our internal fallback interesting things. fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false); } - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; // Ignore charLeftOver or throw byteCount -= 2; @@ -452,7 +459,9 @@ namespace System.Text // Set our internal fallback interesting things. fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false); } - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; charLeftOver = (char)0; wasHereBefore = true; goto TryAgain; @@ -489,6 +498,7 @@ namespace System.Text // For fallback we may need a fallback buffer EncoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; // Get our encoder, but don't clear it yet. if (encoder != null) @@ -695,7 +705,9 @@ namespace System.Text fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true); } - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; charLeftOver = (char)0; continue; @@ -722,7 +734,9 @@ namespace System.Text fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true); } - fallbackBuffer.InternalFallback(ch, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(ch, ref charsForFallback); + chars = charsForFallback; continue; } @@ -790,7 +804,9 @@ namespace System.Text fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true); } - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; // Ignore charLeftOver or throw charLeftOver = (char)0; @@ -856,7 +872,9 @@ namespace System.Text } // If we're not flushing, this'll remember the left over character. - fallbackBuffer.InternalFallback(charLeftOver, ref chars); + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback); + chars = charsForFallback; charLeftOver = (char)0; wasHereBefore = true; @@ -1249,6 +1267,7 @@ namespace System.Text // For fallback we may need a fallback buffer DecoderFallbackBuffer fallbackBuffer = null; + char* charsForFallback; byte* byteEnd = bytes + byteCount; char* charEnd = chars + charCount; @@ -1396,7 +1415,11 @@ namespace System.Text fallbackBuffer.InternalInitialize(byteStart, charEnd); } - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { // couldn't fall back lonely surrogate // We either advanced bytes or chars should == charStart and throw below @@ -1444,7 +1467,11 @@ namespace System.Text fallbackBuffer.InternalInitialize(byteStart, charEnd); } - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { // couldn't fall back lonely surrogate // We either advanced bytes or chars should == charStart and throw below @@ -1502,7 +1529,11 @@ namespace System.Text fallbackBuffer.InternalInitialize(byteStart, charEnd); } - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { // couldn't fall back high surrogate, or char that would be next // We either advanced bytes or chars should == charStart and throw below @@ -1563,7 +1594,11 @@ namespace System.Text fallbackBuffer.InternalInitialize(byteStart, charEnd); } - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { // 2 bytes couldn't fall back // We either advanced bytes or chars should == charStart and throw below @@ -1599,7 +1634,11 @@ namespace System.Text } // No hanging odd bytes allowed if must flush - if (!fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref chars)) + charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered + bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback); + chars = charsForFallback; + + if (!fallbackResult) { // odd byte couldn't fall back bytes--; // didn't use this byte