From: Levi Broderick Date: Wed, 26 Jun 2019 22:05:53 +0000 (-0700) Subject: Fix EncoderNLS / DecoderNLS regression in reporting error index (#25397) X-Git-Tag: accepted/tizen/unified/20190813.215958~40^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=17112db9d9a4e3aa6c5280ce2059796e168de0f9;p=platform%2Fupstream%2Fcoreclr.git Fix EncoderNLS / DecoderNLS regression in reporting error index (#25397) Also fixes incorrect asserts in the Encoding type --- diff --git a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs index 5b0247a..184d59d 100644 --- a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs +++ b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs @@ -296,9 +296,10 @@ namespace System.Text break; } - // Couldn't decode the buffer. Fallback the buffer instead. + // Couldn't decode the buffer. Fallback the buffer instead. See comment in DrainLeftoverDataForGetChars + // for more information on why a negative index is provided. - if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: 0)) + if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: -_leftoverByteCount)) { charCount = _fallbackBuffer!.DrainRemainingDataForGetCharCount(); Debug.Assert(charCount >= 0, "Fallback buffer shouldn't have returned a negative char count."); @@ -360,9 +361,13 @@ namespace System.Text break; } - // Couldn't decode the buffer. Fallback the buffer instead. + // Couldn't decode the buffer. Fallback the buffer instead. The fallback mechanism relies + // on a negative index to convey "the start of the invalid sequence was some number of + // bytes back before the current buffer." Since we know the invalid sequence must have + // started at the beginning of our leftover byte buffer, we can signal to our caller that + // they must backtrack that many bytes to find the real start of the invalid sequence. - if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: 0) + if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: -_leftoverByteCount) && !_fallbackBuffer!.TryDrainRemainingDataForGetChars(chars, out charsWritten)) { goto DestinationTooSmall; diff --git a/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs b/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs index 8ec3e33..e71ad05 100644 --- a/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs +++ b/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs @@ -302,12 +302,18 @@ namespace System.Text } else { - didFallback = FallbackBuffer.Fallback(_charLeftOver, secondChar, index: 0); + // The fallback mechanism relies on a negative index to convey "the start of the invalid + // sequence was some number of chars back before the current buffer." In this block and + // in the block immediately thereafter, we know we have a single leftover high surrogate + // character from a previous operation, so we provide an index of -1 to convey that the + // char immediately before the current buffer was the start of the invalid sequence. + + didFallback = FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1); } } else { - didFallback = FallbackBuffer.Fallback(_charLeftOver, index: 0); + didFallback = FallbackBuffer.Fallback(_charLeftOver, index: -1); } // Now tally the number of bytes that would've been emitted as part of fallback. @@ -367,7 +373,7 @@ namespace System.Text break; case OperationStatus.InvalidData: - FallbackBuffer.Fallback(_charLeftOver, secondChar, index: 0); + FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1); // see comment in DrainLeftoverDataForGetByteCount break; default: @@ -377,7 +383,7 @@ namespace System.Text } else { - FallbackBuffer.Fallback(_charLeftOver, index: 0); + FallbackBuffer.Fallback(_charLeftOver, index: -1); // see comment in DrainLeftoverDataForGetByteCount } } diff --git a/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs b/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs index a033cdc..08a5543 100644 --- a/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs +++ b/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs @@ -257,7 +257,7 @@ namespace System.Text private unsafe int GetByteCountWithFallback(char* pOriginalChars, int originalCharCount, int charsConsumedSoFar, EncoderNLS encoder) { Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS."); - Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Caller should've checked this condition."); + Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition."); // First, try draining any data that already exists on the encoder instance. If we can't complete // that operation, there's no point to continuing down to the main workhorse methods. @@ -523,7 +523,7 @@ namespace System.Text private unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar, EncoderNLS encoder) { Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS."); - Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Caller should've checked this condition."); + Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition."); Debug.Assert(0 <= bytesWrittenSoFar && bytesWrittenSoFar <= originalByteCount, "Caller should've checked this condition."); // First, try draining any data that already exists on the encoder instance. If we can't complete @@ -843,7 +843,7 @@ namespace System.Text private unsafe int GetCharCountWithFallback(byte* pOriginalBytes, int originalByteCount, int bytesConsumedSoFar, DecoderNLS decoder) { Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS."); - Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Caller should've checked this condition."); + Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar <= originalByteCount, "Caller should've checked this condition."); // First, try draining any data that already exists on the decoder instance. If we can't complete // that operation, there's no point to continuing down to the main workhorse methods. @@ -1111,7 +1111,7 @@ namespace System.Text private protected unsafe int GetCharsWithFallback(byte* pOriginalBytes, int originalByteCount, char* pOriginalChars, int originalCharCount, int bytesConsumedSoFar, int charsWrittenSoFar, DecoderNLS decoder) { Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS."); - Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Caller should've checked this condition."); + Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar <= originalByteCount, "Caller should've checked this condition."); Debug.Assert(0 <= charsWrittenSoFar && charsWrittenSoFar <= originalCharCount, "Caller should've checked this condition."); // First, try draining any data that already exists on the encoder instance. If we can't complete