From d6291f6bbe6bf0036dc162673e960155046d50aa Mon Sep 17 00:00:00 2001 From: devsko Date: Tue, 7 Feb 2023 22:39:47 +0100 Subject: [PATCH] Fix EncodingExtensions.Convert for sliced single segment ROSequence (#81665) Fixes https://github.com/dotnet/runtime/issues/45346 --- .../src/System/Text/EncodingExtensions.cs | 72 +++++++++++++--------- .../EncodingExtensions/EncodingExtensionsTests.cs | 70 +++++++++++++++++++++ 2 files changed, 113 insertions(+), 29 deletions(-) diff --git a/src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs b/src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs index 2441e01..fc2ef97 100644 --- a/src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs +++ b/src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs @@ -27,7 +27,7 @@ namespace System.Text /// and writes the result to . /// /// The which represents how the data in should be encoded. - /// The to encode to s. + /// The to encode to s. /// The buffer to which the encoded bytes will be written. /// Thrown if contains data that cannot be encoded and is configured /// to throw an exception when such data is seen. @@ -462,25 +462,32 @@ namespace System.Text { // Parameter null checks will be performed by the workhorse routine. - ReadOnlySequence remainingChars = chars; - long totalBytesWritten = 0; - bool isFinalSegment; - - do + if (chars.IsSingleSegment) { - // Process each segment individually. We need to run at least one iteration of the loop in case - // the Encoder has internal state. + Convert(encoder, chars.FirstSpan, writer, flush, out bytesUsed, out completed); + } + else + { + ReadOnlySequence remainingChars = chars; + long totalBytesWritten = 0; + bool isFinalSegment; - remainingChars.GetFirstSpan(out ReadOnlySpan firstSpan, out SequencePosition next); - isFinalSegment = remainingChars.IsSingleSegment; + do + { + // Process each segment individually. 
We need to run at least one iteration of the loop in case + // the Encoder has internal state. - Convert(encoder, firstSpan, writer, flush && isFinalSegment, out long bytesWrittenThisIteration, out completed); + remainingChars.GetFirstSpan(out ReadOnlySpan firstSpan, out SequencePosition next); + isFinalSegment = remainingChars.IsSingleSegment; - totalBytesWritten += bytesWrittenThisIteration; - remainingChars = remainingChars.Slice(next); - } while (!isFinalSegment); + Convert(encoder, firstSpan, writer, flush && isFinalSegment, out long bytesWrittenThisIteration, out completed); - bytesUsed = totalBytesWritten; + totalBytesWritten += bytesWrittenThisIteration; + remainingChars = remainingChars.Slice(next); + } while (!isFinalSegment); + + bytesUsed = totalBytesWritten; + } } /// @@ -549,25 +556,32 @@ namespace System.Text { // Parameter null checks will be performed by the workhorse routine. - ReadOnlySequence remainingBytes = bytes; - long totalCharsWritten = 0; - bool isFinalSegment; - - do + if (bytes.IsSingleSegment) { - // Process each segment individually. We need to run at least one iteration of the loop in case - // the Decoder has internal state. + Convert(decoder, bytes.FirstSpan, writer, flush, out charsUsed, out completed); + } + else + { + ReadOnlySequence remainingBytes = bytes; + long totalCharsWritten = 0; + bool isFinalSegment; - remainingBytes.GetFirstSpan(out ReadOnlySpan firstSpan, out SequencePosition next); - isFinalSegment = remainingBytes.IsSingleSegment; + do + { + // Process each segment individually. We need to run at least one iteration of the loop in case + // the Decoder has internal state. 
- Convert(decoder, firstSpan, writer, flush && isFinalSegment, out long charsWrittenThisIteration, out completed); + remainingBytes.GetFirstSpan(out ReadOnlySpan firstSpan, out SequencePosition next); + isFinalSegment = remainingBytes.IsSingleSegment; - totalCharsWritten += charsWrittenThisIteration; - remainingBytes = remainingBytes.Slice(next); - } while (!isFinalSegment); + Convert(decoder, firstSpan, writer, flush && isFinalSegment, out long charsWrittenThisIteration, out completed); - charsUsed = totalCharsWritten; + totalCharsWritten += charsWrittenThisIteration; + remainingBytes = remainingBytes.Slice(next); + } while (!isFinalSegment); + + charsUsed = totalCharsWritten; + } } } } diff --git a/src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs b/src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs index b3c8b9e..673d0ba 100644 --- a/src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs +++ b/src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs @@ -149,6 +149,41 @@ namespace System.Text.Tests } [Fact] + public static void Convert_Decoder_ReadOnlySequence_Single_Sliced() + { + Decoder decoder = Encoding.UTF8.GetDecoder(); + ArrayBufferWriter writer = new ArrayBufferWriter(); + + // First, input with no flushing and no leftover data. + + ReadOnlySequence inputData = SequenceFactory.Create( + new byte[] { 0x00, 0x20, 0x61, 0xC2, 0x80, 0xED, 0x9F, 0xBF }).Slice(1); + EncodingExtensions.Convert(decoder, inputData, writer, flush: false, out long charsUsed, out bool completed); + Assert.Equal(4, charsUsed); + Assert.True(completed); + + // Then, input with no flushing and leftover data. 
+ + inputData = SequenceFactory.Create( + new byte[] { 0x00, 0xF4, 0x80 }).Slice(1); + EncodingExtensions.Convert(decoder, inputData, writer, flush: false, out charsUsed, out completed); + Assert.Equal(0, charsUsed); + Assert.True(completed); + + // Then, input with flushing and leftover data (should be replaced). + + inputData = SequenceFactory.Create( + new byte[] { 0x00, 0x80, 0x80, 0xC2 }).Slice(1); + EncodingExtensions.Convert(decoder, inputData, writer, flush: true, out charsUsed, out completed); + Assert.Equal(3, charsUsed); + Assert.True(completed); + + // Now make sure all of the data was decoded properly. + + Assert.Equal("\u0020\u0061\u0080\ud7ff\U00100000\ufffd", writer.WrittenSpan.ToString()); + } + + [Fact] public static void Convert_Encoder_ReadOnlySpan_IBufferWriter_ParamChecks() { Encoder encoder = Encoding.UTF8.GetEncoder(); @@ -240,6 +275,41 @@ namespace System.Text.Tests } [Fact] + public static void Convert_Encoder_ReadOnlySequence_Single_Sliced() + { + Encoder encoder = Encoding.UTF8.GetEncoder(); + ArrayBufferWriter writer = new ArrayBufferWriter(); + + // First, input with no flushing and no leftover data. + + ReadOnlySequence inputData = SequenceFactory.Create( + new char[] { ' ', '\u0020', '\ud7ff' }).Slice(1); + EncodingExtensions.Convert(encoder, inputData, writer, flush: false, out long bytesUsed, out bool completed); + Assert.Equal(4, bytesUsed); + Assert.True(completed); + + // Then, input with no flushing and leftover data. + + inputData = SequenceFactory.Create( + new char[] { ' ', '\udbc0' }).Slice(1); + EncodingExtensions.Convert(encoder, inputData, writer, flush: false, out bytesUsed, out completed); + Assert.Equal(0, bytesUsed); + Assert.True(completed); + + // Then, input with flushing and leftover data (should be replaced). 
+ + inputData = SequenceFactory.Create( + new char[] { ' ', '\udc00', '\ud800' }).Slice(1); + EncodingExtensions.Convert(encoder, inputData, writer, flush: true, out bytesUsed, out completed); + Assert.Equal(7, bytesUsed); + Assert.True(completed); + + // Now make sure all of the data was encoded properly. + + Assert.Equal("\u0020\ud7ff\U00100000\ufffd"u8.ToArray(), writer.WrittenSpan.ToArray()); + } + + [Fact] public static void GetBytes_Encoding_ReadOnlySequence_ParamChecks() { ReadOnlySequence sequence = new ReadOnlySequence(new char[0]); -- 2.7.4