Fix EncodingExtensions.Convert for sliced single segment ROSequence (#81665)
author devsko <devsko@users.noreply.github.com>
Tue, 7 Feb 2023 21:39:47 +0000 (22:39 +0100)
committer GitHub <noreply@github.com>
Tue, 7 Feb 2023 21:39:47 +0000 (13:39 -0800)
Fixes https://github.com/dotnet/runtime/issues/45346

src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs
src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs

index 2441e01..fc2ef97 100644 (file)
@@ -27,7 +27,7 @@ namespace System.Text
         /// and writes the result to <paramref name="writer"/>.
         /// </summary>
         /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="chars"/> should be encoded.</param>
-        /// <param name="chars">The <see cref="ReadOnlySequence{Char}"/> to encode to <see langword="byte"/>s.</param>
+        /// <param name="chars">The <see cref="ReadOnlySpan{Char}"/> to encode to <see langword="byte"/>s.</param>
         /// <param name="writer">The buffer to which the encoded bytes will be written.</param>
         /// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoding"/> is configured
         /// to throw an exception when such data is seen.</exception>
@@ -462,25 +462,32 @@ namespace System.Text
         {
             // Parameter null checks will be performed by the workhorse routine.
 
-            ReadOnlySequence<char> remainingChars = chars;
-            long totalBytesWritten = 0;
-            bool isFinalSegment;
-
-            do
+            if (chars.IsSingleSegment)
             {
-                // Process each segment individually. We need to run at least one iteration of the loop in case
-                // the Encoder has internal state.
+                Convert(encoder, chars.FirstSpan, writer, flush, out bytesUsed, out completed);
+            }
+            else
+            {
+                ReadOnlySequence<char> remainingChars = chars;
+                long totalBytesWritten = 0;
+                bool isFinalSegment;
 
-                remainingChars.GetFirstSpan(out ReadOnlySpan<char> firstSpan, out SequencePosition next);
-                isFinalSegment = remainingChars.IsSingleSegment;
+                do
+                {
+                    // Process each segment individually. We need to run at least one iteration of the loop in case
+                    // the Encoder has internal state.
 
-                Convert(encoder, firstSpan, writer, flush && isFinalSegment, out long bytesWrittenThisIteration, out completed);
+                    remainingChars.GetFirstSpan(out ReadOnlySpan<char> firstSpan, out SequencePosition next);
+                    isFinalSegment = remainingChars.IsSingleSegment;
 
-                totalBytesWritten += bytesWrittenThisIteration;
-                remainingChars = remainingChars.Slice(next);
-            } while (!isFinalSegment);
+                    Convert(encoder, firstSpan, writer, flush && isFinalSegment, out long bytesWrittenThisIteration, out completed);
 
-            bytesUsed = totalBytesWritten;
+                    totalBytesWritten += bytesWrittenThisIteration;
+                    remainingChars = remainingChars.Slice(next);
+                } while (!isFinalSegment);
+
+                bytesUsed = totalBytesWritten;
+            }
         }
 
         /// <summary>
@@ -549,25 +556,32 @@ namespace System.Text
         {
             // Parameter null checks will be performed by the workhorse routine.
 
-            ReadOnlySequence<byte> remainingBytes = bytes;
-            long totalCharsWritten = 0;
-            bool isFinalSegment;
-
-            do
+            if (bytes.IsSingleSegment)
             {
-                // Process each segment individually. We need to run at least one iteration of the loop in case
-                // the Decoder has internal state.
+                Convert(decoder, bytes.FirstSpan, writer, flush, out charsUsed, out completed);
+            }
+            else
+            {
+                ReadOnlySequence<byte> remainingBytes = bytes;
+                long totalCharsWritten = 0;
+                bool isFinalSegment;
 
-                remainingBytes.GetFirstSpan(out ReadOnlySpan<byte> firstSpan, out SequencePosition next);
-                isFinalSegment = remainingBytes.IsSingleSegment;
+                do
+                {
+                    // Process each segment individually. We need to run at least one iteration of the loop in case
+                    // the Decoder has internal state.
 
-                Convert(decoder, firstSpan, writer, flush && isFinalSegment, out long charsWrittenThisIteration, out completed);
+                    remainingBytes.GetFirstSpan(out ReadOnlySpan<byte> firstSpan, out SequencePosition next);
+                    isFinalSegment = remainingBytes.IsSingleSegment;
 
-                totalCharsWritten += charsWrittenThisIteration;
-                remainingBytes = remainingBytes.Slice(next);
-            } while (!isFinalSegment);
+                    Convert(decoder, firstSpan, writer, flush && isFinalSegment, out long charsWrittenThisIteration, out completed);
 
-            charsUsed = totalCharsWritten;
+                    totalCharsWritten += charsWrittenThisIteration;
+                    remainingBytes = remainingBytes.Slice(next);
+                } while (!isFinalSegment);
+
+                charsUsed = totalCharsWritten;
+            }
         }
     }
 }
index b3c8b9e..673d0ba 100644 (file)
@@ -149,6 +149,41 @@ namespace System.Text.Tests
         }
 
         [Fact]
+        public static void Convert_Decoder_ReadOnlySequence_Single_Sliced()
+        {
+            Decoder decoder = Encoding.UTF8.GetDecoder();
+            ArrayBufferWriter<char> writer = new ArrayBufferWriter<char>();
+
+            // Feeds the same decoder three sequences, each sliced past a leading 0x00 byte, and checks the combined output at the end.
+            // First, input with no flushing and no leftover data.
+            ReadOnlySequence<byte> inputData = SequenceFactory.Create(
+                new byte[] { 0x00, 0x20, 0x61, 0xC2, 0x80, 0xED, 0x9F, 0xBF }).Slice(1); // Slice(1) skips the leading 0x00
+            EncodingExtensions.Convert(decoder, inputData, writer, flush: false, out long charsUsed, out bool completed);
+            Assert.Equal(4, charsUsed); // U+0020, U+0061, U+0080, U+D7FF
+            Assert.True(completed);
+
+            // Then, input with no flushing that ends in an incomplete UTF-8 sequence (0xF4 0x80): no chars are expected yet.
+
+            inputData = SequenceFactory.Create(
+                new byte[] { 0x00, 0xF4, 0x80 }).Slice(1);
+            EncodingExtensions.Convert(decoder, inputData, writer, flush: false, out charsUsed, out completed);
+            Assert.Equal(0, charsUsed);
+            Assert.True(completed);
+
+            // Then, input with flushing: 0x80 0x80 finishes the pending sequence (U+100000) and the trailing lone 0xC2 should be replaced with U+FFFD.
+
+            inputData = SequenceFactory.Create(
+                new byte[] { 0x00, 0x80, 0x80, 0xC2 }).Slice(1);
+            EncodingExtensions.Convert(decoder, inputData, writer, flush: true, out charsUsed, out completed);
+            Assert.Equal(3, charsUsed); // surrogate pair for U+100000 (2 chars) + U+FFFD (1 char)
+            Assert.True(completed);
+
+            // Now make sure all of the data was decoded properly; none of the sliced-off 0x00 bytes may appear in the output.
+
+            Assert.Equal("\u0020\u0061\u0080\ud7ff\U00100000\ufffd", writer.WrittenSpan.ToString());
+        }
+
+        [Fact]
         public static void Convert_Encoder_ReadOnlySpan_IBufferWriter_ParamChecks()
         {
             Encoder encoder = Encoding.UTF8.GetEncoder();
@@ -240,6 +275,41 @@ namespace System.Text.Tests
         }
 
         [Fact]
+        public static void Convert_Encoder_ReadOnlySequence_Single_Sliced()
+        {
+            Encoder encoder = Encoding.UTF8.GetEncoder();
+            ArrayBufferWriter<byte> writer = new ArrayBufferWriter<byte>();
+
+            // Feeds the same encoder three sequences, each sliced past a leading ' ' char, and checks the combined output at the end.
+            // First, input with no flushing and no leftover data.
+            ReadOnlySequence<char> inputData = SequenceFactory.Create(
+                new char[] { ' ', '\u0020', '\ud7ff' }).Slice(1); // Slice(1) skips the leading ' '
+            EncodingExtensions.Convert(encoder, inputData, writer, flush: false, out long bytesUsed, out bool completed);
+            Assert.Equal(4, bytesUsed); // U+0020 is 1 UTF-8 byte, U+D7FF is 3
+            Assert.True(completed);
+
+            // Then, input with no flushing that ends in a lone high surrogate (U+DBC0): no bytes are expected yet.
+
+            inputData = SequenceFactory.Create(
+                new char[] { ' ', '\udbc0' }).Slice(1);
+            EncodingExtensions.Convert(encoder, inputData, writer, flush: false, out bytesUsed, out completed);
+            Assert.Equal(0, bytesUsed);
+            Assert.True(completed);
+
+            // Then, input with flushing: U+DC00 completes the pending pair (U+100000) and the trailing lone U+D800 should be replaced with U+FFFD.
+
+            inputData = SequenceFactory.Create(
+                new char[] { ' ', '\udc00', '\ud800' }).Slice(1);
+            EncodingExtensions.Convert(encoder, inputData, writer, flush: true, out bytesUsed, out completed);
+            Assert.Equal(7, bytesUsed); // U+100000 is 4 UTF-8 bytes, U+FFFD is 3
+            Assert.True(completed);
+
+            // Now make sure all of the data was encoded properly; none of the sliced-off ' ' chars may appear in the output.
+
+            Assert.Equal("\u0020\ud7ff\U00100000\ufffd"u8.ToArray(), writer.WrittenSpan.ToArray());
+        }
+
+        [Fact]
         public static void GetBytes_Encoding_ReadOnlySequence_ParamChecks()
         {
             ReadOnlySequence<char> sequence = new ReadOnlySequence<char>(new char[0]);