Add Encoding extensions which work with sequences (dotnet/corefx#41810)

author Levi Broderick <GrabYourPitchforks@users.noreply.github.com>

Tue, 22 Oct 2019 22:18:05 +0000 (15:18 -0700)

committer GitHub <noreply@github.com>

Tue, 22 Oct 2019 22:18:05 +0000 (15:18 -0700)
author Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
Tue, 22 Oct 2019 22:18:05 +0000 (15:18 -0700)
committer GitHub <noreply@github.com>
Tue, 22 Oct 2019 22:18:05 +0000 (15:18 -0700)
diff --git a/src/libraries/System.Memory/ref/System.Memory.cs b/src/libraries/System.Memory/ref/System.Memory.cs

index fe0af37..58c0d2f 100644 (file)
--- a/src/libraries/System.Memory/ref/System.Memory.cs
+++ b/src/libraries/System.Memory/ref/System.Memory.cs
@@ -645,6 +645,21 @@ namespace System.Runtime.InteropServices
  }
  namespace System.Text
  {
+    public static partial class EncodingExtensions
+    {
+        public static void Convert(this System.Text.Decoder decoder, in System.Buffers.ReadOnlySequence<byte> bytes, System.Buffers.IBufferWriter<char> writer, bool flush, out long charsUsed, out bool completed) { throw null; }
+        public static void Convert(this System.Text.Decoder decoder, System.ReadOnlySpan<byte> bytes, System.Buffers.IBufferWriter<char> writer, bool flush, out long charsUsed, out bool completed) { throw null; }
+        public static void Convert(this System.Text.Encoder encoder, in System.Buffers.ReadOnlySequence<char> chars, System.Buffers.IBufferWriter<byte> writer, bool flush, out long bytesUsed, out bool completed) { throw null; }
+        public static void Convert(this System.Text.Encoder encoder, System.ReadOnlySpan<char> chars, System.Buffers.IBufferWriter<byte> writer, bool flush, out long bytesUsed, out bool completed) { throw null; }
+        public static byte[] GetBytes(this System.Text.Encoding encoding, in System.Buffers.ReadOnlySequence<char> chars) { throw null; }
+        public static long GetBytes(this System.Text.Encoding encoding, in System.Buffers.ReadOnlySequence<char> chars, System.Buffers.IBufferWriter<byte> writer) { throw null; }
+        public static int GetBytes(this System.Text.Encoding encoding, in System.Buffers.ReadOnlySequence<char> chars, System.Span<byte> bytes) { throw null; }
+        public static long GetBytes(this System.Text.Encoding encoding, System.ReadOnlySpan<char> chars, System.Buffers.IBufferWriter<byte> writer) { throw null; }
+        public static long GetChars(this System.Text.Encoding encoding, in System.Buffers.ReadOnlySequence<byte> bytes, System.Buffers.IBufferWriter<char> writer) { throw null; }
+        public static int GetChars(this System.Text.Encoding encoding, in System.Buffers.ReadOnlySequence<byte> bytes, System.Span<char> chars) { throw null; }
+        public static long GetChars(this System.Text.Encoding encoding, System.ReadOnlySpan<byte> bytes, System.Buffers.IBufferWriter<char> writer) { throw null; }
+        public static string GetString(this System.Text.Encoding encoding, in System.Buffers.ReadOnlySequence<byte> bytes) { throw null; }
+    }
      public ref partial struct SpanRuneEnumerator
      {
          private object _dummy;
diff --git a/src/libraries/System.Memory/src/System.Memory.csproj b/src/libraries/System.Memory/src/System.Memory.csproj

index e51e55d..1bb5702 100644 (file)
--- a/src/libraries/System.Memory/src/System.Memory.csproj
+++ b/src/libraries/System.Memory/src/System.Memory.csproj
@@ -30,6 +30,7 @@
      <Compile Include="System\Buffers\Text\Base64Decoder.cs" />
      <Compile Include="System\Buffers\Text\Base64Encoder.cs" />
      <Compile Include="System\Runtime\InteropServices\SequenceMarshal.cs" />
+    <Compile Include="System\Text\EncodingExtensions.cs" />
    </ItemGroup>
    <ItemGroup>
      <!-- Common or Common-branched source files -->
diff --git a/src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs b/src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs

new file mode 100644 (file)

index 0000000..100a2da
--- /dev/null
+++ b/src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs
@@ -0,0 +1,628 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace System.Text
+{
+    public static class EncodingExtensions
+    {
+        /// <summary>
+        /// The maximum number of input elements (chars or bytes) processed in a single one-shot call; larger inputs are chunked.
+        /// </summary>
+        /// <remarks>
+        /// The reason for this chunking is that the existing Encoding / Encoder / Decoder APIs
+        /// like GetByteCount / GetCharCount will throw if an integer overflow occurs. Since
+        /// we may be working with large inputs in these extension methods, we don't want to
+        /// risk running into this issue. While it's technically possible even for 1,048,576 (1 Mi)
+        /// input elements to result in an overflow condition, such a scenario is unrealistic,
+        /// so we won't worry about it.
+        /// </remarks>
+        private const int MaxInputElementsPerIteration = 1 * 1024 * 1024;
+
+        /// <summary>
+        /// Encodes the specified <see cref="ReadOnlySpan{Char}"/> to <see langword="byte"/>s using the specified <see cref="Encoding"/>
+        /// and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="chars"/> should be encoded.</param>
+        /// <param name="chars">The <see cref="ReadOnlySpan{Char}"/> to encode to <see langword="byte"/>s.</param>
+        /// <param name="writer">The buffer to which the encoded bytes will be written.</param>
+        /// <returns>The number of bytes written to <paramref name="writer"/>.</returns>
+        /// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoding"/> is configured to throw an exception when such data is seen.</exception>
+        public static long GetBytes(this Encoding encoding, ReadOnlySpan<char> chars, IBufferWriter<byte> writer)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (writer is null)
+            {
+                throw new ArgumentNullException(nameof(writer));
+            }
+
+            if (chars.Length <= MaxInputElementsPerIteration)
+            {
+                // The input span is small enough that we can one-shot this: size the request with GetByteCount, then encode straight into the writer's span.
+
+                int byteCount = encoding.GetByteCount(chars);
+                Span<byte> scratchBuffer = writer.GetSpan(byteCount);
+
+                int actualBytesWritten = encoding.GetBytes(chars, scratchBuffer);
+
+                writer.Advance(actualBytesWritten);
+                return actualBytesWritten;
+            }
+            else
+            {
+                // Too large to one-shot: allocate a stateful Encoder instance and let Convert chunk this. The 'completed' result is not inspected here.
+
+                Convert(encoding.GetEncoder(), chars, writer, flush: true, out long totalBytesWritten, out bool completed);
+                return totalBytesWritten;
+            }
+        }
+
+        /// <summary>
+        /// Encodes the specified <see cref="ReadOnlySequence{Char}"/> to <see langword="byte"/>s using the specified <see cref="Encoding"/>
+        /// and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="chars"/> should be encoded.</param>
+        /// <param name="chars">The <see cref="ReadOnlySequence{Char}"/> whose contents should be encoded.</param>
+        /// <param name="writer">The buffer to which the encoded bytes will be written.</param>
+        /// <returns>The number of bytes written to <paramref name="writer"/>.</returns>
+        /// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoding"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static long GetBytes(this Encoding encoding, in ReadOnlySequence<char> chars, IBufferWriter<byte> writer)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (writer is null)
+            {
+                throw new ArgumentNullException(nameof(writer));
+            }
+
+            // A single-segment sequence is just a span, so delegate to the Span-based method.
+            // Otherwise, allocate a stateful Encoder instance and let the sequence-based Convert walk the segments.
+
+            if (chars.IsSingleSegment)
+            {
+                return GetBytes(encoding, chars.FirstSpan, writer);
+            }
+            else
+            {
+                Convert(encoding.GetEncoder(), chars, writer, flush: true, out long bytesWritten, out bool completed);
+                return bytesWritten;
+            }
+        }
+
+        /// <summary>
+        /// Encodes the specified <see cref="ReadOnlySequence{Char}"/> to <see langword="byte"/>s using the specified <see cref="Encoding"/>
+        /// and outputs the result to <paramref name="bytes"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="chars"/> should be encoded.</param>
+        /// <param name="chars">The <see cref="ReadOnlySequence{Char}"/> to encode to <see langword="byte"/>s.</param>
+        /// <param name="bytes">The destination buffer to which the encoded bytes will be written.</param>
+        /// <returns>The number of bytes written to <paramref name="bytes"/>.</returns>
+        /// <exception cref="ArgumentException">Thrown if <paramref name="bytes"/> is not large enough to contain the encoded form of <paramref name="chars"/>.</exception>
+        /// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoding"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static int GetBytes(this Encoding encoding, in ReadOnlySequence<char> chars, Span<byte> bytes)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (chars.IsSingleSegment)
+            {
+                // If the incoming sequence is single-segment, one-shot this with the span-based Encoding API.
+
+                return encoding.GetBytes(chars.FirstSpan, bytes);
+            }
+            else
+            {
+                // If the incoming sequence is multi-segment, create a stateful Encoder and feed it
+                // one segment per iteration. Only the final segment is passed with flush=true.
+
+                ReadOnlySequence<char> remainingChars = chars;
+                int originalBytesLength = bytes.Length;
+                Encoder encoder = encoding.GetEncoder();
+                bool isFinalSegment;
+
+                do
+                {
+                    remainingChars.GetFirstSpan(out ReadOnlySpan<char> firstSpan, out SequencePosition next);
+                    isFinalSegment = remainingChars.IsSingleSegment;
+
+                    int bytesWrittenJustNow = encoder.GetBytes(firstSpan, bytes, flush: isFinalSegment);
+                    bytes = bytes.Slice(bytesWrittenJustNow);
+                    remainingChars = remainingChars.Slice(next);
+                } while (!isFinalSegment);
+
+                return originalBytesLength - bytes.Length; // 'bytes' was advanced past every write, so the difference is the total written
+            }
+        }
+
+        /// <summary>
+        /// Encodes the specified <see cref="ReadOnlySequence{Char}"/> into a <see cref="byte"/> array using the specified <see cref="Encoding"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="chars"/> should be encoded.</param>
+        /// <param name="chars">The <see cref="ReadOnlySequence{Char}"/> to encode to <see langword="byte"/>s.</param>
+        /// <returns>A <see cref="byte"/> array which represents the encoded contents of <paramref name="chars"/>.</returns>
+        /// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoding"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static byte[] GetBytes(this Encoding encoding, in ReadOnlySequence<char> chars)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (chars.IsSingleSegment)
+            {
+                // If the incoming sequence is single-segment, one-shot this into an exactly-sized array.
+
+                ReadOnlySpan<char> span = chars.FirstSpan;
+
+                byte[] retVal = new byte[encoding.GetByteCount(span)];
+                encoding.GetBytes(span, retVal);
+                return retVal;
+            }
+            else
+            {
+                // If the incoming sequence is multi-segment, create a stateful Encoder
+                // and use it as the workhorse. On the final iteration we'll pass flush=true.
+
+                Encoder encoder = encoding.GetEncoder();
+
+                // Maintain a list of (rented array, bytes actually written) pairs that we'll need to concat together.
+                // They are returned to the pool in the copy loop below. NOTE(review): there is no try/finally, so they are not returned if an exception is thrown first.
+
+                List<(byte[], int)> listOfSegments = new List<(byte[], int)>();
+                int totalByteCount = 0;
+
+                ReadOnlySequence<char> remainingChars = chars;
+                bool isFinalSegment;
+
+                do
+                {
+                    remainingChars.GetFirstSpan(out ReadOnlySpan<char> firstSpan, out SequencePosition next);
+                    isFinalSegment = remainingChars.IsSingleSegment;
+
+                    int byteCountThisIteration = encoder.GetByteCount(firstSpan, flush: isFinalSegment);
+                    byte[] rentedArray = ArrayPool<byte>.Shared.Rent(byteCountThisIteration);
+                    int actualBytesWrittenThisIteration = encoder.GetBytes(firstSpan, rentedArray, flush: isFinalSegment); // could throw ArgumentException if overflow would occur
+                    listOfSegments.Add((rentedArray, actualBytesWrittenThisIteration));
+
+                    totalByteCount += actualBytesWrittenThisIteration;
+                    if (totalByteCount < 0)
+                    {
+                        // If the running total wrapped negative (overflowed), call the array ctor, passing int.MaxValue.
+                        // This will end up throwing the expected OutOfMemoryException
+                        // since arrays are limited to under int.MaxValue elements in length.
+
+                        totalByteCount = int.MaxValue;
+                        break;
+                    }
+
+                    remainingChars = remainingChars.Slice(next);
+                } while (!isFinalSegment);
+
+                // Now build up the byte[] to return, copying only the written prefix of each rented
+                // array, then release each scratch buffer back to the shared pool.
+
+                byte[] retVal = new byte[totalByteCount];
+                Span<byte> remainingBytes = retVal;
+
+                foreach ((byte[] array, int length) in listOfSegments)
+                {
+                    array.AsSpan(0, length).CopyTo(remainingBytes);
+                    ArrayPool<byte>.Shared.Return(array);
+                    remainingBytes = remainingBytes.Slice(length);
+                }
+
+                Debug.Assert(remainingBytes.IsEmpty, "Over-allocated the byte[] instance?");
+
+                return retVal;
+            }
+        }
+
+        /// <summary>
+        /// Decodes the specified <see cref="ReadOnlySpan{Byte}"/> to <see langword="char"/>s using the specified <see cref="Encoding"/>
+        /// and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="bytes"/> should be decoded.</param>
+        /// <param name="bytes">The <see cref="ReadOnlySpan{Byte}"/> whose bytes should be decoded.</param>
+        /// <param name="writer">The buffer to which the decoded chars will be written.</param>
+        /// <returns>The number of chars written to <paramref name="writer"/>.</returns>
+        /// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="encoding"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static long GetChars(this Encoding encoding, ReadOnlySpan<byte> bytes, IBufferWriter<char> writer)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (writer is null)
+            {
+                throw new ArgumentNullException(nameof(writer));
+            }
+
+            if (bytes.Length <= MaxInputElementsPerIteration)
+            {
+                // The input span is small enough that we can one-shot this: size the request with GetCharCount, then decode straight into the writer's span.
+
+                int charCount = encoding.GetCharCount(bytes);
+                Span<char> scratchBuffer = writer.GetSpan(charCount);
+
+                int actualCharsWritten = encoding.GetChars(bytes, scratchBuffer);
+
+                writer.Advance(actualCharsWritten);
+                return actualCharsWritten;
+            }
+            else
+            {
+                // Too large to one-shot: allocate a stateful Decoder instance and let Convert chunk this. The 'completed' result is not inspected here.
+
+                Convert(encoding.GetDecoder(), bytes, writer, flush: true, out long totalCharsWritten, out bool completed);
+                return totalCharsWritten;
+            }
+        }
+
+        /// <summary>
+        /// Decodes the specified <see cref="ReadOnlySequence{Byte}"/> to <see langword="char"/>s using the specified <see cref="Encoding"/>
+        /// and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="bytes"/> should be decoded.</param>
+        /// <param name="bytes">The <see cref="ReadOnlySequence{Byte}"/> whose bytes should be decoded.</param>
+        /// <param name="writer">The buffer to which the decoded chars will be written.</param>
+        /// <returns>The number of chars written to <paramref name="writer"/>.</returns>
+        /// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="encoding"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static long GetChars(this Encoding encoding, in ReadOnlySequence<byte> bytes, IBufferWriter<char> writer)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (writer is null)
+            {
+                throw new ArgumentNullException(nameof(writer));
+            }
+
+            // A single-segment sequence is just a span, so delegate to the Span-based method.
+            // Otherwise, allocate a stateful Decoder instance and let the sequence-based Convert walk the segments.
+
+            if (bytes.IsSingleSegment)
+            {
+                return GetChars(encoding, bytes.FirstSpan, writer);
+            }
+            else
+            {
+                Convert(encoding.GetDecoder(), bytes, writer, flush: true, out long charsWritten, out bool completed);
+                return charsWritten;
+            }
+        }
+
+        /// <summary>
+        /// Decodes the specified <see cref="ReadOnlySequence{Byte}"/> to <see langword="char"/>s using the specified <see cref="Encoding"/>
+        /// and outputs the result to <paramref name="chars"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="bytes"/> is encoded.</param>
+        /// <param name="bytes">The <see cref="ReadOnlySequence{Byte}"/> to decode to characters.</param>
+        /// <param name="chars">The destination buffer to which the decoded characters will be written.</param>
+        /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
+        /// <exception cref="ArgumentException">Thrown if <paramref name="chars"/> is not large enough to contain the decoded form of <paramref name="bytes"/>.</exception>
+        /// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="encoding"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static int GetChars(this Encoding encoding, in ReadOnlySequence<byte> bytes, Span<char> chars)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (bytes.IsSingleSegment)
+            {
+                // If the incoming sequence is single-segment, one-shot this with the span-based Encoding API.
+
+                return encoding.GetChars(bytes.FirstSpan, chars);
+            }
+            else
+            {
+                // If the incoming sequence is multi-segment, create a stateful Decoder and feed it
+                // one segment per iteration. Only the final segment is passed with flush=true.
+
+                ReadOnlySequence<byte> remainingBytes = bytes;
+                int originalCharsLength = chars.Length;
+                Decoder decoder = encoding.GetDecoder();
+                bool isFinalSegment;
+
+                do
+                {
+                    remainingBytes.GetFirstSpan(out ReadOnlySpan<byte> firstSpan, out SequencePosition next);
+                    isFinalSegment = remainingBytes.IsSingleSegment;
+
+                    int charsWrittenJustNow = decoder.GetChars(firstSpan, chars, flush: isFinalSegment);
+                    chars = chars.Slice(charsWrittenJustNow);
+                    remainingBytes = remainingBytes.Slice(next);
+                } while (!isFinalSegment);
+
+                return originalCharsLength - chars.Length; // 'chars' was advanced past every write, so the difference is the total written
+            }
+        }
+
+        /// <summary>
+        /// Decodes the specified <see cref="ReadOnlySequence{Byte}"/> into a <see cref="string"/> using the specified <see cref="Encoding"/>.
+        /// </summary>
+        /// <param name="encoding">The <see cref="Encoding"/> which represents how the data in <paramref name="bytes"/> is encoded.</param>
+        /// <param name="bytes">The <see cref="ReadOnlySequence{Byte}"/> to decode into characters.</param>
+        /// <returns>A <see cref="string"/> which represents the decoded contents of <paramref name="bytes"/>.</returns>
+        /// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="encoding"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static string GetString(this Encoding encoding, in ReadOnlySequence<byte> bytes)
+        {
+            if (encoding is null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            if (bytes.IsSingleSegment)
+            {
+                // If the incoming sequence is single-segment, one-shot this with the span-based Encoding API.
+
+                return encoding.GetString(bytes.FirstSpan);
+            }
+            else
+            {
+                // If the incoming sequence is multi-segment, create a stateful Decoder
+                // and use it as the workhorse. On the final iteration we'll pass flush=true.
+
+                Decoder decoder = encoding.GetDecoder();
+
+                // Maintain a list of (rented array, chars actually written) pairs that we'll need to concat together.
+                // They are returned to the pool inside the string.Create callback below. NOTE(review): there is no try/finally, so they are not returned if an exception is thrown first.
+
+                List<(char[], int)> listOfSegments = new List<(char[], int)>();
+                int totalCharCount = 0;
+
+                ReadOnlySequence<byte> remainingBytes = bytes;
+                bool isFinalSegment;
+
+                do
+                {
+                    remainingBytes.GetFirstSpan(out ReadOnlySpan<byte> firstSpan, out SequencePosition next);
+                    isFinalSegment = remainingBytes.IsSingleSegment;
+
+                    int charCountThisIteration = decoder.GetCharCount(firstSpan, flush: isFinalSegment); // could throw ArgumentException if overflow would occur
+                    char[] rentedArray = ArrayPool<char>.Shared.Rent(charCountThisIteration);
+                    int actualCharsWrittenThisIteration = decoder.GetChars(firstSpan, rentedArray, flush: isFinalSegment);
+                    listOfSegments.Add((rentedArray, actualCharsWrittenThisIteration));
+
+                    totalCharCount += actualCharsWrittenThisIteration;
+                    if (totalCharCount < 0)
+                    {
+                        // If the running total wrapped negative (overflowed), call string.Create, passing int.MaxValue.
+                        // This will end up throwing the expected OutOfMemoryException
+                        // since strings are limited to under int.MaxValue elements in length.
+
+                        totalCharCount = int.MaxValue;
+                        break;
+                    }
+
+                    remainingBytes = remainingBytes.Slice(next);
+                } while (!isFinalSegment);
+
+                // Now build up the string to return, copying only the written prefix of each rented
+                // array, then release each scratch buffer back to the shared pool.
+
+                return string.Create(totalCharCount, listOfSegments, (span, listOfSegments) =>
+                {
+                    foreach ((char[] array, int length) in listOfSegments)
+                    {
+                        array.AsSpan(0, length).CopyTo(span);
+                        ArrayPool<char>.Shared.Return(array);
+                        span = span.Slice(length);
+                    }
+
+                    Debug.Assert(span.IsEmpty, "Over-allocated the string instance?");
+                });
+            }
+        }
+
+        /// <summary>
+        /// Converts a <see cref="ReadOnlySpan{Char}"/> to bytes using <paramref name="encoder"/> and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="encoder">The <see cref="Encoder"/> instance which can convert <see langword="char"/>s to <see langword="byte"/>s.</param>
+        /// <param name="chars">The span of characters to encode.</param>
+        /// <param name="writer">The buffer to which the encoded bytes will be written.</param>
+        /// <param name="flush"><see langword="true"/> to indicate no further data is to be converted; otherwise <see langword="false"/>.</param>
+        /// <param name="bytesUsed">When this method returns, contains the count of <see langword="byte"/>s which were written to <paramref name="writer"/>.</param>
+        /// <param name="completed">
+        /// When this method returns, contains <see langword="true"/> if <paramref name="encoder"/> contains no partial internal state; otherwise, <see langword="false"/>.
+        /// If <paramref name="flush"/> is <see langword="true"/>, this will always be set to <see langword="true"/> when the method returns.
+        /// </param>
+        /// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoder"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static void Convert(this Encoder encoder, ReadOnlySpan<char> chars, IBufferWriter<byte> writer, bool flush, out long bytesUsed, out bool completed)
+        {
+            if (encoder is null)
+            {
+                throw new ArgumentNullException(nameof(encoder));
+            }
+
+            if (writer is null)
+            {
+                throw new ArgumentNullException(nameof(writer));
+            }
+
+            // We need to perform at least one iteration of the loop (hence do/while) since the encoder could have internal state, even when 'chars' is empty.
+
+            long totalBytesWritten = 0;
+
+            do
+            {
+                // If our remaining input is very large, instead truncate it and tell the encoder
+                // that there'll be more data after this call. This truncation is only for the
+                // purposes of getting the required byte count. Since the writer may give us a span
+                // larger than what we asked for, we'll pass the entirety of the remaining data
+                // to the transcoding routine, since it may be able to make progress beyond what
+                // was initially computed for the truncated input data.
+
+                int byteCountForThisSlice = (chars.Length <= MaxInputElementsPerIteration)
+                  ? encoder.GetByteCount(chars, flush)
+                  : encoder.GetByteCount(chars.Slice(0, MaxInputElementsPerIteration), flush: false /* this isn't the end of the data */);
+
+                Span<byte> scratchBuffer = writer.GetSpan(byteCountForThisSlice);
+
+                encoder.Convert(chars, scratchBuffer, flush, out int charsUsedJustNow, out int bytesWrittenJustNow, out completed);
+
+                chars = chars.Slice(charsUsedJustNow);
+                writer.Advance(bytesWrittenJustNow);
+                totalBytesWritten += bytesWrittenJustNow;
+            } while (!chars.IsEmpty);
+
+            bytesUsed = totalBytesWritten;
+        }
+
+        /// <summary>
+        /// Converts a <see cref="ReadOnlySequence{Char}"/> to encoded bytes and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="encoder">The <see cref="Encoder"/> instance which can convert <see langword="char"/>s to <see langword="byte"/>s.</param>
+        /// <param name="chars">A sequence of characters to encode.</param>
+        /// <param name="writer">The buffer to which the encoded bytes will be written.</param>
+        /// <param name="flush"><see langword="true"/> to indicate no further data is to be converted; otherwise <see langword="false"/>.</param>
+        /// <param name="bytesUsed">When this method returns, contains the count of <see langword="byte"/>s which were written to <paramref name="writer"/>.</param>
+        /// <param name="completed">When this method returns, contains <see langword="true"/> if <paramref name="encoder"/> contains no partial
+        /// internal state; otherwise, <see langword="false"/>. If <paramref name="flush"/> is <see langword="true"/>, this will always be set to
+        /// <see langword="true"/> when the method returns.</param>
+        /// <exception cref="EncoderFallbackException">Thrown if <paramref name="chars"/> contains data that cannot be encoded and <paramref name="encoder"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static void Convert(this Encoder encoder, in ReadOnlySequence<char> chars, IBufferWriter<byte> writer, bool flush, out long bytesUsed, out bool completed)
+        {
+            // Parameter null checks will be performed by the span-based Convert overload called in the loop below.
+
+            ReadOnlySequence<char> remainingChars = chars;
+            long totalBytesWritten = 0;
+            bool isFinalSegment;
+
+            do
+            {
+                // Process each segment individually, passing flush=true only for the final segment (and only if the caller
+                // requested it). We need to run at least one iteration of the loop in case the Encoder has internal state.
+
+                remainingChars.GetFirstSpan(out ReadOnlySpan<char> firstSpan, out SequencePosition next);
+                isFinalSegment = remainingChars.IsSingleSegment;
+
+                Convert(encoder, firstSpan, writer, flush && isFinalSegment, out long bytesWrittenThisIteration, out completed);
+
+                totalBytesWritten += bytesWrittenThisIteration;
+                remainingChars = remainingChars.Slice(next);
+            } while (!isFinalSegment);
+
+            bytesUsed = totalBytesWritten;
+        }
+
+        /// <summary>
+        /// Decodes a <see cref="ReadOnlySpan{Byte}"/> to chars using <paramref name="decoder"/> and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="decoder">The <see cref="Decoder"/> instance which can convert <see langword="byte"/>s to <see langword="char"/>s.</param>
+        /// <param name="bytes">A span of bytes to decode.</param>
+        /// <param name="writer">The buffer to which the decoded chars will be written.</param>
+        /// <param name="flush"><see langword="true"/> to indicate no further data is to be converted; otherwise <see langword="false"/>.</param>
+        /// <param name="charsUsed">When this method returns, contains the count of <see langword="char"/>s which were written to <paramref name="writer"/>.</param>
+        /// <param name="completed">
+        /// When this method returns, contains <see langword="true"/> if <paramref name="decoder"/> contains no partial internal state; otherwise, <see langword="false"/>.
+        /// If <paramref name="flush"/> is <see langword="true"/>, this will always be set to <see langword="true"/> when the method returns.
+        /// </param>
+        /// <exception cref="ArgumentNullException">Thrown if <paramref name="decoder"/> or <paramref name="writer"/> is <see langword="null"/>.</exception>
+        /// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="decoder"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static void Convert(this Decoder decoder, ReadOnlySpan<byte> bytes, IBufferWriter<char> writer, bool flush, out long charsUsed, out bool completed)
+        {
+            if (decoder is null)
+            {
+                throw new ArgumentNullException(nameof(decoder));
+            }
+
+            if (writer is null)
+            {
+                throw new ArgumentNullException(nameof(writer));
+            }
+
+            long charTally = 0;
+
+            // The body runs at least once, even for empty input, because the decoder may be
+            // carrying internal state from an earlier call.
+
+            do
+            {
+                // Work out how large a destination span to request from the writer. For very
+                // large inputs only a leading slice is measured, and the decoder is told that
+                // more data follows it. The measurement is just a size hint: the writer may
+                // hand back a larger span than requested, so the full remaining input is still
+                // given to the transcoding call below in case it can make extra progress.
+
+                int sizeHint;
+
+                if (bytes.Length > MaxInputElementsPerIteration)
+                {
+                    sizeHint = decoder.GetCharCount(bytes.Slice(0, MaxInputElementsPerIteration), flush: false /* this isn't the end of the data */);
+                }
+                else
+                {
+                    sizeHint = decoder.GetCharCount(bytes, flush);
+                }
+
+                Span<char> destination = writer.GetSpan(sizeHint);
+
+                decoder.Convert(bytes, destination, flush, out int bytesConsumed, out int charsProduced, out completed);
+
+                bytes = bytes.Slice(bytesConsumed);
+                writer.Advance(charsProduced);
+                charTally += charsProduced;
+            } while (!bytes.IsEmpty);
+
+            charsUsed = charTally;
+        }
+
+        /// <summary>
+        /// Decodes a <see cref="ReadOnlySequence{Byte}"/> one segment at a time to UTF-16 encoded characters and writes the result to <paramref name="writer"/>.
+        /// </summary>
+        /// <param name="decoder">The <see cref="Decoder"/> instance which can convert <see langword="byte"/>s to <see langword="char"/>s.</param>
+        /// <param name="bytes">A sequence of bytes to decode.</param>
+        /// <param name="writer">The buffer to which the decoded characters will be written.</param>
+        /// <param name="flush"><see langword="true"/> to indicate no further data is to be converted; otherwise <see langword="false"/>.
+        /// Flushing is only requested while converting the final segment of <paramref name="bytes"/>.</param>
+        /// <param name="charsUsed">When this method returns, contains the total count of <see langword="char"/>s which were written to <paramref name="writer"/>.</param>
+        /// <param name="completed">
+        /// When this method returns, contains <see langword="true"/> if <paramref name="decoder"/> contains no partial internal state; otherwise, <see langword="false"/>.
+        /// If <paramref name="flush"/> is <see langword="true"/>, this will always be set to <see langword="true"/> when the method returns.
+        /// </param>
+        /// <exception cref="DecoderFallbackException">Thrown if <paramref name="bytes"/> contains data that cannot be decoded and <paramref name="decoder"/> is configured
+        /// to throw an exception when such data is seen.</exception>
+        public static void Convert(this Decoder decoder, in ReadOnlySequence<byte> bytes, IBufferWriter<char> writer, bool flush, out long charsUsed, out bool completed)
+        {
+            // The span-based overload invoked below validates 'decoder' and 'writer', so no
+            // null checks are repeated here.
+
+            long charTally = 0;
+            ReadOnlySequence<byte> unprocessed = bytes;
+
+            // The loop body always runs at least once, even for an empty sequence, because
+            // the Decoder may be holding internal state which needs to be processed.
+
+            while (true)
+            {
+                bool onLastSegment = unprocessed.IsSingleSegment;
+                unprocessed.GetFirstSpan(out ReadOnlySpan<byte> segment, out SequencePosition afterSegment);
+
+                // Only the last segment may flush; earlier segments have more data following them.
+                bool flushThisSegment = flush && onLastSegment;
+
+                Convert(decoder, segment, writer, flushThisSegment, out long charsFromSegment, out completed);
+
+                charTally += charsFromSegment;
+                unprocessed = unprocessed.Slice(afterSegment);
+
+                if (onLastSegment)
+                {
+                    break;
+                }
+            }
+
+            charsUsed = charTally;
+        }
+    }
+}
diff --git a/src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs b/src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs

new file mode 100644 (file)

index 0000000..3760ca3
--- /dev/null
+++ b/src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs
@@ -0,0 +1,654 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Collections.Generic;
+using System.Linq;
+using System.Memory.Tests.SequenceReader;
+using Xunit;
+
+namespace System.Text.Tests
+{
+    public class EncodingExtensionsTests
+    {
+        // Shared fixtures: every Unicode scalar value encoded as UTF-16, and the same data encoded as UTF-8.
+        // Declaration order matters: the UTF-8 initializer reads AllScalarsAsUtf16, so that field must come first.
+        private static readonly char[] AllScalarsAsUtf16 = CreateAllScalarsAsUtf16(); // 2,160,640 chars
+        private static readonly byte[] AllScalarsAsUtf8 = Encoding.UTF8.GetBytes(AllScalarsAsUtf16); // 4,382,592 bytes
+
+        // Builds a char array containing the UTF-16 encoding of every Unicode scalar value
+        // (U+0000..U+D7FF followed by U+E000..U+10FFFF) in ascending order, and sanity-checks
+        // the resulting UTF-16 and UTF-8 lengths before returning it.
+        private static char[] CreateAllScalarsAsUtf16()
+        {
+            List<char> utf16 = new List<char>(2_160_640);
+
+            // U+0000 .. U+D7FF: each scalar is a single char.
+
+            int scalar = 0;
+            while (scalar < 0xD800)
+            {
+                utf16.Add((char)scalar);
+                scalar++;
+            }
+
+            // U+E000 .. U+10FFFF: the surrogate range is skipped; each scalar is 1 or 2 chars.
+
+            Span<char> encoded = stackalloc char[2]; // max UTF-16 sequence length
+            for (scalar = 0xE000; scalar <= 0x10FFFF; scalar++)
+            {
+                int encodedLength = new Rune(scalar).EncodeToUtf16(encoded);
+                for (int j = 0; j < encodedLength; j++)
+                {
+                    utf16.Add(encoded[j]);
+                }
+            }
+
+            char[] result = utf16.ToArray();
+
+            //  U+0000 ..   U+D7FF =    55,296 1-char sequences
+            //  U+E000 ..   U+FFFF =     8,192 1-char sequences
+            // U+10000 .. U+10FFFF = 1,048,576 2-char sequences
+            //               total = 2,160,640 chars to encode all scalars as UTF-16
+            //
+            //  U+0000 ..   U+007F =       128 1-byte sequences
+            //  U+0080 ..   U+07FF =     1,920 2-byte sequences
+            //  U+0800 ..   U+D7FF =    53,248 3-byte sequences
+            //  U+E000 ..   U+FFFF =     8,192 3-byte sequences
+            // U+10000 .. U+10FFFF = 1,048,576 4-byte sequences
+            //               total = 4,382,592 bytes to encode all scalars as UTF-8
+
+            Assert.Equal(2_160_640, result.Length);
+            Assert.Equal(4_382_592, Encoding.UTF8.GetByteCount(result));
+
+            return result;
+        }
+
+        // The span-based Decoder Convert extension must reject a null decoder and a null writer.
+        [Fact]
+        public static void Convert_Decoder_ReadOnlySpan_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<char> destination = new ArrayBufferWriter<char>();
+            Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
+
+            Assert.Throws<ArgumentNullException>(
+                "decoder",
+                () => EncodingExtensions.Convert((Decoder)null, ReadOnlySpan<byte>.Empty, destination, flush: true, out _, out _));
+
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.Convert(utf8Decoder, ReadOnlySpan<byte>.Empty, (IBufferWriter<char>)null, flush: true, out _, out _));
+        }
+
+        // Drives the span-based Decoder Convert extension through three consecutive calls on one
+        // decoder: a small complete input, a large input ending in a partial sequence, and a large
+        // flushing input that completes the earlier partial sequence and ends with invalid data.
+        [Fact]
+        public static void Convert_Decoder_ReadOnlySpan_IBufferWriter()
+        {
+            ArrayBufferWriter<char> output = new ArrayBufferWriter<char>();
+            Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
+
+            string xRun = new string('x', 20_000_000);
+            byte[] xRunAsUtf8 = Encoding.UTF8.GetBytes(xRun);
+
+            // Call 1: small input, no flush, nothing left over.
+
+            ReadOnlySpan<byte> source = Encoding.UTF8.GetBytes("Hello");
+            EncodingExtensions.Convert(utf8Decoder, source, output, flush: false, out long charsUsed, out bool completed);
+            Assert.Equal(5, charsUsed);
+            Assert.True(completed);
+
+            // Call 2: large input, no flush, ends with the first two bytes of a 3-byte sequence.
+
+            source = xRunAsUtf8.Concat(new byte[] { 0xE0, 0xA0 }).ToArray();
+            EncodingExtensions.Convert(utf8Decoder, source, output, flush: false, out charsUsed, out completed);
+            Assert.Equal(20_000_000, charsUsed);
+            Assert.False(completed);
+
+            // Call 3: large input with flush; the trailing lone lead byte should be replaced.
+
+            source = new byte[] { 0x80 }.Concat(xRunAsUtf8).Concat(new byte[] { 0xE0 }).ToArray();
+            EncodingExtensions.Convert(utf8Decoder, source, output, flush: true, out charsUsed, out completed);
+            Assert.Equal(20_000_002, charsUsed); // 1 for leftover at beginning, 1 for replacement at end
+            Assert.True(completed);
+
+            // Finally, confirm the accumulated output is exactly what the three calls should have produced.
+
+            string expectedText = "Hello" + xRun + '\u0800' + xRun + '\ufffd';
+            Assert.Equal(
+                expected: expectedText,
+                actual: output.WrittenSpan.ToString());
+        }
+
+        // The sequence-based Decoder Convert extension must reject a null decoder and a null writer.
+        [Fact]
+        public static void Convert_Decoder_ReadOnlySequence_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<char> destination = new ArrayBufferWriter<char>();
+            Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
+
+            Assert.Throws<ArgumentNullException>(
+                "decoder",
+                () => EncodingExtensions.Convert((Decoder)null, ReadOnlySequence<byte>.Empty, destination, flush: true, out _, out _));
+
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.Convert(utf8Decoder, ReadOnlySequence<byte>.Empty, (IBufferWriter<char>)null, flush: true, out _, out _));
+        }
+
+        // Drives the sequence-based Decoder Convert extension through three consecutive calls on one
+        // decoder: complete input split across segments, input ending mid-sequence, and a flushing
+        // call that completes the earlier sequence and ends with invalid data.
+        [Fact]
+        public static void Convert_Decoder_ReadOnlySequence_IBufferWriter()
+        {
+            ArrayBufferWriter<char> output = new ArrayBufferWriter<char>();
+            Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
+            long charsUsed;
+            bool completed;
+
+            // Call 1: no flush, nothing left over once all segments are consumed.
+
+            ReadOnlySequence<byte> source = SequenceFactory.Create(
+                new byte[] { 0x20 }, // U+0020
+                new byte[] { 0x61, 0xC2 }, // U+0061 and U+0080 (continues on next line)
+                new byte[] { 0x80, 0xED, 0x9F, 0xBF }); // (cont.) + U+D7FF
+            EncodingExtensions.Convert(utf8Decoder, source, output, flush: false, out charsUsed, out completed);
+            Assert.Equal(4, charsUsed);
+            Assert.True(completed);
+
+            // Call 2: no flush, input stops partway through a 4-byte sequence.
+
+            source = SequenceFactory.Create(
+                new byte[] { 0xF4, 0x80 }); // U+100000 (continues on next line)
+            EncodingExtensions.Convert(utf8Decoder, source, output, flush: false, out charsUsed, out completed);
+            Assert.Equal(0, charsUsed);
+            Assert.False(completed);
+
+            // Call 3: flush, with a trailing lone lead byte that should be replaced.
+
+            source = SequenceFactory.Create(
+                new byte[] { 0x80, 0x80 }, // (cont.)
+                new byte[] { 0xC2 }); // leftover data (should be replaced)
+            EncodingExtensions.Convert(utf8Decoder, source, output, flush: true, out charsUsed, out completed);
+            Assert.Equal(3, charsUsed);
+            Assert.True(completed);
+
+            // Finally, confirm the accumulated output is exactly what the three calls should have produced.
+
+            string decodedText = output.WrittenSpan.ToString();
+            Assert.Equal("\u0020\u0061\u0080\ud7ff\U00100000\ufffd", decodedText);
+        }
+
+        // The span-based Encoder Convert extension must reject a null encoder and a null writer.
+        [Fact]
+        public static void Convert_Encoder_ReadOnlySpan_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<byte> destination = new ArrayBufferWriter<byte>();
+            Encoder utf8Encoder = Encoding.UTF8.GetEncoder();
+
+            Assert.Throws<ArgumentNullException>(
+                "encoder",
+                () => EncodingExtensions.Convert((Encoder)null, ReadOnlySpan<char>.Empty, destination, flush: true, out _, out _));
+
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.Convert(utf8Encoder, ReadOnlySpan<char>.Empty, (IBufferWriter<byte>)null, flush: true, out _, out _));
+        }
+
+        // Drives the span-based Encoder Convert extension through three consecutive calls on one
+        // encoder: a small complete input, a large input ending in a lone high surrogate, and a large
+        // flushing input that completes the surrogate pair and ends with another lone high surrogate.
+        [Fact]
+        public static void Convert_Encoder_ReadOnlySpan_IBufferWriter()
+        {
+            ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();
+            Encoder utf8Encoder = Encoding.UTF8.GetEncoder();
+
+            string xRun = new string('x', 20_000_000);
+
+            // Call 1: small input, no flush, nothing left over.
+
+            ReadOnlySpan<char> source = "Hello";
+            EncodingExtensions.Convert(utf8Encoder, source, output, flush: false, out long bytesUsed, out bool completed);
+            Assert.Equal(5, bytesUsed);
+            Assert.True(completed);
+
+            // Call 2: large input, no flush, ends with a high surrogate awaiting its partner.
+
+            source = xRun + '\ud800';
+            EncodingExtensions.Convert(utf8Encoder, source, output, flush: false, out bytesUsed, out completed);
+            Assert.Equal(20_000_000, bytesUsed);
+            Assert.False(completed);
+
+            // Call 3: large input with flush; the trailing lone high surrogate should be replaced.
+
+            source = '\udc00' + xRun + '\ud800';
+            EncodingExtensions.Convert(utf8Encoder, source, output, flush: true, out bytesUsed, out completed);
+            Assert.Equal(20_000_007, bytesUsed); // 4 for supplementary at beginning, 3 for replacement at end
+            Assert.True(completed);
+
+            // Finally, confirm the accumulated output matches a one-shot encoding of the expected text.
+            // Use SequenceEqual instead of Assert.Equal for perf.
+
+            byte[] expectedBytes = Encoding.UTF8.GetBytes("Hello" + xRun + "\U00010000" + xRun + '\ufffd');
+            Assert.True(
+                expectedBytes.AsSpan().SequenceEqual(output.WrittenSpan));
+        }
+
+        // The sequence-based Encoder Convert extension must reject a null encoder and a null writer.
+        [Fact]
+        public static void Convert_Encoder_ReadOnlySequence_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<byte> destination = new ArrayBufferWriter<byte>();
+            Encoder utf8Encoder = Encoding.UTF8.GetEncoder();
+
+            Assert.Throws<ArgumentNullException>(
+                "encoder",
+                () => EncodingExtensions.Convert((Encoder)null, ReadOnlySequence<char>.Empty, destination, flush: true, out _, out _));
+
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.Convert(utf8Encoder, ReadOnlySequence<char>.Empty, (IBufferWriter<byte>)null, flush: true, out _, out _));
+        }
+
+        // Drives the sequence-based Encoder Convert extension through three consecutive calls on one
+        // encoder: complete input split across segments, input ending in a lone high surrogate, and a
+        // flushing call that completes the surrogate pair and ends with another lone high surrogate.
+        [Fact]
+        public static void Convert_Encoder_ReadOnlySequence_IBufferWriter()
+        {
+            ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();
+            Encoder utf8Encoder = Encoding.UTF8.GetEncoder();
+            long bytesUsed;
+            bool completed;
+
+            // Call 1: no flush, nothing left over.
+
+            ReadOnlySequence<char> source = SequenceFactory.Create(
+                new char[] { '\u0020' }, // U+0020
+                new char[] { '\ud7ff' }); // U+D7FF
+            EncodingExtensions.Convert(utf8Encoder, source, output, flush: false, out bytesUsed, out completed);
+            Assert.Equal(4, bytesUsed);
+            Assert.True(completed);
+
+            // Call 2: no flush, input is only the first half of a surrogate pair.
+
+            source = SequenceFactory.Create(
+                new char[] { '\udbc0' }); // U+100000 (continues on next line)
+            EncodingExtensions.Convert(utf8Encoder, source, output, flush: false, out bytesUsed, out completed);
+            Assert.Equal(0, bytesUsed);
+            Assert.False(completed);
+
+            // Call 3: flush, with a trailing lone high surrogate that should be replaced.
+
+            source = SequenceFactory.Create(
+                new char[] { '\udc00' }, // (cont.)
+                new char[] { '\ud800' }); // leftover data (should be replaced)
+            EncodingExtensions.Convert(utf8Encoder, source, output, flush: true, out bytesUsed, out completed);
+            Assert.Equal(7, bytesUsed);
+            Assert.True(completed);
+
+            // Now make sure all of the data was encoded properly.
+
+            byte[] expectedBytes = Encoding.UTF8.GetBytes("\u0020\ud7ff\U00100000\ufffd");
+            Assert.Equal(expectedBytes, output.WrittenSpan.ToArray());
+        }
+
+        // GetBytes(Encoding, ReadOnlySequence<char>) must reject a null encoding.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySequence_ParamChecks()
+        {
+            ReadOnlySequence<char> emptyInput = new ReadOnlySequence<char>(new char[0]);
+
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetBytes(null, emptyInput));
+        }
+
+        // Checks that GetBytes(Encoding, ReadOnlySequence<char>) returns the expected byte array for
+        // both a single-segment input and a multi-segment input whose surrogate pairs straddle segments.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySequence()
+        {
+            // Single-segment input.
+
+            ReadOnlySequence<char> input = new ReadOnlySequence<char>("Hello!".ToCharArray());
+            byte[] actualBytes = EncodingExtensions.GetBytes(Encoding.UTF8, input);
+            Assert.Equal(Encoding.UTF8.GetBytes("Hello!"), actualBytes);
+
+            // Multi-segment input.
+            // Multi-char subsequences are deliberately split across segments to test flushing mechanisms.
+
+            input = SequenceFactory.Create(
+                new char[] { '\u0020' }, // U+0020
+                new char[] { '\u0061', '\u0080' }, // U+0061 and U+0080
+                new char[] { '\ud800' }, // U+10000 (continues on next line)
+                new char[] { }, // empty segment, just to make sure we handle it correctly
+                new char[] { '\udc00', '\udbff' }, // (cont.) + U+10FFFF (continues on next line)
+                new char[] { '\udfff' }, // (cont.)
+                new char[] { '\ud800' }); // leftover data (should be replaced)
+
+            actualBytes = EncodingExtensions.GetBytes(Encoding.UTF8, input);
+            Assert.Equal(Encoding.UTF8.GetBytes("\u0020\u0061\u0080\U00010000\U0010FFFF\ufffd"), actualBytes);
+        }
+
+        // Checks the writer-based GetBytes overload for a single-segment sequence: both the
+        // returned count and the bytes placed in the writer.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySequence_IBufferWriter_SingleSegment()
+        {
+            ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();
+            ReadOnlySequence<char> input = new ReadOnlySequence<char>("Hello".ToCharArray());
+
+            long reportedCount = EncodingExtensions.GetBytes(Encoding.UTF8, input, output);
+
+            Assert.Equal(5, reportedCount);
+
+            byte[] writtenBytes = output.WrittenSpan.ToArray();
+            Assert.Equal(Encoding.UTF8.GetBytes("Hello"), writtenBytes);
+        }
+
+        // Encodes 1500 repetitions of every Unicode scalar through the writer-based GetBytes overload
+        // and checks the total byte count reported by both the method and the validating writer.
+        [Fact]
+        [OuterLoop] // this test takes ~10 seconds on modern hardware since it operates over GBs of data
+        public static void GetBytes_Encoding_ReadOnlySequence_IBufferWriter_LargeMultiSegment()
+        {
+            RepeatingValidatingBufferWriter<byte> validatingWriter = new RepeatingValidatingBufferWriter<byte>(AllScalarsAsUtf8);
+            ReadOnlySequence<char> input = GetLargeRepeatingReadOnlySequence<char>(AllScalarsAsUtf16, 1500); // ~ 3.2bn chars of UTF-16 input
+
+            long expectedTotal = 1500 * (long)AllScalarsAsUtf8.Length;
+
+            long reportedTotal = EncodingExtensions.GetBytes(Encoding.UTF8, input, validatingWriter);
+
+            Assert.Equal(expectedTotal, reportedTotal);
+            Assert.Equal(expectedTotal, validatingWriter.TotalElementsWritten); // our writer will validate as data is written to it
+        }
+
+        // The writer-based GetBytes overload for sequences must reject a null encoding and a null writer.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySequence_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<byte> destination = new ArrayBufferWriter<byte>();
+            ReadOnlySequence<char> emptyInput = new ReadOnlySequence<char>(new char[0]);
+
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetBytes((Encoding)null, emptyInput, destination));
+
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.GetBytes(Encoding.UTF8, emptyInput, (IBufferWriter<byte>)null));
+        }
+
+        // The span-destination GetBytes overload for sequences must reject a null encoding.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySequence_Span_ParamChecks()
+        {
+            ReadOnlySequence<char> emptyInput = new ReadOnlySequence<char>(new char[0]);
+
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetBytes((Encoding)null, emptyInput, Span<byte>.Empty));
+        }
+
+        // Checks that the span-destination GetBytes overload writes the expected bytes for both a
+        // single-segment input and a multi-segment input whose surrogate pairs straddle segments.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySequence_Span()
+        {
+            Span<byte> scratch = stackalloc byte[32];
+
+            // Single-segment input.
+
+            ReadOnlySequence<char> input = new ReadOnlySequence<char>("Hello!".ToCharArray());
+            int byteCount = EncodingExtensions.GetBytes(Encoding.UTF8, input, scratch);
+            Assert.Equal(
+                expected: Encoding.UTF8.GetBytes("Hello!"),
+                actual: scratch.Slice(0, byteCount).ToArray());
+
+            // Multi-segment input.
+            // Multi-char subsequences are deliberately split across segments to test flushing mechanisms.
+
+            input = SequenceFactory.Create(
+                new char[] { '\u0020' }, // U+0020
+                new char[] { '\u0061', '\u0080' }, // U+0061 and U+0080
+                new char[] { '\ud800' }, // U+10000 (continues on next line)
+                new char[] { }, // empty segment, just to make sure we handle it correctly
+                new char[] { '\udc00', '\udbff' }, // (cont.) + U+10FFFF (continues on next line)
+                new char[] { '\udfff' }, // (cont.)
+                new char[] { '\ud800' }); // leftover data (should be replaced)
+
+            byteCount = EncodingExtensions.GetBytes(Encoding.UTF8, input, scratch);
+            Assert.Equal(
+                expected: Encoding.UTF8.GetBytes("\u0020\u0061\u0080\U00010000\U0010FFFF\ufffd"),
+                actual: scratch.Slice(0, byteCount).ToArray());
+        }
+
+        // The writer-based GetBytes overload for spans must reject a null encoding and a null writer.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySpan_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<byte> destination = new ArrayBufferWriter<byte>();
+
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetBytes((Encoding)null, ReadOnlySpan<char>.Empty, destination));
+
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.GetBytes(Encoding.UTF8, ReadOnlySpan<char>.Empty, (IBufferWriter<byte>)null));
+        }
+
+        // Checks the writer-based GetBytes overload for spans with a small input and with a large
+        // input that mixes 1-char and 2-char sequences and ends with a lone high surrogate.
+        [Fact]
+        public static void GetBytes_Encoding_ReadOnlySpan_IBufferWriter()
+        {
+            ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();
+
+            // First, a small input that goes through the one-shot code path.
+
+            ReadOnlySpan<char> source = "Hello";
+            long reportedCount = EncodingExtensions.GetBytes(Encoding.UTF8, source, output);
+            Assert.Equal(5, reportedCount);
+            Assert.Equal(Encoding.UTF8.GetBytes("Hello"), output.WrittenSpan.ToArray());
+
+            // Then, a large input that goes through the chunked path.
+            // We alternate between 1-char and 2-char sequences so that the input will be split in
+            // several locations by the internal GetBytes chunking logic. This helps us test
+            // that we're flowing the 'flush' parameter through the system correctly.
+
+            string largeString = string.Create(5_000_000, (object)null, (buffer, _) =>
+            {
+                int offset = 0;
+
+                for (; buffer.Length - offset >= 3; offset += 3)
+                {
+                    buffer[offset] = '\u00EA'; // U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
+                    buffer[offset + 1] = '\uD83D'; // U+1F405 TIGER
+                    buffer[offset + 2] = '\uDC05';
+                }
+
+                // There are 2 chars left over.
+
+                Assert.Equal(2, buffer.Length - offset);
+                buffer[offset] = 'x';
+                buffer[offset + 1] = 'y';
+            });
+
+            output = new ArrayBufferWriter<byte>();
+            source = largeString + '\uD800'; // standalone lead surrogate at end of input, testing replacement
+            reportedCount = EncodingExtensions.GetBytes(Encoding.UTF8, source, output);
+            Assert.Equal(10_000_001, reportedCount); // 9,999,998 for data + 3 for replacement char at end
+
+            // Now make sure all of the data was encoded properly.
+
+            byte[] expectedBytes = Encoding.UTF8.GetBytes(largeString + "\ufffd");
+            Assert.True(expectedBytes.AsSpan().SequenceEqual(output.WrittenSpan));
+        }
+
+        // Checks that GetString(Encoding, ReadOnlySequence<byte>) returns the expected string for both
+        // a single-segment input and a multi-segment input whose multi-byte sequences straddle segments.
+        [Fact]
+        public static void GetString_Encoding_ReadOnlySequence()
+        {
+            // Single-segment input.
+
+            ReadOnlySequence<byte> input = new ReadOnlySequence<byte>(Encoding.UTF8.GetBytes("Hello!"));
+            string decoded = EncodingExtensions.GetString(Encoding.UTF8, input);
+            Assert.Equal("Hello!", decoded);
+
+            // Multi-segment input.
+            // Multi-byte subsequences are deliberately split across segments to test flushing mechanisms.
+
+            input = SequenceFactory.Create(
+                new byte[] { 0x20 }, // U+0020
+                new byte[] { 0x61, 0xC2 }, // U+0061 and U+0080 (continues on next line)
+                new byte[] { 0x80, 0xED }, // (cont.) + U+D7FF (continues on next line)
+                new byte[] { }, // empty segment, just to make sure we handle it correctly
+                new byte[] { 0x9F, 0xBF, 0xF4, 0x80 }, // (cont.) + U+100000 (continues on next line)
+                new byte[] { 0x80, 0x80 }, // (cont.)
+                new byte[] { 0xC2 }); // leftover data (should be replaced)
+
+            decoded = EncodingExtensions.GetString(Encoding.UTF8, input);
+            Assert.Equal("\u0020\u0061\u0080\ud7ff\U00100000\ufffd", decoded);
+        }
+
+        // GetString(Encoding, ReadOnlySequence<byte>) must reject a null encoding.
+        [Fact]
+        public static void GetString_Encoding_ReadOnlySequence_ParamChecks()
+        {
+            ReadOnlySequence<byte> emptyInput = new ReadOnlySequence<byte>(new byte[0]);
+
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetString(null, emptyInput));
+        }
+
+        // Checks the writer-based GetChars overload for a single-segment sequence: both the
+        // returned count and the chars placed in the writer.
+        [Fact]
+        public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_SingleSegment()
+        {
+            ArrayBufferWriter<char> output = new ArrayBufferWriter<char>();
+            ReadOnlySequence<byte> input = new ReadOnlySequence<byte>(Encoding.UTF8.GetBytes("Hello"));
+
+            long reportedCount = EncodingExtensions.GetChars(Encoding.UTF8, input, output);
+
+            Assert.Equal(5, reportedCount);
+
+            string writtenText = output.WrittenSpan.ToString();
+            Assert.Equal("Hello", writtenText);
+        }
+
+        // Decodes 1500 repetitions of every Unicode scalar through the writer-based GetChars overload
+        // and checks the total char count reported by both the method and the validating writer.
+        [Fact]
+        [OuterLoop] // this test takes ~10 seconds on modern hardware since it operates over GBs of data
+        public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_LargeMultiSegment()
+        {
+            RepeatingValidatingBufferWriter<char> validatingWriter = new RepeatingValidatingBufferWriter<char>(AllScalarsAsUtf16);
+            ReadOnlySequence<byte> input = GetLargeRepeatingReadOnlySequence<byte>(AllScalarsAsUtf8, 1500); // ~ 6.5bn bytes of UTF-8 input
+
+            long expectedTotal = 1500 * (long)AllScalarsAsUtf16.Length;
+
+            long reportedTotal = EncodingExtensions.GetChars(Encoding.UTF8, input, validatingWriter);
+
+            Assert.Equal(expectedTotal, reportedTotal);
+            Assert.Equal(expectedTotal, validatingWriter.TotalElementsWritten); // our writer will validate as data is written to it
+        }
+
+        // The writer-based GetChars overload for sequences must reject a null encoding and a null writer.
+        [Fact]
+        public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<char> destination = new ArrayBufferWriter<char>();
+            ReadOnlySequence<byte> emptyInput = new ReadOnlySequence<byte>(new byte[0]);
+
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetChars((Encoding)null, emptyInput, destination));
+
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.GetChars(Encoding.UTF8, emptyInput, (IBufferWriter<char>)null));
+        }
+
+        // Checks that the span-destination GetChars overload writes the expected chars for both a
+        // single-segment input and a multi-segment input whose multi-byte sequences straddle segments.
+        [Fact]
+        public static void GetChars_Encoding_ReadOnlySequence_Span()
+        {
+            Span<char> scratch = stackalloc char[32];
+
+            // Single-segment input.
+
+            ReadOnlySequence<byte> input = new ReadOnlySequence<byte>(Encoding.UTF8.GetBytes("Hello!"));
+            int charCount = EncodingExtensions.GetChars(Encoding.UTF8, input, scratch);
+            Assert.Equal("Hello!", scratch.Slice(0, charCount).ToString());
+
+            // Multi-segment input.
+            // Multi-byte subsequences are deliberately split across segments to test flushing mechanisms.
+
+            input = SequenceFactory.Create(
+                new byte[] { 0x20 }, // U+0020
+                new byte[] { 0x61, 0xC2 }, // U+0061 and U+0080 (continues on next line)
+                new byte[] { 0x80, 0xED }, // (cont.) + U+D7FF (continues on next line)
+                new byte[] { }, // empty segment, just to make sure we handle it correctly
+                new byte[] { 0x9F, 0xBF, 0xF4, 0x80 }, // (cont.) + U+100000 (continues on next line)
+                new byte[] { 0x80, 0x80 }, // (cont.)
+                new byte[] { 0xC2 }); // leftover data (should be replaced)
+
+            charCount = EncodingExtensions.GetChars(Encoding.UTF8, input, scratch);
+            Assert.Equal("\u0020\u0061\u0080\ud7ff\U00100000\ufffd", scratch.Slice(0, charCount).ToString());
+        }
+
+        // The span-destination GetChars overload for sequences must reject a null encoding.
+        [Fact]
+        public static void GetChars_Encoding_ReadOnlySequence_Span_ParamChecks()
+        {
+            ReadOnlySequence<byte> emptyInput = new ReadOnlySequence<byte>(new byte[0]);
+
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetChars((Encoding)null, emptyInput, Span<char>.Empty));
+        }
+
+        [Fact]
+        public static void GetChars_Encoding_ReadOnlySpan_IBufferWriter_ParamChecks()
+        {
+            IBufferWriter<char> validWriter = new ArrayBufferWriter<char>();
+
+            // Null encoding: the exception must name the "encoding" parameter.
+            Assert.Throws<ArgumentNullException>(
+                "encoding",
+                () => EncodingExtensions.GetChars((Encoding)null, ReadOnlySpan<byte>.Empty, validWriter));
+
+            // Null writer: the exception must name the "writer" parameter.
+            Assert.Throws<ArgumentNullException>(
+                "writer",
+                () => EncodingExtensions.GetChars(Encoding.UTF8, ReadOnlySpan<byte>.Empty, (IBufferWriter<char>)null));
+        }
+
+        [Fact]
+        public static void GetChars_Encoding_ReadOnlySpan_IBufferWriter()
+        {
+            // Part 1: a small input, expected to be handled by the one-shot code path.
+
+            ArrayBufferWriter<char> smallWriter = new ArrayBufferWriter<char>();
+            ReadOnlySpan<byte> smallInput = Encoding.UTF8.GetBytes("Hello");
+
+            long smallCharsWritten = EncodingExtensions.GetChars(Encoding.UTF8, smallInput, smallWriter);
+
+            Assert.Equal(5, smallCharsWritten);
+            Assert.Equal("Hello", smallWriter.WrittenSpan.ToString());
+
+            // Part 2: a large input, expected to be handled by the chunked code path.
+            // U+1234 encodes to three UTF-8 bytes, so the internal GetChars chunking logic will
+            // cut it at several chunk boundaries. That lets us confirm the 'flush' parameter is
+            // flowed through correctly. A lone 0xE0 lead byte is appended as invalid trailing data.
+
+            string repeatedText = new string('\u1234', 5_000_000);
+            ReadOnlySpan<byte> largeInput = Encoding.UTF8.GetBytes(repeatedText).Concat(new byte[] { 0xE0 }).ToArray();
+            ArrayBufferWriter<char> largeWriter = new ArrayBufferWriter<char>();
+
+            long largeCharsWritten = EncodingExtensions.GetChars(Encoding.UTF8, largeInput, largeWriter);
+
+            // 5 MM chars of real data, plus 1 replacement char for the dangling lead byte.
+            Assert.Equal(5_000_001, largeCharsWritten);
+
+            // Finally, confirm the decoded content itself is what we expect.
+
+            Assert.Equal(
+                expected: repeatedText + '\ufffd',
+                actual: largeWriter.WrittenSpan.ToString());
+        }
+
+        /// <summary>
+        /// Returns a <see cref="ReadOnlySequence{T}"/> consisting of <paramref name="dataToRepeat"/> repeated <paramref name="repetitionCount"/> times.
+        /// This can be used to produce a sequence consisting of billions of elements while consuming a fraction of that memory,
+        /// because every segment is a slice over the same underlying <paramref name="dataToRepeat"/> memory.
+        /// </summary>
+        /// <param name="dataToRepeat">The data that each repetition exposes. It is sliced, never copied.</param>
+        /// <param name="repetitionCount">How many times <paramref name="dataToRepeat"/> appears in the returned sequence.</param>
+        /// <returns>
+        /// A multi-segment sequence in which no segment is longer than 300,007 elements, or
+        /// <see cref="ReadOnlySequence{T}.Empty"/> when <paramref name="dataToRepeat"/> is empty or
+        /// <paramref name="repetitionCount"/> is not positive.
+        /// </returns>
+        private static ReadOnlySequence<T> GetLargeRepeatingReadOnlySequence<T>(ReadOnlyMemory<T> dataToRepeat, int repetitionCount)
+        {
+            const int MaxSegmentLength = 300_007; // a prime number, which ensures we'll have some multi-byte / multi-char splits if the data is long
+
+            MockSequenceSegment<T> firstSegment = null;
+            MockSequenceSegment<T> lastSegment = null;
+            long runningTotalLength = 0;
+
+            for (int i = 0; i < repetitionCount; i++)
+            {
+                ReadOnlyMemory<T> remainingData = dataToRepeat;
+                while (!remainingData.IsEmpty)
+                {
+                    int thisSegmentLength = Math.Min(remainingData.Length, MaxSegmentLength);
+
+                    MockSequenceSegment<T> newSegment = new MockSequenceSegment<T>
+                    {
+                        Memory = remainingData.Slice(0, thisSegmentLength),
+                        RunningIndex = runningTotalLength // index of this segment's first element within the whole sequence
+                    };
+
+                    // Append to the linked list; the very first segment becomes the head.
+                    if (lastSegment is null)
+                    {
+                        firstSegment = newSegment;
+                    }
+                    else
+                    {
+                        lastSegment.Next = newSegment;
+                    }
+
+                    lastSegment = newSegment;
+
+                    remainingData = remainingData.Slice(thisSegmentLength);
+                    runningTotalLength += thisSegmentLength;
+                }
+            }
+
+            // No segment was produced (empty data or non-positive repetition count), so there is
+            // nothing to link together. Return an empty sequence instead of dereferencing null.
+            if (lastSegment is null)
+            {
+                return ReadOnlySequence<T>.Empty;
+            }
+
+            return new ReadOnlySequence<T>(firstSegment, 0, lastSegment, lastSegment.Memory.Length);
+        }
+
+        /// <summary>
+        /// An <see cref="IBufferWriter{T}"/> that checks, on every <see cref="Advance(int)"/>, that the data written
+        /// so far is 'knownGoodData' repeated over and over. Written data is validated but not retained.
+        /// </summary>
+        private class RepeatingValidatingBufferWriter<T> : IBufferWriter<T> where T : unmanaged, IEquatable<T>
+        {
+            private T[] _buffer;
+            private readonly ReadOnlyMemory<T> _knownGoodData;
+
+            /// <summary>
+            /// Total number of elements committed through <see cref="Advance(int)"/> so far.
+            /// </summary>
+            public long TotalElementsWritten { get; private set; }
+
+            public RepeatingValidatingBufferWriter(ReadOnlyMemory<T> knownGoodData)
+            {
+                // An empty pattern cannot be repeated; fail the test immediately.
+                Assert.False(knownGoodData.IsEmpty);
+                _knownGoodData = knownGoodData;
+            }
+
+            /// <summary>
+            /// Validates the first <paramref name="count"/> elements of the current buffer against the repeating
+            /// pattern, continuing from wherever the previous call left off, then records them as written.
+            /// </summary>
+            public void Advance(int count)
+            {
+                ReadOnlySpan<T> written = _buffer.AsSpan(0, count);
+                ReadOnlySpan<T> pattern = _knownGoodData.Span;
+
+                // Resume inside the pattern at the position implied by everything written so far.
+                int patternOffset = (int)(TotalElementsWritten % pattern.Length);
+                int writtenOffset = 0;
+
+                while (writtenOffset < written.Length)
+                {
+                    // Compare as much as fits before either the written data or the current pattern pass ends.
+                    int chunkLength = Math.Min(written.Length - writtenOffset, pattern.Length - patternOffset);
+                    bool chunkMatches = pattern.Slice(patternOffset, chunkLength).SequenceEqual(written.Slice(writtenOffset, chunkLength));
+                    Assert.True(chunkMatches);
+
+                    writtenOffset += chunkLength;
+                    patternOffset += chunkLength;
+
+                    // Wrap around once a full pass over the pattern has been consumed.
+                    if (patternOffset == pattern.Length)
+                    {
+                        patternOffset = 0;
+                    }
+                }
+
+                TotalElementsWritten += count;
+            }
+
+            public Memory<T> GetMemory(int sizeHint)
+            {
+                throw new NotImplementedException();
+            }
+
+            /// <summary>
+            /// Returns the reusable scratch buffer, replacing it with a new array of at least 128 elements
+            /// when none exists yet or the current one is smaller than <paramref name="sizeHint"/>.
+            /// </summary>
+            public Span<T> GetSpan(int sizeHint)
+            {
+                bool needsNewBuffer = _buffer is null || _buffer.Length < sizeHint;
+                if (needsNewBuffer)
+                {
+                    _buffer = new T[Math.Max(sizeHint, 128)];
+                }
+
+                return new Span<T>(_buffer);
+            }
+        }
+
+        /// <summary>
+        /// A <see cref="ReadOnlySequenceSegment{T}"/> whose <c>Memory</c>, <c>Next</c> and <c>RunningIndex</c>
+        /// members are re-exposed with public getters and setters, each forwarding to the base member.
+        /// </summary>
+        private sealed class MockSequenceSegment<T> : ReadOnlySequenceSegment<T>
+        {
+            /// <summary>The data exposed by this segment.</summary>
+            public new ReadOnlyMemory<T> Memory
+            {
+                get { return base.Memory; }
+                set { base.Memory = value; }
+            }
+
+            /// <summary>The segment that follows this one, or null if none has been linked.</summary>
+            public new ReadOnlySequenceSegment<T> Next
+            {
+                get { return base.Next; }
+                set { base.Next = value; }
+            }
+
+            /// <summary>The running index value stored on the base segment.</summary>
+            public new long RunningIndex
+            {
+                get { return base.RunningIndex; }
+                set { base.RunningIndex = value; }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj

index 5dc79a2..f611c60 100644 (file)
--- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj
+++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj
@@ -46,6 +46,9 @@
      <Compile Include="BuffersExtensions\BuffersExtensionsTests.cs" />
    </ItemGroup>
    <ItemGroup>
+    <Compile Include="EncodingExtensions\EncodingExtensionsTests.cs" />
+  </ItemGroup>
+  <ItemGroup>
      <Compile Include="Span\AsSpan.cs" />
      <Compile Include="Span\Fill.cs" />
      <Compile Include="Span\Clear.cs" />
author	Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
	Tue, 22 Oct 2019 22:18:05 +0000 (15:18 -0700)
committer	GitHub <noreply@github.com>
	Tue, 22 Oct 2019 22:18:05 +0000 (15:18 -0700)
src/libraries/System.Memory/ref/System.Memory.cs		patch \| blob \| history
src/libraries/System.Memory/src/System.Memory.csproj		patch \| blob \| history
src/libraries/System.Memory/src/System/Text/EncodingExtensions.cs	[new file with mode: 0644]	patch \| blob
src/libraries/System.Memory/tests/EncodingExtensions/EncodingExtensionsTests.cs	[new file with mode: 0644]	patch \| blob
src/libraries/System.Memory/tests/System.Memory.Tests.csproj		patch \| blob \| history