Fix BinaryReader.ReadChars for fragmented Streams (dotnet/coreclr#26324)
author Jan Kotas <jkotas@microsoft.com>
Sat, 24 Aug 2019 17:10:00 +0000 (10:10 -0700)
committer GitHub <noreply@github.com>
Sat, 24 Aug 2019 17:10:00 +0000 (10:10 -0700)
BinaryReader.ReadChars incorrectly read more than necessary from the underlying Stream when multi-byte characters straddled the read chunks.

Fixes https://github.com/dotnet/corefx/issues/40455

Commit migrated from https://github.com/dotnet/coreclr/commit/4e8339f0d025146a60b4fe8bba8d65dc787405c2

src/libraries/System.Private.CoreLib/src/System/IO/BinaryReader.cs
src/libraries/System.Private.CoreLib/src/System/Text/DecoderNLS.cs

index fc2ab5f..98e853b 100644 (file)
@@ -384,6 +384,28 @@ namespace System.IO
                 {
                     numBytes <<= 1;
                 }
+
+                // We do not want to read even a single byte more than necessary.
+                //
+                // Subtract pending bytes that the decoder may be holding onto. This assumes that each
+                // decoded char corresponds to one or more bytes. Note that custom encodings or encodings with
+                // a custom replacement sequence may violate this assumption.
+                if (numBytes > 1)
+                {
+                    DecoderNLS? decoder = _decoder as DecoderNLS;
+                    // For internal decoders, we can check whether the decoder has any pending state.
+                    // For custom decoders, assume that the decoder has pending state.
+                    if (decoder == null || decoder.HasState)
+                    {
+                        numBytes -= 1;
+
+                        // The worst case is charsRemaining = 2 and UTF32Decoder holding onto 3 pending bytes. We need to read just
+                        // one byte in this case.
+                        if (_2BytesPerChar && numBytes > 2)
+                            numBytes -= 2;
+                    }
+                }
+
                 if (numBytes > MaxCharBytesSize)
                 {
                     numBytes = MaxCharBytesSize;
index 7d453f7..499c0ba 100644 (file)
@@ -217,7 +217,7 @@ namespace System.Text
         public bool MustFlush => _mustFlush;
 
         // Anything left in our decoder?
-        internal virtual bool HasState => false;
+        internal virtual bool HasState => _leftoverByteCount != 0;
 
         // Allow encoding to clear our must flush instead of throwing (in ThrowCharsOverflow)
         internal void ClearMustFlush()