From 77e676be9fab34471c4ae9f16a7a51a06bd470ee Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Sat, 18 Feb 2017 18:47:36 +0000 Subject: [PATCH] Fast-path for ASCII & UTF8 Encoding ASCII data (#9187) --- src/mscorlib/src/System/Text/ASCIIEncoding.cs | 288 ++++++++++++++++++- src/mscorlib/src/System/Text/Encoder.cs | 32 ++- src/mscorlib/src/System/Text/EncoderNLS.cs | 197 +++++++------ src/mscorlib/src/System/Text/Encoding.cs | 60 ++-- src/mscorlib/src/System/Text/EncodingForwarder.cs | 322 +++++++++++++++++++--- src/mscorlib/src/System/Text/UTF8Encoding.cs | 267 +++++++++++++++++- src/mscorlib/src/System/ThrowHelper.cs | 33 ++- 7 files changed, 1016 insertions(+), 183 deletions(-) diff --git a/src/mscorlib/src/System/Text/ASCIIEncoding.cs b/src/mscorlib/src/System/Text/ASCIIEncoding.cs index 07b7f3e..046be7e 100644 --- a/src/mscorlib/src/System/Text/ASCIIEncoding.cs +++ b/src/mscorlib/src/System/Text/ASCIIEncoding.cs @@ -2,11 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+ namespace System.Text { using System; - using System.Runtime.Serialization; - using System.Diagnostics; using System.Diagnostics.Contracts; // ASCIIEncoding @@ -64,7 +63,7 @@ namespace System.Text return EncodingForwarder.GetByteCount(this, chars, index, count); } - public override int GetByteCount(String chars) + public override int GetByteCount(string chars) { return EncodingForwarder.GetByteCount(this, chars); } @@ -75,12 +74,161 @@ namespace System.Text return EncodingForwarder.GetByteCount(this, chars, count); } - public override int GetBytes(String chars, int charIndex, int charCount, - byte[] bytes, int byteIndex) + public unsafe override byte[] GetBytes(string s) { - return EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); + if (s == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.s, ExceptionResource.ArgumentNull_String); + Contract.EndContractBlock(); + + int charCount = s.Length; + + byte[] bytes; + if (charCount > 0) + { + fixed (char* input = s) + bytes = GetBytesValidated(input, charCount); + } + else + { + bytes = Array.Empty(); + } + + return bytes; + } + + public unsafe override int GetBytes(string chars, int charIndex, int charCount, byte[] bytes, int byteIndex) + { + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + EncodingForwarder.ThrowValidationFailed(this, chars, charIndex, charCount, bytes); + } + Contract.EndContractBlock(); + + // Note that byteCount is the # of bytes to decode, not the size of the array + int byteCount = bytes.Length - byteIndex; + int bytesWritten; + if (charCount > 0) + { + if (byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + fixed (char* pInput = chars) + fixed (byte* pOutput = &bytes[0]) + { + char* input = pInput + charIndex; + byte* output = pOutput + byteIndex; + int 
charactersConsumed; + if (!EncodingForwarder.TryEncode(input, charCount, output, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all ASCII, GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(input + charactersConsumed, charCount - charactersConsumed, output + bytesWritten, byteCount - bytesWritten, null); + } + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + return bytesWritten; + } + + public override byte[] GetBytes(char[] chars) + { + if (chars == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + Contract.EndContractBlock(); + + return GetBytesValidated(chars, 0, chars.Length); } + public override byte[] GetBytes(char[] chars, int index, int count) + { + if ((chars == null) || + (index < 0) || + (count < 0) || + (chars.Length - index < count)) + { + EncodingForwarder.ThrowValidationFailedException(chars, index, count); + } + Contract.EndContractBlock(); + + return GetBytesValidated(chars, index, count); + } + + private unsafe byte[] GetBytesValidated(char[] chars, int index, int count) + { + byte[] bytes; + if (count > 0) + { + fixed (char* input = chars) + { + bytes = GetBytesValidated(input + index, count); + } + } + else + { + bytes = Array.Empty(); + } + + return bytes; + } + + private unsafe byte[] GetBytesValidated(char* input, int charCount) + { + int remaining = 0; + // Assume string is all ASCII and size array for that + byte[] bytes = new byte[charCount]; + + int bytesWritten; + fixed (byte* output = &bytes[0]) + { + int charactersConsumed; + if (!EncodingForwarder.TryEncode(input, charCount, output, charCount, out charactersConsumed, out bytesWritten)) + { + // Not all ASCII, get the byte count for the remaining encoded conversion + remaining = GetByteCount(input + charactersConsumed, charCount - charactersConsumed, null); + } + } + + if (remaining > 0) + { + // Not all ASCII, fallback to slower path for remaining encoding + 
var encoded = ResizeGetRemainingBytes(input, charCount, ref bytes, bytesWritten, remaining); + Debug.Assert(encoded == remaining); + } + + return bytes; + } + + private unsafe int ResizeGetRemainingBytes(char* chars, int charCount, ref byte[] bytes, int alreadyEncoded, int remaining) + { + if (bytes.Length - remaining != alreadyEncoded) + { + // Resize the array to the correct size + byte[] oldArray = bytes; + bytes = new byte[alreadyEncoded + remaining]; + // Copy already encoded bytes + Array.Copy(oldArray, 0, bytes, 0, alreadyEncoded); + } + int encoded; + fixed (byte* output = &bytes[0]) + { + // Use GetBytesFallback for remaining conversion + encoded = GetBytesFallback(chars + alreadyEncoded, charCount - alreadyEncoded, output + alreadyEncoded, remaining, null); + } + + return encoded; + } + // Encodes a range of characters in a character array into a range of bytes // in a byte array. An exception occurs if the byte array is not large // enough to hold the complete encoding of the characters. The @@ -89,17 +237,131 @@ namespace System.Text // Alternatively, the GetMaxByteCount method can be used to // determine the maximum number of bytes that will be produced for a given // number of characters, regardless of the actual character values. 
- - public override int GetBytes(char[] chars, int charIndex, int charCount, - byte[] bytes, int byteIndex) + public unsafe override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) { - return EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charIndex, charCount, bytes); + } + Contract.EndContractBlock(); + + // Note that byteCount is the # of bytes to decode, not the size of the array + int byteCount = bytes.Length - byteIndex; + int bytesWritten; + if (charCount > 0) + { + if (byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + + fixed (char* pInput = &chars[0]) + fixed (byte* pOutput = &bytes[0]) + { + char* input = pInput + charIndex; + byte* output = pOutput + byteIndex; + int charactersConsumed; + if (!EncodingForwarder.TryEncode(input, charCount, output, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all ASCII, GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(input + charactersConsumed, charCount - charactersConsumed, output + bytesWritten, byteCount - bytesWritten, null); + } + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + return bytesWritten; } [CLSCompliant(false)] public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount) { - return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount); + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charCount, bytes); + } + Contract.EndContractBlock(); + + int bytesWritten; + if (charCount > 0) + { + if (byteCount == 0) + { + // 
Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + int charactersConsumed; + if (!EncodingForwarder.TryEncode(chars, charCount, bytes, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all ASCII, GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(chars + charactersConsumed, charCount - charactersConsumed, bytes + bytesWritten, byteCount - bytesWritten, null); + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + return bytesWritten; + } + + internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder) + { + // Just need to Assert, this is called by internal EncoderNLS and parameters should already be checked + Debug.Assert(this != null); + Debug.Assert(bytes != null); + Debug.Assert(chars != null); + Debug.Assert(charCount >= 0); + Debug.Assert(byteCount >= 0); + + int bytesWritten; + int charactersConsumed = 0; + if (((encoder?.InternalHasFallbackBuffer ?? 
false) && + (encoder.FallbackBuffer.Remaining > 0)) || + (charCount > byteCount)) + { + // Data already in Fallback buffer, so straight to GetBytesFallback + bytesWritten = GetBytesFallback(chars, charCount, bytes, byteCount, encoder); + } + else if (charCount > 0) + { + if (byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + if (!EncodingForwarder.TryEncode(chars, charCount, bytes, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all ASCII, use GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(chars + charactersConsumed, charCount - charactersConsumed, bytes + bytesWritten, byteCount - bytesWritten, encoder); + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + if (encoder != null) + { + encoder.m_charsUsed += charactersConsumed; + } + + return bytesWritten; } // Returns the number of characters produced by decoding a range of bytes @@ -131,7 +393,7 @@ namespace System.Text // Returns a string containing the decoded representation of a range of // bytes in a byte array. 
- public override String GetString(byte[] bytes, int byteIndex, int byteCount) + public override string GetString(byte[] bytes, int byteIndex, int byteCount) { return EncodingForwarder.GetString(this, bytes, byteIndex, byteCount); } @@ -275,7 +537,7 @@ namespace System.Text return byteCount; } - internal override unsafe int GetBytes(char* chars, int charCount, + private unsafe int GetBytesFallback(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder) { // Just need to ASSERT, this is called by something else internal that checked parameters already diff --git a/src/mscorlib/src/System/Text/Encoder.cs b/src/mscorlib/src/System/Text/Encoder.cs index f766f98..cd766c2 100644 --- a/src/mscorlib/src/System/Text/Encoder.cs +++ b/src/mscorlib/src/System/Text/Encoder.cs @@ -9,6 +9,7 @@ namespace System.Text using System; using System.Diagnostics; using System.Diagnostics.Contracts; + using System.Runtime.CompilerServices; // An Encoder is used to encode a sequence of blocks of characters into // a sequence of blocks of bytes. Following instantiation of an encoder, // sequential blocks of characters are converted into blocks of bytes through @@ -67,16 +68,31 @@ namespace System.Text { get { - if (m_fallbackBuffer == null) - { - if (m_fallback != null) - m_fallbackBuffer = m_fallback.CreateFallbackBuffer(); - else - m_fallbackBuffer = EncoderFallback.ReplacementFallback.CreateFallbackBuffer(); - } + return m_fallbackBuffer ?? FallbackBufferInitialize(); + } + } - return m_fallbackBuffer; + private EncoderFallbackBuffer FallbackBufferInitialize() + { + // This is indirected through a second NoInlining function because NoInlining has a special meaning + // in System.Private.CoreLib of indicating it takes a StackMark, which causes + // the caller to also not be inlined; so we can't mark it directly. 
+ return FallbackBufferInitializeInner(); + } + + // Second function in chain so as to not propagate the non-inlining to outside caller + [MethodImpl(MethodImplOptions.NoInlining)] + private EncoderFallbackBuffer FallbackBufferInitializeInner() + { + if (m_fallback != null) + { + m_fallbackBuffer = m_fallback.CreateFallbackBuffer(); + } + else + { + m_fallbackBuffer = EncoderFallback.ReplacementFallback.CreateFallbackBuffer(); } + return m_fallbackBuffer; } internal bool InternalHasFallbackBuffer diff --git a/src/mscorlib/src/System/Text/EncoderNLS.cs b/src/mscorlib/src/System/Text/EncoderNLS.cs index 95901e0..1ae7732 100644 --- a/src/mscorlib/src/System/Text/EncoderNLS.cs +++ b/src/mscorlib/src/System/Text/EncoderNLS.cs @@ -8,6 +8,8 @@ namespace System.Text using System.Text; using System; using System.Diagnostics.Contracts; + using Runtime.CompilerServices; + // An Encoder is used to encode a sequence of blocks of characters into // a sequence of blocks of bytes. Following instantiation of an encoder, // sequential blocks of characters are converted into blocks of bytes through @@ -78,17 +80,13 @@ namespace System.Text public override unsafe int GetByteCount(char[] chars, int index, int count, bool flush) { // Validate input parameters - if (chars == null) - throw new ArgumentNullException(nameof(chars), - Environment.GetResourceString("ArgumentNull_Array")); - - if (index < 0 || count < 0) - throw new ArgumentOutOfRangeException((index<0 ? 
nameof(index) : nameof(count)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - - if (chars.Length - index < count) - throw new ArgumentOutOfRangeException(nameof(chars), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); + if ((chars == null) || + (index < 0) || + (count < 0) || + (chars.Length - index < count)) + { + EncodingForwarder.ThrowValidationFailedException(chars, index, count); + } Contract.EndContractBlock(); // Avoid empty input problem @@ -99,7 +97,7 @@ namespace System.Text int result = -1; fixed (char* pChars = &chars[0]) { - result = GetByteCount(pChars + index, count, flush); + result = GetByteCountValidated(pChars + index, count, flush); } return result; } @@ -108,14 +106,17 @@ namespace System.Text { // Validate input parameters if (chars == null) - throw new ArgumentNullException(nameof(chars), - Environment.GetResourceString("ArgumentNull_Array")); - + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + ThrowHelper.ThrowCountArgumentOutOfRange_NeedNonNegNumException(); Contract.EndContractBlock(); + return GetByteCountValidated(chars, count, flush); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe int GetByteCountValidated(char* chars, int count, bool flush) + { this.m_mustFlush = flush; this.m_throwOnOverflow = true; return m_encoding.GetByteCount(chars, count, this); @@ -125,51 +126,56 @@ namespace System.Text byte[] bytes, int byteIndex, bool flush) { // Validate parameters - if (chars == null || bytes == null) - throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), - Environment.GetResourceString("ArgumentNull_Array")); - - if (charIndex < 0 || charCount < 0) - throw new ArgumentOutOfRangeException((charIndex<0 ? 
nameof(charIndex) : nameof(charCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - - if (chars.Length - charIndex < charCount) - throw new ArgumentOutOfRangeException(nameof(chars), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); - - if (byteIndex < 0 || byteIndex > bytes.Length) - throw new ArgumentOutOfRangeException(nameof(byteIndex), - Environment.GetResourceString("ArgumentOutOfRange_Index")); + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charIndex, charCount, bytes); + } Contract.EndContractBlock(); + int byteCount = bytes.Length - byteIndex; + if (charCount > 0 && byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(m_encoding); + } + if (chars.Length == 0) chars = new char[1]; - - int byteCount = bytes.Length - byteIndex; if (bytes.Length == 0) bytes = new byte[1]; // Just call pointer version fixed (char* pChars = &chars[0]) - fixed (byte* pBytes = &bytes[0]) + fixed (byte* pBytes = &bytes[0]) + { + return GetBytesValidated(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, flush); + } - // Remember that charCount is # to decode, not size of array. - return GetBytes(pChars + charIndex, charCount, - pBytes + byteIndex, byteCount, flush); } public unsafe override int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush) { // Validate parameters - if (chars == null || bytes == null) - throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), - Environment.GetResourceString("ArgumentNull_Array")); - - if (byteCount < 0 || charCount < 0) - throw new ArgumentOutOfRangeException((byteCount<0 ? 
nameof(byteCount) : nameof(charCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charCount, bytes); + } Contract.EndContractBlock(); + return GetBytesValidated(chars, charCount, bytes, byteCount, flush); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe int GetBytesValidated(char* chars, int charCount, byte* bytes, int byteCount, bool flush) + { this.m_mustFlush = flush; this.m_throwOnOverflow = true; return m_encoding.GetBytes(chars, charCount, bytes, byteCount, this); @@ -182,28 +188,22 @@ namespace System.Text out int charsUsed, out int bytesUsed, out bool completed) { // Validate parameters - if (chars == null || bytes == null) - throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), - Environment.GetResourceString("ArgumentNull_Array")); - - if (charIndex < 0 || charCount < 0) - throw new ArgumentOutOfRangeException((charIndex<0 ? nameof(charIndex) : nameof(charCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - - if (byteIndex < 0 || byteCount < 0) - throw new ArgumentOutOfRangeException((byteIndex<0 ? 
nameof(byteIndex) : nameof(byteCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - - if (chars.Length - charIndex < charCount) - throw new ArgumentOutOfRangeException(nameof(chars), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); - - if (bytes.Length - byteIndex < byteCount) - throw new ArgumentOutOfRangeException(nameof(bytes), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (byteIndex < 0) || + (byteCount < 0) || + (chars.Length - charIndex < charCount) || + (bytes.Length - byteIndex < byteCount)) + { + ThrowValidationFailedException(chars, charIndex, charCount, bytes, byteIndex, byteCount); + } Contract.EndContractBlock(); + StartConversion(flush); + // Avoid empty input problem if (chars.Length == 0) chars = new char[1]; @@ -212,13 +212,12 @@ namespace System.Text // Just call the pointer version (can't do this for non-msft encoders) fixed (char* pChars = &chars[0]) + fixed (byte* pBytes = &bytes[0]) { - fixed (byte* pBytes = &bytes[0]) - { - Convert(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, flush, - out charsUsed, out bytesUsed, out completed); - } + bytesUsed = this.m_encoding.GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, this); } + + FinishConversion(charCount, flush, out charsUsed, out completed); } // This is the version that uses pointers. We call the base encoding worker function @@ -228,28 +227,39 @@ namespace System.Text out int charsUsed, out int bytesUsed, out bool completed) { // Validate input parameters - if (bytes == null || chars == null) - throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), - Environment.GetResourceString("ArgumentNull_Array")); - if (charCount < 0 || byteCount < 0) - throw new ArgumentOutOfRangeException((charCount<0 ? 
nameof(charCount) : nameof(byteCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charCount, bytes); + } Contract.EndContractBlock(); + StartConversion(flush); + + // Do conversion + bytesUsed = this.m_encoding.GetBytes(chars, charCount, bytes, byteCount, this); + + FinishConversion(charCount, flush, out charsUsed, out completed); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void StartConversion(bool flush) + { // We don't want to throw this.m_mustFlush = flush; this.m_throwOnOverflow = false; this.m_charsUsed = 0; + } - // Do conversion - bytesUsed = this.m_encoding.GetBytes(chars, charCount, bytes, byteCount, this); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void FinishConversion(int charCount, bool flush, out int charsUsed, out bool completed) + { charsUsed = this.m_charsUsed; - // Its completed if they've used what they wanted AND if they didn't want flush or if we are flushed completed = (charsUsed == charCount) && (!flush || !this.HasState) && - (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0); - - // Our data thingys are now full, we can return + (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0); } public Encoding Encoding @@ -284,5 +294,30 @@ namespace System.Text m_mustFlush = false; } + private static void ThrowValidationFailedException(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, int byteCount) + { + if (chars == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (bytes == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (charIndex < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.charIndex, + 
ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (charCount < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.charCount, + ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (byteIndex < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, + ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (byteCount < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteCount, + ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (chars.Length - charIndex < charCount) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, + ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + Debug.Assert(bytes.Length - byteIndex < byteCount); + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, + ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + } } } diff --git a/src/mscorlib/src/System/Text/Encoding.cs b/src/mscorlib/src/System/Text/Encoding.cs index 8cb01e4..ee4b7ec 100644 --- a/src/mscorlib/src/System/Text/Encoding.cs +++ b/src/mscorlib/src/System/Text/Encoding.cs @@ -745,20 +745,14 @@ namespace System.Text [Pure] public int GetByteCount(string s, int index, int count) { - if (s == null) - throw new ArgumentNullException(nameof(s), - Environment.GetResourceString("ArgumentNull_String")); - if (index < 0) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (index > s.Length - count) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_IndexCount")); + if ((s == null) || + (index < 0) || + (count < 0) || + (index > s.Length - count)) + { + EncodingForwarder.ThrowValidationFailed(s, index, count); + } Contract.EndContractBlock(); - unsafe { fixed (char* pChar = s) @@ -865,39 +859,37 
@@ namespace System.Text // string range. // [Pure] - public byte[] GetBytes(string s, int index, int count) + public unsafe byte[] GetBytes(string s, int index, int count) { - if (s == null) - throw new ArgumentNullException(nameof(s), - Environment.GetResourceString("ArgumentNull_String")); - if (index < 0) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (index > s.Length - count) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_IndexCount")); + if ((s == null) || + (index < 0) || + (count < 0) || + (index > s.Length - count)) + { + EncodingForwarder.ThrowValidationFailed(s, index, count); + } Contract.EndContractBlock(); - unsafe + byte[] bytes; + fixed (char* pChar = s) { - fixed (char* pChar = s) + int byteCount = GetByteCount(pChar + index, count); + if (byteCount == 0) { - int byteCount = GetByteCount(pChar + index, count); - if (byteCount == 0) - return Array.Empty(); - - byte[] bytes = new byte[byteCount]; + bytes = Array.Empty(); + } + else + { + bytes = new byte[byteCount]; fixed (byte* pBytes = &bytes[0]) { int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount); Debug.Assert(byteCount == bytesReceived); } - return bytes; } } + + return bytes; } public virtual int GetBytes(String s, int charIndex, int charCount, diff --git a/src/mscorlib/src/System/Text/EncodingForwarder.cs b/src/mscorlib/src/System/Text/EncodingForwarder.cs index 50ccbd9..a23a485 100644 --- a/src/mscorlib/src/System/Text/EncodingForwarder.cs +++ b/src/mscorlib/src/System/Text/EncodingForwarder.cs @@ -35,19 +35,13 @@ namespace System.Text public unsafe static int GetByteCount(Encoding encoding, char[] chars, int index, int count) { // Validate parameters - Debug.Assert(encoding != null); // this 
parameter should only be affected internally, so just do a debug check here - if (chars == null) + if ((chars == null) || + (index < 0) || + (count < 0) || + (chars.Length - index < count)) { - throw new ArgumentNullException(nameof(chars), Environment.GetResourceString("ArgumentNull_Array")); - } - if (index < 0 || count < 0) - { - throw new ArgumentOutOfRangeException(index < 0 ? nameof(index) : nameof(count), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - } - if (chars.Length - index < count) - { - throw new ArgumentOutOfRangeException(nameof(chars), Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); + ThrowValidationFailedException(chars, index, count); } Contract.EndContractBlock(); @@ -65,9 +59,7 @@ namespace System.Text Debug.Assert(encoding != null); if (s == null) { - string paramName = encoding is ASCIIEncoding ? "chars" : nameof(s); // ASCIIEncoding calls the string chars - // UTF8Encoding does this as well, but it originally threw an ArgumentNull for "s" so don't check for that - throw new ArgumentNullException(paramName); + ThrowValidationFailed(encoding); } Contract.EndContractBlock(); @@ -139,21 +131,15 @@ namespace System.Text public unsafe static int GetBytes(Encoding encoding, char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) { Debug.Assert(encoding != null); - if (chars == null || bytes == null) - { - throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes), Environment.GetResourceString("ArgumentNull_Array")); - } - if (charIndex < 0 || charCount < 0) - { - throw new ArgumentOutOfRangeException(charIndex < 0 ? 
nameof(charIndex) : nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - } - if (chars.Length - charIndex < charCount) - { - throw new ArgumentOutOfRangeException(nameof(chars), Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); - } - if (byteIndex < 0 || byteIndex > bytes.Length) + // Validate parameters + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) { - throw new ArgumentOutOfRangeException(nameof(byteIndex), Environment.GetResourceString("ArgumentOutOfRange_Index")); + ThrowValidationFailedException(chars, charIndex, charCount, bytes); } Contract.EndContractBlock(); @@ -179,19 +165,169 @@ namespace System.Text public unsafe static int GetBytes(Encoding encoding, char* chars, int charCount, byte* bytes, int byteCount) { Debug.Assert(encoding != null); - if (bytes == null || chars == null) + // Validate parameters + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) { - throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), Environment.GetResourceString("ArgumentNull_Array")); - } - if (charCount < 0 || byteCount < 0) - { - throw new ArgumentOutOfRangeException(charCount < 0 ? 
nameof(charCount) : nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + ThrowValidationFailedException(chars, charCount, bytes); } Contract.EndContractBlock(); return encoding.GetBytes(chars, charCount, bytes, byteCount, encoder: null); } + internal unsafe static bool TryEncode(char* input, int charCount, byte* output, int byteCount, out int charactersConsumed, out int bytesWritten) + { + const int Shift16Shift24 = (1 << 16) | (1 << 24); + const int Shift8Identity = (1 << 8) | (1); + + int charsToEncode = Math.Min(charCount, byteCount); + + // Encode as bytes up to the first non-ASCII char and return count encoded + int i = 0; +#if BIT64 && !BIGENDIAN + if (charsToEncode < 4) goto trailing; + + int unaligned = (int)(((ulong)input) & 0x7) >> 1; + // Unaligned chars + for (; i < unaligned; i++) + { + char ch = *(input + i); + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { + *(output + i) = (byte)ch; // Cast convert + } + } + + // Aligned + int ulongDoubleCount = (charsToEncode - i) & ~0x7; + for (; i < ulongDoubleCount; i += 8) + { + ulong inputUlong0 = *(ulong*)(input + i); + ulong inputUlong1 = *(ulong*)(input + i + 4); + if (((inputUlong0 | inputUlong1) & 0xFF80FF80FF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } + // Pack 8 ASCII chars into 8 bytes + *(uint*)(output + i) = + ((uint)((inputUlong0 * Shift16Shift24) >> 24) & 0xffff) | + ((uint)((inputUlong0 * Shift8Identity) >> 24) & 0xffff0000); + *(uint*)(output + i + 4) = + ((uint)((inputUlong1 * Shift16Shift24) >> 24) & 0xffff) | + ((uint)((inputUlong1 * Shift8Identity) >> 24) & 0xffff0000); + } + if (charsToEncode - 4 > i) + { + ulong inputUlong = *(ulong*)(input + i); + if ((inputUlong & 0xFF80FF80FF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } + // Pack 4 ASCII chars into 4 bytes + *(uint*)(output + i) = + ((uint)((inputUlong * Shift16Shift24) >> 24) & 0xffff) | + ((uint)((inputUlong * Shift8Identity) >> 24) & 
0xffff0000); + i += 4; + } + + trailing: + for (; i < charsToEncode; i++) + { + char ch = *(input + i); + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { + *(output + i) = (byte)ch; // Cast convert + } + } +#else + // Unaligned chars + if ((unchecked((int)input) & 0x2) != 0) + { + char ch = *input; + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { + i = 1; + *(output) = (byte)ch; // Cast convert + } + } + + // Aligned + int uintCount = (charsToEncode - i) & ~0x3; + for (; i < uintCount; i += 4) + { + uint inputUint0 = *(uint*)(input + i); + uint inputUint1 = *(uint*)(input + i + 2); + if (((inputUint0 | inputUint1) & 0xFF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } + // Pack 4 ASCII chars into 4 bytes +#if BIGENDIAN + *(output + i) = (byte)(inputUint0 >> 16); + *(output + i + 1) = (byte)inputUint0; + *(output + i + 2) = (byte)(inputUint1 >> 16); + *(output + i + 3) = (byte)inputUint1; +#else // BIGENDIAN + *(ushort*)(output + i) = (ushort)(inputUint0 | (inputUint0 >> 8)); + *(ushort*)(output + i + 2) = (ushort)(inputUint1 | (inputUint1 >> 8)); +#endif // BIGENDIAN + } + if (charsToEncode - 1 > i) + { + uint inputUint = *(uint*)(input + i); + if ((inputUint & 0xFF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } +#if BIGENDIAN + *(output + i) = (byte)(inputUint0 >> 16); + *(output + i + 1) = (byte)inputUint0; +#else // BIGENDIAN + // Pack 2 ASCII chars into 2 bytes + *(ushort*)(output + i) = (ushort)(inputUint | (inputUint >> 8)); +#endif // BIGENDIAN + i += 2; + } + + if (i < charsToEncode) + { + char ch = *(input + i); + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { +#if BIGENDIAN + *(output + i) = (byte)(ch >> 16); +#else // BIGENDIAN + *(output + i) = (byte)ch; // Cast convert +#endif // BIGENDIAN + i = charsToEncode; + } + } +#endif // BIT64 + exit: + bytesWritten = i; + charactersConsumed = i; + return charCount == charactersConsumed; + } + public unsafe static 
int GetCharCount(Encoding encoding, byte[] bytes, int index, int count) { Debug.Assert(encoding != null); @@ -325,5 +461,123 @@ namespace System.Text return string.CreateStringFromEncoding(pBytes + index, count, encoding); } } + + internal static void ThrowBytesOverflow(Encoding encoding) + { + throw GetArgumentException_ThrowBytesOverflow(encoding); + } + + internal static void ThrowValidationFailedException(char[] chars, int index, int count) + { + throw GetValidationFailedException(chars, index, count); + } + + internal static void ThrowValidationFailedException(char[] chars, int charIndex, int charCount, byte[] bytes) + { + throw GetValidationFailedException(chars, charIndex, charCount, bytes); + } + + internal static void ThrowValidationFailed(string s, int index, int count) + { + throw GetValidationFailedException(s, index, count); + } + + internal static void ThrowValidationFailed(Encoding encoding, string s, int charIndex, int charCount, byte[] bytes) + { + throw GetValidationFailedException(encoding, s, charIndex, charCount, bytes); + } + + internal static unsafe void ThrowValidationFailedException(char* chars, int charCount, byte* bytes) + { + throw GetValidationFailedException(chars, charCount, bytes); + } + + private static void ThrowValidationFailed(Encoding encoding) + { + throw GetValidationFailedException(encoding); + } + + private static ArgumentException GetArgumentException_ThrowBytesOverflow(Encoding encoding) + { + throw new ArgumentException( + Environment.GetResourceString("Argument_EncodingConversionOverflowBytes", + encoding.EncodingName, encoding.EncoderFallback.GetType()), "bytes"); + } + + private static Exception GetValidationFailedException(Encoding encoding) + { + if (encoding is ASCIIEncoding) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.chars); + else + return ThrowHelper.GetArgumentNullException(ExceptionArgument.s); + } + + private static Exception GetValidationFailedException(char[] chars, int index, int count) 
+ { + if (chars == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (index < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (count < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + Debug.Assert(chars.Length - index < count); + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + } + + private static Exception GetValidationFailedException(char[] chars, int charIndex, int charCount, byte[] bytes) + { + if (chars == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (bytes == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (charIndex < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (charCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (chars.Length - charIndex < charCount) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + //if (byteIndex < 0 || byteIndex > bytes.Length) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index); + } + + private static Exception GetValidationFailedException(string s, int index, int count) + { + if (s == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.s, ExceptionResource.ArgumentNull_String); + if (index < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.index, 
ExceptionResource.ArgumentOutOfRange_Index); + if (count < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + Debug.Assert(index > s.Length - count); + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + } + + private static unsafe Exception GetValidationFailedException(char* chars, int charCount, byte* bytes) + { + if (bytes == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (chars == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (charCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + // (byteCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.byteCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + } + + private static Exception GetValidationFailedException(Encoding encoding, string s, int charIndex, int charCount, byte[] bytes) + { + if (s == null) + return ThrowHelper.GetArgumentNullException(encoding is ASCIIEncoding ? ExceptionArgument.chars : ExceptionArgument.s, ExceptionResource.ArgumentNull_String); + if (bytes == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (charIndex < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (charCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (s.Length - charIndex < charCount) + return ThrowHelper.GetArgumentOutOfRangeException(encoding is ASCIIEncoding ? 
ExceptionArgument.chars : ExceptionArgument.s, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + // (byteIndex < 0 || byteIndex > bytes.Length) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index); + } } } diff --git a/src/mscorlib/src/System/Text/UTF8Encoding.cs b/src/mscorlib/src/System/Text/UTF8Encoding.cs index 191bbfe..574a365 100644 --- a/src/mscorlib/src/System/Text/UTF8Encoding.cs +++ b/src/mscorlib/src/System/Text/UTF8Encoding.cs @@ -134,10 +134,139 @@ namespace System.Text return EncodingForwarder.GetByteCount(this, chars, count); } - public override int GetBytes(String s, int charIndex, int charCount, - byte[] bytes, int byteIndex) + public unsafe override byte[] GetBytes(String s) { - return EncodingForwarder.GetBytes(this, s, charIndex, charCount, bytes, byteIndex); + // Fast path for pure ASCII data for ASCII and UTF8 encoding + if (s == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.s, ExceptionResource.ArgumentNull_String); + Contract.EndContractBlock(); + + int charCount = s.Length; + + byte[] bytes; + if (charCount > 0) + { + fixed (char* input = s) + bytes = GetBytesValidated(input, charCount); + } + else + { + bytes = Array.Empty(); + } + + return bytes; + } + + public unsafe override int GetBytes(String s, int charIndex, int charCount, byte[] bytes, int byteIndex) + { + if ((s == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (s.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + EncodingForwarder.ThrowValidationFailed(this, s, charIndex, charCount, bytes); + } + Contract.EndContractBlock(); + + // Note that byteCount is the # of bytes to decode, not the size of the array + int byteCount = bytes.Length - byteIndex; + int bytesWritten; + if (charCount > 0) + { + if (byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + fixed 
(char* pInput = s) + fixed (byte* pOutput = &bytes[0]) + { + char* input = pInput + charIndex; + byte* output = pOutput + byteIndex; + int charactersConsumed; + // TODO: Replace with call to System.Text.Primitives/System/Text/Encoding/Utf8/Utf8Encoder + // TryEncode(ReadOnlySpan utf16, Span utf8, out int charactersConsumed, out int bytesWritten) + if (!EncodingForwarder.TryEncode(input, charCount, output, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all converted, use GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(input + charactersConsumed, charCount - charactersConsumed, output + bytesWritten, byteCount - bytesWritten, null); + } + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + return bytesWritten; + } + + public override byte[] GetBytes(char[] chars) { + if (chars == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + Contract.EndContractBlock(); + + return GetBytesValidated(chars, 0, chars.Length); + } + + public override byte[] GetBytes(char[] chars, int index, int count) { + if ((chars == null) || + (index < 0) || + (count < 0) || + (chars.Length - index < count)) + { + EncodingForwarder.ThrowValidationFailedException(chars, index, count); + } + Contract.EndContractBlock(); + + return GetBytesValidated(chars, index, count); + } + + private unsafe byte[] GetBytesValidated(char[] chars, int index, int count) + { + byte[] bytes; + if (count > 0) + { + fixed (char* input = chars) + { + bytes = GetBytesValidated(input + index, count); + } + } + else + { + bytes = Array.Empty(); + } + + return bytes; + } + + private unsafe byte[] GetBytesValidated(char* input, int charCount) + { + int byteCount = GetByteCount(input, charCount, null); + byte[] bytes = new byte[byteCount]; + + fixed (byte* output = &bytes[0]) + { + int bytesWritten; + int charactersConsumed; + // TODO: Replace with call to 
System.Text.Primitives/System/Text/Encoding/Utf8/Utf8Encoder + // TryEncode(ReadOnlySpan utf16, Span utf8, out int charactersConsumed, out int bytesWritten) + if (!EncodingForwarder.TryEncode(input, charCount, output, charCount, out charactersConsumed, out bytesWritten)) + { + // Not all converted, use GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(input + charactersConsumed, charCount - charactersConsumed, output + bytesWritten, byteCount - bytesWritten, null); + } + else + { + Debug.Assert(charactersConsumed == charCount); + } + Debug.Assert(bytesWritten == byteCount); + } + + return bytes; } // Encodes a range of characters in a character array into a range of bytes @@ -148,17 +277,137 @@ namespace System.Text // Alternatively, the GetMaxByteCount method can be used to // determine the maximum number of bytes that will be produced for a given // number of characters, regardless of the actual character values. - - public override int GetBytes(char[] chars, int charIndex, int charCount, - byte[] bytes, int byteIndex) + public unsafe override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) { - return EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charIndex, charCount, bytes); + } + Contract.EndContractBlock(); + + // Note that byteCount is the # of bytes to decode, not the size of the array + int byteCount = bytes.Length - byteIndex; + int bytesWritten; + if (charCount > 0) + { + if (byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + + fixed (char* pInput = &chars[0]) + fixed (byte* pOutput = &bytes[0]) + { + char* input = pInput + charIndex; + byte* output = 
pOutput + byteIndex; + int charactersConsumed; + // TODO: Replace with call to System.Text.Primitives/System/Text/Encoding/Utf8/Utf8Encoder + // TryEncode(ReadOnlySpan utf16, Span utf8, out int charactersConsumed, out int bytesWritten) + if (!EncodingForwarder.TryEncode(input, charCount, output, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all converted, use GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(input + charactersConsumed, charCount - charactersConsumed, output + bytesWritten, byteCount - bytesWritten, null); + } + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + return bytesWritten; } [CLSCompliant(false)] public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount) { - return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount); + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charCount, bytes); + } + Contract.EndContractBlock(); + + int bytesWritten; + if (charCount > 0) + { + if (byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + int charactersConsumed; + // TODO: Replace with call to System.Text.Primitives/System/Text/Encoding/Utf8/Utf8Encoder + // TryEncode(ReadOnlySpan utf16, Span utf8, out int charactersConsumed, out int bytesWritten) + if (!EncodingForwarder.TryEncode(chars, charCount, bytes, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all converted, use GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(chars + charactersConsumed, charCount - charactersConsumed, bytes + bytesWritten, byteCount - bytesWritten, null); + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + return bytesWritten; + } + + internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS 
encoder) + { + // Just need to Assert, this is called by internal EncoderNLS and parameters should already be checked + Debug.Assert(this != null); + Debug.Assert(bytes != null); + Debug.Assert(chars != null); + Debug.Assert(charCount >= 0); + Debug.Assert(byteCount >= 0); + + int bytesWritten; + int charactersConsumed = 0; + if (((encoder?.InternalHasFallbackBuffer ?? false) && + (encoder.FallbackBuffer.Remaining > 0)) || + (charCount > byteCount)) + { + // Data already in Fallback buffer, so straight to GetBytesFallback + bytesWritten = GetBytesFallback(chars, charCount, bytes, byteCount, encoder); + } + else if (charCount > 0) + { + if (byteCount == 0) + { + // Definitely not enough space, early bail + EncodingForwarder.ThrowBytesOverflow(this); + } + // TODO: Replace with call to System.Text.Primitives/System/Text/Encoding/Utf8/Utf8Encoder + // TryEncode(ReadOnlySpan utf16, Span utf8, out int charactersConsumed, out int bytesWritten) + if (!EncodingForwarder.TryEncode(chars, charCount, bytes, byteCount, out charactersConsumed, out bytesWritten)) + { + // Not all converted, use GetBytesFallback for remaining conversion + bytesWritten += GetBytesFallback(chars + charactersConsumed, charCount - charactersConsumed, bytes + bytesWritten, byteCount - bytesWritten, encoder); + } + } + else + { + // Nothing to encode + bytesWritten = 0; + } + + if (encoder != null) + { + encoder.m_charsUsed += charactersConsumed; + } + + return bytesWritten; } // Returns the number of characters produced by decoding a range of bytes @@ -573,7 +822,7 @@ namespace System.Text // Our workhorse // Note: We ignore mismatched surrogates, unless the exception flag is set in which case we throw - internal override unsafe int GetBytes(char* chars, int charCount, + private unsafe int GetBytesFallback(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder) { Debug.Assert(chars!=null, "[UTF8Encoding.GetBytes]chars!=null"); diff --git a/src/mscorlib/src/System/ThrowHelper.cs 
b/src/mscorlib/src/System/ThrowHelper.cs index 99f074d..416e45f 100644 --- a/src/mscorlib/src/System/ThrowHelper.cs +++ b/src/mscorlib/src/System/ThrowHelper.cs @@ -76,6 +76,11 @@ namespace System { ExceptionResource.ArgumentOutOfRange_Index); } + internal static void ThrowCountArgumentOutOfRange_NeedNonNegNumException() { + throw GetArgumentOutOfRangeException(ExceptionArgument.count, + ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + } + internal static void ThrowIndexArgumentOutOfRange_NeedNonNegNumException() { throw GetArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); @@ -124,10 +129,6 @@ namespace System { throw GetArgumentException(resource, argument); } - private static ArgumentNullException GetArgumentNullException(ExceptionArgument argument) { - return new ArgumentNullException(GetArgumentName(argument)); - } - internal static void ThrowArgumentNullException(ExceptionArgument argument) { throw GetArgumentNullException(argument); } @@ -208,6 +209,18 @@ namespace System { throw GetInvalidOperationException(ExceptionResource.InvalidOperation_EnumFailedVersion); } + internal static ArgumentNullException GetArgumentNullException(ExceptionArgument argument) { + return new ArgumentNullException(GetArgumentName(argument)); + } + + internal static ArgumentNullException GetArgumentNullException(ExceptionArgument argument, ExceptionResource resource) { + throw new ArgumentNullException(GetArgumentName(argument), GetResourceString(resource)); + } + + internal static void ThrowArgumentNullException(ExceptionArgument argument, ExceptionResource resource) { + throw GetArgumentNullException(argument, resource); + } + internal static void ThrowInvalidOperationException_InvalidOperation_EnumOpCantHappen() { throw GetInvalidOperationException(ExceptionResource.InvalidOperation_EnumOpCantHappen); } @@ -363,6 +376,14 @@ namespace System { callBack, type, stateMachine, + s, + chars, + bytes, + byteIndex, + 
charIndex, + byteCount, + charCount, + } // @@ -469,6 +490,10 @@ namespace System { ArgumentOutOfRange_Enum, InvalidOperation_HandleIsNotInitialized, AsyncMethodBuilder_InstanceNotInitialized, + ArgumentNull_Array, + ArgumentOutOfRange_IndexCountBuffer, + ArgumentNull_String, + } } -- 2.7.4