From 1e02781003505f5314c330539633eeaff1eef799 Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Tue, 24 Jan 2017 21:21:52 +0000 Subject: [PATCH] Fast-path for ASCII & UTF8 Encoding ASCII data (dotnet/coreclr#8969) * ASCII Encoding fast-path * Add skipp for BIGENDIAN * fixes * ascii GetBytes(char[] chars) fix * feedback * Clean up * Reuse exception block * Add debug Asserts Commit migrated from https://github.com/dotnet/coreclr/commit/5c20488da0d06c14f8d28e9bade3e8e6d8cd970f --- .../src/mscorlib/src/System/Text/ASCIIEncoding.cs | 48 +- .../src/mscorlib/src/System/Text/EncoderNLS.cs | 172 ++++--- .../src/mscorlib/src/System/Text/Encoding.cs | 68 +-- .../mscorlib/src/System/Text/EncodingForwarder.cs | 545 +++++++++++++++++++-- .../src/mscorlib/src/System/Text/UTF8Encoding.cs | 48 +- src/coreclr/src/mscorlib/src/System/ThrowHelper.cs | 25 +- 6 files changed, 738 insertions(+), 168 deletions(-) diff --git a/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs b/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs index fc7589f..7c45d18 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/ASCIIEncoding.cs @@ -78,12 +78,22 @@ namespace System.Text return EncodingForwarder.GetByteCount(this, chars, count); } - public override int GetBytes(String chars, int charIndex, int charCount, - byte[] bytes, int byteIndex) - { - return EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); +#if !BIGENDIAN + public override byte[] GetBytes(String s) + => EncodingForwarder.GetBytesAsciiFastPath(this, s); + + public override int GetBytes(String chars, int charIndex, int charCount, byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, charIndex, charCount, bytes, byteIndex); + + public override byte[] GetBytes(char[] chars) { + if (chars == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + return EncodingForwarder.GetBytesAsciiFastPath(this, chars, 0, chars.Length); } + public override byte[] GetBytes(char[] chars, int index, int count) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, index, count); + // Encodes a range of characters in a character array into a range of bytes // in a byte array. An exception occurs if the byte array is not large // enough to hold the complete encoding of the characters. The @@ -92,19 +102,30 @@ namespace System.Text // Alternatively, the GetMaxByteCount method can be used to // determine the maximum number of bytes that will be produced for a given // number of characters, regardless of the actual character values. + public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, charIndex, charCount, bytes, byteIndex); + + [CLSCompliant(false)] + [System.Runtime.InteropServices.ComVisible(false)] + public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, charCount, bytes, byteCount); + + internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, charCount, bytes, byteCount, encoder); +#else + public override int GetBytes(String chars, int charIndex, int charCount, + byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); public override int GetBytes(char[] chars, int charIndex, int charCount, - byte[] bytes, int byteIndex) - { - return EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); - } + byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); [CLSCompliant(false)] [System.Runtime.InteropServices.ComVisible(false)] public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount) - { - return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount); - } + => EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount); +#endif // !BIGENDIAN // Returns the number of characters produced by decoding a range of bytes // in a byte array. @@ -281,8 +302,13 @@ namespace System.Text return byteCount; } +#if !BIGENDIAN + internal override unsafe int GetBytesFallback(char* chars, int charCount, + byte* bytes, int byteCount, EncoderNLS encoder) +#else internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder) +#endif { // Just need to ASSERT, this is called by something else internal that checked parameters already Debug.Assert(bytes != null, "[ASCIIEncoding.GetBytes]bytes is null"); diff --git a/src/coreclr/src/mscorlib/src/System/Text/EncoderNLS.cs b/src/coreclr/src/mscorlib/src/System/Text/EncoderNLS.cs index 2add017..704402a 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/EncoderNLS.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/EncoderNLS.cs @@ -9,6 +9,8 @@ namespace System.Text using System.Text; using System; using System.Diagnostics.Contracts; + using Runtime.CompilerServices; + // An Encoder is used to encode a sequence of blocks of characters into // a sequence of blocks of bytes. Following instantiation of an encoder, // sequential blocks of characters are converted into blocks of bytes through @@ -79,17 +81,13 @@ namespace System.Text public override unsafe int GetByteCount(char[] chars, int index, int count, bool flush) { // Validate input parameters - if (chars == null) - throw new ArgumentNullException(nameof(chars), - Environment.GetResourceString("ArgumentNull_Array")); - - if (index < 0 || count < 0) - throw new ArgumentOutOfRangeException((index<0 ? nameof(index) : nameof(count)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - - if (chars.Length - index < count) - throw new ArgumentOutOfRangeException(nameof(chars), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); + if ((chars == null) || + (index < 0) || + (count < 0) || + (chars.Length - index < count)) + { + EncodingForwarder.ThrowValidationFailedException(chars, index, count); + } Contract.EndContractBlock(); // Avoid empty input problem @@ -98,9 +96,9 @@ namespace System.Text // Just call the pointer version int result = -1; - fixed (char* pChars = chars) + fixed (char* pChars = &chars[0]) { - result = GetByteCount(pChars + index, count, flush); + result = GetByteCountValidated(pChars + index, count, flush); } return result; } @@ -109,14 +107,17 @@ namespace System.Text { // Validate input parameters if (chars == null) - throw new ArgumentNullException(nameof(chars), - Environment.GetResourceString("ArgumentNull_Array")); - + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + ThrowHelper.ThrowCountArgumentOutOfRange_NeedNonNegNumException(); Contract.EndContractBlock(); + return GetByteCountValidated(chars, count, flush); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe int GetByteCountValidated(char* chars, int count, bool flush) + { this.m_mustFlush = flush; this.m_throwOnOverflow = true; return m_encoding.GetByteCount(chars, count, this); @@ -126,21 +127,15 @@ namespace System.Text byte[] bytes, int byteIndex, bool flush) { // Validate parameters - if (chars == null || bytes == null) - throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), - Environment.GetResourceString("ArgumentNull_Array")); - - if (charIndex < 0 || charCount < 0) - throw new ArgumentOutOfRangeException((charIndex<0 ? nameof(charIndex) : nameof(charCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - - if (chars.Length - charIndex < charCount) - throw new ArgumentOutOfRangeException(nameof(chars), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); - - if (byteIndex < 0 || byteIndex > bytes.Length) - throw new ArgumentOutOfRangeException(nameof(byteIndex), - Environment.GetResourceString("ArgumentOutOfRange_Index")); + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charIndex, charCount, bytes); + } Contract.EndContractBlock(); if (chars.Length == 0) @@ -150,27 +145,34 @@ namespace System.Text if (bytes.Length == 0) bytes = new byte[1]; + // Just call pointer version - fixed (char* pChars = chars) - fixed (byte* pBytes = bytes) + fixed (char* pChars = &chars[0]) + fixed (byte* pBytes = &bytes[0]) + { + return GetBytesValidated(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, flush); + } - // Remember that charCount is # to decode, not size of array. - return GetBytes(pChars + charIndex, charCount, - pBytes + byteIndex, byteCount, flush); } public unsafe override int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush) { // Validate parameters - if (chars == null || bytes == null) - throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), - Environment.GetResourceString("ArgumentNull_Array")); - - if (byteCount < 0 || charCount < 0) - throw new ArgumentOutOfRangeException((byteCount<0 ? nameof(byteCount) : nameof(charCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charCount, bytes); + } Contract.EndContractBlock(); + return GetBytesValidated(chars, charCount, bytes, byteCount, flush); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe int GetBytesValidated(char* chars, int charCount, byte* bytes, int byteCount, bool flush) + { this.m_mustFlush = flush; this.m_throwOnOverflow = true; return m_encoding.GetBytes(chars, charCount, bytes, byteCount, this); @@ -183,28 +185,26 @@ namespace System.Text out int charsUsed, out int bytesUsed, out bool completed) { // Validate parameters - if (chars == null || bytes == null) - throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), - Environment.GetResourceString("ArgumentNull_Array")); - - if (charIndex < 0 || charCount < 0) - throw new ArgumentOutOfRangeException((charIndex<0 ? nameof(charIndex) : nameof(charCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - - if (byteIndex < 0 || byteCount < 0) - throw new ArgumentOutOfRangeException((byteIndex<0 ? nameof(byteIndex) : nameof(byteCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - + if (chars == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (bytes == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (charIndex < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (charCount < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (byteIndex < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (byteCount < 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); if (chars.Length - charIndex < charCount) - throw new ArgumentOutOfRangeException(nameof(chars), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); - + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); if (bytes.Length - byteIndex < byteCount) - throw new ArgumentOutOfRangeException(nameof(bytes), - Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); - + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); Contract.EndContractBlock(); + StartConversion(flush); + // Avoid empty input problem if (chars.Length == 0) chars = new char[1]; @@ -212,14 +212,13 @@ namespace System.Text bytes = new byte[1]; // Just call the pointer version (can't do this for non-msft encoders) - fixed (char* pChars = chars) + fixed (char* pChars = &chars[0]) + fixed (byte* pBytes = &bytes[0]) { - fixed (byte* pBytes = bytes) - { - Convert(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, flush, - out charsUsed, out bytesUsed, out completed); - } + bytesUsed = this.m_encoding.GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, this); } + + FinishConversion(charCount, flush, out charsUsed, out completed); } // This is the version that uses pointers. We call the base encoding worker function @@ -229,28 +228,39 @@ namespace System.Text out int charsUsed, out int bytesUsed, out bool completed) { // Validate input parameters - if (bytes == null || chars == null) - throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), - Environment.GetResourceString("ArgumentNull_Array")); - if (charCount < 0 || byteCount < 0) - throw new ArgumentOutOfRangeException((charCount<0 ? nameof(charCount) : nameof(byteCount)), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) + { + EncodingForwarder.ThrowValidationFailedException(chars, charCount, bytes); + } Contract.EndContractBlock(); + StartConversion(flush); + + // Do conversion + bytesUsed = this.m_encoding.GetBytes(chars, charCount, bytes, byteCount, this); + + FinishConversion(charCount, flush, out charsUsed, out completed); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void StartConversion(bool flush) + { // We don't want to throw this.m_mustFlush = flush; this.m_throwOnOverflow = false; this.m_charsUsed = 0; + } - // Do conversion - bytesUsed = this.m_encoding.GetBytes(chars, charCount, bytes, byteCount, this); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void FinishConversion(int charCount, bool flush, out int charsUsed, out bool completed) + { charsUsed = this.m_charsUsed; - // Its completed if they've used what they wanted AND if they didn't want flush or if we are flushed completed = (charsUsed == charCount) && (!flush || !this.HasState) && - (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0); - - // Our data thingys are now full, we can return + (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0); } public Encoding Encoding diff --git a/src/coreclr/src/mscorlib/src/System/Text/Encoding.cs b/src/coreclr/src/mscorlib/src/System/Text/Encoding.cs index dece2e9..db6ca9c 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/Encoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/Encoding.cs @@ -752,20 +752,14 @@ namespace System.Text [Pure] public int GetByteCount(string s, int index, int count) { - if (s == null) - throw new ArgumentNullException(nameof(s), - Environment.GetResourceString("ArgumentNull_String")); - if (index < 0) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (index > s.Length - count) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_IndexCount")); + if ((s == null) || + (index < 0) || + (count < 0) || + (index > s.Length - count)) + { + EncodingForwarder.ThrowValidationFailed(s, index, count); + } Contract.EndContractBlock(); - unsafe { fixed (char* pChar = s) @@ -873,39 +867,37 @@ namespace System.Text // string range. // [Pure] - public byte[] GetBytes(string s, int index, int count) + public unsafe byte[] GetBytes(string s, int index, int count) { - if (s == null) - throw new ArgumentNullException(nameof(s), - Environment.GetResourceString("ArgumentNull_String")); - if (index < 0) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - if (index > s.Length - count) - throw new ArgumentOutOfRangeException(nameof(index), - Environment.GetResourceString("ArgumentOutOfRange_IndexCount")); + if ((s == null) || + (index < 0) || + (count < 0) || + (index > s.Length - count)) + { + EncodingForwarder.ThrowValidationFailed(s, index, count); + } Contract.EndContractBlock(); - unsafe + byte[] bytes; + fixed (char* pChar = s) { - fixed (char* pChar = s) + int byteCount = GetByteCount(pChar + index, count); + if (byteCount == 0) { - int byteCount = GetByteCount(pChar + index, count); - if (byteCount == 0) - return Array.Empty(); - - byte[] bytes = new byte[byteCount]; + bytes = Array.Empty(); + } + else + { + bytes = new byte[byteCount]; fixed (byte* pBytes = &bytes[0]) { int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount); Debug.Assert(byteCount == bytesReceived); } - return bytes; } } + + return bytes; } public virtual int GetBytes(String s, int charIndex, int charCount, @@ -925,6 +917,14 @@ namespace System.Text return GetBytes(chars, charCount, bytes, byteCount); } + internal virtual unsafe int GetBytesFallback(char* chars, int charCount, + byte* bytes, int byteCount, EncoderNLS encoder) + { + // Used for fallback for internal GetBytes(..., EncoderNLS encoder) + // where it shares a common path. + throw new NotSupportedException(); + } + // We expect this to be the workhorse for NLS Encodings, but for existing // ones we need a working (if slow) default implimentation) // diff --git a/src/coreclr/src/mscorlib/src/System/Text/EncodingForwarder.cs b/src/coreclr/src/mscorlib/src/System/Text/EncodingForwarder.cs index 9a8dd26..fc641e4 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/EncodingForwarder.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/EncodingForwarder.cs @@ -35,19 +35,13 @@ namespace System.Text public unsafe static int GetByteCount(Encoding encoding, char[] chars, int index, int count) { // Validate parameters - Debug.Assert(encoding != null); // this parameter should only be affected internally, so just do a debug check here - if (chars == null) - { - throw new ArgumentNullException(nameof(chars), Environment.GetResourceString("ArgumentNull_Array")); - } - if (index < 0 || count < 0) + if ((chars == null) || + (index < 0) || + (count < 0) || + (chars.Length - index < count)) { - throw new ArgumentOutOfRangeException(index < 0 ? nameof(index) : nameof(count), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - } - if (chars.Length - index < count) - { - throw new ArgumentOutOfRangeException(nameof(chars), Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); + ThrowValidationFailedException(chars, index, count); } Contract.EndContractBlock(); @@ -139,21 +133,15 @@ namespace System.Text public unsafe static int GetBytes(Encoding encoding, char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) { Debug.Assert(encoding != null); - if (chars == null || bytes == null) - { - throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes), Environment.GetResourceString("ArgumentNull_Array")); - } - if (charIndex < 0 || charCount < 0) - { - throw new ArgumentOutOfRangeException(charIndex < 0 ? nameof(charIndex) : nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - } - if (chars.Length - charIndex < charCount) - { - throw new ArgumentOutOfRangeException(nameof(chars), Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer")); - } - if (byteIndex < 0 || byteIndex > bytes.Length) + // Validate parameters + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) { - throw new ArgumentOutOfRangeException(nameof(byteIndex), Environment.GetResourceString("ArgumentOutOfRange_Index")); + ThrowValidationFailedException(chars, charIndex, charCount, bytes); } Contract.EndContractBlock(); @@ -179,19 +167,411 @@ namespace System.Text public unsafe static int GetBytes(Encoding encoding, char* chars, int charCount, byte* bytes, int byteCount) { Debug.Assert(encoding != null); - if (bytes == null || chars == null) + // Validate parameters + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) { - throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), Environment.GetResourceString("ArgumentNull_Array")); + ThrowValidationFailedException(chars, charCount, bytes); } - if (charCount < 0 || byteCount < 0) + Contract.EndContractBlock(); + + return encoding.GetBytes(chars, charCount, bytes, byteCount, encoder: null); + } + +#if !BIGENDIAN + // Ascii fast-paths + public unsafe static byte[] GetBytesAsciiFastPath(Encoding encoding, String s) + { + // Fast path for pure ASCII data for ASCII and UTF8 encoding + Debug.Assert(encoding != null); + if (s == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.s, ExceptionResource.ArgumentNull_String); + Contract.EndContractBlock(); + + int charCount = s.Length; + + byte[] bytes; + if (charCount > 0) { - throw new ArgumentOutOfRangeException(charCount < 0 ? nameof(charCount) : nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); + fixed (char* input = s) + bytes = GetBytesAsciiFastPath(encoding, input, charCount); + } + else + { + bytes = Array.Empty(); + } + + return bytes; + } + + internal unsafe static byte[] GetBytesAsciiFastPath(Encoding encoding, char[] chars, int index, int count) + { + // Fast path for pure ASCII data for ASCII and UTF8 encoding + Debug.Assert(encoding != null); + if ((chars == null) || + (index < 0) || + (count < 0) || + (chars.Length - index < count)) + { + ThrowValidationFailedException(chars, index, count); } Contract.EndContractBlock(); - return encoding.GetBytes(chars, charCount, bytes, byteCount, encoder: null); + byte[] bytes; + if (count > 0) + { + fixed (char* input = chars) + bytes = GetBytesAsciiFastPath(encoding, input + index, count); + } + else + { + bytes = Array.Empty(); + } + + return bytes; + + } + + public unsafe static int GetBytesAsciiFastPath(Encoding encoding, char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) + { + // Fast path for pure ASCII data for ASCII and UTF8 encoding + Debug.Assert(encoding != null); + if ((chars == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (chars.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + ThrowValidationFailedException(chars, charIndex, charCount, bytes); + } + Contract.EndContractBlock(); + + // Note that byteCount is the # of bytes to decode, not the size of the array + int byteCount = bytes.Length - byteIndex; + if (charCount > 0 && byteCount == 0) + ThrowBytesOverflow(encoding); + + int lengthEncoded; + if (charCount > 0 && byteCount > 0) + { + fixed (char* pInput = chars) + fixed (byte* pOutput = &bytes[0]) + { + char* input = pInput + charIndex; + byte* output = pOutput + byteIndex; + var lengthToEncode = Math.Min(charCount, byteCount); + lengthEncoded = GetBytesAsciiFastPath(input, output, lengthToEncode); + if (lengthEncoded < lengthToEncode) + { + // Not all ASCII, use encoding's GetBytes for remaining conversion + lengthEncoded += encoding.GetBytesFallback(input + lengthEncoded, charCount - lengthEncoded, output + lengthEncoded, byteCount - lengthEncoded, null); + } + } + } + else + { + // Nothing to encode + lengthEncoded = 0; + } + + return lengthEncoded; + } + + public unsafe static int GetBytesAsciiFastPath(Encoding encoding, char* chars, int charCount, byte* bytes, int byteCount) + { + // Fast path for pure ASCII data for ASCII and UTF8 encoding + Debug.Assert(encoding != null); + if ((bytes == null) || + (chars == null) || + (charCount < 0) || + (byteCount < 0)) + { + ThrowValidationFailedException(chars, charCount, bytes); + } + Contract.EndContractBlock(); + + if (charCount > 0 && byteCount == 0) + ThrowBytesOverflow(encoding); + + int lengthEncoded; + if (charCount > 0 && byteCount > 0) + { + var lengthToEncode = Math.Min(charCount, byteCount); + lengthEncoded = GetBytesAsciiFastPath(chars, bytes, lengthToEncode); + if (lengthEncoded < lengthToEncode) + { + // Not all ASCII, use encoding's GetBytes for remaining conversion + lengthEncoded += encoding.GetBytesFallback(chars + lengthEncoded, charCount - lengthEncoded, bytes + lengthEncoded, byteCount - lengthEncoded, null); + } + } + else + { + // Nothing to encode + lengthEncoded = 0; + } + + return lengthEncoded; + } + + public unsafe static int GetBytesAsciiFastPath(Encoding encoding, char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder) + { + // Fast path for pure ASCII data for ASCII and UTF8 encoding + // Just need to Assert, this is called by internal EncoderNLS and parameters should already be checked + Debug.Assert(encoding != null); + Debug.Assert(bytes != null); + Debug.Assert(chars != null); + Debug.Assert(charCount >= 0); + Debug.Assert(byteCount >= 0); + + int lengthEncoded; + if ((encoder?.InternalHasFallbackBuffer ?? false) && + (encoder.FallbackBuffer.Remaining > 0)) + { + // Non-ASCII data already in Fallback buffer, so straight to encoder's version + lengthEncoded = encoding.GetBytesFallback(chars, charCount, bytes, byteCount, encoder); + } + else if (charCount > 0 && byteCount > 0) + { + var lengthToEncode = Math.Min(charCount, byteCount); + lengthEncoded = GetBytesAsciiFastPath(chars, bytes, lengthToEncode); + if (lengthEncoded < lengthToEncode) + { + // Not all ASCII, use encoding's GetBytes for remaining conversion + lengthEncoded += encoding.GetBytesFallback(chars + lengthEncoded, charCount - lengthEncoded, bytes + lengthEncoded, byteCount - lengthEncoded, encoder); + } + } + else + { + // Nothing to encode + lengthEncoded = 0; + } + + return lengthEncoded; + } + + public unsafe static int GetBytesAsciiFastPath(Encoding encoding, String s, int charIndex, int charCount, byte[] bytes, int byteIndex) + { + // Fast path for pure ASCII data for ASCII and UTF8 encoding + Debug.Assert(encoding != null); + if ((s == null) || + (bytes == null) || + (charIndex < 0) || + (charCount < 0) || + (s.Length - charIndex < charCount) || + (byteIndex < 0 || byteIndex > bytes.Length)) + { + ThrowValidationFailed(s, charIndex, charCount, bytes); + } + Contract.EndContractBlock(); + + // Note that byteCount is the # of bytes to decode, not the size of the array + int byteCount = bytes.Length - byteIndex; + if (charCount > 0 && byteCount == 0) + ThrowBytesOverflow(encoding); + + int lengthEncoded; + if (charCount > 0 && byteCount > 0) + { + fixed (char* pInput = s) + fixed (byte* pOutput = &bytes[0]) + { + char* input = pInput + charIndex; + byte* output = pOutput + byteIndex; + var lengthToEncode = Math.Min(charCount, byteCount); + lengthEncoded = GetBytesAsciiFastPath(input, output, lengthToEncode); + if (lengthEncoded < lengthToEncode) + { + // Not all ASCII, use encoding's GetBytes for remaining conversion + lengthEncoded += encoding.GetBytesFallback(input + lengthEncoded, charCount - lengthEncoded, output + lengthEncoded, byteCount - lengthEncoded, null); + } + } + } + else + { + // Nothing to encode + lengthEncoded = 0; + } + + return lengthEncoded; + } + + private unsafe static byte[] GetBytesAsciiFastPath(Encoding encoding, char* input, int charCount) + { + // Fast path for pure ASCII data for ASCII and UTF8 encoding + int asciiLength; + int remaining = 0; + // Assume string is all ASCII and size array for that + byte[] bytes = new byte[charCount]; + + fixed (byte* output = &bytes[0]) + { + asciiLength = GetBytesAsciiFastPath(input, output, charCount); + if (asciiLength < charCount) + { + // Not all ASCII, get the byte count for the remaining encoded conversion + remaining = encoding.GetByteCount(input + asciiLength, charCount - asciiLength, null); + } + } + + if (remaining > 0) + { + // Not all ASCII, fallback to slower path for remaining encoding + var encoded = ResizeGetRemainingBytes(encoding, input, charCount, ref bytes, asciiLength, remaining); + Debug.Assert(encoded == remaining); + } + + return bytes; + } + + internal unsafe static int ResizeGetRemainingBytes(Encoding encoding, char* chars, int charCount, ref byte[] bytes, int alreadyEncoded, int remaining) + { + // Resize the array to the correct size + Array.Resize(ref bytes, alreadyEncoded + remaining); + + int encoded; + fixed (byte* output = &bytes[0]) + { + // Use encoding's GetBytes for remaining conversion + encoded = encoding.GetBytesFallback(chars + alreadyEncoded, charCount - alreadyEncoded, output + alreadyEncoded, remaining, null); + } + + return encoded; } + internal unsafe static int GetBytesAsciiFastPath(char* input, byte* output, int byteCount) + { + const int Shift16Shift24 = (1 << 16) | (1 << 24); + const int Shift8Identity = (1 << 8) | (1); + + // Encode as bytes upto the first non-ASCII byte and return count encoded + int i = 0; +#if BIT64 + if (byteCount < 4) goto trailing; + + int unaligned = (int)(((ulong)input) & 0x7) >> 1; + // Unaligned chars + for (; i < unaligned; i++) + { + char ch = *(input + i); + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { + *(output + i) = (byte)ch; // Cast convert + } + } + + // Aligned + int ulongDoubleCount = (byteCount - i) & ~0x7; + for (; i < ulongDoubleCount; i += 8) + { + ulong inputUlong0 = *(ulong*)(input + i); + ulong inputUlong1 = *(ulong*)(input + i + 4); + if (((inputUlong0 | inputUlong1) & 0xFF80FF80FF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } + // Pack 16 ASCII chars into 16 bytes + *(uint*)(output + i) = + ((uint)((inputUlong0 * Shift16Shift24) >> 24) & 0xffff) | + ((uint)((inputUlong0 * Shift8Identity) >> 24) & 0xffff0000); + *(uint*)(output + i + 4) = + ((uint)((inputUlong1 * Shift16Shift24) >> 24) & 0xffff) | + ((uint)((inputUlong1 * Shift8Identity) >> 24) & 0xffff0000); + } + if (byteCount - 4 > i) + { + ulong inputUlong = *(ulong*)(input + i); + if ((inputUlong & 0xFF80FF80FF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } + // Pack 8 ASCII chars into 8 bytes + *(uint*)(output + i) = + ((uint)((inputUlong * Shift16Shift24) >> 24) & 0xffff) | + ((uint)((inputUlong * Shift8Identity) >> 24) & 0xffff0000); + i += 4; + } + + trailing: + for (; i < byteCount; i++) + { + char ch = *(input + i); + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { + *(output + i) = (byte)ch; // Cast convert + } + } +#else + // Unaligned chars + if ((unchecked((int)input) & 0x2) != 0) + { + char ch = *input; + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { + i = 1; + *(output) = (byte)ch; // Cast convert + } + } + + // Aligned + int uintCount = (byteCount - i) & ~0x3; + for (; i < uintCount; i += 4) + { + uint inputUint0 = *(uint*)(input + i); + uint inputUint1 = *(uint*)(input + i + 2); + if (((inputUint0 | inputUint1) & 0xFF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } + // Pack 4 ASCII chars into 4 bytes + *(ushort*)(output + i) = (ushort)(inputUint0 | (inputUint0 >> 8)); + *(ushort*)(output + i + 2) = (ushort)(inputUint1 | (inputUint1 >> 8)); + } + if (byteCount - 1 > i) + { + uint inputUint = *(uint*)(input + i); + if ((inputUint & 0xFF80FF80) != 0) + { + goto exit; // Found non-ASCII, bail + } + // Pack 2 ASCII chars into 2 bytes + *(ushort*)(output + i) = (ushort)(inputUint | (inputUint >> 8)); + i += 2; + } + + if (i < byteCount) + { + char ch = *(input + i); + if (ch > 0x7F) + { + goto exit; // Found non-ASCII, bail + } + else + { + *(output + i) = (byte)ch; // Cast convert + i = byteCount; + } + } +#endif // BIT64 + exit: + return i; + } +#endif // !BIGENDIAN + public unsafe static int GetCharCount(Encoding encoding, byte[] bytes, int index, int count) { Debug.Assert(encoding != null); @@ -325,5 +705,110 @@ namespace System.Text return string.CreateStringFromEncoding(pBytes + index, count, encoding); } } + + private static void ThrowBytesOverflow(Encoding encoding) + { + throw GetArgumentException_ThrowBytesOverflow(encoding); + } + + internal static void ThrowValidationFailedException(char[] chars, int index, int count) + { + throw GetValidationFailedException(chars, index, count); + } + + internal static void ThrowValidationFailedException(char[] chars, int charIndex, int charCount, byte[] bytes) + { + throw GetValidationFailedException(chars, charIndex, charCount, bytes); + } + + internal static void ThrowValidationFailed(string s, int index, int count) + { + throw GetValidationFailedException(s, index, count); + } + + private static void ThrowValidationFailed(string s, int charIndex, int charCount, byte[] bytes) + { + throw GetValidationFailedException(s, charIndex, charCount, bytes); + } + + internal static unsafe void ThrowValidationFailedException(char* chars, int charCount, byte* bytes) + { + throw GetValidationFailedException(chars, charCount, bytes); + } + + private static ArgumentException GetArgumentException_ThrowBytesOverflow(Encoding encoding) + { + throw new ArgumentException( + Environment.GetResourceString("Argument_EncodingConversionOverflowBytes", + encoding.EncodingName, encoding.EncoderFallback.GetType()), "bytes"); + } + + private static Exception GetValidationFailedException(char[] chars, int index, int count) + { + if (chars == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (index < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (count < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + Debug.Assert(chars.Length - index < count); + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + } + + private static Exception GetValidationFailedException(char[] chars, int charIndex, int charCount, byte[] bytes) + { + if (chars == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (bytes == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (charIndex < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (charCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (chars.Length - charIndex < charCount) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + //if (byteIndex < 0 || byteIndex > bytes.Length) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index); + } + + private static Exception GetValidationFailedException(string s, int index, int count) + { + if (s == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.s, ExceptionResource.ArgumentNull_String); + if (index < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_Index); + if (count < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + Debug.Assert(index > s.Length - count); + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + } + + private static unsafe Exception GetValidationFailedException(char* chars, int charCount, byte* bytes) + { + if (bytes == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (chars == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + if (charCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + // (byteCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.byteCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + } + + private static Exception GetValidationFailedException(string s, int charIndex, int charCount, byte[] bytes) + { + if (s == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.s, ExceptionResource.ArgumentNull_String); + if (bytes == null) + return ThrowHelper.GetArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array); + if (charIndex < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (charCount < 0) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.charCount, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + if (s.Length - charIndex < charCount) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer); + // (byteIndex < 0 || byteIndex > bytes.Length) + return ThrowHelper.GetArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index); + } } } diff --git a/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs b/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs index ba19649..29a1f75 100644 --- a/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs +++ b/src/coreclr/src/mscorlib/src/System/Text/UTF8Encoding.cs @@ -137,12 +137,22 @@ namespace System.Text return EncodingForwarder.GetByteCount(this, chars, count); } - public override int GetBytes(String s, int charIndex, int charCount, - byte[] bytes, int byteIndex) - { - return EncodingForwarder.GetBytes(this, s, charIndex, charCount, bytes, byteIndex); +#if !BIGENDIAN + public override byte[] GetBytes(String s) + => EncodingForwarder.GetBytesAsciiFastPath(this, s); + + public override int GetBytes(String s, int charIndex, int charCount, byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytesAsciiFastPath(this, s, charIndex, charCount, bytes, byteIndex); + + public override byte[] GetBytes(char[] chars) { + if (chars == null) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array); + return EncodingForwarder.GetBytesAsciiFastPath(this, chars, 0, chars.Length); } + public override byte[] GetBytes(char[] chars, int index, int count) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, index, count); + // Encodes a range of characters in a character array into a range of bytes // in a byte array. An exception occurs if the byte array is not large // enough to hold the complete encoding of the characters. The @@ -151,19 +161,30 @@ namespace System.Text // Alternatively, the GetMaxByteCount method can be used to // determine the maximum number of bytes that will be produced for a given // number of characters, regardless of the actual character values. + public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, charIndex, charCount, bytes, byteIndex); + + [CLSCompliant(false)] + [System.Runtime.InteropServices.ComVisible(false)] + public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, charCount, bytes, byteCount); + + internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder) + => EncodingForwarder.GetBytesAsciiFastPath(this, chars, charCount, bytes, byteCount, encoder); +#else + public override int GetBytes(String chars, int charIndex, int charCount, + byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); public override int GetBytes(char[] chars, int charIndex, int charCount, - byte[] bytes, int byteIndex) - { - return EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); - } + byte[] bytes, int byteIndex) + => EncodingForwarder.GetBytes(this, chars, charIndex, charCount, bytes, byteIndex); [CLSCompliant(false)] [System.Runtime.InteropServices.ComVisible(false)] public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount) - { - return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount); - } + => EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount); +#endif // !BIGENDIAN // Returns the number of characters produced by decoding a range of bytes // in a byte array. @@ -580,8 +601,13 @@ namespace System.Text // Our workhorse // Note: We ignore mismatched surrogates, unless the exception flag is set in which case we throw +#if !BIGENDIAN + internal override unsafe int GetBytesFallback(char* chars, int charCount, + byte* bytes, int byteCount, EncoderNLS baseEncoder) +#else internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder) +#endif { Debug.Assert(chars!=null, "[UTF8Encoding.GetBytes]chars!=null"); Debug.Assert(byteCount >=0, "[UTF8Encoding.GetBytes]byteCount >=0"); diff --git a/src/coreclr/src/mscorlib/src/System/ThrowHelper.cs b/src/coreclr/src/mscorlib/src/System/ThrowHelper.cs index f487bf6..8f429dc 100644 --- a/src/coreclr/src/mscorlib/src/System/ThrowHelper.cs +++ b/src/coreclr/src/mscorlib/src/System/ThrowHelper.cs @@ -77,6 +77,11 @@ namespace System { ExceptionResource.ArgumentOutOfRange_Index); } + internal static void ThrowCountArgumentOutOfRange_NeedNonNegNumException() { + throw GetArgumentOutOfRangeException(ExceptionArgument.count, + ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + } + internal static void ThrowIndexArgumentOutOfRange_NeedNonNegNumException() { throw GetArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); @@ -205,6 +210,14 @@ namespace System { throw GetInvalidOperationException(ExceptionResource.InvalidOperation_EnumFailedVersion); } + internal static ArgumentNullException GetArgumentNullException(ExceptionArgument argument, ExceptionResource resource) { + throw new ArgumentNullException(GetArgumentName(argument), GetResourceString(resource)); + } + + internal static void ThrowArgumentNullException(ExceptionArgument argument, ExceptionResource resource) { + throw GetArgumentNullException(argument, resource); + } + internal static void ThrowInvalidOperationException_InvalidOperation_EnumOpCantHappen() { throw GetInvalidOperationException(ExceptionResource.InvalidOperation_EnumOpCantHappen); } @@ -225,7 +238,7 @@ namespace System { return new ArgumentException(Environment.GetResourceString("Arg_WrongType", value, targetType), nameof(value)); } - private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(ExceptionArgument argument, ExceptionResource resource) { + internal static ArgumentOutOfRangeException GetArgumentOutOfRangeException(ExceptionArgument argument, ExceptionResource resource) { return new ArgumentOutOfRangeException(GetArgumentName(argument), GetResourceString(resource)); } @@ -361,6 +374,13 @@ namespace System { updateValueFactory, concurrencyLevel, text, + s, + chars, + bytes, + byteIndex, + charIndex, + byteCount, + charCount, } @@ -466,6 +486,9 @@ namespace System { ConcurrentDictionary_ArrayNotLargeEnough, ConcurrentDictionary_ArrayIncorrectType, ConcurrentCollection_SyncRoot_NotSupported, + ArgumentNull_Array, + ArgumentOutOfRange_IndexCountBuffer, + ArgumentNull_String, } } -- 2.7.4