From eae763affd9624fbddf1c30ead52d23651de98e4 Mon Sep 17 00:00:00 2001
From: Stephen Toub
Date: Tue, 8 Aug 2017 17:45:16 -0400
Subject: [PATCH] Add Encoding Span-based APIs (#13269)

* Add Encoding Span-based APIs

* Address PR feedback
---
Reviewer notes (this section is not part of the commit message):

  - Encoding gains span-based overloads of GetByteCount, GetBytes,
    GetCharCount, GetChars and GetString. Each one pins its span argument(s)
    with a `fixed` statement and forwards to the existing pointer-based
    overload, passing the span's Length as the count.
  - Encoding gains a virtual Preamble property whose base implementation
    returns GetPreamble().
  - UTF32Encoding, UTF8Encoding and UnicodeEncoding override Preamble. When the
    runtime type is exactly the built-in type they return a static readonly
    byte array (or an empty array when no byte order mark is emitted);
    otherwise they call GetPreamble(), so that a derived type's GetPreamble
    override is still honored.
  - UTF8Encoding._emitUTF8Identifier changes from private to internal readonly
    so that the nested UTF8EncodingSealed type can read it in its own Preamble
    override.
  - NOTE(review): what the span overloads do for an empty or default span
    depends on what DangerousGetPinnableReference() yields and on how the
    pointer-based overloads treat that pointer; neither is visible in this
    patch -- confirm.

 src/mscorlib/shared/System/Text/Encoding.cs   | 44 +++++++++++++++++++
 .../shared/System/Text/UTF32Encoding.cs       |  7 +++
 .../shared/System/Text/UTF8Encoding.cs        | 10 ++++-
 .../shared/System/Text/UnicodeEncoding.cs     |  7 +++
 4 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/src/mscorlib/shared/System/Text/Encoding.cs b/src/mscorlib/shared/System/Text/Encoding.cs
index 4f23d2a4a7..bf8cb03db2 100644
--- a/src/mscorlib/shared/System/Text/Encoding.cs
+++ b/src/mscorlib/shared/System/Text/Encoding.cs
@@ -382,6 +382,8 @@ namespace System.Text
             return Array.Empty<byte>();
         }
 
+        public virtual ReadOnlySpan<byte> Preamble => GetPreamble();
+
         private void GetDataItem()
         {
             if (_dataItem == null)
@@ -733,6 +735,14 @@ namespace System.Text
             return GetByteCount(arrChar, 0, count);
         }
 
+        public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
+        {
+            fixed (char* charsPtr = &chars.DangerousGetPinnableReference())
+            {
+                return GetByteCount(charsPtr, chars.Length);
+            }
+        }
+
         // For NLS Encodings, workhorse takes an encoder (may be null)
         // Always validate parameters before calling internal version, which will only assert.
         internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
@@ -916,6 +926,15 @@ namespace System.Text
             return byteCount;
         }
 
+        public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
+        {
+            fixed (char* charsPtr = &chars.DangerousGetPinnableReference())
+            fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+            {
+                return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length);
+            }
+        }
+
         // Returns the number of characters produced by decoding the given byte
         // array.
         //
@@ -962,6 +981,14 @@ namespace System.Text
             return GetCharCount(arrbyte, 0, count);
         }
 
+        public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
+        {
+            fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+            {
+                return GetCharCount(bytesPtr, bytes.Length);
+            }
+        }
+
         // This is our internal workhorse
         // Always validate parameters before calling internal version, which will only assert.
         internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
@@ -1070,6 +1097,14 @@ namespace System.Text
             return charCount;
         }
 
+        public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
+        {
+            fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+            fixed (char* charsPtr = &chars.DangerousGetPinnableReference())
+            {
+                return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length);
+            }
+        }
 
         // This is our internal workhorse
         // Always validate parameters before calling internal version, which will only assert.
@@ -1093,6 +1128,15 @@ namespace System.Text
             return String.CreateStringFromEncoding(bytes, byteCount, this);
         }
 
+        public unsafe string GetString(ReadOnlySpan<byte> bytes)
+        {
+            fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+            {
+                return GetString(bytesPtr, bytes.Length);
+            }
+        }
+
+
         // Returns the code page identifier of this encoding. The returned value is
         // an integer between 0 and 65535 if the encoding has a code page
         // identifier, or -1 if the encoding does not represent a code page.
diff --git a/src/mscorlib/shared/System/Text/UTF32Encoding.cs b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
index 10161d193e..260518e21c 100644
--- a/src/mscorlib/shared/System/Text/UTF32Encoding.cs
+++ b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
@@ -39,6 +39,9 @@ namespace System.Text
         internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
         internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
 
+        private static readonly byte[] s_bigEndianPreamble = new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
+        private static readonly byte[] s_littleEndianPreamble = new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
+
         private bool _emitUTF32ByteOrderMark = false;
         private bool _isThrowException = false;
         private bool _bigEndian = false;
@@ -1177,6 +1180,10 @@ namespace System.Text
             return Array.Empty<byte>();
         }
 
+        public override ReadOnlySpan<byte> Preamble =>
+            GetType() != typeof(UTF32Encoding) ? GetPreamble() : // in case a derived UTF32Encoding overrode GetPreamble
+            _emitUTF32ByteOrderMark ? (_bigEndian ? s_bigEndianPreamble : s_littleEndianPreamble) :
+            Array.Empty<byte>();
 
         public override bool Equals(Object value)
         {
diff --git a/src/mscorlib/shared/System/Text/UTF8Encoding.cs b/src/mscorlib/shared/System/Text/UTF8Encoding.cs
index 02b18935e4..974bf75650 100644
--- a/src/mscorlib/shared/System/Text/UTF8Encoding.cs
+++ b/src/mscorlib/shared/System/Text/UTF8Encoding.cs
@@ -54,15 +54,19 @@ namespace System.Text
         internal sealed class UTF8EncodingSealed : UTF8Encoding
         {
             public UTF8EncodingSealed(bool encoderShouldEmitUTF8Identifier) : base(encoderShouldEmitUTF8Identifier) { }
+
+            public override ReadOnlySpan<byte> Preamble => _emitUTF8Identifier ? s_preamble : Array.Empty<byte>();
         }
 
         // Used by Encoding.UTF8 for lazy initialization
         // The initialization code will not be run until a static member of the class is referenced
         internal static readonly UTF8EncodingSealed s_default = new UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: true);
 
+        internal static readonly byte[] s_preamble = new byte[3] { 0xEF, 0xBB, 0xBF };
+
         // Yes, the idea of emitting U+FEFF as a UTF-8 identifier has made it into
         // the standard.
-        private bool _emitUTF8Identifier = false;
+        internal readonly bool _emitUTF8Identifier = false;
 
         private bool _isThrowException = false;
 
@@ -2497,6 +2501,10 @@ namespace System.Text
             return Array.Empty<byte>();
         }
 
+        public override ReadOnlySpan<byte> Preamble =>
+            GetType() != typeof(UTF8Encoding) ? GetPreamble() : // in case a derived UTF8Encoding overrode GetPreamble
+            _emitUTF8Identifier ? s_preamble :
+            Array.Empty<byte>();
 
         public override bool Equals(Object value)
         {
diff --git a/src/mscorlib/shared/System/Text/UnicodeEncoding.cs b/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
index 8e44317ce2..78355299c1 100644
--- a/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
+++ b/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
@@ -20,6 +20,9 @@ namespace System.Text
         internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
         internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);
 
+        private static readonly byte[] s_bigEndianPreamble = new byte[2] { 0xfe, 0xff };
+        private static readonly byte[] s_littleEndianPreamble = new byte[2] { 0xff, 0xfe };
+
         internal bool isThrowException = false;
 
         internal bool bigEndian = false;
@@ -1898,6 +1901,10 @@ namespace System.Text
             return Array.Empty<byte>();
         }
 
+        public override ReadOnlySpan<byte> Preamble =>
+            GetType() != typeof(UnicodeEncoding) ? GetPreamble() : // in case a derived UnicodeEncoding overrode GetPreamble
+            byteOrderMark ? (bigEndian ? s_bigEndianPreamble : s_littleEndianPreamble) :
+            Array.Empty<byte>();
 
         public override int GetMaxByteCount(int charCount)
         {
-- 
2.34.1