<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIEncoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Decoder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoder.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingNLS.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingProvider.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Normalization.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeEncoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF32Encoding.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF7Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF8Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\ThreadAttributes.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Threading\AbandonedMutexException.cs" />
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Threading;
+using System.Runtime.Serialization;
+using System.Diagnostics.CodeAnalysis;
+
+namespace System.Text
+{
+ // This abstract base class represents a character encoding. The class provides
+ // methods to convert arrays and strings of Unicode characters to and from
+ // arrays of bytes. A number of Encoding implementations are provided in
+ // the System.Text package, including:
+ //
+ // ASCIIEncoding, which encodes Unicode characters as single 7-bit
+ // ASCII characters. This encoding only supports character values between 0x00
+ // and 0x7F.
+ // BaseCodePageEncoding, which encapsulates a Windows code page. Any
+ // installed code page can be accessed through this encoding, and conversions
+ // are performed using the WideCharToMultiByte and
+ // MultiByteToWideChar Windows API functions.
+ // UnicodeEncoding, which encodes each Unicode character as two
+ // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
+ // page 1201) encodings are recognized.
+ // UTF7Encoding, which encodes Unicode characters using the UTF-7
+ // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
+ // encoding supports all Unicode character values, and can also be accessed
+ // as code page 65000.
+ // UTF8Encoding, which encodes Unicode characters using the UTF-8
+ // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
+ // encoding supports all Unicode character values, and can also be accessed
+ // as code page 65001.
+ // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
+ //
+ // In addition to directly instantiating Encoding objects, an
+ // application can use the ForCodePage, GetASCII,
+ // GetDefault, GetUnicode, GetUTF7, and GetUTF8
+ // methods in this class to obtain encodings.
+ //
+ // Through an encoding, the GetBytes method is used to convert arrays
+ // of characters to arrays of bytes, and the GetChars method is used to
+ // convert arrays of bytes to arrays of characters. The GetBytes and
+ // GetChars methods maintain no state between conversions, and are
+ // generally intended for conversions of complete blocks of bytes and
+ // characters in one operation. When the data to be converted is only available
+ // in sequential blocks (such as data read from a stream) or when the amount of
+ // data is so large that it needs to be divided into smaller blocks, an
+ // application may choose to use a Decoder or an Encoder to
+ // perform the conversion. Decoders and encoders allow sequential blocks of
+ // data to be converted and they maintain the state required to support
+ // conversions of data that spans adjacent blocks. Decoders and encoders are
+ // obtained using the GetDecoder and GetEncoder methods.
+ //
+ // The core GetBytes and GetChars methods require the caller
+ // to provide the destination buffer and ensure that the buffer is large enough
+ // to hold the entire result of the conversion. When using these methods,
+ // either directly on an Encoding object or on an associated
+ // Decoder or Encoder, an application can use one of two methods
+ // to allocate destination buffers.
+ //
+ // The GetByteCount and GetCharCount methods can be used to
+ // compute the exact size of the result of a particular conversion, and an
+ // appropriately sized buffer for that conversion can then be allocated.
+ // The GetMaxByteCount and GetMaxCharCount methods can be
+ // used to compute the maximum possible size of a conversion of a given
+ // number of bytes or characters, and a buffer of that size can then be reused
+ // for multiple conversions.
+ //
+ // The first method generally uses less memory, whereas the second method
+ // generally executes faster.
+ //
+
+ public abstract class Encoding : ICloneable
+ {
+ // For netcore we use UTF8 as default encoding since ANSI isn't available
+ private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding = new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);
+
+ // Returns the default encoding. Here that is the cached UTF-8 instance created above
+ // (constructed with encoderShouldEmitUTF8Identifier: false), not a system ANSI code page encoding.
+ public static Encoding Default => s_defaultEncoding;
+
+ //
+ // The following values are from mlang.idl. These values
+ // should be in sync with those in mlang.idl.
+ // They are bit flags tested against dataItem.Flags by the IsBrowserDisplay,
+ // IsBrowserSave, IsMailNewsDisplay and IsMailNewsSave properties.
+ //
+ internal const int MIMECONTF_MAILNEWS = 0x00000001;
+ internal const int MIMECONTF_BROWSER = 0x00000002;
+ internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;
+ internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;
+
+ // Special Case Code Pages
+ // CodePageDefault is mapped to Encoding.Default by GetEncoding(int); the "No*" values
+ // are rejected there with an ArgumentException.
+ private const int CodePageDefault = 0;
+ private const int CodePageNoOEM = 1; // OEM Code page not supported
+ private const int CodePageNoMac = 2; // MAC code page not supported
+ private const int CodePageNoThread = 3; // Thread code page not supported
+ private const int CodePageNoSymbol = 42; // Symbol code page not supported
+ private const int CodePageUnicode = 1200; // Unicode
+ private const int CodePageBigEndian = 1201; // Big Endian Unicode
+ private const int CodePageWindows1252 = 1252; // Windows 1252 code page
+
+ // 20936 has same code page as 10008, so we'll special case it
+ private const int CodePageMacGB2312 = 10008;
+ private const int CodePageGB2312 = 20936;
+ private const int CodePageMacKorean = 10003;
+ private const int CodePageDLLKorean = 20949;
+
+ // ISO 2022 Code Pages
+ private const int ISO2022JP = 50220;
+ private const int ISO2022JPESC = 50221;
+ private const int ISO2022JPSISO = 50222;
+ private const int ISOKorean = 50225;
+ private const int ISOSimplifiedCN = 50227;
+ private const int EUCJP = 51932;
+ private const int ChineseHZ = 52936; // HZ has ~}~{~~ sequences
+
+ // 51936 is the same as 936
+ private const int DuplicateEUCCN = 51936;
+ private const int EUCCN = 936;
+
+ private const int EUCKR = 51949;
+
+ // Latin 1 & ASCII Code Pages
+ internal const int CodePageASCII = 20127; // ASCII
+ internal const int ISO_8859_1 = 28591; // Latin1
+
+ // ISCII
+ private const int ISCIIAssemese = 57006;
+ private const int ISCIIBengali = 57003;
+ private const int ISCIIDevanagari = 57002;
+ private const int ISCIIGujarathi = 57010;
+ private const int ISCIIKannada = 57008;
+ private const int ISCIIMalayalam = 57009;
+ private const int ISCIIOriya = 57007;
+ private const int ISCIIPanjabi = 57011;
+ private const int ISCIITamil = 57004;
+ private const int ISCIITelugu = 57005;
+
+ // GB18030
+ private const int GB18030 = 54936;
+
+ // Other
+ private const int ISO_8859_8I = 38598;
+ private const int ISO_8859_8_Visual = 28598;
+
+ // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
+ private const int ENC50229 = 50229;
+
+ // Special code pages
+ private const int CodePageUTF7 = 65000;
+ private const int CodePageUTF8 = 65001;
+ private const int CodePageUTF32 = 12000;
+ private const int CodePageUTF32BE = 12001;
+
+ // Code page identifier of this encoding; set by the constructors and by DeserializeEncoding.
+ internal int m_codePage = 0;
+
+ // dataItem should be internal (not private); otherwise it will break deserialization
+ // of data that came from Everett.
+ // It is filled on demand by GetDataItem() from EncodingTable.
+ internal CodePageDataItem dataItem = null;
+
+ // Set when deserialization found no fallback data, which the code treats as Everett-format input.
+ [NonSerialized]
+ internal bool m_deserializedFromEverett = false;
+
+ // Because of encoders we may be read only
+ // Clone() clears this flag on the copy; the fallback setters refuse to run while it is true.
+ [OptionalField(VersionAdded = 2)]
+ private bool m_isReadOnly = true;
+
+ // Encoding (encoder) fallback
+ [OptionalField(VersionAdded = 2)]
+ internal EncoderFallback encoderFallback = null;
+ // Decoding (decoder) fallback
+ [OptionalField(VersionAdded = 2)]
+ internal DecoderFallback decoderFallback = null;
+
+ // Creates an encoding with code page 0 by delegating to the code-page constructor.
+ protected Encoding()
+     : this(0)
+ {
+ }
+
+
+ // Creates an encoding for the given code page and installs the default
+ // encoder/decoder fallbacks through SetDefaultFallbacks.
+ // Throws ArgumentOutOfRangeException when codePage is negative.
+ protected Encoding(int codePage)
+ {
+     if (codePage < 0)
+         throw new ArgumentOutOfRangeException(nameof(codePage));
+     Contract.EndContractBlock();
+
+     m_codePage = codePage;
+
+     // Virtual call: this invokes whatever SetDefaultFallbacks resolves to for the instance
+     SetDefaultFallbacks();
+ }
+
+ // Lets a subclass supply its encoder/decoder fallback objects at construction time.
+ // This is needed because an encoding is created read-only, so the fallbacks cannot be
+ // assigned through the properties afterwards.
+ // A null fallback argument is replaced by the corresponding internal best-fit fallback.
+ // Throws ArgumentOutOfRangeException when codePage is negative.
+ protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
+ {
+     if (codePage < 0)
+         throw new ArgumentOutOfRangeException(nameof(codePage));
+     Contract.EndContractBlock();
+
+     m_codePage = codePage;
+
+     if (encoderFallback == null)
+         encoderFallback = new InternalEncoderBestFitFallback(this);
+     this.encoderFallback = encoderFallback;
+
+     if (decoderFallback == null)
+         decoderFallback = new InternalDecoderBestFitFallback(this);
+     this.decoderFallback = decoderFallback;
+ }
+
+ // Installs the fallbacks used when none were supplied explicitly.
+ // This base implementation uses the internal best-fit fallbacks; the method is virtual
+ // so that other encodings can substitute their own (for example a "\xFFFD"
+ // replacement for UTF-X encodings or "?" for ASCII).
+ internal virtual void SetDefaultFallbacks()
+ {
+     encoderFallback = new InternalEncoderBestFitFallback(this);
+     decoderFallback = new InternalDecoderBestFitFallback(this);
+ }
+
+
+ #region Serialization
+ // Resets the optional (Whidbey) fields to known values before deserialization,
+ // so that OnDeserialized can detect when the serialized data did not contain them.
+ internal void OnDeserializing()
+ {
+     encoderFallback = null;
+     decoderFallback = null;
+     m_isReadOnly = true;
+ }
+
+ // Completes deserialization: when either fallback is still missing, the data is
+ // treated as Everett-format and the default fallbacks are installed.
+ internal void OnDeserialized()
+ {
+     bool missingFallback = encoderFallback == null || decoderFallback == null;
+     if (missingFallback)
+     {
+         m_deserializedFromEverett = true;
+         SetDefaultFallbacks();
+     }
+
+     // dataItem is always recalculated from the code page #
+     dataItem = null;
+ }
+
+ // Serialization callback; forwards to the parameterless OnDeserializing().
+ [OnDeserializing]
+ private void OnDeserializing(StreamingContext ctx) => OnDeserializing();
+
+
+ // Serialization callback; forwards to the parameterless OnDeserialized().
+ [OnDeserialized]
+ private void OnDeserialized(StreamingContext ctx) => OnDeserialized();
+
+ // Serialization callback; clears the cached dataItem so it is not written out
+ // (to be consistent with SerializeEncoding, which stores null for it).
+ [OnSerializing]
+ private void OnSerializing(StreamingContext ctx) => dataItem = null;
+
+ // The following two methods are used by the inherited classes which implement ISerializable.
+ // Deserialization Helper
+ // Restores m_codePage, m_isReadOnly and both fallbacks from info. When the V2.0 (Whidbey)
+ // values cannot be read (SerializationException), the data is treated as Everett-format:
+ // the instance is marked read-only and the default fallbacks are installed.
+ // Throws ArgumentNullException when info is null.
+ internal void DeserializeEncoding(SerializationInfo info, StreamingContext context)
+ {
+ // Any info?
+ if (info == null) throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ // All versions have a code page
+ this.m_codePage = (int)info.GetValue("m_codePage", typeof(int));
+
+ // We can get dataItem on the fly if needed, and the index is different between versions
+ // so ignore whatever dataItem data we get from Everett.
+ this.dataItem = null;
+
+ // See if we have the V2.0 fields; a SerializationException means we don't
+ try
+ {
+ //
+ // Try Whidbey V2.0 Fields
+ //
+
+ m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
+
+ this.encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
+ this.decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
+ }
+ catch (SerializationException)
+ {
+ //
+ // Didn't have Whidbey things, must be Everett
+ //
+ this.m_deserializedFromEverett = true;
+
+ // May as well be read only
+ // (this also overwrites any m_isReadOnly value read before the exception)
+ m_isReadOnly = true;
+ SetDefaultFallbacks();
+ }
+ }
+
+ // Serialization Helper
+ // Writes the state read back by DeserializeEncoding, plus the legacy Everett entries.
+ // Throws ArgumentNullException when info is null.
+ internal void SerializeEncoding(SerializationInfo info, StreamingContext context)
+ {
+     if (info == null)
+     {
+         throw new ArgumentNullException(nameof(info));
+     }
+     Contract.EndContractBlock();
+
+     // V2.0 (Whidbey) additions
+     info.AddValue("m_isReadOnly", m_isReadOnly);
+     info.AddValue("encoderFallback", EncoderFallback);
+     info.AddValue("decoderFallback", DecoderFallback);
+
+     // Present in Everett V1.1 as well
+     info.AddValue("m_codePage", m_codePage);
+
+     // Unique to Everett V1.1; always stored as null
+     info.AddValue("dataItem", null);
+
+     // Everett duplicated these fields, so these are needed for portability
+     info.AddValue("Encoding+m_codePage", m_codePage);
+     info.AddValue("Encoding+dataItem", null);
+ }
+
+ #endregion Serialization
+
+ // Converts an entire byte array from srcEncoding to dstEncoding and returns the
+ // result as a new byte array. Delegates to the ranged overload with index 0 and
+ // count bytes.Length.
+ // Throws ArgumentNullException when bytes is null.
+ //
+ [Pure]
+ public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
+     byte[] bytes)
+ {
+     if (bytes == null)
+     {
+         throw new ArgumentNullException(nameof(bytes));
+     }
+     Contract.Ensures(Contract.Result<byte[]>() != null);
+
+     int wholeLength = bytes.Length;
+     return Convert(srcEncoding, dstEncoding, bytes, 0, wholeLength);
+ }
+
+ // Converts a range of bytes in a byte array from one encoding to another.
+ // Decodes count bytes of bytes, starting at index, with srcEncoding and re-encodes
+ // the resulting characters with dstEncoding, returning the result as a new byte array.
+ // index and count are passed through to srcEncoding.GetChars without being checked here.
+ //
+ // Throws ArgumentNullException when srcEncoding, dstEncoding or bytes is null.
+ //
+ [Pure]
+ public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
+     byte[] bytes, int index, int count)
+ {
+     // The encodings are not arrays, so they get the default "value cannot be null"
+     // message rather than the array-specific resource string.
+     if (srcEncoding == null)
+     {
+         throw new ArgumentNullException(nameof(srcEncoding));
+     }
+     if (dstEncoding == null)
+     {
+         throw new ArgumentNullException(nameof(dstEncoding));
+     }
+     if (bytes == null)
+     {
+         throw new ArgumentNullException(nameof(bytes),
+             SR.ArgumentNull_Array);
+     }
+     Contract.Ensures(Contract.Result<byte[]>() != null);
+
+     return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
+ }
+
+ // Registers an encoding provider by handing it to EncodingProvider.AddProvider,
+ // which is also where the argument is validated.
+ public static void RegisterProvider(EncodingProvider provider) => EncodingProvider.AddProvider(provider);
+
+ // Returns the encoding for the given code page identifier.
+ // Registered providers are asked first. Otherwise the built-in code pages are mapped
+ // to their cached instances, and any other code page known to EncodingTable yields UTF8.
+ //
+ // Throws ArgumentOutOfRangeException when codepage is outside 0..65535,
+ // ArgumentException for the special Win32 values 1, 2, 3 and 42, and
+ // NotSupportedException when EncodingTable has no data for the code page.
+ [Pure]
+ public static Encoding GetEncoding(int codepage)
+ {
+     Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage);
+     if (fromProvider != null)
+     {
+         return fromProvider;
+     }
+
+     //
+     // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
+     // add the corresponding item in EncodingTable.
+     // Otherwise, the code below will throw exception when trying to call
+     // EncodingTable.GetDataItem().
+     //
+     if (codepage < 0 || codepage > 65535)
+     {
+         throw new ArgumentOutOfRangeException(
+             nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
+     }
+
+     Contract.EndContractBlock();
+
+     switch (codepage)
+     {
+         // We don't allow the following special code page values that Win32 allows.
+         case CodePageNoOEM:     // 1 CP_OEMCP
+         case CodePageNoMac:     // 2 CP_MACCP
+         case CodePageNoThread:  // 3 CP_THREAD_ACP
+         case CodePageNoSymbol:  // 42 CP_SYMBOL
+             throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
+
+         case CodePageDefault:   // 0
+             return Default;
+         case CodePageUnicode:   // 1200
+             return Unicode;
+         case CodePageBigEndian: // 1201
+             return BigEndianUnicode;
+         case CodePageUTF32:     // 12000
+             return UTF32;
+         case CodePageUTF32BE:   // 12001
+             return BigEndianUTF32;
+         case CodePageASCII:     // 20127
+             return ASCII;
+         case ISO_8859_1:        // 28591
+             return Latin1;
+         case CodePageUTF7:      // 65000
+             return UTF7;
+         case CodePageUTF8:      // 65001
+             return UTF8;
+     }
+
+     // Is it a valid code page?
+     bool knownToTable = EncodingTable.GetCodePageDataItem(codepage) != null;
+     if (!knownToTable)
+     {
+         throw new NotSupportedException(
+             SR.Format(SR.NotSupported_NoCodepageData, codepage));
+     }
+
+     return UTF8;
+ }
+
+ // Returns the encoding for the given code page with the supplied fallbacks.
+ // Registered providers are asked first; otherwise the encoding returned by
+ // GetEncoding(codepage) is cloned and the fallbacks are assigned on the clone.
+ // The fallback property setters reject null with ArgumentNullException.
+ [Pure]
+ public static Encoding GetEncoding(int codepage,
+     EncoderFallback encoderFallback, DecoderFallback decoderFallback)
+ {
+     Encoding provided = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
+     if (provided != null)
+     {
+         return provided;
+     }
+
+     // The default encoding is cached and read only, so work on a clone of it
+     Encoding cached = GetEncoding(codepage);
+     Encoding customized = (Encoding)cached.Clone();
+
+     customized.EncoderFallback = encoderFallback;
+     customized.DecoderFallback = decoderFallback;
+     return customized;
+ }
+
+ // Returns an Encoding object for a given encoding name.
+ // Registered providers are asked first; otherwise the name is translated to a
+ // code page by EncodingTable and resolved through GetEncoding(int).
+ //
+ [Pure]
+ public static Encoding GetEncoding(String name)
+ {
+     Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(name);
+     if (fromProvider != null)
+     {
+         return fromProvider;
+     }
+
+     //
+     // NOTE: If you add a new encoding that can be requested by name, be sure to
+     // add the corresponding item in EncodingTable.
+     // Otherwise, the code below will throw exception when trying to call
+     // EncodingTable.GetCodePageFromName().
+     //
+     int codePage = EncodingTable.GetCodePageFromName(name);
+     return GetEncoding(codePage);
+ }
+
+ // Returns an Encoding object for a given encoding name, using the supplied fallbacks.
+ // Registered providers are asked first; otherwise the name is translated to a
+ // code page by EncodingTable and resolved through the code page overload.
+ //
+ [Pure]
+ public static Encoding GetEncoding(String name,
+     EncoderFallback encoderFallback, DecoderFallback decoderFallback)
+ {
+     Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
+     if (fromProvider != null)
+     {
+         return fromProvider;
+     }
+
+     //
+     // NOTE: If you add a new encoding that can be requested by name, be sure to
+     // add the corresponding item in EncodingTable.
+     // Otherwise, the code below will throw exception when trying to call
+     // EncodingTable.GetCodePageFromName().
+     //
+     int codePage = EncodingTable.GetCodePageFromName(name);
+     return GetEncoding(codePage, encoderFallback, decoderFallback);
+ }
+
+ // Returns the EncodingInfo array supplied by EncodingTable.GetEncodings(),
+ // describing all of our encodings.
+ [Pure]
+ public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings();
+
+ // Returns the preamble bytes for this encoding. The base implementation has none
+ // and returns an empty array; being virtual, it can be overridden.
+ [Pure]
+ public virtual byte[] GetPreamble() => Array.Empty<byte>();
+
+ // Ensures dataItem is populated for m_codePage, looking it up in EncodingTable on first use.
+ // Throws NotSupportedException when the table has no entry for the code page.
+ private void GetDataItem()
+ {
+     if (dataItem != null)
+     {
+         return;
+     }
+
+     dataItem = EncodingTable.GetCodePageDataItem(m_codePage);
+     if (dataItem == null)
+     {
+         throw new NotSupportedException(
+             SR.Format(SR.NotSupported_NoCodepageData, m_codePage));
+     }
+ }
+
+ // Returns the name for this encoding that can be used with mail agent body tags.
+ // If the encoding may not be used, the string is empty.
+ // The value comes from the code page data item, which is loaded on first use.
+ public virtual String BodyName
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         return dataItem.BodyName;
+     }
+ }
+
+ // Returns the human-readable description of the encoding ( e.g. Hebrew (DOS)).
+ // Two implementations are selected at compile time by the PROJECTN symbol.
+#if PROJECTN
+ // PROJECTN build: looks up the resource for the built-in code pages and falls back to the
+ // English name from EncodingTable when the resource text is just its identifier.
+ // Throws NotSupportedException when no name can be found.
+ public virtual String EncodingName
+ {
+ get
+ {
+ string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
+ if (encodingName == null)
+ {
+ throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
+ }
+
+ if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
+ {
+ // On ProjectN, resource strings are stripped from retail builds and replaced by
+ // their identifier names. Since this property is meant to be a localized string,
+ // but we don't localize ProjectN, we specifically need to do something reasonable
+ // in this case. This currently returns the English name of the encoding from a
+ // static data table.
+ // NOTE(review): the result of GetCodePageDataItem is dereferenced without a null check - confirm
+ // that the table always has an entry for the code pages handled by GetLocalizedEncodingNameResource.
+ encodingName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
+ if (encodingName == null)
+ {
+ // NOTE(review): this call passes two format arguments while the one above passes one for the
+ // same resource string - confirm which matches the format placeholders.
+ throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
+ }
+ }
+ return encodingName;
+ }
+ }
+
+ // Maps the built-in code pages to their SR name resources; returns null for any other code page.
+ private static string GetLocalizedEncodingNameResource(int codePage)
+ {
+ switch (codePage)
+ {
+ case 1200: return SR.Globalization_cp_1200;
+ case 1201: return SR.Globalization_cp_1201;
+ case 12000: return SR.Globalization_cp_12000;
+ case 12001: return SR.Globalization_cp_12001;
+ case 20127: return SR.Globalization_cp_20127;
+ case 28591: return SR.Globalization_cp_28591;
+ case 65000: return SR.Globalization_cp_65000;
+ case 65001: return SR.Globalization_cp_65001;
+ default: return null;
+ }
+ }
+#else
+ // Other builds: fetches the resource whose key is "Globalization_cp_" followed by the code page number.
+ public virtual String EncodingName
+ {
+ get
+ {
+ return SR.GetResourceString("Globalization_cp_" + m_codePage.ToString());
+ }
+ }
+#endif
+ // Returns the name for this encoding that can be used with mail agent header
+ // tags. If the encoding may not be used, the string is empty.
+ // The value comes from the code page data item, which is loaded on first use.
+ public virtual String HeaderName
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         return dataItem.HeaderName;
+     }
+ }
+
+ // Returns the IANA preferred name for this encoding, read from the code page
+ // data item (loaded on first use).
+ public virtual String WebName
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         return dataItem.WebName;
+     }
+ }
+
+ // Returns the windows code page that most closely corresponds to this encoding.
+ // This is the UIFamilyCodePage of the code page data item (loaded on first use).
+ public virtual int WindowsCodePage
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         return dataItem.UIFamilyCodePage;
+     }
+ }
+
+
+ // True if and only if the encoding is used for display by browsers clients,
+ // i.e. the MIMECONTF_BROWSER bit is set in the data item's flags.
+ public virtual bool IsBrowserDisplay
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         bool flagSet = (dataItem.Flags & MIMECONTF_BROWSER) != 0;
+         return flagSet;
+     }
+ }
+
+ // True if and only if the encoding is used for saving by browsers clients,
+ // i.e. the MIMECONTF_SAVABLE_BROWSER bit is set in the data item's flags.
+ public virtual bool IsBrowserSave
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         bool flagSet = (dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0;
+         return flagSet;
+     }
+ }
+
+ // True if and only if the encoding is used for display by mail and news clients,
+ // i.e. the MIMECONTF_MAILNEWS bit is set in the data item's flags.
+ public virtual bool IsMailNewsDisplay
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         bool flagSet = (dataItem.Flags & MIMECONTF_MAILNEWS) != 0;
+         return flagSet;
+     }
+ }
+
+
+ // True if and only if the encoding is used for saving documents by mail and
+ // news clients, i.e. the MIMECONTF_SAVABLE_MAILNEWS bit is set in the data item's flags.
+ public virtual bool IsMailNewsSave
+ {
+     get
+     {
+         // GetDataItem() only does the lookup while dataItem is still null
+         GetDataItem();
+         bool flagSet = (dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0;
+         return flagSet;
+     }
+ }
+
+ // True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
+ // The base implementation always answers false; the property is virtual so it can be overridden.
+ public virtual bool IsSingleByte => false;
+
+
+ // Gets or sets the encoder fallback of this encoding.
+ // The setter throws InvalidOperationException when the encoding is read-only and
+ // ArgumentNullException when the value is null (checked in that order).
+ public EncoderFallback EncoderFallback
+ {
+     get { return encoderFallback; }
+
+     set
+     {
+         if (IsReadOnly)
+         {
+             throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
+         }
+         if (value == null)
+         {
+             throw new ArgumentNullException(nameof(value));
+         }
+         Contract.EndContractBlock();
+
+         encoderFallback = value;
+     }
+ }
+
+
+ // Gets or sets the decoder fallback of this encoding.
+ // The setter throws InvalidOperationException when the encoding is read-only and
+ // ArgumentNullException when the value is null (checked in that order).
+ public DecoderFallback DecoderFallback
+ {
+     get { return decoderFallback; }
+
+     set
+     {
+         if (IsReadOnly)
+         {
+             throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
+         }
+         if (value == null)
+         {
+             throw new ArgumentNullException(nameof(value));
+         }
+         Contract.EndContractBlock();
+
+         decoderFallback = value;
+     }
+ }
+
+
+ // Creates a shallow (memberwise) copy of this encoding. Unlike the original,
+ // the copy is writable, so its fallbacks can be changed.
+ public virtual Object Clone()
+ {
+     Encoding copy = (Encoding)MemberwiseClone();
+     copy.m_isReadOnly = false;
+     return copy;
+ }
+
+
+ // True while this instance refuses changes to its fallbacks (see the fallback setters).
+ public bool IsReadOnly => m_isReadOnly;
+
+ // Returns an encoding for the ASCII character set: the shared instance
+ // held in ASCIIEncoding.s_default.
+ public static Encoding ASCII
+ {
+     get { return ASCIIEncoding.s_default; }
+ }
+
+ // Returns an encoding for the Latin1 character set: the shared instance
+ // held in Latin1Encoding.s_default.
+ //
+ // This is for our optimizations
+ private static Encoding Latin1
+ {
+     get { return Latin1Encoding.s_default; }
+ }
+
+ // Returns the number of bytes required to encode the given character array.
+ // Delegates to the ranged overload over the whole array.
+ // Throws ArgumentNullException when chars is null.
+ //
+ [Pure]
+ public virtual int GetByteCount(char[] chars)
+ {
+     if (chars == null)
+         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
+     Contract.EndContractBlock();
+
+     int length = chars.Length;
+     return GetByteCount(chars, 0, length);
+ }
+
+ // Returns the number of bytes required to encode the given string.
+ // The string is copied to a char array and counted through the ranged array overload.
+ // Throws ArgumentNullException when s is null.
+ [Pure]
+ public virtual int GetByteCount(String s)
+ {
+     if (s == null)
+     {
+         throw new ArgumentNullException(nameof(s));
+     }
+     Contract.EndContractBlock();
+
+     char[] buffer = s.ToCharArray();
+     return GetByteCount(buffer, 0, buffer.Length);
+ }
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ // Abstract: every concrete encoding supplies this; the other GetByteCount
+ // overloads in this class end up calling it by default.
+ //
+ [Pure]
+ public abstract int GetByteCount(char[] chars, int index, int count);
+
+ // Returns the number of bytes required to encode a string range.
+ // The string is pinned and the range is handed to the pointer-based overload.
+ //
+ // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when
+ // index or count is negative or the range does not lie within the string.
+ //
+ [Pure]
+ public int GetByteCount(string s, int index, int count)
+ {
+     if (s == null)
+     {
+         throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
+     }
+     if (index < 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     if (index > s.Length - count)
+     {
+         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
+     }
+     Contract.EndContractBlock();
+
+     unsafe
+     {
+         fixed (char* pinned = s)
+         {
+             char* rangeStart = pinned + index;
+             return GetByteCount(rangeStart, count);
+         }
+     }
+ }
+
+ // We expect this to be the workhorse for NLS encodings.
+ // Unfortunately for existing overrides, it has to call the [] version,
+ // which is really slow, so this method should be avoided if you're calling
+ // a 3rd party encoding.
+ //
+ // Throws ArgumentNullException when chars is null and
+ // ArgumentOutOfRangeException when count is negative.
+ [Pure]
+ [CLSCompliant(false)]
+ public virtual unsafe int GetByteCount(char* chars, int count)
+ {
+     if (chars == null)
+     {
+         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
+     }
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     // Copy the input into a managed array so the array-based overload can be used
+     char[] managedCopy = new char[count];
+     for (int i = 0; i < count; i++)
+     {
+         managedCopy[i] = chars[i];
+     }
+
+     return GetByteCount(managedCopy, 0, count);
+ }
+
+ // For NLS Encodings, workhorse takes an encoder (may be null)
+ // Always validate parameters before calling internal version, which will only assert.
+ // This base implementation ignores the encoder and forwards to the public pointer overload.
+ internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
+ {
+     Debug.Assert(chars != null);
+     Debug.Assert(count >= 0);
+
+     int byteCount = GetByteCount(chars, count);
+     return byteCount;
+ }
+
+ // Returns a byte array containing the encoded representation of the given
+ // character array. Delegates to the ranged overload over the whole array.
+ // Throws ArgumentNullException when chars is null.
+ //
+ [Pure]
+ public virtual byte[] GetBytes(char[] chars)
+ {
+     if (chars == null)
+         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
+     Contract.EndContractBlock();
+
+     int length = chars.Length;
+     return GetBytes(chars, 0, length);
+ }
+
+ // Returns a byte array containing the encoded representation of a range
+ // of characters in a character array. The array is sized with GetByteCount
+ // and then filled by the abstract GetBytes overload.
+ //
+ [Pure]
+ public virtual byte[] GetBytes(char[] chars, int index, int count)
+ {
+     int needed = GetByteCount(chars, index, count);
+     byte[] encoded = new byte[needed];
+
+     GetBytes(chars, index, count, encoded, 0);
+     return encoded;
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ // Abstract: every concrete encoding supplies this. The callers in this class
+ // treat the return value as the number of bytes written.
+ //
+ public abstract int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex);
+
+ // Returns a byte array containing the encoded representation of the given
+ // string. The array is sized with GetByteCount(s) and filled by the
+ // string/byte[] overload.
+ // Throws ArgumentNullException when s is null.
+ //
+ [Pure]
+ public virtual byte[] GetBytes(String s)
+ {
+     if (s == null)
+     {
+         throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
+     }
+     Contract.EndContractBlock();
+
+     int expected = GetByteCount(s);
+     byte[] encoded = new byte[expected];
+
+     int written = GetBytes(s, 0, s.Length, encoded, 0);
+     Debug.Assert(expected == written);
+
+     return encoded;
+ }
+
+ // Returns a byte array containing the encoded representation of the given
+ // string range. The string is pinned and encoded through the pointer-based
+ // overloads; a range that encodes to zero bytes yields an empty array.
+ //
+ // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when
+ // index or count is negative or the range does not lie within the string.
+ //
+ [Pure]
+ public byte[] GetBytes(string s, int index, int count)
+ {
+     if (s == null)
+     {
+         throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
+     }
+     if (index < 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     if (index > s.Length - count)
+     {
+         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
+     }
+     Contract.EndContractBlock();
+
+     unsafe
+     {
+         fixed (char* pinned = s)
+         {
+             char* rangeStart = pinned + index;
+
+             int expected = GetByteCount(rangeStart, count);
+             if (expected == 0)
+             {
+                 return Array.Empty<byte>();
+             }
+
+             byte[] encoded = new byte[expected];
+             // expected > 0 here, so taking the address of element 0 is valid
+             fixed (byte* pEncoded = &encoded[0])
+             {
+                 int written = GetBytes(rangeStart, count, pEncoded, expected);
+                 Debug.Assert(expected == written);
+             }
+             return encoded;
+         }
+     }
+ }
+
+ // Encodes a range of characters from a string into a byte array.
+ // The whole string is copied to a char array and handed to the array-based
+ // overload, which receives the range and destination arguments unchanged.
+ // Throws ArgumentNullException when s is null.
+ public virtual int GetBytes(String s, int charIndex, int charCount,
+     byte[] bytes, int byteIndex)
+ {
+     if (s == null)
+     {
+         throw new ArgumentNullException(nameof(s));
+     }
+     Contract.EndContractBlock();
+
+     char[] source = s.ToCharArray();
+     return GetBytes(source, charIndex, charCount, bytes, byteIndex);
+ }
+
+ // This is our internal workhorse
+ // Always validate parameters before calling internal version, which will only assert.
+ // This base implementation ignores the encoder and forwards to the public pointer overload,
+ // which performs the same checks with exceptions.
+ internal virtual unsafe int GetBytes(char* chars, int charCount,
+     byte* bytes, int byteCount, EncoderNLS encoder)
+ {
+     // Same debug-only checks as the internal GetByteCount overload
+     Debug.Assert(chars != null);
+     Debug.Assert(charCount >= 0);
+     Debug.Assert(bytes != null);
+     Debug.Assert(byteCount >= 0);
+
+     return GetBytes(chars, charCount, bytes, byteCount);
+ }
+
+ // We expect this to be the workhorse for NLS Encodings, but for existing
+ // ones we need a working (if slow) default implementation.
+ //
+ // WARNING WARNING WARNING
+ //
+ // WARNING: If this breaks it could be a security threat. Obviously we
+ // call this internally, so you need to make sure that your pointers, counts
+ // and indexes are correct when you call this method.
+ //
+ // In addition, we have internal code, which will be marked as "safe" calling
+ // this code. However this code is dependent upon the implementation of an
+ // external GetBytes() method, which could be overridden by a third party and
+ // the results of which cannot be guaranteed. We use that result to copy
+ // the byte[] to our byte* output buffer. If the result count was wrong, we
+ // could easily overflow our output buffer. Therefore we do an extra test
+ // when we copy the buffer so that we don't overflow byteCount either.
+ //
+ // Returns the number of bytes copied to the output buffer. Throws ArgumentNullException
+ // when either pointer is null and ArgumentOutOfRangeException when either count is negative.
+
+ [CLSCompliant(false)]
+ public virtual unsafe int GetBytes(char* chars, int charCount,
+     byte* bytes, int byteCount)
+ {
+     // Validate input parameters (bytes is reported first when both pointers are null)
+     if (bytes == null || chars == null)
+     {
+         string nullParam = bytes == null ? nameof(bytes) : nameof(chars);
+         throw new ArgumentNullException(nullParam, SR.ArgumentNull_Array);
+     }
+
+     if (charCount < 0 || byteCount < 0)
+     {
+         string negativeParam = charCount < 0 ? nameof(charCount) : nameof(byteCount);
+         throw new ArgumentOutOfRangeException(negativeParam, SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     // Managed copy of the input characters
+     char[] managedChars = new char[charCount];
+     for (int i = 0; i < charCount; i++)
+     {
+         managedChars[i] = chars[i];
+     }
+
+     // Managed scratch buffer, exactly as large as the caller's output buffer
+     byte[] managedBytes = new byte[byteCount];
+
+     // Do the work
+     int reported = GetBytes(managedChars, 0, charCount, managedBytes, 0);
+
+     Debug.Assert(reported <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");
+
+     // WARNING: We MUST make sure that we don't copy too many bytes. We can't
+     // rely on the reported count because it could come from a 3rd party implementation,
+     // so never copy more than byteCount bytes no matter what was reported.
+     int toCopy = reported < byteCount ? reported : byteCount;
+
+     // Copy the data, don't overrun our array!
+     for (int i = 0; i < toCopy; i++)
+     {
+         bytes[i] = managedBytes[i];
+     }
+
+     return toCopy;
+ }
+
+ // Counts the characters that decoding the whole of the given byte array would
+ // produce, by delegating to the (bytes, index, count) overload for the full
+ // range. Throws ArgumentNullException when bytes is null.
+ //
+ [Pure]
+ public virtual int GetCharCount(byte[] bytes)
+ {
+ if (bytes == null)
+ throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+ Contract.EndContractBlock();
+
+ int length = bytes.Length;
+ return GetCharCount(bytes, 0, length);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // bytes: the array containing the bytes to decode.
+ // index: the position in bytes at which the range starts.
+ // count: the number of bytes in the range.
+ //
+ // Abstract: each concrete encoding supplies the implementation. The
+ // GetCharCount(byte[]) and GetCharCount(byte*, int) overloads of this class
+ // both forward to this method.
+ //
+ [Pure]
+ public abstract int GetCharCount(byte[] bytes, int index, int count);
+
+ // Slow but functional default for the pointer-based GetCharCount. NLS encodings
+ // are expected to supply their own version; here the bytes are copied into a
+ // managed array and counted by the array-based overload.
+ [Pure]
+ [CLSCompliant(false)]
+ public virtual unsafe int GetCharCount(byte* bytes, int count)
+ {
+ // Validate input parameters
+ if (bytes == null)
+ {
+ throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+ }
+
+ if (count < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+ }
+ Contract.EndContractBlock();
+
+ // Copy the unmanaged input into a managed array
+ byte[] managedBytes = new byte[count];
+ for (int i = 0; i < count; i++)
+ managedBytes[i] = *(bytes + i);
+
+ return GetCharCount(managedBytes, 0, count);
+ }
+
+ // Internal decoder-aware overload.
+ // Always validate parameters before calling the internal version, which will only assert.
+ // This default implementation makes no use of the decoder argument and forwards
+ // to the public pointer overload.
+ internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
+ => GetCharCount(bytes, count);
+
+ // Decodes the whole of the given byte array and returns the resulting
+ // characters in a new array, by delegating to the (bytes, index, count)
+ // overload for the full range. Throws ArgumentNullException when bytes is null.
+ //
+ [Pure]
+ public virtual char[] GetChars(byte[] bytes)
+ {
+ if (bytes == null)
+ throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+ Contract.EndContractBlock();
+
+ int length = bytes.Length;
+ return GetChars(bytes, 0, length);
+ }
+
+ // Decodes a range of bytes in a byte array and returns the resulting
+ // characters in a new array. The array is sized with GetCharCount and then
+ // filled by the five-argument GetChars overload.
+ //
+ [Pure]
+ public virtual char[] GetChars(byte[] bytes, int index, int count)
+ {
+ int charCount = GetCharCount(bytes, index, count);
+ char[] decoded = new char[charCount];
+ GetChars(bytes, index, count, decoded, 0);
+ return decoded;
+ }
+
+ // Decodes a range of bytes in a byte array into a range of characters in a
+ // character array. An exception occurs if the character array is not large
+ // enough to hold the complete decoding of the bytes. The
+ // GetCharCount method can be used to determine the exact number of
+ // characters that will be produced for a given range of bytes.
+ // Alternatively, the GetMaxCharCount method can be used to
+ // determine the maximum number of characters that will be produced for a
+ // given number of bytes, regardless of the actual byte values.
+ //
+ // Abstract: each concrete encoding supplies the implementation. The
+ // GetChars(byte[], int, int) and GetChars(byte*, int, char*, int) overloads
+ // of this class call this method and use its return value as the number of
+ // characters written.
+ //
+
+ public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex);
+
+
+ // Slow but functional default for the pointer-based GetChars. NLS encodings are
+ // expected to supply their own version; existing encodings that only implement
+ // the array overloads get this copy-in / copy-out implementation.
+ //
+ // SECURITY WARNING
+ //
+ // If this breaks it could be a security threat. We call this internally, so
+ // callers must make sure that the pointers and counts they pass are correct.
+ //
+ // In addition, the array-based GetChars() invoked below may be overridden by a
+ // third party, so the count it returns cannot be trusted. That count is used to
+ // copy from the temporary char[] to the char* output buffer, so the copy is
+ // capped at charCount no matter what value comes back.
+
+ [CLSCompliant(false)]
+ public virtual unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount)
+ {
+ // Validate the pointers; a null 'chars' is reported ahead of a null 'bytes'
+ if (chars == null)
+ throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
+ if (bytes == null)
+ throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+
+ // Validate the counts; a negative byteCount is reported ahead of a negative charCount
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Copy the input bytes into a managed array
+ byte[] managedBytes = new byte[byteCount];
+ for (int i = 0; i < byteCount; i++)
+ managedBytes[i] = *(bytes + i);
+
+ // Scratch array that receives the decoded chars
+ char[] managedChars = new char[charCount];
+
+ // Let the array-based overload do the actual decoding
+ int produced = GetChars(managedBytes, 0, byteCount, managedChars, 0);
+
+ Debug.Assert(produced <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");
+
+ // Never copy more than charCount chars, whatever the (possibly 3rd party)
+ // implementation claims to have produced
+ int copyCount = Math.Min(produced, charCount);
+
+ // Copy the data, don't overrun the caller's buffer!
+ for (int i = 0; i < copyCount; i++)
+ *(chars + i) = managedChars[i];
+
+ return copyCount;
+ }
+
+
+ // Internal decoder-aware overload.
+ // Always validate parameters before calling the internal version, which will only assert.
+ // This default implementation makes no use of the decoder argument and forwards
+ // to the public pointer overload.
+ internal virtual unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS decoder)
+ => GetChars(bytes, byteCount, chars, charCount);
+
+
+ // Builds a string from byteCount bytes starting at the given pointer, using
+ // this encoding. Throws ArgumentNullException for a null pointer and
+ // ArgumentOutOfRangeException for a negative count.
+ [CLSCompliant(false)]
+ public unsafe string GetString(byte* bytes, int byteCount)
+ {
+ if (bytes == null)
+ {
+ throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+ }
+
+ if (byteCount < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
+ }
+ Contract.EndContractBlock();
+
+ string decoded = String.CreateStringFromEncoding(bytes, byteCount, this);
+ return decoded;
+ }
+
+ // The code page identifier of this encoding: an integer between 0 and 65535
+ // if the encoding has a code page identifier, or -1 if the encoding does not
+ // represent a code page. Backed by the m_codePage field.
+ //
+
+ public virtual int CodePage => m_codePage;
+
+ // IsAlwaysNormalized
+ // Convenience overload: asks the form-specific overload about NormalizationForm.FormC.
+ [Pure]
+ public bool IsAlwaysNormalized() => IsAlwaysNormalized(NormalizationForm.FormC);
+
+ // Returns true if the encoding is always normalized for the specified form.
+ // The base implementation assumes false; an encoding that knows otherwise overrides it.
+ [Pure]
+ public virtual bool IsAlwaysNormalized(NormalizationForm form) => false;
+
+ // Creates a Decoder for this encoding. A Decoder is used to decode a sequence
+ // of bytes into a sequence of characters and, unlike the GetChars family of
+ // methods, can convert partial sequences of bytes into partial sequences of
+ // characters by keeping state between conversions.
+ //
+ // The default implementation hands back a DefaultDecoder, which just forwards
+ // to this encoding's GetCharCount and GetChars methods. Encodings that need
+ // state to be maintained between successive conversions should override this
+ // method and return an appropriate Decoder implementation.
+ //
+
+ public virtual Decoder GetDecoder()
+ {
+ Decoder decoder = new DefaultDecoder(this);
+ return decoder;
+ }
+
+ // Creates an Encoder for this encoding. An Encoder is used to encode a sequence
+ // of characters into a sequence of bytes and, unlike the GetBytes family of
+ // methods, can convert partial sequences of characters into partial sequences
+ // of bytes by keeping state between conversions.
+ //
+ // The default implementation hands back a DefaultEncoder, which just forwards
+ // to this encoding's GetByteCount and GetBytes methods. Encodings that need
+ // state to be maintained between successive conversions should override this
+ // method and return an appropriate Encoder implementation.
+ //
+
+ public virtual Encoder GetEncoder()
+ {
+ Encoder encoder = new DefaultEncoder(this);
+ return encoder;
+ }
+
+ // Returns the maximum number of bytes required to encode a given number of
+ // characters. This method can be used to determine an appropriate buffer
+ // size for byte arrays passed to the GetBytes method of this
+ // encoding or the GetBytes method of an Encoder for this
+ // encoding. All encodings must guarantee that no buffer overflow
+ // exceptions will occur if buffers are sized according to the results of
+ // this method.
+ //
+ // charCount: the number of characters to be encoded.
+ //
+ // WARNING: If you're using something besides the default replacement encoder fallback,
+ // then you could have more bytes than this returned from an actual call to GetBytes().
+ //
+ // Abstract: each concrete encoding supplies the implementation.
+ //
+ [Pure]
+ public abstract int GetMaxByteCount(int charCount);
+
+ // Returns the maximum number of characters produced by decoding a given
+ // number of bytes. This method can be used to determine an appropriate
+ // buffer size for character arrays passed to the GetChars method of
+ // this encoding or the GetChars method of a Decoder for this
+ // encoding. All encodings must guarantee that no buffer overflow
+ // exceptions will occur if buffers are sized according to the results of
+ // this method.
+ //
+ // byteCount: the number of bytes to be decoded.
+ //
+ // Abstract: each concrete encoding supplies the implementation.
+ //
+ [Pure]
+ public abstract int GetMaxCharCount(int byteCount);
+
+ // Decodes the whole of the given byte array into a string, by delegating to
+ // the (bytes, index, count) overload for the full range. Throws
+ // ArgumentNullException when bytes is null.
+ //
+ [Pure]
+ public virtual String GetString(byte[] bytes)
+ {
+ if (bytes == null)
+ {
+ throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+ }
+ Contract.EndContractBlock();
+
+ int length = bytes.Length;
+ return GetString(bytes, 0, length);
+ }
+
+ // Decodes a range of bytes in a byte array into a string. The base
+ // implementation decodes to a char[] with GetChars and wraps it in a String.
+ //
+ // Internally we override this for performance
+ //
+ [Pure]
+ public virtual String GetString(byte[] bytes, int index, int count)
+ {
+ char[] decoded = GetChars(bytes, index, count);
+ return new String(decoded);
+ }
+
+ // An encoding for the Unicode format, taken from
+ // UnicodeEncoding.s_littleEndianDefault.
+ //
+ // It will use little endian byte order, but will detect
+ // input in big endian if it finds a byte order mark per Unicode 2.0.
+
+ public static Encoding Unicode
+ {
+ get { return UnicodeEncoding.s_littleEndianDefault; }
+ }
+
+ // An encoding for the Unicode format, taken from
+ // UnicodeEncoding.s_bigEndianDefault.
+ //
+ // It will use big endian byte order, but will detect
+ // input in little endian if it finds a byte order mark per Unicode 2.0.
+
+ public static Encoding BigEndianUnicode
+ {
+ get { return UnicodeEncoding.s_bigEndianDefault; }
+ }
+
+ // An encoding for the UTF-7 format, taken from UTF7Encoding.s_default.
+
+ public static Encoding UTF7
+ {
+ get { return UTF7Encoding.s_default; }
+ }
+
+ // An encoding for the UTF-8 format, taken from UTF8Encoding.s_default.
+
+ public static Encoding UTF8
+ {
+ get { return UTF8Encoding.s_default; }
+ }
+
+ // An encoding for the UTF-32 format, taken from UTF32Encoding.s_default.
+
+ public static Encoding UTF32
+ {
+ get { return UTF32Encoding.s_default; }
+ }
+
+ // An encoding for the UTF-32 format, taken from
+ // UTF32Encoding.s_bigEndianDefault.
+ //
+ // It will use big endian byte order.
+
+ private static Encoding BigEndianUTF32
+ {
+ get { return UTF32Encoding.s_bigEndianDefault; }
+ }
+
+ // Two encodings are equal when the other object is an Encoding with the same
+ // code page and with equal encoder and decoder fallbacks.
+ public override bool Equals(Object value)
+ {
+ Encoding other = value as Encoding;
+ if (other == null)
+ {
+ return false;
+ }
+
+ return m_codePage == other.m_codePage
+ && EncoderFallback.Equals(other.EncoderFallback)
+ && DecoderFallback.Equals(other.DecoderFallback);
+ }
+
+
+ // Combines the same three values that Equals compares: the code page plus the
+ // hash codes of the encoder and decoder fallbacks.
+ public override int GetHashCode()
+ {
+ int hash = m_codePage;
+ hash += EncoderFallback.GetHashCode();
+ hash += DecoderFallback.GetHashCode();
+ return hash;
+ }
+
+ // Best-fit data for the Unicode-to-bytes direction. Normally there is none,
+ // so the base implementation returns an empty array.
+ internal virtual char[] GetBestFitUnicodeToBytesData() => Array.Empty<char>();
+
+ // Best-fit data for the bytes-to-Unicode direction. Normally there is none,
+ // so the base implementation returns an empty array.
+ internal virtual char[] GetBestFitBytesToUnicodeData() => Array.Empty<char>();
+
+ // Always throws an ArgumentException (parameter name "bytes") reporting that
+ // the byte output buffer overflowed.
+ internal void ThrowBytesOverflow()
+ {
+ // The message names the fallback type in case the fallback's GetMaxCharCount is broken.
+ // This happens if the user has implemented an encoder fallback with a broken GetMaxCharCount
+ string message = SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType());
+ throw new ArgumentException(message, "bytes");
+ }
+
+ // Handles an overflow of the byte output buffer when an EncoderNLS may be
+ // involved. Throws when there is no encoder, when the encoder is flagged to
+ // throw on overflow, or when nothing was encoded. Otherwise it only clears the
+ // encoder's must-flush state and returns.
+ internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
+ {
+ bool hasEncoder = encoder != null;
+
+ if (!hasEncoder || encoder.m_throwOnOverflow || nothingEncoded)
+ {
+ // Reset the encoder's fallback buffer, if it has one, before throwing
+ if (hasEncoder && encoder.InternalHasFallbackBuffer)
+ {
+ encoder.FallbackBuffer.InternalReset();
+ }
+
+ // The message names the fallback type in case the fallback's GetMaxCharCount is broken.
+ // This happens if the user has implemented an encoder fallback with a broken GetMaxCharCount
+ ThrowBytesOverflow();
+ }
+
+ // If we didn't throw, we are in convert and have to remember our flushing
+ encoder.ClearMustFlush();
+ }
+
+ // Always throws an ArgumentException (parameter name "chars") reporting that
+ // the char output buffer overflowed.
+ internal void ThrowCharsOverflow()
+ {
+ // The message names the fallback type in case the fallback's GetMaxCharCount is broken.
+ // This happens if the user has implemented a decoder fallback with a broken GetMaxCharCount
+ string message = SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
+ throw new ArgumentException(message, "chars");
+ }
+
+ // Handles an overflow of the char output buffer when a DecoderNLS may be
+ // involved. Throws when there is no decoder, when the decoder is flagged to
+ // throw on overflow, or when nothing was decoded. Otherwise it only clears the
+ // decoder's must-flush state and returns.
+ internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
+ {
+ bool hasDecoder = decoder != null;
+
+ if (!hasDecoder || decoder.m_throwOnOverflow || nothingDecoded)
+ {
+ // Reset the decoder's fallback buffer, if it has one, before throwing
+ if (hasDecoder && decoder.InternalHasFallbackBuffer)
+ {
+ decoder.FallbackBuffer.InternalReset();
+ }
+
+ // The message names the fallback type in case the fallback's GetMaxCharCount is broken.
+ // This happens if the user has implemented a decoder fallback with a broken GetMaxCharCount
+ ThrowCharsOverflow();
+ }
+
+ // If we didn't throw, we are in convert and have to remember our flushing
+ decoder.ClearMustFlush();
+ }
+
+ // Encoder handed out by Encoding.GetEncoder() for encodings that do not supply
+ // their own. It keeps no conversion state: every call is forwarded to the
+ // wrapped encoding and the flush argument is ignored. The serialization entry
+ // points (GetRealObject, GetObjectData) throw PlatformNotSupportedException.
+ internal sealed class DefaultEncoder : Encoder, IObjectReference, ISerializable
+ {
+ // The encoding all calls are forwarded to; assigned once, in the constructor.
+ private readonly Encoding m_encoding;
+
+ public DefaultEncoder(Encoding encoding)
+ {
+ m_encoding = encoding;
+ }
+
+ // IObjectReference implementation; not supported.
+ public Object GetRealObject(StreamingContext context)
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ // ISerializable implementation; not supported.
+ void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ // Returns the number of bytes that encoding the given range of characters
+ // produces, as reported by the wrapped encoding's GetByteCount. The flush
+ // argument is not used.
+ //
+
+ public override int GetByteCount(char[] chars, int index, int count, bool flush)
+ => m_encoding.GetByteCount(chars, index, count);
+
+ // Pointer flavour of GetByteCount; forwards to the wrapped encoding and
+ // does not use the flush argument.
+ [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
+ public unsafe override int GetByteCount(char* chars, int count, bool flush)
+ => m_encoding.GetByteCount(chars, count);
+
+ // Encodes charCount characters from chars starting at index charIndex,
+ // storing the resulting bytes in bytes starting at index byteIndex, and
+ // returns the value reported by the wrapped encoding's GetBytes. The
+ // flush argument is not used.
+ //
+ // Whether and how an undersized byte array is reported is decided by the
+ // wrapped encoding. Its GetByteCount or GetMaxByteCount methods can be
+ // used to size the buffer.
+ //
+
+ public override int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex, bool flush)
+ => m_encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
+
+ // Pointer flavour of GetBytes; forwards to the wrapped encoding and does
+ // not use the flush argument.
+ [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
+ public unsafe override int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, bool flush)
+ => m_encoding.GetBytes(chars, charCount, bytes, byteCount);
+ }
+
+ // Decoder handed out by Encoding.GetDecoder() for encodings that do not supply
+ // their own. It keeps no conversion state: every call is forwarded to the
+ // wrapped encoding and the flush argument is ignored. The serialization entry
+ // points (GetRealObject, GetObjectData) throw PlatformNotSupportedException.
+ internal sealed class DefaultDecoder : Decoder, IObjectReference, ISerializable
+ {
+ // The encoding all calls are forwarded to; assigned once, in the constructor.
+ private readonly Encoding m_encoding;
+
+ public DefaultDecoder(Encoding encoding)
+ {
+ m_encoding = encoding;
+ }
+
+ // IObjectReference implementation; not supported.
+ public Object GetRealObject(StreamingContext context)
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ // ISerializable implementation; not supported.
+ void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ // Returns the number of characters that decoding the given range of bytes
+ // produces. Equivalent to the four-argument overload with flush == false.
+ //
+
+ public override int GetCharCount(byte[] bytes, int index, int count)
+ => GetCharCount(bytes, index, count, false);
+
+ // Forwards to the wrapped encoding's GetCharCount; the flush argument is
+ // not used.
+ public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
+ => m_encoding.GetCharCount(bytes, index, count);
+
+ // Pointer flavour of GetCharCount.
+ // By default just call the encoding version, no flush by default
+ [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
+ public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
+ => m_encoding.GetCharCount(bytes, count);
+
+ // Decodes byteCount bytes from bytes starting at index byteIndex, storing
+ // the resulting characters in chars starting at index charIndex.
+ // Equivalent to the six-argument overload with flush == false.
+ //
+ // Whether and how an undersized character array is reported is decided by
+ // the wrapped encoding. Its GetCharCount or GetMaxCharCount methods can be
+ // used to size the buffer.
+ //
+
+ public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex)
+ => GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);
+
+ // Forwards to the wrapped encoding's GetChars; the flush argument is not used.
+ public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex, bool flush)
+ => m_encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
+
+ // Pointer flavour of GetChars.
+ // By default just call the encoding's version
+ [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
+ public unsafe override int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, bool flush)
+ => m_encoding.GetChars(bytes, byteCount, chars, charCount);
+ }
+
+ // Helper used while decoding: tracks a read position in a byte input range and
+ // a write position in a char output range, and counts the chars produced.
+ // When the char pointer is null nothing is stored and only the count is kept.
+ internal class EncodingCharBuffer
+ {
+ // Current write position in the output, plus the start and end of the output range
+ private unsafe char* _chars;
+ private unsafe char* _charStart;
+ private unsafe char* _charEnd;
+ // Number of chars added so far (whether stored or only counted)
+ private int _charCountResult = 0;
+ private Encoding _enc;
+ private DecoderNLS _decoder;
+ // Start and end of the input range, plus the current read position
+ private unsafe byte* _byteStart;
+ private unsafe byte* _byteEnd;
+ private unsafe byte* _bytes;
+ private DecoderFallbackBuffer _fallbackBuffer;
+
+ // Records the input/output ranges and selects the fallback buffer: the
+ // decoder's own buffer when a decoder is supplied, otherwise a new one
+ // created from the encoding's DecoderFallback.
+ internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
+ byte* byteStart, int byteCount)
+ {
+ _enc = enc;
+ _decoder = decoder;
+
+ _chars = charStart;
+ _charStart = charStart;
+ _charEnd = charStart + charCount;
+
+ _byteStart = byteStart;
+ _bytes = byteStart;
+ _byteEnd = byteStart + byteCount;
+
+ if (_decoder == null)
+ _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
+ else
+ _fallbackBuffer = _decoder.FallbackBuffer;
+
+ // If we're getting chars or getting char count we don't expect to have
+ // to remember fallbacks between calls (so it should be empty)
+ Debug.Assert(_fallbackBuffer.Remaining == 0,
+ "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
+ _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
+ }
+
+ // Adds one char that was decoded from numBytes input bytes. With an output
+ // buffer, the char is stored if there is room; if there is not, the read
+ // position is moved back by numBytes, ThrowCharsOverflow is called (it may
+ // throw) and false is returned. Without an output buffer the char is only counted.
+ internal unsafe bool AddChar(char ch, int numBytes)
+ {
+ if (_chars != null)
+ {
+ if (_chars >= _charEnd)
+ {
+ // Throw maybe
+ _bytes -= numBytes; // Didn't encode these bytes
+ _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
+ return false; // No throw, but no store either
+ }
+
+ *(_chars++) = ch;
+ }
+ _charCountResult++;
+ return true;
+ }
+
+ // Adds one char that was decoded from a single input byte.
+ internal unsafe bool AddChar(char ch)
+ {
+ return AddChar(ch, 1);
+ }
+
+
+ // Adds two chars that were decoded together from numBytes input bytes.
+ // Room for both is checked up front so that either both are added or neither is.
+ // NOTE(review): unlike the single-char overload, this room check is not guarded
+ // by "_chars != null" - confirm how count-only callers are expected to behave here.
+ internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
+ {
+ // Need room for 2 chars
+ if (_chars >= _charEnd - 1)
+ {
+ // Throw maybe
+ _bytes -= numBytes; // Didn't encode these bytes
+ _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
+ return false; // No throw, but no store either
+ }
+ return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
+ }
+
+ // Moves the read position by count bytes (no bounds check is made here).
+ internal unsafe void AdjustBytes(int count)
+ {
+ _bytes += count;
+ }
+
+ // True while the read position is before the end of the input range.
+ internal unsafe bool MoreData
+ {
+ get
+ {
+ return _bytes < _byteEnd;
+ }
+ }
+
+ // Do we have count more bytes?
+ internal unsafe bool EvenMoreData(int count)
+ {
+ return (_bytes <= _byteEnd - count);
+ }
+
+ // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
+ // but we'll double check just to make sure.
+ // Returns 0 without advancing when the input is exhausted.
+ internal unsafe byte GetNextByte()
+ {
+ Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");
+ if (_bytes >= _byteEnd)
+ return 0;
+ return *(_bytes++);
+ }
+
+ // Number of input bytes between the start of the range and the read position.
+ internal unsafe int BytesUsed
+ {
+ get
+ {
+ return (int)(_bytes - _byteStart);
+ }
+ }
+
+ // Falls back a single byte; see Fallback(byte[]).
+ internal unsafe bool Fallback(byte fallbackByte)
+ {
+ // Build our buffer
+ byte[] byteBuffer = new byte[] { fallbackByte };
+
+ // Do the fallback and add the data.
+ return Fallback(byteBuffer);
+ }
+
+ // Falls back a two-byte sequence; see Fallback(byte[]).
+ internal unsafe bool Fallback(byte byte1, byte byte2)
+ {
+ // Build our buffer
+ byte[] byteBuffer = new byte[] { byte1, byte2 };
+
+ // Do the fallback and add the data.
+ return Fallback(byteBuffer);
+ }
+
+ // Falls back a four-byte sequence; see Fallback(byte[]).
+ internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
+ {
+ // Build our buffer
+ byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };
+
+ // Do the fallback and add the data.
+ return Fallback(byteBuffer);
+ }
+
+ // Runs the decoder fallback for the given bytes. With an output buffer, the
+ // fallback writes through _chars and the count grows by the number of chars
+ // written; if the fallback reports failure, the read position is moved back by
+ // the length of byteBuffer, the fallback buffer is reset, ThrowCharsOverflow is
+ // called (it may throw) and false is returned. Without an output buffer, the
+ // count grows by the value the counting fallback returns.
+ internal unsafe bool Fallback(byte[] byteBuffer)
+ {
+ // Do the fallback and add the data.
+ if (_chars != null)
+ {
+ // Remember where the output was so the number of chars written can be computed
+ char* pTemp = _chars;
+ if (_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false)
+ {
+ // Throw maybe
+ _bytes -= byteBuffer.Length; // Didn't use how many ever bytes we're falling back
+ _fallbackBuffer.InternalReset(); // We didn't use this fallback.
+ _enc.ThrowCharsOverflow(_decoder, _chars == _charStart); // Throw?
+ return false; // No throw, but no store either
+ }
+ _charCountResult += unchecked((int)(_chars - pTemp));
+ }
+ else
+ {
+ _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
+ }
+
+ return true;
+ }
+
+ // Number of chars added so far.
+ internal unsafe int Count
+ {
+ get
+ {
+ return _charCountResult;
+ }
+ }
+ }
+
+ // Helper used while encoding: tracks a read position in a char input range and
+ // a write position in a byte output range, and counts the bytes produced.
+ // When the byte pointer is null nothing is stored and only the count is kept.
+ internal class EncodingByteBuffer
+ {
+ // Current write position in the output, plus the start and end of the output range
+ private unsafe byte* _bytes;
+ private unsafe byte* _byteStart;
+ private unsafe byte* _byteEnd;
+ // Current read position in the input, plus the start and end of the input range
+ private unsafe char* _chars;
+ private unsafe char* _charStart;
+ private unsafe char* _charEnd;
+ // Number of bytes added so far (whether stored or only counted)
+ private int _byteCountResult = 0;
+ private Encoding _enc;
+ private EncoderNLS _encoder;
+ internal EncoderFallbackBuffer fallbackBuffer;
+
+ // Records the input/output ranges and selects the fallback buffer: the
+ // encoder's own buffer when an encoder is supplied, otherwise a new one
+ // created from the encoding's EncoderFallback. Throws ArgumentException when
+ // the encoder is flagged to throw on overflow and its fallback buffer still
+ // holds data.
+ internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
+ byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
+ {
+ _enc = inEncoding;
+ _encoder = inEncoder;
+
+ _charStart = inCharStart;
+ _chars = inCharStart;
+ _charEnd = inCharStart + inCharCount;
+
+ _bytes = inByteStart;
+ _byteStart = inByteStart;
+ _byteEnd = inByteStart + inByteCount;
+
+ if (_encoder == null)
+ this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
+ else
+ {
+ this.fallbackBuffer = _encoder.FallbackBuffer;
+ // If we're not converting we must not have data in our fallback buffer
+ if (_encoder.m_throwOnOverflow && _encoder.InternalHasFallbackBuffer &&
+ this.fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
+ _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
+ }
+ // The last argument tells the fallback buffer whether an output buffer was supplied
+ fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
+ }
+
+ // Adds one byte. moreBytesExpected is the number of further bytes that will
+ // follow as part of the same sequence; room is required for this byte plus
+ // those. With an output buffer and not enough room, MovePrevious(true) is
+ // called (it may throw) and false is returned. Without an output buffer the
+ // byte is only counted.
+ internal unsafe bool AddByte(byte b, int moreBytesExpected)
+ {
+ Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
+ if (_bytes != null)
+ {
+ if (_bytes >= _byteEnd - moreBytesExpected)
+ {
+ // Throw maybe. Check which buffer to back up (only matters if Converting)
+ this.MovePrevious(true); // Throw if necessary
+ return false; // No throw, but no store either
+ }
+
+ *(_bytes++) = b;
+ }
+ _byteCountResult++;
+ return true;
+ }
+
+ // Adds a single byte with nothing expected to follow.
+ internal unsafe bool AddByte(byte b1)
+ {
+ return (AddByte(b1, 0));
+ }
+
+ // Adds a two-byte sequence with nothing expected to follow.
+ internal unsafe bool AddByte(byte b1, byte b2)
+ {
+ return (AddByte(b1, b2, 0));
+ }
+
+ // Adds a two-byte sequence followed by moreBytesExpected further bytes. Each
+ // byte is added with the number of bytes still to come, so the room check made
+ // for the first byte covers the whole sequence.
+ internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
+ {
+ return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
+ }
+
+ // Adds a three-byte sequence with nothing expected to follow.
+ internal unsafe bool AddByte(byte b1, byte b2, byte b3)
+ {
+ return AddByte(b1, b2, b3, (int)0);
+ }
+
+ // Adds a three-byte sequence followed by moreBytesExpected further bytes.
+ internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
+ {
+ return (AddByte(b1, 2 + moreBytesExpected) &&
+ AddByte(b2, 1 + moreBytesExpected) &&
+ AddByte(b3, moreBytesExpected));
+ }
+
+ // Adds a four-byte sequence with nothing expected to follow.
+ internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
+ {
+ return (AddByte(b1, 3) &&
+ AddByte(b2, 2) &&
+ AddByte(b3, 1) &&
+ AddByte(b4, 0));
+ }
+
+ // Un-reads the most recent input: steps the fallback buffer back when it is
+ // falling back, otherwise steps the char read position back by one (if it is
+ // past the start). When bThrow is set, ThrowBytesOverflow is then called,
+ // which may throw.
+ internal unsafe void MovePrevious(bool bThrow)
+ {
+ if (fallbackBuffer.bFallingBack)
+ fallbackBuffer.MovePrevious(); // don't use last fallback
+ else
+ {
+ Debug.Assert(_chars > _charStart ||
+ ((bThrow == true) && (_bytes == _byteStart)),
+ "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
+ if (_chars > _charStart)
+ _chars--; // don't use last char
+ }
+
+ if (bThrow)
+ _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart); // Throw? (and reset fallback if not converting)
+ }
+
+ // Runs the encoder fallback for charFallback; the fallback buffer is given the
+ // char read position by reference and its result is returned unchanged.
+ internal unsafe bool Fallback(char charFallback)
+ {
+ // Do the fallback
+ return fallbackBuffer.InternalFallback(charFallback, ref _chars);
+ }
+
+ // True while there is anything left to read.
+ internal unsafe bool MoreData
+ {
+ get
+ {
+ // See if fallbackBuffer is not empty or if there's data left in chars buffer.
+ return ((fallbackBuffer.Remaining > 0) || (_chars < _charEnd));
+ }
+ }
+
+ // Returns the next char to encode: first whatever the fallback buffer yields,
+ // otherwise the next input char. Returns 0 when neither has anything left.
+ internal unsafe char GetNextChar()
+ {
+ // See if there's something in our fallback buffer
+ char cReturn = fallbackBuffer.InternalGetNextChar();
+
+ // Nothing in the fallback buffer, return our normal data.
+ if (cReturn == 0)
+ {
+ if (_chars < _charEnd)
+ cReturn = *(_chars++);
+ }
+
+ return cReturn;
+ }
+
+ // Number of input chars between the start of the range and the read position.
+ internal unsafe int CharsUsed
+ {
+ get
+ {
+ return (int)(_chars - _charStart);
+ }
+ }
+
+ // Number of bytes added so far.
+ internal unsafe int Count
+ {
+ get
+ {
+ return _byteCountResult;
+ }
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Runtime.Serialization;
+
+namespace System.Text
+{
+ //
+ // Latin1Encoding is a simple override that optimizes the GetString path for Latin1.
+ // Because of the best-fit cases we can't do this when encoding the string, only when decoding.
+ //
+ internal class Latin1Encoding : EncodingNLS, ISerializable
+ {
+ // Used by Encoding.Latin1 for lazy initialization
+ // The initialization code will not be run until a static member of the class is referenced
+ internal static readonly Latin1Encoding s_default = new Latin1Encoding();
+
+ // Passes the Encoding.ISO_8859_1 code page constant to the EncodingNLS base constructor.
+ // We only use the best-fit table, of which ASCII is a superset for us.
+ public Latin1Encoding() : base(Encoding.ISO_8859_1) { }
+
+ // ISerializable implementation. Serialization is not supported for this type:
+ // the method always throws PlatformNotSupportedException and never touches
+ // the info or context arguments.
+ void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ // GetByteCount: counts the bytes needed for charCount chars starting at chars. Each char <= 0xff counts as one
+ // byte; every other char is handed to the encoder fallback. Note: We start by assuming that the output will be
+ // the same as count. Having an encoder or fallback may change that assumption
+ internal override unsafe int GetByteCount(char* chars, int charCount, EncoderNLS encoder)
+ {
+ // Just need to ASSERT, this is called by something else internal that checked parameters already
+ Debug.Assert(charCount >= 0, "[Latin1Encoding.GetByteCount]count is negative");
+ Debug.Assert(chars != null, "[Latin1Encoding.GetByteCount]chars is null");
+
+ // Assert because we shouldn't be able to have a null encoder fallback.
+ Debug.Assert(encoderFallback != null, "[Latin1Encoding.GetByteCount]Attempting to use null fallback encoder");
+
+ char charLeftOver = (char)0; // stays 0 unless the encoder carries a char over from a previous call
+
+ // If we have an encoder AND we aren't using default fallback,
+ // then we may have a complicated count.
+ EncoderReplacementFallback fallback; // null when the active fallback is not an EncoderReplacementFallback
+ if (encoder != null)
+ {
+ charLeftOver = encoder.charLeftOver;
+ Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
+ "[Latin1Encoding.GetByteCount]leftover character should be high surrogate");
+
+ fallback = encoder.Fallback as EncoderReplacementFallback; // with an encoder, its fallback is used instead of this.EncoderFallback
+
+ // Verify that we have no fallbackbuffer, for Latin1 it's always empty, so just assert
+ Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
+ encoder.FallbackBuffer.Remaining == 0,
+ "[Latin1CodePageEncoding.GetByteCount]Expected empty fallback buffer");
+ }
+ else
+ fallback = this.EncoderFallback as EncoderReplacementFallback;
+
+ if ((fallback != null && fallback.MaxCharCount == 1)/* || bIsBestFit*/)
+ {
+ // Replacement fallback encodes surrogate pairs as two ?? (or two whatever), so return size is always
+ // same as input size.
+ // Note that no existing SBCS code pages map code points to supplementary characters, so this is easy.
+
+ // We could however have 1 extra byte if the last call had an encoder and a funky fallback and
+ // if we don't use the funky fallback this time.
+
+ // Do we have an extra char left over from last time?
+ if (charLeftOver > 0)
+ charCount++;
+
+ return (charCount);
+ }
+
+ // Count is more complicated if you have a funky fallback: every char has to be looked at individually.
+ // byteCount starts at zero and is incremented once for each char that needs no fallback.
+ int byteCount = 0;
+
+ // Remember where the input ends; the loop below stops reading input there.
+ char* charEnd = chars + charCount;
+
+ // For fallback we may need a fallback buffer, we know we aren't default fallback.
+ EncoderFallbackBuffer fallbackBuffer = null; // only assigned once a char actually has to fall back
+ char* charsForFallback; // scratch copy of chars: passed to InternalFallback by ref, then copied back
+
+ // We may have a left over character from last time, try and process it.
+ if (charLeftOver > 0)
+ {
+ // Initialize the buffer
+ Debug.Assert(encoder != null,
+ "[Latin1Encoding.GetByteCount]Expected encoder if we have charLeftOver");
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
+
+ // Since left over char was a surrogate, it'll have to be fallen back.
+ // Get Fallback
+ // This will fallback a pair if *chars is a low surrogate
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback; // adopt whatever position InternalFallback left in the ref argument
+ }
+
+ // Now we may have fallback char[] already from the encoder
+
+ // Go ahead and do it, including the fallback: a non-zero char from the fallback buffer is used before more input is read.
+ char ch;
+ while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
+ chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Check for fallback, this'll catch surrogate pairs too.
+ // no chars >= 0x100 are allowed.
+ if (ch > 0xff)
+ {
+ // Initialize the buffer
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, false); // charEnd - charCount is the original start of the input
+ }
+
+ // Get Fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+ continue; // ch itself is not counted; any fallback chars are fetched by the loop condition
+ }
+
+ // We'll use this one
+ byteCount++;
+ }
+
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[Latin1Encoding.GetByteCount]Expected Empty fallback buffer");
+
+ return byteCount;
+ }
+
+ internal override unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, EncoderNLS encoder)
+ {
+ // Encodes chars into bytes and returns the number of bytes written. Parameters are only asserted: the internal caller checked them.
+ Debug.Assert(bytes != null, "[Latin1Encoding.GetBytes]bytes is null");
+ Debug.Assert(byteCount >= 0, "[Latin1Encoding.GetBytes]byteCount is negative");
+ Debug.Assert(chars != null, "[Latin1Encoding.GetBytes]chars is null");
+ Debug.Assert(charCount >= 0, "[Latin1Encoding.GetBytes]charCount is negative");
+
+ // Assert because we shouldn't be able to have a null encoder fallback.
+ Debug.Assert(encoderFallback != null, "[Latin1Encoding.GetBytes]Attempting to use null encoder fallback");
+
+ // Get any left over characters & check fast or slower fallback type
+ char charLeftOver = (char)0;
+ EncoderReplacementFallback fallback = null; // stays null when the active fallback is not an EncoderReplacementFallback
+ if (encoder != null)
+ {
+ charLeftOver = encoder.charLeftOver;
+ fallback = encoder.Fallback as EncoderReplacementFallback;
+ Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
+ "[Latin1Encoding.GetBytes]leftover character should be high surrogate");
+
+ // Verify that we have no fallbackbuffer, for Latin1 it's always empty, so just assert
+ Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
+ encoder.FallbackBuffer.Remaining == 0,
+ "[Latin1CodePageEncoding.GetBytes]Expected empty fallback buffer");
+ }
+ else
+ {
+ fallback = this.EncoderFallback as EncoderReplacementFallback;
+ }
+
+ // Remember the end of the input and both starting positions (the starts are used to compute the counts reported at the end)
+ char* charEnd = chars + charCount;
+ byte* byteStart = bytes;
+ char* charStart = chars;
+
+ // See if we do the fast default or slightly slower fallback
+ if (fallback != null && fallback.MaxCharCount == 1)
+ {
+ // Fast version: the replacement is a single char
+ char cReplacement = fallback.DefaultString[0];
+
+ // Check for replacements in range, otherwise fall back to slow version.
+ if (cReplacement <= (char)0xff)
+ {
+ // We should have exactly as many output bytes as input chars, unless there's a left
+ // over character, in which case we may need one more.
+
+ // If we had a left over character will have to add a ? (This happens if they had a funky
+ // fallback last time, but not this time.) (We can't spit any out though
+ // because with fallback encoder each surrogate is treated as a separate code point)
+ if (charLeftOver > 0)
+ {
+ // Have to have room
+ // Throw even if doing no throw version because this is just 1 char,
+ // so buffer will never be big enough
+ if (byteCount == 0)
+ ThrowBytesOverflow(encoder, true);
+
+ // This'll make sure we still have more room and also make sure our return value is correct.
+ *(bytes++) = (byte)cReplacement;
+ byteCount--; // We used one of the ones we were counting.
+ }
+
+ // This keeps us from overrunning our output buffer
+ if (byteCount < charCount)
+ {
+ // Throw or make buffer smaller? NOTE(review): ThrowBytesOverflow is defined elsewhere; the code after it assumes it can return - confirm when.
+ ThrowBytesOverflow(encoder, byteCount < 1);
+
+ // Just use what we can
+ charEnd = chars + byteCount;
+ }
+
+ // We just do a quick copy: one byte per char, with the replacement standing in for anything above 0xff
+ while (chars < charEnd)
+ {
+ char ch2 = *(chars++);
+ if (ch2 > 0x00ff) *(bytes++) = (byte)cReplacement;
+ else *(bytes++) = (byte)ch2;
+ }
+
+ // Clear encoder
+ if (encoder != null)
+ {
+ encoder.charLeftOver = (char)0;
+ encoder.m_charsUsed = (int)(chars - charStart); // number of input chars consumed by this call
+ }
+ return (int)(bytes - byteStart);
+ }
+ }
+
+ // Slower version, have to do real fallback.
+
+ // One past the last byte we are allowed to write
+ byte* byteEnd = bytes + byteCount;
+
+ // For fallback we may need a fallback buffer; it is only assigned once a char actually has to fall back
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback; // scratch copy of chars: passed to InternalFallback by ref, then copied back
+
+ // We may have a left over character from last time, try and process it.
+ if (charLeftOver > 0)
+ {
+ // Since left over char was a surrogate, it'll have to be fallen back.
+ // Initialize the buffer
+ Debug.Assert(encoder != null,
+ "[Latin1Encoding.GetBytes]Expected encoder if we have charLeftOver");
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
+
+ // Since left over char was a surrogate, it'll have to be fallen back.
+ // Get Fallback
+ // This will fallback a pair if *chars is a low surrogate
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (fallbackBuffer.Remaining > byteEnd - bytes)
+ {
+ // Throw it, if we don't have enough for this we never will
+ ThrowBytesOverflow(encoder, true);
+ }
+ }
+
+ // Now we may have fallback char[] already from the encoder fallback above
+
+ // Go ahead and do it, including the fallback: a non-zero char from the fallback buffer is used before more input is read.
+ char ch;
+ while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
+ chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Check for fallback, this'll catch surrogate pairs too.
+ // All characters >= 0x100 must fall back.
+ if (ch > 0xff)
+ {
+ // Initialize the buffer
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true); // charEnd - charCount is the original start of the input
+ }
+
+ // Get Fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Make sure we have enough room. Each fallback char will be 1 output byte
+ // (or else cause a recursion exception)
+ if (fallbackBuffer.Remaining > byteEnd - bytes)
+ {
+ // Didn't use this char, throw it. Chars should've advanced by now
+ // If we had encoder fallback data it would've thrown before the loop
+ Debug.Assert(chars > charStart,
+ "[Latin1Encoding.GetBytes]Expected chars to have advanced (fallback case)");
+ chars--; // step back so this char is not counted as used
+ fallbackBuffer.InternalReset();
+
+ // Throw it
+ ThrowBytesOverflow(encoder, chars == charStart);
+ break; // reached only if ThrowBytesOverflow returned: stop with what has been written so far
+ }
+
+ continue; // ch itself produces no byte; any fallback chars are fetched by the loop condition
+ }
+
+ // We'll use this one
+ // Bounds check
+ if (bytes >= byteEnd)
+ {
+ // didn't use this char, we'll throw or use buffer
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.bFallingBack == false,
+ "[Latin1Encoding.GetBytes]Expected fallback to have throw initially if insufficient space");
+ if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
+ {
+ Debug.Assert(chars > charStart,
+ "[Latin1Encoding.GetBytes]Expected chars to have advanced (fallback case)");
+ chars--; // don't use last char
+ }
+ ThrowBytesOverflow(encoder, chars == charStart); // throw ?
+ break; // don't throw, stop
+ }
+
+ // Go ahead and add it
+ *bytes = unchecked((byte)ch); // ch <= 0xff on this path, so the cast keeps its value
+ bytes++;
+ }
+
+ // Need to do encoder stuff
+ if (encoder != null)
+ {
+ // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
+ if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
+ // Clear it in case of MustFlush
+ encoder.charLeftOver = (char)0;
+
+ // Set our chars used count
+ encoder.m_charsUsed = (int)(chars - charStart);
+ }
+
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[Latin1Encoding.GetBytes]Expected Empty fallback buffer");
+
+ return (int)(bytes - byteStart);
+ }
+
+ // Internal char-count worker: reports how many chars decoding count bytes produces.
+ // The decoder argument is not consulted.
+ internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
+ {
+ // The arguments come from internal callers that validated them, so only assert here.
+ Debug.Assert(bytes != null, "[Latin1Encoding.GetCharCount]bytes is null");
+ Debug.Assert(count >= 0, "[Latin1Encoding.GetCharCount]byteCount is negative");
+
+ // Single-byte code page: every byte is one char, so there are no surrogate pairs to add
+ // and no fallback to account for. The answer is simply the byte count.
+ int charCount = count;
+ return charCount;
+ }
+
+ // Decodes bytes into chars by widening each byte to the char with the same numeric value.
+ // Returns the number of chars written, which equals the number of bytes consumed.
+ internal override unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS decoder)
+ {
+ // The arguments come from internal callers that validated them, so only assert here.
+ Debug.Assert(bytes != null, "[Latin1Encoding.GetChars]bytes is null");
+ Debug.Assert(byteCount >= 0, "[Latin1Encoding.GetChars]byteCount is negative");
+ Debug.Assert(chars != null, "[Latin1Encoding.GetChars]chars is null");
+ Debug.Assert(charCount >= 0, "[Latin1Encoding.GetChars]charCount is negative");
+
+ // One char is produced per byte, so the output must hold byteCount chars.
+ if (byteCount > charCount)
+ {
+ // Output buffer is too small; the helper decides whether that is an exception.
+ ThrowCharsOverflow(decoder, charCount < 1);
+
+ // Still here, so convert only as many bytes as there is room for.
+ byteCount = charCount;
+ }
+
+ // Straight widening copy; no byte value ever needs a fallback.
+ for (int i = 0; i < byteCount; i++)
+ {
+ chars[i] = unchecked((char)bytes[i]);
+ }
+
+ // Let a stateful decoder know how many input bytes were consumed.
+ if (decoder != null)
+ decoder.m_bytesUsed = byteCount;
+
+ // Chars written is the same number as bytes consumed.
+ return byteCount;
+ }
+
+ // Upper bound on the bytes needed to encode charCount chars.
+ // Throws ArgumentOutOfRangeException when charCount is negative or the bound does not fit in an int.
+ public override int GetMaxByteCount(int charCount)
+ {
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(charCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // One slot per char, plus one more in case a high surrogate was left over from an earlier call.
+ long slots = (long)charCount + 1;
+
+ // A fallback that can emit more than one char per slot scales the bound; otherwise the factor is 1.
+ long perSlot = EncoderFallback.MaxCharCount > 1 ? EncoderFallback.MaxCharCount : 1;
+ long byteCount = slots * perSlot;
+
+ // Computed as a long so that an oversized result is reported instead of wrapping around.
+ if (byteCount > int.MaxValue)
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ return (int)byteCount;
+ }
+
+ // Upper bound on the chars produced by decoding byteCount bytes.
+ // Throws ArgumentOutOfRangeException when byteCount is negative or the bound does not fit in an int.
+ public override int GetMaxCharCount(int byteCount)
+ {
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(byteCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // A decoder fallback that can emit more than one char per byte scales the bound; otherwise
+ // the factor is 1 and the bound is simply the byte count.
+ long perByte = DecoderFallback.MaxCharCount > 1 ? DecoderFallback.MaxCharCount : 1;
+ long charCount = (long)byteCount * perByte;
+
+ // Computed as a long so that an oversized result is reported instead of wrapping around.
+ if (charCount > int.MaxValue)
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
+
+ return (int)charCount;
+ }
+
+ // Reports whether the encoding only uses single byte code points (ie, ASCII, 1252, etc).
+ // This encoding always answers true.
+ public override bool IsSingleByte => true;
+
+ // Reports whether text in this encoding is always in the given normalization form.
+ // Returns true for FormC only.
+ public override bool IsAlwaysNormalized(NormalizationForm form)
+ {
+ switch (form)
+ {
+ // Latin-1 contains precomposed characters, so it is normal for Form C.
+ case NormalizationForm.FormC:
+ return true;
+
+ // Since some characters are composed, it is not normal for D & KD. Some characters such as
+ // 0x00A8 (spacing diaeresis) also have compatibility decompositions, so false for KD & KC too.
+ default:
+ return false;
+ }
+ }
+ // The best fit table is small enough to be hard coded in this class, so just hand it out.
+ internal override char[] GetBestFitUnicodeToBytesData()
+ {
+ // Returns the shared static array itself, not a copy.
+ char[] bestFitTable = arrayCharBestFit;
+ return bestFitTable;
+ }
+
+ // Best fit for ASCII, and since it works for ASCII, we use it for latin1 as well.
+ private static readonly char[] arrayCharBestFit =
+ {
+// The first many are in case you wanted to use this for ASCIIEncoding, which we don't need to do any more.
+// (char)0x00a0, (char)0x0020, // No-Break Space -> Space
+// (char)0x00a1, (char)0x0021, // Inverted Exclamation Mark -> !
+// (char)0x00a2, (char)0x0063, // Cent Sign -> c
+// (char)0x00a3, (char)0x003f, // Pound Sign
+// (char)0x00a4, (char)0x0024, // Currency Sign -> $
+// (char)0x00a5, (char)0x0059, // Yen Sign -> Y
+// (char)0x00a6, (char)0x007c, // Broken Bar -> |
+// (char)0x00a7, (char)0x003f, // Section Sign
+// (char)0x00a8, (char)0x003f, // Diaeresis
+// (char)0x00a9, (char)0x0043, // Copyright Sign -> C
+// (char)0x00aa, (char)0x0061, // Feminine Ordinal Indicator -> a
+// (char)0x00ab, (char)0x003c, // Left-Pointing Double Angle Quotation Mark -> <
+// (char)0x00ac, (char)0x003f, // Not Sign
+// (char)0x00ad, (char)0x002d, // Soft Hyphen -> -
+// (char)0x00ae, (char)0x0052, // Registered Sign -> R
+// (char)0x00af, (char)0x003f, // Macron
+// (char)0x00b0, (char)0x003f, // Degree Sign
+// (char)0x00b1, (char)0x003f, // Plus-Minus Sign
+// (char)0x00b2, (char)0x0032, // Superscript Two -> 2
+// (char)0x00b3, (char)0x0033, // Superscript Three -> 3
+// (char)0x00b4, (char)0x003f, // Acute Accent
+// (char)0x00b5, (char)0x003f, // Micro Sign
+// (char)0x00b6, (char)0x003f, // Pilcrow Sign
+// (char)0x00b7, (char)0x002e, // Middle Dot -> .
+// (char)0x00b8, (char)0x002c, // Cedilla -> ,
+// (char)0x00b9, (char)0x0031, // Superscript One -> 1
+// (char)0x00ba, (char)0x006f, // Masculine Ordinal Indicator -> o
+// (char)0x00bb, (char)0x003e, // Right-Pointing Double Angle Quotation Mark -> >
+// (char)0x00bc, (char)0x003f, // Vulgar Fraction One Quarter
+// (char)0x00bd, (char)0x003f, // Vulgar Fraction One Half
+// (char)0x00be, (char)0x003f, // Vulgar Fraction Three Quarters
+// (char)0x00bf, (char)0x003f, // Inverted Question Mark
+// (char)0x00c0, (char)0x0041, // Latin Capital Letter A With Grave -> A
+// (char)0x00c1, (char)0x0041, // Latin Capital Letter A With Acute -> A
+// (char)0x00c2, (char)0x0041, // Latin Capital Letter A With Circumflex -> A
+// (char)0x00c3, (char)0x0041, // Latin Capital Letter A With Tilde -> A
+// (char)0x00c4, (char)0x0041, // Latin Capital Letter A With Diaeresis -> A
+// (char)0x00c5, (char)0x0041, // Latin Capital Letter A With Ring Above -> A
+// (char)0x00c6, (char)0x0041, // Latin Capital Ligature Ae -> A
+// (char)0x00c7, (char)0x0043, // Latin Capital Letter C With Cedilla -> C
+// (char)0x00c8, (char)0x0045, // Latin Capital Letter E With Grave -> E
+// (char)0x00c9, (char)0x0045, // Latin Capital Letter E With Acute -> E
+// (char)0x00ca, (char)0x0045, // Latin Capital Letter E With Circumflex -> E
+// (char)0x00cb, (char)0x0045, // Latin Capital Letter E With Diaeresis -> E
+// (char)0x00cc, (char)0x0049, // Latin Capital Letter I With Grave -> I
+// (char)0x00cd, (char)0x0049, // Latin Capital Letter I With Acute -> I
+// (char)0x00ce, (char)0x0049, // Latin Capital Letter I With Circumflex -> I
+// (char)0x00cf, (char)0x0049, // Latin Capital Letter I With Diaeresis -> I
+// (char)0x00d0, (char)0x0044, // Latin Capital Letter Eth -> D
+// (char)0x00d1, (char)0x004e, // Latin Capital Letter N With Tilde -> N
+// (char)0x00d2, (char)0x004f, // Latin Capital Letter O With Grave -> O
+// (char)0x00d3, (char)0x004f, // Latin Capital Letter O With Acute -> O
+// (char)0x00d4, (char)0x004f, // Latin Capital Letter O With Circumflex -> O
+// (char)0x00d5, (char)0x004f, // Latin Capital Letter O With Tilde -> O
+// (char)0x00d6, (char)0x004f, // Latin Capital Letter O With Diaeresis -> O
+// (char)0x00d7, (char)0x003f, // Multiplication Sign
+// (char)0x00d8, (char)0x004f, // Latin Capital Letter O With Stroke -> O
+// (char)0x00d9, (char)0x0055, // Latin Capital Letter U With Grave -> U
+// (char)0x00da, (char)0x0055, // Latin Capital Letter U With Acute -> U
+// (char)0x00db, (char)0x0055, // Latin Capital Letter U With Circumflex -> U
+// (char)0x00dc, (char)0x0055, // Latin Capital Letter U With Diaeresis -> U
+// (char)0x00dd, (char)0x0059, // Latin Capital Letter Y With Acute -> Y
+// (char)0x00de, (char)0x003f, // Latin Capital Letter Thorn
+// (char)0x00df, (char)0x003f, // Latin Small Letter Sharp S
+// (char)0x00e0, (char)0x0061, // Latin Small Letter A With Grave -> a
+// (char)0x00e1, (char)0x0061, // Latin Small Letter A With Acute -> a
+// (char)0x00e2, (char)0x0061, // Latin Small Letter A With Circumflex -> a
+// (char)0x00e3, (char)0x0061, // Latin Small Letter A With Tilde -> a
+// (char)0x00e4, (char)0x0061, // Latin Small Letter A With Diaeresis -> a
+// (char)0x00e5, (char)0x0061, // Latin Small Letter A With Ring Above -> a
+// (char)0x00e6, (char)0x0061, // Latin Small Ligature Ae -> a
+// (char)0x00e7, (char)0x0063, // Latin Small Letter C With Cedilla -> c
+// (char)0x00e8, (char)0x0065, // Latin Small Letter E With Grave -> e
+// (char)0x00e9, (char)0x0065, // Latin Small Letter E With Acute -> e
+// (char)0x00ea, (char)0x0065, // Latin Small Letter E With Circumflex -> e
+// (char)0x00eb, (char)0x0065, // Latin Small Letter E With Diaeresis -> e
+// (char)0x00ec, (char)0x0069, // Latin Small Letter I With Grave -> i
+// (char)0x00ed, (char)0x0069, // Latin Small Letter I With Acute -> i
+// (char)0x00ee, (char)0x0069, // Latin Small Letter I With Circumflex -> i
+// (char)0x00ef, (char)0x0069, // Latin Small Letter I With Diaeresis -> i
+// (char)0x00f0, (char)0x003f, // Latin Small Letter Eth
+// (char)0x00f1, (char)0x006e, // Latin Small Letter N With Tilde -> n
+// (char)0x00f2, (char)0x006f, // Latin Small Letter O With Grave -> o
+// (char)0x00f3, (char)0x006f, // Latin Small Letter O With Acute -> o
+// (char)0x00f4, (char)0x006f, // Latin Small Letter O With Circumflex -> o
+// (char)0x00f5, (char)0x006f, // Latin Small Letter O With Tilde -> o
+// (char)0x00f6, (char)0x006f, // Latin Small Letter O With Diaeresis -> o
+// (char)0x00f7, (char)0x003f, // Division Sign
+// (char)0x00f8, (char)0x006f, // Latin Small Letter O With Stroke -> o
+// (char)0x00f9, (char)0x0075, // Latin Small Letter U With Grave -> u
+// (char)0x00fa, (char)0x0075, // Latin Small Letter U With Acute -> u
+// (char)0x00fb, (char)0x0075, // Latin Small Letter U With Circumflex -> u
+// (char)0x00fc, (char)0x0075, // Latin Small Letter U With Diaeresis -> u
+// (char)0x00fd, (char)0x0079, // Latin Small Letter Y With Acute -> y
+// (char)0x00fe, (char)0x003f, // Latin Small Letter Thorn
+// (char)0x00ff, (char)0x0079, // Latin Small Letter Y With Diaeresis -> y
+ (char)0x0100, (char)0x0041, // Latin Capital Letter A With Macron -> A
+ (char)0x0101, (char)0x0061, // Latin Small Letter A With Macron -> a
+ (char)0x0102, (char)0x0041, // Latin Capital Letter A With Breve -> A
+ (char)0x0103, (char)0x0061, // Latin Small Letter A With Breve -> a
+ (char)0x0104, (char)0x0041, // Latin Capital Letter A With Ogonek -> A
+ (char)0x0105, (char)0x0061, // Latin Small Letter A With Ogonek -> a
+ (char)0x0106, (char)0x0043, // Latin Capital Letter C With Acute -> C
+ (char)0x0107, (char)0x0063, // Latin Small Letter C With Acute -> c
+ (char)0x0108, (char)0x0043, // Latin Capital Letter C With Circumflex -> C
+ (char)0x0109, (char)0x0063, // Latin Small Letter C With Circumflex -> c
+ (char)0x010a, (char)0x0043, // Latin Capital Letter C With Dot Above -> C
+ (char)0x010b, (char)0x0063, // Latin Small Letter C With Dot Above -> c
+ (char)0x010c, (char)0x0043, // Latin Capital Letter C With Caron -> C
+ (char)0x010d, (char)0x0063, // Latin Small Letter C With Caron -> c
+ (char)0x010e, (char)0x0044, // Latin Capital Letter D With Caron -> D
+ (char)0x010f, (char)0x0064, // Latin Small Letter D With Caron -> d
+ (char)0x0110, (char)0x0044, // Latin Capital Letter D With Stroke -> D
+ (char)0x0111, (char)0x0064, // Latin Small Letter D With Stroke -> d
+ (char)0x0112, (char)0x0045, // Latin Capital Letter E With Macron -> E
+ (char)0x0113, (char)0x0065, // Latin Small Letter E With Macron -> e
+ (char)0x0114, (char)0x0045, // Latin Capital Letter E With Breve -> E
+ (char)0x0115, (char)0x0065, // Latin Small Letter E With Breve -> e
+ (char)0x0116, (char)0x0045, // Latin Capital Letter E With Dot Above -> E
+ (char)0x0117, (char)0x0065, // Latin Small Letter E With Dot Above -> e
+ (char)0x0118, (char)0x0045, // Latin Capital Letter E With Ogonek -> E
+ (char)0x0119, (char)0x0065, // Latin Small Letter E With Ogonek -> e
+ (char)0x011a, (char)0x0045, // Latin Capital Letter E With Caron -> E
+ (char)0x011b, (char)0x0065, // Latin Small Letter E With Caron -> e
+ (char)0x011c, (char)0x0047, // Latin Capital Letter G With Circumflex -> G
+ (char)0x011d, (char)0x0067, // Latin Small Letter G With Circumflex -> g
+ (char)0x011e, (char)0x0047, // Latin Capital Letter G With Breve -> G
+ (char)0x011f, (char)0x0067, // Latin Small Letter G With Breve -> g
+ (char)0x0120, (char)0x0047, // Latin Capital Letter G With Dot Above -> G
+ (char)0x0121, (char)0x0067, // Latin Small Letter G With Dot Above -> g
+ (char)0x0122, (char)0x0047, // Latin Capital Letter G With Cedilla -> G
+ (char)0x0123, (char)0x0067, // Latin Small Letter G With Cedilla -> g
+ (char)0x0124, (char)0x0048, // Latin Capital Letter H With Circumflex -> H
+ (char)0x0125, (char)0x0068, // Latin Small Letter H With Circumflex -> h
+ (char)0x0126, (char)0x0048, // Latin Capital Letter H With Stroke -> H
+ (char)0x0127, (char)0x0068, // Latin Small Letter H With Stroke -> h
+ (char)0x0128, (char)0x0049, // Latin Capital Letter I With Tilde -> I
+ (char)0x0129, (char)0x0069, // Latin Small Letter I With Tilde -> i
+ (char)0x012a, (char)0x0049, // Latin Capital Letter I With Macron -> I
+ (char)0x012b, (char)0x0069, // Latin Small Letter I With Macron -> i
+ (char)0x012c, (char)0x0049, // Latin Capital Letter I With Breve -> I
+ (char)0x012d, (char)0x0069, // Latin Small Letter I With Breve -> i
+ (char)0x012e, (char)0x0049, // Latin Capital Letter I With Ogonek -> I
+ (char)0x012f, (char)0x0069, // Latin Small Letter I With Ogonek -> i
+ (char)0x0130, (char)0x0049, // Latin Capital Letter I With Dot Above -> I
+ (char)0x0131, (char)0x0069, // Latin Small Letter Dotless I -> i
+ (char)0x0134, (char)0x004a, // Latin Capital Letter J With Circumflex -> J
+ (char)0x0135, (char)0x006a, // Latin Small Letter J With Circumflex -> j
+ (char)0x0136, (char)0x004b, // Latin Capital Letter K With Cedilla -> K
+ (char)0x0137, (char)0x006b, // Latin Small Letter K With Cedilla -> k
+ (char)0x0139, (char)0x004c, // Latin Capital Letter L With Acute -> L
+ (char)0x013a, (char)0x006c, // Latin Small Letter L With Acute -> l
+ (char)0x013b, (char)0x004c, // Latin Capital Letter L With Cedilla -> L
+ (char)0x013c, (char)0x006c, // Latin Small Letter L With Cedilla -> l
+ (char)0x013d, (char)0x004c, // Latin Capital Letter L With Caron -> L
+ (char)0x013e, (char)0x006c, // Latin Small Letter L With Caron -> l
+ (char)0x0141, (char)0x004c, // Latin Capital Letter L With Stroke -> L
+ (char)0x0142, (char)0x006c, // Latin Small Letter L With Stroke -> l
+ (char)0x0143, (char)0x004e, // Latin Capital Letter N With Acute -> N
+ (char)0x0144, (char)0x006e, // Latin Small Letter N With Acute -> n
+ (char)0x0145, (char)0x004e, // Latin Capital Letter N With Cedilla -> N
+ (char)0x0146, (char)0x006e, // Latin Small Letter N With Cedilla -> n
+ (char)0x0147, (char)0x004e, // Latin Capital Letter N With Caron -> N
+ (char)0x0148, (char)0x006e, // Latin Small Letter N With Caron -> n
+ (char)0x014c, (char)0x004f, // Latin Capital Letter O With Macron -> O
+ (char)0x014d, (char)0x006f, // Latin Small Letter O With Macron -> o
+ (char)0x014e, (char)0x004f, // Latin Capital Letter O With Breve -> O
+ (char)0x014f, (char)0x006f, // Latin Small Letter O With Breve -> o
+ (char)0x0150, (char)0x004f, // Latin Capital Letter O With Double Acute -> O
+ (char)0x0151, (char)0x006f, // Latin Small Letter O With Double Acute -> o
+ (char)0x0152, (char)0x004f, // Latin Capital Ligature Oe -> O
+ (char)0x0153, (char)0x006f, // Latin Small Ligature Oe -> o
+ (char)0x0154, (char)0x0052, // Latin Capital Letter R With Acute -> R
+ (char)0x0155, (char)0x0072, // Latin Small Letter R With Acute -> r
+ (char)0x0156, (char)0x0052, // Latin Capital Letter R With Cedilla -> R
+ (char)0x0157, (char)0x0072, // Latin Small Letter R With Cedilla -> r
+ (char)0x0158, (char)0x0052, // Latin Capital Letter R With Caron -> R
+ (char)0x0159, (char)0x0072, // Latin Small Letter R With Caron -> r
+ (char)0x015a, (char)0x0053, // Latin Capital Letter S With Acute -> S
+ (char)0x015b, (char)0x0073, // Latin Small Letter S With Acute -> s
+ (char)0x015c, (char)0x0053, // Latin Capital Letter S With Circumflex -> S
+ (char)0x015d, (char)0x0073, // Latin Small Letter S With Circumflex -> s
+ (char)0x015e, (char)0x0053, // Latin Capital Letter S With Cedilla -> S
+ (char)0x015f, (char)0x0073, // Latin Small Letter S With Cedilla -> s
+ (char)0x0160, (char)0x0053, // Latin Capital Letter S With Caron -> S
+ (char)0x0161, (char)0x0073, // Latin Small Letter S With Caron -> s
+ (char)0x0162, (char)0x0054, // Latin Capital Letter T With Cedilla -> T
+ (char)0x0163, (char)0x0074, // Latin Small Letter T With Cedilla -> t
+ (char)0x0164, (char)0x0054, // Latin Capital Letter T With Caron -> T
+ (char)0x0165, (char)0x0074, // Latin Small Letter T With Caron -> t
+ (char)0x0166, (char)0x0054, // Latin Capital Letter T With Stroke -> T
+ (char)0x0167, (char)0x0074, // Latin Small Letter T With Stroke -> t
+ (char)0x0168, (char)0x0055, // Latin Capital Letter U With Tilde -> U
+ (char)0x0169, (char)0x0075, // Latin Small Letter U With Tilde -> u
+ (char)0x016a, (char)0x0055, // Latin Capital Letter U With Macron -> U
+ (char)0x016b, (char)0x0075, // Latin Small Letter U With Macron -> u
+ (char)0x016c, (char)0x0055, // Latin Capital Letter U With Breve -> U
+ (char)0x016d, (char)0x0075, // Latin Small Letter U With Breve -> u
+ (char)0x016e, (char)0x0055, // Latin Capital Letter U With Ring Above -> U
+ (char)0x016f, (char)0x0075, // Latin Small Letter U With Ring Above -> u
+ (char)0x0170, (char)0x0055, // Latin Capital Letter U With Double Acute -> U
+ (char)0x0171, (char)0x0075, // Latin Small Letter U With Double Acute -> u
+ (char)0x0172, (char)0x0055, // Latin Capital Letter U With Ogonek -> U
+ (char)0x0173, (char)0x0075, // Latin Small Letter U With Ogonek -> u
+ (char)0x0174, (char)0x0057, // Latin Capital Letter W With Circumflex -> W
+ (char)0x0175, (char)0x0077, // Latin Small Letter W With Circumflex -> w
+ (char)0x0176, (char)0x0059, // Latin Capital Letter Y With Circumflex -> Y
+ (char)0x0177, (char)0x0079, // Latin Small Letter Y With Circumflex -> y
+ (char)0x0178, (char)0x0059, // Latin Capital Letter Y With Diaeresis -> Y
+ (char)0x0179, (char)0x005a, // Latin Capital Letter Z With Acute -> Z
+ (char)0x017a, (char)0x007a, // Latin Small Letter Z With Acute -> z
+ (char)0x017b, (char)0x005a, // Latin Capital Letter Z With Dot Above -> Z
+ (char)0x017c, (char)0x007a, // Latin Small Letter Z With Dot Above -> z
+ (char)0x017d, (char)0x005a, // Latin Capital Letter Z With Caron -> Z
+ (char)0x017e, (char)0x007a, // Latin Small Letter Z With Caron -> z
+ (char)0x0180, (char)0x0062, // Latin Small Letter B With Stroke -> b
+ (char)0x0189, (char)0x0044, // Latin Capital Letter African D -> D
+ (char)0x0191, (char)0x0046, // Latin Capital Letter F With Hook -> F
+ (char)0x0192, (char)0x0066, // Latin Small Letter F With Hook -> f
+ (char)0x0197, (char)0x0049, // Latin Capital Letter I With Stroke -> I
+ (char)0x019a, (char)0x006c, // Latin Small Letter L With Bar -> l
+ (char)0x019f, (char)0x004f, // Latin Capital Letter O With Middle Tilde -> O
+ (char)0x01a0, (char)0x004f, // Latin Capital Letter O With Horn -> O
+ (char)0x01a1, (char)0x006f, // Latin Small Letter O With Horn -> o
+ (char)0x01ab, (char)0x0074, // Latin Small Letter T With Palatal Hook -> t
+ (char)0x01ae, (char)0x0054, // Latin Capital Letter T With Retroflex Hook -> T
+ (char)0x01af, (char)0x0055, // Latin Capital Letter U With Horn -> U
+ (char)0x01b0, (char)0x0075, // Latin Small Letter U With Horn -> u
+ (char)0x01b6, (char)0x007a, // Latin Small Letter Z With Stroke -> z
+ (char)0x01cd, (char)0x0041, // Latin Capital Letter A With Caron -> A
+ (char)0x01ce, (char)0x0061, // Latin Small Letter A With Caron -> a
+ (char)0x01cf, (char)0x0049, // Latin Capital Letter I With Caron -> I
+ (char)0x01d0, (char)0x0069, // Latin Small Letter I With Caron -> i
+ (char)0x01d1, (char)0x004f, // Latin Capital Letter O With Caron -> O
+ (char)0x01d2, (char)0x006f, // Latin Small Letter O With Caron -> o
+ (char)0x01d3, (char)0x0055, // Latin Capital Letter U With Caron -> U
+ (char)0x01d4, (char)0x0075, // Latin Small Letter U With Caron -> u
+ (char)0x01d5, (char)0x0055, // Latin Capital Letter U With Diaeresis And Macron -> U
+ (char)0x01d6, (char)0x0075, // Latin Small Letter U With Diaeresis And Macron -> u
+ (char)0x01d7, (char)0x0055, // Latin Capital Letter U With Diaeresis And Acute -> U
+ (char)0x01d8, (char)0x0075, // Latin Small Letter U With Diaeresis And Acute -> u
+ (char)0x01d9, (char)0x0055, // Latin Capital Letter U With Diaeresis And Caron -> U
+ (char)0x01da, (char)0x0075, // Latin Small Letter U With Diaeresis And Caron -> u
+ (char)0x01db, (char)0x0055, // Latin Capital Letter U With Diaeresis And Grave -> U
+ (char)0x01dc, (char)0x0075, // Latin Small Letter U With Diaeresis And Grave -> u
+ (char)0x01de, (char)0x0041, // Latin Capital Letter A With Diaeresis And Macron -> A
+ (char)0x01df, (char)0x0061, // Latin Small Letter A With Diaeresis And Macron -> a
+ (char)0x01e4, (char)0x0047, // Latin Capital Letter G With Stroke -> G
+ (char)0x01e5, (char)0x0067, // Latin Small Letter G With Stroke -> g
+ (char)0x01e6, (char)0x0047, // Latin Capital Letter G With Caron -> G
+ (char)0x01e7, (char)0x0067, // Latin Small Letter G With Caron -> g
+ (char)0x01e8, (char)0x004b, // Latin Capital Letter K With Caron -> K
+ (char)0x01e9, (char)0x006b, // Latin Small Letter K With Caron -> k
+ (char)0x01ea, (char)0x004f, // Latin Capital Letter O With Ogonek -> O
+ (char)0x01eb, (char)0x006f, // Latin Small Letter O With Ogonek -> o
+ (char)0x01ec, (char)0x004f, // Latin Capital Letter O With Ogonek And Macron -> O
+ (char)0x01ed, (char)0x006f, // Latin Small Letter O With Ogonek And Macron -> o
+ (char)0x01f0, (char)0x006a, // Latin Small Letter J With Caron -> j
+ (char)0x0261, (char)0x0067, // Latin Small Letter Script G -> g
+ (char)0x02b9, (char)0x0027, // Modifier Letter Prime -> '
+ (char)0x02ba, (char)0x0022, // Modifier Letter Double Prime -> "
+ (char)0x02bc, (char)0x0027, // Modifier Letter Apostrophe -> '
+ (char)0x02c4, (char)0x005e, // Modifier Letter Up Arrowhead -> ^
+ (char)0x02c6, (char)0x005e, // Modifier Letter Circumflex Accent -> ^
+ (char)0x02c8, (char)0x0027, // Modifier Letter Vertical Line -> '
+ (char)0x02c9, (char)0x003f, // Modifier Letter Macron
+ (char)0x02ca, (char)0x003f, // Modifier Letter Acute Accent
+ (char)0x02cb, (char)0x0060, // Modifier Letter Grave Accent -> `
+ (char)0x02cd, (char)0x005f, // Modifier Letter Low Macron -> _
+ (char)0x02da, (char)0x003f, // Ring Above
+ (char)0x02dc, (char)0x007e, // Small Tilde -> ~
+ (char)0x0300, (char)0x0060, // Combining Grave Accent -> `
+ (char)0x0302, (char)0x005e, // Combining Circumflex Accent -> ^
+ (char)0x0303, (char)0x007e, // Combining Tilde -> ~
+ (char)0x030e, (char)0x0022, // Combining Double Vertical Line Above -> "
+ (char)0x0331, (char)0x005f, // Combining Macron Below -> _
+ (char)0x0332, (char)0x005f, // Combining Low Line -> _
+ (char)0x2000, (char)0x0020, // En Quad
+ (char)0x2001, (char)0x0020, // Em Quad
+ (char)0x2002, (char)0x0020, // En Space
+ (char)0x2003, (char)0x0020, // Em Space
+ (char)0x2004, (char)0x0020, // Three-Per-Em Space
+ (char)0x2005, (char)0x0020, // Four-Per-Em Space
+ (char)0x2006, (char)0x0020, // Six-Per-Em Space
+ (char)0x2010, (char)0x002d, // Hyphen -> -
+ (char)0x2011, (char)0x002d, // Non-Breaking Hyphen -> -
+ (char)0x2013, (char)0x002d, // En Dash -> -
+ (char)0x2014, (char)0x002d, // Em Dash -> -
+ (char)0x2018, (char)0x0027, // Left Single Quotation Mark -> '
+ (char)0x2019, (char)0x0027, // Right Single Quotation Mark -> '
+ (char)0x201a, (char)0x002c, // Single Low-9 Quotation Mark -> ,
+ (char)0x201c, (char)0x0022, // Left Double Quotation Mark -> "
+ (char)0x201d, (char)0x0022, // Right Double Quotation Mark -> "
+ (char)0x201e, (char)0x0022, // Double Low-9 Quotation Mark -> "
+ (char)0x2020, (char)0x003f, // Dagger
+ (char)0x2021, (char)0x003f, // Double Dagger
+ (char)0x2022, (char)0x002e, // Bullet -> .
+ (char)0x2026, (char)0x002e, // Horizontal Ellipsis -> .
+ (char)0x2030, (char)0x003f, // Per Mille Sign
+ (char)0x2032, (char)0x0027, // Prime -> '
+ (char)0x2035, (char)0x0060, // Reversed Prime -> `
+ (char)0x2039, (char)0x003c, // Single Left-Pointing Angle Quotation Mark -> <
+ (char)0x203a, (char)0x003e, // Single Right-Pointing Angle Quotation Mark -> >
+ (char)0x2122, (char)0x0054, // Trade Mark Sign -> T
+ (char)0xff01, (char)0x0021, // Fullwidth Exclamation Mark -> !
+ (char)0xff02, (char)0x0022, // Fullwidth Quotation Mark -> "
+ (char)0xff03, (char)0x0023, // Fullwidth Number Sign -> #
+ (char)0xff04, (char)0x0024, // Fullwidth Dollar Sign -> $
+ (char)0xff05, (char)0x0025, // Fullwidth Percent Sign -> %
+ (char)0xff06, (char)0x0026, // Fullwidth Ampersand -> &
+ (char)0xff07, (char)0x0027, // Fullwidth Apostrophe -> '
+ (char)0xff08, (char)0x0028, // Fullwidth Left Parenthesis -> (
+ (char)0xff09, (char)0x0029, // Fullwidth Right Parenthesis -> )
+ (char)0xff0a, (char)0x002a, // Fullwidth Asterisk -> *
+ (char)0xff0b, (char)0x002b, // Fullwidth Plus Sign -> +
+ (char)0xff0c, (char)0x002c, // Fullwidth Comma -> ,
+ (char)0xff0d, (char)0x002d, // Fullwidth Hyphen-Minus -> -
+ (char)0xff0e, (char)0x002e, // Fullwidth Full Stop -> .
+ (char)0xff0f, (char)0x002f, // Fullwidth Solidus -> /
+ (char)0xff10, (char)0x0030, // Fullwidth Digit Zero -> 0
+ (char)0xff11, (char)0x0031, // Fullwidth Digit One -> 1
+ (char)0xff12, (char)0x0032, // Fullwidth Digit Two -> 2
+ (char)0xff13, (char)0x0033, // Fullwidth Digit Three -> 3
+ (char)0xff14, (char)0x0034, // Fullwidth Digit Four -> 4
+ (char)0xff15, (char)0x0035, // Fullwidth Digit Five -> 5
+ (char)0xff16, (char)0x0036, // Fullwidth Digit Six -> 6
+ (char)0xff17, (char)0x0037, // Fullwidth Digit Seven -> 7
+ (char)0xff18, (char)0x0038, // Fullwidth Digit Eight -> 8
+ (char)0xff19, (char)0x0039, // Fullwidth Digit Nine -> 9
+ (char)0xff1a, (char)0x003a, // Fullwidth Colon -> :
+ (char)0xff1b, (char)0x003b, // Fullwidth Semicolon -> ;
+ (char)0xff1c, (char)0x003c, // Fullwidth Less-Than Sign -> <
+ (char)0xff1d, (char)0x003d, // Fullwidth Equals Sign -> =
+ (char)0xff1e, (char)0x003e, // Fullwidth Greater-Than Sign -> >
+ (char)0xff1f, (char)0x003f, // Fullwidth Question Mark
+ (char)0xff20, (char)0x0040, // Fullwidth Commercial At -> @
+ (char)0xff21, (char)0x0041, // Fullwidth Latin Capital Letter A -> A
+ (char)0xff22, (char)0x0042, // Fullwidth Latin Capital Letter B -> B
+ (char)0xff23, (char)0x0043, // Fullwidth Latin Capital Letter C -> C
+ (char)0xff24, (char)0x0044, // Fullwidth Latin Capital Letter D -> D
+ (char)0xff25, (char)0x0045, // Fullwidth Latin Capital Letter E -> E
+ (char)0xff26, (char)0x0046, // Fullwidth Latin Capital Letter F -> F
+ (char)0xff27, (char)0x0047, // Fullwidth Latin Capital Letter G -> G
+ (char)0xff28, (char)0x0048, // Fullwidth Latin Capital Letter H -> H
+ (char)0xff29, (char)0x0049, // Fullwidth Latin Capital Letter I -> I
+ (char)0xff2a, (char)0x004a, // Fullwidth Latin Capital Letter J -> J
+ (char)0xff2b, (char)0x004b, // Fullwidth Latin Capital Letter K -> K
+ (char)0xff2c, (char)0x004c, // Fullwidth Latin Capital Letter L -> L
+ (char)0xff2d, (char)0x004d, // Fullwidth Latin Capital Letter M -> M
+ (char)0xff2e, (char)0x004e, // Fullwidth Latin Capital Letter N -> N
+ (char)0xff2f, (char)0x004f, // Fullwidth Latin Capital Letter O -> O
+ (char)0xff30, (char)0x0050, // Fullwidth Latin Capital Letter P -> P
+ (char)0xff31, (char)0x0051, // Fullwidth Latin Capital Letter Q -> Q
+ (char)0xff32, (char)0x0052, // Fullwidth Latin Capital Letter R -> R
+ (char)0xff33, (char)0x0053, // Fullwidth Latin Capital Letter S -> S
+ (char)0xff34, (char)0x0054, // Fullwidth Latin Capital Letter T -> T
+ (char)0xff35, (char)0x0055, // Fullwidth Latin Capital Letter U -> U
+ (char)0xff36, (char)0x0056, // Fullwidth Latin Capital Letter V -> V
+ (char)0xff37, (char)0x0057, // Fullwidth Latin Capital Letter W -> W
+ (char)0xff38, (char)0x0058, // Fullwidth Latin Capital Letter X -> X
+ (char)0xff39, (char)0x0059, // Fullwidth Latin Capital Letter Y -> Y
+ (char)0xff3a, (char)0x005a, // Fullwidth Latin Capital Letter Z -> Z
+ (char)0xff3b, (char)0x005b, // Fullwidth Left Square Bracket -> [
+ (char)0xff3c, (char)0x005c, // Fullwidth Reverse Solidus -> \
+ (char)0xff3d, (char)0x005d, // Fullwidth Right Square Bracket -> ]
+ (char)0xff3e, (char)0x005e, // Fullwidth Circumflex Accent -> ^
+ (char)0xff3f, (char)0x005f, // Fullwidth Low Line -> _
+ (char)0xff40, (char)0x0060, // Fullwidth Grave Accent -> `
+ (char)0xff41, (char)0x0061, // Fullwidth Latin Small Letter A -> a
+ (char)0xff42, (char)0x0062, // Fullwidth Latin Small Letter B -> b
+ (char)0xff43, (char)0x0063, // Fullwidth Latin Small Letter C -> c
+ (char)0xff44, (char)0x0064, // Fullwidth Latin Small Letter D -> d
+ (char)0xff45, (char)0x0065, // Fullwidth Latin Small Letter E -> e
+ (char)0xff46, (char)0x0066, // Fullwidth Latin Small Letter F -> f
+ (char)0xff47, (char)0x0067, // Fullwidth Latin Small Letter G -> g
+ (char)0xff48, (char)0x0068, // Fullwidth Latin Small Letter H -> h
+ (char)0xff49, (char)0x0069, // Fullwidth Latin Small Letter I -> i
+ (char)0xff4a, (char)0x006a, // Fullwidth Latin Small Letter J -> j
+ (char)0xff4b, (char)0x006b, // Fullwidth Latin Small Letter K -> k
+ (char)0xff4c, (char)0x006c, // Fullwidth Latin Small Letter L -> l
+ (char)0xff4d, (char)0x006d, // Fullwidth Latin Small Letter M -> m
+ (char)0xff4e, (char)0x006e, // Fullwidth Latin Small Letter N -> n
+ (char)0xff4f, (char)0x006f, // Fullwidth Latin Small Letter O -> o
+ (char)0xff50, (char)0x0070, // Fullwidth Latin Small Letter P -> p
+ (char)0xff51, (char)0x0071, // Fullwidth Latin Small Letter Q -> q
+ (char)0xff52, (char)0x0072, // Fullwidth Latin Small Letter R -> r
+ (char)0xff53, (char)0x0073, // Fullwidth Latin Small Letter S -> s
+ (char)0xff54, (char)0x0074, // Fullwidth Latin Small Letter T -> t
+ (char)0xff55, (char)0x0075, // Fullwidth Latin Small Letter U -> u
+ (char)0xff56, (char)0x0076, // Fullwidth Latin Small Letter V -> v
+ (char)0xff57, (char)0x0077, // Fullwidth Latin Small Letter W -> w
+ (char)0xff58, (char)0x0078, // Fullwidth Latin Small Letter X -> x
+ (char)0xff59, (char)0x0079, // Fullwidth Latin Small Letter Y -> y
+ (char)0xff5a, (char)0x007a, // Fullwidth Latin Small Letter Z -> z
+ (char)0xff5b, (char)0x007b, // Fullwidth Left Curly Bracket -> {
+ (char)0xff5c, (char)0x007c, // Fullwidth Vertical Line -> |
+ (char)0xff5d, (char)0x007d, // Fullwidth Right Curly Bracket -> }
+ (char)0xff5e, (char)0x007e // Fullwidth Tilde -> ~
+ };
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
+//
+
+using System;
+using System.Runtime.Serialization;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+
+namespace System.Text
+{
+ public class UTF7Encoding : Encoding
+ {
+ // The base64 alphabet. MakeTables copies it into base64Bytes, so a character's
+ // position in this string is its 6-bit value.
+ private const String base64Chars =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ // Index ruler for the string above, read vertically (tens digit over units digit):
+ // 0123456789111111111122222222223333333333444444444455555555556666
+ //           012345678901234567890123456789012345678901234567890123
+
+ // These are the characters that can be directly encoded in UTF7.
+ // MakeTables always marks each of them in directEncode.
+ private const String directChars =
+ "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+ // These are the characters that can be optionally directly encoded in UTF7.
+ // MakeTables marks them in directEncode only when m_allowOptionals is true.
+ private const String optionalChars =
+ "!\"#$%&*;<=>@[]^_`{|}";
+
+ // Used by Encoding.UTF7 for lazy initialization
+ // The initialization code will not be run until a static member of the class is referenced
+ internal static readonly UTF7Encoding s_default = new UTF7Encoding();
+
+ // The set of base 64 characters as bytes, indexed by 6-bit value (built by MakeTables).
+ private byte[] base64Bytes;
+ // The decoded 6-bit value for each base64 character. This array has 128 elements and
+ // is indexed by the code point of the character. An entry is -1 if the code point is
+ // not a valid base64 character; otherwise it is a value from 0 to 63.
+ private sbyte[] base64Values;
+ // The array to decide if a Unicode code point below 0x80 can be directly encoded in UTF7.
+ // This array has a size of 128.
+ private bool[] directEncode;
+
+ // True when the characters in optionalChars are also written directly (see MakeTables).
+ [OptionalField(VersionAdded = 2)]
+ private bool m_allowOptionals;
+
+ // Code page identifier handed to the base Encoding constructor.
+ private const int UTF7_CODEPAGE = 65000;
+
+
+ // Creates a UTF-7 encoding that does not write the optional characters directly;
+ // it simply forwards to the bool constructor with false.
+ public UTF7Encoding()
+     : this(allowOptionals: false)
+ {
+ }
+
+ // Creates a UTF-7 encoding. When allowOptionals is true the characters listed in
+ // optionalChars are marked as directly encodable as well.
+ public UTF7Encoding(bool allowOptionals)
+     : base(UTF7_CODEPAGE) // hand the UTF-7 code page to the base class
+ {
+     // Store the option before building the tables: MakeTables reads it.
+     this.m_allowOptionals = allowOptionals;
+     this.MakeTables();
+ }
+
+ // (Re)builds the three lookup tables from the constant strings:
+ //   base64Bytes  : 6-bit value -> base64 byte
+ //   base64Values : code point (0..127) -> 6-bit value, or -1 when not a base64 character
+ //   directEncode : code point (0..127) -> true when the character is written as-is
+ private void MakeTables()
+ {
+     byte[] encodeTable = new byte[64];
+     sbyte[] decodeTable = new sbyte[128];
+
+     // Start with "not a base64 character" everywhere, then fill in the 64 real entries.
+     for (int code = 0; code < decodeTable.Length; code++)
+     {
+         decodeTable[code] = -1;
+     }
+
+     for (int sixBits = 0; sixBits < encodeTable.Length; sixBits++)
+     {
+         byte symbol = (byte)base64Chars[sixBits];
+         encodeTable[sixBits] = symbol;
+         decodeTable[symbol] = (sbyte)sixBits;
+     }
+
+     bool[] direct = new bool[128];
+     foreach (char ch in directChars)
+     {
+         direct[ch] = true;
+     }
+
+     // The optional set is only direct when the encoding was created with allowOptionals.
+     if (m_allowOptionals)
+     {
+         foreach (char ch in optionalChars)
+         {
+             direct[ch] = true;
+         }
+     }
+
+     base64Bytes = encodeTable;
+     base64Values = decodeTable;
+     directEncode = direct;
+ }
+
+ // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
+ internal override void SetDefaultFallbacks()
+ {
+     // Encoder side: an empty-string replacement fallback. The encoder writes surrogates
+     // one at a time and never checks for unmatched ones, so this is not expected to run.
+     encoderFallback = new EncoderReplacementFallback(String.Empty);
+
+     // Decoder side: UTF7 has its own, somewhat odd, fallback type.
+     decoderFallback = new DecoderUTF7Fallback();
+ }
+
+
+ // Serialization callback (marked [OnDeserializing]). It only forwards to the base
+ // class's OnDeserializing; the StreamingContext argument is not used here.
+ [OnDeserializing]
+ private void OnDeserializing(StreamingContext ctx)
+ {
+ // make sure the optional fields are initialized correctly.
+ base.OnDeserializing();
+ }
+
+ // Serialization callback (marked [OnDeserialized]): lets the base class finish, recovers
+ // m_allowOptionals for Everett-format data, then rebuilds the lookup tables.
+ [OnDeserialized]
+ private void OnDeserialized(StreamingContext ctx)
+ {
+     base.OnDeserialized();
+
+     if (m_deserializedFromEverett)
+     {
+         // m_allowOptionals is an optional field, so infer it from the deserialized table:
+         // if the first optional character is marked direct, optionals were allowed.
+         // This must read directEncode before MakeTables replaces it below.
+         char firstOptional = optionalChars[0];
+         m_allowOptionals = directEncode[firstOptional];
+     }
+
+     MakeTables();
+ }
+
+
+
+ // Two UTF7Encoding instances are equal when they agree on allowOptionals and have
+ // equal encoder and decoder fallbacks. Anything that is not a UTF7Encoding is unequal.
+ public override bool Equals(Object value)
+ {
+     UTF7Encoding other = value as UTF7Encoding;
+     if (other == null)
+     {
+         return false;
+     }
+
+     if (m_allowOptionals != other.m_allowOptionals)
+     {
+         return false;
+     }
+
+     if (!EncoderFallback.Equals(other.EncoderFallback))
+     {
+         return false;
+     }
+
+     return DecoderFallback.Equals(other.DecoderFallback);
+ }
+
+ // Compared to all the other encodings, variations of UTF7 are unlikely
+
+ // Hash combines the code page with both fallbacks. m_allowOptionals is not part of the
+ // hash, which is still consistent with Equals (equal objects produce equal hashes).
+ public override int GetHashCode()
+ {
+     int hash = this.CodePage;
+     hash += this.EncoderFallback.GetHashCode();
+     hash += this.DecoderFallback.GetHashCode();
+     return hash;
+ }
+
+ // The following methods are copied from EncodingNLS.cs.
+ // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
+ // These should be kept in sync for the following classes:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(char[] chars, int index, int count)
+ {
+     // Argument checks, in the order callers observe them.
+     if (chars == null)
+     {
+         throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+     }
+
+     if (index < 0)
+     {
+         throw new ArgumentOutOfRangeException("index", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (count > chars.Length - index)
+     {
+         throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+     }
+     Contract.EndContractBlock();
+
+     // Nothing to count; also avoids pinning an empty array.
+     if (count == 0)
+     {
+         return 0;
+     }
+
+     // Pin the array and let the pointer-based worker do the counting (no encoder state).
+     fixed (char* start = chars)
+     {
+         return GetByteCount(start + index, count, null);
+     }
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(string s)
+ {
+     if (s == null)
+     {
+         throw new ArgumentNullException("s");
+     }
+     Contract.EndContractBlock();
+
+     // Pin the string and count over all of its characters, with no encoder state.
+     int length = s.Length;
+     fixed (char* start = s)
+     {
+         return GetByteCount(start, length, null);
+     }
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetByteCount(char* chars, int count)
+ {
+     if (chars == null)
+     {
+         throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+     }
+
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     // Forward to the internal worker without an encoder (stateless count).
+     EncoderNLS noEncoder = null;
+     return GetByteCount(chars, count, noEncoder);
+ }
+
+ // Parent method is safe.
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ public override unsafe int GetBytes(string s, int charIndex, int charCount,
+     byte[] bytes, int byteIndex)
+ {
+     // Null checks: the string is reported before the byte array.
+     if (s == null)
+     {
+         throw new ArgumentNullException("s", SR.ArgumentNull_Array);
+     }
+
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (charIndex < 0)
+     {
+         throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (charCount < 0)
+     {
+         throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (charCount > s.Length - charIndex)
+     {
+         throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
+     }
+
+     if (byteIndex < 0 || byteIndex > bytes.Length)
+     {
+         throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+     }
+     Contract.EndContractBlock();
+
+     // Room left in the caller's array, measured before any substitution below.
+     int available = bytes.Length - byteIndex;
+
+     // &bytes[0] needs at least one element, so swap in a one-byte dummy for an empty
+     // array; 'available' is already 0 in that case.
+     if (bytes.Length == 0)
+     {
+         bytes = new byte[1];
+     }
+
+     fixed (char* source = s)
+     {
+         fixed (byte* target = &bytes[0])
+         {
+             return GetBytes(source + charIndex, charCount, target + byteIndex, available, null);
+         }
+     }
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
+     byte[] bytes, int byteIndex)
+ {
+     // Null checks: the char array is reported before the byte array.
+     if (chars == null)
+     {
+         throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+     }
+
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (charIndex < 0)
+     {
+         throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (charCount < 0)
+     {
+         throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (charCount > chars.Length - charIndex)
+     {
+         throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+     }
+
+     if (byteIndex < 0 || byteIndex > bytes.Length)
+     {
+         throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+     }
+     Contract.EndContractBlock();
+
+     // Nothing to encode; also avoids pinning an empty char array.
+     if (charCount == 0)
+     {
+         return 0;
+     }
+
+     // This is the space remaining after byteIndex, not the size of the whole array.
+     int available = bytes.Length - byteIndex;
+
+     // &bytes[0] needs at least one element, so use a one-byte dummy for an empty array.
+     if (bytes.Length == 0)
+     {
+         bytes = new byte[1];
+     }
+
+     fixed (char* source = chars)
+     {
+         fixed (byte* target = &bytes[0])
+         {
+             return GetBytes(source + charIndex, charCount, target + byteIndex, available, null);
+         }
+     }
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
+ {
+     // Null checks: the byte pointer is reported before the char pointer.
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (chars == null)
+     {
+         throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+     }
+
+     if (charCount < 0)
+     {
+         throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (byteCount < 0)
+     {
+         throw new ArgumentOutOfRangeException("byteCount", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     // Forward to the internal worker without an encoder (stateless conversion).
+     EncoderNLS noEncoder = null;
+     return GetBytes(chars, charCount, bytes, byteCount, noEncoder);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetCharCount(byte[] bytes, int index, int count)
+ {
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (index < 0)
+     {
+         throw new ArgumentOutOfRangeException("index", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (count > bytes.Length - index)
+     {
+         throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+     }
+     Contract.EndContractBlock();
+
+     // Nothing to decode; also avoids pinning an empty array.
+     if (count == 0)
+     {
+         return 0;
+     }
+
+     // Pin the array and let the pointer-based worker count (no decoder state).
+     fixed (byte* start = bytes)
+     {
+         return GetCharCount(start + index, count, null);
+     }
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetCharCount(byte* bytes, int count)
+ {
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     // Forward to the internal worker without a decoder (stateless count).
+     DecoderNLS noDecoder = null;
+     return GetCharCount(bytes, count, noDecoder);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
+     char[] chars, int charIndex)
+ {
+     // Null checks: the byte array is reported before the char array.
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (chars == null)
+     {
+         throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+     }
+
+     if (byteIndex < 0)
+     {
+         throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (byteCount < 0)
+     {
+         throw new ArgumentOutOfRangeException("byteCount", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (byteCount > bytes.Length - byteIndex)
+     {
+         throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+     }
+
+     if (charIndex < 0 || charIndex > chars.Length)
+     {
+         throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
+     }
+     Contract.EndContractBlock();
+
+     // Nothing to decode; also avoids pinning an empty byte array.
+     if (byteCount == 0)
+     {
+         return 0;
+     }
+
+     // This is the space remaining after charIndex, not the size of the whole array.
+     int available = chars.Length - charIndex;
+
+     // &chars[0] needs at least one element, so use a one-char dummy for an empty array.
+     if (chars.Length == 0)
+     {
+         chars = new char[1];
+     }
+
+     fixed (byte* source = bytes)
+     {
+         fixed (char* target = &chars[0])
+         {
+             return GetChars(source + byteIndex, byteCount, target + charIndex, available, null);
+         }
+     }
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ {
+     // Null checks: the byte pointer is reported before the char pointer.
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (chars == null)
+     {
+         throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+     }
+
+     // Negative counts: charCount is reported before byteCount.
+     if (charCount < 0)
+     {
+         throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (byteCount < 0)
+     {
+         throw new ArgumentOutOfRangeException("byteCount", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     // Forward to the internal worker without a decoder (stateless conversion).
+     DecoderNLS noDecoder = null;
+     return GetChars(bytes, byteCount, chars, charCount, noDecoder);
+ }
+
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe String GetString(byte[] bytes, int index, int count)
+ {
+     if (bytes == null)
+     {
+         throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+     }
+
+     if (index < 0)
+     {
+         throw new ArgumentOutOfRangeException("index", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (count < 0)
+     {
+         throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+
+     if (count > bytes.Length - index)
+     {
+         throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+     }
+     Contract.EndContractBlock();
+
+     // An empty range decodes to the empty string; also avoids pinning an empty array.
+     if (count == 0)
+     {
+         return String.Empty;
+     }
+
+     // Pin the input and build the string from it using this encoding.
+     fixed (byte* start = bytes)
+     {
+         return String.CreateStringFromEncoding(start + index, count, this);
+     }
+ }
+
+ //
+ // End of standard methods copied from EncodingNLS.cs
+ //
+
+ // Counting is done by running the real encoder with no output buffer: GetBytes is
+ // called with bytes == null and a byte capacity of 0.
+ internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
+ {
+     Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
+     Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");
+
+     byte* noOutput = null;
+     int needed = GetBytes(chars, count, noOutput, 0, baseEncoder);
+     return needed;
+ }
+
+ // Core UTF-7 encoder used by the public GetBytes/GetByteCount overloads.
+ //
+ // chars/charCount : input characters.
+ // bytes/byteCount : output buffer and its capacity. bytes is null when only counting
+ //                   (GetByteCount calls this with bytes == null, byteCount == 0).
+ // baseEncoder     : null for a stateless call; otherwise cast to UTF7Encoding.Encoder,
+ //                   whose bits/bitCount seed this call and are written back at the end
+ //                   (only when bytes != null).
+ // Returns buffer.Count, the count tracked by the EncodingByteBuffer.
+ //
+ // Local state:
+ //   bits     - accumulator holding character bits not yet emitted as base64.
+ //   bitCount - -1 when not inside a base64 ('+' ... '-') run; otherwise the number of
+ //              low bits of 'bits' that are still waiting to be emitted.
+ internal override unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, EncoderNLS baseEncoder)
+ {
+ Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
+ Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
+ Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");
+
+ // Get encoder info
+ UTF7Encoding.Encoder encoder = (UTF7Encoding.Encoder)baseEncoder;
+
+ // Default bits & count (not in base64 mode)
+ int bits = 0;
+ int bitCount = -1;
+
+ // prepare our helpers
+ Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
+ this, encoder, bytes, byteCount, chars, charCount);
+
+ if (encoder != null)
+ {
+ // Resume from the state the previous call stored in the encoder
+ bits = encoder.bits;
+ bitCount = encoder.bitCount;
+
+ // May have had too many left over: emit every complete 6-bit group first
+ while (bitCount >= 6)
+ {
+ bitCount -= 6;
+ // If we fail we'll never really have enough room
+ if (!buffer.AddByte(base64Bytes[(bits >> bitCount) & 0x3F]))
+ ThrowBytesOverflow(encoder, buffer.Count == 0);
+ }
+ }
+
+ while (buffer.MoreData)
+ {
+ char currentChar = buffer.GetNextChar();
+
+ // Case 1: an ASCII character that may be written as-is
+ if (currentChar < 0x80 && directEncode[currentChar])
+ {
+ if (bitCount >= 0)
+ {
+ // We are inside a base64 run and have to close it before the direct char
+ if (bitCount > 0)
+ {
+ // Try to add the next byte.
+ // Precedence note: the index parses as (bits << (6 - bitCount)) & 0x3F,
+ // i.e. the leftover bits left-aligned in a 6-bit group; it is the same
+ // expression as the fully parenthesized one in the flush code below.
+ if (!buffer.AddByte(base64Bytes[bits << 6 - bitCount & 0x3F]))
+ break; // Stop here, didn't throw
+
+ bitCount = 0;
+ }
+
+ // Need to emit '-' and our char, 2 bytes total
+ if (!buffer.AddByte((byte)'-'))
+ break; // Stop here, didn't throw
+
+ bitCount = -1;
+ }
+
+ // Need to emit our char
+ if (!buffer.AddByte((byte)currentChar))
+ break; // Stop here, didn't throw
+ }
+ // Case 2: a literal '+' outside a base64 run is written as "+-"
+ else if (bitCount < 0 && currentChar == '+')
+ {
+ if (!buffer.AddByte((byte)'+', (byte)'-'))
+ break; // Stop here, didn't throw
+ }
+ // Case 3: everything else goes through modified base64
+ else
+ {
+ if (bitCount < 0)
+ {
+ // Need to emit a + and 12 bits (3 bytes)
+ // Only 12 of the 16 bits will be emitted this time, the other 4 wait 'til next time
+ if (!buffer.AddByte((byte)'+'))
+ break; // Stop here, didn't throw
+
+ // We're now in bit mode, but haven't stored data yet
+ bitCount = 0;
+ }
+
+ // Add our bits: append the 16-bit char below whatever is still pending
+ bits = bits << 16 | currentChar;
+ bitCount += 16;
+
+ // Emit as many complete 6-bit groups as there is room for
+ while (bitCount >= 6)
+ {
+ bitCount -= 6;
+ if (!buffer.AddByte(base64Bytes[(bits >> bitCount) & 0x3F]))
+ {
+ bitCount += 6; // We didn't use these bits
+ currentChar = buffer.GetNextChar(); // We're processing this char still, but AddByte
+ // --'d it when we ran out of space
+ break; // Stop here, not enough room for bytes
+ }
+ }
+
+ // A full group still pending means the inner loop stopped for lack of room
+ if (bitCount >= 6)
+ break; // Didn't have room to encode enough bits
+ }
+ }
+
+ // Now if we have bits left over we have to encode them.
+ // MustFlush may have been cleared by encoding.ThrowBytesOverflow earlier if converting
+ if (bitCount >= 0 && (encoder == null || encoder.MustFlush))
+ {
+ // Do we have bits we have to stick in?
+ if (bitCount > 0)
+ {
+ if (buffer.AddByte(base64Bytes[(bits << (6 - bitCount)) & 0x3F]))
+ {
+ // Emitted spare bits, 0 bits left
+ bitCount = 0;
+ }
+ }
+
+ // If converting and failed bitCount above, then we'll fail this too
+ if (buffer.AddByte((byte)'-'))
+ {
+ // turned off bit mode
+ bits = 0;
+ bitCount = -1;
+ }
+ else
+ // If not successful, convert will maintain state for next time, also
+ // AddByte will have decremented our char count, however we need it to remain the same
+ // NOTE(review): if the spare-bits AddByte above failed as well, and each failed
+ // AddByte steps the char position back as these comments say, one GetNextChar
+ // may not fully compensate - confirm against EncodingByteBuffer.AddByte.
+ buffer.GetNextChar();
+ }
+
+ // Do we have an encoder we're allowed to use?
+ // bytes == null if counting, so don't use encoder then
+ if (bytes != null && encoder != null)
+ {
+ // We already cleared bits & bitcount for mustflush case
+ encoder.bits = bits;
+ encoder.bitCount = bitCount;
+ encoder.m_charsUsed = buffer.CharsUsed;
+ }
+
+ return buffer.Count;
+ }
+
+ // Counting is done by running the real decoder with no output buffer: GetChars is
+ // called with chars == null and a char capacity of 0.
+ internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
+ {
+     Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
+     Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");
+
+     char* noOutput = null;
+     int needed = GetChars(bytes, count, noOutput, 0, baseDecoder);
+     return needed;
+ }
+
+ // Core UTF-7 decoder used by the public GetChars/GetCharCount overloads.
+ //
+ // bytes/byteCount : input bytes.
+ // chars/charCount : output buffer and its capacity. chars is null when only counting
+ //                   (GetCharCount calls this with chars == null, charCount == 0).
+ // baseDecoder     : null for a stateless call; otherwise cast to UTF7Encoding.Decoder,
+ //                   whose bits/bitCount/firstByte seed this call and are written back
+ //                   at the end (only when chars != null).
+ // Returns buffer.Count, the count tracked by the EncodingCharBuffer.
+ //
+ // Local state:
+ //   bits      - accumulator of decoded base64 bits not yet emitted as a char.
+ //   bitCount  - -1 when not inside a base64 ('+' ... '-') run; otherwise the number of
+ //               pending bits in 'bits'.
+ //   firstByte - true right after a '+' and until a base64 byte is seen, so that the
+ //               sequence "+-" can be decoded as a literal '+'.
+ internal override unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
+ Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
+ Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");
+
+ // Might use a decoder
+ UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;
+
+ // Get our output buffer info.
+ Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
+ this, decoder, chars, charCount, bytes, byteCount);
+
+ // Get decoder info (defaults: not in base64 mode)
+ int bits = 0;
+ int bitCount = -1;
+ bool firstByte = false;
+ if (decoder != null)
+ {
+ bits = decoder.bits;
+ bitCount = decoder.bitCount;
+ firstByte = decoder.firstByte;
+
+ Debug.Assert(firstByte == false || decoder.bitCount <= 0,
+ "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
+ }
+
+ // We may have had bits in the decoder that we couldn't output last time, so do so now
+ if (bitCount >= 16)
+ {
+ // Check our decoder buffer
+ if (!buffer.AddChar((char)((bits >> (bitCount - 16)) & 0xFFFF)))
+ ThrowCharsOverflow(decoder, true); // Always throw, they need at least 1 char even in Convert
+
+ // Used this one, clean up extra bits
+ bitCount -= 16;
+ }
+
+ // Loop through the input
+ while (buffer.MoreData)
+ {
+ byte currentByte = buffer.GetNextByte();
+ // The char to emit for this byte; every path that does not set it 'continue's or 'break's
+ int c;
+
+ if (bitCount >= 0)
+ {
+ //
+ // Modified base 64 encoding.
+ //
+ sbyte v;
+ if (currentByte < 0x80 && ((v = base64Values[currentByte]) >= 0))
+ {
+ // A valid base64 byte: shift its 6 bits into the accumulator
+ firstByte = false;
+ bits = (bits << 6) | ((byte)v);
+ bitCount += 6;
+ if (bitCount >= 16)
+ {
+ // A whole 16-bit char is available: take the top 16 pending bits
+ c = (bits >> (bitCount - 16)) & 0xFFFF;
+ bitCount -= 16;
+ }
+ // If not enough bits just continue
+ else continue;
+ }
+ else
+ {
+ // If it wasn't a base 64 byte, everything's going to turn off base 64 mode
+ bitCount = -1;
+
+ if (currentByte != '-')
+ {
+ // Reached for any byte other than '-' that failed the base64 test above:
+ // it is either >= 0x80 or an ASCII byte whose base64Values entry is negative.
+ // This is not a valid base 64 byte. Terminate the shifted-sequence and
+ // emit this byte.
+
+ // not in base 64 table
+ // According to the RFC 1642 and the example code of UTF-7
+ // in Unicode 2.0, we should just zero-extend the invalid UTF7 byte
+
+ // Chars won't be updated unless this works, try to fallback
+ if (!buffer.Fallback(currentByte))
+ break; // Stop here, didn't throw
+
+ // Used that byte, we're done with it
+ continue;
+ }
+
+ //
+ // The encoding for '+' is "+-".
+ //
+ if (firstByte) c = '+';
+ // We just turn it off if not emitting a +, so we're done.
+ else continue;
+ }
+ //
+ // End of modified base 64 encoding block.
+ //
+ }
+ else if (currentByte == '+')
+ {
+ //
+ // Found the start of a modified base 64 encoding block or a plus sign.
+ //
+ bitCount = 0;
+ firstByte = true;
+ continue;
+ }
+ else
+ {
+ // Normal character
+ if (currentByte >= 0x80)
+ {
+ // Not 7-bit, so not valid UTF-7 input: try to fallback
+ if (!buffer.Fallback(currentByte))
+ break; // Stop here, didn't throw
+
+ // Done falling back
+ continue;
+ }
+
+ // Use the normal character
+ c = currentByte;
+ }
+
+ if (c >= 0)
+ {
+ // Check our buffer
+ if (!buffer.AddChar((char)c))
+ {
+ // No room. If it was a plain char we'll try again later.
+ // Note, we'll consume this byte and stick it in decoder, even if we can't output it
+ if (bitCount >= 0) // Can we remember this byte (char)
+ {
+ buffer.AdjustBytes(+1); // Need to re-add the byte that AddChar subtracted when it failed
+ bitCount += 16; // We'll still need that char we have in our bits
+ }
+ break; // didn't throw, stop
+ }
+ }
+ }
+
+ // Stick stuff in the decoder if we can (chars == null if counting, so don't store decoder)
+ if (chars != null && decoder != null)
+ {
+ // MustFlush? (Could've been cleared by ThrowCharsOverflow if Convert & didn't reach end of buffer)
+ if (decoder.MustFlush)
+ {
+ // RFC doesn't specify what would happen if we have non-0 leftover bits, we just drop them
+ decoder.bits = 0;
+ decoder.bitCount = -1;
+ decoder.firstByte = false;
+ }
+ else
+ {
+ // Keep the partial state so the next call can continue this sequence
+ decoder.bits = bits;
+ decoder.bitCount = bitCount;
+ decoder.firstByte = firstByte;
+ }
+ decoder.m_bytesUsed = buffer.BytesUsed;
+ }
+ // else ignore any hanging bits.
+
+ // Return our count
+ return buffer.Count;
+ }
+
+
+ // Creates a new UTF-7 decoder object bound to this encoding instance.
+ // Each call returns a fresh UTF7Encoding.Decoder.
+ public override System.Text.Decoder GetDecoder() => new UTF7Encoding.Decoder(this);
+
+
+ // Creates a new UTF-7 encoder object bound to this encoding instance.
+ // Each call returns a fresh UTF7Encoding.Encoder.
+ public override System.Text.Encoder GetEncoder()
+ {
+     System.Text.Encoder utf7Encoder = new UTF7Encoding.Encoder(this);
+     return utf7Encoder;
+ }
+
+
+ // Returns an upper bound on the number of bytes needed to encode charCount chars.
+ //
+ // Throws ArgumentOutOfRangeException when charCount is negative, or when the
+ // computed bound does not fit in an Int32.
+ //
+ // How the bound is derived:
+ //  * Assume no char can be direct-encoded. Each base64 byte carries 6 bits, so the
+ //    payload needs Math.Ceiling((double)charCount * 16 / 6) bytes, plus two bytes for
+ //    the shift-in ('+') and shift-out ('-') markers. That is always <= 2 + 3 * charCount.
+ //  * Budgeting 3 bytes (18 bits) per char is more than a 16-bit char needs, but for a
+ //    direct-encoded char the same 3 bytes leave room to turn base64 mode off and back
+ //    on again when encoded and direct chars alternate.
+ //  * Even with no input chars, a '-' may have to be emitted to leave base64 mode when
+ //    MustFlush is true, which the constant 2 covers.
+ //  * UTF-7 encodes surrogates individually and does not care about mismatched pairs,
+ //    so every code unit is encodable in UTF-7 and no fallback expansion is needed.
+ public override int GetMaxByteCount(int charCount)
+ {
+     if (charCount < 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(charCount),
+             SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     // Do the arithmetic in 64 bits so the overflow check below is meaningful.
+     long worstCase = 2 + 3 * (long)charCount;
+
+     if (worstCase > int.MaxValue)
+     {
+         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
+     }
+
+     return (int)worstCase;
+ }
+
+
+ // Returns an upper bound on the number of chars produced by decoding byteCount bytes.
+ //
+ // Throws ArgumentOutOfRangeException when byteCount is negative.
+ //
+ // The worst case is one char per input byte. The result is never less than 1, so
+ // that there is room for a char made from left-over bits when the decoder is being
+ // flushed. Extra bits are ignored (per spec), so UTF-7 has no "unknown" expansion
+ // in this direction.
+ public override int GetMaxCharCount(int byteCount)
+ {
+     if (byteCount < 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(byteCount),
+             SR.ArgumentOutOfRange_NeedNonNegNum);
+     }
+     Contract.EndContractBlock();
+
+     return (byteCount == 0) ? 1 : byteCount;
+ }
+
+ // Of all the amazing things... This MUST be Decoder so that our com name
+ // for System.Text.Decoder doesn't change
+ private sealed class Decoder : DecoderNLS, ISerializable
+ {
+     // Decoding state carried between calls. These fields are internal (not private)
+     // because UTF7Encoding.GetChars reads and writes them directly.
+
+     // Bits accumulated so far from base64 bytes.
+     internal int bits;
+
+     // Number of valid bits held in "bits"; -1 means "not in base64 mode".
+     internal int bitCount;
+
+     // True right after a '+' was seen and before any other byte was consumed.
+     internal bool firstByte;
+
+     // The original code notes that the base constructor calls Reset(), so the
+     // fields above do not need explicit initialization here.
+     public Decoder(UTF7Encoding encoding) : base(encoding)
+     {
+     }
+
+     // ISerializable implementation: serialization is not supported for this type.
+     void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+     {
+         throw new PlatformNotSupportedException();
+     }
+
+     // Returns the decoder to its initial (non-base64) state and resets the
+     // fallback buffer if one has been created.
+     public override void Reset()
+     {
+         firstByte = false;
+         bitCount = -1;
+         bits = 0;
+         m_fallbackBuffer?.Reset();
+     }
+
+     // Anything left in our decoder?
+     // NOTE: This forces the last '-', which some encoders might not emit. If we
+     // don't see it we don't think we're done reading.
+     internal override bool HasState => bitCount != -1;
+ }
+
+ // Of all the amazing things... This MUST be Encoder so that our com name
+ // for System.Text.Encoder doesn't change
+ private sealed class Encoder : EncoderNLS, ISerializable
+ {
+     // Encoding state carried between calls. Declared internal rather than private
+     // (the original marks them "/*private*/ internal").
+
+     // Pending bits that have not yet been written out.
+     internal int bits;
+
+     // Number of pending bits; Reset() sets this to -1 as the initial value.
+     internal int bitCount;
+
+     // The original code notes that the base constructor calls Reset(), so the
+     // fields above do not need explicit initialization here.
+     public Encoder(UTF7Encoding encoding) : base(encoding)
+     {
+     }
+
+     // ISerializable implementation: serialization is not supported for this type.
+     void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+     {
+         throw new PlatformNotSupportedException();
+     }
+
+     // Returns the encoder to its initial state and resets the fallback buffer
+     // if one has been created.
+     public override void Reset()
+     {
+         bits = 0;
+         bitCount = -1;
+         m_fallbackBuffer?.Reset();
+     }
+
+     // Anything left in our encoder? True when there are pending bits or the bit
+     // count differs from its initial value of -1.
+     internal override bool HasState => bitCount != -1 || bits != 0;
+ }
+
+ // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
+ // and turn off base64 mode if it was in that mode. We still exit the mode, but now we fallback.
+ private sealed class DecoderUTF7Fallback : DecoderFallback
+ {
+     // This fallback carries no configuration, so construction does nothing.
+     public DecoderUTF7Fallback()
+     {
+     }
+
+     // Maximum number of characters that this fallback can return:
+     // one char per bad byte.
+     public override int MaxCharCount => 1;
+
+     // Each call hands out a new buffer that does the actual byte-to-char fallback.
+     public override DecoderFallbackBuffer CreateFallbackBuffer() => new DecoderUTF7FallbackBuffer(this);
+
+     // All instances are interchangeable: any other DecoderUTF7Fallback is equal,
+     // anything else (including null) is not.
+     public override bool Equals(Object value) => value is DecoderUTF7Fallback;
+
+     // Constant hash code, consistent with Equals treating all instances as equal.
+     public override int GetHashCode() => 984;
+ }
+
+ // Fallback buffer used by DecoderUTF7Fallback. It holds at most one character:
+ // the invalid input byte, zero-extended to a char. A zero byte cannot be
+ // represented (0 is what GetNextChar returns for "nothing left"), so it is dropped.
+ private sealed class DecoderUTF7FallbackBuffer : DecoderFallbackBuffer
+ {
+     // The single fallback character currently held.
+     private char cFallback = (char)0;
+
+     // Number of characters still to be returned; negative when no fallback is active.
+     private int iCount = -1;
+
+     // Number of characters the current fallback produced (set to 1 by Fallback).
+     private int iSize;
+
+     // Construction. The fallback argument is not used.
+     public DecoderUTF7FallbackBuffer(DecoderUTF7Fallback fallback)
+     {
+     }
+
+     // Starts a fallback for a single unknown byte.
+     //   bytesUnknown - expected to contain exactly one byte (asserted in debug builds).
+     //   index        - not used by this implementation.
+     // Returns true when a fallback character is available, false when the byte is 0.
+     public override bool Fallback(byte[] bytesUnknown, int index)
+     {
+         // We expect no previous fallback in our buffer
+         Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
+         Debug.Assert(bytesUnknown.Length == 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");
+
+         // Go ahead and get our fallback
+         cFallback = (char)bytesUnknown[0];
+
+         // Any of the fallback characters can be handled except for 0
+         if (cFallback == 0)
+         {
+             return false;
+         }
+
+         iCount = iSize = 1;
+
+         return true;
+     }
+
+     // Returns the fallback character while one remains, otherwise (char)0.
+     // Note: this means that 0 in a UTF7 stream will never be emitted.
+     public override char GetNextChar()
+     {
+         if (iCount-- > 0)
+             return cFallback;
+
+         return (char)0;
+     }
+
+     // Steps back one character so the next GetNextChar returns it again.
+     // Returns true if the move was performed, false otherwise.
+     //
+     // The position is only changed when the move is legal. Previously iCount was
+     // incremented before the range check, so a rejected call (for example right
+     // after Fallback, with nothing read yet) left iCount == iSize + 1, which made
+     // Remaining report 2 and GetNextChar return the fallback char twice.
+     public override bool MovePrevious()
+     {
+         if (iCount >= 0 && iCount < iSize)
+         {
+             iCount++;
+             return true;
+         }
+
+         // Nothing to move back to: either no fallback is active, we have already
+         // read past the end, or we are already at the start.
+         return false;
+     }
+
+     // Return # of chars left in this fallback
+     public override int Remaining
+     {
+         get
+         {
+             return (iCount > 0) ? iCount : 0;
+         }
+     }
+
+     // Clear the buffer. byteStart is not declared in this class; presumably it is
+     // the byte pointer kept by the base class (hence the unsafe modifier).
+     public override unsafe void Reset()
+     {
+         iCount = -1;
+         byteStart = null;
+     }
+
+     // This version just counts the fallback and doesn't actually copy anything.
+     // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
+     // array, and we might need the index, hence the byte*. pBytes is not used here.
+     // Throws ArgumentException when bytes does not contain exactly one byte.
+     // Returns 0 for a zero byte (it cannot be fallen back), 1 otherwise.
+     internal unsafe override int InternalFallback(byte[] bytes, byte* pBytes)
+     {
+         // We expect no previous fallback in our buffer
+         Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");
+         if (bytes.Length != 1)
+         {
+             throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
+         }
+
+         // Can't fallback a byte 0, so return for that case, 1 otherwise.
+         return bytes[0] == 0 ? 0 : 1;
+     }
+ }
+ }
+}