1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Diagnostics;
6 using System.Globalization;
7 using System.Diagnostics.Contracts;
8 using System.Threading;
9 using System.Runtime.Serialization;
10 using System.Diagnostics.CodeAnalysis;
14 // This abstract base class represents a character encoding. The class provides
15 // methods to convert arrays and strings of Unicode characters to and from
16 // arrays of bytes. A number of Encoding implementations are provided in
17 // the System.Text namespace, including:
19 // ASCIIEncoding, which encodes Unicode characters as single 7-bit
20 // ASCII characters. This encoding only supports character values between 0x00 and 0x7F.
22 // BaseCodePageEncoding, which encapsulates a Windows code page. Any
23 // installed code page can be accessed through this encoding, and conversions
24 // are performed using the WideCharToMultiByte and
25 // MultiByteToWideChar Windows API functions.
26 // UnicodeEncoding, which encodes each Unicode character as two
27 // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
28 // page 1201) encodings are recognized.
29 // UTF7Encoding, which encodes Unicode characters using the UTF-7
30 // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
31 // encoding supports all Unicode character values, and can also be accessed
32 // as code page 65000.
33 // UTF8Encoding, which encodes Unicode characters using the UTF-8
34 // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
35 // encoding supports all Unicode character values, and can also be accessed
36 // as code page 65001.
37 // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
39 // In addition to directly instantiating Encoding objects, an
40 // application can use the GetEncoding methods and the static
41 // Default, ASCII, Unicode, UTF7, and UTF8 properties
42 // of this class to obtain encodings.
44 // Through an encoding, the GetBytes method is used to convert arrays
45 // of characters to arrays of bytes, and the GetChars method is used to
46 // convert arrays of bytes to arrays of characters. The GetBytes and
47 // GetChars methods maintain no state between conversions, and are
48 // generally intended for conversions of complete blocks of bytes and
49 // characters in one operation. When the data to be converted is only available
50 // in sequential blocks (such as data read from a stream) or when the amount of
51 // data is so large that it needs to be divided into smaller blocks, an
52 // application may choose to use a Decoder or an Encoder to
53 // perform the conversion. Decoders and encoders allow sequential blocks of
54 // data to be converted and they maintain the state required to support
55 // conversions of data that spans adjacent blocks. Decoders and encoders are
56 // obtained using the GetDecoder and GetEncoder methods.
58 // The core GetBytes and GetChars methods require the caller
59 // to provide the destination buffer and ensure that the buffer is large enough
60 // to hold the entire result of the conversion. When using these methods,
61 // either directly on an Encoding object or on an associated
62 // Decoder or Encoder, an application can use one of two methods
63 // to allocate destination buffers.
65 // The GetByteCount and GetCharCount methods can be used to
66 // compute the exact size of the result of a particular conversion, and an
67 // appropriately sized buffer for that conversion can then be allocated.
68 // The GetMaxByteCount and GetMaxCharCount methods can be
69 // used to compute the maximum possible size of a conversion of a given
70 // number of bytes or characters, and a buffer of that size can then be reused
71 // for multiple conversions.
73 // The first method generally uses less memory, whereas the second method
74 // generally executes faster.
77 public abstract class Encoding : ICloneable
79 // For netcore we use UTF8 as default encoding since ANSI isn't available
80 private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding = new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);
82 // Returns the default encoding: the cached UTF-8 instance above, which is created with
// encoderShouldEmitUTF8Identifier: false. It is NOT the system's current ANSI code page.
83 public static Encoding Default => s_defaultEncoding;
86 // The following values are from mlang.idl. These values
87 // should be in sync with those in mlang.idl.
// They are bit masks: the IsBrowserDisplay, IsBrowserSave, IsMailNewsDisplay and
// IsMailNewsSave properties test them against dataItem.Flags.
89 internal const int MIMECONTF_MAILNEWS = 0x00000001;
90 internal const int MIMECONTF_BROWSER = 0x00000002;
91 internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;
92 internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;
// Numeric code page identifiers. GetEncoding(int) switches on several of them.
94 // Special Case Code Pages
95 private const int CodePageDefault = 0;
96 private const int CodePageNoOEM = 1; // OEM Code page not supported
97 private const int CodePageNoMac = 2; // MAC code page not supported
98 private const int CodePageNoThread = 3; // Thread code page not supported
99 private const int CodePageNoSymbol = 42; // Symbol code page not supported
100 private const int CodePageUnicode = 1200; // Unicode
101 private const int CodePageBigEndian = 1201; // Big Endian Unicode
102 private const int CodePageWindows1252 = 1252; // Windows 1252 code page
104 // 20936 has the same code page as 10008, so we'll special case it
105 private const int CodePageMacGB2312 = 10008;
106 private const int CodePageGB2312 = 20936;
107 private const int CodePageMacKorean = 10003;
108 private const int CodePageDLLKorean = 20949;
110 // ISO 2022 Code Pages
111 private const int ISO2022JP = 50220;
112 private const int ISO2022JPESC = 50221;
113 private const int ISO2022JPSISO = 50222;
114 private const int ISOKorean = 50225;
115 private const int ISOSimplifiedCN = 50227;
116 private const int EUCJP = 51932;
117 private const int ChineseHZ = 52936; // HZ has ~}~{~~ sequences
119 // 51936 is the same as 936
120 private const int DuplicateEUCCN = 51936;
121 private const int EUCCN = 936;
123 private const int EUCKR = 51949;
125 // Latin 1 & ASCII Code Pages
126 internal const int CodePageASCII = 20127; // ASCII
127 internal const int ISO_8859_1 = 28591; // Latin1
// ISCII code pages
130 private const int ISCIIAssemese = 57006;
131 private const int ISCIIBengali = 57003;
132 private const int ISCIIDevanagari = 57002;
133 private const int ISCIIGujarathi = 57010;
134 private const int ISCIIKannada = 57008;
135 private const int ISCIIMalayalam = 57009;
136 private const int ISCIIOriya = 57007;
137 private const int ISCIIPanjabi = 57011;
138 private const int ISCIITamil = 57004;
139 private const int ISCIITelugu = 57005;
// GB18030 code page
142 private const int GB18030 = 54936;
// ISO 8859-8 variants (8-I and Visual)
145 private const int ISO_8859_8I = 38598;
146 private const int ISO_8859_8_Visual = 28598;
148 // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
149 private const int ENC50229 = 50229;
151 // Special code pages
152 private const int CodePageUTF7 = 65000;
153 private const int CodePageUTF8 = 65001;
154 private const int CodePageUTF32 = 12000;
155 private const int CodePageUTF32BE = 12001;
// Code page identifier of this encoding; assigned by the constructors and by DeserializeEncoding.
157 internal int m_codePage = 0;
159 // dataItem should be internal (not private); otherwise it will break during the deserialization
160 // of data that came from Everett
161 internal CodePageDataItem dataItem = null;
// Set to true when deserialized data did not carry the Whidbey (V2.0) fallback fields.
164 internal bool m_deserializedFromEverett = false;
166 // Because of encoders we may be read only
167 [OptionalField(VersionAdded = 2)]
168 private bool m_isReadOnly = true;
170 // Encoding (encoder) fallback
171 [OptionalField(VersionAdded = 2)]
172 internal EncoderFallback encoderFallback = null;
// Decoding (decoder) fallback
173 [OptionalField(VersionAdded = 2)]
174 internal DecoderFallback decoderFallback = null;
// Parameterless constructor: chains to Encoding(int) with code page 0.
176 protected Encoding() : this(0)
// Creates an encoding for the given code page with the default fallbacks.
// Throws ArgumentOutOfRangeException (naming codePage) when code page validation fails.
181 protected Encoding(int codePage)
183 // Validate code page
186 throw new ArgumentOutOfRangeException(nameof(codePage));
188 Contract.EndContractBlock();
190 // Remember code page
191 m_codePage = codePage;
193 // Use default encoder/decoder fallbacks
// NOTE: SetDefaultFallbacks is virtual, so an override in a derived class runs here,
// before that derived class's own constructor body has executed.
194 this.SetDefaultFallbacks();
197 // This constructor is needed to allow any sub-classing implementation to provide encoder/decoder fallback objects
198 // because the encoding object is always created as a read-only object and doesn't allow setting the encoder/decoder
199 // fallback after the creation is done.
// A null encoderFallback or decoderFallback is replaced by the corresponding internal best-fit fallback.
// Throws ArgumentOutOfRangeException (naming codePage) when code page validation fails.
200 protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
202 // Validate code page
205 throw new ArgumentOutOfRangeException(nameof(codePage));
207 Contract.EndContractBlock();
209 // Remember code page
210 m_codePage = codePage;
212 this.encoderFallback = encoderFallback ?? new InternalEncoderBestFitFallback(this);
213 this.decoderFallback = decoderFallback ?? new InternalDecoderBestFitFallback(this);
216 // Default fallback that we'll use.
// This base implementation installs the internal best-fit encoder and decoder fallbacks.
// The method is virtual, so derived encodings can install different defaults.
217 internal virtual void SetDefaultFallbacks()
219 // For UTF-X encodings, we use a replacement fallback with an "\xFFFD" string,
220 // For ASCII we use "?" replacement fallback, etc.
221 this.encoderFallback = new InternalEncoderBestFitFallback(this);
222 this.decoderFallback = new InternalDecoderBestFitFallback(this);
226 #region Serialization
// Resets both fallbacks to null before deserialization; OnDeserialized() checks them for null
// afterwards to detect data that did not carry them.
227 internal void OnDeserializing()
229 // initialize the optional Whidbey fields
230 encoderFallback = null;
231 decoderFallback = null;
// Post-deserialization fix-up: when either fallback is still null the data is treated as
// Everett-format, the Everett flag is set and the default fallbacks are installed.
235 internal void OnDeserialized()
237 if (encoderFallback == null || decoderFallback == null)
239 m_deserializedFromEverett = true;
240 SetDefaultFallbacks();
243 // dataItem is always recalculated from the code page #
// NOTE(review): presumably the serialization-callback overload that forwards to OnDeserializing() - confirm.
248 private void OnDeserializing(StreamingContext ctx)
// NOTE(review): presumably the serialization-callback overload that forwards to OnDeserialized() - confirm.
255 private void OnDeserialized(StreamingContext ctx)
// Callback taking the StreamingContext of a serialization operation.
// NOTE(review): presumably runs before this instance is serialized - confirm.
261 private void OnSerializing(StreamingContext ctx)
263 // to be consistent with SerializeEncoding
267 // The following two methods are used by inherited classes that implement ISerializable
268 // Deserialization Helper
// Restores this encoding from info: m_codePage is always read, dataItem is cleared, then the
// V2.0 fields (m_isReadOnly and both fallbacks) are read. A SerializationException while reading
// them is taken to mean Everett-format data: the Everett flag is set and the default fallbacks
// are installed. Throws ArgumentNullException when info is null.
269 internal void DeserializeEncoding(SerializationInfo info, StreamingContext context)
272 if (info == null) throw new ArgumentNullException(nameof(info));
273 Contract.EndContractBlock();
275 // All versions have a code page
276 this.m_codePage = (int)info.GetValue("m_codePage", typeof(int));
278 // We can get dataItem on the fly if needed, and the index is different between versions
279 // so ignore whatever dataItem data we get from Everett.
280 this.dataItem = null;
282 // See if we have a code page
286 // Try Whidbey V2.0 Fields
289 m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
291 this.encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
292 this.decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
294 catch (SerializationException)
297 // Didn't have Whidbey things, must be Everett
299 this.m_deserializedFromEverett = true;
301 // May as well be read only
303 SetDefaultFallbacks();
307 // Serialization Helper
// Writes this encoding into info: the read-only flag, both fallbacks (read through the public
// properties) and the code page, plus a null "dataItem" and the "Encoding+"-prefixed duplicates
// that Everett expects. Throws ArgumentNullException when info is null.
308 internal void SerializeEncoding(SerializationInfo info, StreamingContext context)
311 if (info == null) throw new ArgumentNullException(nameof(info));
312 Contract.EndContractBlock();
314 // These are new V2.0 Whidbey stuff
315 info.AddValue("m_isReadOnly", m_isReadOnly);
316 info.AddValue("encoderFallback", this.EncoderFallback);
317 info.AddValue("decoderFallback", this.DecoderFallback);
319 // These were in Everett V1.1 as well
320 info.AddValue("m_codePage", this.m_codePage);
322 // This was unique to Everett V1.1
323 info.AddValue("dataItem", null);
325 // Everett duplicated these fields, so these are needed for portability
326 info.AddValue("Encoding+m_codePage", this.m_codePage);
327 info.AddValue("Encoding+dataItem", null);
330 #endregion Serialization
332 // Converts a byte array from one encoding to another. The bytes in the
333 // bytes array are converted from srcEncoding to
334 // dstEncoding, and the returned value is a new byte array
335 // containing the result of the conversion.
// Delegates to the range overload with index 0 and count bytes.Length.
// Throws ArgumentNullException naming bytes.
338 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
342 throw new ArgumentNullException(nameof(bytes));
343 Contract.Ensures(Contract.Result<byte[]>() != null);
345 return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
348 // Converts a range of bytes in a byte array from one encoding to another.
349 // This method converts count bytes from bytes starting at
350 // index index from srcEncoding to dstEncoding, and
351 // returns a new byte array containing the result of the conversion.
// Implemented as a decode with srcEncoding.GetChars followed by an encode with dstEncoding.GetBytes,
// so an intermediate char[] is allocated.
// Throws ArgumentNullException naming srcEncoding (checked first) or dstEncoding when one is null,
// and ArgumentNullException naming bytes.
354 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
355 byte[] bytes, int index, int count)
357 if (srcEncoding == null || dstEncoding == null)
// NOTE(review): SR.ArgumentNull_Array is an array-oriented message but these arguments are
// encodings - confirm whether a more fitting resource should be used.
359 throw new ArgumentNullException((srcEncoding == null ? nameof(srcEncoding) : nameof(dstEncoding)),
360 SR.ArgumentNull_Array);
364 throw new ArgumentNullException(nameof(bytes),
365 SR.ArgumentNull_Array);
367 Contract.Ensures(Contract.Result<byte[]>() != null);
369 return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
// Registers an encoding provider by forwarding it to EncodingProvider.AddProvider.
372 public static void RegisterProvider(EncodingProvider provider)
374 // Parameters validated inside EncodingProvider
375 EncodingProvider.AddProvider(provider);
// Returns the encoding for a numeric code page.
// Registered providers are queried first (EncodingProvider.GetEncodingFromProvider).
// Throws ArgumentOutOfRangeException when codepage is outside 0..65535, ArgumentException for
// the special values 1, 2, 3 and 42, and NotSupportedException when EncodingTable has no data
// item for the code page. The well-known code pages in the switch map to the static instances.
379 public static Encoding GetEncoding(int codepage)
381 Encoding result = EncodingProvider.GetEncodingFromProvider(codepage);
386 // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
387 // add the corresponding item in EncodingTable.
388 // Otherwise, the code below will throw exception when trying to call
389 // EncodingTable.GetDataItem().
391 if (codepage < 0 || codepage > 65535)
393 throw new ArgumentOutOfRangeException(
394 nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
397 Contract.EndContractBlock();
401 case CodePageDefault: return Default; // 0
402 case CodePageUnicode: return Unicode; // 1200
403 case CodePageBigEndian: return BigEndianUnicode; // 1201
404 case CodePageUTF32: return UTF32; // 12000
405 case CodePageUTF32BE: return BigEndianUTF32; // 12001
406 case CodePageUTF7: return UTF7; // 65000
407 case CodePageUTF8: return UTF8; // 65001
408 case CodePageASCII: return ASCII; // 20127
409 case ISO_8859_1: return Latin1; // 28591
411 // We don't allow the following special code page values that Win32 allows.
412 case CodePageNoOEM: // 1 CP_OEMCP
413 case CodePageNoMac: // 2 CP_MACCP
414 case CodePageNoThread: // 3 CP_THREAD_ACP
415 case CodePageNoSymbol: // 42 CP_SYMBOL
416 throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
419 // Is it a valid code page?
420 if (EncodingTable.GetCodePageDataItem(codepage) == null)
422 throw new NotSupportedException(
423 SR.Format(SR.NotSupported_NoCodepageData, codepage));
// Returns the encoding for a numeric code page with the supplied fallbacks.
// Registered providers are queried first (with the fallbacks). Otherwise the encoding from
// GetEncoding(codepage) is cloned and the fallbacks are assigned to the clone through the
// EncoderFallback / DecoderFallback property setters, leaving the original untouched.
430 public static Encoding GetEncoding(int codepage,
431 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
433 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
435 if (baseEncoding != null)
438 // Get the default encoding (which is cached and read only)
439 baseEncoding = GetEncoding(codepage);
441 // Clone it and set the fallback
442 Encoding fallbackEncoding = (Encoding)baseEncoding.Clone();
443 fallbackEncoding.EncoderFallback = encoderFallback;
444 fallbackEncoding.DecoderFallback = decoderFallback;
446 return fallbackEncoding;
449 // Returns an Encoding object for a given encoding name.
// Registered providers are queried by name first. Otherwise the name is resolved to a code
// page with EncodingTable.GetCodePageFromName and passed to GetEncoding(int).
452 public static Encoding GetEncoding(String name)
454 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name);
455 if (baseEncoding != null)
459 // NOTE: If you add a new encoding that can be requested by name, be sure to
460 // add the corresponding item in EncodingTable.
461 // Otherwise, the code below will throw exception when trying to call
462 // EncodingTable.GetCodePageFromName().
464 return GetEncoding(EncodingTable.GetCodePageFromName(name));
467 // Returns an Encoding object for a given encoding name, using the supplied fallbacks.
// Registered providers are queried by name (with the fallbacks) first. Otherwise the name is
// resolved with EncodingTable.GetCodePageFromName and passed, together with the fallbacks,
// to GetEncoding(int, EncoderFallback, DecoderFallback).
470 public static Encoding GetEncoding(String name,
471 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
473 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
474 if (baseEncoding != null)
478 // NOTE: If you add a new encoding that can be requested by name, be sure to
479 // add the corresponding item in EncodingTable.
480 // Otherwise, the code below will throw exception when trying to call
481 // EncodingTable.GetCodePageFromName().
483 return (GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback));
486 // Returns an array of EncodingInfo objects describing all of our encodings.
// The array comes straight from EncodingTable.GetEncodings().
488 public static EncodingInfo[] GetEncodings()
490 return EncodingTable.GetEncodings();
// Returns the preamble bytes of this encoding. This base implementation has none and returns
// the shared empty array; the method is virtual so derived encodings can supply one.
494 public virtual byte[] GetPreamble()
496 return Array.Empty<byte>();
// Lazily loads dataItem for m_codePage from EncodingTable.
// Throws NotSupportedException when the table has no entry for the code page.
499 private void GetDataItem()
501 if (dataItem == null)
503 dataItem = EncodingTable.GetCodePageDataItem(m_codePage);
504 if (dataItem == null)
506 throw new NotSupportedException(
507 SR.Format(SR.NotSupported_NoCodepageData, m_codePage));
512 // Returns the name for this encoding that can be used with mail agent body tags.
513 // If the encoding may not be used, the string is empty.
// The value is dataItem.BodyName; dataItem is checked for null before it is read.
515 public virtual String BodyName
519 if (dataItem == null)
523 return (dataItem.BodyName);
527 // Returns the human-readable description of the encoding (e.g. Hebrew (DOS)).
// Looks the name up by code page with GetLocalizedEncodingNameResource and throws
// NotSupportedException when there is no resource for the code page. When the lookup yields
// the raw resource identifier (a string starting with "Globalization_cp_"), the English name
// from EncodingTable is used instead.
529 public virtual String EncodingName
533 string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
534 if (encodingName == null)
536 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
539 if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
541 // On ProjectN, resource strings are stripped from retail builds and replaced by
542 // their identifier names. Since this property is meant to be a localized string,
543 // but we don't localize ProjectN, we specifically need to do something reasonable
544 // in this case. This currently returns the English name of the encoding from a
545 // static data table.
// NOTE(review): GetCodePageDataItem can return null (other callers in this class check for it);
// the result is dereferenced here without a check - confirm this cannot happen for these code pages.
546 encodingName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
547 if (encodingName == null)
// NOTE(review): this passes (WebName, CodePage) to the same format resource that is given only
// (CodePage) a few lines above - confirm which argument list matches the resource's placeholders.
549 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
// Maps a built-in code page number to its SR.Globalization_cp_* name resource.
// Returns null for any code page that is not listed.
556 private static string GetLocalizedEncodingNameResource(int codePage)
560 case 1200: return SR.Globalization_cp_1200;
561 case 1201: return SR.Globalization_cp_1201;
562 case 12000: return SR.Globalization_cp_12000;
563 case 12001: return SR.Globalization_cp_12001;
564 case 20127: return SR.Globalization_cp_20127;
565 case 28591: return SR.Globalization_cp_28591;
566 case 65000: return SR.Globalization_cp_65000;
567 case 65001: return SR.Globalization_cp_65001;
568 default: return null;
// Returns the encoding's name resource, looked up with the key "Globalization_cp_" + code page number.
// NOTE(review): EncodingName is declared twice in this class; presumably the two are alternatives
// chosen by conditional compilation - confirm.
572 public virtual String EncodingName
576 return SR.GetResourceString("Globalization_cp_" + m_codePage.ToString());
580 // Returns the name for this encoding that can be used with mail agent header
581 // tags. If the encoding may not be used, the string is empty.
// The value is dataItem.HeaderName; dataItem is checked for null before it is read.
583 public virtual String HeaderName
587 if (dataItem == null)
591 return (dataItem.HeaderName);
595 // Returns the IANA preferred name for this encoding.
// The value is dataItem.WebName; dataItem is checked for null before it is read.
596 public virtual String WebName
600 if (dataItem == null)
604 return (dataItem.WebName);
608 // Returns the Windows code page that most closely corresponds to this encoding.
// The value is dataItem.UIFamilyCodePage; dataItem is checked for null before it is read.
610 public virtual int WindowsCodePage
614 if (dataItem == null)
618 return (dataItem.UIFamilyCodePage);
623 // True if and only if the encoding is used for display by browser clients.
// Computed by testing the MIMECONTF_BROWSER bit of dataItem.Flags.
625 public virtual bool IsBrowserDisplay
629 if (dataItem == null)
633 return ((dataItem.Flags & MIMECONTF_BROWSER) != 0);
637 // True if and only if the encoding is used for saving by browser clients.
// Computed by testing the MIMECONTF_SAVABLE_BROWSER bit of dataItem.Flags.
639 public virtual bool IsBrowserSave
643 if (dataItem == null)
647 return ((dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0);
651 // True if and only if the encoding is used for display by mail and news clients.
// Computed by testing the MIMECONTF_MAILNEWS bit of dataItem.Flags.
653 public virtual bool IsMailNewsDisplay
657 if (dataItem == null)
661 return ((dataItem.Flags & MIMECONTF_MAILNEWS) != 0);
666 // True if and only if the encoding is used for saving documents by mail and news clients.
// Computed by testing the MIMECONTF_SAVABLE_MAILNEWS bit of dataItem.Flags.
669 public virtual bool IsMailNewsSave
673 if (dataItem == null)
677 return ((dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0);
681 // True if and only if the encoding only uses single byte code points (i.e. ASCII, 1252, etc.).
// Virtual, so each derived encoding can report its own value.
683 public virtual bool IsSingleByte
// Gets or sets the encoder fallback of this encoding.
// The getter returns the encoderFallback field. The setter can throw InvalidOperationException
// (with the read-only message) and ArgumentNullException naming value; otherwise it stores
// value in the field.
692 public EncoderFallback EncoderFallback
696 return encoderFallback;
702 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
705 throw new ArgumentNullException(nameof(value));
706 Contract.EndContractBlock();
708 encoderFallback = value;
// Gets or sets the decoder fallback of this encoding.
// The getter returns the decoderFallback field. The setter can throw InvalidOperationException
// (with the read-only message) and ArgumentNullException naming value; otherwise it stores
// value in the field.
713 public DecoderFallback DecoderFallback
717 return decoderFallback;
723 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
726 throw new ArgumentNullException(nameof(value));
727 Contract.EndContractBlock();
729 decoderFallback = value;
// Creates a shallow copy of this encoding (MemberwiseClone) whose read-only flag is cleared.
// Reference-typed fields, including the fallback objects, are shared with the original.
734 public virtual Object Clone()
736 Encoding newEncoding = (Encoding)this.MemberwiseClone();
738 // The new one is not read-only
739 newEncoding.m_isReadOnly = false;
// Returns the m_isReadOnly flag: true by default, false on instances produced by Clone().
744 public bool IsReadOnly
748 return (m_isReadOnly);
752 // Returns an encoding for the ASCII character set. The returned encoding
753 // is the shared ASCIIEncoding.s_default instance.
755 public static Encoding ASCII => ASCIIEncoding.s_default;
757 // Returns an encoding for the Latin1 character set. The returned encoding
758 // is the shared Latin1Encoding.s_default instance.
760 // This is for our optimizations; the property is private and is returned by
// GetEncoding(int) for code page 28591 (ISO_8859_1).
761 private static Encoding Latin1 => Latin1Encoding.s_default;
763 // Returns the number of bytes required to encode the given character array.
// Delegates to GetByteCount(chars, 0, chars.Length).
// Throws ArgumentNullException naming chars.
767 public virtual int GetByteCount(char[] chars)
771 throw new ArgumentNullException(nameof(chars),
772 SR.ArgumentNull_Array);
774 Contract.EndContractBlock();
776 return GetByteCount(chars, 0, chars.Length);
// Returns the number of bytes required to encode the given string.
// This base implementation copies the string into a temporary char[] (ToCharArray) and
// delegates to the char[] range overload, so it allocates on every call.
// Throws ArgumentNullException naming s.
780 public virtual int GetByteCount(String s)
783 throw new ArgumentNullException(nameof(s));
784 Contract.EndContractBlock();
786 char[] chars = s.ToCharArray();
787 return GetByteCount(chars, 0, chars.Length);
790 // Returns the number of bytes required to encode a range of characters in
791 // a character array.
// Abstract: every concrete encoding supplies this; the other GetByteCount overloads of this
// base class end up calling it.
794 public abstract int GetByteCount(char[] chars, int index, int count);
796 // Returns the number of bytes required to encode a string range.
// Pins the string and passes a pointer to its index-th character, together with count, to the
// char* overload, so no copy of the string is made here.
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index or count
// (including when index > s.Length - count).
799 public int GetByteCount(string s, int index, int count)
802 throw new ArgumentNullException(nameof(s),
803 SR.ArgumentNull_String);
805 throw new ArgumentOutOfRangeException(nameof(index),
806 SR.ArgumentOutOfRange_NeedNonNegNum);
808 throw new ArgumentOutOfRangeException(nameof(count),
809 SR.ArgumentOutOfRange_NeedNonNegNum);
// Written as a subtraction so that index + count cannot overflow.
810 if (index > s.Length - count)
811 throw new ArgumentOutOfRangeException(nameof(index),
812 SR.ArgumentOutOfRange_IndexCount);
813 Contract.EndContractBlock();
817 fixed (char* pChar = s)
819 return GetByteCount(pChar + index, count);
824 // We expect this to be the workhorse for NLS encodings
825 // unfortunately for existing overrides, it has to call the [] version,
826 // which is really slow, so this method should be avoided if you're calling
827 // a 3rd party encoding.
// This base implementation copies count chars from the pointer into a new char[] and calls
// GetByteCount(char[], int, int) on the copy.
// Throws ArgumentNullException naming chars and ArgumentOutOfRangeException naming count.
829 [CLSCompliant(false)]
830 public virtual unsafe int GetByteCount(char* chars, int count)
832 // Validate input parameters
834 throw new ArgumentNullException(nameof(chars),
835 SR.ArgumentNull_Array);
838 throw new ArgumentOutOfRangeException(nameof(count),
839 SR.ArgumentOutOfRange_NeedNonNegNum);
840 Contract.EndContractBlock();
842 char[] arrChar = new char[count];
// Element-by-element copy from unmanaged memory into the managed array.
845 for (index = 0; index < count; index++)
846 arrChar[index] = chars[index];
848 return GetByteCount(arrChar, 0, count);
851 // For NLS Encodings, workhorse takes an encoder (may be null)
852 // Always validate parameters before calling internal version, which will only assert.
// This base implementation ignores the encoder argument and forwards to GetByteCount(char*, int).
853 internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
855 Debug.Assert(chars != null);
856 Debug.Assert(count >= 0);
858 return GetByteCount(chars, count);
861 // Returns a byte array containing the encoded representation of the given character array.
// Delegates to GetBytes(chars, 0, chars.Length).
// Throws ArgumentNullException naming chars.
865 public virtual byte[] GetBytes(char[] chars)
869 throw new ArgumentNullException(nameof(chars),
870 SR.ArgumentNull_Array);
872 Contract.EndContractBlock();
873 return GetBytes(chars, 0, chars.Length);
876 // Returns a byte array containing the encoded representation of a range
877 // of characters in a character array.
// Two passes over the input: GetByteCount sizes the result array exactly, then GetBytes fills it.
// No argument checks are made here; they are left to the two methods that are called.
880 public virtual byte[] GetBytes(char[] chars, int index, int count)
882 byte[] result = new byte[GetByteCount(chars, index, count)];
883 GetBytes(chars, index, count, result, 0);
887 // Encodes a range of characters in a character array into a range of bytes
888 // in a byte array. An exception occurs if the byte array is not large
889 // enough to hold the complete encoding of the characters. The
890 // GetByteCount method can be used to determine the exact number of
891 // bytes that will be produced for a given range of characters.
892 // Alternatively, the GetMaxByteCount method can be used to
893 // determine the maximum number of bytes that will be produced for a given
894 // number of characters, regardless of the actual character values.
// Abstract: every concrete encoding supplies this. Callers in this class treat the int result
// as the number of bytes written into bytes.
896 public abstract int GetBytes(char[] chars, int charIndex, int charCount,
897 byte[] bytes, int byteIndex);
899 // Returns a byte array containing the encoded representation of the given string.
// Sizes the result with GetByteCount(s), encodes the whole string into it and, in debug builds,
// asserts that the number of bytes written equals the computed count.
// Throws ArgumentNullException naming s.
903 public virtual byte[] GetBytes(String s)
906 throw new ArgumentNullException(nameof(s),
907 SR.ArgumentNull_String);
908 Contract.EndContractBlock();
910 int byteCount = GetByteCount(s);
911 byte[] bytes = new byte[byteCount];
912 int bytesReceived = GetBytes(s, 0, s.Length, bytes, 0);
913 Debug.Assert(byteCount == bytesReceived);
917 // Returns a byte array containing the encoded representation of the given string range.
// Pins the string, sizes the result with the char* GetByteCount overload, then encodes into the
// pinned result array with the char* / byte* GetBytes overload.
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index or count
// (including when index > s.Length - count).
921 public byte[] GetBytes(string s, int index, int count)
924 throw new ArgumentNullException(nameof(s),
925 SR.ArgumentNull_String);
927 throw new ArgumentOutOfRangeException(nameof(index),
928 SR.ArgumentOutOfRange_NeedNonNegNum);
930 throw new ArgumentOutOfRangeException(nameof(count),
931 SR.ArgumentOutOfRange_NeedNonNegNum);
// Written as a subtraction so that index + count cannot overflow.
932 if (index > s.Length - count)
933 throw new ArgumentOutOfRangeException(nameof(index),
934 SR.ArgumentOutOfRange_IndexCount);
935 Contract.EndContractBlock();
939 fixed (char* pChar = s)
941 int byteCount = GetByteCount(pChar + index, count);
// Presumably the empty-result path: &bytes[0] below needs an array with at least one element.
943 return Array.Empty<byte>();
945 byte[] bytes = new byte[byteCount];
946 fixed (byte* pBytes = &bytes[0])
948 int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount);
949 Debug.Assert(byteCount == bytesReceived);
// Encodes charCount characters of s starting at charIndex into bytes starting at byteIndex and
// returns the result of the char[] overload it delegates to.
// Note that the WHOLE string is copied with ToCharArray, not only the requested range; range
// and buffer validation is left to the char[] overload.
// Throws ArgumentNullException naming s.
956 public virtual int GetBytes(String s, int charIndex, int charCount,
957 byte[] bytes, int byteIndex)
960 throw new ArgumentNullException(nameof(s));
961 Contract.EndContractBlock();
962 return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
965 // This is our internal workhorse
966 // Always validate parameters before calling internal version, which will only assert.
// This base implementation ignores the encoder argument and forwards to the public
// GetBytes(char*, int, byte*, int) overload.
967 internal virtual unsafe int GetBytes(char* chars, int charCount,
968 byte* bytes, int byteCount, EncoderNLS encoder)
970 return GetBytes(chars, charCount, bytes, byteCount);
973 // We expect this to be the workhorse for NLS Encodings, but for existing
974 // ones we need a working (if slow) default implementation
976 // WARNING WARNING WARNING
978 // WARNING: If this breaks it could be a security threat. Obviously we
979 // call this internally, so you need to make sure that your pointers, counts
980 // and indexes are correct when you call this method.
982 // In addition, we have internal code, which will be marked as "safe" calling
983 // this code. However this code is dependent upon the implementation of an
984 // external GetBytes() method, which could be overridden by a third party and
985 // the results of which cannot be guaranteed. We use that result to copy
986 // the byte[] to our byte* output buffer. If the result count was wrong, we
987 // could easily overflow our output buffer. Therefore we do an extra test
988 // when we copy the buffer so that we don't overflow byteCount either.
// Steps: copy charCount chars from the pointer into a managed char[], encode them into a
// managed byte[] of length byteCount with the array overload, then copy bytes back out to the
// byte* buffer.
// Throws ArgumentNullException naming bytes (checked first) or chars, and
// ArgumentOutOfRangeException naming charCount (checked first) or byteCount when negative.
990 [CLSCompliant(false)]
991 public virtual unsafe int GetBytes(char* chars, int charCount,
992 byte* bytes, int byteCount)
994 // Validate input parameters
995 if (bytes == null || chars == null)
996 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
997 SR.ArgumentNull_Array);
999 if (charCount < 0 || byteCount < 0)
1000 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
1001 SR.ArgumentOutOfRange_NeedNonNegNum);
1002 Contract.EndContractBlock();
1004 // Get the char array to convert
1005 char[] arrChar = new char[charCount];
1008 for (index = 0; index < charCount; index++)
1009 arrChar[index] = chars[index];
1011 // Get the byte array to fill
1012 byte[] arrByte = new byte[byteCount];
1015 int result = GetBytes(arrChar, 0, charCount, arrByte, 0);
1017 Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");
1019 // Copy the byte array
1020 // WARNING: We MUST make sure that we don't copy too many bytes. We can't
1021 // rely on result because it could be a 3rd party implementation. We need
1022 // to make sure we never copy more than byteCount bytes no matter the value of result.
1024 if (result < byteCount)
1027 // Copy the data, don't overrun our array!
1028 for (index = 0; index < byteCount; index++)
1029 bytes[index] = arrByte[index];
1034 // Returns the number of characters produced by decoding the given byte array.
// Delegates to GetCharCount(bytes, 0, bytes.Length).
// Throws ArgumentNullException naming bytes.
1038 public virtual int GetCharCount(byte[] bytes)
1042 throw new ArgumentNullException(nameof(bytes),
1043 SR.ArgumentNull_Array);
1045 Contract.EndContractBlock();
1046 return GetCharCount(bytes, 0, bytes.Length);
1049 // Returns the number of characters produced by decoding a range of bytes in a byte array.
// Abstract: every concrete encoding supplies this; the other GetCharCount overloads of this
// base class end up calling it.
1053 public abstract int GetCharCount(byte[] bytes, int index, int count);
1055 // We expect this to be the workhorse for NLS Encodings, but for existing
1056 // ones we need a working (if slow) default implementation
// This base implementation copies count bytes from the pointer into a new byte[] and calls
// GetCharCount(byte[], int, int) on the copy.
// Throws ArgumentNullException naming bytes and ArgumentOutOfRangeException naming count.
1058 [CLSCompliant(false)]
1059 public virtual unsafe int GetCharCount(byte* bytes, int count)
1061 // Validate input parameters
1063 throw new ArgumentNullException(nameof(bytes),
1064 SR.ArgumentNull_Array);
1067 throw new ArgumentOutOfRangeException(nameof(count),
1068 SR.ArgumentOutOfRange_NeedNonNegNum);
1069 Contract.EndContractBlock();
1071 byte[] arrbyte = new byte[count];
// Element-by-element copy from unmanaged memory into the managed array.
1074 for (index = 0; index < count; index++)
1075 arrbyte[index] = bytes[index];
1077 return GetCharCount(arrbyte, 0, count);
1080 // This is our internal workhorse
1081 // Always validate parameters before calling internal version, which will only assert.
// This base implementation ignores the decoder argument and forwards to GetCharCount(byte*, int).
1082 internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
1084 return GetCharCount(bytes, count);
1087 // Returns a character array containing the decoded representation of a
1088 // given byte array.
// Delegates to GetChars(bytes, 0, bytes.Length).
// Throws ArgumentNullException naming bytes.
1091 public virtual char[] GetChars(byte[] bytes)
1095 throw new ArgumentNullException(nameof(bytes),
1096 SR.ArgumentNull_Array);
1098 Contract.EndContractBlock();
1099 return GetChars(bytes, 0, bytes.Length);
1102 // Returns a character array containing the decoded representation of a
1103 // range of bytes in a byte array.
// Two passes over the input: GetCharCount sizes the result array exactly, then GetChars fills it.
// No argument checks are made here; they are left to the two methods that are called.
1106 public virtual char[] GetChars(byte[] bytes, int index, int count)
1108 char[] result = new char[GetCharCount(bytes, index, count)];
1109 GetChars(bytes, index, count, result, 0);
1113 // Decodes a range of bytes in a byte array into a range of characters in a
1114 // character array. An exception occurs if the character array is not large
1115 // enough to hold the complete decoding of the bytes. The
1116 // GetCharCount method can be used to determine the exact number of
1117 // characters that will be produced for a given range of bytes.
1118 // Alternatively, the GetMaxCharCount method can be used to
1119 // determine the maximum number of characters that will be produced for a
1120 // given number of bytes, regardless of the actual byte values.
// Abstract: every concrete encoding supplies this. Callers in this class treat the int result
// as the number of characters written into chars.
1123 public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
1124 char[] chars, int charIndex);
1127 // We expect this to be the workhorse for NLS Encodings, but for existing
1128 // ones we need a working (if slow) default implementation.
1130 // WARNING WARNING WARNING
1132 // WARNING: If this breaks it could be a security threat. Obviously we
1133 // call this internally, so you need to make sure that your pointers, counts
1134 // and indexes are correct when you call this method.
1136 // In addition, we have internal code, which will be marked as "safe" calling
1137 // this code. However this code is dependent upon the implementation of an
1138 // external GetChars() method, which could be overridden by a third party and
1139 // the results of which cannot be guaranteed. We use that result to copy
1140 // the char[] to our char* output buffer. If the result count was wrong, we
1141 // could easily overflow our output buffer. Therefore we do an extra test
1142 // when we copy the buffer so that we don't overflow charCount either.
// Outline of this default implementation: copy the input bytes into a managed
// array, decode into a temporary char array of exactly charCount elements via
// the array-based GetChars overload, then copy chars out to the caller's buffer.
1144 [CLSCompliant(false)]
1145 public virtual unsafe int GetChars(byte* bytes, int byteCount,
1146 char* chars, int charCount)
1148 // Validate input parameters
// When both pointers are null, 'chars' is the one named in the exception.
1149 if (chars == null || bytes == null)
1150 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
1151 SR.ArgumentNull_Array);
// When both counts are negative, 'byteCount' is the one named in the exception.
1153 if (byteCount < 0 || charCount < 0)
1154 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
1155 SR.ArgumentOutOfRange_NeedNonNegNum);
1156 Contract.EndContractBlock();
1158 // Get the byte array to convert
1159 byte[] arrByte = new byte[byteCount];
// Copy the input element by element from the raw pointer.
1162 for (index = 0; index < byteCount; index++)
1163 arrByte[index] = bytes[index];
1165 // Get the char array to fill
1166 char[] arrChar = new char[charCount];
// Decode through the array-based overload, which a derived class implements.
1169 int result = GetChars(arrByte, 0, byteCount, arrChar, 0);
// The assert documents the expectation that the reported count fits in the output.
1171 Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");
1173 // Copy the char array
1174 // WARNING: We MUST make sure that we don't copy too many chars. We can't
1175 // rely on result because it could be a 3rd party implementation. We need
1176 // to make sure we never copy more than charCount chars no matter the value
// NOTE(review): the statement controlled by this test is not visible here;
// presumably it narrows the copy length down to 'result' -- confirm.
1178 if (result < charCount)
1181 // Copy the data, don't overrun our array!
// The loop bound is charCount, never 'result' directly.
1182 for (index = 0; index < charCount; index++)
1183 chars[index] = arrChar[index];
1189 // This is our internal workhorse
1190 // Always validate parameters before calling internal version, which will only assert.
// This base implementation does not use 'decoder': it forwards straight to the
// public pointer-based GetChars overload. The method is virtual, so derived
// encodings can replace it.
1191 internal virtual unsafe int GetChars(byte* bytes, int byteCount,
1192 char* chars, int charCount, DecoderNLS decoder)
1194 return GetChars(bytes, byteCount, chars, charCount);
// Decodes byteCount bytes starting at 'bytes' into a string. Unlike most
// members around it, this overload is not virtual; the string is built by
// String.CreateStringFromEncoding, which is handed this encoding instance.
1198 [CLSCompliant(false)]
1199 public unsafe string GetString(byte* bytes, int byteCount)
// Rejects a bad 'bytes' argument.
// NOTE(review): the guarding condition is not visible here (presumably a null check) -- confirm.
1202 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
// Rejects a bad 'byteCount' argument; the message resource asks for a non-negative number.
1205 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1206 Contract.EndContractBlock();
1208 return String.CreateStringFromEncoding(bytes, byteCount, this);
1211 // Returns the code page identifier of this encoding. The returned value is
1212 // an integer between 0 and 65535 if the encoding has a code page
1213 // identifier, or -1 if the encoding does not represent a code page.
// Virtual, so a derived encoding can supply its own value.
// NOTE(review): the accessor body is not visible here.
1216 public virtual int CodePage
1224 // IsAlwaysNormalized
1225 // Returns true if the encoding is always normalized for the specified encoding form
// Parameterless convenience overload: asks the virtual overload about
// NormalizationForm.FormC.
1227 public bool IsAlwaysNormalized()
1229 return this.IsAlwaysNormalized(NormalizationForm.FormC);
// Virtual overload taking the normalization form to ask about; encodings that
// can answer more precisely override it.
// NOTE(review): the return statement is not visible here; per the comment
// below, this base implementation presumably answers false -- confirm.
1233 public virtual bool IsAlwaysNormalized(NormalizationForm form)
1235 // Assume false unless the encoding knows otherwise
1239 // Returns a Decoder object for this encoding. The returned object
1240 // can be used to decode a sequence of bytes into a sequence of characters.
1241 // Contrary to the GetChars family of methods, a Decoder can
1242 // convert partial sequences of bytes into partial sequences of characters
1243 // by maintaining the appropriate state between the conversions.
1245 // This default implementation returns a Decoder that simply
1246 // forwards calls to the GetCharCount and GetChars methods to
1247 // the corresponding methods of this encoding. Encodings that require state
1248 // to be maintained between successive conversions should override this
1249 // method and return an instance of an appropriate Decoder
// A new DefaultDecoder wrapping this encoding is created on every call.
1253 public virtual Decoder GetDecoder()
1255 return new DefaultDecoder(this);
1258 // Returns an Encoder object for this encoding. The returned object
1259 // can be used to encode a sequence of characters into a sequence of bytes.
1260 // Contrary to the GetBytes family of methods, an Encoder can
1261 // convert partial sequences of characters into partial sequences of bytes
1262 // by maintaining the appropriate state between the conversions.
1264 // This default implementation returns an Encoder that simply
1265 // forwards calls to the GetByteCount and GetBytes methods to
1266 // the corresponding methods of this encoding. Encodings that require state
1267 // to be maintained between successive conversions should override this
1268 // method and return an instance of an appropriate Encoder
// A new DefaultEncoder wrapping this encoding is created on every call.
1272 public virtual Encoder GetEncoder()
1274 return new DefaultEncoder(this);
1277 // Returns the maximum number of bytes required to encode a given number of
1278 // characters. This method can be used to determine an appropriate buffer
1279 // size for byte arrays passed to the GetBytes method of this
1280 // encoding or the GetBytes method of an Encoder for this
1281 // encoding. All encodings must guarantee that no buffer overflow
1282 // exceptions will occur if buffers are sized according to the results of
1285 // WARNING: If you're using something besides the default replacement encoder fallback,
1286 // then you could have more bytes than this returned from an actual call to GetBytes().
// Abstract: each concrete encoding supplies its own upper bound for charCount characters.
1289 public abstract int GetMaxByteCount(int charCount);
1291 // Returns the maximum number of characters produced by decoding a given
1292 // number of bytes. This method can be used to determine an appropriate
1293 // buffer size for character arrays passed to the GetChars method of
1294 // this encoding or the GetChars method of a Decoder for this
1295 // encoding. All encodings must guarantee that no buffer overflow
1296 // exceptions will occur if buffers are sized according to the results of
// Abstract: each concrete encoding supplies its own upper bound for byteCount bytes.
1300 public abstract int GetMaxCharCount(int byteCount);
1302 // Returns a string containing the decoded representation of a given byte
// Convenience overload: decodes the whole array by forwarding to
// GetString(bytes, 0, bytes.Length).
1306 public virtual String GetString(byte[] bytes)
// Rejects a bad 'bytes' argument.
// NOTE(review): the guarding condition is not visible here (presumably a null check) -- confirm.
1309 throw new ArgumentNullException(nameof(bytes),
1310 SR.ArgumentNull_Array);
1311 Contract.EndContractBlock();
// Decode the full range [0, bytes.Length).
1313 return GetString(bytes, 0, bytes.Length);
1316 // Returns a string containing the decoded representation of a range of
1317 // bytes in a byte array.
1319 // Internally we override this for performance
// This default goes through an intermediate char[]: GetChars decodes the
// range, and the resulting array is then passed to the String constructor.
1322 public virtual String GetString(byte[] bytes, int index, int count)
1324 return new String(GetChars(bytes, index, count));
1327 // Returns an encoding for Unicode format. The returned encoding will be
1328 // an instance of the UnicodeEncoding class.
1330 // It will use little endian byte order, but will detect
1331 // input in big endian if it finds a byte order mark per Unicode 2.0.
// Simply returns the static field UnicodeEncoding.s_littleEndianDefault;
// nothing is constructed by this accessor.
1333 public static Encoding Unicode => UnicodeEncoding.s_littleEndianDefault;
1335 // Returns an encoding for Unicode format. The returned encoding will be
1336 // an instance of the UnicodeEncoding class.
1338 // It will use big endian byte order, but will detect
1339 // input in little endian if it finds a byte order mark per Unicode 2.0.
// Simply returns the static field UnicodeEncoding.s_bigEndianDefault;
// nothing is constructed by this accessor.
1341 public static Encoding BigEndianUnicode => UnicodeEncoding.s_bigEndianDefault;
1343 // Returns an encoding for the UTF-7 format. The returned encoding will be
1344 // an instance of the UTF7Encoding class.
// Simply returns the static field UTF7Encoding.s_default; nothing is
// constructed by this accessor.
1346 public static Encoding UTF7 => UTF7Encoding.s_default;
1348 // Returns an encoding for the UTF-8 format. The returned encoding will be
1349 // an instance of the UTF8Encoding class.
// Simply returns the static field UTF8Encoding.s_default; nothing is
// constructed by this accessor.
1351 public static Encoding UTF8 => UTF8Encoding.s_default;
1353 // Returns an encoding for the UTF-32 format. The returned encoding will be
1354 // an instance of the UTF32Encoding class.
// Simply returns the static field UTF32Encoding.s_default; nothing is
// constructed by this accessor.
1356 public static Encoding UTF32 => UTF32Encoding.s_default;
1358 // Returns an encoding for the UTF-32 format. The returned encoding will be
1359 // an instance of the UTF32Encoding class.
1361 // It will use big endian byte order.
// Private, unlike the neighbouring static encoding properties. Simply returns
// the static field UTF32Encoding.s_bigEndianDefault.
1363 private static Encoding BigEndianUTF32 => UTF32Encoding.s_bigEndianDefault;
// Value equality for encodings: two encodings compare equal when they have the
// same code page and both their encoder and decoder fallbacks are equal.
1365 public override bool Equals(Object value)
// 'as' yields null when value is null or is not an Encoding.
// NOTE(review): the null guard and the result for that case are not visible here -- confirm.
1367 Encoding that = value as Encoding;
1369 return (m_codePage == that.m_codePage) &&
1370 (EncoderFallback.Equals(that.EncoderFallback)) &&
1371 (DecoderFallback.Equals(that.DecoderFallback));
// Hash built from the same three inputs that Equals compares: the code page
// plus the hash codes of the encoder and decoder fallbacks, combined by addition.
1376 public override int GetHashCode()
1378 return m_codePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
// Best-fit data hook for the Unicode-to-bytes direction. The base
// implementation returns an empty array; being virtual, derived encodings can
// override it to return their own data.
1381 internal virtual char[] GetBestFitUnicodeToBytesData()
1383 // Normally we don't have any best fit data.
1384 return Array.Empty<char>();
// Best-fit data hook for the bytes-to-Unicode direction. The base
// implementation returns an empty array; being virtual, derived encodings can
// override it to return their own data.
1387 internal virtual char[] GetBestFitBytesToUnicodeData()
1389 // Normally we don't have any best fit data.
1390 return Array.Empty<char>();
// Always throws an ArgumentException with parameter name "bytes". The message
// is formatted from SR.Argument_EncodingConversionOverflowBytes with this
// encoding's EncodingName and the runtime type of its EncoderFallback.
1393 internal void ThrowBytesOverflow()
1395 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1396 // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
1397 throw new ArgumentException(
1398 SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType()), "bytes");
// Decides whether running out of output bytes is an error. It throws (via the
// parameterless overload) when there is no encoder, when the encoder has
// m_throwOnOverflow set, or when nothing was encoded. Otherwise it only clears
// the encoder's must-flush state.
1401 internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
1403 if (encoder == null || encoder.m_throwOnOverflow || nothingEncoded)
// Reset the encoder's fallback buffer before throwing, if it has one.
1405 if (encoder != null && encoder.InternalHasFallbackBuffer)
1406 encoder.FallbackBuffer.InternalReset();
1407 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1408 // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
1409 ThrowBytesOverflow();
1412 // If we didn't throw, we are in convert and have to remember our flushing
// Safe to dereference: this point is only reached when encoder is non-null,
// because a null encoder always takes the throwing branch above.
1413 encoder.ClearMustFlush();
// Always throws an ArgumentException with parameter name "chars". The message
// is formatted from SR.Argument_EncodingConversionOverflowChars with this
// encoding's EncodingName and the runtime type of its DecoderFallback.
1416 internal void ThrowCharsOverflow()
1418 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1419 // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
1420 throw new ArgumentException(
1421 SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType()), "chars");
// Decides whether running out of output chars is an error. It throws (via the
// parameterless overload) when there is no decoder, when the decoder has
// m_throwOnOverflow set, or when nothing was decoded. Otherwise it only clears
// the decoder's must-flush state.
1424 internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
1426 if (decoder == null || decoder.m_throwOnOverflow || nothingDecoded)
// Reset the decoder's fallback buffer before throwing, if it has one.
1428 if (decoder != null && decoder.InternalHasFallbackBuffer)
1429 decoder.FallbackBuffer.InternalReset();
1431 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1432 // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
1433 ThrowCharsOverflow();
1436 // If we didn't throw, we are in convert and have to remember our flushing
// Safe to dereference: this point is only reached when decoder is non-null,
// because a null decoder always takes the throwing branch above.
1437 decoder.ClearMustFlush();
// Encoder that forwards every call to the Encoding it wraps. The only field
// visible here is the wrapped encoding, and the 'flush' arguments are not
// passed on to it.
1440 internal sealed class DefaultEncoder : Encoder, IObjectReference, ISerializable
// The encoding all calls are forwarded to.
1442 private Encoding m_encoding;
// Wraps the given encoding.
1444 public DefaultEncoder(Encoding encoding)
1446 m_encoding = encoding;
// IObjectReference member: unsupported, always throws PlatformNotSupportedException.
1449 public Object GetRealObject(StreamingContext context)
1451 throw new PlatformNotSupportedException();
1454 // ISerializable implementation, get data for this object
// Unsupported as well: always throws PlatformNotSupportedException.
1455 void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
1457 throw new PlatformNotSupportedException();
1460 // Returns the number of bytes the next call to GetBytes will
1461 // produce if presented with the given range of characters and the given
1462 // value of the flush parameter. The returned value takes into
1463 // account the state in which the encoder was left following the last call
1464 // to GetBytes. The state of the encoder is not affected by a call
// In this default encoder the count comes straight from the wrapped
// encoding's GetByteCount; 'flush' is not used.
1468 public override int GetByteCount(char[] chars, int index, int count, bool flush)
1470 return m_encoding.GetByteCount(chars, index, count);
// Pointer-based variant; forwards to the wrapped encoding, 'flush' is not used.
1473 [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
1474 public unsafe override int GetByteCount(char* chars, int count, bool flush)
1476 return m_encoding.GetByteCount(chars, count);
1479 // Encodes a range of characters in a character array into a range of bytes
1480 // in a byte array. The method encodes charCount characters from
1481 // chars starting at index charIndex, storing the resulting
1482 // bytes in bytes starting at index byteIndex. The encoding
1483 // takes into account the state in which the encoder was left following the
1484 // last call to this method. The flush parameter indicates whether
1485 // the encoder should flush any shift-states and partial characters at the
1486 // end of the conversion. To ensure correct termination of a sequence of
1487 // blocks of encoded bytes, the last call to GetBytes should specify
1488 // a value of true for the flush parameter.
1490 // An exception occurs if the byte array is not large enough to hold the
1491 // complete encoding of the characters. The GetByteCount method can
1492 // be used to determine the exact number of bytes that will be produced for
1493 // a given range of characters. Alternatively, the GetMaxByteCount
1494 // method of the Encoding that produced this encoder can be used to
1495 // determine the maximum number of bytes that will be produced for a given
1496 // number of characters, regardless of the actual character values.
// In this default encoder the work is done by the wrapped encoding's
// GetBytes; 'flush' is not used.
1499 public override int GetBytes(char[] chars, int charIndex, int charCount,
1500 byte[] bytes, int byteIndex, bool flush)
1502 return m_encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
// Pointer-based variant; forwards to the wrapped encoding, 'flush' is not used.
1505 [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
1506 public unsafe override int GetBytes(char* chars, int charCount,
1507 byte* bytes, int byteCount, bool flush)
1509 return m_encoding.GetBytes(chars, charCount, bytes, byteCount);
// Decoder that forwards every call to the Encoding it wraps. The only field
// visible here is the wrapped encoding, and the 'flush' arguments are not
// passed on to it.
1513 internal sealed class DefaultDecoder : Decoder, IObjectReference, ISerializable
// The encoding all calls are forwarded to.
1515 private Encoding m_encoding;
// Wraps the given encoding.
1517 public DefaultDecoder(Encoding encoding)
1519 m_encoding = encoding;
// IObjectReference member: unsupported, always throws PlatformNotSupportedException.
1522 public Object GetRealObject(StreamingContext context)
1524 throw new PlatformNotSupportedException();
1527 // ISerializable implementation
// Unsupported as well: always throws PlatformNotSupportedException.
1528 void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
1530 throw new PlatformNotSupportedException();
1533 // Returns the number of characters the next call to GetChars will
1534 // produce if presented with the given range of bytes. The returned value
1535 // takes into account the state in which the decoder was left following the
1536 // last call to GetChars. The state of the decoder is not affected
1537 // by a call to this method.
// Three-argument form: same as the four-argument overload with flush = false.
1540 public override int GetCharCount(byte[] bytes, int index, int count)
1542 return GetCharCount(bytes, index, count, false);
// Forwards to the wrapped encoding's GetCharCount; 'flush' is not used.
1545 public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
1547 return m_encoding.GetCharCount(bytes, index, count);
1550 [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
1551 public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
1553 // By default just call the encoding version, no flush by default
1554 return m_encoding.GetCharCount(bytes, count);
1557 // Decodes a range of bytes in a byte array into a range of characters
1558 // in a character array. The method decodes byteCount bytes from
1559 // bytes starting at index byteIndex, storing the resulting
1560 // characters in chars starting at index charIndex. The
1561 // decoding takes into account the state in which the decoder was left
1562 // following the last call to this method.
1564 // An exception occurs if the character array is not large enough to
1565 // hold the complete decoding of the bytes. The GetCharCount method
1566 // can be used to determine the exact number of characters that will be
1567 // produced for a given range of bytes. Alternatively, the
1568 // GetMaxCharCount method of the Encoding that produced this
1569 // decoder can be used to determine the maximum number of characters that
1570 // will be produced for a given number of bytes, regardless of the actual
// Five-argument form: same as the six-argument overload with flush = false.
1574 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
1575 char[] chars, int charIndex)
1577 return GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);
// Forwards to the wrapped encoding's GetChars; 'flush' is not used.
1580 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
1581 char[] chars, int charIndex, bool flush)
1583 return m_encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
1586 [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
1587 public unsafe override int GetChars(byte* bytes, int byteCount,
1588 char* chars, int charCount, bool flush)
1590 // By default just call the encoding's version
1591 return m_encoding.GetChars(bytes, byteCount, chars, charCount);
// Decoding helper that tracks an input position in a byte buffer and an output
// position in a char buffer, keeps a running char count, and hands fallback
// bytes to a DecoderFallbackBuffer.
// NOTE(review): several statements of this class are not visible here; the
// comments below describe only what the visible lines establish.
1595 internal class EncodingCharBuffer
// Output side: current position, start, and end (start + count) of the char buffer.
1597 private unsafe char* _chars;
1598 private unsafe char* _charStart;
1599 private unsafe char* _charEnd;
// Running total of chars produced; exposed through Count.
1600 private int _charCountResult = 0;
1601 private Encoding _enc;
1602 private DecoderNLS _decoder;
// Input side: start, end (start + count), and current position of the byte buffer.
1603 private unsafe byte* _byteStart;
1604 private unsafe byte* _byteEnd;
1605 private unsafe byte* _bytes;
1606 private DecoderFallbackBuffer _fallbackBuffer;
// Records the buffer bounds and selects the fallback buffer to use.
1608 internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
1609 byte* byteStart, int byteCount)
1615 _charStart = charStart;
1616 _charEnd = charStart + charCount;
1618 _byteStart = byteStart;
1620 _byteEnd = byteStart + byteCount;
// Without a decoder, a fresh fallback buffer is created from the encoding's
// DecoderFallback; the other visible assignment takes the decoder's own buffer.
1622 if (_decoder == null)
1623 _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
1625 _fallbackBuffer = _decoder.FallbackBuffer;
1627 // If we're getting chars or getting char count we don't expect to have
1628 // to remember fallbacks between calls (so it should be empty)
1629 Debug.Assert(_fallbackBuffer.Remaining == 0,
1630 "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
1631 _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
// Adds one char that was decoded from numBytes input bytes; returns false when
// the output buffer has no room for it.
// NOTE(review): the store/count path is not visible here.
1634 internal unsafe bool AddChar(char ch, int numBytes)
// Output position has reached the end of the char buffer.
1638 if (_chars >= _charEnd)
1641 _bytes -= numBytes; // Didn't encode these bytes
// Second argument is true when the input position is back at (or before) the start.
1642 _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
1643 return false; // No throw, but no store either
// Single-byte convenience form: AddChar(ch, 1).
1652 internal unsafe bool AddChar(char ch)
1654 return AddChar(ch, 1);
// Adds two chars that were decoded together from numBytes input bytes.
1658 internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
1660 // Need room for 2 chars
// True when fewer than two output slots remain.
1661 if (_chars >= _charEnd - 1)
1664 _bytes -= numBytes; // Didn't encode these bytes
1665 _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
1666 return false; // No throw, but no store either
// numBytes is passed to both single-char calls; the second call is skipped if the first fails.
1668 return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
// NOTE(review): body not visible here; by its name it presumably moves the
// input position by 'count' -- confirm.
1671 internal unsafe void AdjustBytes(int count)
// True while the input position is before the end of the byte buffer.
1676 internal unsafe bool MoreData
1680 return _bytes < _byteEnd;
1684 // Do we have count more bytes?
1685 internal unsafe bool EvenMoreData(int count)
1687 return (_bytes <= _byteEnd - count);
1690 // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
1691 // but we'll double check just to make sure.
1692 internal unsafe byte GetNextByte()
1694 Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");
// Explicit re-check of the same condition the assert covers.
// NOTE(review): the value returned for this case is not visible here.
1695 if (_bytes >= _byteEnd)
// Number of input bytes between the start of the buffer and the current position.
1700 internal unsafe int BytesUsed
1704 return (int)(_bytes - _byteStart);
// The fixed-arity Fallback overloads below pack their bytes into a new array
// and defer to Fallback(byte[]).
1708 internal unsafe bool Fallback(byte fallbackByte)
1711 byte[] byteBuffer = new byte[] { fallbackByte };
1713 // Do the fallback and add the data.
1714 return Fallback(byteBuffer);
1717 internal unsafe bool Fallback(byte byte1, byte byte2)
1720 byte[] byteBuffer = new byte[] { byte1, byte2 };
1722 // Do the fallback and add the data.
1723 return Fallback(byteBuffer);
1726 internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
1729 byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };
1731 // Do the fallback and add the data.
1732 return Fallback(byteBuffer);
// Runs the fallback for the given bytes and accounts for the chars it produces.
1735 internal unsafe bool Fallback(byte[] byteBuffer)
1737 // Do the fallback and add the data.
// Remember the output position so the number of chars written can be measured.
1740 char* pTemp = _chars;
1741 if (_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false)
1744 _bytes -= byteBuffer.Length; // Back up over the bytes we were falling back; they were not consumed
1745 _fallbackBuffer.InternalReset(); // We didn't use this fallback.
// Second argument is true when no output has been written yet.
1746 _enc.ThrowCharsOverflow(_decoder, _chars == _charStart); // Throw?
1747 return false; // No throw, but no store either
// Count the chars the fallback wrote (distance the output position advanced).
1749 _charCountResult += unchecked((int)(_chars - pTemp));
// Alternate path: the two-argument InternalFallback returns a count that is added directly.
// NOTE(review): the condition selecting between the two paths is not visible here -- confirm.
1753 _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
// Running total of chars accumulated so far.
1759 internal unsafe int Count
1763 return _charCountResult;
// Encoding helper that tracks an input position in a char buffer and an output
// position in a byte buffer, keeps a running byte count, and pulls chars
// through an EncoderFallbackBuffer.
// NOTE(review): several statements of this class are not visible here; the
// comments below describe only what the visible lines establish.
1768 internal class EncodingByteBuffer
// Output side: current position, start, and end (start + count) of the byte buffer.
1770 private unsafe byte* _bytes;
1771 private unsafe byte* _byteStart;
1772 private unsafe byte* _byteEnd;
// Input side: current position, start, and end (start + count) of the char buffer.
1773 private unsafe char* _chars;
1774 private unsafe char* _charStart;
1775 private unsafe char* _charEnd;
// Running total of bytes; exposed through Count.
1776 private int _byteCountResult = 0;
1777 private Encoding _enc;
1778 private EncoderNLS _encoder;
// Internal (not private) field.
1779 internal EncoderFallbackBuffer fallbackBuffer;
// Records the buffer bounds and selects the fallback buffer to use.
1781 internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
1782 byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
1785 _encoder = inEncoder;
1787 _charStart = inCharStart;
1788 _chars = inCharStart;
1789 _charEnd = inCharStart + inCharCount;
1791 _bytes = inByteStart;
1792 _byteStart = inByteStart;
1793 _byteEnd = inByteStart + inByteCount;
// Without an encoder, a fresh fallback buffer is created from the encoding's
// EncoderFallback; the other visible assignment takes the encoder's own buffer.
1795 if (_encoder == null)
1796 this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
1799 this.fallbackBuffer = _encoder.FallbackBuffer;
1800 // If we're not converting we must not have data in our fallback buffer
// Throws when the encoder throws on overflow yet its fallback buffer still holds data.
1801 if (_encoder.m_throwOnOverflow && _encoder.InternalHasFallbackBuffer &&
1802 this.fallbackBuffer.Remaining > 0)
1803 throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
1804 _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
// The last argument is true when an output byte buffer was supplied.
1806 fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
// Adds one byte, given that moreBytesExpected further bytes will follow it;
// returns false when there is not enough room.
// NOTE(review): the store/count path is not visible here.
1809 internal unsafe bool AddByte(byte b, int moreBytesExpected)
1811 Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
// Not enough room for this byte plus the bytes announced to follow it.
1814 if (_bytes >= _byteEnd - moreBytesExpected)
1816 // Throw maybe. Check which buffer to back up (only matters if Converting)
1817 this.MovePrevious(true); // Throw if necessary
1818 return false; // No throw, but no store either
// The multi-byte overloads below add bytes one at a time, telling each call
// how many bytes still follow; && stops at the first failure.
1827 internal unsafe bool AddByte(byte b1)
1829 return (AddByte(b1, 0));
1832 internal unsafe bool AddByte(byte b1, byte b2)
1834 return (AddByte(b1, b2, 0));
1837 internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
1839 return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
1842 internal unsafe bool AddByte(byte b1, byte b2, byte b3)
1844 return AddByte(b1, b2, b3, (int)0);
1847 internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
1849 return (AddByte(b1, 2 + moreBytesExpected) &&
1850 AddByte(b2, 1 + moreBytesExpected) &&
1851 AddByte(b3, moreBytesExpected));
// NOTE(review): only the first operand of this expression is visible here.
1854 internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
1856 return (AddByte(b1, 3) &&
// Steps the input back by one unit (a fallback char if the fallback buffer is
// active, otherwise a source char) and lets the encoding decide whether to throw.
1862 internal unsafe void MovePrevious(bool bThrow)
1864 if (fallbackBuffer.bFallingBack)
1865 fallbackBuffer.MovePrevious(); // don't use last fallback
1868 Debug.Assert(_chars > _charStart ||
1869 ((bThrow == true) && (_bytes == _byteStart)),
1870 "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
1871 if (_chars > _charStart)
1872 _chars--; // don't use last char
// NOTE(review): any condition guarding this call (e.g. on bThrow) is not visible here -- confirm.
// Second argument is true when no output bytes have been written.
1876 _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart); // Throw? (and reset fallback if not converting)
// Hands the char to the fallback buffer; the input position is passed by reference.
1879 internal unsafe bool Fallback(char charFallback)
1882 return fallbackBuffer.InternalFallback(charFallback, ref _chars);
1885 internal unsafe bool MoreData
1889 // See if fallbackBuffer is not empty or if there's data left in chars buffer.
1890 return ((fallbackBuffer.Remaining > 0) || (_chars < _charEnd));
// Produces the next char to encode: the fallback buffer is consulted first,
// then the source buffer.
1894 internal unsafe char GetNextChar()
1896 // See if there's something in our fallback buffer
1897 char cReturn = fallbackBuffer.InternalGetNextChar();
1899 // Nothing in the fallback buffer, return our normal data.
// Read from the source only while chars remain; the read advances the input position.
1902 if (_chars < _charEnd)
1903 cReturn = *(_chars++);
// Number of input chars between the start of the buffer and the current position.
1909 internal unsafe int CharsUsed
1913 return (int)(_chars - _charStart);
// Running total of bytes accumulated so far.
1917 internal unsafe int Count
1921 return _byteCountResult;