1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Diagnostics;
6 using System.Diagnostics.Contracts;
7 using System.Threading;
8 using System.Runtime.Serialization;
9 using System.Diagnostics.CodeAnalysis;
13 // This abstract base class represents a character encoding. The class provides
14 // methods to convert arrays and strings of Unicode characters to and from
15 // arrays of bytes. A number of Encoding implementations are provided in
16 // the System.Text package, including:
18 // ASCIIEncoding, which encodes Unicode characters as single 7-bit
19 // ASCII characters. This encoding only supports character values between 0x00 and 0x7F.
21 // BaseCodePageEncoding, which encapsulates a Windows code page. Any
22 // installed code page can be accessed through this encoding, and conversions
23 // are performed using the WideCharToMultiByte and
24 // MultiByteToWideChar Windows API functions.
25 // UnicodeEncoding, which encodes each Unicode character as two
26 // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
27 // page 1201) encodings are recognized.
28 // UTF7Encoding, which encodes Unicode characters using the UTF-7
29 // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
30 // encoding supports all Unicode character values, and can also be accessed
31 // as code page 65000.
32 // UTF8Encoding, which encodes Unicode characters using the UTF-8
33 // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
34 // encoding supports all Unicode character values, and can also be accessed
35 // as code page 65001.
36 // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
38 // In addition to directly instantiating Encoding objects, an
39 // application can use the GetEncoding methods and the static ASCII,
40 // Default, Unicode, UTF7, and UTF8
41 // members of this class to obtain encodings.
43 // Through an encoding, the GetBytes method is used to convert arrays
44 // of characters to arrays of bytes, and the GetChars method is used to
45 // convert arrays of bytes to arrays of characters. The GetBytes and
46 // GetChars methods maintain no state between conversions, and are
47 // generally intended for conversions of complete blocks of bytes and
48 // characters in one operation. When the data to be converted is only available
49 // in sequential blocks (such as data read from a stream) or when the amount of
50 // data is so large that it needs to be divided into smaller blocks, an
51 // application may choose to use a Decoder or an Encoder to
52 // perform the conversion. Decoders and encoders allow sequential blocks of
53 // data to be converted and they maintain the state required to support
54 // conversions of data that spans adjacent blocks. Decoders and encoders are
55 // obtained using the GetDecoder and GetEncoder methods.
57 // The core GetBytes and GetChars methods require the caller
58 // to provide the destination buffer and ensure that the buffer is large enough
59 // to hold the entire result of the conversion. When using these methods,
60 // either directly on an Encoding object or on an associated
61 // Decoder or Encoder, an application can use one of two methods
62 // to allocate destination buffers.
64 // The GetByteCount and GetCharCount methods can be used to
65 // compute the exact size of the result of a particular conversion, and an
66 // appropriately sized buffer for that conversion can then be allocated.
67 // The GetMaxByteCount and GetMaxCharCount methods can
68 // be used to compute the maximum possible size of a conversion of a given
69 // number of bytes or characters, and a buffer of that size can then be reused
70 // for multiple conversions.
72 // The first method generally uses less memory, whereas the second method
73 // generally executes faster.
76 public abstract class Encoding : ICloneable
78 // For netcore we use UTF8 as default encoding since ANSI isn't available
79 private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding = new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);
81 // Returns the default encoding. In this build that is always the shared UTF-8 instance above
// (created with encoderShouldEmitUTF8Identifier: false), not an ANSI code page encoding.
82 public static Encoding Default => s_defaultEncoding;
85 // The following values are from mlang.idl. These values
86 // should be in sync with those in mlang.idl.
88 internal const int MIMECONTF_MAILNEWS = 0x00000001;
89 internal const int MIMECONTF_BROWSER = 0x00000002;
90 internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;
91 internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;
93 // Special Case Code Pages
94 private const int CodePageDefault = 0;
95 private const int CodePageNoOEM = 1; // OEM Code page not supported
96 private const int CodePageNoMac = 2; // MAC code page not supported
97 private const int CodePageNoThread = 3; // Thread code page not supported
98 private const int CodePageNoSymbol = 42; // Symbol code page not supported
99 private const int CodePageUnicode = 1200; // Unicode
100 private const int CodePageBigEndian = 1201; // Big Endian Unicode
101 private const int CodePageWindows1252 = 1252; // Windows 1252 code page
103 // 20936 has same code page as 10008, so we'll special case it
104 private const int CodePageMacGB2312 = 10008;
105 private const int CodePageGB2312 = 20936;
106 private const int CodePageMacKorean = 10003;
107 private const int CodePageDLLKorean = 20949;
109 // ISO 2022 Code Pages
110 private const int ISO2022JP = 50220;
111 private const int ISO2022JPESC = 50221;
112 private const int ISO2022JPSISO = 50222;
113 private const int ISOKorean = 50225;
114 private const int ISOSimplifiedCN = 50227;
115 private const int EUCJP = 51932;
116 private const int ChineseHZ = 52936; // HZ has ~}~{~~ sequences
118 // 51936 is the same as 936
119 private const int DuplicateEUCCN = 51936;
120 private const int EUCCN = 936;
122 private const int EUCKR = 51949;
124 // Latin 1 & ASCII Code Pages
125 internal const int CodePageASCII = 20127; // ASCII
126 internal const int ISO_8859_1 = 28591; // Latin1
129 private const int ISCIIAssemese = 57006;
130 private const int ISCIIBengali = 57003;
131 private const int ISCIIDevanagari = 57002;
132 private const int ISCIIGujarathi = 57010;
133 private const int ISCIIKannada = 57008;
134 private const int ISCIIMalayalam = 57009;
135 private const int ISCIIOriya = 57007;
136 private const int ISCIIPanjabi = 57011;
137 private const int ISCIITamil = 57004;
138 private const int ISCIITelugu = 57005;
141 private const int GB18030 = 54936;
144 private const int ISO_8859_8I = 38598;
145 private const int ISO_8859_8_Visual = 28598;
147 // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
148 private const int ENC50229 = 50229;
150 // Special code pages
151 private const int CodePageUTF7 = 65000;
152 private const int CodePageUTF8 = 65001;
153 private const int CodePageUTF32 = 12000;
154 private const int CodePageUTF32BE = 12001;
// Code page identifier of this encoding; assigned by the constructors and by DeserializeEncoding.
156 internal int m_codePage = 0;
158 // dataItem should be internal (not private), otherwise it will break during the deserialization
159 // of data that came from Everett.
// Cached EncodingTable entry for m_codePage; GetDataItem() fills it in when it is still null.
160 internal CodePageDataItem dataItem = null;
// Set to true when deserialized data did not carry the fallback fields
// (see OnDeserialized and DeserializeEncoding).
163 internal bool m_deserializedFromEverett = false;
165 // Because of encoders we may be read only
// Defaults to true; Clone() clears it on the copy it produces.
166 [OptionalField(VersionAdded = 2)]
167 private bool m_isReadOnly = true;
169 // Encoding (encoder) fallback
170 [OptionalField(VersionAdded = 2)]
171 internal EncoderFallback encoderFallback = null;
// Decoder fallback, the counterpart of encoderFallback.
172 [OptionalField(VersionAdded = 2)]
173 internal DecoderFallback decoderFallback = null;
// Parameterless constructor: chains to Encoding(int) with code page 0.
175 protected Encoding() : this(0)
// Creates an encoding for the given code page and installs the default fallbacks.
// Throws ArgumentOutOfRangeException when codePage fails validation.
// NOTE(review): the condition guarding the throw is not visible in this listing — confirm against the full file.
180 protected Encoding(int codePage)
182 // Validate code page
185 throw new ArgumentOutOfRangeException(nameof(codePage));
187 Contract.EndContractBlock();
189 // Remember code page
190 m_codePage = codePage;
192 // Use default encoder/decoder fallbacks
// SetDefaultFallbacks is virtual, so a subclass override is what runs here.
193 this.SetDefaultFallbacks();
196 // This constructor is needed to allow any sub-classing implementation to provide encoder/decoder fallback objects
197 // because the encoding object is always created as a read-only object and does not allow setting the encoder/decoder fallback
198 // after the creation is done.
// A null fallback argument is replaced by the corresponding internal best-fit fallback bound to this instance.
// Throws ArgumentOutOfRangeException when codePage fails validation.
// NOTE(review): the condition guarding the throw is not visible in this listing — confirm against the full file.
199 protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
201 // Validate code page
204 throw new ArgumentOutOfRangeException(nameof(codePage));
206 Contract.EndContractBlock();
208 // Remember code page
209 m_codePage = codePage;
// Unlike Encoding(int), this constructor does not call SetDefaultFallbacks(); it assigns the fields directly.
211 this.encoderFallback = encoderFallback ?? new InternalEncoderBestFitFallback(this);
212 this.decoderFallback = decoderFallback ?? new InternalDecoderBestFitFallback(this);
// Installs the fallbacks used when none were supplied explicitly.
// The base implementation creates best-fit encoder and decoder fallbacks bound to this instance.
// The method is virtual so that derived encodings can substitute their own defaults
// (the original comment mentions replacement fallbacks for the UTF-X encodings and for ASCII).
internal virtual void SetDefaultFallbacks()
{
    encoderFallback = new InternalEncoderBestFitFallback(this);
    decoderFallback = new InternalDecoderBestFitFallback(this);
}
225 #region Serialization
// Clears both fallback fields before deserialization so that OnDeserialized() can tell
// whether the incoming data supplied them.
226 internal void OnDeserializing()
228 // initialize the optional Whidbey fields
229 encoderFallback = null;
230 decoderFallback = null;
// Post-deserialization fix-up: if either fallback is still null the data is treated as
// Everett-format, the flag is recorded, and the default fallbacks are installed.
234 internal void OnDeserialized()
236 if (encoderFallback == null || decoderFallback == null)
238 m_deserializedFromEverett = true;
239 SetDefaultFallbacks();
242 // dataItem is always recalculated from the code page #
// NOTE(review): the statement this comment describes is not visible in this listing.
247 private void OnDeserializing(StreamingContext ctx)
254 private void OnDeserialized(StreamingContext ctx)
260 private void OnSerializing(StreamingContext ctx)
262 // to be consistent with SerializeEncoding
266 // The following two methods are used by inherited classes that implement ISerializable.
267 // Deserialization Helper
// Restores m_codePage, m_isReadOnly and both fallbacks from info.
// Throws ArgumentNullException when info is null.
268 internal void DeserializeEncoding(SerializationInfo info, StreamingContext context)
271 if (info == null) throw new ArgumentNullException(nameof(info));
272 Contract.EndContractBlock();
274 // All versions have a code page
275 this.m_codePage = (int)info.GetValue("m_codePage", typeof(int));
277 // We can get dataItem on the fly if needed, and the index is different between versions
278 // so ignore whatever dataItem data we get from Everett.
279 this.dataItem = null;
281 // See if we have a code page
285 // Try Whidbey V2.0 Fields
288 m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
290 this.encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
291 this.decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
// A SerializationException from the reads above is taken to mean the V2.0 entries are absent.
293 catch (SerializationException)
296 // Didn't have Whidbey things, must be Everett
298 this.m_deserializedFromEverett = true;
300 // May as well be read only
// NOTE(review): the statement this comment describes is not visible in this listing.
302 SetDefaultFallbacks();
// Serialization helper for derived classes that implement ISerializable.
// Writes the V2.0 (Whidbey) entries followed by the V1.1 (Everett) entries, including the
// duplicated "Encoding+" names that Everett used, so the data stays portable.
// Throws ArgumentNullException when info is null.
internal void SerializeEncoding(SerializationInfo info, StreamingContext context)
{
    if (info == null)
    {
        throw new ArgumentNullException(nameof(info));
    }
    Contract.EndContractBlock();

    int codePage = m_codePage;

    // New in V2.0 (Whidbey).
    info.AddValue("m_isReadOnly", m_isReadOnly);
    info.AddValue("encoderFallback", EncoderFallback);
    info.AddValue("decoderFallback", DecoderFallback);

    // Present in Everett V1.1 as well.
    info.AddValue("m_codePage", codePage);

    // Unique to Everett V1.1; always written as null.
    info.AddValue("dataItem", null);

    // Everett duplicated these fields under qualified names.
    info.AddValue("Encoding+m_codePage", codePage);
    info.AddValue("Encoding+dataItem", null);
}
329 #endregion Serialization
331 // Converts a byte array from one encoding to another. The bytes in the
332 // bytes array are converted from srcEncoding to
333 // dstEncoding, and the returned value is a new byte array
334 // containing the result of the conversion.
// Delegates to the range overload over the whole array (index 0, count bytes.Length).
// Throws ArgumentNullException naming bytes.
// NOTE(review): the end of the parameter list and the condition guarding the throw are not visible in this listing.
337 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
341 throw new ArgumentNullException(nameof(bytes));
342 Contract.Ensures(Contract.Result<byte[]>() != null);
344 return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
347 // Converts a range of bytes in a byte array from one encoding to another.
348 // This method converts count bytes from bytes starting at
349 // index index from srcEncoding to dstEncoding, and
350 // returns a new byte array containing the result of the conversion.
// Throws ArgumentNullException naming whichever of srcEncoding / dstEncoding is null
// (srcEncoding is reported first when both are), or naming bytes.
// No index/count validation is visible here; the range is passed straight to srcEncoding.GetChars.
353 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
354 byte[] bytes, int index, int count)
356 if (srcEncoding == null || dstEncoding == null)
358 throw new ArgumentNullException((srcEncoding == null ? nameof(srcEncoding) : nameof(dstEncoding)),
359 SR.ArgumentNull_Array);
// NOTE(review): the condition guarding the next throw is not visible in this listing.
363 throw new ArgumentNullException(nameof(bytes),
364 SR.ArgumentNull_Array);
366 Contract.Ensures(Contract.Result<byte[]>() != null);
// Decode with the source encoding, then re-encode the resulting chars with the destination encoding.
368 return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
// Registers an additional encoding provider.
// Argument validation is left to EncodingProvider.AddProvider.
public static void RegisterProvider(EncodingProvider provider) => EncodingProvider.AddProvider(provider);
// Returns the encoding for a code page number.
// Registered providers are consulted first, before the range check; the built-in code pages
// are then mapped by the case list below.
// Throws ArgumentOutOfRangeException outside 0..65535, ArgumentException for the unsupported
// Win32 special values (1, 2, 3, 42), and NotSupportedException when EncodingTable has no data.
// NOTE(review): the handling of the provider result, the switch header and the code after the
// final validity check are not visible in this listing.
378 public static Encoding GetEncoding(int codepage)
380 Encoding result = EncodingProvider.GetEncodingFromProvider(codepage);
385 // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
386 // add the corresponding item in EncodingTable.
387 // Otherwise, the code below will throw exception when trying to call
388 // EncodingTable.GetDataItem().
390 if (codepage < 0 || codepage > 65535)
392 throw new ArgumentOutOfRangeException(
393 nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
396 Contract.EndContractBlock();
400 case CodePageDefault: return Default; // 0
401 case CodePageUnicode: return Unicode; // 1200
402 case CodePageBigEndian: return BigEndianUnicode; // 1201
403 case CodePageUTF32: return UTF32; // 12000
404 case CodePageUTF32BE: return BigEndianUTF32; // 12001
405 case CodePageUTF7: return UTF7; // 65000
406 case CodePageUTF8: return UTF8; // 65001
407 case CodePageASCII: return ASCII; // 20127
408 case ISO_8859_1: return Latin1; // 28591
410 // We don't allow the following special code page values that Win32 allows.
411 case CodePageNoOEM: // 1 CP_OEMCP
412 case CodePageNoMac: // 2 CP_MACCP
413 case CodePageNoThread: // 3 CP_THREAD_ACP
414 case CodePageNoSymbol: // 42 CP_SYMBOL
415 throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
418 // Is it a valid code page?
419 if (EncodingTable.GetCodePageDataItem(codepage) == null)
421 throw new NotSupportedException(
422 SR.Format(SR.NotSupported_NoCodepageData, codepage));
// Returns an encoding for the code page that uses the supplied fallbacks.
// Providers are asked first; otherwise the encoding from GetEncoding(int) is cloned and the
// fallbacks are set on the copy, leaving the original instance untouched.
// NOTE(review): the statement under the null check below is not visible in this listing;
// presumably it returns the provider's encoding — confirm.
429 public static Encoding GetEncoding(int codepage,
430 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
432 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
434 if (baseEncoding != null)
437 // Get the default encoding (which is cached and read only)
438 baseEncoding = GetEncoding(codepage);
440 // Clone it and set the fallback
// Clone() clears the read-only flag on the copy.
441 Encoding fallbackEncoding = (Encoding)baseEncoding.Clone();
442 fallbackEncoding.EncoderFallback = encoderFallback;
443 fallbackEncoding.DecoderFallback = decoderFallback;
445 return fallbackEncoding;
448 // Returns an Encoding object for a given encoding name.
// Providers are asked first; otherwise the name is mapped to a code page through
// EncodingTable.GetCodePageFromName and resolved by GetEncoding(int).
// NOTE(review): the statement under the null check below is not visible in this listing;
// presumably it returns the provider's encoding — confirm.
451 public static Encoding GetEncoding(String name)
453 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name);
454 if (baseEncoding != null)
458 // NOTE: If you add a new encoding that can be requested by name, be sure to
459 // add the corresponding item in EncodingTable.
460 // Otherwise, the code below will throw exception when trying to call
461 // EncodingTable.GetCodePageFromName().
463 return GetEncoding(EncodingTable.GetCodePageFromName(name));
466 // Returns an Encoding object for a given encoding name, using the supplied fallbacks.
// Providers are asked first; otherwise the name is mapped to a code page through
// EncodingTable.GetCodePageFromName and resolved by GetEncoding(int, EncoderFallback, DecoderFallback).
// NOTE(review): the statement under the null check below is not visible in this listing;
// presumably it returns the provider's encoding — confirm.
469 public static Encoding GetEncoding(String name,
470 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
472 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
473 if (baseEncoding != null)
477 // NOTE: If you add a new encoding that can be requested by name, be sure to
478 // add the corresponding item in EncodingTable.
479 // Otherwise, the code below will throw exception when trying to call
480 // EncodingTable.GetCodePageFromName().
482 return (GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback));
// Returns the EncodingInfo entries that EncodingTable reports for its encodings.
public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings();
// Base implementation: returns an empty byte array.
// Virtual, so derived encodings can return their own preamble bytes.
public virtual byte[] GetPreamble() => Array.Empty<byte>();
// Makes sure dataItem holds the EncodingTable entry for m_codePage.
// Does nothing when the entry is already cached.
// Throws NotSupportedException when EncodingTable has no data for the code page.
private void GetDataItem()
{
    if (dataItem != null)
    {
        return;
    }

    dataItem = EncodingTable.GetCodePageDataItem(m_codePage);
    if (dataItem == null)
    {
        throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, m_codePage));
    }
}
511 // Returns the name for this encoding that can be used with mail agent body tags.
512 // If the encoding may not be used, the string is empty.
// The value is read from the cached code page data item.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
514 public virtual String BodyName
518 if (dataItem == null)
522 return (dataItem.BodyName);
526 // Returns the human-readable description of the encoding (e.g. Hebrew (DOS)).
// Throws NotSupportedException when no name is available for the code page.
// NOTE(review): EncodingName is declared twice in this listing; the two definitions are presumably
// alternatives selected by conditional compilation whose directives are not visible — confirm.
528 public virtual String EncodingName
532 string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
533 if (encodingName == null)
535 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
// A value that still starts with the resource key prefix means the resource string was stripped.
538 if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
540 // On ProjectN, resource strings are stripped from retail builds and replaced by
541 // their identifier names. Since this property is meant to be a localized string,
542 // but we don't localize ProjectN, we specifically need to do something reasonable
543 // in this case. This currently returns the English name of the encoding from a
544 // static data table.
// NOTE(review): GetCodePageDataItem can return null (other callers check for it); the result is
// dereferenced here without a check.
545 encodingName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
546 if (encodingName == null)
// NOTE(review): this call passes two format arguments while the one above passes one, for the same resource.
548 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
// Maps a built-in code page number to its Globalization_cp_* resource string.
// Returns null for any code page that is not listed.
// NOTE(review): the switch header is not visible in this listing.
555 private static string GetLocalizedEncodingNameResource(int codePage)
559 case 1200: return SR.Globalization_cp_1200;
560 case 1201: return SR.Globalization_cp_1201;
561 case 12000: return SR.Globalization_cp_12000;
562 case 12001: return SR.Globalization_cp_12001;
563 case 20127: return SR.Globalization_cp_20127;
564 case 28591: return SR.Globalization_cp_28591;
565 case 65000: return SR.Globalization_cp_65000;
566 case 65001: return SR.Globalization_cp_65001;
567 default: return null;
// Returns the resource string whose key is "Globalization_cp_" followed by the code page number.
// NOTE(review): second declaration of EncodingName in this listing; presumably the alternative branch
// of a conditional-compilation block whose directives are not visible — confirm.
571 public virtual String EncodingName
575 return SR.GetResourceString("Globalization_cp_" + m_codePage.ToString());
579 // Returns the name for this encoding that can be used with mail agent header
580 // tags. If the encoding may not be used, the string is empty.
// The value is read from the cached code page data item.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
582 public virtual String HeaderName
586 if (dataItem == null)
590 return (dataItem.HeaderName);
594 // Returns the IANA preferred name for this encoding.
// The value is read from the cached code page data item.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
595 public virtual String WebName
599 if (dataItem == null)
603 return (dataItem.WebName);
607 // Returns the windows code page that most closely corresponds to this encoding.
// The value is the UIFamilyCodePage of the cached code page data item.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
609 public virtual int WindowsCodePage
613 if (dataItem == null)
617 return (dataItem.UIFamilyCodePage);
622 // True if and only if the encoding is used for display by browser clients.
// Tests the MIMECONTF_BROWSER bit of the data item's Flags.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
624 public virtual bool IsBrowserDisplay
628 if (dataItem == null)
632 return ((dataItem.Flags & MIMECONTF_BROWSER) != 0);
636 // True if and only if the encoding is used for saving by browser clients.
// Tests the MIMECONTF_SAVABLE_BROWSER bit of the data item's Flags.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
638 public virtual bool IsBrowserSave
642 if (dataItem == null)
646 return ((dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0);
650 // True if and only if the encoding is used for display by mail and news clients.
// Tests the MIMECONTF_MAILNEWS bit of the data item's Flags.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
652 public virtual bool IsMailNewsDisplay
656 if (dataItem == null)
660 return ((dataItem.Flags & MIMECONTF_MAILNEWS) != 0);
665 // True if and only if the encoding is used for saving documents by mail and news clients.
// Tests the MIMECONTF_SAVABLE_MAILNEWS bit of the data item's Flags.
// NOTE(review): the statement run when dataItem is null is not visible in this listing;
// presumably it loads the item via GetDataItem() — confirm.
668 public virtual bool IsMailNewsSave
672 if (dataItem == null)
676 return ((dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0);
680 // True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
682 public virtual bool IsSingleByte
// Gets or sets the encoder fallback of this encoding.
// The setter can throw InvalidOperationException (read-only message) and ArgumentNullException (value).
// NOTE(review): the conditions guarding the two throws are not visible in this listing; presumably a
// read-only check and a null check on value — confirm.
691 public EncoderFallback EncoderFallback
695 return encoderFallback;
701 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
704 throw new ArgumentNullException(nameof(value));
705 Contract.EndContractBlock();
707 encoderFallback = value;
// Gets or sets the decoder fallback of this encoding.
// The setter can throw InvalidOperationException (read-only message) and ArgumentNullException (value).
// NOTE(review): the conditions guarding the two throws are not visible in this listing; presumably a
// read-only check and a null check on value — confirm.
712 public DecoderFallback DecoderFallback
716 return decoderFallback;
722 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
725 throw new ArgumentNullException(nameof(value));
726 Contract.EndContractBlock();
728 decoderFallback = value;
// Creates a shallow copy via MemberwiseClone and clears the copy's read-only flag.
// NOTE(review): the return statement is not visible in this listing.
733 public virtual Object Clone()
735 Encoding newEncoding = (Encoding)this.MemberwiseClone();
737 // New one should be writable
738 newEncoding.m_isReadOnly = false;
// Reports the read-only flag of this instance (true by default; cleared on copies made by Clone()).
public bool IsReadOnly => m_isReadOnly;
// Returns an encoding for the ASCII character set: the shared default
// instance held by ASCIIEncoding.
public static Encoding ASCII
{
    get { return ASCIIEncoding.s_default; }
}
// Returns an encoding for the Latin1 character set: the shared default
// instance held by Latin1Encoding. Private; kept for internal optimizations.
private static Encoding Latin1
{
    get { return Latin1Encoding.s_default; }
}
762 // Returns the number of bytes required to encode the given character array.
// Delegates to the range overload over the whole array.
// Throws ArgumentNullException naming chars.
// NOTE(review): the condition guarding the throw is not visible in this listing.
766 public virtual int GetByteCount(char[] chars)
770 throw new ArgumentNullException(nameof(chars),
771 SR.ArgumentNull_Array);
773 Contract.EndContractBlock();
775 return GetByteCount(chars, 0, chars.Length);
// Returns the number of bytes required to encode the given string.
// Throws ArgumentNullException naming s.
// NOTE(review): the condition guarding the throw is not visible in this listing.
779 public virtual int GetByteCount(String s)
782 throw new ArgumentNullException(nameof(s));
783 Contract.EndContractBlock();
// Copies the string into a temporary char array so the abstract array overload can be used.
785 char[] chars = s.ToCharArray();
786 return GetByteCount(chars, 0, chars.Length);
789 // Returns the number of bytes required to encode a range of characters in
790 // a character array.
// Abstract: the other GetByteCount overloads in this class funnel into this one.
793 public abstract int GetByteCount(char[] chars, int index, int count);
795 // Returns the number of bytes required to encode a string range.
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index or count.
// NOTE(review): the conditions guarding the first three throws are not visible in this listing.
798 public int GetByteCount(string s, int index, int count)
801 throw new ArgumentNullException(nameof(s),
802 SR.ArgumentNull_String);
804 throw new ArgumentOutOfRangeException(nameof(index),
805 SR.ArgumentOutOfRange_NeedNonNegNum);
807 throw new ArgumentOutOfRangeException(nameof(count),
808 SR.ArgumentOutOfRange_NeedNonNegNum);
// Compared against s.Length - count so that index + count is never computed.
809 if (index > s.Length - count)
810 throw new ArgumentOutOfRangeException(nameof(index),
811 SR.ArgumentOutOfRange_IndexCount);
812 Contract.EndContractBlock();
// Pin the string and hand the requested range to the pointer overload.
816 fixed (char* pChar = s)
818 return GetByteCount(pChar + index, count);
823 // We expect this to be the workhorse for NLS encodings
824 // unfortunately for existing overrides, it has to call the [] version,
825 // which is really slow, so this method should be avoided if you're calling
826 // a 3rd party encoding.
// Base implementation: copies count chars into a managed array and calls the abstract array overload.
// Throws ArgumentNullException naming chars and ArgumentOutOfRangeException naming count.
// NOTE(review): the conditions guarding the throws and the declaration of index are not visible in this listing.
828 [CLSCompliant(false)]
829 public virtual unsafe int GetByteCount(char* chars, int count)
831 // Validate input parameters
833 throw new ArgumentNullException(nameof(chars),
834 SR.ArgumentNull_Array);
837 throw new ArgumentOutOfRangeException(nameof(count),
838 SR.ArgumentOutOfRange_NeedNonNegNum);
839 Contract.EndContractBlock();
841 char[] arrChar = new char[count];
844 for (index = 0; index < count; index++)
845 arrChar[index] = chars[index];
847 return GetByteCount(arrChar, 0, count);
// Workhorse overload for NLS encodings; takes an encoder, which may be null.
// This base implementation ignores the encoder and forwards to GetByteCount(char*, int).
// Callers must validate their arguments first: this overload only asserts.
internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    Debug.Assert(chars != null);
    Debug.Assert(count >= 0);

    return GetByteCount(chars, count);
}
860 // Returns a byte array containing the encoded representation of the given character array.
// Delegates to the range overload over the whole array.
// Throws ArgumentNullException naming chars.
// NOTE(review): the condition guarding the throw is not visible in this listing.
864 public virtual byte[] GetBytes(char[] chars)
868 throw new ArgumentNullException(nameof(chars),
869 SR.ArgumentNull_Array);
871 Contract.EndContractBlock();
872 return GetBytes(chars, 0, chars.Length);
875 // Returns a byte array containing the encoded representation of a range
876 // of characters in a character array.
// Sizes the result exactly with GetByteCount, then encodes into it from offset 0.
// No argument checks are visible here; the called overloads receive the arguments unchanged.
// NOTE(review): the return statement is not visible in this listing.
879 public virtual byte[] GetBytes(char[] chars, int index, int count)
881 byte[] result = new byte[GetByteCount(chars, index, count)];
882 GetBytes(chars, index, count, result, 0);
886 // Encodes a range of characters in a character array into a range of bytes
887 // in a byte array. An exception occurs if the byte array is not large
888 // enough to hold the complete encoding of the characters. The
889 // GetByteCount method can be used to determine the exact number of
890 // bytes that will be produced for a given range of characters.
891 // Alternatively, the GetMaxByteCount method can be used to
892 // determine the maximum number of bytes that will be produced for a given
893 // number of characters, regardless of the actual character values.
// Abstract. The int result is used by callers in this class as the number of bytes written.
895 public abstract int GetBytes(char[] chars, int charIndex, int charCount,
896 byte[] bytes, int byteIndex);
898 // Returns a byte array containing the encoded representation of the given string.
// Throws ArgumentNullException naming s.
// NOTE(review): the condition guarding the throw and the return statement are not visible in this listing.
902 public virtual byte[] GetBytes(String s)
905 throw new ArgumentNullException(nameof(s),
906 SR.ArgumentNull_String);
907 Contract.EndContractBlock();
// Size the buffer exactly, then encode the whole string into it.
909 int byteCount = GetByteCount(s);
910 byte[] bytes = new byte[byteCount];
911 int bytesReceived = GetBytes(s, 0, s.Length, bytes, 0);
// Debug-only check that the counting pass and the encoding pass agree.
912 Debug.Assert(byteCount == bytesReceived);
916 // Returns a byte array containing the encoded representation of the given string range.
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index or count.
// NOTE(review): the conditions guarding the first three throws, the condition for the empty-array
// return, and the final return statement are not visible in this listing.
920 public byte[] GetBytes(string s, int index, int count)
923 throw new ArgumentNullException(nameof(s),
924 SR.ArgumentNull_String);
926 throw new ArgumentOutOfRangeException(nameof(index),
927 SR.ArgumentOutOfRange_NeedNonNegNum);
929 throw new ArgumentOutOfRangeException(nameof(count),
930 SR.ArgumentOutOfRange_NeedNonNegNum);
// Compared against s.Length - count so that index + count is never computed.
931 if (index > s.Length - count)
932 throw new ArgumentOutOfRangeException(nameof(index),
933 SR.ArgumentOutOfRange_IndexCount);
934 Contract.EndContractBlock();
// Pin the string, count the bytes for the range, then encode through the pointer overloads.
938 fixed (char* pChar = s)
940 int byteCount = GetByteCount(pChar + index, count);
// Presumably taken when byteCount is 0, which would also keep &bytes[0] below valid — confirm.
942 return Array.Empty<byte>();
944 byte[] bytes = new byte[byteCount];
945 fixed (byte* pBytes = &bytes[0])
947 int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount);
// Debug-only check that the counting pass and the encoding pass agree.
948 Debug.Assert(byteCount == bytesReceived);
// Encodes a range of characters from a string into bytes, starting at byteIndex.
// Converts the whole string to a char array and forwards to the abstract array overload.
// Throws ArgumentNullException naming s.
// NOTE(review): the condition guarding the throw is not visible in this listing.
955 public virtual int GetBytes(String s, int charIndex, int charCount,
956 byte[] bytes, int byteIndex)
959 throw new ArgumentNullException(nameof(s));
960 Contract.EndContractBlock();
961 return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
// Internal workhorse overload that additionally takes an encoder.
// This base implementation ignores the encoder and forwards to GetBytes(char*, int, byte*, int).
// Callers must validate their arguments before calling the internal version.
internal virtual unsafe int GetBytes(char* chars, int charCount,
    byte* bytes, int byteCount, EncoderNLS encoder)
{
    return GetBytes(chars, charCount, bytes, byteCount);
}
972 // We expect this to be the workhorse for NLS Encodings, but for existing
973 // ones we need a working (if slow) default implementation
975 // WARNING WARNING WARNING
977 // WARNING: If this breaks it could be a security threat. Obviously we
978 // call this internally, so you need to make sure that your pointers, counts
979 // and indexes are correct when you call this method.
981 // In addition, we have internal code, which will be marked as "safe" calling
982 // this code. However this code is dependent upon the implementation of an
983 // external GetBytes() method, which could be overridden by a third party and
984 // the results of which cannot be guaranteed. We use that result to copy
985 // the byte[] to our byte* output buffer. If the result count was wrong, we
986 // could easily overflow our output buffer. Therefore we do an extra test
987 // when we copy the buffer so that we don't overflow byteCount either.
// Throws ArgumentNullException naming bytes (checked first) or chars, and
// ArgumentOutOfRangeException naming charCount (checked first) or byteCount.
// NOTE(review): the declaration of index, the statement under "if (result < byteCount)" and the
// return statement are not visible in this listing.
989 [CLSCompliant(false)]
990 public virtual unsafe int GetBytes(char* chars, int charCount,
991 byte* bytes, int byteCount)
993 // Validate input parameters
994 if (bytes == null || chars == null)
995 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
996 SR.ArgumentNull_Array);
998 if (charCount < 0 || byteCount < 0)
999 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
1000 SR.ArgumentOutOfRange_NeedNonNegNum);
1001 Contract.EndContractBlock();
1003 // Get the char array to convert
1004 char[] arrChar = new char[charCount];
1007 for (index = 0; index < charCount; index++)
1008 arrChar[index] = chars[index];
1010 // Get the byte array to fill
1011 byte[] arrByte = new byte[byteCount];
// Encode through the array overload, which may be a third-party override.
1014 int result = GetBytes(arrChar, 0, charCount, arrByte, 0);
1016 Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");
1018 // Copy the byte array
1019 // WARNING: We MUST make sure that we don't copy too many bytes. We can't
1020 // rely on result because it could be a 3rd party implementation. We need
1021 // to make sure we never copy more than byteCount bytes no matter the value of result.
// Presumably this branch lowers byteCount to result so the loop below copies only what was written — confirm.
1023 if (result < byteCount)
1026 // Copy the data, don't overrun our array!
1027 for (index = 0; index < byteCount; index++)
1028 bytes[index] = arrByte[index];
1033 // Returns the number of characters produced by decoding the given byte array.
// Delegates to the range overload over the whole array.
// Throws ArgumentNullException naming bytes.
// NOTE(review): the condition guarding the throw is not visible in this listing.
1037 public virtual int GetCharCount(byte[] bytes)
1041 throw new ArgumentNullException(nameof(bytes),
1042 SR.ArgumentNull_Array);
1044 Contract.EndContractBlock();
1045 return GetCharCount(bytes, 0, bytes.Length);
1048 // Returns the number of characters produced by decoding a range of bytes
// in a byte array.
// Abstract: the other GetCharCount overloads in this class funnel into this one.
1052 public abstract int GetCharCount(byte[] bytes, int index, int count);
1054 // We expect this to be the workhorse for NLS Encodings, but for existing
1055 // ones we need a working (if slow) default implementation
// Base implementation: copies count bytes into a managed array and calls the abstract array overload.
// Throws ArgumentNullException naming bytes and ArgumentOutOfRangeException naming count.
// NOTE(review): the conditions guarding the throws and the declaration of index are not visible in this listing.
1057 [CLSCompliant(false)]
1058 public virtual unsafe int GetCharCount(byte* bytes, int count)
1060 // Validate input parameters
1062 throw new ArgumentNullException(nameof(bytes),
1063 SR.ArgumentNull_Array);
1066 throw new ArgumentOutOfRangeException(nameof(count),
1067 SR.ArgumentOutOfRange_NeedNonNegNum);
1068 Contract.EndContractBlock();
1070 byte[] arrbyte = new byte[count];
1073 for (index = 0; index < count; index++)
1074 arrbyte[index] = bytes[index];
1076 return GetCharCount(arrbyte, 0, count);
// Internal workhorse overload that additionally takes a decoder.
// This base implementation ignores the decoder and forwards to GetCharCount(byte*, int).
// Callers must validate their arguments before calling the internal version.
internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    return GetCharCount(bytes, count);
}
1086 // Returns a character array containing the decoded representation of a
1087 // given byte array.
// Delegates to the range overload over the whole array.
// Throws ArgumentNullException naming bytes.
// NOTE(review): the condition guarding the throw is not visible in this listing.
1090 public virtual char[] GetChars(byte[] bytes)
1094 throw new ArgumentNullException(nameof(bytes),
1095 SR.ArgumentNull_Array);
1097 Contract.EndContractBlock();
1098 return GetChars(bytes, 0, bytes.Length);
1101 // Returns a character array containing the decoded representation of a
1102 // range of bytes in a byte array.
// Sizes the result exactly with GetCharCount, then decodes into it from offset 0.
// No argument checks are visible here; the called overloads receive the arguments unchanged.
// NOTE(review): the return statement is not visible in this listing.
1105 public virtual char[] GetChars(byte[] bytes, int index, int count)
1107 char[] result = new char[GetCharCount(bytes, index, count)];
1108 GetChars(bytes, index, count, result, 0);
1112 // Decodes a range of bytes in a byte array into a range of characters in a
1113 // character array. An exception occurs if the character array is not large
1114 // enough to hold the complete decoding of the bytes. The
1115 // GetCharCount method can be used to determine the exact number of
1116 // characters that will be produced for a given range of bytes.
1117 // Alternatively, the GetMaxCharCount method can be used to
1118 // determine the maximum number of characters that will be produced for a
1119 // given number of bytes, regardless of the actual byte values.
// Abstract. The int result is used by callers in this class as the number of chars written.
1122 public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
1123 char[] chars, int charIndex);
1126 // We expect this to be the workhorse for NLS Encodings, but for existing
1127 // ones we need a working (if slow) default implementation
1129 // WARNING WARNING WARNING
1131 // WARNING: If this breaks it could be a security threat. Obviously we
1132 // call this internally, so you need to make sure that your pointers, counts
1133 // and indexes are correct when you call this method.
1135 // In addition, we have internal code, which will be marked as "safe" calling
1136 // this code. However this code is dependent upon the implementation of an
1137 // external GetChars() method, which could be overridden by a third party and
1138 // the results of which cannot be guaranteed. We use that result to copy
1139 // the char[] to our char* output buffer. If the result count was wrong, we
1140 // could easily overflow our output buffer. Therefore we do an extra test
1141 // when we copy the buffer so that we don't overflow charCount either.
// Default pointer-based decode. Copies the input bytes into a managed array, decodes into a
// temporary char[] of length charCount through the array-based GetChars overload, and then
// copies chars back into the caller's buffer.
// Throws ArgumentNullException when either pointer is null (naming `chars` if it is null,
// otherwise `bytes`) and ArgumentOutOfRangeException when either count is negative (naming
// `byteCount` if it is negative, otherwise `charCount`).
// NOTE(review): short lines are missing from this listing. The declaration of `index`, the
// statement controlled by `if (result < charCount)`, and the final return are not visible;
// confirm against the full source before relying on the copy bound shown here.
1143 [CLSCompliant(false)]
1144 public virtual unsafe int GetChars(byte* bytes, int byteCount,
1145 char* chars, int charCount)
1147 // Validate input parameters
1148 if (chars == null || bytes == null)
1149 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
1150 SR.ArgumentNull_Array);
1152 if (byteCount < 0 || charCount < 0)
1153 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
1154 SR.ArgumentOutOfRange_NeedNonNegNum);
1155 Contract.EndContractBlock();
1157 // Get the byte array to convert
1158 byte[] arrByte = new byte[byteCount];
1161 for (index = 0; index < byteCount; index++)
1162 arrByte[index] = bytes[index];
1164 // Get the char array to fill
1165 char[] arrChar = new char[charCount];
// The array overload is virtual/abstract, so `result` comes from code this method does not control.
1168 int result = GetChars(arrByte, 0, byteCount, arrChar, 0);
1170 Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");
1172 // Copy the char array
1173 // WARNING: We MUST make sure that we don't copy too many chars. We can't
1174 // rely on result because it could be a 3rd party implementation. We need
1175 // to make sure we never copy more than charCount chars no matter the value
1177 if (result < charCount)
1180 // Copy the data, don't overrun our array!
1181 for (index = 0; index < charCount; index++)
1182 chars[index] = arrChar[index];
1188 // This is our internal workhorse
1189 // Always validate parameters before calling internal version, which will only assert.
// Base implementation of the decoder-aware overload: `decoder` is not used and the call is
// forwarded unchanged to the public pointer-based GetChars.
internal virtual unsafe int GetChars(
    byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
    => GetChars(bytes, byteCount, chars, charCount);
// Decodes `byteCount` bytes starting at `bytes` into a string by handing the pointer, the
// count and this encoding to String.CreateStringFromEncoding.
// Throws ArgumentNullException naming `bytes` and ArgumentOutOfRangeException naming `byteCount`.
// NOTE(review): the conditions guarding the two throws are not visible in this listing.
1197 [CLSCompliant(false)]
1198 public unsafe string GetString(byte* bytes, int byteCount)
1201 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
1204 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1205 Contract.EndContractBlock();
1207 return String.CreateStringFromEncoding(bytes, byteCount, this);
1210 // Returns the code page identifier of this encoding. The returned value is
1211 // an integer between 0 and 65535 if the encoding has a code page
1212 // identifier, or -1 if the encoding does not represent a code page.
// Declared virtual, so derived encodings may override how the identifier is reported.
// NOTE(review): the accessor body is not visible in this listing.
1215 public virtual int CodePage
1223 // IsAlwaysNormalized
1224 // Returns true if the encoding is always normalized for the specified encoding form
// Convenience overload: asks the virtual overload about NormalizationForm.FormC.
public bool IsAlwaysNormalized() => IsAlwaysNormalized(NormalizationForm.FormC);
// Virtual hook: derived encodings override this for the normalization forms they can guarantee.
// NOTE(review): the return statement is not visible in this listing; per the comment below,
// the base answer is expected to be false.
1232 public virtual bool IsAlwaysNormalized(NormalizationForm form)
1234 // Assume false unless the encoding knows otherwise
1238 // Returns a Decoder object for this encoding. The returned object
1239 // can be used to decode a sequence of bytes into a sequence of characters.
1240 // Contrary to the GetChars family of methods, a Decoder can
1241 // convert partial sequences of bytes into partial sequences of characters
1242 // by maintaining the appropriate state between the conversions.
1244 // This default implementation returns a Decoder that simply
1245 // forwards calls to the GetCharCount and GetChars methods to
1246 // the corresponding methods of this encoding. Encodings that require state
1247 // to be maintained between successive conversions should override this
1248 // method and return an instance of an appropriate Decoder
// Base implementation: wraps this encoding in a new DefaultDecoder on every call.
public virtual Decoder GetDecoder() => new DefaultDecoder(this);
1257 // Returns an Encoder object for this encoding. The returned object
1258 // can be used to encode a sequence of characters into a sequence of bytes.
1259 // Contrary to the GetBytes family of methods, an Encoder can
1260 // convert partial sequences of characters into partial sequences of bytes
1261 // by maintaining the appropriate state between the conversions.
1263 // This default implementation returns an Encoder that simply
1264 // forwards calls to the GetByteCount and GetBytes methods to
1265 // the corresponding methods of this encoding. Encodings that require state
1266 // to be maintained between successive conversions should override this
1267 // method and return an instance of an appropriate Encoder
// Base implementation: wraps this encoding in a new DefaultEncoder on every call.
public virtual Encoder GetEncoder() => new DefaultEncoder(this);
1276 // Returns the maximum number of bytes required to encode a given number of
1277 // characters. This method can be used to determine an appropriate buffer
1278 // size for byte arrays passed to the GetBytes method of this
1279 // encoding or the GetBytes method of an Encoder for this
1280 // encoding. All encodings must guarantee that no buffer overflow
1281 // exceptions will occur if buffers are sized according to the results of
1284 // WARNING: If you're using something besides the default replacement encoder fallback,
1285 // then you could have more bytes than this returned from an actual call to GetBytes().
// Abstract: each concrete encoding supplies its own upper bound on the bytes needed to
// encode `charCount` characters.
1288 public abstract int GetMaxByteCount(int charCount);
1290 // Returns the maximum number of characters produced by decoding a given
1291 // number of bytes. This method can be used to determine an appropriate
1292 // buffer size for character arrays passed to the GetChars method of
1293 // this encoding or the GetChars method of a Decoder for this
1294 // encoding. All encodings must guarantee that no buffer overflow
1295 // exceptions will occur if buffers are sized according to the results of
// Abstract: each concrete encoding supplies its own upper bound on the characters produced
// by decoding `byteCount` bytes.
1299 public abstract int GetMaxCharCount(int byteCount);
1301 // Returns a string containing the decoded representation of a given byte
// Decodes an entire byte array by forwarding to GetString(bytes, 0, bytes.Length).
// Throws ArgumentNullException naming `bytes`.
// NOTE(review): the condition guarding the throw is not visible in this listing
// (presumably a null check on `bytes`); confirm against the full source.
1305 public virtual String GetString(byte[] bytes)
1308 throw new ArgumentNullException(nameof(bytes),
1309 SR.ArgumentNull_Array);
1310 Contract.EndContractBlock();
1312 return GetString(bytes, 0, bytes.Length);
1315 // Returns a string containing the decoded representation of a range of
1316 // bytes in a byte array.
1318 // Internally we override this for performance
// Base implementation: decodes the requested range to a char[] via GetChars and builds the
// string from that array. Argument checking is whatever GetChars performs.
public virtual String GetString(byte[] bytes, int index, int count)
{
    char[] decoded = GetChars(bytes, index, count);
    return new String(decoded);
}
1326 // Returns an encoding for Unicode format. The returned encoding will be
1327 // an instance of the UnicodeEncoding class.
1329 // It will use little endian byte order, but will detect
1330 // input in big endian if it finds a byte order mark per Unicode 2.0.
// Hands back the static UnicodeEncoding.s_littleEndianDefault instance.
public static Encoding Unicode
{
    get { return UnicodeEncoding.s_littleEndianDefault; }
}
1334 // Returns an encoding for Unicode format. The returned encoding will be
1335 // an instance of the UnicodeEncoding class.
1337 // It will use big endian byte order, but will detect
1338 // input in little endian if it finds a byte order mark per Unicode 2.0.
// Hands back the static UnicodeEncoding.s_bigEndianDefault instance.
public static Encoding BigEndianUnicode
{
    get { return UnicodeEncoding.s_bigEndianDefault; }
}
1342 // Returns an encoding for the UTF-7 format. The returned encoding will be
1343 // an instance of the UTF7Encoding class.
// Hands back the static UTF7Encoding.s_default instance.
public static Encoding UTF7
{
    get { return UTF7Encoding.s_default; }
}
1347 // Returns an encoding for the UTF-8 format. The returned encoding will be
1348 // an instance of the UTF8Encoding class.
// Hands back the static UTF8Encoding.s_default instance.
public static Encoding UTF8
{
    get { return UTF8Encoding.s_default; }
}
1352 // Returns an encoding for the UTF-32 format. The returned encoding will be
1353 // an instance of the UTF32Encoding class.
// Hands back the static UTF32Encoding.s_default instance.
public static Encoding UTF32
{
    get { return UTF32Encoding.s_default; }
}
1357 // Returns an encoding for the UTF-32 format. The returned encoding will be
1358 // an instance of the UTF32Encoding class.
1360 // It will use big endian byte order.
// Private accessor for the static UTF32Encoding.s_bigEndianDefault instance.
private static Encoding BigEndianUTF32
{
    get { return UTF32Encoding.s_bigEndianDefault; }
}
// Value equality for encodings: the visible comparison requires the same m_codePage and
// Equals-matching encoder and decoder fallbacks.
// `value as Encoding` yields null when `value` is null or is not an Encoding.
// NOTE(review): the guard on `that` and the return path for the non-matching case are not
// visible in this listing; confirm against the full source.
1364 public override bool Equals(Object value)
1366 Encoding that = value as Encoding;
1368 return (m_codePage == that.m_codePage) &&
1369 (EncoderFallback.Equals(that.EncoderFallback)) &&
1370 (DecoderFallback.Equals(that.DecoderFallback));
// Hash is the plain sum of the code page and the hash codes of both fallback objects,
// i.e. the same three components that Equals compares.
public override int GetHashCode()
{
    int hash = m_codePage;
    hash += EncoderFallback.GetHashCode();
    hash += DecoderFallback.GetHashCode();
    return hash;
}
// Base encodings carry no best-fit table for the Unicode-to-bytes direction, so an empty
// array is returned. Virtual, so derived encodings may supply data.
internal virtual char[] GetBestFitUnicodeToBytesData() => Array.Empty<char>();
// Base encodings carry no best-fit table for the bytes-to-Unicode direction, so an empty
// array is returned. Virtual, so derived encodings may supply data.
internal virtual char[] GetBestFitBytesToUnicodeData() => Array.Empty<char>();
// Always throws ArgumentException (paramName "bytes") to report that the output byte buffer
// was too small. The message includes the encoding name and the encoder fallback's type, which
// helps when a user-implemented encoder fallback has a broken GetMaxCharCount.
internal void ThrowBytesOverflow()
{
    string message = SR.Format(
        SR.Argument_EncodingConversionOverflowBytes,
        EncodingName,
        EncoderFallback.GetType());

    throw new ArgumentException(message, "bytes");
}
// Overflow handling for the byte output buffer.
//  - Throws (via the parameterless overload) when there is no encoder, when the encoder is
//    flagged m_throwOnOverflow, or when nothing was encoded. Any existing fallback buffer on
//    the encoder is reset first.
//  - Otherwise no exception is raised and the encoder's must-flush flag is cleared.
internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
{
    bool mustThrow = encoder == null || encoder.m_throwOnOverflow || nothingEncoded;
    if (mustThrow)
    {
        if (encoder != null && encoder.InternalHasFallbackBuffer)
        {
            encoder.FallbackBuffer.InternalReset();
        }

        // The message names the fallback type in case its GetMaxCharCount is broken.
        ThrowBytesOverflow();
    }

    // Not throwing: we are converting and must remember the flush state.
    encoder.ClearMustFlush();
}
// Always throws ArgumentException (paramName "chars") to report that the output char buffer
// was too small. The message includes the encoding name and the decoder fallback's type, which
// helps when a user-implemented decoder fallback has a broken GetMaxCharCount.
internal void ThrowCharsOverflow()
{
    string message = SR.Format(
        SR.Argument_EncodingConversionOverflowChars,
        EncodingName,
        DecoderFallback.GetType());

    throw new ArgumentException(message, "chars");
}
// Overflow handling for the char output buffer.
//  - Throws (via the parameterless overload) when there is no decoder, when the decoder is
//    flagged m_throwOnOverflow, or when nothing was decoded. Any existing fallback buffer on
//    the decoder is reset first.
//  - Otherwise no exception is raised and the decoder's must-flush flag is cleared.
internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
{
    bool mustThrow = decoder == null || decoder.m_throwOnOverflow || nothingDecoded;
    if (mustThrow)
    {
        if (decoder != null && decoder.InternalHasFallbackBuffer)
        {
            decoder.FallbackBuffer.InternalReset();
        }

        // The message names the fallback type in case its GetMaxCharCount is broken.
        ThrowCharsOverflow();
    }

    // Not throwing: we are converting and must remember the flush state.
    decoder.ClearMustFlush();
}
// Encoder handed out by the base Encoding.GetEncoder(). It keeps no conversion state of its
// own: every call is forwarded to the wrapped Encoding and the `flush` argument is ignored.
internal sealed class DefaultEncoder : Encoder, IObjectReference, ISerializable
{
    private Encoding m_encoding;

    public DefaultEncoder(Encoding encoding)
    {
        m_encoding = encoding;
    }

    // IObjectReference member: not supported, always throws.
    public Object GetRealObject(StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // ISerializable member: not supported, always throws.
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // Byte count for a range of a char array, exactly as the wrapped encoding's
    // GetByteCount reports it.
    public override int GetByteCount(char[] chars, int index, int count, bool flush)
        => m_encoding.GetByteCount(chars, index, count);

    // Pointer flavour of the above; no extra argument checks are added here.
    [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetByteCount(char* chars, int count, bool flush)
        => m_encoding.GetByteCount(chars, count);

    // Encodes charCount chars starting at chars[charIndex] into bytes starting at
    // bytes[byteIndex] by calling the wrapped encoding's GetBytes. Whatever that call
    // returns or throws is passed straight through.
    public override int GetBytes(char[] chars, int charIndex, int charCount,
                                 byte[] bytes, int byteIndex, bool flush)
        => m_encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

    // Pointer flavour of the above; no extra argument checks are added here.
    [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetBytes(char* chars, int charCount,
                                        byte* bytes, int byteCount, bool flush)
        => m_encoding.GetBytes(chars, charCount, bytes, byteCount);
}
// Decoder handed out by the base Encoding.GetDecoder(). It keeps no conversion state of its
// own: every call is forwarded to the wrapped Encoding and the `flush` argument is ignored.
internal sealed class DefaultDecoder : Decoder, IObjectReference, ISerializable
{
    private Encoding m_encoding;

    public DefaultDecoder(Encoding encoding)
    {
        m_encoding = encoding;
    }

    // IObjectReference member: not supported, always throws.
    public Object GetRealObject(StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // ISerializable member: not supported, always throws.
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // Char count for a byte range; same as the flush overload called with flush = false.
    public override int GetCharCount(byte[] bytes, int index, int count)
        => GetCharCount(bytes, index, count, false);

    // Char count for a byte range, exactly as the wrapped encoding's GetCharCount reports it.
    public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
        => m_encoding.GetCharCount(bytes, index, count);

    // Pointer flavour of the above; no extra argument checks are added here.
    [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
        => m_encoding.GetCharCount(bytes, count);

    // Decodes byteCount bytes starting at bytes[byteIndex] into chars starting at
    // chars[charIndex]; same as the flush overload called with flush = false.
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                 char[] chars, int charIndex)
        => GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);

    // Decodes through the wrapped encoding's GetChars. Whatever that call returns or throws
    // is passed straight through.
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                 char[] chars, int charIndex, bool flush)
        => m_encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

    // Pointer flavour of the above; no extra argument checks are added here.
    [SuppressMessage("Microsoft.Contracts", "CC1055")] // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetChars(byte* bytes, int byteCount,
                                        char* chars, int charCount, bool flush)
        => m_encoding.GetChars(bytes, byteCount, chars, charCount);
}
// Cursor pair used while decoding: tracks a read position in a byte buffer and a write
// position in a char buffer, routes undecodable bytes through a DecoderFallbackBuffer, and
// accumulates the number of chars produced in _charCountResult (exposed as Count).
// NOTE(review): this listing omits short source lines (braces, several assignments, `else`
// branches and return statements). Comments below describe only what is visible.
1594 internal class EncodingCharBuffer
// Char output window: current position, start and one-past-end.
1596 private unsafe char* _chars;
1597 private unsafe char* _charStart;
1598 private unsafe char* _charEnd;
// Running total reported by Count.
1599 private int _charCountResult = 0;
1600 private Encoding _enc;
1601 private DecoderNLS _decoder;
// Byte input window: start, one-past-end and current position.
1602 private unsafe byte* _byteStart;
1603 private unsafe byte* _byteEnd;
1604 private unsafe byte* _bytes;
1605 private DecoderFallbackBuffer _fallbackBuffer;
// Sets up both windows from (start, count) pairs and selects the fallback buffer.
// NOTE(review): the assignments to _enc, _decoder, _chars and _bytes are not visible here.
1607 internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
1608 byte* byteStart, int byteCount)
1614 _charStart = charStart;
1615 _charEnd = charStart + charCount;
1617 _byteStart = byteStart;
1619 _byteEnd = byteStart + byteCount;
// Without a decoder, a fresh fallback buffer is created from the encoding's DecoderFallback.
// The decoder's own FallbackBuffer is the alternative (the `else` line is not visible).
1621 if (_decoder == null)
1622 _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
1624 _fallbackBuffer = _decoder.FallbackBuffer;
1626 // If we're getting chars or getting char count we don't expect to have
1627 // to remember fallbacks between calls (so it should be empty)
1628 Debug.Assert(_fallbackBuffer.Remaining == 0,
1629 "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
1630 _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
// Adds one decoded char that consumed `numBytes` input bytes. On a full output window the
// byte cursor is backed up by numBytes, ThrowCharsOverflow decides whether to throw (its
// second argument is true when the cursor is at or before the start), and false is returned.
// NOTE(review): the enclosing condition, the store of `ch` and the success return are not visible.
1633 internal unsafe bool AddChar(char ch, int numBytes)
1637 if (_chars >= _charEnd)
1640 _bytes -= numBytes; // Didn't encode these bytes
1641 _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
1642 return false; // No throw, but no store either
// Single-byte convenience overload: numBytes = 1.
1651 internal unsafe bool AddChar(char ch)
1653 return AddChar(ch, 1);
// Adds two chars that together consumed `numBytes` bytes. Room for both is checked up front
// so the pair is either stored completely or not at all.
1657 internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
1659 // Need room for 2 chars
1660 if (_chars >= _charEnd - 1)
1663 _bytes -= numBytes; // Didn't encode these bytes
1664 _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
1665 return false; // No throw, but no store either
1667 return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
// NOTE(review): body not visible in this listing.
1670 internal unsafe void AdjustBytes(int count)
// True while the byte cursor has not reached the end of the input window.
1675 internal unsafe bool MoreData
1679 return _bytes < _byteEnd;
1683 // Do we have count more bytes?
1684 internal unsafe bool EvenMoreData(int count)
1686 return (_bytes <= _byteEnd - count);
1689 // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
1690 // but we'll double check just to make sure.
// NOTE(review): the statement taken when the window is exhausted and the normal read/return
// are not visible in this listing.
1691 internal unsafe byte GetNextByte()
1693 Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");
1694 if (_bytes >= _byteEnd)
// Number of input bytes consumed so far (cursor minus start).
1699 internal unsafe int BytesUsed
1703 return (int)(_bytes - _byteStart);
// The fixed-arity Fallback overloads pack their bytes into a new array and defer to Fallback(byte[]).
1707 internal unsafe bool Fallback(byte fallbackByte)
1710 byte[] byteBuffer = new byte[] { fallbackByte };
1712 // Do the fallback and add the data.
1713 return Fallback(byteBuffer);
1716 internal unsafe bool Fallback(byte byte1, byte byte2)
1719 byte[] byteBuffer = new byte[] { byte1, byte2 };
1721 // Do the fallback and add the data.
1722 return Fallback(byteBuffer);
1725 internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
1728 byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };
1730 // Do the fallback and add the data.
1731 return Fallback(byteBuffer);
// Runs the fallback for `byteBuffer`. Two accumulation paths are visible: one lets
// InternalFallback advance _chars and adds the pointer difference to the total; the other adds
// the count returned by the two-argument InternalFallback. If the three-argument call returns
// false, the byte cursor is backed up, the fallback buffer reset, ThrowCharsOverflow consulted
// and false returned.
// NOTE(review): the condition selecting between the two paths and the success return are not visible.
1734 internal unsafe bool Fallback(byte[] byteBuffer)
1736 // Do the fallback and add the data.
1739 char* pTemp = _chars;
1740 if (_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false)
1743 _bytes -= byteBuffer.Length; // Didn't use how many ever bytes we're falling back
1744 _fallbackBuffer.InternalReset(); // We didn't use this fallback.
1745 _enc.ThrowCharsOverflow(_decoder, _chars == _charStart); // Throw?
1746 return false; // No throw, but no store either
1748 _charCountResult += unchecked((int)(_chars - pTemp));
1752 _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
// Total chars accumulated so far.
1758 internal unsafe int Count
1762 return _charCountResult;
// Cursor pair used while encoding: tracks a read position in a char buffer and a write
// position in a byte buffer, pulls replacement chars from an EncoderFallbackBuffer, and
// reports a byte total through Count.
// NOTE(review): this listing omits short source lines (braces, some assignments, `else`
// branches and return statements), and the end of the class lies at the edge of the visible
// chunk. Comments below describe only what is visible.
1767 internal class EncodingByteBuffer
// Byte output window: current position, start and one-past-end.
1769 private unsafe byte* _bytes;
1770 private unsafe byte* _byteStart;
1771 private unsafe byte* _byteEnd;
// Char input window: current position, start and one-past-end.
1772 private unsafe char* _chars;
1773 private unsafe char* _charStart;
1774 private unsafe char* _charEnd;
// Value reported by Count; the code that updates it is not visible in this listing.
1775 private int _byteCountResult = 0;
1776 private Encoding _enc;
1777 private EncoderNLS _encoder;
1778 internal EncoderFallbackBuffer fallbackBuffer;
// Sets up both windows from (start, count) pairs and selects the fallback buffer.
// NOTE(review): the assignment to _enc is not visible here.
1780 internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
1781 byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
1784 _encoder = inEncoder;
1786 _charStart = inCharStart;
1787 _chars = inCharStart;
1788 _charEnd = inCharStart + inCharCount;
1790 _bytes = inByteStart;
1791 _byteStart = inByteStart;
1792 _byteEnd = inByteStart + inByteCount;
// Without an encoder, a fresh fallback buffer is created from the encoding's EncoderFallback.
// With one, its FallbackBuffer is reused (the `else` line is not visible).
1794 if (_encoder == null)
1795 this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
1798 this.fallbackBuffer = _encoder.FallbackBuffer;
1799 // If we're not converting we must not have data in our fallback buffer
1800 if (_encoder.m_throwOnOverflow && _encoder.InternalHasFallbackBuffer &&
1801 this.fallbackBuffer.Remaining > 0)
1802 throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
1803 _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
// The last argument is true when an output byte buffer was supplied (non-null _bytes).
1805 fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
// Adds one byte, requiring room for it plus `moreBytesExpected` further bytes. When there is
// not enough room, MovePrevious(true) backs up the input and decides whether to throw, and
// false is returned.
// NOTE(review): the enclosing condition, the store of `b` and the success return are not visible.
1808 internal unsafe bool AddByte(byte b, int moreBytesExpected)
1810 Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
1813 if (_bytes >= _byteEnd - moreBytesExpected)
1815 // Throw maybe. Check which buffer to back up (only matters if Converting)
1816 this.MovePrevious(true); // Throw if necessary
1817 return false; // No throw, but no store either
// The multi-byte overloads chain single-byte adds with &&, so a failed add short-circuits the
// rest. Each earlier byte reserves room for the bytes that follow it.
1826 internal unsafe bool AddByte(byte b1)
1828 return (AddByte(b1, 0));
1831 internal unsafe bool AddByte(byte b1, byte b2)
1833 return (AddByte(b1, b2, 0));
1836 internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
1838 return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
1841 internal unsafe bool AddByte(byte b1, byte b2, byte b3)
1843 return AddByte(b1, b2, b3, (int)0);
1846 internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
1848 return (AddByte(b1, 2 + moreBytesExpected) &&
1849 AddByte(b2, 1 + moreBytesExpected) &&
1850 AddByte(b3, moreBytesExpected));
// NOTE(review): only the first term of this four-byte chain is visible in this listing.
1853 internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
1855 return (AddByte(b1, 3) &&
// Un-consumes the most recent input: the fallback buffer steps back if it is currently
// supplying chars; the visible alternative steps the char cursor back when it is past the start.
// NOTE(review): the `else` separating those two paths and the condition guarding the
// ThrowBytesOverflow call (presumably `bThrow`) are not visible in this listing.
1861 internal unsafe void MovePrevious(bool bThrow)
1863 if (fallbackBuffer.bFallingBack)
1864 fallbackBuffer.MovePrevious(); // don't use last fallback
1867 Debug.Assert(_chars > _charStart ||
1868 ((bThrow == true) && (_bytes == _byteStart)),
1869 "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
1870 if (_chars > _charStart)
1871 _chars--; // don't use last char
1875 _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart); // Throw? (and reset fallback if not converting)
// Hands an unencodable char to the fallback buffer, which may adjust the char cursor.
1878 internal unsafe bool Fallback(char charFallback)
1881 return fallbackBuffer.InternalFallback(charFallback, ref _chars);
1884 internal unsafe bool MoreData
1888 // See if fallbackBuffer is not empty or if there's data left in chars buffer.
1889 return ((fallbackBuffer.Remaining > 0) || (_chars < _charEnd));
// Next char to encode: the fallback buffer is asked first, then the char window is read.
// NOTE(review): the test on cReturn that selects the second path and the final return are
// not visible in this listing.
1893 internal unsafe char GetNextChar()
1895 // See if there's something in our fallback buffer
1896 char cReturn = fallbackBuffer.InternalGetNextChar();
1898 // Nothing in the fallback buffer, return our normal data.
1901 if (_chars < _charEnd)
1902 cReturn = *(_chars++);
// Number of input chars consumed so far (cursor minus start).
1908 internal unsafe int CharsUsed
1912 return (int)(_chars - _charStart);
// Byte total accumulated so far.
1916 internal unsafe int Count
1920 return _byteCountResult;