1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Diagnostics;
6 using System.Globalization;
7 using System.Diagnostics.Contracts;
8 using System.Threading;
9 using System.Runtime.Serialization;
10 using System.Diagnostics.CodeAnalysis;
14 // This abstract base class represents a character encoding. The class provides
15 // methods to convert arrays and strings of Unicode characters to and from
16 // arrays of bytes. A number of Encoding implementations are provided in
17 // the System.Text package, including:
19 // ASCIIEncoding, which encodes Unicode characters as single 7-bit
20 // ASCII characters. This encoding only supports character values between 0x00 and 0x7F.
22 // BaseCodePageEncoding, which encapsulates a Windows code page. Any
23 // installed code page can be accessed through this encoding, and conversions
24 // are performed using the WideCharToMultiByte and
25 // MultiByteToWideChar Windows API functions.
26 // UnicodeEncoding, which encodes each Unicode character as two
27 // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
28 // page 1201) encodings are recognized.
29 // UTF7Encoding, which encodes Unicode characters using the UTF-7
30 // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
31 // encoding supports all Unicode character values, and can also be accessed
32 // as code page 65000.
33 // UTF8Encoding, which encodes Unicode characters using the UTF-8
34 // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
35 // encoding supports all Unicode character values, and can also be accessed
36 // as code page 65001.
37 // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
39 // In addition to directly instantiating Encoding objects, an
40 // application can use the ForCodePage, GetASCII,
41 // GetDefault, GetUnicode, GetUTF7, and GetUTF8
42 // methods in this class to obtain encodings.
44 // Through an encoding, the GetBytes method is used to convert arrays
45 // of characters to arrays of bytes, and the GetChars method is used to
46 // convert arrays of bytes to arrays of characters. The GetBytes and
47 // GetChars methods maintain no state between conversions, and are
48 // generally intended for conversions of complete blocks of bytes and
49 // characters in one operation. When the data to be converted is only available
50 // in sequential blocks (such as data read from a stream) or when the amount of
51 // data is so large that it needs to be divided into smaller blocks, an
52 // application may choose to use a Decoder or an Encoder to
53 // perform the conversion. Decoders and encoders allow sequential blocks of
54 // data to be converted and they maintain the state required to support
55 // conversions of data that spans adjacent blocks. Decoders and encoders are
56 // obtained using the GetDecoder and GetEncoder methods.
58 // The core GetBytes and GetChars methods require the caller
59 // to provide the destination buffer and ensure that the buffer is large enough
60 // to hold the entire result of the conversion. When using these methods,
61 // either directly on an Encoding object or on an associated
62 // Decoder or Encoder, an application can use one of two methods
63 // to allocate destination buffers.
65 // The GetByteCount and GetCharCount methods can be used to
66 // compute the exact size of the result of a particular conversion, and an
67 // appropriately sized buffer for that conversion can then be allocated.
68 // The GetMaxByteCount and GetMaxCharCount methods can be
69 // used to compute the maximum possible size of a conversion of a given
70 // number of bytes or characters, and a buffer of that size can then be reused
71 // for multiple conversions.
73 // The first method generally uses less memory, whereas the second method
74 // generally executes faster.
77 public abstract class Encoding : ICloneable
79 // For netcore we use UTF8 as default encoding since ANSI isn't available
// Shared default instance: a sealed UTF-8 encoding constructed with
// encoderShouldEmitUTF8Identifier: false.
80 private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding = new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);
82 // Returns the default encoding. As the field above shows, this is the shared UTF-8
// instance rather than an encoding for the system's ANSI code page.
83 public static Encoding Default => s_defaultEncoding;
86 // The following values are from mlang.idl. These values
87 // should be in sync with those in mlang.idl.
// They are bit flags that the IsBrowserDisplay / IsBrowserSave / IsMailNewsDisplay /
// IsMailNewsSave properties test against CodePageDataItem.Flags.
89 internal const int MIMECONTF_MAILNEWS = 0x00000001;
90 internal const int MIMECONTF_BROWSER = 0x00000002;
91 internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;
92 internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;
94 // Special Case Code Pages
// GetEncoding(int) maps CodePageDefault to Encoding.Default and rejects the four
// "No*" values below with an ArgumentException.
95 private const int CodePageDefault = 0;
96 private const int CodePageNoOEM = 1; // OEM Code page not supported
97 private const int CodePageNoMac = 2; // MAC code page not supported
98 private const int CodePageNoThread = 3; // Thread code page not supported
99 private const int CodePageNoSymbol = 42; // Symbol code page not supported
100 private const int CodePageUnicode = 1200; // Unicode
101 private const int CodePageBigEndian = 1201; // Big Endian Unicode
102 private const int CodePageWindows1252 = 1252; // Windows 1252 code page
104 // 20936 has same code page as 10008, so we'll special case it
105 private const int CodePageMacGB2312 = 10008;
106 private const int CodePageGB2312 = 20936;
107 private const int CodePageMacKorean = 10003;
108 private const int CodePageDLLKorean = 20949;
110 // ISO 2022 Code Pages
111 private const int ISO2022JP = 50220;
112 private const int ISO2022JPESC = 50221;
113 private const int ISO2022JPSISO = 50222;
114 private const int ISOKorean = 50225;
115 private const int ISOSimplifiedCN = 50227;
116 private const int EUCJP = 51932;
117 private const int ChineseHZ = 52936; // HZ has ~}~{~~ sequences
119 // 51936 is the same as 936
120 private const int DuplicateEUCCN = 51936;
121 private const int EUCCN = 936;
123 private const int EUCKR = 51949;
125 // Latin 1 & ASCII Code Pages
// These two are internal (not private) and are mapped by GetEncoding(int) to the
// ASCII and Latin1 static instances.
126 internal const int CodePageASCII = 20127; // ASCII
127 internal const int ISO_8859_1 = 28591; // Latin1
// ISCII code pages
130 private const int ISCIIAssemese = 57006;
131 private const int ISCIIBengali = 57003;
132 private const int ISCIIDevanagari = 57002;
133 private const int ISCIIGujarathi = 57010;
134 private const int ISCIIKannada = 57008;
135 private const int ISCIIMalayalam = 57009;
136 private const int ISCIIOriya = 57007;
137 private const int ISCIIPanjabi = 57011;
138 private const int ISCIITamil = 57004;
139 private const int ISCIITelugu = 57005;
142 private const int GB18030 = 54936;
145 private const int ISO_8859_8I = 38598;
146 private const int ISO_8859_8_Visual = 28598;
148 // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
149 private const int ENC50229 = 50229;
151 // Special code pages
// GetEncoding(int) maps these four to the UTF7, UTF8, UTF32 and BigEndianUTF32 instances.
152 private const int CodePageUTF7 = 65000;
153 private const int CodePageUTF8 = 65001;
154 private const int CodePageUTF32 = 12000;
155 private const int CodePageUTF32BE = 12001;
// Code page identifier of this encoding; assigned by the constructors.
157 internal int _codePage = 0;
// Code page metadata; starts out null and is filled from EncodingTable by GetDataItem().
159 internal CodePageDataItem _dataItem = null;
161 // Because of encoders we may be read only
// Instances start out read-only; Clone() hands back a copy with this flag cleared.
// The fallback property setters have an InvalidOperation_ReadOnly failure path
// (presumably taken while this flag is set -- the condition is not visible in this view).
162 [OptionalField(VersionAdded = 2)]
163 private bool _isReadOnly = true;
165 // Encoding (encoder) fallback
// Backing fields for the EncoderFallback / DecoderFallback properties.
166 internal EncoderFallback encoderFallback = null;
167 internal DecoderFallback decoderFallback = null;
// Parameterless constructor: chains to Encoding(int) with code page 0.
169 protected Encoding() : this(0)
// Initializes an encoding for the given code page and installs the default fallbacks.
// The failure path throws ArgumentOutOfRangeException naming codePage
// (NOTE(review): the validation condition itself is not visible in this view).
174 protected Encoding(int codePage)
176 // Validate code page
179 throw new ArgumentOutOfRangeException(nameof(codePage));
181 Contract.EndContractBlock();
183 // Remember code page
184 _codePage = codePage;
186 // Use default encoder/decoder fallbacks
// NOTE: SetDefaultFallbacks is virtual, so an override in a derived class runs here,
// before that derived class's own constructor body has executed.
187 this.SetDefaultFallbacks();
190 // This constructor is needed to allow any sub-classing implementation to provide encoder/decoder fallback objects
191 // because the encoding object is always created as a read-only object and doesn't allow setting encoder/decoder fallbacks
192 // after the creation is done.
// A null encoderFallback or decoderFallback is replaced by the corresponding internal
// best-fit fallback bound to this encoding. Unlike the Encoding(int) constructor, this
// one assigns the fallbacks directly and does not call SetDefaultFallbacks().
// The failure path throws ArgumentOutOfRangeException naming codePage
// (NOTE(review): the validation condition itself is not visible in this view).
193 protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
195 // Validate code page
198 throw new ArgumentOutOfRangeException(nameof(codePage));
200 Contract.EndContractBlock();
202 // Remember code page
203 _codePage = codePage;
205 this.encoderFallback = encoderFallback ?? new InternalEncoderBestFitFallback(this);
206 this.decoderFallback = decoderFallback ?? new InternalDecoderBestFitFallback(this);
209 // Default fallback that we'll use.
// This base implementation installs the internal best-fit encoder and decoder fallbacks.
210 internal virtual void SetDefaultFallbacks()
212 // For UTF-X encodings, we use a replacement fallback with an "\xFFFD" string,
213 // For ASCII we use "?" replacement fallback, etc.
// NOTE(review): those replacement fallbacks are not set up here; presumably the derived
// encodings override this method to install them -- confirm against the overrides.
214 encoderFallback = new InternalEncoderBestFitFallback(this);
215 decoderFallback = new InternalDecoderBestFitFallback(this);
218 // Converts a byte array from one encoding to another. The bytes in the
219 // bytes array are converted from srcEncoding to
220 // dstEncoding, and the returned value is a new byte array
221 // containing the result of the conversion.
// The whole array is converted by forwarding to the (bytes, index, count) overload with
// index 0 and count bytes.Length. The failure path for the bytes argument throws
// ArgumentNullException naming bytes; the encodings are validated by the overload.
224 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
228 throw new ArgumentNullException(nameof(bytes));
229 Contract.Ensures(Contract.Result<byte[]>() != null);
231 return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
234 // Converts a range of bytes in a byte array from one encoding to another.
235 // This method converts count bytes from bytes starting at
236 // index index from srcEncoding to dstEncoding, and
237 // returns a new byte array containing the result of the conversion.
// The conversion decodes the byte range to chars with srcEncoding and then re-encodes
// those chars with dstEncoding; index and count are passed straight to GetChars.
// Throws ArgumentNullException naming srcEncoding or dstEncoding (srcEncoding is reported
// first when both are null); this check runs before the bytes argument is examined.
// NOTE(review): that exception reuses the SR.ArgumentNull_Array message even though the
// offending argument is an Encoding rather than an array.
240 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
241 byte[] bytes, int index, int count)
243 if (srcEncoding == null || dstEncoding == null)
245 throw new ArgumentNullException((srcEncoding == null ? nameof(srcEncoding) : nameof(dstEncoding)),
246 SR.ArgumentNull_Array);
250 throw new ArgumentNullException(nameof(bytes),
251 SR.ArgumentNull_Array);
253 Contract.Ensures(Contract.Result<byte[]>() != null);
255 return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
// Registers an encoding provider by forwarding it to EncodingProvider.AddProvider.
// No validation is done here; it is left to EncodingProvider.
258 public static void RegisterProvider(EncodingProvider provider)
260 // Parameters validated inside EncodingProvider
261 EncodingProvider.AddProvider(provider);
// Returns the encoding for a numeric code page identifier.
// Registered providers are asked first, before any range validation. After that the
// identifier must lie in [0, 65535]; the built-in code pages map to the shared static
// instances, and any other value must at least have an entry in EncodingTable.
// Throws:
//   ArgumentOutOfRangeException - codepage is below 0 or above 65535.
//   ArgumentException           - codepage is one of the special values 1, 2, 3 or 42.
//   NotSupportedException       - EncodingTable has no data for codepage.
// NOTE(review): the statements following the provider lookup and the final return are
// not visible in this view.
265 public static Encoding GetEncoding(int codepage)
267 Encoding result = EncodingProvider.GetEncodingFromProvider(codepage);
272 // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
273 // add the corresponding item in EncodingTable.
274 // Otherwise, the code below will throw exception when trying to call
275 // EncodingTable.GetDataItem().
277 if (codepage < 0 || codepage > 65535)
279 throw new ArgumentOutOfRangeException(
280 nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
283 Contract.EndContractBlock();
287 case CodePageDefault: return Default; // 0
288 case CodePageUnicode: return Unicode; // 1200
289 case CodePageBigEndian: return BigEndianUnicode; // 1201
290 case CodePageUTF32: return UTF32; // 12000
291 case CodePageUTF32BE: return BigEndianUTF32; // 12001
292 case CodePageUTF7: return UTF7; // 65000
293 case CodePageUTF8: return UTF8; // 65001
294 case CodePageASCII: return ASCII; // 20127
295 case ISO_8859_1: return Latin1; // 28591
297 // We don't allow the following special code page values that Win32 allows.
298 case CodePageNoOEM: // 1 CP_OEMCP
299 case CodePageNoMac: // 2 CP_MACCP
300 case CodePageNoThread: // 3 CP_THREAD_ACP
301 case CodePageNoSymbol: // 42 CP_SYMBOL
302 throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
305 // Is it a valid code page?
306 if (EncodingTable.GetCodePageDataItem(codepage) == null)
308 throw new NotSupportedException(
309 SR.Format(SR.NotSupported_NoCodepageData, codepage));
// Returns an encoding for the code page that uses the supplied fallbacks.
// Registered providers are asked first (passing the fallbacks along). Otherwise the
// encoding from GetEncoding(codepage) is cloned and the fallbacks are assigned on the
// clone through the EncoderFallback / DecoderFallback setters, so the instance returned
// by GetEncoding(codepage) itself is never modified.
// NOTE(review): the statement under the provider null check is not visible in this view;
// presumably it returns the provider's encoding.
316 public static Encoding GetEncoding(int codepage,
317 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
319 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
321 if (baseEncoding != null)
324 // Get the default encoding (which is cached and read only)
325 baseEncoding = GetEncoding(codepage);
327 // Clone it and set the fallback
328 Encoding fallbackEncoding = (Encoding)baseEncoding.Clone();
329 fallbackEncoding.EncoderFallback = encoderFallback;
330 fallbackEncoding.DecoderFallback = decoderFallback;
332 return fallbackEncoding;
335 // Returns an Encoding object for a given name or a given code page value.
// Registered providers are asked first; otherwise the name is translated to a code page
// with EncodingTable.GetCodePageFromName and resolved through GetEncoding(int).
// NOTE(review): the statement under the provider null check is not visible in this view;
// presumably it returns the provider's encoding.
338 public static Encoding GetEncoding(String name)
340 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name);
341 if (baseEncoding != null)
345 // NOTE: If you add a new encoding that can be requested by name, be sure to
346 // add the corresponding item in EncodingTable.
347 // Otherwise, the code below will throw exception when trying to call
348 // EncodingTable.GetCodePageFromName().
350 return GetEncoding(EncodingTable.GetCodePageFromName(name));
353 // Returns an Encoding object for a given name or a given code page value.
// Same lookup as GetEncoding(String), but the supplied fallbacks are passed to the
// providers and, failing that, to GetEncoding(int, EncoderFallback, DecoderFallback).
// NOTE(review): the statement under the provider null check is not visible in this view;
// presumably it returns the provider's encoding.
356 public static Encoding GetEncoding(String name,
357 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
359 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
360 if (baseEncoding != null)
364 // NOTE: If you add a new encoding that can be requested by name, be sure to
365 // add the corresponding item in EncodingTable.
366 // Otherwise, the code below will throw exception when trying to call
367 // EncodingTable.GetCodePageFromName().
369 return (GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback));
372 // Return a list of all EncodingInfo objects describing all of our encodings
// The array comes straight from EncodingTable.GetEncodings().
374 public static EncodingInfo[] GetEncodings()
376 return EncodingTable.GetEncodings();
// Returns the preamble bytes for this encoding. This base implementation returns an
// empty array (the shared Array.Empty<byte>() instance); the method is virtual so
// derived encodings can supply their own.
380 public virtual byte[] GetPreamble()
382 return Array.Empty<byte>();
// Fills _dataItem from EncodingTable for this encoding's code page the first time it is
// needed. Throws NotSupportedException when the table has no entry for _codePage.
385 private void GetDataItem()
387 if (_dataItem == null)
389 _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
390 if (_dataItem == null)
392 throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
397 // Returns the name for this encoding that can be used with mail agent body tags.
398 // If the encoding may not be used, the string is empty.
// The value is read from the code page data item (_dataItem.BodyName).
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
400 public virtual String BodyName
404 if (_dataItem == null)
408 return (_dataItem.BodyName);
412 // Returns the human-readable description of the encoding ( e.g. Hebrew (DOS)).
// Looks up the localized resource for this.CodePage and throws NotSupportedException when
// there is none. When the resource text is just its own identifier (it starts with
// "Globalization_cp_"), the English name from EncodingTable is used instead.
// NOTE(review): a second EncodingName definition follows later in this class; presumably
// the two are selected by conditional compilation that is not visible in this view.
414 public virtual String EncodingName
418 string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
419 if (encodingName == null)
421 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
424 if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
426 // On ProjectN, resource strings are stripped from retail builds and replaced by
427 // their identifier names. Since this property is meant to be a localized string,
428 // but we don't localize ProjectN, we specifically need to do something reasonable
429 // in this case. This currently returns the English name of the encoding from a
430 // static data table.
// NOTE(review): GetCodePageDataItem can return null (GetEncoding(int) and GetDataItem()
// both check for that), in which case this dereference would fail before the null
// check on encodingName below is reached.
431 encodingName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
432 if (encodingName == null)
// NOTE(review): this throw formats SR.MissingEncodingNameResource with two arguments
// (WebName, CodePage) while the throw above passes only CodePage; confirm which one
// matches the resource's placeholders.
434 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
// Maps a code page number to its SR.Globalization_cp_* resource string.
// Only the eight code pages listed below have an entry; every other value yields null.
441 private static string GetLocalizedEncodingNameResource(int codePage)
445 case 1200: return SR.Globalization_cp_1200;
446 case 1201: return SR.Globalization_cp_1201;
447 case 12000: return SR.Globalization_cp_12000;
448 case 12001: return SR.Globalization_cp_12001;
449 case 20127: return SR.Globalization_cp_20127;
450 case 28591: return SR.Globalization_cp_28591;
451 case 65000: return SR.Globalization_cp_65000;
452 case 65001: return SR.Globalization_cp_65001;
453 default: return null;
// Returns the resource string whose key is "Globalization_cp_" followed by the code page
// number, fetched with SR.GetResourceString.
// NOTE(review): this is a second definition of EncodingName in the same class; presumably
// it is the alternative branch of a conditional-compilation block not visible in this view.
457 public virtual String EncodingName
461 return SR.GetResourceString("Globalization_cp_" + _codePage.ToString());
465 // Returns the name for this encoding that can be used with mail agent header
466 // tags. If the encoding may not be used, the string is empty.
// The value is read from the code page data item (_dataItem.HeaderName).
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
468 public virtual String HeaderName
472 if (_dataItem == null)
476 return (_dataItem.HeaderName);
480 // Returns the IANA preferred name for this encoding.
// The value is read from the code page data item (_dataItem.WebName).
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
481 public virtual String WebName
485 if (_dataItem == null)
489 return (_dataItem.WebName);
493 // Returns the windows code page that most closely corresponds to this encoding.
// The value is the UIFamilyCodePage field of the code page data item.
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
495 public virtual int WindowsCodePage
499 if (_dataItem == null)
503 return (_dataItem.UIFamilyCodePage);
508 // True if and only if the encoding is used for display by browser clients.
// Tests the MIMECONTF_BROWSER bit of the code page data item's Flags.
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
510 public virtual bool IsBrowserDisplay
514 if (_dataItem == null)
518 return ((_dataItem.Flags & MIMECONTF_BROWSER) != 0);
522 // True if and only if the encoding is used for saving by browser clients.
// Tests the MIMECONTF_SAVABLE_BROWSER bit of the code page data item's Flags.
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
524 public virtual bool IsBrowserSave
528 if (_dataItem == null)
532 return ((_dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0);
536 // True if and only if the encoding is used for display by mail and news clients.
// Tests the MIMECONTF_MAILNEWS bit of the code page data item's Flags.
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
538 public virtual bool IsMailNewsDisplay
542 if (_dataItem == null)
546 return ((_dataItem.Flags & MIMECONTF_MAILNEWS) != 0);
551 // True if and only if the encoding is used for saving documents by mail and news clients.
// Tests the MIMECONTF_SAVABLE_MAILNEWS bit of the code page data item's Flags.
// NOTE(review): the statement guarded by the null check is not visible in this view;
// presumably it loads _dataItem via GetDataItem().
554 public virtual bool IsMailNewsSave
558 if (_dataItem == null)
562 return ((_dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0);
566 // True if and only if the encoding only uses single byte code points (i.e., ASCII, 1252, etc.).
// NOTE(review): the accessor body is not visible in this view, so the value this base
// implementation returns cannot be stated here; the property is virtual.
568 public virtual bool IsSingleByte
// Gets or sets the encoder fallback stored in the encoderFallback field.
// The setter has two failure paths before the assignment: InvalidOperationException with
// the InvalidOperation_ReadOnly message, and ArgumentNullException naming value.
// NOTE(review): the conditions guarding those throws are not visible in this view;
// presumably they test IsReadOnly and value == null respectively.
577 public EncoderFallback EncoderFallback
581 return encoderFallback;
587 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
590 throw new ArgumentNullException(nameof(value));
591 Contract.EndContractBlock();
593 encoderFallback = value;
// Gets or sets the decoder fallback stored in the decoderFallback field.
// The setter has two failure paths before the assignment: InvalidOperationException with
// the InvalidOperation_ReadOnly message, and ArgumentNullException naming value.
// NOTE(review): the conditions guarding those throws are not visible in this view;
// presumably they test IsReadOnly and value == null respectively.
598 public DecoderFallback DecoderFallback
602 return decoderFallback;
608 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
611 throw new ArgumentNullException(nameof(value));
612 Contract.EndContractBlock();
614 decoderFallback = value;
// Creates a shallow copy of this encoding via MemberwiseClone, so reference-typed fields
// (the fallbacks, the code page data item) are shared with the original until reassigned.
619 public virtual Object Clone()
621 Encoding newEncoding = (Encoding)this.MemberwiseClone();
623 // The new one should be writable: clear the read-only flag on the copy only.
624 newEncoding._isReadOnly = false;
// Reports the _isReadOnly flag: true for a freshly constructed encoding (the field's
// initial value), false for a copy produced by Clone().
629 public bool IsReadOnly
633 return (_isReadOnly);
637 // Returns an encoding for the ASCII character set. The returned encoding
638 // will be an instance of the ASCIIEncoding class.
// Every call returns the same shared instance, ASCIIEncoding.s_default.
640 public static Encoding ASCII => ASCIIEncoding.s_default;
642 // Returns an encoding for the Latin1 character set. The returned encoding
643 // will be an instance of the Latin1Encoding class.
645 // This is for our optimizations
// Private: callers outside this class reach it through GetEncoding(28591).
// Every call returns the same shared instance, Latin1Encoding.s_default.
646 private static Encoding Latin1 => Latin1Encoding.s_default;
648 // Returns the number of bytes required to encode the given character
// array. The whole array is counted by forwarding to the (chars, index, count) overload
// with index 0 and count chars.Length. The failure path throws ArgumentNullException
// naming chars.
652 public virtual int GetByteCount(char[] chars)
656 throw new ArgumentNullException(nameof(chars),
657 SR.ArgumentNull_Array);
659 Contract.EndContractBlock();
661 return GetByteCount(chars, 0, chars.Length);
// Returns the number of bytes required to encode the whole string s.
// The failure path throws ArgumentNullException naming s.
665 public virtual int GetByteCount(String s)
668 throw new ArgumentNullException(nameof(s));
669 Contract.EndContractBlock();
// Copies the string into a temporary char[] so the array overload can be used.
671 char[] chars = s.ToCharArray();
672 return GetByteCount(chars, 0, chars.Length);
675 // Returns the number of bytes required to encode a range of characters in
676 // a character array.
// Abstract: every concrete encoding supplies this. The array, string and pointer
// overloads in this class all end up calling it.
679 public abstract int GetByteCount(char[] chars, int index, int count);
681 // Returns the number of bytes required to encode a string range.
// Pins the string and forwards the sub-range to the pointer overload, so no temporary
// char[] is created here.
// Failure paths: ArgumentNullException naming s; ArgumentOutOfRangeException naming
// index or count (non-negative-number message); ArgumentOutOfRangeException naming
// index when index > s.Length - count. That last test is written as a subtraction,
// which cannot overflow once count is known to be non-negative.
// NOTE(review): the conditions guarding the first three throws are not visible in this view.
684 public int GetByteCount(string s, int index, int count)
687 throw new ArgumentNullException(nameof(s),
688 SR.ArgumentNull_String);
690 throw new ArgumentOutOfRangeException(nameof(index),
691 SR.ArgumentOutOfRange_NeedNonNegNum);
693 throw new ArgumentOutOfRangeException(nameof(count),
694 SR.ArgumentOutOfRange_NeedNonNegNum);
695 if (index > s.Length - count)
696 throw new ArgumentOutOfRangeException(nameof(index),
697 SR.ArgumentOutOfRange_IndexCount);
698 Contract.EndContractBlock();
702 fixed (char* pChar = s)
704 return GetByteCount(pChar + index, count);
709 // We expect this to be the workhorse for NLS encodings
710 // unfortunately for existing overrides, it has to call the [] version,
711 // which is really slow, so this method should be avoided if you're calling
712 // a 3rd party encoding.
// This default implementation copies count chars from the pointer into a new char[] and
// calls the abstract array overload on that copy.
// Failure paths: ArgumentNullException naming chars; ArgumentOutOfRangeException naming
// count (non-negative-number message).
714 [CLSCompliant(false)]
715 public virtual unsafe int GetByteCount(char* chars, int count)
717 // Validate input parameters
719 throw new ArgumentNullException(nameof(chars),
720 SR.ArgumentNull_Array);
723 throw new ArgumentOutOfRangeException(nameof(count),
724 SR.ArgumentOutOfRange_NeedNonNegNum);
725 Contract.EndContractBlock();
727 char[] arrChar = new char[count];
730 for (index = 0; index < count; index++)
731 arrChar[index] = chars[index];
733 return GetByteCount(arrChar, 0, count);
736 // For NLS Encodings, workhorse takes an encoder (may be null)
737 // Always validate parameters before calling internal version, which will only assert.
// This base implementation ignores the encoder argument and forwards to the public
// pointer overload; the asserts below are debug-only checks.
738 internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
740 Debug.Assert(chars != null);
741 Debug.Assert(count >= 0);
743 return GetByteCount(chars, count);
746 // Returns a byte array containing the encoded representation of the given
// character array. The whole array is encoded by forwarding to the
// (chars, index, count) overload with index 0 and count chars.Length.
// The failure path throws ArgumentNullException naming chars.
750 public virtual byte[] GetBytes(char[] chars)
754 throw new ArgumentNullException(nameof(chars),
755 SR.ArgumentNull_Array);
757 Contract.EndContractBlock();
758 return GetBytes(chars, 0, chars.Length);
761 // Returns a byte array containing the encoded representation of a range
762 // of characters in a character array.
// Two passes over the input: GetByteCount sizes the result array exactly, then the
// five-argument GetBytes fills it starting at offset 0. No argument checks are made
// here; they are left to the two methods being called.
765 public virtual byte[] GetBytes(char[] chars, int index, int count)
767 byte[] result = new byte[GetByteCount(chars, index, count)];
768 GetBytes(chars, index, count, result, 0);
772 // Encodes a range of characters in a character array into a range of bytes
773 // in a byte array. An exception occurs if the byte array is not large
774 // enough to hold the complete encoding of the characters. The
775 // GetByteCount method can be used to determine the exact number of
776 // bytes that will be produced for a given range of characters.
777 // Alternatively, the GetMaxByteCount method can be used to
778 // determine the maximum number of bytes that will be produced for a given
779 // number of characters, regardless of the actual character values.
// Abstract: every concrete encoding supplies this. Callers in this class treat the
// return value as the number of bytes written.
781 public abstract int GetBytes(char[] chars, int charIndex, int charCount,
782 byte[] bytes, int byteIndex);
784 // Returns a byte array containing the encoded representation of the given
// string. The result array is sized with GetByteCount(s) and then filled by encoding
// all of s into it starting at offset 0.
// The failure path throws ArgumentNullException naming s.
788 public virtual byte[] GetBytes(String s)
791 throw new ArgumentNullException(nameof(s),
792 SR.ArgumentNull_String);
793 Contract.EndContractBlock();
795 int byteCount = GetByteCount(s);
796 byte[] bytes = new byte[byteCount];
797 int bytesReceived = GetBytes(s, 0, s.Length, bytes, 0);
// Debug-only check that the count pass and the encode pass agree.
798 Debug.Assert(byteCount == bytesReceived);
802 // Returns a byte array containing the encoded representation of the given
// string range. The string is pinned and the sub-range is counted and encoded through the
// pointer overloads, so no temporary char[] is created here.
// Failure paths: ArgumentNullException naming s; ArgumentOutOfRangeException naming
// index or count (non-negative-number message); ArgumentOutOfRangeException naming
// index when index > s.Length - count.
// NOTE(review): the conditions guarding the first three throws, and the condition for
// the early Array.Empty<byte>() return, are not visible in this view. An early return
// for a zero byte count would be needed because &bytes[0] below requires a non-empty array.
806 public byte[] GetBytes(string s, int index, int count)
809 throw new ArgumentNullException(nameof(s),
810 SR.ArgumentNull_String);
812 throw new ArgumentOutOfRangeException(nameof(index),
813 SR.ArgumentOutOfRange_NeedNonNegNum);
815 throw new ArgumentOutOfRangeException(nameof(count),
816 SR.ArgumentOutOfRange_NeedNonNegNum);
817 if (index > s.Length - count)
818 throw new ArgumentOutOfRangeException(nameof(index),
819 SR.ArgumentOutOfRange_IndexCount);
820 Contract.EndContractBlock();
824 fixed (char* pChar = s)
826 int byteCount = GetByteCount(pChar + index, count);
828 return Array.Empty<byte>();
830 byte[] bytes = new byte[byteCount];
831 fixed (byte* pBytes = &bytes[0])
833 int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount);
// Debug-only check that the count pass and the encode pass agree.
834 Debug.Assert(byteCount == bytesReceived);
// Encodes charCount characters of s starting at charIndex into bytes starting at
// byteIndex, by forwarding to the abstract char[] overload.
// The entire string is first copied to a new char[] (s.ToCharArray()), regardless of
// how small the requested range is. Only s is null-checked here; the range and buffer
// arguments are left to the array overload.
841 public virtual int GetBytes(String s, int charIndex, int charCount,
842 byte[] bytes, int byteIndex)
845 throw new ArgumentNullException(nameof(s));
846 Contract.EndContractBlock();
847 return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
850 // This is our internal workhorse
851 // Always validate parameters before calling internal version, which will only assert.
// This base implementation ignores the encoder argument and forwards to the public
// pointer overload.
852 internal virtual unsafe int GetBytes(char* chars, int charCount,
853 byte* bytes, int byteCount, EncoderNLS encoder)
855 return GetBytes(chars, charCount, bytes, byteCount);
858 // We expect this to be the workhorse for NLS Encodings, but for existing
859 // ones we need a working (if slow) default implementation
861 // WARNING WARNING WARNING
863 // WARNING: If this breaks it could be a security threat. Obviously we
864 // call this internally, so you need to make sure that your pointers, counts
865 // and indexes are correct when you call this method.
867 // In addition, we have internal code, which will be marked as "safe" calling
868 // this code. However this code is dependent upon the implementation of an
869 // external GetBytes() method, which could be overridden by a third party and
870 // the results of which cannot be guaranteed. We use that result to copy
871 // the byte[] to our byte* output buffer. If the result count was wrong, we
872 // could easily overflow our output buffer. Therefore we do an extra test
873 // when we copy the buffer so that we don't overflow byteCount either.
// Summary of this default implementation: copy charCount chars from the input pointer
// into a managed char[], encode them into a managed byte[] of byteCount elements with
// the abstract array overload, then copy bytes back out to the caller's buffer.
// Throws ArgumentNullException naming bytes or chars (bytes is reported first when both
// are null) and ArgumentOutOfRangeException naming charCount or byteCount when negative.
875 [CLSCompliant(false)]
876 public virtual unsafe int GetBytes(char* chars, int charCount,
877 byte* bytes, int byteCount)
879 // Validate input parameters
880 if (bytes == null || chars == null)
881 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
882 SR.ArgumentNull_Array);
884 if (charCount < 0 || byteCount < 0)
885 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
886 SR.ArgumentOutOfRange_NeedNonNegNum);
887 Contract.EndContractBlock();
889 // Get the char array to convert
890 char[] arrChar = new char[charCount];
893 for (index = 0; index < charCount; index++)
894 arrChar[index] = chars[index];
896 // Get the byte array to fill
897 byte[] arrByte = new byte[byteCount];
900 int result = GetBytes(arrChar, 0, charCount, arrByte, 0);
// Debug-only check; release builds rely on the bounded copy below instead.
902 Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");
904 // Copy the byte array
905 // WARNING: We MUST make sure that we don't copy too many bytes. We can't
906 // rely on result because it could be a 3rd party implementation. We need
907 // to make sure we never copy more than byteCount bytes no matter the value
// NOTE(review): the statement guarded by the comparison below is not visible in this
// view; presumably it lowers byteCount to result.
909 if (result < byteCount)
912 // Copy the data, don't overrun our array!
// The loop is bounded by byteCount, which is never larger than arrByte's length, so it
// stays inside both arrByte and the caller-declared size of the output buffer.
913 for (index = 0; index < byteCount; index++)
914 bytes[index] = arrByte[index];
919 // Returns the number of characters produced by decoding the given byte
// array. The whole array is counted by forwarding to the (bytes, index, count) overload
// with index 0 and count bytes.Length. The failure path throws ArgumentNullException
// naming bytes.
923 public virtual int GetCharCount(byte[] bytes)
927 throw new ArgumentNullException(nameof(bytes),
928 SR.ArgumentNull_Array);
930 Contract.EndContractBlock();
931 return GetCharCount(bytes, 0, bytes.Length);
934 // Returns the number of characters produced by decoding a range of bytes
// in a byte array. Abstract: every concrete encoding supplies this. The array and
// pointer overloads in this class end up calling it.
938 public abstract int GetCharCount(byte[] bytes, int index, int count);
940 // We expect this to be the workhorse for NLS Encodings, but for existing
941 // ones we need a working (if slow) default implementation
// This default implementation copies count bytes from the pointer into a new byte[] and
// calls the abstract array overload on that copy.
// Failure paths: ArgumentNullException naming bytes; ArgumentOutOfRangeException naming
// count (non-negative-number message).
943 [CLSCompliant(false)]
944 public virtual unsafe int GetCharCount(byte* bytes, int count)
946 // Validate input parameters
948 throw new ArgumentNullException(nameof(bytes),
949 SR.ArgumentNull_Array);
952 throw new ArgumentOutOfRangeException(nameof(count),
953 SR.ArgumentOutOfRange_NeedNonNegNum);
954 Contract.EndContractBlock();
956 byte[] arrbyte = new byte[count];
959 for (index = 0; index < count; index++)
960 arrbyte[index] = bytes[index];
962 return GetCharCount(arrbyte, 0, count);
965 // This is our internal workhorse
966 // Always validate parameters before calling internal version, which will only assert.
// This base implementation ignores the decoder argument and forwards to the public
// pointer overload.
967 internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
969 return GetCharCount(bytes, count);
972 // Returns a character array containing the decoded representation of a
// given byte array. The whole array is decoded by forwarding to the
// (bytes, index, count) overload with index 0 and count bytes.Length.
// The failure path throws ArgumentNullException naming bytes.
976 public virtual char[] GetChars(byte[] bytes)
980 throw new ArgumentNullException(nameof(bytes),
981 SR.ArgumentNull_Array);
983 Contract.EndContractBlock();
984 return GetChars(bytes, 0, bytes.Length);
987 // Returns a character array containing the decoded representation of a
988 // range of bytes in a byte array.
// Two passes over the input: GetCharCount sizes the result array exactly, then the
// five-argument GetChars fills it starting at offset 0. No argument checks are made
// here; they are left to the two methods being called.
991 public virtual char[] GetChars(byte[] bytes, int index, int count)
993 char[] result = new char[GetCharCount(bytes, index, count)];
994 GetChars(bytes, index, count, result, 0);
998 // Decodes a range of bytes in a byte array into a range of characters in a
999 // character array. An exception occurs if the character array is not large
1000 // enough to hold the complete decoding of the bytes. The
1001 // GetCharCount method can be used to determine the exact number of
1002 // characters that will be produced for a given range of bytes.
1003 // Alternatively, the GetMaxCharCount method can be used to
1004 // determine the maximum number of characters that will be produced for a
1005 // given number of bytes, regardless of the actual byte values.
// Abstract: every concrete encoding supplies this. Callers in this class treat the
// return value as the number of chars written.
1008 public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
1009 char[] chars, int charIndex);
1012 // We expect this to be the workhorse for NLS Encodings, but for existing
1013 // ones we need a working (if slow) default implementation
1015 // WARNING WARNING WARNING
1017 // WARNING: If this breaks it could be a security threat. Obviously we
1018 // call this internally, so you need to make sure that your pointers, counts
1019 // and indexes are correct when you call this method.
1021 // In addition, we have internal code, which will be marked as "safe" calling
1022 // this code. However this code is dependent upon the implementation of an
1023 // external GetChars() method, which could be overridden by a third party and
1024 // the results of which cannot be guaranteed. We use that result to copy
1025 // the char[] to our char* output buffer. If the result count was wrong, we
1026 // could easily overflow our output buffer. Therefore we do an extra test
1027 // when we copy the buffer so that we don't overflow charCount either.
// Summary of this default implementation: copy byteCount bytes from the input pointer
// into a managed byte[], decode them into a managed char[] of charCount elements with
// the abstract array overload, then copy chars back out to the caller's buffer.
// Throws ArgumentNullException naming chars or bytes (chars is reported first when both
// are null) and ArgumentOutOfRangeException naming byteCount or charCount when negative.
1029 [CLSCompliant(false)]
1030 public virtual unsafe int GetChars(byte* bytes, int byteCount,
1031 char* chars, int charCount)
1033 // Validate input parameters
1034 if (chars == null || bytes == null)
1035 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
1036 SR.ArgumentNull_Array);
1038 if (byteCount < 0 || charCount < 0)
1039 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
1040 SR.ArgumentOutOfRange_NeedNonNegNum);
1041 Contract.EndContractBlock();
1043 // Get the byte array to convert
1044 byte[] arrByte = new byte[byteCount];
1047 for (index = 0; index < byteCount; index++)
1048 arrByte[index] = bytes[index];
1050 // Get the char array to fill
1051 char[] arrChar = new char[charCount];
1054 int result = GetChars(arrByte, 0, byteCount, arrChar, 0);
// Debug-only check; release builds rely on the bounded copy below instead.
1056 Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");
1058 // Copy the char array
1059 // WARNING: We MUST make sure that we don't copy too many chars. We can't
1060 // rely on result because it could be a 3rd party implementation. We need
1061 // to make sure we never copy more than charCount chars no matter the value
// NOTE(review): the statement guarded by the comparison below is not visible in this
// view; presumably it lowers charCount to result.
1063 if (result < charCount)
1066 // Copy the data, don't overrun our array!
// The loop is bounded by charCount, which is never larger than arrChar's length, so it
// stays inside both arrChar and the caller-declared size of the output buffer.
1067 for (index = 0; index < charCount; index++)
1068 chars[index] = arrChar[index];
1074 // This is our internal workhorse
1075 // Always validate parameters before calling internal version, which will only assert.
// This base implementation ignores the decoder argument and forwards to the public
// pointer overload.
1076 internal virtual unsafe int GetChars(byte* bytes, int byteCount,
1077 char* chars, int charCount, DecoderNLS decoder)
1079 return GetChars(bytes, byteCount, chars, charCount);
// Decodes byteCount bytes starting at the given pointer into a new string, delegating
// the actual work to String.CreateStringFromEncoding with this encoding.
// Failure paths: ArgumentNullException naming bytes; ArgumentOutOfRangeException naming
// byteCount (non-negative-number message).
// NOTE(review): the conditions guarding those throws are not visible in this view.
1083 [CLSCompliant(false)]
1084 public unsafe string GetString(byte* bytes, int byteCount)
1087 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
1090 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1091 Contract.EndContractBlock();
1093 return String.CreateStringFromEncoding(bytes, byteCount, this);
// Returns the code page identifier of this encoding. The returned value is
// an integer between 0 and 65535 if the encoding has a code page
// identifier, or -1 if the encoding does not represent a code page.
//
// NOTE(review): the accessor was missing here; it is restored as a plain read
// of _codePage, the field Equals and GetHashCode use — confirm.
public virtual int CodePage
{
    get
    {
        return _codePage;
    }
}
// IsAlwaysNormalized
// Convenience overload: asks the form-specific overload about NormalizationForm.FormC.
public bool IsAlwaysNormalized() => IsAlwaysNormalized(NormalizationForm.FormC);
// Returns true if this encoding's output is always normalized for the given
// normalization form. The base implementation never claims this; encodings
// that know better override it.
public virtual bool IsAlwaysNormalized(NormalizationForm form)
{
    // Assume false unless the encoding knows otherwise
    return false;
}
// Returns a Decoder object for this encoding, used to turn a sequence of
// bytes into a sequence of characters. Unlike the GetChars family of
// methods, a Decoder can convert partial byte sequences into partial
// character sequences by keeping state between conversions.
//
// The default implementation hands back a DefaultDecoder wrapping this
// encoding, which simply forwards to this encoding's GetCharCount and
// GetChars methods. Encodings that need state carried between successive
// conversions should override this method and return an appropriate
// Decoder implementation.
public virtual Decoder GetDecoder()
{
    Decoder forwardingDecoder = new DefaultDecoder(this);
    return forwardingDecoder;
}
// Returns an Encoder object for this encoding, used to turn a sequence of
// characters into a sequence of bytes. Unlike the GetBytes family of
// methods, an Encoder can convert partial character sequences into partial
// byte sequences by keeping state between conversions.
//
// The default implementation hands back a DefaultEncoder wrapping this
// encoding, which simply forwards to this encoding's GetByteCount and
// GetBytes methods. Encodings that need state carried between successive
// conversions should override this method and return an appropriate
// Encoder implementation.
public virtual Encoder GetEncoder()
{
    Encoder forwardingEncoder = new DefaultEncoder(this);
    return forwardingEncoder;
}
1162 // Returns the maximum number of bytes required to encode a given number of
1163 // characters. This method can be used to determine an appropriate buffer
1164 // size for byte arrays passed to the GetBytes method of this
1165 // encoding or the GetBytes method of an Encoder for this
1166 // encoding. All encodings must guarantee that no buffer overflow
1167 // exceptions will occur if buffers are sized according to the results of this method.
1170 // WARNING: If you're using something besides the default replacement encoder fallback,
1171 // then you could have more bytes than this returned from an actual call to GetBytes().
// Abstract: every concrete encoding supplies its own bound for charCount characters.
1174 public abstract int GetMaxByteCount(int charCount);
1176 // Returns the maximum number of characters produced by decoding a given
1177 // number of bytes. This method can be used to determine an appropriate
1178 // buffer size for character arrays passed to the GetChars method of
1179 // this encoding or the GetChars method of a Decoder for this
1180 // encoding. All encodings must guarantee that no buffer overflow
1181 // exceptions will occur if buffers are sized according to the results of this method.
// Abstract: every concrete encoding supplies its own bound for byteCount bytes.
1185 public abstract int GetMaxCharCount(int byteCount);
// Returns a string containing the decoded representation of a given byte
// array. The whole array is decoded by forwarding to the
// (bytes, index, count) overload with index 0 and count bytes.Length.
// Throws ArgumentNullException when 'bytes' is null.
public virtual String GetString(byte[] bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes),
            SR.ArgumentNull_Array);
    }
    Contract.EndContractBlock();

    return GetString(bytes, 0, bytes.Length);
}
// Returns a string containing the decoded representation of 'count' bytes of
// 'bytes' starting at 'index'.
//
// The range is first decoded to a char array with GetChars, then wrapped in
// a string. Internally we override this for performance.
public virtual String GetString(byte[] bytes, int index, int count)
{
    char[] decoded = GetChars(bytes, index, count);
    return new String(decoded);
}
1212 // Returns an encoding for Unicode format. The returned encoding will be
1213 // an instance of the UnicodeEncoding class.
1215 // It will use little endian byte order, but will detect
1216 // input in big endian if it finds a byte order mark per Unicode 2.0.
// Reads UnicodeEncoding.s_littleEndianDefault.
1218 public static Encoding Unicode => UnicodeEncoding.s_littleEndianDefault;
1220 // Returns an encoding for Unicode format. The returned encoding will be
1221 // an instance of the UnicodeEncoding class.
1223 // It will use big endian byte order, but will detect
1224 // input in little endian if it finds a byte order mark per Unicode 2.0.
// Reads UnicodeEncoding.s_bigEndianDefault.
1226 public static Encoding BigEndianUnicode => UnicodeEncoding.s_bigEndianDefault;
1228 // Returns an encoding for the UTF-7 format. The returned encoding will be
1229 // an instance of the UTF7Encoding class.
// Reads UTF7Encoding.s_default.
1231 public static Encoding UTF7 => UTF7Encoding.s_default;
1233 // Returns an encoding for the UTF-8 format. The returned encoding will be
1234 // an instance of the UTF8Encoding class.
// Reads UTF8Encoding.s_default.
1236 public static Encoding UTF8 => UTF8Encoding.s_default;
1238 // Returns an encoding for the UTF-32 format. The returned encoding will be
1239 // an instance of the UTF32Encoding class.
// Reads UTF32Encoding.s_default.
// NOTE(review): presumably little endian, given the big endian sibling below — confirm.
1241 public static Encoding UTF32 => UTF32Encoding.s_default;
1243 // Returns an encoding for the UTF-32 format. The returned encoding will be
1244 // an instance of the UTF32Encoding class.
1246 // It will use big endian byte order.
// Private, unlike the other properties here. Reads UTF32Encoding.s_bigEndianDefault.
1248 private static Encoding BigEndianUTF32 => UTF32Encoding.s_bigEndianDefault;
// Two encodings are equal when 'value' is an Encoding with the same code
// page and equal encoder and decoder fallbacks. Returns false for null or
// for any object that is not an Encoding.
public override bool Equals(Object value)
{
    Encoding that = value as Encoding;

    // 'as' yields null for null input and for non-Encoding objects; without
    // this guard the comparisons below would dereference null.
    if (that != null)
    {
        return (_codePage == that._codePage) &&
               (EncoderFallback.Equals(that.EncoderFallback)) &&
               (DecoderFallback.Equals(that.DecoderFallback));
    }

    return false;
}
// Hash is the sum of the code page and the hash codes of the encoder and
// decoder fallbacks, matching the three fields Equals compares.
public override int GetHashCode()
{
    int encoderFallbackHash = this.EncoderFallback.GetHashCode();
    int decoderFallbackHash = this.DecoderFallback.GetHashCode();
    return _codePage + encoderFallbackHash + decoderFallbackHash;
}
// Best-fit data for the Unicode-to-bytes direction.
// Normally we don't have any best fit data, so the base returns an empty array.
internal virtual char[] GetBestFitUnicodeToBytesData() => Array.Empty<char>();
// Best-fit data for the bytes-to-Unicode direction.
// Normally we don't have any best fit data, so the base returns an empty array.
internal virtual char[] GetBestFitBytesToUnicodeData() => Array.Empty<char>();
// Always throws an ArgumentException (parameter name "bytes") reporting that
// the output byte buffer overflowed.
internal void ThrowBytesOverflow()
{
    // Special message to include fallback type in case fallback's GetMaxCharCount is broken
    // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
    string message = SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType());
    throw new ArgumentException(message, "bytes");
}
// Throws the bytes-overflow exception when there is no encoder, when the
// encoder has m_throwOnOverflow set, or when nothing was encoded. Otherwise
// it only clears the encoder's must-flush state and returns.
internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
{
    bool mustThrow = encoder == null || encoder.m_throwOnOverflow || nothingEncoded;
    if (mustThrow)
    {
        // Reset an existing fallback buffer before throwing.
        if (encoder != null && encoder.InternalHasFallbackBuffer)
        {
            encoder.FallbackBuffer.InternalReset();
        }

        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
        ThrowBytesOverflow();
    }

    // If we didn't throw, we are in convert and have to remember our flushing
    encoder.ClearMustFlush();
}
// Always throws an ArgumentException (parameter name "chars") reporting that
// the output char buffer overflowed.
internal void ThrowCharsOverflow()
{
    // Special message to include fallback type in case fallback's GetMaxCharCount is broken
    // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
    string message = SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
    throw new ArgumentException(message, "chars");
}
// Throws the chars-overflow exception when there is no decoder, when the
// decoder has m_throwOnOverflow set, or when nothing was decoded. Otherwise
// it only clears the decoder's must-flush state and returns.
internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
{
    bool mustThrow = decoder == null || decoder.m_throwOnOverflow || nothingDecoded;
    if (mustThrow)
    {
        // Reset an existing fallback buffer before throwing.
        if (decoder != null && decoder.InternalHasFallbackBuffer)
        {
            decoder.FallbackBuffer.InternalReset();
        }

        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
        // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
        ThrowCharsOverflow();
    }

    // If we didn't throw, we are in convert and have to remember our flushing
    decoder.ClearMustFlush();
}
// Encoder that keeps no state of its own: every call is forwarded to the
// wrapped Encoding, and the 'flush' argument is not passed along.
// Serialization entry points are present but throw PlatformNotSupportedException.
internal sealed class DefaultEncoder : Encoder, IObjectReference, ISerializable
{
    // The encoding all calls are forwarded to.
    private Encoding m_encoding;

    public DefaultEncoder(Encoding encoding)
    {
        m_encoding = encoding;
    }

    // IObjectReference implementation; not supported.
    public Object GetRealObject(StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // ISerializable implementation, get data for this object; not supported.
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // Returns the number of bytes the next call to GetBytes will produce for
    // the given range of characters. This implementation asks the wrapped
    // encoding's GetByteCount(chars, index, count) and does not use 'flush'.
    public override int GetByteCount(char[] chars, int index, int count, bool flush)
        => m_encoding.GetByteCount(chars, index, count);

    // Pointer flavor of GetByteCount; 'flush' is likewise unused.
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetByteCount(char* chars, int count, bool flush)
        => m_encoding.GetByteCount(chars, count);

    // Encodes charCount characters of 'chars' starting at charIndex into
    // 'bytes' starting at byteIndex, and returns what the wrapped encoding's
    // GetBytes returns. 'flush' is not passed along.
    //
    // Sizing the destination: GetByteCount gives the exact number of bytes
    // for a given range of characters, and the GetMaxByteCount method of the
    // Encoding that produced this encoder gives an upper bound for a given
    // number of characters regardless of their values. An exception occurs
    // if the byte array is not large enough for the complete encoding.
    public override int GetBytes(char[] chars, int charIndex, int charCount,
                                 byte[] bytes, int byteIndex, bool flush)
        => m_encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

    // Pointer flavor of GetBytes; 'flush' is likewise unused.
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetBytes(char* chars, int charCount,
                                        byte* bytes, int byteCount, bool flush)
        => m_encoding.GetBytes(chars, charCount, bytes, byteCount);
}
// Decoder that keeps no state of its own: every call is forwarded to the
// wrapped Encoding, and the 'flush' argument is not passed along.
// Serialization entry points are present but throw PlatformNotSupportedException.
internal sealed class DefaultDecoder : Decoder, IObjectReference, ISerializable
{
    // The encoding all calls are forwarded to.
    private Encoding m_encoding;

    public DefaultDecoder(Encoding encoding)
    {
        m_encoding = encoding;
    }

    // IObjectReference implementation; not supported.
    public Object GetRealObject(StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // ISerializable implementation; not supported.
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        throw new PlatformNotSupportedException();
    }

    // Returns the number of characters the next call to GetChars will
    // produce for the given range of bytes. Delegates to the overload that
    // takes 'flush', passing false.
    public override int GetCharCount(byte[] bytes, int index, int count)
        => GetCharCount(bytes, index, count, false);

    // Asks the wrapped encoding's GetCharCount(bytes, index, count); 'flush' is unused.
    public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
        => m_encoding.GetCharCount(bytes, index, count);

    // By default just call the encoding version, no flush by default
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
        => m_encoding.GetCharCount(bytes, count);

    // Decodes byteCount bytes of 'bytes' starting at byteIndex into 'chars'
    // starting at charIndex. Delegates to the overload that takes 'flush',
    // passing false.
    //
    // Sizing the destination: GetCharCount gives the exact number of
    // characters for a given range of bytes, and the GetMaxCharCount method
    // of the Encoding that produced this decoder gives an upper bound for a
    // given number of bytes regardless of their values. An exception occurs
    // if the character array is not large enough for the complete decoding.
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                 char[] chars, int charIndex)
        => GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);

    // Forwards to the wrapped encoding's array-based GetChars; 'flush' is unused.
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                 char[] chars, int charIndex, bool flush)
        => m_encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

    // By default just call the encoding's version
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetChars(byte* bytes, int byteCount,
                                        char* chars, int charCount, bool flush)
        => m_encoding.GetChars(bytes, byteCount, chars, charCount);
}
// Tracks a read position in an input byte buffer and a write position in an
// output char buffer while decoding, and keeps a running count of chars
// produced. When the char buffer pointer is null nothing is stored and only
// the count is maintained.
//
// NOTE(review): several short statements were missing from this class (field
// initialisation, else-branches, the stores and returns). They are restored
// here to match the surviving statements — confirm against the upstream file.
internal class EncodingCharBuffer
{
    private unsafe char* _chars;        // next char slot to write; null means count only
    private unsafe char* _charStart;    // start of the char buffer
    private unsafe char* _charEnd;      // one past the end of the char buffer
    private int _charCountResult = 0;   // chars produced so far
    private Encoding _enc;
    private DecoderNLS _decoder;        // may be null
    private unsafe byte* _byteStart;    // start of the byte buffer
    private unsafe byte* _byteEnd;      // one past the end of the byte buffer
    private unsafe byte* _bytes;        // next byte to read
    private DecoderFallbackBuffer _fallbackBuffer;

    internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
                                       byte* byteStart, int byteCount)
    {
        _enc = enc;
        _decoder = decoder;

        _chars = charStart;
        _charStart = charStart;
        _charEnd = charStart + charCount;

        _byteStart = byteStart;
        _bytes = byteStart;
        _byteEnd = byteStart + byteCount;

        // Use the decoder's fallback buffer when there is a decoder; otherwise make a fresh one.
        if (_decoder == null)
        {
            _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
        }
        else
        {
            _fallbackBuffer = _decoder.FallbackBuffer;
        }

        // If we're getting chars or getting char count we don't expect to have
        // to remember fallbacks between calls (so it should be empty)
        Debug.Assert(_fallbackBuffer.Remaining == 0,
            "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
        _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
    }

    // Appends one char that was decoded from numBytes input bytes. Returns
    // false (after backing the byte position up by numBytes) when the char
    // buffer is full and ThrowCharsOverflow chose not to throw.
    internal unsafe bool AddChar(char ch, int numBytes)
    {
        if (_chars != null)
        {
            if (_chars >= _charEnd)
            {
                // Throw maybe
                _bytes -= numBytes;                                         // Didn't encode these bytes
                _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
                return false;                                               // No throw, but no store either
            }

            *(_chars++) = ch;
        }

        _charCountResult++;
        return true;
    }

    // Appends one char decoded from a single byte.
    internal unsafe bool AddChar(char ch)
    {
        return AddChar(ch, 1);
    }

    // Appends two chars decoded from numBytes input bytes; both must fit or neither is stored.
    internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
    {
        // Need room for 2 chars
        if (_chars >= _charEnd - 1)
        {
            // Throw maybe
            _bytes -= numBytes;                                         // Didn't encode these bytes
            _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
            return false;                                               // No throw, but no store either
        }

        return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
    }

    // Moves the byte read position by 'count'.
    internal unsafe void AdjustBytes(int count)
    {
        _bytes += count;
    }

    // True while at least one input byte remains.
    internal unsafe bool MoreData
    {
        get
        {
            return _bytes < _byteEnd;
        }
    }

    // Do we have count more bytes?
    internal unsafe bool EvenMoreData(int count)
    {
        return (_bytes <= _byteEnd - count);
    }

    // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
    // but we'll double check just to make sure. Returns 0 when no input is left.
    internal unsafe byte GetNextByte()
    {
        Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more data");
        if (_bytes >= _byteEnd)
        {
            return 0;
        }

        return *(_bytes++);
    }

    // Number of input bytes consumed so far.
    internal unsafe int BytesUsed
    {
        get
        {
            return (int)(_bytes - _byteStart);
        }
    }

    internal unsafe bool Fallback(byte fallbackByte)
    {
        // Build our buffer
        byte[] byteBuffer = new byte[] { fallbackByte };

        // Do the fallback and add the data.
        return Fallback(byteBuffer);
    }

    internal unsafe bool Fallback(byte byte1, byte byte2)
    {
        // Build our buffer
        byte[] byteBuffer = new byte[] { byte1, byte2 };

        // Do the fallback and add the data.
        return Fallback(byteBuffer);
    }

    internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
    {
        // Build our buffer
        byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };

        // Do the fallback and add the data.
        return Fallback(byteBuffer);
    }

    // Runs the decoder fallback for the given undecodable bytes. Returns
    // false (after backing up the byte position and resetting the fallback
    // buffer) when the fallback output did not fit and no exception was thrown.
    internal unsafe bool Fallback(byte[] byteBuffer)
    {
        // Do the fallback and add the data.
        if (_chars != null)
        {
            char* pTemp = _chars;
            if (_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false)
            {
                // Throw maybe
                _bytes -= byteBuffer.Length;                                // Didn't use how many ever bytes we're falling back
                _fallbackBuffer.InternalReset();                            // We didn't use this fallback.
                _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);    // Throw?
                return false;                                               // No throw, but no store either
            }

            // Count however many chars the fallback wrote.
            _charCountResult += unchecked((int)(_chars - pTemp));
        }
        else
        {
            // Counting only: add the count this InternalFallback overload returns.
            _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
        }

        return true;
    }

    // Number of chars produced (or counted) so far.
    internal unsafe int Count
    {
        get
        {
            return _charCountResult;
        }
    }
}
// Tracks a read position in an input char buffer and a write position in an
// output byte buffer while encoding, and keeps a running count of bytes
// produced. When the byte buffer pointer is null nothing is stored and only
// the count is maintained.
//
// NOTE(review): several short statements were missing from this class (the
// _enc assignment, else-branches, the store/count/return in AddByte, the
// tail of the four-byte AddByte, the bThrow guard, the cReturn == 0 check).
// They are restored here to match the surviving statements — confirm against
// the upstream file.
internal class EncodingByteBuffer
{
    private unsafe byte* _bytes;        // next byte slot to write; null means count only
    private unsafe byte* _byteStart;    // start of the byte buffer
    private unsafe byte* _byteEnd;      // one past the end of the byte buffer
    private unsafe char* _chars;        // next char to read
    private unsafe char* _charStart;    // start of the char buffer
    private unsafe char* _charEnd;      // one past the end of the char buffer
    private int _byteCountResult = 0;   // bytes produced so far
    private Encoding _enc;
    private EncoderNLS _encoder;        // may be null
    internal EncoderFallbackBuffer fallbackBuffer;

    internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
                                       byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
    {
        _enc = inEncoding;
        _encoder = inEncoder;

        _charStart = inCharStart;
        _chars = inCharStart;
        _charEnd = inCharStart + inCharCount;

        _bytes = inByteStart;
        _byteStart = inByteStart;
        _byteEnd = inByteStart + inByteCount;

        if (_encoder == null)
        {
            this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
        }
        else
        {
            this.fallbackBuffer = _encoder.FallbackBuffer;
            // If we're not converting we must not have data in our fallback buffer
            if (_encoder.m_throwOnOverflow && _encoder.InternalHasFallbackBuffer &&
                this.fallbackBuffer.Remaining > 0)
            {
                throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
                    _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
            }
        }

        // The last argument tells the fallback buffer whether bytes are actually being stored.
        fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
    }

    // Appends one byte, requiring room for it plus moreBytesExpected further
    // bytes. Returns false when that room is missing and MovePrevious(true)
    // did not throw.
    internal unsafe bool AddByte(byte b, int moreBytesExpected)
    {
        Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
        if (_bytes != null)
        {
            if (_bytes >= _byteEnd - moreBytesExpected)
            {
                // Throw maybe. Check which buffer to back up (only matters if Converting)
                this.MovePrevious(true);            // Throw if necessary
                return false;                       // No throw, but no store either
            }

            *(_bytes++) = b;
        }

        _byteCountResult++;
        return true;
    }

    internal unsafe bool AddByte(byte b1)
    {
        return (AddByte(b1, 0));
    }

    internal unsafe bool AddByte(byte b1, byte b2)
    {
        return (AddByte(b1, b2, 0));
    }

    // Multi-byte overloads: each byte is added with the number of bytes that
    // must still follow it, so the room check happens on the first byte.
    internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
    {
        return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
    }

    internal unsafe bool AddByte(byte b1, byte b2, byte b3)
    {
        return AddByte(b1, b2, b3, (int)0);
    }

    internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
    {
        return (AddByte(b1, 2 + moreBytesExpected) &&
                AddByte(b2, 1 + moreBytesExpected) &&
                AddByte(b3, moreBytesExpected));
    }

    internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
    {
        return (AddByte(b1, 3) &&
                AddByte(b2, 2) &&
                AddByte(b3, 1) &&
                AddByte(b4, 0));
    }

    // Un-consumes the last input item (a fallback char if falling back,
    // otherwise the last source char) and, when bThrow is set, hands off to
    // ThrowBytesOverflow.
    internal unsafe void MovePrevious(bool bThrow)
    {
        if (fallbackBuffer.bFallingBack)
        {
            fallbackBuffer.MovePrevious();                      // don't use last fallback
        }
        else
        {
            Debug.Assert(_chars > _charStart ||
                ((bThrow == true) && (_bytes == _byteStart)),
                "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
            if (_chars > _charStart)
            {
                _chars--;                                       // don't use last char
            }
        }

        if (bThrow)
        {
            _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart);    // Throw? (and reset fallback if not converting)
        }
    }

    internal unsafe bool Fallback(char charFallback)
    {
        // Do the fallback
        return fallbackBuffer.InternalFallback(charFallback, ref _chars);
    }

    internal unsafe bool MoreData
    {
        get
        {
            // See if fallbackBuffer is not empty or if there's data left in chars buffer.
            return ((fallbackBuffer.Remaining > 0) || (_chars < _charEnd));
        }
    }

    // Returns the next char to encode: fallback output first, then source
    // chars. Returns 0 when both are exhausted.
    internal unsafe char GetNextChar()
    {
        // See if there's something in our fallback buffer
        char cReturn = fallbackBuffer.InternalGetNextChar();

        // Nothing in the fallback buffer, return our normal data.
        if (cReturn == 0)
        {
            if (_chars < _charEnd)
            {
                cReturn = *(_chars++);
            }
        }

        return cReturn;
    }

    // Number of source chars consumed so far.
    internal unsafe int CharsUsed
    {
        get
        {
            return (int)(_chars - _charStart);
        }
    }

    // Number of bytes produced (or counted) so far.
    internal unsafe int Count
    {
        get
        {
            return _byteCountResult;
        }
    }
}