1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Diagnostics;
6 using System.Globalization;
7 using System.Threading;
8 using System.Runtime.InteropServices;
9 using System.Runtime.Serialization;
10 using System.Diagnostics.CodeAnalysis;
14 // This abstract base class represents a character encoding. The class provides
15 // methods to convert arrays and strings of Unicode characters to and from
16 // arrays of bytes. A number of Encoding implementations are provided in
17 // the System.Text package, including:
19 // ASCIIEncoding, which encodes Unicode characters as single 7-bit
20 // ASCII characters. This encoding only supports character values between 0x00 and 0x7F.
22 // BaseCodePageEncoding, which encapsulates a Windows code page. Any
23 // installed code page can be accessed through this encoding, and conversions
24 // are performed using the WideCharToMultiByte and
25 // MultiByteToWideChar Windows API functions.
26 // UnicodeEncoding, which encodes each Unicode character as two
27 // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
28 // page 1201) encodings are recognized.
29 // UTF7Encoding, which encodes Unicode characters using the UTF-7
30 // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
31 // encoding supports all Unicode character values, and can also be accessed
32 // as code page 65000.
33 // UTF8Encoding, which encodes Unicode characters using the UTF-8
34 // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
35 // encoding supports all Unicode character values, and can also be accessed
36 // as code page 65001.
37 // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
39 // In addition to directly instantiating Encoding objects, an
40 // application can use the ForCodePage, GetASCII,
41 // GetDefault, GetUnicode, GetUTF7, and GetUTF8
42 // methods in this class to obtain encodings.
44 // Through an encoding, the GetBytes method is used to convert arrays
45 // of characters to arrays of bytes, and the GetChars method is used to
46 // convert arrays of bytes to arrays of characters. The GetBytes and
47 // GetChars methods maintain no state between conversions, and are
48 // generally intended for conversions of complete blocks of bytes and
49 // characters in one operation. When the data to be converted is only available
50 // in sequential blocks (such as data read from a stream) or when the amount of
51 // data is so large that it needs to be divided into smaller blocks, an
52 // application may choose to use a Decoder or an Encoder to
53 // perform the conversion. Decoders and encoders allow sequential blocks of
54 // data to be converted and they maintain the state required to support
55 // conversions of data that spans adjacent blocks. Decoders and encoders are
56 // obtained using the GetDecoder and GetEncoder methods.
58 // The core GetBytes and GetChars methods require the caller
59 // to provide the destination buffer and ensure that the buffer is large enough
60 // to hold the entire result of the conversion. When using these methods,
61 // either directly on an Encoding object or on an associated
62 // Decoder or Encoder, an application can use one of two methods
63 // to allocate destination buffers.
65 // The GetByteCount and GetCharCount methods can be used to
66 // compute the exact size of the result of a particular conversion, and an
67 // appropriately sized buffer for that conversion can then be allocated.
68 // The GetMaxByteCount and GetMaxCharCount methods can be
69 // used to compute the maximum possible size of a conversion of a given
70 // number of bytes or characters, and a buffer of that size can then be reused
71 // for multiple conversions.
73 // The first method generally uses less memory, whereas the second method
74 // generally executes faster.
77 public abstract class Encoding : ICloneable
// Backing instance for Default. netcore has no ANSI code page to fall back on, so a
// sealed UTF-8 encoding (created with encoderShouldEmitUTF8Identifier: false) is used.
private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding =
    new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);

// The default encoding: always the shared s_defaultEncoding instance.
public static Encoding Default
{
    get { return s_defaultEncoding; }
}
86 // The following values are from mlang.idl. These values
87 // should be in sync with those in mlang.idl.
// Bit flags tested against CodePageDataItem.Flags by the IsMailNewsDisplay,
// IsBrowserDisplay, IsMailNewsSave and IsBrowserSave properties respectively.
89 internal const int MIMECONTF_MAILNEWS = 0x00000001;
90 internal const int MIMECONTF_BROWSER = 0x00000002;
91 internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;
92 internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;
94 // Special Case Code Pages
// GetEncoding(int) maps CodePageDefault to Default and rejects the four "No*" values
// below with an ArgumentException.
95 private const int CodePageDefault = 0;
96 private const int CodePageNoOEM = 1; // OEM Code page not supported
97 private const int CodePageNoMac = 2; // MAC code page not supported
98 private const int CodePageNoThread = 3; // Thread code page not supported
99 private const int CodePageNoSymbol = 42; // Symbol code page not supported
100 private const int CodePageUnicode = 1200; // Unicode
101 private const int CodePageBigEndian = 1201; // Big Endian Unicode
102 private const int CodePageWindows1252 = 1252; // Windows 1252 code page
104 // 20936 has same code page as 10008, so we'll special case it
105 private const int CodePageMacGB2312 = 10008;
106 private const int CodePageGB2312 = 20936;
107 private const int CodePageMacKorean = 10003;
108 private const int CodePageDLLKorean = 20949;
110 // ISO 2022 Code Pages
111 private const int ISO2022JP = 50220;
112 private const int ISO2022JPESC = 50221;
113 private const int ISO2022JPSISO = 50222;
114 private const int ISOKorean = 50225;
115 private const int ISOSimplifiedCN = 50227;
116 private const int EUCJP = 51932;
117 private const int ChineseHZ = 52936; // HZ has ~}~{~~ sequences
119 // 51936 is the same as 936
120 private const int DuplicateEUCCN = 51936;
121 private const int EUCCN = 936;
123 private const int EUCKR = 51949;
125 // Latin 1 & ASCII Code Pages
// These two are internal (not private); GetEncoding(int) maps them to ASCII and Latin1.
126 internal const int CodePageASCII = 20127; // ASCII
127 internal const int ISO_8859_1 = 28591; // Latin1
// ISCII code pages, one constant per script.
130 private const int ISCIIAssemese = 57006;
131 private const int ISCIIBengali = 57003;
132 private const int ISCIIDevanagari = 57002;
133 private const int ISCIIGujarathi = 57010;
134 private const int ISCIIKannada = 57008;
135 private const int ISCIIMalayalam = 57009;
136 private const int ISCIIOriya = 57007;
137 private const int ISCIIPanjabi = 57011;
138 private const int ISCIITamil = 57004;
139 private const int ISCIITelugu = 57005;
// GB18030 code page.
142 private const int GB18030 = 54936;
// ISO-8859-8 variants (the "I" and "Visual" suffixes presumably distinguish logical
// from visual ordering — TODO confirm).
145 private const int ISO_8859_8I = 38598;
146 private const int ISO_8859_8_Visual = 28598;
148 // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
149 private const int ENC50229 = 50229;
151 // Special code pages
// GetEncoding(int) returns the built-in UTF7, UTF8, UTF32 and BigEndianUTF32
// encodings for these values.
152 private const int CodePageUTF7 = 65000;
153 private const int CodePageUTF8 = 65001;
154 private const int CodePageUTF32 = 12000;
155 private const int CodePageUTF32BE = 12001;
// Code page identifier of this encoding; assigned by the constructors.
157 internal int _codePage = 0;
// Cached EncodingTable entry for _codePage; filled in on demand by GetDataItem().
159 internal CodePageDataItem _dataItem = null;
161 // Because of encoders we may be read only
// Starts out true; Clone() sets it to false on the copy. The fallback property setters
// can throw InvalidOperationException (SR.InvalidOperation_ReadOnly) — presumably when
// this flag is set; the guard itself is not visible in this listing.
162 [OptionalField(VersionAdded = 2)]
163 private bool _isReadOnly = true;
165 // Encoding (encoder) fallback
// Exposed through the EncoderFallback / DecoderFallback properties.
166 internal EncoderFallback encoderFallback = null;
167 internal DecoderFallback decoderFallback = null;
// Parameterless constructor: chains to Encoding(int) with code page 0.
169 protected Encoding() : this(0)
// Creates an encoding for the given code page: stores it in _codePage and installs
// the default fallbacks by calling the virtual SetDefaultFallbacks().
// Throws ArgumentOutOfRangeException when codePage fails validation.
174 protected Encoding(int codePage)
176 // Validate code page
// NOTE(review): the condition guarding this throw is not visible in this listing.
179 throw new ArgumentOutOfRangeException(nameof(codePage));
182 // Remember code page
183 _codePage = codePage;
185 // Use default encoder/decoder fallbacks
// Virtual call from a constructor: an override runs before the derived constructor body.
186 this.SetDefaultFallbacks();
189 // This constructor is needed to allow any sub-classing implementation to provide encoder/decoder fallback objects
190 // because the encoding object is always created as read-only object and don't allow setting encoder/decoder fallback
191 // after the creation is done.
// Throws ArgumentOutOfRangeException when codePage fails validation. A null
// encoderFallback / decoderFallback is replaced by the internal best-fit fallback.
192 protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
194 // Validate code page
// NOTE(review): the condition guarding this throw is not visible in this listing.
197 throw new ArgumentOutOfRangeException(nameof(codePage));
200 // Remember code page
201 _codePage = codePage;
// Unlike Encoding(int), this does not call SetDefaultFallbacks(); defaults are applied inline.
203 this.encoderFallback = encoderFallback ?? new InternalEncoderBestFitFallback(this);
204 this.decoderFallback = decoderFallback ?? new InternalDecoderBestFitFallback(this);
207 // Default fallback that we'll use.
// Base implementation installs the internal best-fit encoder and decoder fallbacks,
// each constructed around this encoding. Virtual, so derived encodings can substitute
// their own defaults; it is invoked from the Encoding(int) constructor.
208 internal virtual void SetDefaultFallbacks()
210 // For UTF-X encodings, we use a replacement fallback with an "\xFFFD" string,
211 // For ASCII we use "?" replacement fallback, etc.
212 encoderFallback = new InternalEncoderBestFitFallback(this);
213 decoderFallback = new InternalDecoderBestFitFallback(this);
216 // Converts a byte array from one encoding to another. The bytes in the
217 // bytes array are converted from srcEncoding to
218 // dstEncoding, and the returned value is a new byte array
219 // containing the result of the conversion.
// Throws ArgumentNullException for bytes, then forwards the whole array
// (index 0, count bytes.Length) to the five-argument overload.
// NOTE(review): the rest of the parameter list and the null guard are not visible in this listing.
221 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
225 throw new ArgumentNullException(nameof(bytes));
227 return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
230 // Converts a range of bytes in a byte array from one encoding to another.
231 // This method converts count bytes from bytes starting at
232 // index index from srcEncoding to dstEncoding, and
233 // returns a new byte array containing the result of the conversion.
// Throws ArgumentNullException naming whichever of srcEncoding / dstEncoding is null
// (srcEncoding is reported first when both are), and for bytes.
// index and count are not validated here; they are passed straight to srcEncoding.GetChars.
235 public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
236 byte[] bytes, int index, int count)
238 if (srcEncoding == null || dstEncoding == null)
// NOTE(review): the message resource is SR.ArgumentNull_Array even though the
// offending argument is an Encoding, not an array.
240 throw new ArgumentNullException((srcEncoding == null ? nameof(srcEncoding) : nameof(dstEncoding)),
241 SR.ArgumentNull_Array);
// NOTE(review): the guard for this throw is not visible in this listing.
245 throw new ArgumentNullException(nameof(bytes),
246 SR.ArgumentNull_Array);
// Decode to chars with the source encoding, then re-encode with the destination encoding.
249 return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
// Registers an EncodingProvider by forwarding to EncodingProvider.AddProvider.
// No validation is done here.
252 public static void RegisterProvider(EncodingProvider provider)
254 // Parameters validated inside EncodingProvider
255 EncodingProvider.AddProvider(provider);
// Returns the encoding for a code page number.
// Registered providers are consulted first (before the range check below). Otherwise:
//   - ArgumentOutOfRangeException if codepage is outside 0..65535;
//   - the built-in encodings for the code pages listed in the case labels;
//   - ArgumentException for the special Win32 values 1, 2, 3 and 42;
//   - NotSupportedException if EncodingTable has no data item for the code page.
// NOTE(review): the provider-result check, the switch header and the final return
// statement are not visible in this listing.
258 public static Encoding GetEncoding(int codepage)
260 Encoding result = EncodingProvider.GetEncodingFromProvider(codepage);
265 // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
266 // add the corresponding item in EncodingTable.
267 // Otherwise, the code below will throw exception when trying to call
268 // EncodingTable.GetDataItem().
270 if (codepage < 0 || codepage > 65535)
272 throw new ArgumentOutOfRangeException(
273 nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
279 case CodePageDefault: return Default; // 0
280 case CodePageUnicode: return Unicode; // 1200
281 case CodePageBigEndian: return BigEndianUnicode; // 1201
282 case CodePageUTF32: return UTF32; // 12000
283 case CodePageUTF32BE: return BigEndianUTF32; // 12001
284 case CodePageUTF7: return UTF7; // 65000
285 case CodePageUTF8: return UTF8; // 65001
286 case CodePageASCII: return ASCII; // 20127
287 case ISO_8859_1: return Latin1; // 28591
289 // We don't allow the following special code page values that Win32 allows.
290 case CodePageNoOEM: // 1 CP_OEMCP
291 case CodePageNoMac: // 2 CP_MACCP
292 case CodePageNoThread: // 3 CP_THREAD_ACP
293 case CodePageNoSymbol: // 42 CP_SYMBOL
294 throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
297 // Is it a valid code page?
298 if (EncodingTable.GetCodePageDataItem(codepage) == null)
300 throw new NotSupportedException(
301 SR.Format(SR.NotSupported_NoCodepageData, codepage));
// Returns the encoding for a code page number with caller-supplied fallbacks.
// Registered providers are consulted first. Otherwise the encoding from
// GetEncoding(int) is cloned and the fallbacks are set on the clone.
// The EncoderFallback / DecoderFallback setters can throw ArgumentNullException,
// so null fallbacks are presumably rejected on this path — confirm.
307 public static Encoding GetEncoding(int codepage,
308 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
310 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
// NOTE(review): the statement under this check is not visible — presumably returns the provider's encoding.
312 if (baseEncoding != null)
315 // Get the default encoding (which is cached and read only)
316 baseEncoding = GetEncoding(codepage);
318 // Clone it and set the fallback
// Clone() clears _isReadOnly on the copy before the setters are used.
319 Encoding fallbackEncoding = (Encoding)baseEncoding.Clone();
320 fallbackEncoding.EncoderFallback = encoderFallback;
321 fallbackEncoding.DecoderFallback = decoderFallback;
323 return fallbackEncoding;
326 // Returns an Encoding object for a given name or a given code page value.
// Registered providers are consulted first; otherwise the name is translated to a code
// page by EncodingTable.GetCodePageFromName and resolved through GetEncoding(int).
// name is not validated in the visible code.
328 public static Encoding GetEncoding(String name)
330 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name);
// NOTE(review): the statement under this check is not visible — presumably returns the provider's encoding.
331 if (baseEncoding != null)
335 // NOTE: If you add a new encoding that can be requested by name, be sure to
336 // add the corresponding item in EncodingTable.
337 // Otherwise, the code below will throw exception when trying to call
338 // EncodingTable.GetCodePageFromName().
340 return GetEncoding(EncodingTable.GetCodePageFromName(name));
343 // Returns an Encoding object for a given name or a given code page value.
// Same as GetEncoding(String), but with caller-supplied fallbacks: registered providers
// are consulted first; otherwise the name is mapped to a code page and resolved through
// GetEncoding(int, EncoderFallback, DecoderFallback).
345 public static Encoding GetEncoding(String name,
346 EncoderFallback encoderFallback, DecoderFallback decoderFallback)
348 Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
// NOTE(review): the statement under this check is not visible — presumably returns the provider's encoding.
349 if (baseEncoding != null)
353 // NOTE: If you add a new encoding that can be requested by name, be sure to
354 // add the corresponding item in EncodingTable.
355 // Otherwise, the code below will throw exception when trying to call
356 // EncodingTable.GetCodePageFromName().
358 return (GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback));
361 // Return a list of all EncodingInfo objects describing all of our encodings
// Thin wrapper over EncodingTable.GetEncodings().
362 public static EncodingInfo[] GetEncodings()
364 return EncodingTable.GetEncodings();
// Returns the preamble bytes for this encoding. The base implementation has none and
// returns an empty array; being virtual, derived encodings can override it.
367 public virtual byte[] GetPreamble()
369 return Array.Empty<byte>();
// Span view of the preamble. The base implementation wraps whatever GetPreamble() returns.
public virtual ReadOnlySpan<byte> Preamble
{
    get { return GetPreamble(); }
}
// Loads the EncodingTable entry for _codePage into _dataItem on first use.
// Throws NotSupportedException when the table has no entry for the code page.
374 private void GetDataItem()
376 if (_dataItem == null)
378 _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
// Still null after the lookup means the code page is unknown to EncodingTable.
379 if (_dataItem == null)
381 throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
386 // Returns the name for this encoding that can be used with mail agent body tags.
387 // If the encoding may not be used, the string is empty.
// The value is read from the cached CodePageDataItem.
389 public virtual String BodyName
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
393 if (_dataItem == null)
397 return (_dataItem.BodyName);
401 // Returns the human-readable description of the encoding ( e.g. Hebrew (DOS)).
// Looks up a localized name for CodePage and throws NotSupportedException when none
// exists. If the resource lookup returned the resource identifier itself (a string
// starting with "Globalization_cp_"), the English name from EncodingTable is used.
// NOTE(review): a second EncodingName definition appears later in the file; the two are
// presumably selected by conditional-compilation directives not visible in this listing.
403 public virtual String EncodingName
407 string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
408 if (encodingName == null)
410 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
413 if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
415 // On ProjectN, resource strings are stripped from retail builds and replaced by
416 // their identifier names. Since this property is meant to be a localized string,
417 // but we don't localize ProjectN, we specifically need to do something reasonable
418 // in this case. This currently returns the English name of the encoding from a
419 // static data table.
// NOTE(review): GetCodePageDataItem can return null (GetDataItem checks for that), in
// which case this dereference would throw NullReferenceException before the check below.
420 encodingName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
421 if (encodingName == null)
// NOTE(review): this Format call passes two arguments (WebName, CodePage) while the one
// above passes only CodePage for the same resource string — confirm which is intended.
423 throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
// Maps a code page number to its SR.Globalization_cp_* resource string.
// Only the eight code pages listed here are handled; any other value returns null.
// NOTE(review): the switch header is not visible in this listing.
430 private static string GetLocalizedEncodingNameResource(int codePage)
434 case 1200: return SR.Globalization_cp_1200;
435 case 1201: return SR.Globalization_cp_1201;
436 case 12000: return SR.Globalization_cp_12000;
437 case 12001: return SR.Globalization_cp_12001;
438 case 20127: return SR.Globalization_cp_20127;
439 case 28591: return SR.Globalization_cp_28591;
440 case 65000: return SR.Globalization_cp_65000;
441 case 65001: return SR.Globalization_cp_65001;
442 default: return null;
// Human-readable name of the encoding, fetched from the resource whose key is
// "Globalization_cp_" followed by the code page number.
// NOTE(review): this is the second EncodingName definition in the file; presumably it is
// the alternative branch of a conditional-compilation block not visible in this listing.
446 public virtual String EncodingName
450 return SR.GetResourceString("Globalization_cp_" + _codePage.ToString());
454 // Returns the name for this encoding that can be used with mail agent header
455 // tags. If the encoding may not be used, the string is empty.
// The value is read from the cached CodePageDataItem.
457 public virtual String HeaderName
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
461 if (_dataItem == null)
465 return (_dataItem.HeaderName);
469 // Returns the IANA preferred name for this encoding.
// The value is read from the cached CodePageDataItem.
470 public virtual String WebName
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
474 if (_dataItem == null)
478 return (_dataItem.WebName);
482 // Returns the windows code page that most closely corresponds to this encoding.
// The value is the UIFamilyCodePage of the cached CodePageDataItem.
484 public virtual int WindowsCodePage
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
488 if (_dataItem == null)
492 return (_dataItem.UIFamilyCodePage);
497 // True if and only if the encoding is used for display by browsers clients.
// Tests the MIMECONTF_BROWSER bit of the cached CodePageDataItem's Flags.
499 public virtual bool IsBrowserDisplay
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
503 if (_dataItem == null)
507 return ((_dataItem.Flags & MIMECONTF_BROWSER) != 0);
511 // True if and only if the encoding is used for saving by browsers clients.
// Tests the MIMECONTF_SAVABLE_BROWSER bit of the cached CodePageDataItem's Flags.
513 public virtual bool IsBrowserSave
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
517 if (_dataItem == null)
521 return ((_dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0);
525 // True if and only if the encoding is used for display by mail and news clients.
// Tests the MIMECONTF_MAILNEWS bit of the cached CodePageDataItem's Flags.
527 public virtual bool IsMailNewsDisplay
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
531 if (_dataItem == null)
535 return ((_dataItem.Flags & MIMECONTF_MAILNEWS) != 0);
540 // True if and only if the encoding is used for saving documents by mail and news clients.
// Tests the MIMECONTF_SAVABLE_MAILNEWS bit of the cached CodePageDataItem's Flags.
543 public virtual bool IsMailNewsSave
// NOTE(review): the statement under this check is not visible — presumably GetDataItem(); confirm.
547 if (_dataItem == null)
551 return ((_dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0);
555 // True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
// Virtual, so each encoding can report its own answer.
// NOTE(review): the getter body is not visible in this listing.
557 public virtual bool IsSingleByte
// Gets or sets the encoder fallback for this encoding.
// The setter can throw InvalidOperationException (SR.InvalidOperation_ReadOnly) and
// ArgumentNullException for value, in that order, before storing the new fallback.
// NOTE(review): the guards for both throws are not visible in this listing — presumably
// the read-only flag and a null check on value.
566 public EncoderFallback EncoderFallback
570 return encoderFallback;
576 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
579 throw new ArgumentNullException(nameof(value));
581 encoderFallback = value;
// Gets or sets the decoder fallback for this encoding.
// The setter can throw InvalidOperationException (SR.InvalidOperation_ReadOnly) and
// ArgumentNullException for value, in that order, before storing the new fallback.
// NOTE(review): the guards for both throws are not visible in this listing — presumably
// the read-only flag and a null check on value.
586 public DecoderFallback DecoderFallback
590 return decoderFallback;
596 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
599 throw new ArgumentNullException(nameof(value));
601 decoderFallback = value;
// Creates a shallow copy (MemberwiseClone) of this encoding and clears the copy's
// read-only flag. The copy shares this instance's fallback objects and _dataItem,
// because the clone is memberwise.
// NOTE(review): the return statement is not visible in this listing.
606 public virtual Object Clone()
608 Encoding newEncoding = (Encoding)this.MemberwiseClone();
610 // New one should be writable (not read-only)
611 newEncoding._isReadOnly = false;
// Reports the _isReadOnly flag: true by default, false on instances produced by Clone().
616 public bool IsReadOnly
620 return (_isReadOnly);
// The shared ASCII encoding: always the ASCIIEncoding.s_default instance.
public static Encoding ASCII
{
    get { return ASCIIEncoding.s_default; }
}
// The shared Latin1 encoding: always the Latin1Encoding.s_default instance.
// Private: kept for internal optimizations only (GetEncoding(int) returns it for code page 28591).
private static Encoding Latin1
{
    get { return Latin1Encoding.s_default; }
}
635 // Returns the number of bytes required to encode the given character array.
// Throws ArgumentNullException for chars; otherwise forwards the whole array
// (index 0, count chars.Length) to the abstract three-argument overload.
// NOTE(review): the null guard is not visible in this listing.
638 public virtual int GetByteCount(char[] chars)
642 throw new ArgumentNullException(nameof(chars),
643 SR.ArgumentNull_Array);
646 return GetByteCount(chars, 0, chars.Length);
// Returns the number of bytes required to encode the given string.
// Throws ArgumentNullException for s (guard not visible in this listing).
649 public virtual int GetByteCount(String s)
652 throw new ArgumentNullException(nameof(s));
// Base implementation copies the string into a new char[] so it can reuse the
// abstract array-based overload.
654 char[] chars = s.ToCharArray();
655 return GetByteCount(chars, 0, chars.Length);
658 // Returns the number of bytes required to encode a range of characters in
659 // a character array.
// Abstract: every concrete encoding must implement it. The char[], String and
// char* overloads in this class forward to it.
661 public abstract int GetByteCount(char[] chars, int index, int count);
663 // Returns the number of bytes required to encode a string range.
// Throws ArgumentNullException for s, and ArgumentOutOfRangeException for index or
// count (message NeedNonNegNum) or when the range runs past the end of s.
// NOTE(review): the guards for the first three throws are not visible in this listing.
665 public int GetByteCount(string s, int index, int count)
668 throw new ArgumentNullException(nameof(s),
669 SR.ArgumentNull_String);
671 throw new ArgumentOutOfRangeException(nameof(index),
672 SR.ArgumentOutOfRange_NeedNonNegNum);
674 throw new ArgumentOutOfRangeException(nameof(count),
675 SR.ArgumentOutOfRange_NeedNonNegNum);
// Range check: index + count must not exceed s.Length.
676 if (index > s.Length - count)
677 throw new ArgumentOutOfRangeException(nameof(index),
678 SR.ArgumentOutOfRange_IndexCount);
// Pin the string and hand the requested slice to the pointer-based overload.
682 fixed (char* pChar = s)
684 return GetByteCount(pChar + index, count);
689 // We expect this to be the workhorse for NLS encodings
690 // unfortunately for existing overrides, it has to call the [] version,
691 // which is really slow, so this method should be avoided if you're calling
692 // a 3rd party encoding.
// Throws ArgumentNullException for chars and ArgumentOutOfRangeException for count.
// NOTE(review): the guards for both throws and the declaration of index are not
// visible in this listing.
693 [CLSCompliant(false)]
694 public virtual unsafe int GetByteCount(char* chars, int count)
696 // Validate input parameters
698 throw new ArgumentNullException(nameof(chars),
699 SR.ArgumentNull_Array);
702 throw new ArgumentOutOfRangeException(nameof(count),
703 SR.ArgumentOutOfRange_NeedNonNegNum);
// Copy the unmanaged input into a managed array so the abstract
// array-based overload can be used.
705 char[] arrChar = new char[count];
708 for (index = 0; index < count; index++)
709 arrChar[index] = chars[index];
711 return GetByteCount(arrChar, 0, count);
// Span overload: pins the span and forwards to the pointer-based GetByteCount.
// GetNonNullPinnableReference is used so the pointer passed on is non-null for this
// call (the pointer overload rejects null).
714 public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
716 fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
718 return GetByteCount(charsPtr, chars.Length);
722 // For NLS Encodings, workhorse takes an encoder (may be null)
723 // Always validate parameters before calling internal version, which will only assert.
// Base implementation ignores the encoder argument and forwards to the public
// pointer-based overload.
724 internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
726 Debug.Assert(chars != null);
727 Debug.Assert(count >= 0);
729 return GetByteCount(chars, count);
732 // Returns a byte array containing the encoded representation of the given character array.
// Throws ArgumentNullException for chars; otherwise forwards the whole array
// (index 0, count chars.Length) to GetBytes(char[], int, int).
// NOTE(review): the null guard is not visible in this listing.
735 public virtual byte[] GetBytes(char[] chars)
739 throw new ArgumentNullException(nameof(chars),
740 SR.ArgumentNull_Array);
742 return GetBytes(chars, 0, chars.Length);
745 // Returns a byte array containing the encoded representation of a range
746 // of characters in a character array.
// Two-pass: GetByteCount sizes the result exactly, then the abstract GetBytes fills it.
// No validation is done here; the abstract overloads receive the arguments as given.
// NOTE(review): the return statement is not visible in this listing.
748 public virtual byte[] GetBytes(char[] chars, int index, int count)
750 byte[] result = new byte[GetByteCount(chars, index, count)];
751 GetBytes(chars, index, count, result, 0);
755 // Encodes a range of characters in a character array into a range of bytes
756 // in a byte array. An exception occurs if the byte array is not large
757 // enough to hold the complete encoding of the characters. The
758 // GetByteCount method can be used to determine the exact number of
759 // bytes that will be produced for a given range of characters.
760 // Alternatively, the GetMaxByteCount method can be used to
761 // determine the maximum number of bytes that will be produced for a given
762 // number of characters, regardless of the actual character values.
// Abstract: every concrete encoding must implement it. The array, string and
// pointer-based GetBytes overloads in this class forward to it.
764 public abstract int GetBytes(char[] chars, int charIndex, int charCount,
765 byte[] bytes, int byteIndex);
767 // Returns a byte array containing the encoded representation of the given string.
// Throws ArgumentNullException for s (guard not visible in this listing).
// NOTE(review): the return statement is not visible in this listing.
770 public virtual byte[] GetBytes(String s)
773 throw new ArgumentNullException(nameof(s),
774 SR.ArgumentNull_String);
// Size the buffer exactly, then encode into it.
776 int byteCount = GetByteCount(s);
777 byte[] bytes = new byte[byteCount];
778 int bytesReceived = GetBytes(s, 0, s.Length, bytes, 0);
// Debug-only check that the count pass and the encode pass agree.
779 Debug.Assert(byteCount == bytesReceived);
783 // Returns a byte array containing the encoded representation of the given string range.
// Throws ArgumentNullException for s, and ArgumentOutOfRangeException for index or
// count (message NeedNonNegNum) or when the range runs past the end of s.
// NOTE(review): the guards for the first three throws, the guard for the empty-array
// return and the final return statement are not visible in this listing.
786 public byte[] GetBytes(string s, int index, int count)
789 throw new ArgumentNullException(nameof(s),
790 SR.ArgumentNull_String);
792 throw new ArgumentOutOfRangeException(nameof(index),
793 SR.ArgumentOutOfRange_NeedNonNegNum);
795 throw new ArgumentOutOfRangeException(nameof(count),
796 SR.ArgumentOutOfRange_NeedNonNegNum);
// Range check: index + count must not exceed s.Length.
797 if (index > s.Length - count)
798 throw new ArgumentOutOfRangeException(nameof(index),
799 SR.ArgumentOutOfRange_IndexCount);
// Pin the string; both passes below work on the slice starting at pChar + index.
803 fixed (char* pChar = s)
805 int byteCount = GetByteCount(pChar + index, count);
// Presumably taken when byteCount is 0, which would also keep &bytes[0] below
// from indexing an empty array — confirm.
807 return Array.Empty<byte>();
809 byte[] bytes = new byte[byteCount];
810 fixed (byte* pBytes = &bytes[0])
812 int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount);
// Debug-only check that the count pass and the encode pass agree.
813 Debug.Assert(byteCount == bytesReceived);
// Encodes a range of characters from a string into the given byte array, starting at
// byteIndex, and returns the value produced by the abstract array-based GetBytes.
// Throws ArgumentNullException for s (guard not visible in this listing).
820 public virtual int GetBytes(String s, int charIndex, int charCount,
821 byte[] bytes, int byteIndex)
824 throw new ArgumentNullException(nameof(s));
// Base implementation copies the entire string to a char[] regardless of charCount;
// the remaining arguments are validated by the abstract overload, not here.
825 return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
828 // This is our internal workhorse
829 // Always validate parameters before calling internal version, which will only assert.
// Base implementation ignores the encoder argument and forwards to the public
// pointer-based overload.
830 internal virtual unsafe int GetBytes(char* chars, int charCount,
831 byte* bytes, int byteCount, EncoderNLS encoder)
833 return GetBytes(chars, charCount, bytes, byteCount);
836 // We expect this to be the workhorse for NLS Encodings, but for existing
837 // ones we need a working (if slow) default implementation)
839 // WARNING WARNING WARNING
841 // WARNING: If this breaks it could be a security threat. Obviously we
842 // call this internally, so you need to make sure that your pointers, counts
843 // and indexes are correct when you call this method.
845 // In addition, we have internal code, which will be marked as "safe" calling
846 // this code. However this code is dependent upon the implementation of an
847 // external GetBytes() method, which could be overridden by a third party and
848 // the results of which cannot be guaranteed. We use that result to copy
849 // the byte[] to our byte* output buffer. If the result count was wrong, we
850 // could easily overflow our output buffer. Therefore we do an extra test
851 // when we copy the buffer so that we don't overflow byteCount either.
// Throws ArgumentNullException naming bytes or chars (bytes is reported first when both
// are null) and ArgumentOutOfRangeException naming charCount or byteCount (charCount first).
// NOTE(review): the declaration of index, the statement under `if (result < byteCount)`
// and the return statement are not visible in this listing.
853 [CLSCompliant(false)]
854 public virtual unsafe int GetBytes(char* chars, int charCount,
855 byte* bytes, int byteCount)
857 // Validate input parameters
858 if (bytes == null || chars == null)
859 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
860 SR.ArgumentNull_Array);
862 if (charCount < 0 || byteCount < 0)
863 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
864 SR.ArgumentOutOfRange_NeedNonNegNum);
866 // Get the char array to convert
867 char[] arrChar = new char[charCount];
870 for (index = 0; index < charCount; index++)
871 arrChar[index] = chars[index];
873 // Get the byte array to fill
874 byte[] arrByte = new byte[byteCount];
// Encode through the abstract array-based overload into the managed scratch buffer.
877 int result = GetBytes(arrChar, 0, charCount, arrByte, 0);
879 Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");
881 // Copy the byte array
882 // WARNING: We MUST make sure that we don't copy too many bytes. We can't
883 // rely on result because it could be a 3rd party implementation. We need
884 // to make sure we never copy more than byteCount bytes no matter the value of result.
// Presumably clamps byteCount down to result so only the encoded bytes are copied — confirm.
886 if (result < byteCount)
889 // Copy the data, don't overrun our array!
// The loop bound is byteCount, never result, so the copy cannot exceed the caller's buffer size.
890 for (index = 0; index < byteCount; index++)
891 bytes[index] = arrByte[index];
// Span overload: pins both spans and forwards to the pointer-based GetBytes, using the
// span lengths as the counts. GetNonNullPinnableReference is used so the pointers
// passed on are non-null for this call (the pointer overload rejects null).
896 public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
898 fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
899 fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
901 return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length);
905 // Returns the number of characters produced by decoding the given byte array.
// Throws ArgumentNullException for bytes; otherwise forwards the whole array
// (index 0, count bytes.Length) to the abstract three-argument overload.
// NOTE(review): the null guard is not visible in this listing.
908 public virtual int GetCharCount(byte[] bytes)
912 throw new ArgumentNullException(nameof(bytes),
913 SR.ArgumentNull_Array);
915 return GetCharCount(bytes, 0, bytes.Length);
918 // Returns the number of characters produced by decoding a range of bytes in a byte array.
// Abstract: every concrete encoding must implement it. The byte[] and byte*
// overloads in this class forward to it.
921 public abstract int GetCharCount(byte[] bytes, int index, int count);
923 // We expect this to be the workhorse for NLS Encodings, but for existing
924 // ones we need a working (if slow) default implementation)
// Throws ArgumentNullException for bytes and ArgumentOutOfRangeException for count.
// NOTE(review): the guards for both throws and the declaration of index are not
// visible in this listing.
925 [CLSCompliant(false)]
926 public virtual unsafe int GetCharCount(byte* bytes, int count)
928 // Validate input parameters
930 throw new ArgumentNullException(nameof(bytes),
931 SR.ArgumentNull_Array);
934 throw new ArgumentOutOfRangeException(nameof(count),
935 SR.ArgumentOutOfRange_NeedNonNegNum);
// Copy the unmanaged input into a managed array so the abstract
// array-based overload can be used.
937 byte[] arrbyte = new byte[count];
940 for (index = 0; index < count; index++)
941 arrbyte[index] = bytes[index];
943 return GetCharCount(arrbyte, 0, count);
// Span overload: pins the span and forwards to the pointer-based GetCharCount.
// GetNonNullPinnableReference is used so the pointer passed on is non-null for this
// call (the pointer overload rejects null).
946 public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
948 fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
950 return GetCharCount(bytesPtr, bytes.Length);
954 // This is our internal workhorse
955 // Always validate parameters before calling internal version, which will only assert.
// Base implementation ignores the decoder argument and forwards to the public
// pointer-based overload.
956 internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
958 return GetCharCount(bytes, count);
961 // Returns a character array containing the decoded representation of a given byte array.
// Throws ArgumentNullException for bytes; otherwise forwards the whole array
// (index 0, count bytes.Length) to GetChars(byte[], int, int).
// NOTE(review): the null guard is not visible in this listing.
964 public virtual char[] GetChars(byte[] bytes)
968 throw new ArgumentNullException(nameof(bytes),
969 SR.ArgumentNull_Array);
971 return GetChars(bytes, 0, bytes.Length);
974 // Returns a character array containing the decoded representation of a
975 // range of bytes in a byte array.
// Two-pass: GetCharCount sizes the result exactly, then the abstract GetChars fills it.
// No validation is done here; the abstract overloads receive the arguments as given.
// NOTE(review): the return statement is not visible in this listing.
977 public virtual char[] GetChars(byte[] bytes, int index, int count)
979 char[] result = new char[GetCharCount(bytes, index, count)];
980 GetChars(bytes, index, count, result, 0);
984 // Decodes a range of bytes in a byte array into a range of characters in a
985 // character array. An exception occurs if the character array is not large
986 // enough to hold the complete decoding of the bytes. The
987 // GetCharCount method can be used to determine the exact number of
988 // characters that will be produced for a given range of bytes.
989 // Alternatively, the GetMaxCharCount method can be used to
990 // determine the maximum number of characters that will be produced for a
991 // given number of bytes, regardless of the actual byte values.
// Abstract: every concrete encoding must implement it. The array and pointer-based
// GetChars overloads in this class forward to it.
994 public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
995 char[] chars, int charIndex);
998 // We expect this to be the workhorse for NLS Encodings, but for existing
999 // ones we need a working (if slow) default implementation)
1001 // WARNING WARNING WARNING
1003 // WARNING: If this breaks it could be a security threat. Obviously we
1004 // call this internally, so you need to make sure that your pointers, counts
1005 // and indexes are correct when you call this method.
1007 // In addition, we have internal code, which will be marked as "safe" calling
1008 // this code. However this code is dependent upon the implementation of an
1009 // external GetChars() method, which could be overridden by a third party and
1010 // the results of which cannot be guaranteed. We use that result to copy
1011 // the char[] to our char* output buffer. If the result count was wrong, we
1012 // could easily overflow our output buffer. Therefore we do an extra test
1013 // when we copy the buffer so that we don't overflow charCount either.
// Throws ArgumentNullException naming chars or bytes (chars is reported first when both
// are null) and ArgumentOutOfRangeException naming byteCount or charCount (byteCount first).
// NOTE(review): the declaration of index, the statement under `if (result < charCount)`
// and the return statement are not visible in this listing.
1015 [CLSCompliant(false)]
1016 public virtual unsafe int GetChars(byte* bytes, int byteCount,
1017 char* chars, int charCount)
1019 // Validate input parameters
1020 if (chars == null || bytes == null)
1021 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
1022 SR.ArgumentNull_Array);
1024 if (byteCount < 0 || charCount < 0)
1025 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
1026 SR.ArgumentOutOfRange_NeedNonNegNum);
1028 // Get the byte array to convert
1029 byte[] arrByte = new byte[byteCount];
1032 for (index = 0; index < byteCount; index++)
1033 arrByte[index] = bytes[index];
1035 // Get the char array to fill
1036 char[] arrChar = new char[charCount];
// Decode through the abstract array-based overload into the managed scratch buffer.
1039 int result = GetChars(arrByte, 0, byteCount, arrChar, 0);
1041 Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");
1043 // Copy the char array
1044 // WARNING: We MUST make sure that we don't copy too many chars. We can't
1045 // rely on result because it could be a 3rd party implementation. We need
1046 // to make sure we never copy more than charCount chars no matter the value of result.
// Presumably clamps charCount down to result so only the decoded chars are copied — confirm.
1048 if (result < charCount)
1051 // Copy the data, don't overrun our array!
// The loop bound is charCount, never result, so the copy cannot exceed the caller's buffer size.
1052 for (index = 0; index < charCount; index++)
1053 chars[index] = arrChar[index];
// Span overload: pins both spans and forwards to the pointer-based GetChars, using the
// span lengths as the counts. GetNonNullPinnableReference is used so the pointers
// passed on are non-null for this call (the pointer overload rejects null).
1058 public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
1060 fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
1061 fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
1063 return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length);
1067 // This is our internal workhorse
1068 // Always validate parameters before calling internal version, which will only assert.
// Base implementation ignores the decoder argument and forwards to the public
// pointer-based overload.
1069 internal virtual unsafe int GetChars(byte* bytes, int byteCount,
1070 char* chars, int charCount, DecoderNLS decoder)
1072 return GetChars(bytes, byteCount, chars, charCount);
// Decodes byteCount bytes starting at the bytes pointer into a new string by handing the
// buffer and this encoding to String.CreateStringFromEncoding.
// Argument failures surface as ArgumentNullException (naming bytes) or
// ArgumentOutOfRangeException (naming byteCount).
1076 [CLSCompliant(false)]
1077 public unsafe string GetString(byte* bytes, int byteCount)
1080 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
1083 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1085 return String.CreateStringFromEncoding(bytes, byteCount, this);
// Span-based overload of GetString: pins the span and forwards to the pointer-based
// GetString, using the span length as byteCount.
1088 public unsafe string GetString(ReadOnlySpan<byte> bytes)
1090 fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
1092 return GetString(bytesPtr, bytes.Length);
1097 // Returns the code page identifier of this encoding. The returned value is
1098 // an integer between 0 and 65535 if the encoding has a code page
1099 // identifier, or -1 if the encoding does not represent a code page.
// Declared virtual, so a derived encoding can supply its own value.
1102 public virtual int CodePage
1110 // IsAlwaysNormalized
1111 // Returns true if the encoding is always normalized for the specified encoding form
// This parameterless overload takes no form argument: it always asks about
// NormalizationForm.FormC by forwarding to the overload that accepts a form.
1112 public bool IsAlwaysNormalized()
1114 return this.IsAlwaysNormalized(NormalizationForm.FormC);
// Reports whether this encoding is always normalized for the given normalization form.
// Virtual, so an encoding that knows better can override the default answer.
1117 public virtual bool IsAlwaysNormalized(NormalizationForm form)
1119 // Assume false unless the encoding knows otherwise
1123 // Returns a Decoder object for this encoding. The returned object
1124 // can be used to decode a sequence of bytes into a sequence of characters.
1125 // Contrary to the GetChars family of methods, a Decoder can
1126 // convert partial sequences of bytes into partial sequences of characters
1127 // by maintaining the appropriate state between the conversions.
1129 // This default implementation returns a Decoder that simply
1130 // forwards calls to the GetCharCount and GetChars methods to
1131 // the corresponding methods of this encoding. Encodings that require state
1132 // to be maintained between successive conversions should override this
1133 // method and return an instance of an appropriate Decoder
// implementation.
1137 public virtual Decoder GetDecoder()
// A new DefaultDecoder bound to this encoding instance is created on every call.
1139 return new DefaultDecoder(this);
1142 // Returns an Encoder object for this encoding. The returned object
1143 // can be used to encode a sequence of characters into a sequence of bytes.
1144 // Contrary to the GetBytes family of methods, an Encoder can
1145 // convert partial sequences of characters into partial sequences of bytes
1146 // by maintaining the appropriate state between the conversions.
1148 // This default implementation returns an Encoder that simply
1149 // forwards calls to the GetByteCount and GetBytes methods to
1150 // the corresponding methods of this encoding. Encodings that require state
1151 // to be maintained between successive conversions should override this
1152 // method and return an instance of an appropriate Encoder
// implementation.
1156 public virtual Encoder GetEncoder()
// A new DefaultEncoder bound to this encoding instance is created on every call.
1158 return new DefaultEncoder(this);
1161 // Returns the maximum number of bytes required to encode a given number of
1162 // characters. This method can be used to determine an appropriate buffer
1163 // size for byte arrays passed to the GetBytes method of this
1164 // encoding or the GetBytes method of an Encoder for this
1165 // encoding. All encodings must guarantee that no buffer overflow
1166 // exceptions will occur if buffers are sized according to the results of
// this method.
1169 // WARNING: If you're using something besides the default replacement encoder fallback,
1170 // then you could have more bytes than this returned from an actual call to GetBytes().
// Abstract: each concrete encoding has to provide its own implementation.
1172 public abstract int GetMaxByteCount(int charCount);
1174 // Returns the maximum number of characters produced by decoding a given
1175 // number of bytes. This method can be used to determine an appropriate
1176 // buffer size for character arrays passed to the GetChars method of
1177 // this encoding or the GetChars method of a Decoder for this
1178 // encoding. All encodings must guarantee that no buffer overflow
1179 // exceptions will occur if buffers are sized according to the results of
// this method.
// Abstract: each concrete encoding has to provide its own implementation.
1182 public abstract int GetMaxCharCount(int byteCount);
1184 // Returns a string containing the decoded representation of a given byte
// array.
// Decodes the whole array by forwarding to GetString(bytes, 0, bytes.Length); a failed
// argument check surfaces as ArgumentNullException naming bytes.
1187 public virtual String GetString(byte[] bytes)
1190 throw new ArgumentNullException(nameof(bytes),
1191 SR.ArgumentNull_Array);
1193 return GetString(bytes, 0, bytes.Length);
1196 // Returns a string containing the decoded representation of a range of
1197 // bytes in a byte array.
1199 // Internally we override this for performance
// This base implementation decodes the range with GetChars(bytes, index, count) and
// wraps the resulting char[] in a new String.
1201 public virtual String GetString(byte[] bytes, int index, int count)
1203 return new String(GetChars(bytes, index, count));
1206 // Returns an encoding for Unicode format. The returned encoding will be
1207 // an instance of the UnicodeEncoding class.
1209 // It will use little endian byte order, but will detect
1210 // input in big endian if it finds a byte order mark per Unicode 2.0.
// The value is the static UnicodeEncoding.s_littleEndianDefault field.
1212 public static Encoding Unicode => UnicodeEncoding.s_littleEndianDefault;
1214 // Returns an encoding for Unicode format. The returned encoding will be
1215 // an instance of the UnicodeEncoding class.
1217 // It will use big endian byte order, but will detect
1218 // input in little endian if it finds a byte order mark per Unicode 2.0.
// The value is the static UnicodeEncoding.s_bigEndianDefault field.
1220 public static Encoding BigEndianUnicode => UnicodeEncoding.s_bigEndianDefault;
1222 // Returns an encoding for the UTF-7 format. The returned encoding will be
1223 // an instance of the UTF7Encoding class.
// The value is the static UTF7Encoding.s_default field.
1225 public static Encoding UTF7 => UTF7Encoding.s_default;
1227 // Returns an encoding for the UTF-8 format. The returned encoding will be
1228 // an instance of the UTF8Encoding class.
// The value is the static UTF8Encoding.s_default field.
1230 public static Encoding UTF8 => UTF8Encoding.s_default;
1232 // Returns an encoding for the UTF-32 format. The returned encoding will be
1233 // an instance of the UTF32Encoding class.
// The value is the static UTF32Encoding.s_default field.
1235 public static Encoding UTF32 => UTF32Encoding.s_default;
1237 // Returns an encoding for the UTF-32 format. The returned encoding will be
1238 // an instance of the UTF32Encoding class.
1240 // It will use big endian byte order.
// Private, unlike the other static encoding properties next to it; the value is the
// static UTF32Encoding.s_bigEndianDefault field.
1242 private static Encoding BigEndianUTF32 => UTF32Encoding.s_bigEndianDefault;
// Value equality for encodings: the comparison covers the code page plus the encoder
// and decoder fallbacks of this instance and of the other Encoding.
1244 public override bool Equals(Object value)
// The as-cast produces null when value is not an Encoding.
1246 Encoding that = value as Encoding;
// All three components have to match for the encodings to be considered equal.
1248 return (_codePage == that._codePage) &&
1249 (EncoderFallback.Equals(that.EncoderFallback)) &&
1250 (DecoderFallback.Equals(that.DecoderFallback));
// Hash code formed by adding the code page to the hash codes of the encoder and decoder
// fallbacks - the same three components that Equals compares.
1255 public override int GetHashCode()
1257 return _codePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
// Best-fit data for the Unicode-to-bytes direction. The base implementation returns the
// shared empty char array; virtual so an encoding that has such data can override it.
1260 internal virtual char[] GetBestFitUnicodeToBytesData()
1262 // Normally we don't have any best fit data.
1263 return Array.Empty<char>();
// Best-fit data for the bytes-to-Unicode direction. The base implementation returns the
// shared empty char array; virtual so an encoding that has such data can override it.
1266 internal virtual char[] GetBestFitBytesToUnicodeData()
1268 // Normally we don't have any best fit data.
1269 return Array.Empty<char>();
// Always throws an ArgumentException with parameter name "bytes". The message is
// formatted from SR.Argument_EncodingConversionOverflowBytes with this encoding's
// EncodingName and the runtime type of its EncoderFallback.
1272 internal void ThrowBytesOverflow()
1274 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1275 // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
1276 throw new ArgumentException(
1277 SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType()), "bytes");
// Output-buffer-overflow handler for the encoding path.
//   encoder        - the EncoderNLS in use, or null when there is none
//   nothingEncoded - true when no output was produced before the overflow
// Throws (through the parameterless ThrowBytesOverflow) when there is no encoder, when
// the encoder has _throwOnOverflow set, or when nothingEncoded is true.
1280 internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
1282 if (encoder == null || encoder._throwOnOverflow || nothingEncoded)
// Reset an existing fallback buffer before throwing.
1284 if (encoder != null && encoder.InternalHasFallbackBuffer)
1285 encoder.FallbackBuffer.InternalReset();
1286 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1287 // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
1288 ThrowBytesOverflow();
1291 // If we didn't throw, we are in convert and have to remember our flushing
1292 encoder.ClearMustFlush();
// Always throws an ArgumentException with parameter name "chars". The message is
// formatted from SR.Argument_EncodingConversionOverflowChars with this encoding's
// EncodingName and the runtime type of its DecoderFallback.
1295 internal void ThrowCharsOverflow()
1297 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1298 // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
1299 throw new ArgumentException(
1300 SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType()), "chars");
// Output-buffer-overflow handler for the decoding path.
//   decoder        - the DecoderNLS in use, or null when there is none
//   nothingDecoded - true when no output was produced before the overflow
// Throws (through the parameterless ThrowCharsOverflow) when there is no decoder, when
// the decoder has _throwOnOverflow set, or when nothingDecoded is true.
1303 internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
1305 if (decoder == null || decoder._throwOnOverflow || nothingDecoded)
// Reset an existing fallback buffer before throwing.
1307 if (decoder != null && decoder.InternalHasFallbackBuffer)
1308 decoder.FallbackBuffer.InternalReset();
1310 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1311 // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
1312 ThrowCharsOverflow();
1315 // If we didn't throw, we are in convert and have to remember our flushing
1316 decoder.ClearMustFlush();
// Encoder that keeps only a reference to an Encoding and forwards every call to it.
// The flush argument of each method is accepted but is not passed on to the encoding.
1319 internal sealed class DefaultEncoder : Encoder, IObjectReference
// The encoding that all calls are forwarded to.
1321 private Encoding _encoding;
// Binds the encoder to the given encoding; no validation is performed here.
1323 public DefaultEncoder(Encoding encoding)
1325 _encoding = encoding;
// IObjectReference member; always throws PlatformNotSupportedException.
1328 public Object GetRealObject(StreamingContext context)
1330 throw new PlatformNotSupportedException();
1333 // Returns the number of bytes the next call to GetBytes will
1334 // produce if presented with the given range of characters and the given
1335 // value of the flush parameter. The returned value takes into
1336 // account the state in which the encoder was left following the last call
1337 // to GetBytes. The state of the encoder is not affected by a call
// to this method.
// In this class the call is forwarded to the encoding's array-based GetByteCount.
1341 public override int GetByteCount(char[] chars, int index, int count, bool flush)
1343 return _encoding.GetByteCount(chars, index, count);
// Pointer-based variant; forwarded to the encoding's pointer-based GetByteCount.
1346 public unsafe override int GetByteCount(char* chars, int count, bool flush)
1348 return _encoding.GetByteCount(chars, count);
1351 // Encodes a range of characters in a character array into a range of bytes
1352 // in a byte array. The method encodes charCount characters from
1353 // chars starting at index charIndex, storing the resulting
1354 // bytes in bytes starting at index byteIndex. The encoding
1355 // takes into account the state in which the encoder was left following the
1356 // last call to this method. The flush parameter indicates whether
1357 // the encoder should flush any shift-states and partial characters at the
1358 // end of the conversion. To ensure correct termination of a sequence of
1359 // blocks of encoded bytes, the last call to GetBytes should specify
1360 // a value of true for the flush parameter.
1362 // An exception occurs if the byte array is not large enough to hold the
1363 // complete encoding of the characters. The GetByteCount method can
1364 // be used to determine the exact number of bytes that will be produced for
1365 // a given range of characters. Alternatively, the GetMaxByteCount
1366 // method of the Encoding that produced this encoder can be used to
1367 // determine the maximum number of bytes that will be produced for a given
1368 // number of characters, regardless of the actual character values.
// In this class the call is forwarded to the encoding's array-based GetBytes.
1371 public override int GetBytes(char[] chars, int charIndex, int charCount,
1372 byte[] bytes, int byteIndex, bool flush)
1374 return _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
// Pointer-based variant; forwarded to the encoding's pointer-based GetBytes.
1377 public unsafe override int GetBytes(char* chars, int charCount,
1378 byte* bytes, int byteCount, bool flush)
1380 return _encoding.GetBytes(chars, charCount, bytes, byteCount);
// Decoder that keeps only a reference to an Encoding and forwards every call to it.
// The flush argument of each method is accepted but is not passed on to the encoding.
1384 internal sealed class DefaultDecoder : Decoder, IObjectReference
// The encoding that all calls are forwarded to.
1386 private Encoding _encoding;
// Binds the decoder to the given encoding; no validation is performed here.
1388 public DefaultDecoder(Encoding encoding)
1390 _encoding = encoding;
// IObjectReference member; always throws PlatformNotSupportedException.
1393 public Object GetRealObject(StreamingContext context)
1395 throw new PlatformNotSupportedException();
1398 // Returns the number of characters the next call to GetChars will
1399 // produce if presented with the given range of bytes. The returned value
1400 // takes into account the state in which the decoder was left following the
1401 // last call to GetChars. The state of the decoder is not affected
1402 // by a call to this method.
// Convenience overload: same as the four-argument overload with flush = false.
1405 public override int GetCharCount(byte[] bytes, int index, int count)
1407 return GetCharCount(bytes, index, count, false);
// Forwarded to the encoding's array-based GetCharCount.
1410 public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
1412 return _encoding.GetCharCount(bytes, index, count);
// Pointer-based variant.
1415 public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
1417 // By default just call the encoding version, no flush by default
1418 return _encoding.GetCharCount(bytes, count);
1421 // Decodes a range of bytes in a byte array into a range of characters
1422 // in a character array. The method decodes byteCount bytes from
1423 // bytes starting at index byteIndex, storing the resulting
1424 // characters in chars starting at index charIndex. The
1425 // decoding takes into account the state in which the decoder was left
1426 // following the last call to this method.
1428 // An exception occurs if the character array is not large enough to
1429 // hold the complete decoding of the bytes. The GetCharCount method
1430 // can be used to determine the exact number of characters that will be
1431 // produced for a given range of bytes. Alternatively, the
1432 // GetMaxCharCount method of the Encoding that produced this
1433 // decoder can be used to determine the maximum number of characters that
1434 // will be produced for a given number of bytes, regardless of the actual
// byte values.
// Convenience overload: same as the six-argument overload with flush = false.
1438 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
1439 char[] chars, int charIndex)
1441 return GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);
// Forwarded to the encoding's array-based GetChars.
1444 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
1445 char[] chars, int charIndex, bool flush)
1447 return _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
// Pointer-based variant.
1450 public unsafe override int GetChars(byte* bytes, int byteCount,
1451 char* chars, int charCount, bool flush)
1453 // By default just call the encoding's version
1454 return _encoding.GetChars(bytes, byteCount, chars, charCount);
// Decoding helper that tracks a read position in a byte buffer and a write position in
// a char buffer, keeps a running count of produced chars, and sends bytes that need a
// fallback through a DecoderFallbackBuffer.
1458 internal class EncodingCharBuffer
// Output side: current write position, start of the buffer, and one-past-the-end.
1460 private unsafe char* _chars;
1461 private unsafe char* _charStart;
1462 private unsafe char* _charEnd;
// Running total of chars produced; exposed through Count.
1463 private int _charCountResult = 0;
// Owning encoding (used to raise overflow errors) and the optional decoder.
1464 private Encoding _enc;
1465 private DecoderNLS _decoder;
// Input side: start of the buffer, one-past-the-end, and current read position.
1466 private unsafe byte* _byteStart;
1467 private unsafe byte* _byteEnd;
1468 private unsafe byte* _bytes;
1469 private DecoderFallbackBuffer _fallbackBuffer;
// Sets up the buffer bounds from the start pointers and counts, then obtains a fallback
// buffer: a fresh one from enc.DecoderFallback when there is no decoder, or the
// decoder's own FallbackBuffer.
1471 internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
1472 byte* byteStart, int byteCount)
1478 _charStart = charStart;
1479 _charEnd = charStart + charCount;
1481 _byteStart = byteStart;
1483 _byteEnd = byteStart + byteCount;
1485 if (_decoder == null)
1486 _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
1488 _fallbackBuffer = _decoder.FallbackBuffer;
1490 // If we're getting chars or getting char count we don't expect to have
1491 // to remember fallbacks between calls (so it should be empty)
1492 Debug.Assert(_fallbackBuffer.Remaining == 0,
1493 "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
1494 _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
// Adds one char that was decoded from numBytes input bytes. When the output position
// has reached _charEnd, the read position is moved back by numBytes and the overflow is
// reported to the encoding; false is returned if that call does not throw.
1497 internal unsafe bool AddChar(char ch, int numBytes)
1501 if (_chars >= _charEnd)
1504 _bytes -= numBytes; // Didn't encode these bytes
1505 _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
1506 return false; // No throw, but no store either
// Single-byte convenience overload: AddChar(ch, 1).
1515 internal unsafe bool AddChar(char ch)
1517 return AddChar(ch, 1);
// Adds two chars that were decoded together from numBytes input bytes. Space for both
// is checked up front so that the pair is not split across an overflow.
1521 internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
1523 // Need room for 2 chars
1524 if (_chars >= _charEnd - 1)
1527 _bytes -= numBytes; // Didn't encode these bytes
1528 _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
1529 return false; // No throw, but no store either
1531 return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
// NOTE(review): presumably moves the read position by count bytes - confirm against the body.
1534 internal unsafe void AdjustBytes(int count)
// True while the read position is before the end of the input.
1539 internal unsafe bool MoreData
1543 return _bytes < _byteEnd;
1547 // Do we have count more bytes?
1548 internal unsafe bool EvenMoreData(int count)
1550 return (_bytes <= _byteEnd - count);
1553 // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
1554 // but we'll double check just to make sure.
1555 internal unsafe byte GetNextByte()
1557 Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");
// Release-build guard for the same condition the assert checks.
1558 if (_bytes >= _byteEnd)
// Number of input bytes consumed so far: distance from _byteStart to the read position.
1563 internal unsafe int BytesUsed
1567 return (int)(_bytes - _byteStart);
// The fixed-arity Fallback overloads below wrap their bytes in a new array and delegate
// to Fallback(byte[]).
1571 internal unsafe bool Fallback(byte fallbackByte)
1574 byte[] byteBuffer = new byte[] { fallbackByte };
1576 // Do the fallback and add the data.
1577 return Fallback(byteBuffer);
1580 internal unsafe bool Fallback(byte byte1, byte byte2)
1583 byte[] byteBuffer = new byte[] { byte1, byte2 };
1585 // Do the fallback and add the data.
1586 return Fallback(byteBuffer);
1589 internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
1592 byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };
1594 // Do the fallback and add the data.
1595 return Fallback(byteBuffer);
// Runs the fallback for the given bytes. Two InternalFallback overloads appear below:
// one writes through _chars and reports success or failure, the other returns a count
// that is added to the running total.
// NOTE(review): presumably the choice depends on whether an output buffer is present - confirm.
1598 internal unsafe bool Fallback(byte[] byteBuffer)
1600 // Do the fallback and add the data.
// Remember the write position so the number of chars the fallback wrote can be computed.
1603 char* pTemp = _chars;
1604 if (_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false)
1607 _bytes -= byteBuffer.Length; // Didn't use how many ever bytes we're falling back
1608 _fallbackBuffer.InternalReset(); // We didn't use this fallback.
1609 _enc.ThrowCharsOverflow(_decoder, _chars == _charStart); // Throw?
1610 return false; // No throw, but no store either
1612 _charCountResult += unchecked((int)(_chars - pTemp));
1616 _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
// Total number of chars counted so far.
1622 internal unsafe int Count
1626 return _charCountResult;
// Encoding helper that tracks a read position in a char buffer and a write position in
// a byte buffer, keeps a running count of produced bytes, and pulls chars from an
// EncoderFallbackBuffer as well as from the input.
1631 internal class EncodingByteBuffer
// Output side: current write position, start of the buffer, and one-past-the-end.
1633 private unsafe byte* _bytes;
1634 private unsafe byte* _byteStart;
1635 private unsafe byte* _byteEnd;
// Input side: current read position, start of the buffer, and one-past-the-end.
1636 private unsafe char* _chars;
1637 private unsafe char* _charStart;
1638 private unsafe char* _charEnd;
// Running total of bytes produced; exposed through Count.
1639 private int _byteCountResult = 0;
// Owning encoding (used to raise overflow errors) and the optional encoder.
1640 private Encoding _enc;
1641 private EncoderNLS _encoder;
1642 internal EncoderFallbackBuffer fallbackBuffer;
// Sets up the buffer bounds from the start pointers and counts, then obtains a fallback
// buffer: a fresh one from the encoding's EncoderFallback when there is no encoder, or
// the encoder's own FallbackBuffer.
1644 internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
1645 byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
1648 _encoder = inEncoder;
1650 _charStart = inCharStart;
1651 _chars = inCharStart;
1652 _charEnd = inCharStart + inCharCount;
1654 _bytes = inByteStart;
1655 _byteStart = inByteStart;
1656 _byteEnd = inByteStart + inByteCount;
1658 if (_encoder == null)
1659 this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
1662 this.fallbackBuffer = _encoder.FallbackBuffer;
1663 // If we're not converting we must not have data in our fallback buffer
1664 if (_encoder._throwOnOverflow && _encoder.InternalHasFallbackBuffer &&
1665 this.fallbackBuffer.Remaining > 0)
1666 throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
1667 _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
// The last argument tells the fallback buffer whether an output byte buffer is present.
1669 fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
// Adds one byte, given that moreBytesExpected further bytes still have to follow it.
// When the remaining space cannot hold this byte plus the expected ones, the input is
// backed up through MovePrevious(true) and false is returned if that does not throw.
1672 internal unsafe bool AddByte(byte b, int moreBytesExpected)
1674 Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
1677 if (_bytes >= _byteEnd - moreBytesExpected)
1679 // Throw maybe. Check which buffer to back up (only matters if Converting)
1680 this.MovePrevious(true); // Throw if necessary
1681 return false; // No throw, but no store either
// The multi-byte AddByte overloads below chain single-byte adds; each call passes the
// number of bytes still to come so that the space check covers the whole group.
1690 internal unsafe bool AddByte(byte b1)
1692 return (AddByte(b1, 0));
1695 internal unsafe bool AddByte(byte b1, byte b2)
1697 return (AddByte(b1, b2, 0));
1700 internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
1702 return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
1705 internal unsafe bool AddByte(byte b1, byte b2, byte b3)
1707 return AddByte(b1, b2, b3, (int)0);
1710 internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
1712 return (AddByte(b1, 2 + moreBytesExpected) &&
1713 AddByte(b2, 1 + moreBytesExpected) &&
1714 AddByte(b3, moreBytesExpected));
1717 internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
1719 return (AddByte(b1, 3) &&
// Undoes the consumption of the most recent input char: steps the fallback buffer back
// when it is the current source, otherwise steps _chars back if it is past the start.
// Overflow is then reported to the encoding, which decides whether to throw.
1725 internal unsafe void MovePrevious(bool bThrow)
1727 if (fallbackBuffer.bFallingBack)
1728 fallbackBuffer.MovePrevious(); // don't use last fallback
1731 Debug.Assert(_chars > _charStart ||
1732 ((bThrow == true) && (_bytes == _byteStart)),
1733 "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
1734 if (_chars > _charStart)
1735 _chars--; // don't use last char
1739 _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart); // Throw? (and reset fallback if not converting)
// Hands a char that needs a fallback to the fallback buffer, passing the read position by reference.
1742 internal unsafe bool Fallback(char charFallback)
1745 return fallbackBuffer.InternalFallback(charFallback, ref _chars);
1748 internal unsafe bool MoreData
1752 // See if fallbackBuffer is not empty or if there's data left in chars buffer.
1753 return ((fallbackBuffer.Remaining > 0) || (_chars < _charEnd));
// Produces the next char to encode: the fallback buffer is consulted first, then the
// input buffer.
1757 internal unsafe char GetNextChar()
1759 // See if there's something in our fallback buffer
1760 char cReturn = fallbackBuffer.InternalGetNextChar();
1762 // Nothing in the fallback buffer, return our normal data.
1765 if (_chars < _charEnd)
1766 cReturn = *(_chars++);
// Number of input chars consumed so far: distance from _charStart to the read position.
1772 internal unsafe int CharsUsed
1776 return (int)(_chars - _charStart);
// Total number of bytes counted so far.
1780 internal unsafe int Count
1784 return _byteCountResult;