1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
10 using System.Diagnostics;
11 using System.Diagnostics.Contracts;
12 using System.Globalization;
16 // Encodes text into and out of UTF-32. UTF-32 is a way of writing
17 // Unicode characters with a single storage unit (32 bits) per character.
19 // The UTF-32 byte order mark is simply the Unicode byte order mark
20 // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
21 // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
22 // switch the byte orderings.
24 public sealed class UTF32Encoding : Encoding
27 words bits UTF-32 representation
28 ----- ---- -----------------------------------
29 1 16 00000000 00000000 xxxxxxxx xxxxxxxx
30 2 21 00000000 000xxxxx hhhhhhll llllllll
31 ----- ---- -----------------------------------
34 Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
// Shared instances handed out by Encoding.UTF32 / Encoding.BigEndianUTF32.
// Being static readonly fields, they are only constructed once a static member
// of this class is first touched. Both emit a byte order mark.
internal static readonly UTF32Encoding s_default = new UTF32Encoding(false, true);
internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(true, true);

// Per-instance configuration captured by the three-argument constructor.
// All three default to false.
private bool _emitUTF32ByteOrderMark;   // GetPreamble() returns a BOM when true
private bool _isThrowException;         // exception fallbacks instead of U+FFFD replacement
private bool _bigEndian;                // most significant byte first when true
// Creates a little-endian UTF-32 encoding that emits a byte order mark and
// does not throw on invalid characters.
public UTF32Encoding()
    : this(bigEndian: false, byteOrderMark: true, throwOnInvalidCharacters: false)
{
}
// Creates a UTF-32 encoding with the requested byte order and BOM behavior
// that does not throw on invalid characters.
public UTF32Encoding(bool bigEndian, bool byteOrderMark)
    : this(bigEndian, byteOrderMark, throwOnInvalidCharacters: false)
{
}
// Creates a UTF-32 encoding.
//   bigEndian                - selects code page 12001 (true) or 12000 (false) and the byte order.
//   byteOrderMark            - whether GetPreamble() returns a byte order mark.
//   throwOnInvalidCharacters - use exception fallbacks rather than replacement fallbacks.
public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
    : base(bigEndian ? 12001 : 12000)
{
    _isThrowException = throwOnInvalidCharacters;
    _emitUTF32ByteOrderMark = byteOrderMark;
    _bigEndian = bigEndian;

    // The base constructor has already installed fallbacks, but it ran before
    // _isThrowException was assigned; redo it when exceptions were requested.
    if (_isThrowException)
    {
        SetDefaultFallbacks();
    }
}
// Installs the encoder/decoder fallbacks that match this instance's error policy:
// exception fallbacks when the instance was created with throwOnInvalidCharacters,
// otherwise replacement fallbacks that substitute U+FFFD (REPLACEMENT CHARACTER).
internal override void SetDefaultFallbacks()
{
    if (_isThrowException)
    {
        this.encoderFallback = EncoderFallback.ExceptionFallback;
        this.decoderFallback = DecoderFallback.ExceptionFallback;
    }
    else
    {
        // The two branches are mutually exclusive; without the else the
        // replacement fallbacks would silently override the exception ones.
        this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
        this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
    }
}
86 // The following methods are copied from EncodingNLS.cs.
87 // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
88 // These should be kept in sync for the following classes:
89 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
91 // Returns the number of bytes required to encode a range of characters in
94 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
95 // So if you fix this, fix the others. Currently those include:
96 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
97 // parent method is safe
// Returns the number of bytes needed to encode chars[index .. index + count).
// Throws ArgumentNullException when chars is null and ArgumentOutOfRangeException
// when index/count are negative or do not describe a range inside chars.
public override unsafe int GetByteCount(char[] chars, int index, int count)
{
    // Validate input parameters
    if (chars == null)
        throw new ArgumentNullException("chars", SR.ArgumentNull_Array);

    if (index < 0 || count < 0)
        throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);

    if (chars.Length - index < count)
        throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
    Contract.EndContractBlock();

    // If no input, return 0. This also keeps an empty array away from the
    // fixed statement, which would yield a null pointer for it.
    if (count == 0)
        return 0;

    // Just call the pointer version
    fixed (char* pChars = chars)
        return GetByteCount(pChars + index, count, null);
}
121 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
122 // So if you fix this, fix the others. Currently those include:
123 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
124 // parent method is safe
// Returns the number of bytes needed to encode the whole string s.
// Throws ArgumentNullException when s is null.
public override unsafe int GetByteCount(String s)
{
    // Validate input
    if (s == null)
        throw new ArgumentNullException("s");
    Contract.EndContractBlock();

    fixed (char* pChars = s)
        return GetByteCount(pChars, s.Length, null);
}
137 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
138 // So if you fix this, fix the others. Currently those include:
139 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of bytes needed to encode count chars starting at chars.
// Throws ArgumentNullException when chars is null and
// ArgumentOutOfRangeException when count is negative.
[CLSCompliant(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
    // Validate Parameters
    if (chars == null)
        throw new ArgumentNullException("chars", SR.ArgumentNull_Array);

    if (count < 0)
        throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
    Contract.EndContractBlock();

    // Call it with empty encoder
    return GetByteCount(chars, count, null);
}
156 // Parent method is safe.
157 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
158 // So if you fix this, fix the others. Currently those include:
159 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Encodes s[charIndex .. charIndex + charCount) into bytes starting at byteIndex
// and returns the number of bytes written. All space from byteIndex to the end
// of bytes is offered to the encoder.
// Throws ArgumentNullException for a null s/bytes and ArgumentOutOfRangeException
// for negative or out-of-range indices/counts.
public override unsafe int GetBytes(String s, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (s == null || bytes == null)
        throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);

    if (charIndex < 0 || charCount < 0)
        throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);

    if (s.Length - charIndex < charCount)
        throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);

    if (byteIndex < 0 || byteIndex > bytes.Length)
        throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
    Contract.EndContractBlock();

    // Capacity is measured on the caller's array, before any substitution below.
    int byteCount = bytes.Length - byteIndex;

    // &bytes[0] needs at least one element, so pin a one-byte scratch array
    // instead of an empty one; byteCount is still 0, so nothing is written to it.
    if (bytes.Length == 0)
        bytes = new byte[1];

    fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
        return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
}
187 // Encodes a range of characters in a character array into a range of bytes
188 // in a byte array. An exception occurs if the byte array is not large
189 // enough to hold the complete encoding of the characters. The
190 // GetByteCount method can be used to determine the exact number of
191 // bytes that will be produced for a given range of characters.
192 // Alternatively, the GetMaxByteCount method can be used to
193 // determine the maximum number of bytes that will be produced for a given
194 // number of characters, regardless of the actual character values.
196 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
197 // So if you fix this, fix the others. Currently those include:
198 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
199 // parent method is safe
// Encodes chars[charIndex .. charIndex + charCount) into bytes starting at
// byteIndex and returns the number of bytes written. All space from byteIndex
// to the end of bytes is offered to the encoder.
// Throws ArgumentNullException for a null chars/bytes and
// ArgumentOutOfRangeException for negative or out-of-range indices/counts.
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    // Validate parameters
    if (chars == null || bytes == null)
        throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);

    if (charIndex < 0 || charCount < 0)
        throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);

    if (chars.Length - charIndex < charCount)
        throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);

    if (byteIndex < 0 || byteIndex > bytes.Length)
        throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
    Contract.EndContractBlock();

    // If nothing to encode return 0. This also keeps an empty chars array
    // away from the fixed statement below.
    if (charCount == 0)
        return 0;

    // Capacity is measured on the caller's array, before any substitution below.
    int byteCount = bytes.Length - byteIndex;

    // &bytes[0] needs at least one element, so pin a one-byte scratch array
    // instead of an empty one; byteCount is still 0, so nothing is written to it.
    if (bytes.Length == 0)
        bytes = new byte[1];

    fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
        // byteCount is the room available to the encoder, not the size of the array.
        return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
}
234 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
235 // So if you fix this, fix the others. Currently those include:
236 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload: encodes charCount chars at chars into at most byteCount
// bytes at bytes and returns the number of bytes written.
// Throws ArgumentNullException for a null pointer (bytes is reported first)
// and ArgumentOutOfRangeException for a negative count (charCount is reported first).
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    // Validate Parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
    }
    Contract.EndContractBlock();

    // No encoder state: delegate to the internal worker.
    return GetBytes(chars, charCount, bytes, byteCount, null);
}
252 // Returns the number of characters produced by decoding a range of bytes
255 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
256 // So if you fix this, fix the others. Currently those include:
257 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
258 // parent method is safe
// Returns the number of chars produced by decoding bytes[index .. index + count).
// Throws ArgumentNullException when bytes is null and ArgumentOutOfRangeException
// when index/count are negative or do not describe a range inside bytes.
public override unsafe int GetCharCount(byte[] bytes, int index, int count)
{
    // Validate Parameters
    if (bytes == null)
        throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);

    if (index < 0 || count < 0)
        throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);

    if (bytes.Length - index < count)
        throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
    Contract.EndContractBlock();

    // If no input just return 0. This also keeps an empty array away from
    // the fixed statement, which would yield a null pointer for it.
    if (count == 0)
        return 0;

    // Just call pointer version
    fixed (byte* pBytes = bytes)
        return GetCharCount(pBytes + index, count, null);
}
282 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
283 // So if you fix this, fix the others. Currently those include:
284 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of chars produced by decoding count bytes starting at bytes.
// Throws ArgumentNullException when bytes is null and
// ArgumentOutOfRangeException when count is negative.
[CLSCompliant(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
    // Validate Parameters
    if (bytes == null)
        throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);

    if (count < 0)
        throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
    Contract.EndContractBlock();

    // No decoder state: delegate to the internal worker.
    return GetCharCount(bytes, count, null);
}
300 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
301 // So if you fix this, fix the others. Currently those include:
302 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
303 // parent method is safe
// Decodes bytes[byteIndex .. byteIndex + byteCount) into chars starting at
// charIndex and returns the number of chars written. All space from charIndex
// to the end of chars is offered to the decoder.
// Throws ArgumentNullException for a null bytes/chars and
// ArgumentOutOfRangeException for negative or out-of-range indices/counts.
public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                    char[] chars, int charIndex)
{
    // Validate Parameters
    if (bytes == null || chars == null)
        throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);

    if (byteIndex < 0 || byteCount < 0)
        throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);

    if (bytes.Length - byteIndex < byteCount)
        throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);

    if (charIndex < 0 || charIndex > chars.Length)
        throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
    Contract.EndContractBlock();

    // If no input, return 0. This also keeps an empty bytes array away from
    // the fixed statement below.
    if (byteCount == 0)
        return 0;

    // Capacity is measured on the caller's array, before any substitution below.
    int charCount = chars.Length - charIndex;

    // &chars[0] needs at least one element, so pin a one-char scratch array
    // instead of an empty one; charCount is still 0, so nothing is written to it.
    if (chars.Length == 0)
        chars = new char[1];

    fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
        // charCount is the room available to the decoder, not the size of the array.
        return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
}
338 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
339 // So if you fix this, fix the others. Currently those include:
340 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload: decodes byteCount bytes at bytes into at most charCount
// chars at chars and returns the number of chars written.
// Throws ArgumentNullException for a null pointer (bytes is reported first)
// and ArgumentOutOfRangeException for a negative count (charCount is reported first).
[CLSCompliant(false)]
public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    // Validate Parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
    }
    Contract.EndContractBlock();

    // No decoder state: delegate to the internal worker.
    return GetChars(bytes, byteCount, chars, charCount, null);
}
356 // Returns a string containing the decoded representation of a range of
357 // bytes in a byte array.
359 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
360 // So if you fix this, fix the others. Currently those include:
361 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
362 // parent method is safe
// Decodes bytes[index .. index + count) and returns the result as a string.
// Returns String.Empty for an empty range.
// Throws ArgumentNullException when bytes is null and ArgumentOutOfRangeException
// when index/count are negative or do not describe a range inside bytes.
public override unsafe String GetString(byte[] bytes, int index, int count)
{
    // Validate Parameters
    if (bytes == null)
        throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);

    if (index < 0 || count < 0)
        throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);

    if (bytes.Length - index < count)
        throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
    Contract.EndContractBlock();

    // Avoid problems with empty input buffer
    if (count == 0) return String.Empty;

    fixed (byte* pBytes = bytes)
        return String.CreateStringFromEncoding(
            pBytes + index, count, this);
}
386 // End of standard methods copied from EncodingNLS.cs
// Internal worker: counts the bytes needed to encode `count` UTF-16 code units at `chars`.
// When `encoder` is supplied, counting starts from its pending high surrogate
// (_charLeftOver) and uses its fallback buffer; the visible code otherwise creates a
// fresh fallback buffer from this encoding's encoderFallback.
// Unpaired surrogates are handed to the fallback buffer, and the loop condition pulls
// the fallback's output back in through InternalGetNextChar().
// Throws ArgumentException when the encoder's fallback buffer is not empty on entry and
// ArgumentOutOfRangeException from the overflow check at the end.
// NOTE(review): this listing omits short lines (braces, else, and the statements that
// advance `chars` and accumulate the total), so the comments describe only what is visible.
389 internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
391 Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
392 Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");
394 char* end = chars + count;
395 char* charStart = chars;
// A high surrogate waiting for its low half; '\0' means none is pending.
398 char highSurrogate = '\0';
400 // For fallback we may need a fallback buffer
401 EncoderFallbackBuffer fallbackBuffer = null;
402 char* charsForFallback;
// Resume from the encoder's state (the guarding condition is not visible in this listing).
406 highSurrogate = encoder._charLeftOver;
407 fallbackBuffer = encoder.FallbackBuffer;
409 // We mustn't have left over fallback data when counting
410 if (fallbackBuffer.Remaining > 0)
411 throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
415 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
418 // Set our internal fallback interesting things.
419 fallbackBuffer.InternalInitialize(charStart, end, encoder, false);
// Keep going while the fallback buffer still yields chars or input remains.
424 while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
426 // First unwind any fallback
429 // No fallback, just get next char
434 // Do we need a low surrogate?
435 if (highSurrogate != '\0')
438 // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
440 if (Char.IsLowSurrogate(ch))
443 highSurrogate = '\0';
446 // One surrogate pair will be translated into 4 bytes UTF32.
453 // We are missing our low surrogate, decrement chars and fallback the high surrogate
454 // The high surrogate may have come from the encoder, but nothing else did.
455 Debug.Assert(chars > charStart,
456 "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
// InternalFallback takes the position by ref, so go through a temporary local.
460 charsForFallback = chars;
461 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
462 chars = charsForFallback;
464 // We're going to fallback the old high surrogate.
465 highSurrogate = '\0';
469 // Do we have another high surrogate?
470 if (Char.IsHighSurrogate(ch))
473 // We'll have a high surrogate to check next time.
479 // Check for illegal characters
480 if (Char.IsLowSurrogate(ch))
482 // We have a leading low surrogate, do the fallback
483 charsForFallback = chars;
484 fallbackBuffer.InternalFallback(ch, ref charsForFallback);
485 chars = charsForFallback;
487 // Try again with fallback buffer
491 // We get to add the character (4 bytes UTF32)
495 // May have to do our last surrogate
// Only when flushing: either there is no encoder, or it has MustFlush set.
496 if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
498 // We have to do the fallback for the lonely high surrogate
499 charsForFallback = chars;
500 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
501 chars = charsForFallback;
503 highSurrogate = (char)0;
507 // Check for overflows.
509 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
511 // Shouldn't have anything in fallback buffer for GetByteCount
512 // (don't have to check _throwOnOverflow for count)
513 Debug.Assert(fallbackBuffer.Remaining == 0,
514 "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
// Internal worker: encodes charCount UTF-16 code units at `chars` into at most byteCount
// bytes at `bytes` and returns the number of bytes written (bytes - byteStart).
// Each character is written as four bytes; two byte orders are visible below
// (most-significant-first and least-significant-first).
// Unpaired surrogates are handed to the encoder fallback buffer, and the loop condition
// pulls the fallback's output back in through InternalGetNextChar().
// When fewer than four bytes of room remain, the input position is backed up and
// ThrowBytesOverflow is called; per the inline comments it throws only if nothing was written.
// When `encoder` is supplied, its _charLeftOver and _charsUsed are updated at the end.
// NOTE(review): this listing omits short lines (braces, else, the endianness tests and
// some statements), so the comments describe only what is visible.
520 internal override unsafe int GetBytes(char* chars, int charCount,
521 byte* bytes, int byteCount, EncoderNLS encoder)
523 Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
524 Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
525 Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
526 Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");
528 char* charStart = chars;
529 char* charEnd = chars + charCount;
530 byte* byteStart = bytes;
531 byte* byteEnd = bytes + byteCount;
// A high surrogate waiting for its low half; '\0' means none is pending.
533 char highSurrogate = '\0';
535 // For fallback we may need a fallback buffer
536 EncoderFallbackBuffer fallbackBuffer = null;
537 char* charsForFallback;
// Resume from the encoder's state (the guarding condition is not visible in this listing).
541 highSurrogate = encoder._charLeftOver;
542 fallbackBuffer = encoder.FallbackBuffer;
544 // We mustn't have left over fallback data when not converting
545 if (encoder._throwOnOverflow && fallbackBuffer.Remaining > 0)
546 throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
550 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
553 // Set our internal fallback interesting things.
554 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
// Keep going while the fallback buffer still yields chars or input remains.
559 while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
561 // First unwind any fallback
564 // No fallback, just get next char
569 // Do we need a low surrogate?
570 if (highSurrogate != '\0')
573 // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
575 if (Char.IsLowSurrogate(ch))
577 // Is it a legal one?
578 uint iTemp = GetSurrogate(highSurrogate, ch);
579 highSurrogate = '\0';
582 // One surrogate pair will be translated into 4 bytes UTF32.
// True when fewer than four bytes of output room remain.
584 if (bytes + 3 >= byteEnd)
586 // Don't have 4 bytes
587 if (fallbackBuffer.bFallingBack)
589 fallbackBuffer.MovePrevious(); // Aren't using these 2 fallback chars
590 fallbackBuffer.MovePrevious();
594 // If we don't have enough room, then either we should've advanced a while
595 // or we should have bytes==byteStart and throw below
// NOTE(review): the assert message below names UnicodeEncoding although this is
// UTF32Encoding, and its wording looks truncated — confirm and correct in a code change.
596 Debug.Assert(chars > charStart + 1 || bytes == byteStart,
597 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
598 chars -= 2; // Aren't using those 2 chars
600 ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
601 highSurrogate = (char)0; // Nothing left over (we backed up to start of pair if supplementary)
// Most-significant-byte-first layout of the combined code point.
607 *(bytes++) = (byte)(0x00);
608 *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
609 *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
610 *(bytes++) = (byte)(iTemp); // Implies & 0xFF
// Least-significant-byte-first layout of the combined code point.
614 *(bytes++) = (byte)(iTemp); // Implies & 0xFF
615 *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
616 *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
617 *(bytes++) = (byte)(0x00);
622 // We are missing our low surrogate, decrement chars and fallback the high surrogate
623 // The high surrogate may have come from the encoder, but nothing else did.
624 Debug.Assert(chars > charStart,
625 "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
// InternalFallback takes the position by ref, so go through a temporary local.
629 charsForFallback = chars;
630 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
631 chars = charsForFallback;
633 // We're going to fallback the old high surrogate.
634 highSurrogate = '\0';
638 // Do we have another high surrogate?, if so remember it
639 if (Char.IsHighSurrogate(ch))
642 // We'll have a high surrogate to check next time.
648 // Check for illegal characters (low surrogate)
649 if (Char.IsLowSurrogate(ch))
651 // We have a leading low surrogate, do the fallback
652 charsForFallback = chars;
653 fallbackBuffer.InternalFallback(ch, ref charsForFallback);
654 chars = charsForFallback;
656 // Try again with fallback buffer
660 // We get to add the character, yippee.
// True when fewer than four bytes of output room remain.
661 if (bytes + 3 >= byteEnd)
663 // Don't have 4 bytes
664 if (fallbackBuffer.bFallingBack)
665 fallbackBuffer.MovePrevious(); // Aren't using this fallback char
668 // Must've advanced already
669 Debug.Assert(chars > charStart,
670 "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
671 chars--; // Aren't using this char
673 ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
674 break; // Didn't throw, stop
// Most-significant-byte-first layout of a single UTF-16 code unit.
679 *(bytes++) = (byte)(0x00);
680 *(bytes++) = (byte)(0x00);
681 *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
682 *(bytes++) = (byte)(ch); // Implies & 0xFF
// Least-significant-byte-first layout of a single UTF-16 code unit.
686 *(bytes++) = (byte)(ch); // Implies & 0xFF
687 *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
688 *(bytes++) = (byte)(0x00);
689 *(bytes++) = (byte)(0x00);
693 // May have to do our last surrogate
// Only when flushing: either there is no encoder, or it has MustFlush set.
694 if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
696 // We have to do the fallback for the lonely high surrogate
697 charsForFallback = chars;
698 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
699 chars = charsForFallback;
701 highSurrogate = (char)0;
705 // Fix our encoder if we have one
706 Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
707 "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
711 // Remember our left over surrogate (or 0 if flushing)
712 encoder._charLeftOver = highSurrogate;
// Report how many input chars were consumed.
715 encoder._charsUsed = (int)(chars - charStart);
718 // return the new length
719 return (int)(bytes - byteStart);
// Internal worker: counts the UTF-16 code units produced by decoding `count` bytes at `bytes`.
// baseDecoder is cast to UTF32Decoder; when a decoder is supplied, decoding resumes from
// its readByteCount / iChar and uses its fallback buffer.
// A 32-bit value above 0x10FFFF or inside the surrogate range 0xD800-0xDFFF is invalid:
// its four bytes are passed to the decoder fallback, whose returned count is added to the total.
// When flushing (no decoder, or MustFlush), leftover bytes of an incomplete value are
// also passed to the fallback.
// NOTE(review): this listing omits short lines (braces, else, the endianness tests and
// the statements that accumulate the total), so the comments describe only what is visible.
722 internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
724 Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
725 Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");
727 UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
731 byte* end = bytes + count;
732 byte* byteStart = bytes;
738 // For fallback we may need a fallback buffer
739 DecoderFallbackBuffer fallbackBuffer = null;
741 // See if there's anything in our decoder
744 readCount = decoder.readByteCount;
745 iChar = (uint)decoder.iChar;
746 fallbackBuffer = decoder.FallbackBuffer;
748 // Shouldn't have anything in fallback buffer for GetCharCount
749 // (don't have to check _throwOnOverflow for chars or count)
750 Debug.Assert(fallbackBuffer.Remaining == 0,
751 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
755 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
758 // Set our internal fallback interesting things.
759 fallbackBuffer.InternalInitialize(byteStart, null);
761 // Loop through our input, 4 characters at a time!
// The charCount >= 0 term ends the loop once the running total has gone negative.
762 while (bytes < end && charCount >= 0)
764 // Get our next character
767 // Scoot left and add it to the bottom
773 // Scoot right and add it to the top
775 iChar += (uint)(*(bytes++)) << 24;
780 // See if we have all the bytes yet
787 // See if its valid to encode
788 if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
790 // Need to fall back these 4 bytes
791 byte[] fallbackBytes;
// Two layouts follow: most significant byte first, then least significant byte first.
794 fallbackBytes = new byte[] {
795 unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
796 unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
800 fallbackBytes = new byte[] {
801 unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
802 unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
805 charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
807 // Ignore the illegal character
812 // Ok, we have something we can add to our output
// Values at or above 0x10000 lie outside the BMP.
813 if (iChar >= 0x10000)
819 // Add the rest of the surrogate or our normal character
822 // iChar is back to 0
826 // See if we have something left over that has to be decoded
827 if (readCount > 0 && (decoder == null || decoder.MustFlush))
829 // Oops, there's something left over with no place to go.
830 byte[] fallbackBytes = new byte[readCount];
// Two variants of the copy loop are visible; which one runs is decided by a test
// not shown in this listing (presumably byte order — confirm).
833 while (readCount > 0)
835 fallbackBytes[--readCount] = unchecked((byte)iChar);
841 while (readCount > 0)
843 fallbackBytes[--readCount] = unchecked((byte)(iChar >> 24));
848 charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
851 // Check for overflows.
// NOTE(review): this char-count method reports overflow with the GetByteCountOverflow
// resource; GetMaxCharCount uses GetCharCountOverflow — looks like a copy/paste slip, confirm.
853 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
855 // Shouldn't have anything in fallback buffer for GetCharCount
856 // (don't have to check _throwOnOverflow for chars or count)
857 Debug.Assert(fallbackBuffer.Remaining == 0,
858 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");
// Internal worker: decodes byteCount bytes at `bytes` into at most charCount UTF-16 code
// units at `chars` and returns the number of chars written (chars - charStart).
// baseDecoder is cast to UTF32Decoder; when a decoder is supplied, decoding resumes from
// its readByteCount / iChar, and its iChar, readByteCount and _bytesUsed are updated at the end.
// A 32-bit value above 0x10FFFF or inside the surrogate range 0xD800-0xDFFF is invalid
// and its four bytes are passed to the decoder fallback. Values at or above 0x10000 are
// written as a high/low surrogate pair.
// When the output has no room, the input position is moved back four bytes, iChar is
// cleared and ThrowCharsOverflow is called; per the inline comments it throws only if
// no chars were written.
// NOTE(review): this listing omits short lines (braces, else, the endianness tests and
// some statements), so the comments describe only what is visible.
864 internal override unsafe int GetChars(byte* bytes, int byteCount,
865 char* chars, int charCount, DecoderNLS baseDecoder)
867 Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
868 Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
869 Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
870 Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");
872 UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
875 char* charStart = chars;
876 char* charEnd = chars + charCount;
878 byte* byteStart = bytes;
879 byte* byteEnd = bytes + byteCount;
881 // See if there's anything in our decoder (but don't clear it yet)
885 // For fallback we may need a fallback buffer
886 DecoderFallbackBuffer fallbackBuffer = null;
887 char* charsForFallback;
889 // See if there's anything in our decoder
892 readCount = decoder.readByteCount;
893 iChar = (uint)decoder.iChar;
894 fallbackBuffer = baseDecoder.FallbackBuffer;
896 // Shouldn't have anything in fallback buffer for GetChars
897 // (don't have to check _throwOnOverflow for chars)
898 Debug.Assert(fallbackBuffer.Remaining == 0,
899 "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
903 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
906 // Set our internal fallback interesting things.
907 fallbackBuffer.InternalInitialize(bytes, chars + charCount);
909 // Loop through our input, 4 characters at a time!
910 while (bytes < byteEnd)
912 // Get our next character
915 // Scoot left and add it to the bottom
921 // Scoot right and add it to the top
923 iChar += (uint)(*(bytes++)) << 24;
928 // See if we have all the bytes yet
935 // See if its valid to encode
936 if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
938 // Need to fall back these 4 bytes
939 byte[] fallbackBytes;
// Two layouts follow: most significant byte first, then least significant byte first.
942 fallbackBytes = new byte[] {
943 unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
944 unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
948 fallbackBytes = new byte[] {
949 unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
950 unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
953 // Chars won't be updated unless this works.
954 charsForFallback = chars;
955 bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
956 chars = charsForFallback;
961 // Couldn't fallback, throw or wait til next time
962 // We either read enough bytes for bytes-=4 to work, or we're
963 // going to throw in ThrowCharsOverflow because chars == charStart
964 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
965 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
966 bytes -= 4; // get back to where we were
967 iChar = 0; // Remembering nothing
968 fallbackBuffer.InternalReset();
969 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
970 break; // Stop here, didn't throw
973 // Ignore the illegal character
979 // Ok, we have something we can add to our output
// Values at or above 0x10000 need two chars (a surrogate pair).
980 if (iChar >= 0x10000)
// True when fewer than two chars of output room remain.
983 if (chars >= charEnd - 1)
985 // Throwing or stopping
986 // We either read enough bytes for bytes-=4 to work, or we're
987 // going to throw in ThrowCharsOverflow because chars == charStart
988 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
989 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
990 bytes -= 4; // get back to where we were
991 iChar = 0; // Remembering nothing
992 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
993 break; // Stop here, didn't throw
// Write the high half now; iChar becomes the low half, which is written further down.
996 *(chars++) = GetHighSurrogate(iChar);
997 iChar = GetLowSurrogate(iChar);
999 // Bounds check for normal character
1000 else if (chars >= charEnd)
1002 // Throwing or stopping
1003 // We either read enough bytes for bytes-=4 to work, or we're
1004 // going to throw in ThrowCharsOverflow because chars == charStart
1005 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
1006 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
1007 bytes -= 4; // get back to where we were
1008 iChar = 0; // Remembering nothing
1009 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1010 break; // Stop here, didn't throw
1013 // Add the rest of the surrogate or our normal character
1014 *(chars++) = (char)iChar;
1016 // iChar is back to 0
1020 // See if we have something left over that has to be decoded
1021 if (readCount > 0 && (decoder == null || decoder.MustFlush))
1023 // Oops, there's something left over with no place to go.
1024 byte[] fallbackBytes = new byte[readCount];
// Work on a copy so readCount itself is left untouched by the loops below.
1025 int tempCount = readCount;
// Two variants of the copy loop are visible; which one runs is decided by a test
// not shown in this listing (presumably byte order — confirm).
1028 while (tempCount > 0)
1030 fallbackBytes[--tempCount] = unchecked((byte)iChar);
1036 while (tempCount > 0)
1038 fallbackBytes[--tempCount] = unchecked((byte)(iChar >> 24));
1043 charsForFallback = chars;
1044 bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
1045 chars = charsForFallback;
1047 if (!fallbackResult)
1049 // Couldn't fallback.
1050 fallbackBuffer.InternalReset();
1051 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1052 // Stop here, didn't throw, backed up, so still nothing in buffer
1056 // Don't clear our decoder unless we could fall it back.
1057 // If we caught the if above, then we're a convert() and will catch this next time.
1063 // Remember any left over stuff, clearing buffer as well for MustFlush
1064 if (decoder != null)
1066 decoder.iChar = (int)iChar;
1067 decoder.readByteCount = readCount;
// Report how many input bytes were consumed.
1068 decoder._bytesUsed = (int)(bytes - byteStart);
1071 // Shouldn't have anything in fallback buffer for GetChars
1072 // (don't have to check _throwOnOverflow for chars)
1073 Debug.Assert(fallbackBuffer.Remaining == 0,
1074 "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
1077 return (int)(chars - charStart);
// Combines a UTF-16 surrogate pair into the code point it represents:
//   (cHigh - 0xD800) * 0x400 + (cLow - 0xDC00) + 0x10000
// No validation is done here; cHigh must be a high surrogate and cLow a low
// surrogate for the result to be meaningful.
// Static because it reads no instance state.
private static uint GetSurrogate(char cHigh, char cLow)
{
    return (((uint)cHigh - 0xD800) * 0x400) + ((uint)cLow - 0xDC00) + 0x10000;
}
// Returns the high (leading) surrogate for a supplementary code point.
// iChar must be at least 0x10000; smaller values make the unsigned
// subtraction wrap and give a meaningless result.
// Static because it reads no instance state.
private static char GetHighSurrogate(uint iChar)
{
    return (char)((iChar - 0x10000) / 0x400 + 0xD800);
}
// Returns the low (trailing) surrogate for a supplementary code point.
// iChar must be at least 0x10000; smaller values make the unsigned
// subtraction wrap and give a meaningless result.
// Static because it reads no instance state.
private static char GetLowSurrogate(uint iChar)
{
    return (char)((iChar - 0x10000) % 0x400 + 0xDC00);
}
// Returns a new UTF32Decoder bound to this encoding.
public override Decoder GetDecoder() => new UTF32Decoder(this);
// Returns a new EncoderNLS bound to this encoding.
public override Encoder GetEncoder() => new EncoderNLS(this);
// Returns an upper bound on the number of bytes produced by encoding charCount chars.
// Throws ArgumentOutOfRangeException when charCount is negative or when the
// bound does not fit in an Int32.
public override int GetMaxByteCount(int charCount)
{
    if (charCount < 0)
        throw new ArgumentOutOfRangeException(nameof(charCount),
             SR.ArgumentOutOfRange_NeedNonNegNum);
    Contract.EndContractBlock();

    // One extra char in case an encoder is holding a left over high surrogate.
    // Computed in 64 bits so the multiplications below cannot wrap.
    long byteCount = (long)charCount + 1;

    // Each input char may be replaced by up to MaxCharCount fallback chars.
    if (EncoderFallback.MaxCharCount > 1)
        byteCount *= EncoderFallback.MaxCharCount;

    // UTF-32 uses one 32-bit storage unit, i.e. 4 bytes, per character.
    byteCount *= 4;

    if (byteCount > 0x7fffffff)
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);

    return (int)byteCount;
}
// Returns an upper bound on the number of chars produced by decoding byteCount bytes.
// Throws ArgumentOutOfRangeException when byteCount is negative or when the
// bound does not fit in an Int32.
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException(nameof(byteCount),
             SR.ArgumentOutOfRange_NeedNonNegNum);
    Contract.EndContractBlock();

    // A supplementary character becomes 2 surrogate chars, so 4 input bytes become 2 chars,
    // plus 1 surrogate char may be left over if a decoder already holds 3 bytes of a non-BMP char.
    // One more is added because 1/2 == 0, but 3 left over bytes could still complete a surrogate pair.
    // Computed in 64 bits: with an Int32 the multiplication below could wrap, and the
    // range check that follows could never fire.
    long charCount = ((long)byteCount / 2) + 2;

    // Also consider fallback because our input bytes could be out of range of Unicode.
    // Fallback consumes 4 bytes at a time, so only half of MaxCharCount applies per 2 counted chars.
    if (DecoderFallback.MaxCharCount > 2)
    {
        // Multiply by fallback size
        charCount *= DecoderFallback.MaxCharCount;

        // We were already figuring 2 chars per 4 bytes, but fallback will be a different number.
        charCount /= 2;
    }

    if (charCount > 0x7fffffff)
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);

    return (int)charCount;
}
// Returns the UTF-32 byte order mark (U+FEFF) in this instance's byte order,
// or an empty array when the instance was created without a byte order mark.
public override byte[] GetPreamble()
{
    if (_emitUTF32ByteOrderMark)
    {
        // Allocate new array to prevent users from modifying it.
        if (_bigEndian)
        {
            return new byte[4] { 0x00, 0x00, 0xFE, 0xFF }; // big-endian: 00 00 FE FF
        }
        else
        {
            return new byte[4] { 0xFF, 0xFE, 0x00, 0x00 }; // little-endian: FF FE 00 00
        }
    }

    return Array.Empty<byte>();
}
// Two UTF32Encoding instances are equal when they agree on BOM emission,
// byte order, and both fallbacks. Any other object (including null) is not equal.
public override bool Equals(Object value)
{
    UTF32Encoding that = value as UTF32Encoding;

    // `as` yields null for null or for a different type; bail out before dereferencing.
    if (that == null)
    {
        return false;
    }

    return (_emitUTF32ByteOrderMark == that._emitUTF32ByteOrderMark) &&
           (_bigEndian == that._bigEndian) &&
           (EncoderFallback.Equals(that.EncoderFallback)) &&
           (DecoderFallback.Equals(that.DecoderFallback));
}
// Hash built from both fallbacks, the code page, and the BOM / byte-order flags.
// Not great distribution, but this is relatively unlikely to be used as a hashtable key.
public override int GetHashCode()
{
    int fallbackHash = this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
    int bomFlag = _emitUTF32ByteOrderMark ? 4 : 0;
    int endianFlag = _bigEndian ? 8 : 0;

    return fallbackHash + CodePage + bomFlag + endianFlag;
}
// Stateful decoder for UTF-32. It remembers the bytes of a partially read
// 32-bit value between calls.
private sealed class UTF32Decoder : DecoderNLS
{
    // Need a place to store any extra bytes we may have picked up
    internal int iChar = 0;          // bits accumulated so far for the current value
    internal int readByteCount = 0;  // how many bytes of the current value have been read

    public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
    {
        // base calls reset
    }

    // Discards any partially read value and clears the fallback buffer, if one exists.
    public override void Reset()
    {
        // Clear both pieces of state so a reset decoder holds no stale bits.
        this.iChar = 0;
        this.readByteCount = 0;
        if (_fallbackBuffer != null)
            _fallbackBuffer.Reset();
    }

    // Anything left in our decoder?
    internal override bool HasState
    {
        get
        {
            // ReadByteCount is our flag. (iChar==0 doesn't mean much).
            return (this.readByteCount != 0);
        }
    }
}