Encoding code clean up (#12864)
[platform/upstream/coreclr.git] / src / mscorlib / shared / System / Text / UTF32Encoding.cs
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 //
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
7 //
8
9 using System;
10 using System.Diagnostics;
11 using System.Diagnostics.Contracts;
12 using System.Globalization;
13
14 namespace System.Text
15 {
16     // Encodes text into and out of UTF-32.  UTF-32 is a way of writing
17     // Unicode characters with a single storage unit (32 bits) per character.
18     //
19     // The UTF-32 byte order mark is simply the Unicode byte order mark
20     // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000).  The byte order
21     // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
22     // switch the byte orderings.
23
24     public sealed class UTF32Encoding : Encoding
25     {
26         /*
27             words   bits    UTF-32 representation
28             -----   ----    -----------------------------------
29             1       16      00000000 00000000 xxxxxxxx xxxxxxxx
30             2       21      00000000 000xxxxx hhhhhhll llllllll
31             -----   ----    -----------------------------------
32
33             Surrogate:
34             Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
35         */
36
37         // Shared instances handed out by Encoding.UTF32 and Encoding.BigEndianUTF32. They are created
38         // lazily: static initialization does not run until a static member of this class is referenced.
39         internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
40         internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
41         // Per-instance settings; all three are assigned by the three-argument constructor.
42         private bool _emitUTF32ByteOrderMark;   // copy of the constructor's byteOrderMark argument
43         private bool _isThrowException;         // true => SetDefaultFallbacks installs exception fallbacks
44         private bool _bigEndian;                // true => code page 12001 and most-significant byte first
45
46
47         // Default instance: little-endian, byteOrderMark = true, throwOnInvalidCharacters = false.
48         public UTF32Encoding() : this(bigEndian: false, byteOrderMark: true, throwOnInvalidCharacters: false)
49         { }
50
51
52         // Forwards to the three-argument constructor with throwOnInvalidCharacters = false.
53         public UTF32Encoding(bool bigEndian, bool byteOrderMark)
54             : this(bigEndian, byteOrderMark, throwOnInvalidCharacters: false)
55         { }
56
57
58         // Full constructor. Code page 12001 is passed to the base class for big-endian, 12000 otherwise.
59         public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
60             : base(bigEndian ? 12001 : 12000)
61         {
62             _isThrowException = throwOnInvalidCharacters;
63             _emitUTF32ByteOrderMark = byteOrderMark;
64             _bigEndian = bigEndian;
65             // The Encoding constructor already set default fallbacks, but that happened before
66             // _isThrowException was assigned, so redo it when exception fallbacks were requested.
67             if (throwOnInvalidCharacters) { SetDefaultFallbacks(); }
68         }
69
70         // Installs the encoder and decoder fallbacks that match the _isThrowException setting.
71         internal override void SetDefaultFallbacks()
72         {
73             if (!_isThrowException)
74             {
75                 // Non-throwing mode: both directions use a replacement fallback whose text is U+FFFD.
76                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
77                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
78                 return;
79             }
80             // Throwing mode: both directions use the exception fallback.
81             this.encoderFallback = EncoderFallback.ExceptionFallback;
82             this.decoderFallback = DecoderFallback.ExceptionFallback;
83         }
84
85
86         // The following methods are copied from EncodingNLS.cs.
87         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
88         // These should be kept in sync for the following classes:
89         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
90
91         // Returns the number of bytes required to encode a range of characters in
92         // a character array.
93         //
94         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
95         // So if you fix this, fix the others.  Currently those include:
96         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
97         // parent method is safe
98
99         // Array overload: validates the (index, count) window and then counts through the pointer-based worker.
100         public override unsafe int GetByteCount(char[] chars, int index, int count)
101         {
102             if (chars == null)
103                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
104             if (index < 0)
105                 throw new ArgumentOutOfRangeException("index", SR.ArgumentOutOfRange_NeedNonNegNum);
106             if (count < 0)
107                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
108             if (count > chars.Length - index)
109                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
110             Contract.EndContractBlock();
111             // An empty range needs no bytes; returning here also keeps an empty array out of the fixed statement.
112             if (count == 0)
113                 return 0;
114
115             fixed (char* pFirst = chars)
116             {
117                 return GetByteCount(pFirst + index, count, null);
118             }
119         }
120
121         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
122         // So if you fix this, fix the others.  Currently those include:
123         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
124         // parent method is safe
125
126         // String overload: rejects null, then counts the bytes for all s.Length chars with a null encoder.
127         public override unsafe int GetByteCount(String s)
128         {
129             if (s == null) throw new ArgumentNullException("s");
130             Contract.EndContractBlock();
131             fixed (char* pText = s)
132             {
133                 return GetByteCount(pText, s.Length, null);
134             }
135         }
136
137         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
138         // So if you fix this, fix the others.  Currently those include:
139         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
140
141         // Pointer overload: checks the pointer and the count, then defers to the internal worker.
142         [CLSCompliant(false)]
143         public override unsafe int GetByteCount(char* chars, int count)
144         {
145             if (chars == null)
146                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
147             if (count < 0)
148                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
149             Contract.EndContractBlock();
150
151             // With a null encoder the internal worker creates its own fallback buffer
152             // and starts with no pending high surrogate.
153             return GetByteCount(chars, count, null);
154         }
155
156         // Parent method is safe.
157         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
158         // So if you fix this, fix the others.  Currently those include:
159         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
160
161         // String overload: encodes charCount chars of s, starting at charIndex, into bytes at byteIndex.
162         public override unsafe int GetBytes(String s, int charIndex, int charCount, byte[] bytes, int byteIndex)
163         {
164             if (s == null)
165                 throw new ArgumentNullException("s", SR.ArgumentNull_Array);
166             if (bytes == null)
167                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
168             if (charIndex < 0)
169                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_NeedNonNegNum);
170             if (charCount < 0)
171                 throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
172             if (charCount > s.Length - charIndex)
173                 throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
174             if (byteIndex < 0 || byteIndex > bytes.Length)
175                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
176             Contract.EndContractBlock();
177
178             // Measure the free space first: a zero-length array is swapped for a one-byte dummy below,
179             // because the fixed statement takes the address of the array's first element.
180             int byteCount = bytes.Length - byteIndex;
181             if (bytes.Length == 0)
182                 bytes = new byte[1];
183             fixed (char* pSource = s) fixed (byte* pTarget = &bytes[0])
184                 return GetBytes(pSource + charIndex, charCount, pTarget + byteIndex, byteCount, null);
185         }
186
187         // Encodes a range of characters in a character array into a range of bytes
188         // in a byte array. An exception occurs if the byte array is not large
189         // enough to hold the complete encoding of the characters. The
190         // GetByteCount method can be used to determine the exact number of
191         // bytes that will be produced for a given range of characters.
192         // Alternatively, the GetMaxByteCount method can be used to
193         // determine the maximum number of bytes that will be produced for a given
194         // number of characters, regardless of the actual character values.
195         //
196         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
197         // So if you fix this, fix the others.  Currently those include:
198         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
199         // parent method is safe
200
201         // Array overload: encodes charCount chars of chars, starting at charIndex, into bytes at byteIndex
202         // and returns whatever the pointer-based worker returns.
203         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
204         {
205             if (chars == null)
206                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
207             if (bytes == null)
208                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
209             if (charIndex < 0)
210                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_NeedNonNegNum);
211             if (charCount < 0)
212                 throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
213             if (charCount > chars.Length - charIndex)
214                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
215             if (byteIndex < 0 || byteIndex > bytes.Length)
216                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
217             Contract.EndContractBlock();
218
219             // Nothing to encode; returning here also keeps an empty chars array out of the fixed statement.
220             if (charCount == 0)
221                 return 0;
222
223             // byteCount is the room available after byteIndex, not the length of the array.
224             int byteCount = bytes.Length - byteIndex;
225
226             // A zero-length bytes array has no first element to take the address of, so use a dummy.
227             if (bytes.Length == 0)
228                 bytes = new byte[1];
229
230             fixed (char* pSource = chars) fixed (byte* pTarget = &bytes[0])
231                 return GetBytes(pSource + charIndex, charCount, pTarget + byteIndex, byteCount, null);
232         }
233
234         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
235         // So if you fix this, fix the others.  Currently those include:
236         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
237
238         // Pointer overload: checks both pointers and both counts, then defers to the internal worker.
239         [CLSCompliant(false)]
240         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
241         {
242             if (bytes == null) throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
243             if (chars == null) throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
244             if (charCount < 0) throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
245             if (byteCount < 0) throw new ArgumentOutOfRangeException("byteCount", SR.ArgumentOutOfRange_NeedNonNegNum);
246             Contract.EndContractBlock();
247
248             // null is passed for the encoder argument of the internal worker.
249             return GetBytes(chars, charCount, bytes, byteCount, null);
250         }
251
252         // Returns the number of characters produced by decoding a range of bytes
253         // in a byte array.
254         //
255         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
256         // So if you fix this, fix the others.  Currently those include:
257         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
258         // parent method is safe
259
260         // Array overload: validates the (index, count) window and then counts chars through the pointer-based worker.
261         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
262         {
263             if (bytes == null)
264                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
265             if (index < 0)
266                 throw new ArgumentOutOfRangeException("index", SR.ArgumentOutOfRange_NeedNonNegNum);
267             if (count < 0)
268                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
269             if (count > bytes.Length - index)
270                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
271             Contract.EndContractBlock();
272             // An empty range yields no chars; returning here also keeps an empty array out of the fixed statement.
273             if (count == 0)
274                 return 0;
275
276             fixed (byte* pFirst = bytes)
277             {
278                 return GetCharCount(pFirst + index, count, null);
279             }
280         }
281
282         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
283         // So if you fix this, fix the others.  Currently those include:
284         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
285
286         // Pointer overload: checks the pointer and the count, then calls the three-argument overload with null.
287         [CLSCompliant(false)]
288         public override unsafe int GetCharCount(byte* bytes, int count)
289         {
290             if (bytes == null)
291                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
292             if (count < 0)
293                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
294             Contract.EndContractBlock();
295
296             int charTotal = GetCharCount(bytes, count, null);
297             return charTotal;
298         }
299
300         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
301         // So if you fix this, fix the others.  Currently those include:
302         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
303         // parent method is safe
304
305         // Array overload: decodes byteCount bytes of bytes, starting at byteIndex, into chars at charIndex
306         // and returns whatever the pointer-based worker returns.
307         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
308         {
309             if (bytes == null)
310                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
311             if (chars == null)
312                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
313             if (byteIndex < 0)
314                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_NeedNonNegNum);
315             if (byteCount < 0)
316                 throw new ArgumentOutOfRangeException("byteCount", SR.ArgumentOutOfRange_NeedNonNegNum);
317             if (byteCount > bytes.Length - byteIndex)
318                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
319             if (charIndex < 0 || charIndex > chars.Length)
320                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
321             Contract.EndContractBlock();
322
323             // Nothing to decode; returning here also keeps an empty bytes array out of the fixed statement.
324             if (byteCount == 0)
325                 return 0;
326
327             // charCount is the room available after charIndex, not the length of the array.
328             int charCount = chars.Length - charIndex;
329
330             // A zero-length chars array has no first element to take the address of, so use a dummy.
331             if (chars.Length == 0)
332                 chars = new char[1];
333
334             fixed (byte* pSource = bytes) fixed (char* pTarget = &chars[0])
335                 return GetChars(pSource + byteIndex, byteCount, pTarget + charIndex, charCount, null);
336         }
337
338         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
339         // So if you fix this, fix the others.  Currently those include:
340         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
341
342         // Pointer overload: checks both pointers and both counts, then calls the five-argument overload with null.
343         [CLSCompliant(false)]
344         public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
345         {
346             if (bytes == null) throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
347             if (chars == null) throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
348             if (charCount < 0) throw new ArgumentOutOfRangeException("charCount", SR.ArgumentOutOfRange_NeedNonNegNum);
349             if (byteCount < 0) throw new ArgumentOutOfRangeException("byteCount", SR.ArgumentOutOfRange_NeedNonNegNum);
350             Contract.EndContractBlock();
351
352             // null is passed as the final argument of the internal overload.
353             return GetChars(bytes, byteCount, chars, charCount, null);
354         }
355
356         // Returns a string containing the decoded representation of a range of
357         // bytes in a byte array.
358         //
359         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
360         // So if you fix this, fix the others.  Currently those include:
361         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
362         // parent method is safe
363
364         // Decodes count bytes of bytes, starting at index, into a new string created with this encoding.
365         public override unsafe String GetString(byte[] bytes, int index, int count)
366         {
367             if (bytes == null)
368                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
369             if (index < 0)
370                 throw new ArgumentOutOfRangeException("index", SR.ArgumentOutOfRange_NeedNonNegNum);
371             if (count < 0)
372                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
373             if (count > bytes.Length - index)
374                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
375             Contract.EndContractBlock();
376
377             // An empty range yields the empty string and keeps an empty array out of the fixed statement.
378             if (count == 0)
379                 return String.Empty;
380
381             fixed (byte* pFirst = bytes)
382                 return String.CreateStringFromEncoding(pFirst + index, count, this);
383         }
384
385         //
386         // End of standard methods copied from EncodingNLS.cs
387         //
388
389         // Counts the bytes that encoding `count` chars starting at `chars` would produce.
390         // Every ordinary char and every well-formed surrogate pair counts as 4 bytes; an unpaired
391         // surrogate is handed to the encoder fallback, and any chars the fallback buffer returns are
392         // processed by the same loop. `encoder` may be null; when it is not, its left-over high
393         // surrogate and its fallback buffer are used. Throws ArgumentException if that buffer still
394         // holds data, and ArgumentOutOfRangeException if the total does not fit in an Int32.
395         // The total is accumulated in a long so that a wrapped Int32 value can never be returned.
396         internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
397         {
398             Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
399             Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");
400
401             char* end = chars + count;
402             char* charStart = chars;
403             long byteCount = 0;     // long: 4 bytes per char can exceed Int32.MaxValue
404
405             char highSurrogate = '\0';
406
407             // For fallback we may need a fallback buffer
408             EncoderFallbackBuffer fallbackBuffer = null;
409             char* charsForFallback;
410
411             if (encoder != null)
412             {
413                 highSurrogate = encoder._charLeftOver;
414                 fallbackBuffer = encoder.FallbackBuffer;
415
416                 // We mustn't have left over fallback data when counting
417                 if (fallbackBuffer.Remaining > 0)
418                     throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
419             }
420             else
421             {
422                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
423             }
424
425             // Give the fallback buffer the bounds of the input and the encoder it is working for.
426             fallbackBuffer.InternalInitialize(charStart, end, encoder, false);
427
428             char ch;
429         TryAgain:
430
431             while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
432             {
433                 // First unwind any fallback
434                 if (ch == 0)
435                 {
436                     // No fallback, just get next char
437                     ch = *chars;
438                     chars++;
439                 }
440
441                 // Do we need a low surrogate?
442                 if (highSurrogate != '\0')
443                 {
444                     // The previous char was a high surrogate, so a low surrogate is expected here.
445                     if (Char.IsLowSurrogate(ch))
446                     {
447                         // They're all legal
448                         highSurrogate = '\0';
449
450                         // One surrogate pair will be translated into 4 bytes UTF32.
451                         byteCount += 4;
452                         continue;
453                     }
454
455                     // We are missing our low surrogate, decrement chars and fallback the high surrogate
456                     // The high surrogate may have come from the encoder, but nothing else did.
457                     Debug.Assert(chars > charStart,
458                         "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
459                     chars--;
460
461                     // Do the fallback
462                     charsForFallback = chars;
463                     fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
464                     chars = charsForFallback;
465
466                     // We're going to fallback the old high surrogate.
467                     highSurrogate = '\0';
468                     continue;
469                 }
470
471                 // Do we have another high surrogate?
472                 if (Char.IsHighSurrogate(ch))
473                 {
474                     // We'll have a high surrogate to check next time.
475                     highSurrogate = ch;
476                     continue;
477                 }
478
479                 // Check for illegal characters
480                 if (Char.IsLowSurrogate(ch))
481                 {
482                     // We have a leading low surrogate, do the fallback
483                     charsForFallback = chars;
484                     fallbackBuffer.InternalFallback(ch, ref charsForFallback);
485                     chars = charsForFallback;
486
487                     // Try again with fallback buffer
488                     continue;
489                 }
490
491                 // We get to add the character (4 bytes UTF32)
492                 byteCount += 4;
493             }
494
495             // May have to do our last surrogate
496             if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
497             {
498                 // We have to do the fallback for the lonely high surrogate
499                 charsForFallback = chars;
500                 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
501                 chars = charsForFallback;
502
503                 highSurrogate = (char)0;
504                 goto TryAgain;
505             }
506
507             // Check for overflows: the 64-bit total must still be representable as an Int32.
508             if (byteCount > Int32.MaxValue)
509                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
510
511             // Shouldn't have anything in fallback buffer for GetByteCount
512             // (don't have to check _throwOnOverflow for count)
513             Debug.Assert(fallbackBuffer.Remaining == 0,
514                 "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
515
516             // Return our count (the range check above makes this narrowing cast lossless)
517             return (int)byteCount;
518         }
519
520         internal override unsafe int GetBytes(char* chars, int charCount,
521                                                  byte* bytes, int byteCount, EncoderNLS encoder)
522         {
523             Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
524             Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
525             Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
526             Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");
527
528             char* charStart = chars;
529             char* charEnd = chars + charCount;
530             byte* byteStart = bytes;
531             byte* byteEnd = bytes + byteCount;
532
533             char highSurrogate = '\0';
534
535             // For fallback we may need a fallback buffer
536             EncoderFallbackBuffer fallbackBuffer = null;
537             char* charsForFallback;
538
539             if (encoder != null)
540             {
541                 highSurrogate = encoder._charLeftOver;
542                 fallbackBuffer = encoder.FallbackBuffer;
543
544                 // We mustn't have left over fallback data when not converting
545                 if (encoder._throwOnOverflow && fallbackBuffer.Remaining > 0)
546                     throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
547             }
548             else
549             {
550                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
551             }
552
553             // Set our internal fallback interesting things.
554             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
555
556             char ch;
557         TryAgain:
558
559             while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
560             {
561                 // First unwind any fallback
562                 if (ch == 0)
563                 {
564                     // No fallback, just get next char
565                     ch = *chars;
566                     chars++;
567                 }
568
569                 // Do we need a low surrogate?
570                 if (highSurrogate != '\0')
571                 {
572                     //
573                     // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
574                     //
575                     if (Char.IsLowSurrogate(ch))
576                     {
577                         // Is it a legal one?
578                         uint iTemp = GetSurrogate(highSurrogate, ch);
579                         highSurrogate = '\0';
580
581                         //
582                         // One surrogate pair will be translated into 4 bytes UTF32.
583                         //
584                         if (bytes + 3 >= byteEnd)
585                         {
586                             // Don't have 4 bytes
587                             if (fallbackBuffer.bFallingBack)
588                             {
589                                 fallbackBuffer.MovePrevious();                  // Aren't using these 2 fallback chars
590                                 fallbackBuffer.MovePrevious();
591                             }
592                             else
593                             {
594                                 // If we don't have enough room, then either we should've advanced a while
595                                 // or we should have bytes==byteStart and throw below
596                                 Debug.Assert(chars > charStart + 1 || bytes == byteStart,
597                                     "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
598                                 chars -= 2;                                       // Aren't using those 2 chars
599                             }
600                             ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
601                             highSurrogate = (char)0;                            // Nothing left over (we backed up to start of pair if supplimentary)
602                             break;
603                         }
604
605                         if (_bigEndian)
606                         {
607                             *(bytes++) = (byte)(0x00);
608                             *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
609                             *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
610                             *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
611                         }
612                         else
613                         {
614                             *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
615                             *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
616                             *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
617                             *(bytes++) = (byte)(0x00);
618                         }
619                         continue;
620                     }
621
622                     // We are missing our low surrogate, decrement chars and fallback the high surrogate
623                     // The high surrogate may have come from the encoder, but nothing else did.
624                     Debug.Assert(chars > charStart,
625                         "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
626                     chars--;
627
628                     // Do the fallback
629                     charsForFallback = chars;
630                     fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
631                     chars = charsForFallback;
632
633                     // We're going to fallback the old high surrogate.
634                     highSurrogate = '\0';
635                     continue;
636                 }
637
638                 // Do we have another high surrogate?, if so remember it
639                 if (Char.IsHighSurrogate(ch))
640                 {
641                     //
642                     // We'll have a high surrogate to check next time.
643                     //
644                     highSurrogate = ch;
645                     continue;
646                 }
647
648                 // Check for illegal characters (low surrogate)
649                 if (Char.IsLowSurrogate(ch))
650                 {
651                     // We have a leading low surrogate, do the fallback
652                     charsForFallback = chars;
653                     fallbackBuffer.InternalFallback(ch, ref charsForFallback);
654                     chars = charsForFallback;
655
656                     // Try again with fallback buffer
657                     continue;
658                 }
659
660                 // We get to add the character, yippee.
661                 if (bytes + 3 >= byteEnd)
662                 {
663                     // Don't have 4 bytes
664                     if (fallbackBuffer.bFallingBack)
665                         fallbackBuffer.MovePrevious();                  // Aren't using this fallback char
666                     else
667                     {
668                         // Must've advanced already
669                         Debug.Assert(chars > charStart,
670                             "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
671                         chars--;                                        // Aren't using this char
672                     }
673                     ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
674                     break;                                              // Didn't throw, stop
675                 }
676
677                 if (_bigEndian)
678                 {
679                     *(bytes++) = (byte)(0x00);
680                     *(bytes++) = (byte)(0x00);
681                     *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
682                     *(bytes++) = (byte)(ch);            // Implies & 0xFF
683                 }
684                 else
685                 {
686                     *(bytes++) = (byte)(ch);            // Implies & 0xFF
687                     *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
688                     *(bytes++) = (byte)(0x00);
689                     *(bytes++) = (byte)(0x00);
690                 }
691             }
692
693             // May have to do our last surrogate
694             if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
695             {
696                 // We have to do the fallback for the lonely high surrogate
697                 charsForFallback = chars;
698                 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
699                 chars = charsForFallback;
700
701                 highSurrogate = (char)0;
702                 goto TryAgain;
703             }
704
705             // Fix our encoder if we have one
706             Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
707                 "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
708
709             if (encoder != null)
710             {
711                 // Remember our left over surrogate (or 0 if flushing)
712                 encoder._charLeftOver = highSurrogate;
713
714                 // Need # chars used
715                 encoder._charsUsed = (int)(chars - charStart);
716             }
717
718             // return the new length
719             return (int)(bytes - byteStart);
720         }
721
722         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
723         {
724             Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
725             Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");
726
727             UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
728
729             // None so far!
730             int charCount = 0;
731             byte* end = bytes + count;
732             byte* byteStart = bytes;
733
734             // Set up decoder
735             int readCount = 0;
736             uint iChar = 0;
737
738             // For fallback we may need a fallback buffer
739             DecoderFallbackBuffer fallbackBuffer = null;
740
741             // See if there's anything in our decoder
742             if (decoder != null)
743             {
744                 readCount = decoder.readByteCount;
745                 iChar = (uint)decoder.iChar;
746                 fallbackBuffer = decoder.FallbackBuffer;
747
748                 // Shouldn't have anything in fallback buffer for GetCharCount
749                 // (don't have to check _throwOnOverflow for chars or count)
750                 Debug.Assert(fallbackBuffer.Remaining == 0,
751                     "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
752             }
753             else
754             {
755                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
756             }
757
758             // Set our internal fallback interesting things.
759             fallbackBuffer.InternalInitialize(byteStart, null);
760
761             // Loop through our input, 4 characters at a time!
762             while (bytes < end && charCount >= 0)
763             {
764                 // Get our next character
765                 if (_bigEndian)
766                 {
767                     // Scoot left and add it to the bottom
768                     iChar <<= 8;
769                     iChar += *(bytes++);
770                 }
771                 else
772                 {
773                     // Scoot right and add it to the top
774                     iChar >>= 8;
775                     iChar += (uint)(*(bytes++)) << 24;
776                 }
777
778                 readCount++;
779
780                 // See if we have all the bytes yet
781                 if (readCount < 4)
782                     continue;
783
784                 // Have the bytes
785                 readCount = 0;
786
787                 // See if its valid to encode
788                 if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
789                 {
790                     // Need to fall back these 4 bytes
791                     byte[] fallbackBytes;
792                     if (_bigEndian)
793                     {
794                         fallbackBytes = new byte[] {
795                             unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
796                             unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
797                     }
798                     else
799                     {
800                         fallbackBytes = new byte[] {
801                             unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
802                             unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
803                     }
804
805                     charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
806
807                     // Ignore the illegal character
808                     iChar = 0;
809                     continue;
810                 }
811
812                 // Ok, we have something we can add to our output
813                 if (iChar >= 0x10000)
814                 {
815                     // Surrogates take 2
816                     charCount++;
817                 }
818
819                 // Add the rest of the surrogate or our normal character
820                 charCount++;
821
822                 // iChar is back to 0
823                 iChar = 0;
824             }
825
826             // See if we have something left over that has to be decoded
827             if (readCount > 0 && (decoder == null || decoder.MustFlush))
828             {
829                 // Oops, there's something left over with no place to go.
830                 byte[] fallbackBytes = new byte[readCount];
831                 if (_bigEndian)
832                 {
833                     while (readCount > 0)
834                     {
835                         fallbackBytes[--readCount] = unchecked((byte)iChar);
836                         iChar >>= 8;
837                     }
838                 }
839                 else
840                 {
841                     while (readCount > 0)
842                     {
843                         fallbackBytes[--readCount] = unchecked((byte)(iChar >> 24));
844                         iChar <<= 8;
845                     }
846                 }
847
848                 charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
849             }
850
851             // Check for overflows.
852             if (charCount < 0)
853                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
854
855             // Shouldn't have anything in fallback buffer for GetCharCount
856             // (don't have to check _throwOnOverflow for chars or count)
857             Debug.Assert(fallbackBuffer.Remaining == 0,
858                 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");
859
860             // Return our count
861             return charCount;
862         }
863
864         internal override unsafe int GetChars(byte* bytes, int byteCount,
865                                                 char* chars, int charCount, DecoderNLS baseDecoder)
866         {
867             Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
868             Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
869             Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
870             Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");
871
872             UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
873
874             // None so far!
875             char* charStart = chars;
876             char* charEnd = chars + charCount;
877
878             byte* byteStart = bytes;
879             byte* byteEnd = bytes + byteCount;
880
881             // See if there's anything in our decoder (but don't clear it yet)
882             int readCount = 0;
883             uint iChar = 0;
884
885             // For fallback we may need a fallback buffer
886             DecoderFallbackBuffer fallbackBuffer = null;
887             char* charsForFallback;
888
889             // See if there's anything in our decoder
890             if (decoder != null)
891             {
892                 readCount = decoder.readByteCount;
893                 iChar = (uint)decoder.iChar;
894                 fallbackBuffer = baseDecoder.FallbackBuffer;
895
896                 // Shouldn't have anything in fallback buffer for GetChars
897                 // (don't have to check _throwOnOverflow for chars)
898                 Debug.Assert(fallbackBuffer.Remaining == 0,
899                     "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
900             }
901             else
902             {
903                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
904             }
905
906             // Set our internal fallback interesting things.
907             fallbackBuffer.InternalInitialize(bytes, chars + charCount);
908
909             // Loop through our input, 4 characters at a time!
910             while (bytes < byteEnd)
911             {
912                 // Get our next character
913                 if (_bigEndian)
914                 {
915                     // Scoot left and add it to the bottom
916                     iChar <<= 8;
917                     iChar += *(bytes++);
918                 }
919                 else
920                 {
921                     // Scoot right and add it to the top
922                     iChar >>= 8;
923                     iChar += (uint)(*(bytes++)) << 24;
924                 }
925
926                 readCount++;
927
928                 // See if we have all the bytes yet
929                 if (readCount < 4)
930                     continue;
931
932                 // Have the bytes
933                 readCount = 0;
934
935                 // See if its valid to encode
936                 if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
937                 {
938                     // Need to fall back these 4 bytes
939                     byte[] fallbackBytes;
940                     if (_bigEndian)
941                     {
942                         fallbackBytes = new byte[] {
943                             unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
944                             unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
945                     }
946                     else
947                     {
948                         fallbackBytes = new byte[] {
949                             unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
950                             unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
951                     }
952
953                     // Chars won't be updated unless this works.
954                     charsForFallback = chars;
955                     bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
956                     chars = charsForFallback;
957
958                     if (!fallbackResult)
959                     {
960
961                         // Couldn't fallback, throw or wait til next time
962                         // We either read enough bytes for bytes-=4 to work, or we're
963                         // going to throw in ThrowCharsOverflow because chars == charStart
964                         Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
965                             "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
966                         bytes -= 4;                                       // get back to where we were
967                         iChar = 0;                                        // Remembering nothing
968                         fallbackBuffer.InternalReset();
969                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
970                         break;                                          // Stop here, didn't throw
971                     }
972
973                     // Ignore the illegal character
974                     iChar = 0;
975                     continue;
976                 }
977
978
979                 // Ok, we have something we can add to our output
980                 if (iChar >= 0x10000)
981                 {
982                     // Surrogates take 2
983                     if (chars >= charEnd - 1)
984                     {
985                         // Throwing or stopping
986                         // We either read enough bytes for bytes-=4 to work, or we're
987                         // going to throw in ThrowCharsOverflow because chars == charStart
988                         Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
989                             "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
990                         bytes -= 4;                                       // get back to where we were
991                         iChar = 0;                                        // Remembering nothing
992                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
993                         break;                                          // Stop here, didn't throw
994                     }
995
996                     *(chars++) = GetHighSurrogate(iChar);
997                     iChar = GetLowSurrogate(iChar);
998                 }
999                 // Bounds check for normal character
1000                 else if (chars >= charEnd)
1001                 {
1002                     // Throwing or stopping
1003                     // We either read enough bytes for bytes-=4 to work, or we're
1004                     // going to throw in ThrowCharsOverflow because chars == charStart
1005                     Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
1006                         "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
1007                     bytes -= 4;                                       // get back to where we were
1008                     iChar = 0;                                        // Remembering nothing                    
1009                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1010                     break;                                          // Stop here, didn't throw
1011                 }
1012
1013                 // Add the rest of the surrogate or our normal character
1014                 *(chars++) = (char)iChar;
1015
1016                 // iChar is back to 0
1017                 iChar = 0;
1018             }
1019
1020             // See if we have something left over that has to be decoded
1021             if (readCount > 0 && (decoder == null || decoder.MustFlush))
1022             {
1023                 // Oops, there's something left over with no place to go.
1024                 byte[] fallbackBytes = new byte[readCount];
1025                 int tempCount = readCount;
1026                 if (_bigEndian)
1027                 {
1028                     while (tempCount > 0)
1029                     {
1030                         fallbackBytes[--tempCount] = unchecked((byte)iChar);
1031                         iChar >>= 8;
1032                     }
1033                 }
1034                 else
1035                 {
1036                     while (tempCount > 0)
1037                     {
1038                         fallbackBytes[--tempCount] = unchecked((byte)(iChar >> 24));
1039                         iChar <<= 8;
1040                     }
1041                 }
1042
1043                 charsForFallback = chars;
1044                 bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
1045                 chars = charsForFallback;
1046
1047                 if (!fallbackResult)
1048                 {
1049                     // Couldn't fallback.
1050                     fallbackBuffer.InternalReset();
1051                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1052                     // Stop here, didn't throw, backed up, so still nothing in buffer
1053                 }
1054                 else
1055                 {
1056                     // Don't clear our decoder unless we could fall it back.
1057                     // If we caught the if above, then we're a convert() and will catch this next time.
1058                     readCount = 0;
1059                     iChar = 0;
1060                 }
1061             }
1062
1063             // Remember any left over stuff, clearing buffer as well for MustFlush
1064             if (decoder != null)
1065             {
1066                 decoder.iChar = (int)iChar;
1067                 decoder.readByteCount = readCount;
1068                 decoder._bytesUsed = (int)(bytes - byteStart);
1069             }
1070
1071             // Shouldn't have anything in fallback buffer for GetChars
1072             // (don't have to check _throwOnOverflow for chars)
1073             Debug.Assert(fallbackBuffer.Remaining == 0,
1074                 "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
1075
1076             // Return our count
1077             return (int)(chars - charStart);
1078         }
1079
1080
1081         private uint GetSurrogate(char cHigh, char cLow)
1082         {
1083             return (((uint)cHigh - 0xD800) * 0x400) + ((uint)cLow - 0xDC00) + 0x10000;
1084         }
1085
1086         private char GetHighSurrogate(uint iChar)
1087         {
1088             return (char)((iChar - 0x10000) / 0x400 + 0xD800);
1089         }
1090
1091         private char GetLowSurrogate(uint iChar)
1092         {
1093             return (char)((iChar - 0x10000) % 0x400 + 0xDC00);
1094         }
1095
1096
1097         public override Decoder GetDecoder()
1098         {
1099             return new UTF32Decoder(this);
1100         }
1101
1102
1103         public override Encoder GetEncoder()
1104         {
1105             return new EncoderNLS(this);
1106         }
1107
1108
1109         public override int GetMaxByteCount(int charCount)
1110         {
1111             if (charCount < 0)
1112                 throw new ArgumentOutOfRangeException(nameof(charCount),
1113                      SR.ArgumentOutOfRange_NeedNonNegNum);
1114             Contract.EndContractBlock();
1115
1116             // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
1117             long byteCount = (long)charCount + 1;
1118
1119             if (EncoderFallback.MaxCharCount > 1)
1120                 byteCount *= EncoderFallback.MaxCharCount;
1121
1122             // 4 bytes per char
1123             byteCount *= 4;
1124
1125             if (byteCount > 0x7fffffff)
1126                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
1127
1128             return (int)byteCount;
1129         }
1130
1131
1132         public override int GetMaxCharCount(int byteCount)
1133         {
1134             if (byteCount < 0)
1135                 throw new ArgumentOutOfRangeException(nameof(byteCount),
1136                      SR.ArgumentOutOfRange_NeedNonNegNum);
1137             Contract.EndContractBlock();
1138
1139             // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
1140             // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
1141             // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair
1142             int charCount = (byteCount / 2) + 2;
1143
1144             // Also consider fallback because our input bytes could be out of range of unicode.
1145             // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
1146             if (DecoderFallback.MaxCharCount > 2)
1147             {
1148                 // Multiply time fallback size
1149                 charCount *= DecoderFallback.MaxCharCount;
1150
1151                 // We were already figuring 2 chars per 4 bytes, but fallback will be different #
1152                 charCount /= 2;
1153             }
1154
1155             if (charCount > 0x7fffffff)
1156                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
1157
1158             return (int)charCount;
1159         }
1160
1161
1162         public override byte[] GetPreamble()
1163         {
1164             if (_emitUTF32ByteOrderMark)
1165             {
1166                 // Allocate new array to prevent users from modifying it.
1167                 if (_bigEndian)
1168                 {
1169                     return new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
1170                 }
1171                 else
1172                 {
1173                     return new byte[4] { 0xFF, 0xFE, 0x00, 0x00 }; // 00 00 FE FF
1174                 }
1175             }
1176             else
1177                 return Array.Empty<byte>();
1178         }
1179
1180
1181         public override bool Equals(Object value)
1182         {
1183             UTF32Encoding that = value as UTF32Encoding;
1184             if (that != null)
1185             {
1186                 return (_emitUTF32ByteOrderMark == that._emitUTF32ByteOrderMark) &&
1187                        (_bigEndian == that._bigEndian) &&
1188                        (EncoderFallback.Equals(that.EncoderFallback)) &&
1189                        (DecoderFallback.Equals(that.DecoderFallback));
1190             }
1191             return (false);
1192         }
1193
1194
1195         public override int GetHashCode()
1196         {
1197             //Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
1198             return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
1199                    CodePage + (_emitUTF32ByteOrderMark ? 4 : 0) + (_bigEndian ? 8 : 0);
1200         }
1201
1202         private sealed class UTF32Decoder : DecoderNLS
1203         {
1204             // Need a place to store any extra bytes we may have picked up
1205             internal int iChar = 0;
1206             internal int readByteCount = 0;
1207
1208             public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
1209             {
1210                 // base calls reset
1211             }
1212
1213             public override void Reset()
1214             {
1215                 this.iChar = 0;
1216                 this.readByteCount = 0;
1217                 if (_fallbackBuffer != null)
1218                     _fallbackBuffer.Reset();
1219             }
1220
1221             // Anything left in our decoder?
1222             internal override bool HasState
1223             {
1224                 get
1225                 {
1226                     // ReadByteCount is our flag.  (iChar==0 doesn't mean much).
1227                     return (this.readByteCount != 0);
1228                 }
1229             }
1230         }
1231     }
1232 }