Encoding code clean up (#12864)
[platform/upstream/coreclr.git] / src / mscorlib / shared / System / Text / UnicodeEncoding.cs
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 //
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
7 //
8
9 using System;
10 using System.Globalization;
11 using System.Diagnostics;
12 using System.Diagnostics.Contracts;
13
14 namespace System.Text
15 {
16     public class UnicodeEncoding : Encoding
17     {
        // Used by Encoding.BigEndianUnicode/Unicode for lazy initialization
        // The initialization code will not be run until a static member of the class is referenced
        // Both shared instances are constructed with byteOrderMark: true.
        internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
        internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);

        // Copied from the constructor's throwOnInvalidBytes argument. When true, SetDefaultFallbacks
        // installs the exception fallbacks instead of the U+FFFD replacement fallbacks.
        internal bool isThrowException = false;

        // Copied from the constructor's bigEndian argument, which also selects the code page handed
        // to the base constructor (1201 when true, 1200 when false).
        internal bool bigEndian = false;
        // Copied from the constructor's byteOrderMark argument.
        internal bool byteOrderMark = true;

        // Unicode version 2.0 character size in bytes
        public const int CharSize = 2;
30
31
32         public UnicodeEncoding()
33             : this(false, true)
34         {
35         }
36
37
38         public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
39             : this(bigEndian, byteOrderMark, false)
40         {
41         }
42
43
44         public UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
45             : base(bigEndian ? 1201 : 1200)  //Set the data item.
46         {
47             this.isThrowException = throwOnInvalidBytes;
48             this.bigEndian = bigEndian;
49             this.byteOrderMark = byteOrderMark;
50
51             // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
52             if (this.isThrowException)
53                 SetDefaultFallbacks();
54         }
55
56         internal override void SetDefaultFallbacks()
57         {
58             // For UTF-X encodings, we use a replacement fallback with an empty string
59             if (this.isThrowException)
60             {
61                 this.encoderFallback = EncoderFallback.ExceptionFallback;
62                 this.decoderFallback = DecoderFallback.ExceptionFallback;
63             }
64             else
65             {
66                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
67                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
68             }
69         }
70
71         // The following methods are copied from EncodingNLS.cs.
72         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
73         // These should be kept in sync for the following classes:
74         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
75         //
76
77         // Returns the number of bytes required to encode a range of characters in
78         // a character array.
79         //
80         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
81         // So if you fix this, fix the others.  Currently those include:
82         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
83         // parent method is safe
84
85         public override unsafe int GetByteCount(char[] chars, int index, int count)
86         {
87             // Validate input parameters
88             if (chars == null)
89                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
90
91             if (index < 0 || count < 0)
92                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
93
94             if (chars.Length - index < count)
95                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
96             Contract.EndContractBlock();
97
98             // If no input, return 0, avoid fixed empty array problem
99             if (count == 0)
100                 return 0;
101
102             // Just call the pointer version
103             fixed (char* pChars = chars)
104                 return GetByteCount(pChars + index, count, null);
105         }
106
107         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
108         // So if you fix this, fix the others.  Currently those include:
109         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
110         // parent method is safe
111
112         public override unsafe int GetByteCount(String s)
113         {
114             // Validate input
115             if (s==null)
116                 throw new ArgumentNullException("s");
117             Contract.EndContractBlock();
118
119             fixed (char* pChars = s)
120                 return GetByteCount(pChars, s.Length, null);
121         }
122
123         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
124         // So if you fix this, fix the others.  Currently those include:
125         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
126
127         [CLSCompliant(false)]
128         public override unsafe int GetByteCount(char* chars, int count)
129         {
130             // Validate Parameters
131             if (chars == null)
132                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
133
134             if (count < 0)
135                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
136             Contract.EndContractBlock();
137
138             // Call it with empty encoder
139             return GetByteCount(chars, count, null);
140         }
141
142         // Parent method is safe.
143         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
144         // So if you fix this, fix the others.  Currently those include:
145         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
146
147         public override unsafe int GetBytes(String s, int charIndex, int charCount,
148                                               byte[] bytes, int byteIndex)
149         {
150             if (s == null || bytes == null)
151                 throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
152
153             if (charIndex < 0 || charCount < 0)
154                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
155
156             if (s.Length - charIndex < charCount)
157                 throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
158
159             if (byteIndex < 0 || byteIndex > bytes.Length)
160                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
161             Contract.EndContractBlock();
162
163             int byteCount = bytes.Length - byteIndex;
164
165             // Fixed doesn't like 0 length arrays.
166             if (bytes.Length == 0)
167                 bytes = new byte[1];
168
169             fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
170                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
171         }
172
173         // Encodes a range of characters in a character array into a range of bytes
174         // in a byte array. An exception occurs if the byte array is not large
175         // enough to hold the complete encoding of the characters. The
176         // GetByteCount method can be used to determine the exact number of
177         // bytes that will be produced for a given range of characters.
178         // Alternatively, the GetMaxByteCount method can be used to
179         // determine the maximum number of bytes that will be produced for a given
180         // number of characters, regardless of the actual character values.
181         //
182         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
183         // So if you fix this, fix the others.  Currently those include:
184         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
185         // parent method is safe
186
187         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
188                                                byte[] bytes, int byteIndex)
189         {
190             // Validate parameters
191             if (chars == null || bytes == null)
192                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
193
194             if (charIndex < 0 || charCount < 0)
195                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
196
197             if (chars.Length - charIndex < charCount)
198                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
199
200             if (byteIndex < 0 || byteIndex > bytes.Length)
201                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
202             Contract.EndContractBlock();
203
204             // If nothing to encode return 0, avoid fixed problem
205             if (charCount == 0)
206                 return 0;
207
208             // Just call pointer version
209             int byteCount = bytes.Length - byteIndex;
210
211             // Fixed doesn't like 0 length arrays.
212             if (bytes.Length == 0)
213                 bytes = new byte[1];
214
215             fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
216                 // Remember that byteCount is # to decode, not size of array.
217                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
218         }
219
220         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
221         // So if you fix this, fix the others.  Currently those include:
222         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
223
224         [CLSCompliant(false)]
225         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
226         {
227             // Validate Parameters
228             if (bytes == null || chars == null)
229                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
230
231             if (charCount < 0 || byteCount < 0)
232                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
233             Contract.EndContractBlock();
234
235             return GetBytes(chars, charCount, bytes, byteCount, null);
236         }
237
238         // Returns the number of characters produced by decoding a range of bytes
239         // in a byte array.
240         //
241         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
242         // So if you fix this, fix the others.  Currently those include:
243         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
244         // parent method is safe
245
246         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
247         {
248             // Validate Parameters
249             if (bytes == null)
250                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
251
252             if (index < 0 || count < 0)
253                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
254
255             if (bytes.Length - index < count)
256                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
257             Contract.EndContractBlock();
258
259             // If no input just return 0, fixed doesn't like 0 length arrays
260             if (count == 0)
261                 return 0;
262
263             // Just call pointer version
264             fixed (byte* pBytes = bytes)
265                 return GetCharCount(pBytes + index, count, null);
266         }
267
268         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
269         // So if you fix this, fix the others.  Currently those include:
270         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
271
272         [CLSCompliant(false)]
273         public override unsafe int GetCharCount(byte* bytes, int count)
274         {
275             // Validate Parameters
276             if (bytes == null)
277                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
278
279             if (count < 0)
280                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
281             Contract.EndContractBlock();
282
283             return GetCharCount(bytes, count, null);
284         }
285
286         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
287         // So if you fix this, fix the others.  Currently those include:
288         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
289         // parent method is safe
290
291         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
292                                               char[] chars, int charIndex)
293         {
294             // Validate Parameters
295             if (bytes == null || chars == null)
296                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
297
298             if (byteIndex < 0 || byteCount < 0)
299                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
300
301             if ( bytes.Length - byteIndex < byteCount)
302                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
303
304             if (charIndex < 0 || charIndex > chars.Length)
305                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
306             Contract.EndContractBlock();
307
308             // If no input, return 0 & avoid fixed problem
309             if (byteCount == 0)
310                 return 0;
311
312             // Just call pointer version
313             int charCount = chars.Length - charIndex;
314
315             // Fixed doesn't like 0 length arrays.
316             if (chars.Length == 0)
317                 chars = new char[1];
318
319             fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
320                 // Remember that charCount is # to decode, not size of array
321                 return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
322         }
323
324         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
325         // So if you fix this, fix the others.  Currently those include:
326         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
327
328         [CLSCompliant(false)]
329         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
330         {
331             // Validate Parameters
332             if (bytes == null || chars == null)
333                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
334
335             if (charCount < 0 || byteCount < 0)
336                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
337             Contract.EndContractBlock();
338
339             return GetChars(bytes, byteCount, chars, charCount, null);
340         }
341
342         // Returns a string containing the decoded representation of a range of
343         // bytes in a byte array.
344         //
345         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
346         // So if you fix this, fix the others.  Currently those include:
347         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
348         // parent method is safe
349
350         public override unsafe string GetString(byte[] bytes, int index, int count)
351         {
352             // Validate Parameters
353             if (bytes == null)
354                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
355
356             if (index < 0 || count < 0)
357                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
358
359             if (bytes.Length - index < count)
360                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
361             Contract.EndContractBlock();
362
363             // Avoid problems with empty input buffer
364             if (count == 0) return String.Empty;
365
366             fixed (byte* pBytes = bytes)
367                 return String.CreateStringFromEncoding(
368                     pBytes + index, count, this);
369         }
370
371         //
372         // End of standard methods copied from EncodingNLS.cs
373         //
374
        // Core byte-counting routine used by the public GetByteCount overloads (which pass a
        // null encoder).
        //
        // chars/count : the characters to measure.
        // encoder     : optional; when non-null its _charLeftOver, fallback buffer and MustFlush
        //               values are consulted.
        //
        // The count starts at 2 bytes per input char and is then corrected: chars handed to the
        // fallback buffer have their 2 bytes removed, every char the fallback buffer produces adds
        // 2 bytes, and a high surrogate still pending at the end is not counted.
        //
        // Throws ArgumentOutOfRangeException when count << 1 overflows to a negative value, and
        // ArgumentException when the encoder's fallback buffer still has data on entry or when a
        // high surrogate is still pending after the end-of-input fallback has already run once.
        internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
        {
            Debug.Assert(chars != null, "[UnicodeEncoding.GetByteCount]chars!=null");
            Debug.Assert(count >= 0, "[UnicodeEncoding.GetByteCount]count >=0");

            // Start by assuming each char gets 2 bytes
            int byteCount = count << 1;

            // Check for overflow in byteCount
            // (If they were all invalid chars, this would actually be wrong,
            // but that's a ridiculously large # so we're not concerned about that case)
            if (byteCount < 0)
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);

            char* charStart = chars;
            char* charEnd = chars + count;
            // Pending high surrogate (0 means none)
            char charLeftOver = (char)0;

            // Set once the end-of-input fallback below has run, so a second pass throws instead of looping
            bool wasHereBefore = false;

            // Need -1 to check 2 at a time.  If we have an even #, longChars will go
            // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
            // will go from longEnd - 1 long to longEnd. (Might not get to use this)
            // (The fast loop below reads one ulong, i.e. 4 chars, per step and only runs while
            // longChars < longEnd, so stopping 3 chars early keeps each read before charEnd.)
            ulong* longEnd = (ulong*)(charEnd - 3);

            // For fallback we may need a fallback buffer
            EncoderFallbackBuffer fallbackBuffer = null;
            char* charsForFallback;

            if (encoder != null)
            {
                charLeftOver = encoder._charLeftOver;

                // Assume extra bytes to encode charLeftOver if it existed
                if (charLeftOver > 0)
                    byteCount += 2;

                // We mustn't have left over fallback data when counting
                if (encoder.InternalHasFallbackBuffer)
                {
                    fallbackBuffer = encoder.FallbackBuffer;
                    if (fallbackBuffer.Remaining > 0)
                        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                }
            }

            char ch;
        TryAgain:

            // Each iteration takes the next char from the fallback buffer if it has one (ch != 0),
            // otherwise from the input.
            while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
            {
                // First unwind any fallback
                if (ch == 0)
                {
                    // No fallback, maybe we can do it fast
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN       // If endianess is backwards then each pair of bytes would be backwards.
                    if ( bigEndian &&
#else
                    if (!bigEndian &&
#endif // BIGENDIAN

#if BIT64           // 64 bit CPU needs to be long aligned for this to work.
                          charLeftOver == 0 && (unchecked((long)chars) & 7) == 0)
#else
                          charLeftOver == 0 && (unchecked((int)chars) & 3) == 0)
#endif
                    {
                        // Need new char* so we can check 4 at a time
                        ulong* longChars = (ulong*)chars;

                        while (longChars < longEnd)
                        {
                            // See if we potentially have surrogates (0x8000 bit set)
                            // (We're either big endian on a big endian machine or little endian on 
                            // a little endian machine so that'll work)                            
                            if ((0x8000800080008000 & *longChars) != 0)
                            {
                                // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                                // 5 bits looks like 11011, then its a high or low surrogate.
                                // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                                // Note that we expect BMP characters to be more common than surrogates
                                // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
                                ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;

                                // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
                                // but no clue if they're high or low.
                                // If each of the 4 characters are non-zero, then none are surrogates.
                                if ((uTemp & 0xFFFF000000000000) == 0 ||
                                    (uTemp & 0x0000FFFF00000000) == 0 ||
                                    (uTemp & 0x00000000FFFF0000) == 0 ||
                                    (uTemp & 0x000000000000FFFF) == 0)
                                {
                                    // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                                    // or if there's 1 or 4 surrogates

                                    // If they happen to be high/low/high/low, we may as well continue.  Check the next
                                    // bit to see if its set (low) or not (high) in the right pattern
#if BIGENDIAN
                                    if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
#else
                                    if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
#endif
                                    {
                                        // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                        // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                        // Drop out to the slow loop to resolve the surrogates
                                        break;
                                    }
                                    // else they are all surrogates in High/Low/High/Low order, so we can use them.
                                }
                                // else none are surrogates, so we can use them.
                            }
                            // else all < 0x8000 so we can use them                            

                            // We already counted these four chars, go to next long.
                            longChars++;
                        }

                        chars = (char*)longChars;

                        // The fast loop may have consumed all of the input
                        if (chars >= charEnd)
                            break;
                    }
#endif // !NO_FAST_UNICODE_LOOP

                    // No fallback, just get next char
                    ch = *chars;
                    chars++;
                }
                else
                {
                    // We weren't preallocating fallback space.
                    byteCount += 2;
                }

                // Check for high or low surrogates
                if (ch >= 0xd800 && ch <= 0xdfff)
                {
                    // Was it a high surrogate?
                    if (ch <= 0xdbff)
                    {
                        // Its a high surrogate, if we already had a high surrogate do its fallback
                        if (charLeftOver > 0)
                        {
                            // Unwind the current character, this should be safe because we
                            // don't have leftover data in the fallback, so chars must have
                            // advanced already.
                            Debug.Assert(chars > charStart,
                                "[UnicodeEncoding.GetByteCount]Expected chars to have advanced in unexpected high surrogate");
                            chars--;

                            // If previous high surrogate deallocate 2 bytes
                            byteCount -= 2;

                            // Fallback the previous surrogate
                            // Need to initialize fallback buffer?
                            if (fallbackBuffer == null)
                            {
                                if (encoder == null)
                                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                                else
                                    fallbackBuffer = encoder.FallbackBuffer;

                                // Set our internal fallback interesting things.
                                fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                            }

                            charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
                            fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                            chars = charsForFallback;

                            // Now no high surrogate left over
                            charLeftOver = (char)0;
                            continue;
                        }

                        // Remember this high surrogate
                        charLeftOver = ch;
                        continue;
                    }


                    // Its a low surrogate
                    if (charLeftOver == 0)
                    {
                        // Expected a previous high surrogate.
                        // Don't count this one (we'll count its fallback if necessary)
                        byteCount -= 2;

                        // fallback this one
                        // Need to initialize fallback buffer?
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                        }
                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                        fallbackBuffer.InternalFallback(ch, ref charsForFallback);
                        chars = charsForFallback;
                        continue;
                    }

                    // Valid surrogate pair, add our charLeftOver
                    charLeftOver = (char)0;
                    continue;
                }
                else if (charLeftOver > 0)
                {
                    // Expected a low surrogate, but this char is normal

                    // Rewind the current character, fallback previous character.
                    // this should be safe because we don't have leftover data in the
                    // fallback, so chars must have advanced already.
                    Debug.Assert(chars > charStart,
                        "[UnicodeEncoding.GetByteCount]Expected chars to have advanced when expected low surrogate");
                    chars--;

                    // fallback previous chars
                    // Need to initialize fallback buffer?
                    if (fallbackBuffer == null)
                    {
                        if (encoder == null)
                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = encoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                    }
                    charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                    fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                    chars = charsForFallback;

                    // Ignore charLeftOver or throw
                    byteCount -= 2;
                    charLeftOver = (char)0;

                    continue;
                }

                // Ok we had something to add (already counted)
            }

            // Don't allocate space for left over char
            if (charLeftOver > 0)
            {
                byteCount -= 2;

                // If we have to flush, stick it in fallback and try again
                if (encoder == null || encoder.MustFlush)
                {
                    if (wasHereBefore)
                    {
                        // Throw it, using our complete character
                        throw new ArgumentException(
                            SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
                    }
                    else
                    {
                        // Need to initialize fallback buffer?
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                        }
                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                        fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                        chars = charsForFallback;
                        charLeftOver = (char)0;
                        wasHereBefore = true;
                        // Re-enter the main loop so the chars produced by the fallback get counted
                        goto TryAgain;
                    }
                }
            }

            // Shouldn't have anything in fallback buffer for GetByteCount
            // (don't have to check _throwOnOverflow for count)
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                "[UnicodeEncoding.GetByteCount]Expected empty fallback buffer at end");

            // Don't remember fallbackBuffer.encoder for counting
            return byteCount;
        }
673
674         internal override unsafe int GetBytes(char* chars, int charCount,
675                                                 byte* bytes, int byteCount, EncoderNLS encoder)
676         {
                // Encodes the chars in [chars, chars + charCount) into [bytes, bytes + byteCount),
                // writing two bytes per char in the order selected by bigEndian, and returns the
                // number of bytes written.
                //
                // A high surrogate is held in charLeftOver until the next char shows whether it
                // starts a valid pair.  Unpaired surrogates are handed to the encoder fallback
                // buffer, and whatever chars the fallback produces are read back through
                // InternalGetNextChar() at the top of the main loop.
                //
                // When encoder is non-null, a high surrogate left by the previous call is picked up
                // from encoder._charLeftOver, and on exit encoder._charLeftOver and
                // encoder._charsUsed are updated.  When encoder is null or encoder.MustFlush is set,
                // a trailing lone high surrogate is sent to the fallback instead of being kept.
                //
                // If the output runs out of room, the char (or surrogate pair) that did not fit is
                // un-read, ThrowBytesOverflow is called with a flag telling it whether nothing has
                // been written yet, and if that call returns the loop stops with what was written.
677             Debug.Assert(chars != null, "[UnicodeEncoding.GetBytes]chars!=null");
678             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetBytes]byteCount >=0");
679             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetBytes]charCount >=0");
680             Debug.Assert(bytes != null, "[UnicodeEncoding.GetBytes]bytes!=null");
681
                // Pending high surrogate waiting for its low surrogate (0 when there is none)
682             char charLeftOver = (char)0;
683             char ch;
                // Set once the end-of-input leftover surrogate has been sent to the fallback; if a
                // second pass still ends with a leftover we throw instead of looping forever.
684             bool wasHereBefore = false;
685
686
687             byte* byteEnd = bytes + byteCount;
688             char* charEnd = chars + charCount;
689             byte* byteStart = bytes;
690             char* charStart = chars;
691
692             // For fallback we may need a fallback buffer (created lazily on the first invalid surrogate)
693             EncoderFallbackBuffer fallbackBuffer = null;
694             char* charsForFallback;
695
696             // Get our encoder, but don't clear it yet.
697             if (encoder != null)
698             {
699                 charLeftOver = encoder._charLeftOver;
700
701                 // Left over fallback data is only an error here when the encoder throws on overflow
702                 if (encoder.InternalHasFallbackBuffer)
703                 {
704                     // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
705                     fallbackBuffer = encoder.FallbackBuffer;
706                     if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
707                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
708
709                     // Set our internal fallback interesting things.
710                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
711                 }
712             }
713
                // Each iteration takes the next pending fallback char if there is one (ch != 0),
                // otherwise the next input char.  We come back here once after flushing a trailing
                // high surrogate into the fallback.
714         TryAgain:
715             while (((ch = (fallbackBuffer == null) ?
716                         (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) ||
717                     chars < charEnd)
718             {
719                 // First unwind any fallback
720                 if (ch == 0)
721                 {
722                     // No fallback, maybe we can do it fast
723 #if !NO_FAST_UNICODE_LOOP
724 #if BIGENDIAN           // If endianess is backwards then each pair of bytes would be backwards.
725                     if ( bigEndian &&
726 #else
727                     if (!bigEndian &&
728 #endif // BIGENDIAN
729 #if BIT64           // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned
730                         (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
731 #else
732                         (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
733 #endif // BIT64
734                         charLeftOver == 0)
735                     {
736                         // Need -1 to check 2 at a time.  If we have an even #, longChars will go
737                         // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
738                         // will go from longEnd - 1 long to longEnd. (Might not get to use this)
739                         // We can only go iCount units (limited by the shorter of the char or byte buffers).
740                         ulong* longEnd = (ulong*)(chars - 3 +
741                                                   (((byteEnd - bytes) >> 1 < charEnd - chars) ?
742                                                     (byteEnd - bytes) >> 1 : charEnd - chars));
743
744                         // Need new char* so we can check 4 at a time
745                         ulong* longChars = (ulong*)chars;
746                         ulong* longBytes = (ulong*)bytes;
747
748                         while (longChars < longEnd)
749                         {
750                             // See if we potentially have surrogates (0x8000 bit set)
751                             // (We're either big endian on a big endian machine or little endian on
752                             // a little endian machine so that'll work)
753                             if ((0x8000800080008000 & *longChars) != 0)
754                             {
755                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
756                                 // 5 bits looks like 11011, then it's a high or low surrogate.
757                                 // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
758                                 // Note that we expect BMP characters to be more common than surrogates
759                                 // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
760                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
761
762                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
763                                 // but no clue if they're high or low.
764                                 // If each of the 4 characters are non-zero, then none are surrogates.
765                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
766                                     (uTemp & 0x0000FFFF00000000) == 0 ||
767                                     (uTemp & 0x00000000FFFF0000) == 0 ||
768                                     (uTemp & 0x000000000000FFFF) == 0)
769                                 {
770                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
771                                     // or if there's 1 or 4 surrogates
772
773                                     // If they happen to be high/low/high/low, we may as well continue.  Check the next
774                                     // bit to see if it's set (low) or not (high) in the right pattern
775 #if BIGENDIAN
776                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
777 #else
778                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
779 #endif
780                                     {
781                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
782                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
783
784                                         // Drop out to the slow loop to resolve the surrogates
785                                         break;
786                                     }
787                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
788                                 }
789                                 // else none are surrogates, so we can use them.
790                             }
791                             // else all < 0x8000 so we can use them
792
793                             // We can use these 4 chars.
794                             *longBytes = *longChars;
795                             longChars++;
796                             longBytes++;
797                         }
798
                            // Carry the fast loop's progress back to the char/byte cursors
799                         chars = (char*)longChars;
800                         bytes = (byte*)longBytes;
801
802                         if (chars >= charEnd)
803                             break;
804                     }
805                     // Not aligned, but maybe we can still be somewhat faster
806                     // Also somehow this optimizes the above loop?  It seems to cause something above
807                     // to get enregistered, but I haven't figured out how to make that happen without this loop.
808                     else if ((charLeftOver == 0) &&
809 #if BIGENDIAN
810                         bigEndian &&
811 #else
812                         !bigEndian &&
813 #endif // BIGENDIAN
814
815 #if BIT64
816                         (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) &&  // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time
817 #else
818                         (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) &&  // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time
819 #endif // BIT64
820                         (unchecked((int)(bytes)) & 1) == 0)
821                     {
822                         // # of chars to use: the smaller of the output room (in chars) and the remaining input
823                         long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ?
824                                        (byteEnd - bytes) >> 1 : charEnd - chars;
825
826                         // Need new char*
827                         char* charOut = ((char*)bytes);     // a char* for our output
828                         char* tempEnd = chars + iCount - 1; // Our end pointer (one short, the loop reads chars[0] and chars[1])
829
830                         while (chars < tempEnd)
831                         {
832                             if (*chars >= (char)0xd800 && *chars <= (char)0xdfff)
833                             {
834                                 // break for fallback for low surrogate
835                                 if (*chars >= 0xdc00)
836                                     break;
837
838                                 // break if next one's not a low surrogate (will do fallback)
839                                 if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff)
840                                     break;
841
842                                 // They both exist, use them
843                             }
844                             // If 2nd char is surrogate & this one isn't then only add one
845                             else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff)
846                             {
847                                 *charOut = *chars;
848                                 charOut++;
849                                 chars++;
850                                 continue;
851                             }
852
                                // Copy two chars: either a valid high/low pair or two non-surrogates
853                             *charOut = *chars;
854                             *(charOut + 1) = *(chars + 1);
855                             charOut += 2;
856                             chars += 2;
857                         }
858
859                         bytes = (byte*)charOut;
860
861                         if (chars >= charEnd)
862                             break;
863                     }
864 #endif // !NO_FAST_UNICODE_LOOP
865
866                     // No fallback, just get next char
867                     ch = *chars;
868                     chars++;
869                 }
870
871                 // Check for high or low surrogates
872                 if (ch >= 0xd800 && ch <= 0xdfff)
873                 {
874                     // Was it a high surrogate?
875                     if (ch <= 0xdbff)
876                     {
877                         // It's a high surrogate, see if we already had a high surrogate
878                         if (charLeftOver > 0)
879                         {
880                             // Unwind the current character, this should be safe because we
881                             // don't have leftover data in the fallback, so chars must have
882                             // advanced already.
883                             Debug.Assert(chars > charStart,
884                                 "[UnicodeEncoding.GetBytes]Expected chars to have advanced in unexpected high surrogate");
885                             chars--;
886
887                             // Fallback the previous surrogate
888                             // Might need to create our fallback buffer
889                             if (fallbackBuffer == null)
890                             {
891                                 if (encoder == null)
892                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
893                                 else
894                                     fallbackBuffer = encoder.FallbackBuffer;
895
896                                 // Set our internal fallback interesting things.
897                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
898                             }
899
900                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
901                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
902                             chars = charsForFallback;
903
                                // The current high surrogate was un-read above and will be seen again
904                             charLeftOver = (char)0;
905                             continue;
906                         }
907
908                         // Remember this high surrogate
909                         charLeftOver = ch;
910                         continue;
911                     }
912
913                     // It's a low surrogate
914                     if (charLeftOver == 0)
915                     {
916                         // No preceding high surrogate, so we'll fall back this one
917                         // Might need to create our fallback buffer
918                         if (fallbackBuffer == null)
919                         {
920                             if (encoder == null)
921                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
922                             else
923                                 fallbackBuffer = encoder.FallbackBuffer;
924
925                             // Set our internal fallback interesting things.
926                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
927                         }
928
929                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
930                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
931                         chars = charsForFallback;
932                         continue;
933                     }
934
935                     // Valid surrogate pair, add our charLeftOver (needs 4 bytes of room for the whole pair)
936                     if (bytes + 3 >= byteEnd)
937                     {
938                         // Not enough room to add this surrogate pair
939                         if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
940                         {
941                             // These must have both been from the fallbacks.
942                             // Both of these MUST have been from a fallback because if the 1st wasn't
943                             // from a fallback, then a high surrogate followed by an illegal char
944                             // would've caused the high surrogate to fall back.  If a high surrogate
945                             // fell back, then it was consumed and both chars came from the fallback.
946                             fallbackBuffer.MovePrevious();                     // Didn't use either fallback surrogate
947                             fallbackBuffer.MovePrevious();
948                         }
949                         else
950                         {
951                             // If we don't have enough room, then either we should've advanced a while
952                             // or we should have bytes==byteStart and throw below
953                             Debug.Assert(chars > charStart + 1 || bytes == byteStart,
954                                 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
955                             chars -= 2;                                        // Didn't use either surrogate
956                         }
957                         ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
958                         charLeftOver = (char)0;                             // we'll retry it later
959                         break;                                               // Didn't throw, but stop 'til next time.
960                     }
961
                        // Write only the high surrogate here; the low surrogate (ch) falls through
                        // to the common two-byte write at the bottom of the loop.
962                     if (bigEndian)
963                     {
964                         *(bytes++) = (byte)(charLeftOver >> 8);
965                         *(bytes++) = (byte)charLeftOver;
966                     }
967                     else
968                     {
969                         *(bytes++) = (byte)charLeftOver;
970                         *(bytes++) = (byte)(charLeftOver >> 8);
971                     }
972
973                     charLeftOver = (char)0;
974                 }
975                 else if (charLeftOver > 0)
976                 {
977                     // Expected a low surrogate, but this char is normal
978
979                     // Rewind the current character, fallback previous character.
980                     // this should be safe because we don't have leftover data in the
981                     // fallback, so chars must have advanced already.
982                     Debug.Assert(chars > charStart,
983                         "[UnicodeEncoding.GetBytes]Expected chars to have advanced after expecting low surrogate");
984                     chars--;
985
986                     // fallback previous chars
987                     // Might need to create our fallback buffer
988                     if (fallbackBuffer == null)
989                     {
990                         if (encoder == null)
991                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
992                         else
993                             fallbackBuffer = encoder.FallbackBuffer;
994
995                         // Set our internal fallback interesting things.
996                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
997                     }
998
999                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1000                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1001                     chars = charsForFallback;
1002
1003                     // Ignore charLeftOver or throw
1004                     charLeftOver = (char)0;
1005                     continue;
1006                 }
1007
1008                 // Ok, we have a char to add (needs 2 bytes of room)
1009                 if (bytes + 1 >= byteEnd)
1010                 {
1011                     // Couldn't add this char
1012                     if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
1013                         fallbackBuffer.MovePrevious();                     // Not using this fallback char
1014                     else
1015                     {
1016                         // Lonely charLeftOver (from previous call) would've been caught up above,
1017                         // so this must be a case where we've already read an input char.
1018                         Debug.Assert(chars > charStart,
1019                             "[UnicodeEncoding.GetBytes]Expected chars to have advanced for failed fallback");
1020                         chars--;                                         // Not using this char
1021                     }
1022                     ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
1023                     break;                                               // didn't throw, just stop
1024                 }
1025
                     // Emit the char as two bytes, high byte first for big endian, low byte first otherwise
1026                 if (bigEndian)
1027                 {
1028                     *(bytes++) = (byte)(ch >> 8);
1029                     *(bytes++) = (byte)ch;
1030                 }
1031                 else
1032                 {
1033                     *(bytes++) = (byte)ch;
1034                     *(bytes++) = (byte)(ch >> 8);
1035                 }
1036             }
1037
1038             // Input ended while a high surrogate was still waiting for its low surrogate
1039             if (charLeftOver > 0)
1040             {
1041                 // If we are flushing (no encoder, or encoder.MustFlush) we need to fall this back
1042                 if (encoder == null || encoder.MustFlush)
1043                 {
1044                     if (wasHereBefore)
1045                     {
1046                         // Throw it, using our complete character
1047                         throw new ArgumentException(
1048                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
1049                     }
1050                     else
1051                     {
1052                         // If we have to flush, stick it in fallback and try again
1053                         // Might need to create our fallback buffer
1054                         if (fallbackBuffer == null)
1055                         {
1056                             if (encoder == null)
1057                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
1058                             else
1059                                 fallbackBuffer = encoder.FallbackBuffer;
1060
1061                             // Set our internal fallback interesting things.
1062                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
1063                         }
1064
1065                         // If we're not flushing, that'll remember the left over character.
1066                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1067                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1068                         chars = charsForFallback;
1069
1070                         charLeftOver = (char)0;
1071                         wasHereBefore = true;
1072                         goto TryAgain;
1073                     }
1074                 }
1075             }
1076
1077             // Store the pending high surrogate (0 if none) and the number of input chars consumed in the encoder
1078             if (encoder != null)
1079             {
1080                 encoder._charLeftOver = charLeftOver;
1081                 encoder._charsUsed = (int)(chars - charStart);
1082             }
1083
1084             // Remember charLeftOver if we must, or clear it if we're flushing
1085             // (charLeftOver should be 0 if we're flushing)
1086             Debug.Assert((encoder != null && !encoder.MustFlush) || charLeftOver == (char)0,
1087                 "[UnicodeEncoding.GetBytes] Expected no left over characters if flushing");
1088
1089             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
1090                 encoder == null || !encoder._throwOnOverflow,
1091                 "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting");
1092
1093             // We used to copy it fast, but this doesn't check for surrogates
1094             // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount);
1095
                 // Number of bytes actually written to the output buffer
1096             return (int)(bytes - byteStart);
1097         }
1098
1099         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
1100         {
1101             Debug.Assert(bytes != null, "[UnicodeEncoding.GetCharCount]bytes!=null");
1102             Debug.Assert(count >= 0, "[UnicodeEncoding.GetCharCount]count >=0");
1103
1104             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1105
1106             byte* byteEnd = bytes + count;
1107             byte* byteStart = bytes;
1108
1109             // Need last vars
1110             int lastByte = -1;
1111             char lastChar = (char)0;
1112
1113             // Start by assuming same # of chars as bytes
1114             int charCount = count >> 1;
1115
1116             // Need -1 to check 2 at a time.  If we have an even #, longBytes will go
1117             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longBytes
1118             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
1119             ulong* longEnd = (ulong*)(byteEnd - 7);
1120
1121             // For fallback we may need a fallback buffer
1122             DecoderFallbackBuffer fallbackBuffer = null;
1123
1124             if (decoder != null)
1125             {
1126                 lastByte = decoder.lastByte;
1127                 lastChar = decoder.lastChar;
1128
1129                 // Assume extra char if last char was around
1130                 if (lastChar > 0)
1131                     charCount++;
1132
1133                 // Assume extra char if extra last byte makes up odd # of input bytes
1134                 if (lastByte >= 0 && (count & 1) == 1)
1135                 {
1136                     charCount++;
1137                 }
1138
1139                 // Shouldn't have anything in fallback buffer for GetCharCount
1140                 // (don't have to check _throwOnOverflow for count)
1141                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1142                     "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at start");
1143             }
1144
1145             while (bytes < byteEnd)
1146             {
1147                 // If we're aligned then maybe we can do it fast
1148                 // That'll hurt if we're unaligned because we'll always test but never be aligned
1149 #if !NO_FAST_UNICODE_LOOP
1150 #if BIGENDIAN
1151                 if (bigEndian &&
1152 #else // BIGENDIAN
1153                 if (!bigEndian &&
1154 #endif // BIGENDIAN
1155 #if BIT64 // win64 has to be long aligned
1156                     (unchecked((long)bytes) & 7) == 0 &&
1157 #else
1158                     (unchecked((int)bytes) & 3) == 0 &&
1159 #endif // BIT64
1160                     lastByte == -1 && lastChar == 0)
1161                 {
1162                     // Need new char* so we can check 4 at a time
1163                     ulong* longBytes = (ulong*)bytes;
1164
1165                     while (longBytes < longEnd)
1166                     {
1167                         // See if we potentially have surrogates (0x8000 bit set)
1168                         // (We're either big endian on a big endian machine or little endian on 
1169                         // a little endian machine so that'll work)
1170                         if ((0x8000800080008000 & *longBytes) != 0)
1171                         {
1172                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1173                             // 5 bits looks like 11011, then its a high or low surrogate.
1174                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1175                             // Note that we expect BMP characters to be more common than surrogates
1176                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1177                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1178
1179                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1180                             // but no clue if they're high or low.
1181                             // If each of the 4 characters are non-zero, then none are surrogates.
1182                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1183                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1184                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1185                                 (uTemp & 0x000000000000FFFF) == 0)
1186                             {
1187                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1188                                 // or if there's 1 or 4 surrogates
1189
1190                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1191                                 // bit to see if its set (low) or not (high) in the right pattern
1192 #if BIGENDIAN
1193                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1194 #else
1195                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1196 #endif
1197                                 {
1198                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1199                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1200
1201                                     // Drop out to the slow loop to resolve the surrogates
1202                                     break;
1203                                 }
1204                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1205                             }
1206                             // else none are surrogates, so we can use them.
1207                         }
1208                         // else all < 0x8000 so we can use them
1209
1210                         // We can use these 4 chars.
1211                         longBytes++;
1212                     }
1213
1214                     bytes = (byte*)longBytes;
1215
1216                     if (bytes >= byteEnd)
1217                         break;
1218                 }
1219 #endif // !NO_FAST_UNICODE_LOOP
1220
1221                 // Get 1st byte
1222                 if (lastByte < 0)
1223                 {
1224                     lastByte = *bytes++;
1225                     if (bytes >= byteEnd) break;
1226                 }
1227
1228                 // Get full char
1229                 char ch;
1230                 if (bigEndian)
1231                 {
1232                     ch = (char)(lastByte << 8 | *(bytes++));
1233                 }
1234                 else
1235                 {
1236                     ch = (char)(*(bytes++) << 8 | lastByte);
1237                 }
1238                 lastByte = -1;
1239
1240                 // See if the char's valid
1241                 if (ch >= 0xd800 && ch <= 0xdfff)
1242                 {
1243                     // Was it a high surrogate?
1244                     if (ch <= 0xdbff)
1245                     {
1246                         // Its a high surrogate, if we had one then do fallback for previous one
1247                         if (lastChar > 0)
1248                         {
1249                             // Ignore previous bad high surrogate
1250                             charCount--;
1251
1252                             // Get fallback for previous high surrogate
1253                             // Note we have to reconstruct bytes because some may have been in decoder
1254                             byte[] byteBuffer = null;
1255                             if (bigEndian)
1256                             {
1257                                 byteBuffer = new byte[]
1258                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1259                             }
1260                             else
1261                             {
1262                                 byteBuffer = new byte[]
1263                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1264                             }
1265
1266                             if (fallbackBuffer == null)
1267                             {
1268                                 if (decoder == null)
1269                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1270                                 else
1271                                     fallbackBuffer = decoder.FallbackBuffer;
1272
1273                                 // Set our internal fallback interesting things.
1274                                 fallbackBuffer.InternalInitialize(byteStart, null);
1275                             }
1276
1277                             // Get fallback.
1278                             charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1279                         }
1280
1281                         // Ignore the last one which fell back already,
1282                         // and remember the new high surrogate
1283                         lastChar = ch;
1284                         continue;
1285                     }
1286
1287                     // Its a low surrogate
1288                     if (lastChar == 0)
1289                     {
1290                         // Expected a previous high surrogate
1291                         charCount--;
1292
1293                         // Get fallback for this low surrogate
1294                         // Note we have to reconstruct bytes because some may have been in decoder
1295                         byte[] byteBuffer = null;
1296                         if (bigEndian)
1297                         {
1298                             byteBuffer = new byte[]
1299                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1300                         }
1301                         else
1302                         {
1303                             byteBuffer = new byte[]
1304                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1305                         }
1306
1307                         if (fallbackBuffer == null)
1308                         {
1309                             if (decoder == null)
1310                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1311                             else
1312                                 fallbackBuffer = decoder.FallbackBuffer;
1313
1314                             // Set our internal fallback interesting things.
1315                             fallbackBuffer.InternalInitialize(byteStart, null);
1316                         }
1317
1318                         charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1319
1320                         // Ignore this one (we already did its fallback)
1321                         continue;
1322                     }
1323
1324                     // Valid surrogate pair, already counted.
1325                     lastChar = (char)0;
1326                 }
1327                 else if (lastChar > 0)
1328                 {
1329                     // Had a high surrogate, expected a low surrogate
1330                     // Un-count the last high surrogate
1331                     charCount--;
1332
1333                     // fall back the high surrogate.
1334                     byte[] byteBuffer = null;
1335                     if (bigEndian)
1336                     {
1337                         byteBuffer = new byte[]
1338                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1339                     }
1340                     else
1341                     {
1342                         byteBuffer = new byte[]
1343                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1344                     }
1345
1346                     if (fallbackBuffer == null)
1347                     {
1348                         if (decoder == null)
1349                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1350                         else
1351                             fallbackBuffer = decoder.FallbackBuffer;
1352
1353                         // Set our internal fallback interesting things.
1354                         fallbackBuffer.InternalInitialize(byteStart, null);
1355                     }
1356
1357                     // Already subtracted high surrogate
1358                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1359
1360                     // Not left over now, clear previous high surrogate and continue to add current char
1361                     lastChar = (char)0;
1362                 }
1363
1364                 // Valid char, already counted
1365             }
1366
1367             // Extra space if we can't use decoder
1368             if (decoder == null || decoder.MustFlush)
1369             {
1370                 if (lastChar > 0)
1371                 {
1372                     // No hanging high surrogates allowed, do fallback and remove count for it
1373                     charCount--;
1374                     byte[] byteBuffer = null;
1375                     if (bigEndian)
1376                     {
1377                         byteBuffer = new byte[]
1378                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1379                     }
1380                     else
1381                     {
1382                         byteBuffer = new byte[]
1383                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1384                     }
1385
1386                     if (fallbackBuffer == null)
1387                     {
1388                         if (decoder == null)
1389                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1390                         else
1391                             fallbackBuffer = decoder.FallbackBuffer;
1392
1393                         // Set our internal fallback interesting things.
1394                         fallbackBuffer.InternalInitialize(byteStart, null);
1395                     }
1396
1397                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1398
1399                     lastChar = (char)0;
1400                 }
1401
1402                 if (lastByte >= 0)
1403                 {
1404                     if (fallbackBuffer == null)
1405                     {
1406                         if (decoder == null)
1407                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1408                         else
1409                             fallbackBuffer = decoder.FallbackBuffer;
1410
1411                         // Set our internal fallback interesting things.
1412                         fallbackBuffer.InternalInitialize(byteStart, null);
1413                     }
1414
1415                     // No hanging odd bytes allowed if must flush
1416                     charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
1417                     lastByte = -1;
1418                 }
1419             }
1420
1421             // If we had a high surrogate left over, we can't count it
1422             if (lastChar > 0)
1423                 charCount--;
1424
1425             // Shouldn't have anything in fallback buffer for GetCharCount
1426             // (don't have to check _throwOnOverflow for count)
1427             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1428                 "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at end");
1429
1430             return charCount;
1431         }
1432
1433         internal override unsafe int GetChars(byte* bytes, int byteCount,
1434                                                 char* chars, int charCount, DecoderNLS baseDecoder)
1435         {
                 // Decodes the 16-bit code units stored in [bytes, bytes + byteCount) into chars and
                 // returns the number of chars written (chars - charStart at the end of the method).
                 //
                 //   bytes / byteCount:  input buffer and its length in bytes.
                 //   chars / charCount:  output buffer and its capacity in chars.
                 //   baseDecoder:        optional stateful decoder (null is handled).  When present, its
                 //                       lastByte / lastChar seed this call, and _bytesUsed, lastChar and
                 //                       lastByte are written back to it at the End label.
                 //
                 // Byte order comes from the bigEndian field.  Lone or mis-ordered surrogates and, when
                 // flushing, a trailing odd byte are handed to the decoder fallback buffer.  When the output
                 // buffer is too small, the bytes of the current char are backed out (bytes -= 2) and
                 // ThrowCharsOverflow is called; if it does not throw, decoding simply stops early.
1436             Debug.Assert(chars != null, "[UnicodeEncoding.GetChars]chars!=null");
1437             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetChars]byteCount >=0");
1438             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetChars]charCount >=0");
1439             Debug.Assert(bytes != null, "[UnicodeEncoding.GetChars]bytes!=null");
1440
                 // decoder may be null; every member access on it below is guarded by a null check.
1441             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1442
1443             // Pending state carried between iterations (and between calls when a decoder is supplied):
                 //   lastByte: first byte of a char whose second byte has not been read yet (-1 = none)
                 //   lastChar: high surrogate still waiting for its low surrogate (0 = none)
1444             int lastByte = -1;
1445             char lastChar = (char)0;
1446
1447             // Get our decoder (but don't clear it yet)
1448             if (decoder != null)
1449             {
1450                 lastByte = decoder.lastByte;
1451                 lastChar = decoder.lastChar;
1452
1453                 // Shouldn't have anything in fallback buffer for GetChars
1454                 // (don't have to check _throwOnOverflow for chars)
1455                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1456                     "[UnicodeEncoding.GetChars]Expected empty fallback buffer at start");
1457             }
1458
1459             // For fallback we may need a fallback buffer (created lazily, only when a fallback is needed)
1460             DecoderFallbackBuffer fallbackBuffer = null;
                 // Scratch copy of chars that is passed by ref to InternalFallback and copied back afterwards
1461             char* charsForFallback;
1462
                 // End pointers bound the loops; start pointers are kept to compute bytes used / chars written
1463             byte* byteEnd = bytes + byteCount;
1464             char* charEnd = chars + charCount;
1465             byte* byteStart = bytes;
1466             char* charStart = chars;
1467
1468             while (bytes < byteEnd)
1469             {
1470                 // If we're aligned then maybe we can do it fast
1471                 // That'll hurt if we're unaligned because we'll always test but never be aligned
                     // The fast path is only tried when the data's byte order matches the BIGENDIAN build
                     // setting, both pointers are suitably aligned, and no byte or surrogate is pending.
1472 #if !NO_FAST_UNICODE_LOOP
1473 #if BIGENDIAN
1474                 if (bigEndian &&
1475 #else // BIGENDIAN
1476                 if (!bigEndian &&
1477 #endif // BIGENDIAN
1478 #if BIT64 // win64 has to be long aligned
1479                     (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
1480 #else
1481                     (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
1482 #endif // BIT64
1483                     lastByte == -1 && lastChar == 0)
1484                 {
1485                     // longEnd bounds the 8-bytes-at-a-time loop.  The usable byte count is the remaining
1486                     // input bytes when they need fewer chars than remain in the output, otherwise twice
1487                     // the remaining output chars.  7 is subtracted so that a whole ulong starting anywhere
1488                     // below longEnd still lies inside that usable range.
1489                     ulong* longEnd = (ulong*)(bytes - 7 +
1490                                                 (((byteEnd - bytes) >> 1 < charEnd - chars) ?
1491                                                   (byteEnd - bytes) : (charEnd - chars) << 1));
1492
1493                     // ulong views of both buffers so 4 chars (8 bytes) are checked and copied at a time
1494                     ulong* longBytes = (ulong*)bytes;
1495                     ulong* longChars = (ulong*)chars;
1496
1497                     while (longBytes < longEnd)
1498                     {
1499                         // See if we potentially have surrogates (0x8000 bit set)
1500                         // (We're either big endian on a big endian machine or little endian on 
1501                         // a little endian machine so that'll work)
1502                         if ((0x8000800080008000 & *longBytes) != 0)
1503                         {
1504                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1505                             // 5 bits looks like 11011, then its a high or low surrogate.
1506                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1507                             // Note that we expect BMP characters to be more common than surrogates
1508                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1509                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1510
1511                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1512                             // but no clue if they're high or low.
1513                             // If each of the 4 characters are non-zero, then none are surrogates.
1514                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1515                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1516                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1517                                 (uTemp & 0x000000000000FFFF) == 0)
1518                             {
1519                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1520                                 // or if there's 1 or 4 surrogates
1521
1522                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1523                                 // bit to see if its set (low) or not (high) in the right pattern
1524 #if BIGENDIAN
1525                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1526 #else
1527                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1528 #endif
1529                                 {
1530                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1531                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1532
1533                                     // Drop out to the slow loop to resolve the surrogates
1534                                     break;
1535                                 }
1536                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1537                             }
1538                             // else none are surrogates, so we can use them.
1539                         }
1540                         // else all < 0x8000 so we can use them
1541
1542                         // We can use these 4 chars: copy all 8 bytes in one store and advance both views.
1543                         *longChars = *longBytes;
1544                         longBytes++;
1545                         longChars++;
1546                     }
1547
                         // Publish the fast loop's progress back to the byte/char pointers
1548                     chars = (char*)longChars;
1549                     bytes = (byte*)longBytes;
1550
                         // Nothing left for the slow path if the fast loop consumed all the input
1551                     if (bytes >= byteEnd)
1552                         break;
1553                 }
1554 #endif // !NO_FAST_UNICODE_LOOP
1555
1556                 // Get 1st byte
                     // (continue re-tests bytes < byteEnd before the second byte of the char is read)
1557                 if (lastByte < 0)
1558                 {
1559                     lastByte = *bytes++;
1560                     continue;
1561                 }
1562
1563                 // Get full char: combine the pending byte with the next one according to byte order
1564                 char ch;
1565                 if (bigEndian)
1566                 {
1567                     ch = (char)(lastByte << 8 | *(bytes++));
1568                 }
1569                 else
1570                 {
1571                     ch = (char)(*(bytes++) << 8 | lastByte);
1572                 }
1573                 lastByte = -1;
1574
1575                 // See if the char's valid (0xd800 - 0xdfff is the surrogate range and needs pairing checks)
1576                 if (ch >= 0xd800 && ch <= 0xdfff)
1577                 {
1578                     // Was it a high surrogate?
1579                     if (ch <= 0xdbff)
1580                     {
1581                         // Its a high surrogate, if we had one then do fallback for previous one
1582                         if (lastChar > 0)
1583                         {
1584                             // Get fallback for previous high surrogate
1585                             // Note we have to reconstruct bytes because some may have been in decoder
1586                             byte[] byteBuffer = null;
1587                             if (bigEndian)
1588                             {
1589                                 byteBuffer = new byte[]
1590                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1591                             }
1592                             else
1593                             {
1594                                 byteBuffer = new byte[]
1595                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1596                             }
1597
                                 // Lazily obtain the fallback buffer: created from this.decoderFallback when
                                 // there is no decoder, otherwise taken from the decoder.
1598                             if (fallbackBuffer == null)
1599                             {
1600                                 if (decoder == null)
1601                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1602                                 else
1603                                     fallbackBuffer = decoder.FallbackBuffer;
1604
1605                                 // Set our internal fallback interesting things.
1606                                 fallbackBuffer.InternalInitialize(byteStart, charEnd);
1607                             }
1608
1609                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1610                             bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1611                             chars = charsForFallback;
1612
1613                             if (!fallbackResult)
1614                             {
1615                                 // couldn't fall back lonely surrogate
1616                                 // We either advanced bytes or chars should == charStart and throw below
1617                                 Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1618                                     "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (bad surrogate)");
1619                                 bytes -= 2;                                       // didn't use these 2 bytes
1620                                 fallbackBuffer.InternalReset();
1621                                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1622                                 break;                                          // couldn't fallback but didn't throw
1623                             }
1624                         }
1625
1626                         // Ignore the previous high surrogate which fell back already,
1627                         // yet remember the current high surrogate for next time.
1628                         lastChar = ch;
1629                         continue;
1630                     }
1631
1632                     // Its a low surrogate
1633                     if (lastChar == 0)
1634                     {
1635                         // Expected a previous high surrogate
1636                         // Get fallback for this low surrogate
1637                         // Note we have to reconstruct bytes because some may have been in decoder
1638                         byte[] byteBuffer = null;
1639                         if (bigEndian)
1640                         {
1641                             byteBuffer = new byte[]
1642                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1643                         }
1644                         else
1645                         {
1646                             byteBuffer = new byte[]
1647                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1648                         }
1649
1650                         if (fallbackBuffer == null)
1651                         {
1652                             if (decoder == null)
1653                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1654                             else
1655                                 fallbackBuffer = decoder.FallbackBuffer;
1656
1657                             // Set our internal fallback interesting things.
1658                             fallbackBuffer.InternalInitialize(byteStart, charEnd);
1659                         }
1660
1661                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1662                         bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1663                         chars = charsForFallback;
1664
1665                         if (!fallbackResult)
1666                         {
1667                             // couldn't fall back lonely surrogate
1668                             // We either advanced bytes or chars should == charStart and throw below
1669                             Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1670                                 "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (lonely surrogate)");
1671                             bytes -= 2;                                       // didn't use these 2 bytes
1672                             fallbackBuffer.InternalReset();
1673                             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1674                             break;                                          // couldn't fallback but didn't throw
1675                         }
1676
1677                         // Didn't throw, ignore this one (we already did its fallback)
1678                         continue;
1679                     }
1680
1681                     // Valid surrogate pair, add our lastChar (will need 2 chars)
                         // Only the high surrogate is written in this branch; the low surrogate (ch) is
                         // written by the common "add it" code at the bottom of the loop.
1682                     if (chars >= charEnd - 1)
1683                     {
1684                         // couldn't find room for this surrogate pair
1685                         // We either advanced bytes or chars should == charStart and throw below
1686                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1687                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (surrogate pair)");
1688                         bytes -= 2;                                       // didn't use these 2 bytes
1689                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1690                         // Leave lastChar for next call to Convert()
1691                         break;                                          // couldn't fallback but didn't throw
1692                     }
1693
1694                     *chars++ = lastChar;
1695                     lastChar = (char)0;
1696                 }
1697                 else if (lastChar > 0)
1698                 {
1699                     // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
1700                     byte[] byteBuffer = null;
1701                     if (bigEndian)
1702                     {
1703                         byteBuffer = new byte[]
1704                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1705                     }
1706                     else
1707                     {
1708                         byteBuffer = new byte[]
1709                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1710                     }
1711
1712                     if (fallbackBuffer == null)
1713                     {
1714                         if (decoder == null)
1715                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1716                         else
1717                             fallbackBuffer = decoder.FallbackBuffer;
1718
1719                         // Set our internal fallback interesting things.
1720                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1721                     }
1722
1723                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1724                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1725                     chars = charsForFallback;
1726
1727                     if (!fallbackResult)
1728                     {
1729                         // couldn't fall back high surrogate, or char that would be next
1730                         // We either advanced bytes or chars should == charStart and throw below
1731                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1732                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (no low surrogate)");
1733                         bytes -= 2;                                       // didn't use these 2 bytes
1734                         fallbackBuffer.InternalReset();
1735                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1736                         break;                                          // couldn't fallback but didn't throw
1737                     }
1738
1739                     // Not left over now, clear previous high surrogate and continue to add current char
1740                     lastChar = (char)0;
1741                 }
1742
1743                 // Valid char, room for it?
                     // NOTE(review): lastByte may have been carried in from the decoder, in which case only
                     // one byte of this char came from the current buffer; backing up by 2 then relies on
                     // the assert below holding - confirm for the case where a fallback above wrote output.
1744                 if (chars >= charEnd)
1745                 {
1746                     // 2 bytes couldn't fall back
1747                     // We either advanced bytes or chars should == charStart and throw below
1748                     Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1749                         "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (normal)");
1750                     bytes -= 2;                                       // didn't use these bytes
1751                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1752                     break;                                          // couldn't fallback but didn't throw
1753                 }
1754
1755                 // add it
1756                 *chars++ = ch;
1757             }
1758
1759             // Flush: with no decoder, or a decoder that must flush, pending state cannot be kept,
                 // so a leftover high surrogate and/or odd byte is sent to the fallback buffer here.
1760             if (decoder == null || decoder.MustFlush)
1761             {
1762                 if (lastChar > 0)
1763                 {
1764                     // No hanging high surrogates allowed when flushing, so fall back the pending one
1765                     byte[] byteBuffer = null;
1766                     if (bigEndian)
1767                     {
1768                         byteBuffer = new byte[]
1769                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1770                     }
1771                     else
1772                     {
1773                         byteBuffer = new byte[]
1774                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1775                     }
1776
1777                     if (fallbackBuffer == null)
1778                     {
1779                         if (decoder == null)
1780                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1781                         else
1782                             fallbackBuffer = decoder.FallbackBuffer;
1783
1784                         // Set our internal fallback interesting things.
1785                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1786                     }
1787
1788                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1789                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1790                     chars = charsForFallback;
1791
1792                     if (!fallbackResult)
1793                     {
1794                         // 2 bytes couldn't fall back
1795                         // We either advanced bytes or chars should == charStart and throw below
1796                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1797                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (decoder)");
                             // bytes is moved back only for the duration of the ThrowCharsOverflow call and
                             // restored afterwards, because the pending state stays in lastChar / lastByte.
1798                         bytes -= 2;                                       // didn't use these bytes
1799                         if (lastByte >= 0)
1800                             bytes--;                                    // had an extra last byte hanging around
1801                         fallbackBuffer.InternalReset();
1802                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1803                         // We'll remember these in our decoder though
1804                         bytes += 2;
1805                         if (lastByte >= 0)
1806                             bytes++;
1807                         goto End;
1808                     }
1809
1810                     // done with this one
1811                     lastChar = (char)0;
1812                 }
1813
1814                 if (lastByte >= 0)
1815                 {
1816                     if (fallbackBuffer == null)
1817                     {
1818                         if (decoder == null)
1819                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1820                         else
1821                             fallbackBuffer = decoder.FallbackBuffer;
1822
1823                         // Set our internal fallback interesting things.
1824                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1825                     }
1826
1827                     // No hanging odd bytes allowed if must flush
1828                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1829                     bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback);
1830                     chars = charsForFallback;
1831
1832                     if (!fallbackResult)
1833                     {
1834                         // odd byte couldn't fall back
1835                         bytes--;                                        // didn't use this byte
1836                         fallbackBuffer.InternalReset();
1837                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1838                         // didn't throw, but we'll remember it in the decoder
1839                         bytes++;
1840                         goto End;
1841                     }
1842
1843                     // Didn't fail, clear buffer
1844                     lastByte = -1;
1845                 }
1846             }
1847
                 // Also the target of the goto End jumps above, taken when a flush-time fallback did not fit;
                 // on that path lastChar / lastByte have not been cleared.
1848         End:
1849
1850             // Save bytes consumed and any pending surrogate / odd byte back into the decoder
1851             if (decoder != null)
1852             {
1853                 Debug.Assert((decoder.MustFlush == false) || ((lastChar == (char)0) && (lastByte == -1)),
1854                     "[UnicodeEncoding.GetChars] Expected no left over chars or bytes if flushing"
1855                     //                    + " " + ((int)lastChar).ToString("X4") + " " + lastByte.ToString("X2")
1856                     );
1857
1858                 decoder._bytesUsed = (int)(bytes - byteStart);
1859                 decoder.lastChar = lastChar;
1860                 decoder.lastByte = lastByte;
1861             }
1862
1863             // Used to do this the old way
1864             // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);
1865
1866             // Shouldn't have anything in fallback buffer for GetChars
1867             // (don't have to check _throwOnOverflow for count or chars)
1868             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1869                 "[UnicodeEncoding.GetChars]Expected empty fallback buffer at end");
1870
                 // Number of chars written to the output buffer
1871             return (int)(chars - charStart);
1872         }
1873
1874
1875         public override System.Text.Encoder GetEncoder()
1876         {
1877             return new EncoderNLS(this);
1878         }
1879
1880
1881         public override System.Text.Decoder GetDecoder()
1882         {
1883             return new UnicodeEncoding.Decoder(this);
1884         }
1885
1886
1887         public override byte[] GetPreamble()
1888         {
1889             if (byteOrderMark)
1890             {
1891                 // Note - we must allocate new byte[]'s here to prevent someone
1892                 // from modifying a cached byte[].
1893                 if (bigEndian)
1894                     return new byte[2] { 0xfe, 0xff };
1895                 else
1896                     return new byte[2] { 0xff, 0xfe };
1897             }
1898             return Array.Empty<Byte>();
1899         }
1900
1901
1902         public override int GetMaxByteCount(int charCount)
1903         {
1904             if (charCount < 0)
1905                 throw new ArgumentOutOfRangeException(nameof(charCount),
1906                      SR.ArgumentOutOfRange_NeedNonNegNum);
1907             Contract.EndContractBlock();
1908
1909             // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
1910             long byteCount = (long)charCount + 1;
1911
1912             if (EncoderFallback.MaxCharCount > 1)
1913                 byteCount *= EncoderFallback.MaxCharCount;
1914
1915             // 2 bytes per char
1916             byteCount <<= 1;
1917
1918             if (byteCount > 0x7fffffff)
1919                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
1920
1921             return (int)byteCount;
1922         }
1923
1924
1925         public override int GetMaxCharCount(int byteCount)
1926         {
1927             if (byteCount < 0)
1928                 throw new ArgumentOutOfRangeException(nameof(byteCount),
1929                      SR.ArgumentOutOfRange_NeedNonNegNum);
1930             Contract.EndContractBlock();
1931
1932             // long because byteCount could be biggest int.
1933             // 1 char per 2 bytes.  Round up in case 1 left over in decoder.
1934             // Round up using &1 in case byteCount is max size
1935             // Might also need an extra 1 if there's a left over high surrogate in the decoder.
1936             long charCount = (long)(byteCount >> 1) + (byteCount & 1) + 1;
1937
1938             // Don't forget fallback (in case they have a bunch of lonely surrogates or something bizarre like that)
1939             if (DecoderFallback.MaxCharCount > 1)
1940                 charCount *= DecoderFallback.MaxCharCount;
1941
1942             if (charCount > 0x7fffffff)
1943                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
1944
1945             return (int)charCount;
1946         }
1947
1948
1949         public override bool Equals(Object value)
1950         {
1951             UnicodeEncoding that = value as UnicodeEncoding;
1952             if (that != null)
1953             {
1954                 //
1955                 // Big Endian Unicode has different code page (1201) than small Endian one (1200),
1956                 // so we still have to check _codePage here.
1957                 //
1958                 return (CodePage == that.CodePage) &&
1959                         byteOrderMark == that.byteOrderMark &&
1960                         //                        isThrowException == that.isThrowException &&  // Same as Encoder/Decoder being exception fallbacks
1961                         bigEndian == that.bigEndian &&
1962                        (EncoderFallback.Equals(that.EncoderFallback)) &&
1963                        (DecoderFallback.Equals(that.DecoderFallback));
1964             }
1965             return (false);
1966         }
1967
1968         public override int GetHashCode()
1969         {
1970             return CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
1971                    (byteOrderMark ? 4 : 0) + (bigEndian ? 8 : 0);
1972         }
1973
1974         private sealed class Decoder : System.Text.DecoderNLS
1975         {
1976             internal int lastByte = -1;
1977             internal char lastChar = '\0';
1978
1979             public Decoder(UnicodeEncoding encoding) : base(encoding)
1980             {
1981                 // base calls reset
1982             }
1983             
1984             public override void Reset()
1985             {
1986                 lastByte = -1;
1987                 lastChar = '\0';
1988                 if (_fallbackBuffer != null)
1989                     _fallbackBuffer.Reset();
1990             }
1991
1992             // Anything left in our decoder?
1993             internal override bool HasState
1994             {
1995                 get
1996                 {
1997                     return (this.lastByte != -1 || this.lastChar != '\0');
1998                 }
1999             }
2000         }
2001     }
2002 }
2003