846946ce9479522fce7749b9e76b1e66054c601e
[platform/upstream/coreclr.git] / src / mscorlib / shared / System / Text / UnicodeEncoding.cs
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 //
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
7 //
8
9 using System;
10 using System.Globalization;
11 using System.Runtime.Serialization;
12 using System.Diagnostics;
13 using System.Diagnostics.Contracts;
14
15 namespace System.Text
16 {
17     public class UnicodeEncoding : Encoding
18     {
19         // Used by Encoding.BigEndianUnicode/Unicode for lazy initialization
20         // The initialization code will not be run until a static member of the class is referenced
21         internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
22         internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);
23
24         [OptionalField(VersionAdded = 2)]
25         internal bool isThrowException = false;
26
27         internal bool bigEndian = false;
28         internal bool byteOrderMark = true;
29
30         // Unicode version 2.0 character size in bytes
31         public const int CharSize = 2;
32
33
34         public UnicodeEncoding()
35             : this(false, true)
36         {
37         }
38
39
40         public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
41             : this(bigEndian, byteOrderMark, false)
42         {
43         }
44
45
46         public UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
47             : base(bigEndian ? 1201 : 1200)  //Set the data item.
48         {
49             this.isThrowException = throwOnInvalidBytes;
50             this.bigEndian = bigEndian;
51             this.byteOrderMark = byteOrderMark;
52
53             // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
54             if (this.isThrowException)
55                 SetDefaultFallbacks();
56         }
57
58         #region Serialization 
59         [OnDeserializing]
60         private void OnDeserializing(StreamingContext ctx)
61         {
62             // In Everett it is false. Whidbey will overwrite this value.
63             isThrowException = false;
64         }
65         #endregion Serialization
66
67         internal override void SetDefaultFallbacks()
68         {
69             // For UTF-X encodings, we use a replacement fallback with an empty string
70             if (this.isThrowException)
71             {
72                 this.encoderFallback = EncoderFallback.ExceptionFallback;
73                 this.decoderFallback = DecoderFallback.ExceptionFallback;
74             }
75             else
76             {
77                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
78                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
79             }
80         }
81
82         // The following methods are copied from EncodingNLS.cs.
83         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
84         // These should be kept in sync for the following classes:
85         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
86         //
87
88         // Returns the number of bytes required to encode a range of characters in
89         // a character array.
90         //
91         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
92         // So if you fix this, fix the others.  Currently those include:
93         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
94         // parent method is safe
95
96         public override unsafe int GetByteCount(char[] chars, int index, int count)
97         {
98             // Validate input parameters
99             if (chars == null)
100                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
101
102             if (index < 0 || count < 0)
103                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
104
105             if (chars.Length - index < count)
106                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
107             Contract.EndContractBlock();
108
109             // If no input, return 0, avoid fixed empty array problem
110             if (count == 0)
111                 return 0;
112
113             // Just call the pointer version
114             fixed (char* pChars = chars)
115                 return GetByteCount(pChars + index, count, null);
116         }
117
118         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
119         // So if you fix this, fix the others.  Currently those include:
120         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
121         // parent method is safe
122
123         public override unsafe int GetByteCount(String s)
124         {
125             // Validate input
126             if (s==null)
127                 throw new ArgumentNullException("s");
128             Contract.EndContractBlock();
129
130             fixed (char* pChars = s)
131                 return GetByteCount(pChars, s.Length, null);
132         }
133
134         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
135         // So if you fix this, fix the others.  Currently those include:
136         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
137
138         [CLSCompliant(false)]
139         public override unsafe int GetByteCount(char* chars, int count)
140         {
141             // Validate Parameters
142             if (chars == null)
143                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
144
145             if (count < 0)
146                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
147             Contract.EndContractBlock();
148
149             // Call it with empty encoder
150             return GetByteCount(chars, count, null);
151         }
152
153         // Parent method is safe.
154         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
155         // So if you fix this, fix the others.  Currently those include:
156         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
157
158         public override unsafe int GetBytes(String s, int charIndex, int charCount,
159                                               byte[] bytes, int byteIndex)
160         {
161             if (s == null || bytes == null)
162                 throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
163
164             if (charIndex < 0 || charCount < 0)
165                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
166
167             if (s.Length - charIndex < charCount)
168                 throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
169
170             if (byteIndex < 0 || byteIndex > bytes.Length)
171                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
172             Contract.EndContractBlock();
173
174             int byteCount = bytes.Length - byteIndex;
175
176             // Fixed doesn't like 0 length arrays.
177             if (bytes.Length == 0)
178                 bytes = new byte[1];
179
180             fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
181                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
182         }
183
184         // Encodes a range of characters in a character array into a range of bytes
185         // in a byte array. An exception occurs if the byte array is not large
186         // enough to hold the complete encoding of the characters. The
187         // GetByteCount method can be used to determine the exact number of
188         // bytes that will be produced for a given range of characters.
189         // Alternatively, the GetMaxByteCount method can be used to
190         // determine the maximum number of bytes that will be produced for a given
191         // number of characters, regardless of the actual character values.
192         //
193         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
194         // So if you fix this, fix the others.  Currently those include:
195         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
196         // parent method is safe
197
198         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
199                                                byte[] bytes, int byteIndex)
200         {
201             // Validate parameters
202             if (chars == null || bytes == null)
203                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
204
205             if (charIndex < 0 || charCount < 0)
206                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
207
208             if (chars.Length - charIndex < charCount)
209                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
210
211             if (byteIndex < 0 || byteIndex > bytes.Length)
212                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
213             Contract.EndContractBlock();
214
215             // If nothing to encode return 0, avoid fixed problem
216             if (charCount == 0)
217                 return 0;
218
219             // Just call pointer version
220             int byteCount = bytes.Length - byteIndex;
221
222             // Fixed doesn't like 0 length arrays.
223             if (bytes.Length == 0)
224                 bytes = new byte[1];
225
226             fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
227                 // Remember that byteCount is # to decode, not size of array.
228                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
229         }
230
231         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
232         // So if you fix this, fix the others.  Currently those include:
233         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
234
235         [CLSCompliant(false)]
236         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
237         {
238             // Validate Parameters
239             if (bytes == null || chars == null)
240                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
241
242             if (charCount < 0 || byteCount < 0)
243                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
244             Contract.EndContractBlock();
245
246             return GetBytes(chars, charCount, bytes, byteCount, null);
247         }
248
249         // Returns the number of characters produced by decoding a range of bytes
250         // in a byte array.
251         //
252         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
253         // So if you fix this, fix the others.  Currently those include:
254         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
255         // parent method is safe
256
257         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
258         {
259             // Validate Parameters
260             if (bytes == null)
261                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
262
263             if (index < 0 || count < 0)
264                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
265
266             if (bytes.Length - index < count)
267                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
268             Contract.EndContractBlock();
269
270             // If no input just return 0, fixed doesn't like 0 length arrays
271             if (count == 0)
272                 return 0;
273
274             // Just call pointer version
275             fixed (byte* pBytes = bytes)
276                 return GetCharCount(pBytes + index, count, null);
277         }
278
279         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
280         // So if you fix this, fix the others.  Currently those include:
281         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
282
283         [CLSCompliant(false)]
284         public override unsafe int GetCharCount(byte* bytes, int count)
285         {
286             // Validate Parameters
287             if (bytes == null)
288                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
289
290             if (count < 0)
291                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
292             Contract.EndContractBlock();
293
294             return GetCharCount(bytes, count, null);
295         }
296
297         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
298         // So if you fix this, fix the others.  Currently those include:
299         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
300         // parent method is safe
301
302         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
303                                               char[] chars, int charIndex)
304         {
305             // Validate Parameters
306             if (bytes == null || chars == null)
307                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
308
309             if (byteIndex < 0 || byteCount < 0)
310                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
311
312             if ( bytes.Length - byteIndex < byteCount)
313                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
314
315             if (charIndex < 0 || charIndex > chars.Length)
316                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
317             Contract.EndContractBlock();
318
319             // If no input, return 0 & avoid fixed problem
320             if (byteCount == 0)
321                 return 0;
322
323             // Just call pointer version
324             int charCount = chars.Length - charIndex;
325
326             // Fixed doesn't like 0 length arrays.
327             if (chars.Length == 0)
328                 chars = new char[1];
329
330             fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
331                 // Remember that charCount is # to decode, not size of array
332                 return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
333         }
334
335         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
336         // So if you fix this, fix the others.  Currently those include:
337         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
338
339         [CLSCompliant(false)]
340         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
341         {
342             // Validate Parameters
343             if (bytes == null || chars == null)
344                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
345
346             if (charCount < 0 || byteCount < 0)
347                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
348             Contract.EndContractBlock();
349
350             return GetChars(bytes, byteCount, chars, charCount, null);
351         }
352
353         // Returns a string containing the decoded representation of a range of
354         // bytes in a byte array.
355         //
356         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
357         // So if you fix this, fix the others.  Currently those include:
358         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
359         // parent method is safe
360
361         public override unsafe string GetString(byte[] bytes, int index, int count)
362         {
363             // Validate Parameters
364             if (bytes == null)
365                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
366
367             if (index < 0 || count < 0)
368                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
369
370             if (bytes.Length - index < count)
371                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
372             Contract.EndContractBlock();
373
374             // Avoid problems with empty input buffer
375             if (count == 0) return String.Empty;
376
377             fixed (byte* pBytes = bytes)
378                 return String.CreateStringFromEncoding(
379                     pBytes + index, count, this);
380         }
381
382         //
383         // End of standard methods copied from EncodingNLS.cs
384         //
385
386         internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
387         {
388             Debug.Assert(chars != null, "[UnicodeEncoding.GetByteCount]chars!=null");
389             Debug.Assert(count >= 0, "[UnicodeEncoding.GetByteCount]count >=0");
390
391             // Start by assuming each char gets 2 bytes
392             int byteCount = count << 1;
393
394             // Check for overflow in byteCount
395             // (If they were all invalid chars, this would actually be wrong,
396             // but that's a ridiculously large # so we're not concerned about that case)
397             if (byteCount < 0)
398                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
399
400             char* charStart = chars;
401             char* charEnd = chars + count;
402             char charLeftOver = (char)0;
403
404             bool wasHereBefore = false;
405
406             // Need -1 to check 2 at a time.  If we have an even #, longChars will go
407             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
408             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
409             ulong* longEnd = (ulong*)(charEnd - 3);
410
411             // For fallback we may need a fallback buffer
412             EncoderFallbackBuffer fallbackBuffer = null;
413             char* charsForFallback;
414
415             if (encoder != null)
416             {
417                 charLeftOver = encoder.charLeftOver;
418
419                 // Assume extra bytes to encode charLeftOver if it existed
420                 if (charLeftOver > 0)
421                     byteCount += 2;
422
423                 // We mustn't have left over fallback data when counting
424                 if (encoder.InternalHasFallbackBuffer)
425                 {
426                     fallbackBuffer = encoder.FallbackBuffer;
427                     if (fallbackBuffer.Remaining > 0)
428                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
429
430                     // Set our internal fallback interesting things.
431                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
432                 }
433             }
434
435             char ch;
436         TryAgain:
437
438             while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
439             {
440                 // First unwind any fallback
441                 if (ch == 0)
442                 {
443                     // No fallback, maybe we can do it fast
444 #if !NO_FAST_UNICODE_LOOP
445 #if BIGENDIAN       // If endianess is backwards then each pair of bytes would be backwards.
446                     if ( bigEndian &&
447 #else
448                     if (!bigEndian &&
449 #endif // BIGENDIAN
450
451 #if BIT64           // 64 bit CPU needs to be long aligned for this to work.
452                           charLeftOver == 0 && (unchecked((long)chars) & 7) == 0)
453 #else
454                           charLeftOver == 0 && (unchecked((int)chars) & 3) == 0)
455 #endif
456                     {
457                         // Need new char* so we can check 4 at a time
458                         ulong* longChars = (ulong*)chars;
459
460                         while (longChars < longEnd)
461                         {
462                             // See if we potentially have surrogates (0x8000 bit set)
463                             // (We're either big endian on a big endian machine or little endian on 
464                             // a little endian machine so this'll work)                            
465                             if ((0x8000800080008000 & *longChars) != 0)
466                             {
467                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
468                                 // 5 bits looks like 11011, then its a high or low surrogate.
469                                 // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
470                                 // Note that we expect BMP characters to be more common than surrogates
471                                 // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
472                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
473
474                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
475                                 // but no clue if they're high or low.
476                                 // If each of the 4 characters are non-zero, then none are surrogates.
477                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
478                                     (uTemp & 0x0000FFFF00000000) == 0 ||
479                                     (uTemp & 0x00000000FFFF0000) == 0 ||
480                                     (uTemp & 0x000000000000FFFF) == 0)
481                                 {
482                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
483                                     // or if there's 1 or 4 surrogates
484
485                                     // If they happen to be high/low/high/low, we may as well continue.  Check the next
486                                     // bit to see if its set (low) or not (high) in the right pattern
487 #if BIGENDIAN
488                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
489 #else
490                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
491 #endif
492                                     {
493                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
494                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
495
496                                         // Drop out to the slow loop to resolve the surrogates
497                                         break;
498                                     }
499                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
500                                 }
501                                 // else none are surrogates, so we can use them.
502                             }
503                             // else all < 0x8000 so we can use them                            
504
505                             // We already counted these four chars, go to next long.
506                             longChars++;
507                         }
508
509                         chars = (char*)longChars;
510
511                         if (chars >= charEnd)
512                             break;
513                     }
514 #endif // !NO_FAST_UNICODE_LOOP
515
516                     // No fallback, just get next char
517                     ch = *chars;
518                     chars++;
519                 }
520                 else
521                 {
522                     // We weren't preallocating fallback space.
523                     byteCount += 2;
524                 }
525
526                 // Check for high or low surrogates
527                 if (ch >= 0xd800 && ch <= 0xdfff)
528                 {
529                     // Was it a high surrogate?
530                     if (ch <= 0xdbff)
531                     {
532                         // Its a high surrogate, if we already had a high surrogate do its fallback
533                         if (charLeftOver > 0)
534                         {
535                             // Unwind the current character, this should be safe because we
536                             // don't have leftover data in the fallback, so chars must have
537                             // advanced already.
538                             Debug.Assert(chars > charStart,
539                                 "[UnicodeEncoding.GetByteCount]Expected chars to have advanced in unexpected high surrogate");
540                             chars--;
541
542                             // If previous high surrogate deallocate 2 bytes
543                             byteCount -= 2;
544
545                             // Fallback the previous surrogate
546                             // Need to initialize fallback buffer?
547                             if (fallbackBuffer == null)
548                             {
549                                 if (encoder == null)
550                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
551                                 else
552                                     fallbackBuffer = encoder.FallbackBuffer;
553
554                                 // Set our internal fallback interesting things.
555                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
556                             }
557
558                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
559                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
560                             chars = charsForFallback;
561
562                             // Now no high surrogate left over
563                             charLeftOver = (char)0;
564                             continue;
565                         }
566
567                         // Remember this high surrogate
568                         charLeftOver = ch;
569                         continue;
570                     }
571
572
573                     // Its a low surrogate
574                     if (charLeftOver == 0)
575                     {
576                         // Expected a previous high surrogate.
577                         // Don't count this one (we'll count its fallback if necessary)
578                         byteCount -= 2;
579
580                         // fallback this one
581                         // Need to initialize fallback buffer?
582                         if (fallbackBuffer == null)
583                         {
584                             if (encoder == null)
585                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
586                             else
587                                 fallbackBuffer = encoder.FallbackBuffer;
588
589                             // Set our internal fallback interesting things.
590                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
591                         }
592                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
593                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
594                         chars = charsForFallback;
595                         continue;
596                     }
597
598                     // Valid surrogate pair, add our charLeftOver
599                     charLeftOver = (char)0;
600                     continue;
601                 }
602                 else if (charLeftOver > 0)
603                 {
604                     // Expected a low surrogate, but this char is normal
605
606                     // Rewind the current character, fallback previous character.
607                     // this should be safe because we don't have leftover data in the
608                     // fallback, so chars must have advanced already.
609                     Debug.Assert(chars > charStart,
610                         "[UnicodeEncoding.GetByteCount]Expected chars to have advanced when expected low surrogate");
611                     chars--;
612
613                     // fallback previous chars
614                     // Need to initialize fallback buffer?
615                     if (fallbackBuffer == null)
616                     {
617                         if (encoder == null)
618                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
619                         else
620                             fallbackBuffer = encoder.FallbackBuffer;
621
622                         // Set our internal fallback interesting things.
623                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
624                     }
625                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
626                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
627                     chars = charsForFallback;
628
629                     // Ignore charLeftOver or throw
630                     byteCount -= 2;
631                     charLeftOver = (char)0;
632
633                     continue;
634                 }
635
636                 // Ok we had something to add (already counted)
637             }
638
639             // Don't allocate space for left over char
640             if (charLeftOver > 0)
641             {
642                 byteCount -= 2;
643
644                 // If we have to flush, stick it in fallback and try again
645                 if (encoder == null || encoder.MustFlush)
646                 {
647                     if (wasHereBefore)
648                     {
649                         // Throw it, using our complete character
650                         throw new ArgumentException(
651                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
652                     }
653                     else
654                     {
655                         // Need to initialize fallback buffer?
656                         if (fallbackBuffer == null)
657                         {
658                             if (encoder == null)
659                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
660                             else
661                                 fallbackBuffer = encoder.FallbackBuffer;
662
663                             // Set our internal fallback interesting things.
664                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
665                         }
666                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
667                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
668                         chars = charsForFallback;
669                         charLeftOver = (char)0;
670                         wasHereBefore = true;
671                         goto TryAgain;
672                     }
673                 }
674             }
675
676             // Shouldn't have anything in fallback buffer for GetByteCount
677             // (don't have to check m_throwOnOverflow for count)
678             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
679                 "[UnicodeEncoding.GetByteCount]Expected empty fallback buffer at end");
680
681             // Don't remember fallbackBuffer.encoder for counting
682             return byteCount;
683         }
684
685         internal override unsafe int GetBytes(char* chars, int charCount,
686                                                 byte* bytes, int byteCount, EncoderNLS encoder)
687         {
                // Purpose: copies the UTF-16 code units in [chars, chars + charCount) into the output
                // buffer [bytes, bytes + byteCount), two bytes per code unit: high byte first when
                // bigEndian is set, low byte first otherwise.
                //
                // Surrogates: a high surrogate (0xd800-0xdbff) is parked in charLeftOver until the next
                // char is seen.  A high surrogate followed by a low surrogate is written out as is; a
                // surrogate without its partner is handed to the encoder fallback buffer, and any chars
                // the fallback buffer then returns are consumed by the main loop before more input.
                //
                // encoder may be null.  When it is not null, a pending charLeftOver is read from it on
                // entry, and charLeftOver plus m_charsUsed (input chars consumed) are stored on exit.
                //
                // Returns the number of bytes written.  When the output buffer is too small,
                // ThrowBytesOverflow is called; if that call returns, encoding stops early and the
                // count of bytes written so far is returned.
688             Debug.Assert(chars != null, "[UnicodeEncoding.GetBytes]chars!=null");
689             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetBytes]byteCount >=0");
690             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetBytes]charCount >=0");
691             Debug.Assert(bytes != null, "[UnicodeEncoding.GetBytes]bytes!=null");
692
                // High surrogate waiting for its low surrogate (0 when nothing is pending)
693             char charLeftOver = (char)0;
694             char ch;
                // Set once the end-of-input fallback below has run, so a second pass throws
                // Argument_RecursiveFallback instead of jumping back to TryAgain again
695             bool wasHereBefore = false;
696
697
698             byte* byteEnd = bytes + byteCount;
699             char* charEnd = chars + charCount;
700             byte* byteStart = bytes;
701             char* charStart = chars;
702
703             // For fallback we may need a fallback buffer
704             EncoderFallbackBuffer fallbackBuffer = null;
705             char* charsForFallback;
706
707             // Get our encoder, but don't clear it yet.
708             if (encoder != null)
709             {
710                 charLeftOver = encoder.charLeftOver;
711
712                 // Reuse the encoder's fallback buffer if it already has one.  Left over fallback data
                    // is only treated as an error when encoder.m_throwOnOverflow is set (see below).
713                 if (encoder.InternalHasFallbackBuffer)
714                 {
715                     // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
716                     fallbackBuffer = encoder.FallbackBuffer;
717                     if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
718                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
719
720                     // Set our internal fallback interesting things.
                        // NOTE(review): the last argument is false here but true at every other
                        // InternalInitialize call in this method - confirm that this is intended.
721                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
722                 }
723             }
724
725         TryAgain:
                // Each iteration takes its char from the fallback buffer when it supplies one (ch != 0),
                // otherwise from the input; the loop ends when both are exhausted.
726             while (((ch = (fallbackBuffer == null) ?
727                         (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) ||
728                     chars < charEnd)
729             {
730                 // First unwind any fallback
731                 if (ch == 0)
732                 {
733                     // No fallback, maybe we can do it fast
734 #if !NO_FAST_UNICODE_LOOP
735 #if BIGENDIAN           // If endianness is backwards then each pair of bytes would be backwards.
736                     if ( bigEndian &&
737 #else
738                     if (!bigEndian &&
739 #endif // BIGENDIAN
740 #if BIT64           // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned
741                         (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
742 #else
743                         (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
744 #endif // BIT64
745                         charLeftOver == 0)
746                     {
747                         // Need -1 to check 2 at a time.  If we have an even #, longChars will go
748                         // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
749                         // will go from longEnd - 1 long to longEnd. (Might not get to use this)
750                         // We can only go iCount units (limited by shorter of char or byte buffers).
                            // NOTE(review): the code below subtracts 3 chars and copies one ulong
                            // (4 chars) per iteration, so the "-1 / 2 at a time" wording above looks
                            // left over from an earlier 2-char version.
                            // The usable count is min(free output bytes / 2, remaining input chars).
751                         ulong* longEnd = (ulong*)(chars - 3 +
752                                                   (((byteEnd - bytes) >> 1 < charEnd - chars) ?
753                                                     (byteEnd - bytes) >> 1 : charEnd - chars));
754
755                         // Need new char* so we can check 4 at a time
756                         ulong* longChars = (ulong*)chars;
757                         ulong* longBytes = (ulong*)bytes;
758
759                         while (longChars < longEnd)
760                         {
761                             // See if we potentially have surrogates (0x8000 bit set)
762                             // (We're either big endian on a big endian machine or little endian on
763                             // a little endian machine so this'll work)
764                             if ((0x8000800080008000 & *longChars) != 0)
765                             {
766                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
767                                 // 5 bits looks like 11011, then its a high or low surrogate.
768                                 // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
769                                 // Note that we expect BMP characters to be more common than surrogates
770                                 // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
771                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
772
773                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
774                                 // but no clue if they're high or low.
775                                 // If each of the 4 characters are non-zero, then none are surrogates.
776                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
777                                     (uTemp & 0x0000FFFF00000000) == 0 ||
778                                     (uTemp & 0x00000000FFFF0000) == 0 ||
779                                     (uTemp & 0x000000000000FFFF) == 0)
780                                 {
781                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
782                                     // or if there's 1 or 4 surrogates
783
784                                     // If they happen to be high/low/high/low, we may as well continue.  Check the next
785                                     // bit to see if its set (low) or not (high) in the right pattern
786 #if BIGENDIAN
787                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
788 #else
789                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
790 #endif
791                                     {
792                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
793                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
794
795                                         // Drop out to the slow loop to resolve the surrogates
796                                         break;
797                                     }
798                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
799                                 }
800                                 // else none are surrogates, so we can use them.
801                             }
802                             // else all < 0x8000 so we can use them
803
804                             // We can use these 4 chars.
805                             *longBytes = *longChars;
806                             longChars++;
807                             longBytes++;
808                         }
809
                            // Publish how far the fast loop got back to the regular pointers
810                         chars = (char*)longChars;
811                         bytes = (byte*)longBytes;
812
                            // All input consumed by the fast loop: leave the main loop
813                         if (chars >= charEnd)
814                             break;
815                     }
816                     // Not aligned, but maybe we can still be somewhat faster
817                     // Also somehow this optimizes the above loop?  It seems to cause something above
818                     // to get enregistered, but I haven't figured out how to make that happen without this loop.
819                     else if ((charLeftOver == 0) &&
820 #if BIGENDIAN
821                         bigEndian &&
822 #else
823                         !bigEndian &&
824 #endif // BIGENDIAN
825
826 #if BIT64
827                         (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) &&  // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
828 #else
829                         (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) &&  // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
830 #endif // BIT64
831                         (unchecked((int)(bytes)) & 1) == 0)
832                     {
833                         // # to use: min(free output bytes / 2, remaining input chars)
834                         long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ?
835                                        (byteEnd - bytes) >> 1 : charEnd - chars;
836
837                         // Need new char*
838                         char* charOut = ((char*)bytes);     // a char* for our output
                            // Stop one char short because each iteration also reads *(chars + 1)
839                         char* tempEnd = chars + iCount - 1; // Our end pointer
840
841                         while (chars < tempEnd)
842                         {
843                             if (*chars >= (char)0xd800 && *chars <= (char)0xdfff)
844                             {
845                                 // break for fallback for low surrogate
846                                 if (*chars >= 0xdc00)
847                                     break;
848
849                                 // break if next one's not a low surrogate (will do fallback)
850                                 if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff)
851                                     break;
852
853                                 // They both exist, use them
854                             }
855                             // If 2nd char is surrogate & this one isn't then only add one
856                             else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff)
857                             {
858                                 *charOut = *chars;
859                                 charOut++;
860                                 chars++;
861                                 continue;
862                             }
863
                                // Copy two chars: either two non-surrogates or a high/low pair
864                             *charOut = *chars;
865                             *(charOut + 1) = *(chars + 1);
866                             charOut += 2;
867                             chars += 2;
868                         }
869
870                         bytes = (byte*)charOut;
871
                            // All input consumed by this loop: leave the main loop
872                         if (chars >= charEnd)
873                             break;
874                     }
875 #endif // !NO_FAST_UNICODE_LOOP
876
877                     // No fallback, just get next char
878                     ch = *chars;
879                     chars++;
880                 }
881
882                 // Check for high or low surrogates
883                 if (ch >= 0xd800 && ch <= 0xdfff)
884                 {
885                     // Was it a high surrogate?
886                     if (ch <= 0xdbff)
887                     {
888                         // Its a high surrogate, see if we already had a high surrogate
889                         if (charLeftOver > 0)
890                         {
891                             // Unwind the current character, this should be safe because we
892                             // don't have leftover data in the fallback, so chars must have
893                             // advanced already.
894                             Debug.Assert(chars > charStart,
895                                 "[UnicodeEncoding.GetBytes]Expected chars to have advanced in unexpected high surrogate");
896                             chars--;
897
898                             // Fallback the previous surrogate
899                             // Might need to create our fallback buffer
900                             if (fallbackBuffer == null)
901                             {
902                                 if (encoder == null)
903                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
904                                 else
905                                     fallbackBuffer = encoder.FallbackBuffer;
906
907                                 // Set our internal fallback interesting things.
908                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
909                             }
910
911                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
912                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
913                             chars = charsForFallback;
914
                                // The rewound high surrogate is read again on a later iteration
915                             charLeftOver = (char)0;
916                             continue;
917                         }
918
919                         // Remember this high surrogate
920                         charLeftOver = ch;
921                         continue;
922                     }
923
924                     // Its a low surrogate
925                     if (charLeftOver == 0)
926                     {
927                         // We'll fall back this one
928                         // Might need to create our fallback buffer
929                         if (fallbackBuffer == null)
930                         {
931                             if (encoder == null)
932                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
933                             else
934                                 fallbackBuffer = encoder.FallbackBuffer;
935
936                             // Set our internal fallback interesting things.
937                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
938                         }
939
940                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
941                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
942                         chars = charsForFallback;
943                         continue;
944                     }
945
946                     // Valid surrogate pair, add our charLeftOver
                        // Both halves need room (4 bytes): charLeftOver is written here and ch is
                        // written by the common code at the bottom of the loop.
947                     if (bytes + 3 >= byteEnd)
948                     {
949                         // Not enough room to add this surrogate pair
950                         if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
951                         {
952                             // These must have both been from the fallbacks.
953                             // Both of these MUST have been from a fallback because if the 1st wasn't
954                             // from a fallback, then a high surrogate followed by an illegal char
955                             // would've caused the high surrogate to fall back.  If a high surrogate
956                             // fell back, then it was consumed and both chars came from the fallback.
957                             fallbackBuffer.MovePrevious();                     // Didn't use either fallback surrogate
958                             fallbackBuffer.MovePrevious();
959                         }
960                         else
961                         {
962                             // If we don't have enough room, then either we should've advanced a while
963                             // or we should have bytes==byteStart and throw below
964                             Debug.Assert(chars > charStart + 1 || bytes == byteStart,
965                                 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
966                             chars -= 2;                                        // Didn't use either surrogate
967                         }
968                         ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
969                         charLeftOver = (char)0;                             // we'll retry it later
970                         break;                                               // Didn't throw, but stop 'til next time.
971                     }
972
                        // Write the high surrogate in the configured byte order
973                     if (bigEndian)
974                     {
975                         *(bytes++) = (byte)(charLeftOver >> 8);
976                         *(bytes++) = (byte)charLeftOver;
977                     }
978                     else
979                     {
980                         *(bytes++) = (byte)charLeftOver;
981                         *(bytes++) = (byte)(charLeftOver >> 8);
982                     }
983
984                     charLeftOver = (char)0;
985                 }
986                 else if (charLeftOver > 0)
987                 {
988                     // Expected a low surrogate, but this char is normal
989
990                     // Rewind the current character, fallback previous character.
991                     // this should be safe because we don't have leftover data in the
992                     // fallback, so chars must have advanced already.
993                     Debug.Assert(chars > charStart,
994                         "[UnicodeEncoding.GetBytes]Expected chars to have advanced after expecting low surrogate");
995                     chars--;
996
997                     // fallback previous chars
998                     // Might need to create our fallback buffer
999                     if (fallbackBuffer == null)
1000                     {
1001                         if (encoder == null)
1002                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
1003                         else
1004                             fallbackBuffer = encoder.FallbackBuffer;
1005
1006                         // Set our internal fallback interesting things.
1007                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
1008                     }
1009
1010                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1011                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1012                     chars = charsForFallback;
1013
1014                     // Ignore charLeftOver or throw
1015                     charLeftOver = (char)0;
1016                     continue;
1017                 }
1018
1019                 // Ok, we have a char to add
                     // It needs 2 bytes of output
1020                 if (bytes + 1 >= byteEnd)
1021                 {
1022                     // Couldn't add this char
1023                     if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
1024                         fallbackBuffer.MovePrevious();                     // Not using this fallback char
1025                     else
1026                     {
1027                         // Lonely charLeftOver (from previous call) would've been caught up above,
1028                         // so this must be a case where we've already read an input char.
1029                         Debug.Assert(chars > charStart,
1030                             "[UnicodeEncoding.GetBytes]Expected chars to have advanced for failed fallback");
1031                         chars--;                                         // Not using this char
1032                     }
1033                     ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
1034                     break;                                               // didn't throw, just stop
1035                 }
1036
                     // Write ch in the configured byte order
1037                 if (bigEndian)
1038                 {
1039                     *(bytes++) = (byte)(ch >> 8);
1040                     *(bytes++) = (byte)ch;
1041                 }
1042                 else
1043                 {
1044                     *(bytes++) = (byte)ch;
1045                     *(bytes++) = (byte)(ch >> 8);
1046                 }
1047             }
1048
1049             // A high surrogate is still pending after the loop
1050             if (charLeftOver > 0)
1051             {
1052                 // If we are flushing (or have no encoder) we need to fall this back
1053                 if (encoder == null || encoder.MustFlush)
1054                 {
1055                     if (wasHereBefore)
1056                     {
1057                         // Throw it, using our complete character
1058                         throw new ArgumentException(
1059                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
1060                     }
1061                     else
1062                     {
1063                         // If we have to flush, stick it in fallback and try again
1064                         // Might need to create our fallback buffer
1065                         if (fallbackBuffer == null)
1066                         {
1067                             if (encoder == null)
1068                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
1069                             else
1070                                 fallbackBuffer = encoder.FallbackBuffer;
1071
1072                             // Set our internal fallback interesting things.
1073                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
1074                         }
1075
1076                         // If we're not flushing, this'll remember the left over character.
1077                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1078                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1079                         chars = charsForFallback;
1080
                             // Re-enter the main loop once so the fallback output gets encoded
1081                         charLeftOver = (char)0;
1082                         wasHereBefore = true;
1083                         goto TryAgain;
1084                     }
1085                 }
1086             }
1087
1088             // Store the pending high surrogate (0 if none) and the number of input chars
                 // consumed back into the encoder
1089             if (encoder != null)
1090             {
1091                 encoder.charLeftOver = charLeftOver;
1092                 encoder.m_charsUsed = (int)(chars - charStart);
1093             }
1094
1095             // Remember charLeftOver if we must, or clear it if we're flushing
1096             // (charLeftOver should be 0 if we're flushing)
1097             Debug.Assert((encoder != null && !encoder.MustFlush) || charLeftOver == (char)0,
1098                 "[UnicodeEncoding.GetBytes] Expected no left over characters if flushing");
1099
1100             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
1101                 encoder == null || !encoder.m_throwOnOverflow,
1102                 "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting");
1103
1104             // We used to copy it fast, but this doesn't check for surrogates
1105             // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount);
1106
                 // Number of bytes written to the output buffer
1107             return (int)(bytes - byteStart);
1108         }
1109
1110         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
1111         {
1112             Debug.Assert(bytes != null, "[UnicodeEncoding.GetCharCount]bytes!=null");
1113             Debug.Assert(count >= 0, "[UnicodeEncoding.GetCharCount]count >=0");
1114
1115             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1116
1117             byte* byteEnd = bytes + count;
1118             byte* byteStart = bytes;
1119
1120             // Need last vars
1121             int lastByte = -1;
1122             char lastChar = (char)0;
1123
1124             // Start by assuming same # of chars as bytes
1125             int charCount = count >> 1;
1126
1127             // Need -1 to check 2 at a time.  If we have an even #, longBytes will go
1128             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longBytes
1129             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
1130             ulong* longEnd = (ulong*)(byteEnd - 7);
1131
1132             // For fallback we may need a fallback buffer
1133             DecoderFallbackBuffer fallbackBuffer = null;
1134
1135             if (decoder != null)
1136             {
1137                 lastByte = decoder.lastByte;
1138                 lastChar = decoder.lastChar;
1139
1140                 // Assume extra char if last char was around
1141                 if (lastChar > 0)
1142                     charCount++;
1143
1144                 // Assume extra char if extra last byte makes up odd # of input bytes
1145                 if (lastByte >= 0 && (count & 1) == 1)
1146                 {
1147                     charCount++;
1148                 }
1149
1150                 // Shouldn't have anything in fallback buffer for GetCharCount
1151                 // (don't have to check m_throwOnOverflow for count)
1152                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1153                     "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at start");
1154             }
1155
1156             while (bytes < byteEnd)
1157             {
1158                 // If we're aligned then maybe we can do it fast
1159                 // This'll hurt if we're unaligned because we'll always test but never be aligned
1160 #if !NO_FAST_UNICODE_LOOP
1161 #if BIGENDIAN
1162                 if (bigEndian &&
1163 #else // BIGENDIAN
1164                 if (!bigEndian &&
1165 #endif // BIGENDIAN
1166 #if BIT64 // win64 has to be long aligned
1167                     (unchecked((long)bytes) & 7) == 0 &&
1168 #else
1169                     (unchecked((int)bytes) & 3) == 0 &&
1170 #endif // BIT64
1171                     lastByte == -1 && lastChar == 0)
1172                 {
1173                     // Need new char* so we can check 4 at a time
1174                     ulong* longBytes = (ulong*)bytes;
1175
1176                     while (longBytes < longEnd)
1177                     {
1178                         // See if we potentially have surrogates (0x8000 bit set)
1179                         // (We're either big endian on a big endian machine or little endian on 
1180                         // a little endian machine so this'll work)
1181                         if ((0x8000800080008000 & *longBytes) != 0)
1182                         {
1183                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1184                             // 5 bits looks like 11011, then its a high or low surrogate.
1185                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1186                             // Note that we expect BMP characters to be more common than surrogates
1187                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1188                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1189
1190                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1191                             // but no clue if they're high or low.
1192                             // If each of the 4 characters are non-zero, then none are surrogates.
1193                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1194                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1195                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1196                                 (uTemp & 0x000000000000FFFF) == 0)
1197                             {
1198                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1199                                 // or if there's 1 or 4 surrogates
1200
1201                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1202                                 // bit to see if its set (low) or not (high) in the right pattern
1203 #if BIGENDIAN
1204                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1205 #else
1206                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1207 #endif
1208                                 {
1209                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1210                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1211
1212                                     // Drop out to the slow loop to resolve the surrogates
1213                                     break;
1214                                 }
1215                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1216                             }
1217                             // else none are surrogates, so we can use them.
1218                         }
1219                         // else all < 0x8000 so we can use them
1220
1221                         // We can use these 4 chars.
1222                         longBytes++;
1223                     }
1224
1225                     bytes = (byte*)longBytes;
1226
1227                     if (bytes >= byteEnd)
1228                         break;
1229                 }
1230 #endif // !NO_FAST_UNICODE_LOOP
1231
1232                 // Get 1st byte
1233                 if (lastByte < 0)
1234                 {
1235                     lastByte = *bytes++;
1236                     if (bytes >= byteEnd) break;
1237                 }
1238
1239                 // Get full char
1240                 char ch;
1241                 if (bigEndian)
1242                 {
1243                     ch = (char)(lastByte << 8 | *(bytes++));
1244                 }
1245                 else
1246                 {
1247                     ch = (char)(*(bytes++) << 8 | lastByte);
1248                 }
1249                 lastByte = -1;
1250
1251                 // See if the char's valid
1252                 if (ch >= 0xd800 && ch <= 0xdfff)
1253                 {
1254                     // Was it a high surrogate?
1255                     if (ch <= 0xdbff)
1256                     {
1257                         // Its a high surrogate, if we had one then do fallback for previous one
1258                         if (lastChar > 0)
1259                         {
1260                             // Ignore previous bad high surrogate
1261                             charCount--;
1262
1263                             // Get fallback for previous high surrogate
1264                             // Note we have to reconstruct bytes because some may have been in decoder
1265                             byte[] byteBuffer = null;
1266                             if (bigEndian)
1267                             {
1268                                 byteBuffer = new byte[]
1269                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1270                             }
1271                             else
1272                             {
1273                                 byteBuffer = new byte[]
1274                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1275                             }
1276
1277                             if (fallbackBuffer == null)
1278                             {
1279                                 if (decoder == null)
1280                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1281                                 else
1282                                     fallbackBuffer = decoder.FallbackBuffer;
1283
1284                                 // Set our internal fallback interesting things.
1285                                 fallbackBuffer.InternalInitialize(byteStart, null);
1286                             }
1287
1288                             // Get fallback.
1289                             charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1290                         }
1291
1292                         // Ignore the last one which fell back already,
1293                         // and remember the new high surrogate
1294                         lastChar = ch;
1295                         continue;
1296                     }
1297
1298                     // Its a low surrogate
1299                     if (lastChar == 0)
1300                     {
1301                         // Expected a previous high surrogate
1302                         charCount--;
1303
1304                         // Get fallback for this low surrogate
1305                         // Note we have to reconstruct bytes because some may have been in decoder
1306                         byte[] byteBuffer = null;
1307                         if (bigEndian)
1308                         {
1309                             byteBuffer = new byte[]
1310                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1311                         }
1312                         else
1313                         {
1314                             byteBuffer = new byte[]
1315                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1316                         }
1317
1318                         if (fallbackBuffer == null)
1319                         {
1320                             if (decoder == null)
1321                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1322                             else
1323                                 fallbackBuffer = decoder.FallbackBuffer;
1324
1325                             // Set our internal fallback interesting things.
1326                             fallbackBuffer.InternalInitialize(byteStart, null);
1327                         }
1328
1329                         charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1330
1331                         // Ignore this one (we already did its fallback)
1332                         continue;
1333                     }
1334
1335                     // Valid surrogate pair, already counted.
1336                     lastChar = (char)0;
1337                 }
1338                 else if (lastChar > 0)
1339                 {
1340                     // Had a high surrogate, expected a low surrogate
1341                     // Uncount the last high surrogate
1342                     charCount--;
1343
1344                     // fall back the high surrogate.
1345                     byte[] byteBuffer = null;
1346                     if (bigEndian)
1347                     {
1348                         byteBuffer = new byte[]
1349                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1350                     }
1351                     else
1352                     {
1353                         byteBuffer = new byte[]
1354                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1355                     }
1356
1357                     if (fallbackBuffer == null)
1358                     {
1359                         if (decoder == null)
1360                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1361                         else
1362                             fallbackBuffer = decoder.FallbackBuffer;
1363
1364                         // Set our internal fallback interesting things.
1365                         fallbackBuffer.InternalInitialize(byteStart, null);
1366                     }
1367
1368                     // Already subtracted high surrogate
1369                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1370
1371                     // Not left over now, clear previous high surrogate and continue to add current char
1372                     lastChar = (char)0;
1373                 }
1374
1375                 // Valid char, already counted
1376             }
1377
1378             // Extra space if we can't use decoder
1379             if (decoder == null || decoder.MustFlush)
1380             {
1381                 if (lastChar > 0)
1382                 {
1383                     // No hanging high surrogates allowed, do fallback and remove count for it
1384                     charCount--;
1385                     byte[] byteBuffer = null;
1386                     if (bigEndian)
1387                     {
1388                         byteBuffer = new byte[]
1389                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1390                     }
1391                     else
1392                     {
1393                         byteBuffer = new byte[]
1394                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1395                     }
1396
1397                     if (fallbackBuffer == null)
1398                     {
1399                         if (decoder == null)
1400                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1401                         else
1402                             fallbackBuffer = decoder.FallbackBuffer;
1403
1404                         // Set our internal fallback interesting things.
1405                         fallbackBuffer.InternalInitialize(byteStart, null);
1406                     }
1407
1408                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1409
1410                     lastChar = (char)0;
1411                 }
1412
1413                 if (lastByte >= 0)
1414                 {
1415                     if (fallbackBuffer == null)
1416                     {
1417                         if (decoder == null)
1418                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1419                         else
1420                             fallbackBuffer = decoder.FallbackBuffer;
1421
1422                         // Set our internal fallback interesting things.
1423                         fallbackBuffer.InternalInitialize(byteStart, null);
1424                     }
1425
1426                     // No hanging odd bytes allowed if must flush
1427                     charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
1428                     lastByte = -1;
1429                 }
1430             }
1431
1432             // If we had a high surrogate left over, we can't count it
1433             if (lastChar > 0)
1434                 charCount--;
1435
1436             // Shouldn't have anything in fallback buffer for GetCharCount
1437             // (don't have to check m_throwOnOverflow for count)
1438             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1439                 "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at end");
1440
1441             return charCount;
1442         }
1443
1444         internal override unsafe int GetChars(byte* bytes, int byteCount,
1445                                                 char* chars, int charCount, DecoderNLS baseDecoder)
1446         {
1447             Debug.Assert(chars != null, "[UnicodeEncoding.GetChars]chars!=null");
1448             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetChars]byteCount >=0");
1449             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetChars]charCount >=0");
1450             Debug.Assert(bytes != null, "[UnicodeEncoding.GetChars]bytes!=null");
1451
1452             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1453
1454             // Need last vars
1455             int lastByte = -1;
1456             char lastChar = (char)0;
1457
1458             // Get our decoder (but don't clear it yet)
1459             if (decoder != null)
1460             {
1461                 lastByte = decoder.lastByte;
1462                 lastChar = decoder.lastChar;
1463
1464                 // Shouldn't have anything in fallback buffer for GetChars
1465                 // (don't have to check m_throwOnOverflow for chars)
1466                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1467                     "[UnicodeEncoding.GetChars]Expected empty fallback buffer at start");
1468             }
1469
1470             // For fallback we may need a fallback buffer
1471             DecoderFallbackBuffer fallbackBuffer = null;
1472             char* charsForFallback;
1473
1474             byte* byteEnd = bytes + byteCount;
1475             char* charEnd = chars + charCount;
1476             byte* byteStart = bytes;
1477             char* charStart = chars;
1478
1479             while (bytes < byteEnd)
1480             {
1481                 // If we're aligned then maybe we can do it fast
1482                 // This'll hurt if we're unaligned because we'll always test but never be aligned
1483 #if !NO_FAST_UNICODE_LOOP
1484 #if BIGENDIAN
1485                 if (bigEndian &&
1486 #else // BIGENDIAN
1487                 if (!bigEndian &&
1488 #endif // BIGENDIAN
1489 #if BIT64 // win64 has to be long aligned
1490                     (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
1491 #else
1492                     (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
1493 #endif // BIT64
1494                     lastByte == -1 && lastChar == 0)
1495                 {
1496                     // Need -7 so that every 8-byte (4-char) read in the fast loop stays inside
1497                     // the usable range: longBytes < longEnd guarantees 8 readable bytes remain.
1498                     // (Might not get to use this.)  The usable range is limited by the shorter
1499                     // of the byte buffer and the char buffer (2 bytes per char).
1500                     ulong* longEnd = (ulong*)(bytes - 7 +
1501                                                 (((byteEnd - bytes) >> 1 < charEnd - chars) ?
1502                                                   (byteEnd - bytes) : (charEnd - chars) << 1));
1503
1504                     // Need new char* so we can check 4 at a time
1505                     ulong* longBytes = (ulong*)bytes;
1506                     ulong* longChars = (ulong*)chars;
1507
1508                     while (longBytes < longEnd)
1509                     {
1510                         // See if we potentially have surrogates (0x8000 bit set)
1511                         // (We're either big endian on a big endian machine or little endian on 
1512                         // a little endian machine so this'll work)
1513                         if ((0x8000800080008000 & *longBytes) != 0)
1514                         {
1515                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1516                             // 5 bits looks like 11011, then its a high or low surrogate.
1517                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1518                             // Note that we expect BMP characters to be more common than surrogates
1519                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1520                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1521
1522                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1523                             // but no clue if they're high or low.
1524                             // If each of the 4 characters are non-zero, then none are surrogates.
1525                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1526                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1527                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1528                                 (uTemp & 0x000000000000FFFF) == 0)
1529                             {
1530                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1531                                 // or if there's 1 or 4 surrogates
1532
1533                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1534                                 // bit to see if its set (low) or not (high) in the right pattern
1535 #if BIGENDIAN
1536                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1537 #else
1538                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1539 #endif
1540                                 {
1541                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1542                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1543
1544                                     // Drop out to the slow loop to resolve the surrogates
1545                                     break;
1546                                 }
1547                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1548                             }
1549                             // else none are surrogates, so we can use them.
1550                         }
1551                         // else all < 0x8000 so we can use them
1552
1553                         // We can use these 4 chars.
1554                         *longChars = *longBytes;
1555                         longBytes++;
1556                         longChars++;
1557                     }
1558
1559                     chars = (char*)longChars;
1560                     bytes = (byte*)longBytes;
1561
1562                     if (bytes >= byteEnd)
1563                         break;
1564                 }
1565 #endif // !NO_FAST_UNICODE_LOOP
1566
1567                 // Get 1st byte
1568                 if (lastByte < 0)
1569                 {
1570                     lastByte = *bytes++;
1571                     continue;
1572                 }
1573
1574                 // Get full char
1575                 char ch;
1576                 if (bigEndian)
1577                 {
1578                     ch = (char)(lastByte << 8 | *(bytes++));
1579                 }
1580                 else
1581                 {
1582                     ch = (char)(*(bytes++) << 8 | lastByte);
1583                 }
1584                 lastByte = -1;
1585
1586                 // See if the char's valid
1587                 if (ch >= 0xd800 && ch <= 0xdfff)
1588                 {
1589                     // Was it a high surrogate?
1590                     if (ch <= 0xdbff)
1591                     {
1592                         // Its a high surrogate, if we had one then do fallback for previous one
1593                         if (lastChar > 0)
1594                         {
1595                             // Get fallback for previous high surrogate
1596                             // Note we have to reconstruct bytes because some may have been in decoder
1597                             byte[] byteBuffer = null;
1598                             if (bigEndian)
1599                             {
1600                                 byteBuffer = new byte[]
1601                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1602                             }
1603                             else
1604                             {
1605                                 byteBuffer = new byte[]
1606                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1607                             }
1608
1609                             if (fallbackBuffer == null)
1610                             {
1611                                 if (decoder == null)
1612                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1613                                 else
1614                                     fallbackBuffer = decoder.FallbackBuffer;
1615
1616                                 // Set our internal fallback interesting things.
1617                                 fallbackBuffer.InternalInitialize(byteStart, charEnd);
1618                             }
1619
1620                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1621                             bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1622                             chars = charsForFallback;
1623
1624                             if (!fallbackResult)
1625                             {
1626                                 // couldn't fall back lonely surrogate
1627                                 // We either advanced bytes or chars should == charStart and throw below
1628                                 Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1629                                     "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (bad surrogate)");
1630                                 bytes -= 2;                                       // didn't use these 2 bytes
1631                                 fallbackBuffer.InternalReset();
1632                                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1633                                 break;                                          // couldn't fallback but didn't throw
1634                             }
1635                         }
1636
1637                         // Ignore the previous high surrogate which fell back already,
1638                         // yet remember the current high surrogate for next time.
1639                         lastChar = ch;
1640                         continue;
1641                     }
1642
1643                     // Its a low surrogate
1644                     if (lastChar == 0)
1645                     {
1646                         // Expected a previous high surrogate
1647                         // Get fallback for this low surrogate
1648                         // Note we have to reconstruct bytes because some may have been in decoder
1649                         byte[] byteBuffer = null;
1650                         if (bigEndian)
1651                         {
1652                             byteBuffer = new byte[]
1653                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1654                         }
1655                         else
1656                         {
1657                             byteBuffer = new byte[]
1658                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1659                         }
1660
1661                         if (fallbackBuffer == null)
1662                         {
1663                             if (decoder == null)
1664                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1665                             else
1666                                 fallbackBuffer = decoder.FallbackBuffer;
1667
1668                             // Set our internal fallback interesting things.
1669                             fallbackBuffer.InternalInitialize(byteStart, charEnd);
1670                         }
1671
1672                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1673                         bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1674                         chars = charsForFallback;
1675
1676                         if (!fallbackResult)
1677                         {
1678                             // couldn't fall back lonely surrogate
1679                             // We either advanced bytes or chars should == charStart and throw below
1680                             Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1681                                 "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (lonely surrogate)");
1682                             bytes -= 2;                                       // didn't use these 2 bytes
1683                             fallbackBuffer.InternalReset();
1684                             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1685                             break;                                          // couldn't fallback but didn't throw
1686                         }
1687
1688                         // Didn't throw, ignore this one (we already did its fallback)
1689                         continue;
1690                     }
1691
1692                     // Valid surrogate pair, add our lastChar (will need 2 chars)
1693                     if (chars >= charEnd - 1)
1694                     {
1695                         // couldn't find room for this surrogate pair
1696                         // We either advanced bytes or chars should == charStart and throw below
1697                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1698                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (surrogate pair)");
1699                         bytes -= 2;                                       // didn't use these 2 bytes
1700                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1701                         // Leave lastChar for next call to Convert()
1702                         break;                                          // couldn't fallback but didn't throw
1703                     }
1704
1705                     *chars++ = lastChar;
1706                     lastChar = (char)0;
1707                 }
1708                 else if (lastChar > 0)
1709                 {
1710                     // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
1711                     byte[] byteBuffer = null;
1712                     if (bigEndian)
1713                     {
1714                         byteBuffer = new byte[]
1715                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1716                     }
1717                     else
1718                     {
1719                         byteBuffer = new byte[]
1720                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1721                     }
1722
1723                     if (fallbackBuffer == null)
1724                     {
1725                         if (decoder == null)
1726                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1727                         else
1728                             fallbackBuffer = decoder.FallbackBuffer;
1729
1730                         // Set our internal fallback interesting things.
1731                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1732                     }
1733
1734                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1735                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1736                     chars = charsForFallback;
1737
1738                     if (!fallbackResult)
1739                     {
1740                         // couldn't fall back high surrogate, or char that would be next
1741                         // We either advanced bytes or chars should == charStart and throw below
1742                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1743                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (no low surrogate)");
1744                         bytes -= 2;                                       // didn't use these 2 bytes
1745                         fallbackBuffer.InternalReset();
1746                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1747                         break;                                          // couldn't fallback but didn't throw
1748                     }
1749
1750                     // Not left over now, clear previous high surrogate and continue to add current char
1751                     lastChar = (char)0;
1752                 }
1753
1754                 // Valid char, room for it?
1755                 if (chars >= charEnd)
1756                 {
1757                     // No room in the output buffer for this char
1758                     // We either advanced bytes or chars should == charStart and throw below
1759                     Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1760                         "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (normal)");
1761                     bytes -= 2;                                       // didn't use these bytes
1762                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1763                     break;                                          // couldn't fallback but didn't throw
1764                 }
1765
1766                 // add it
1767                 *chars++ = ch;
1768             }
1769
1770             // Flush leftover state if there is no decoder to carry it, or the decoder must flush
1771             if (decoder == null || decoder.MustFlush)
1772             {
1773                 if (lastChar > 0)
1774                 {
1775                     // No hanging high surrogates allowed, do fallback for it
1776                     byte[] byteBuffer = null;
1777                     if (bigEndian)
1778                     {
1779                         byteBuffer = new byte[]
1780                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1781                     }
1782                     else
1783                     {
1784                         byteBuffer = new byte[]
1785                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1786                     }
1787
1788                     if (fallbackBuffer == null)
1789                     {
1790                         if (decoder == null)
1791                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1792                         else
1793                             fallbackBuffer = decoder.FallbackBuffer;
1794
1795                         // Set our internal fallback interesting things.
1796                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1797                     }
1798
1799                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1800                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1801                     chars = charsForFallback;
1802
1803                     if (!fallbackResult)
1804                     {
1805                         // 2 bytes couldn't fall back
1806                         // We either advanced bytes or chars should == charStart and throw below
1807                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1808                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (decoder)");
1809                         bytes -= 2;                                       // didn't use these bytes
1810                         if (lastByte >= 0)
1811                             bytes--;                                    // had an extra last byte hanging around
1812                         fallbackBuffer.InternalReset();
1813                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1814                         // We'll remember these in our decoder though
1815                         bytes += 2;
1816                         if (lastByte >= 0)
1817                             bytes++;
1818                         goto End;
1819                     }
1820
1821                     // done with this one
1822                     lastChar = (char)0;
1823                 }
1824
1825                 if (lastByte >= 0)
1826                 {
1827                     if (fallbackBuffer == null)
1828                     {
1829                         if (decoder == null)
1830                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1831                         else
1832                             fallbackBuffer = decoder.FallbackBuffer;
1833
1834                         // Set our internal fallback interesting things.
1835                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1836                     }
1837
1838                     // No hanging odd bytes allowed if must flush
1839                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1840                     bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback);
1841                     chars = charsForFallback;
1842
1843                     if (!fallbackResult)
1844                     {
1845                         // odd byte couldn't fall back
1846                         bytes--;                                        // didn't use this byte
1847                         fallbackBuffer.InternalReset();
1848                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1849                         // didn't throw, but we'll remember it in the decoder
1850                         bytes++;
1851                         goto End;
1852                     }
1853
1854                     // Didn't fail, clear buffer
1855                     lastByte = -1;
1856                 }
1857             }
1858
1859         End:
1860
1861             // Remember our decoder if we must
1862             if (decoder != null)
1863             {
1864                 Debug.Assert((decoder.MustFlush == false) || ((lastChar == (char)0) && (lastByte == -1)),
1865                     "[UnicodeEncoding.GetChars] Expected no left over chars or bytes if flushing"
1866                     //                    + " " + ((int)lastChar).ToString("X4") + " " + lastByte.ToString("X2")
1867                     );
1868
1869                 decoder.m_bytesUsed = (int)(bytes - byteStart);
1870                 decoder.lastChar = lastChar;
1871                 decoder.lastByte = lastByte;
1872             }
1873
1874             // Used to do this the old way
1875             // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);
1876
1877             // Shouldn't have anything in fallback buffer for GetChars
1878             // (don't have to check m_throwOnOverflow for count or chars)
1879             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1880                 "[UnicodeEncoding.GetChars]Expected empty fallback buffer at end");
1881
1882             return (int)(chars - charStart);
1883         }
1884
1885
1886         public override System.Text.Encoder GetEncoder()
1887         {
1888             return new EncoderNLS(this);
1889         }
1890
1891
1892         public override System.Text.Decoder GetDecoder()
1893         {
1894             return new UnicodeEncoding.Decoder(this);
1895         }
1896
1897
1898         public override byte[] GetPreamble()
1899         {
1900             if (byteOrderMark)
1901             {
1902                 // Note - we must allocate new byte[]'s here to prevent someone
1903                 // from modifying a cached byte[].
1904                 if (bigEndian)
1905                     return new byte[2] { 0xfe, 0xff };
1906                 else
1907                     return new byte[2] { 0xff, 0xfe };
1908             }
1909             return Array.Empty<Byte>();
1910         }
1911
1912
1913         public override int GetMaxByteCount(int charCount)
1914         {
1915             if (charCount < 0)
1916                 throw new ArgumentOutOfRangeException(nameof(charCount),
1917                      SR.ArgumentOutOfRange_NeedNonNegNum);
1918             Contract.EndContractBlock();
1919
1920             // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
1921             long byteCount = (long)charCount + 1;
1922
1923             if (EncoderFallback.MaxCharCount > 1)
1924                 byteCount *= EncoderFallback.MaxCharCount;
1925
1926             // 2 bytes per char
1927             byteCount <<= 1;
1928
1929             if (byteCount > 0x7fffffff)
1930                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
1931
1932             return (int)byteCount;
1933         }
1934
1935
1936         public override int GetMaxCharCount(int byteCount)
1937         {
1938             if (byteCount < 0)
1939                 throw new ArgumentOutOfRangeException(nameof(byteCount),
1940                      SR.ArgumentOutOfRange_NeedNonNegNum);
1941             Contract.EndContractBlock();
1942
1943             // long because byteCount could be biggest int.
1944             // 1 char per 2 bytes.  Round up in case 1 left over in decoder.
1945             // Round up using &1 in case byteCount is max size
1946             // Might also need an extra 1 if there's a left over high surrogate in the decoder.
1947             long charCount = (long)(byteCount >> 1) + (byteCount & 1) + 1;
1948
1949             // Don't forget fallback (in case they have a bunch of lonely surrogates or something bizzare like that)
1950             if (DecoderFallback.MaxCharCount > 1)
1951                 charCount *= DecoderFallback.MaxCharCount;
1952
1953             if (charCount > 0x7fffffff)
1954                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
1955
1956             return (int)charCount;
1957         }
1958
1959
1960         public override bool Equals(Object value)
1961         {
1962             UnicodeEncoding that = value as UnicodeEncoding;
1963             if (that != null)
1964             {
1965                 //
1966                 // Big Endian Unicode has different code page (1201) than small Endian one (1200),
1967                 // so we still have to check m_codePage here.
1968                 //
1969                 return (CodePage == that.CodePage) &&
1970                         byteOrderMark == that.byteOrderMark &&
1971                         //                        isThrowException == that.isThrowException &&  // Same as Encoder/Decoder being exception fallbacks
1972                         bigEndian == that.bigEndian &&
1973                        (EncoderFallback.Equals(that.EncoderFallback)) &&
1974                        (DecoderFallback.Equals(that.DecoderFallback));
1975             }
1976             return (false);
1977         }
1978
1979         public override int GetHashCode()
1980         {
1981             return CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
1982                    (byteOrderMark ? 4 : 0) + (bigEndian ? 8 : 0);
1983         }
1984
1985         private sealed class Decoder : System.Text.DecoderNLS, ISerializable
1986         {
1987             internal int lastByte = -1;
1988             internal char lastChar = '\0';
1989
1990             public Decoder(UnicodeEncoding encoding) : base(encoding)
1991             {
1992                 // base calls reset
1993             }
1994             
1995             internal Decoder(SerializationInfo info, StreamingContext context)
1996             {
1997                 throw new PlatformNotSupportedException();
1998             }
1999
2000             // ISerializable implementation
2001             void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
2002             {
2003                 throw new PlatformNotSupportedException();
2004             }
2005
2006             public override void Reset()
2007             {
2008                 lastByte = -1;
2009                 lastChar = '\0';
2010                 if (m_fallbackBuffer != null)
2011                     m_fallbackBuffer.Reset();
2012             }
2013
2014             // Anything left in our decoder?
2015             internal override bool HasState
2016             {
2017                 get
2018                 {
2019                     return (this.lastByte != -1 || this.lastChar != '\0');
2020                 }
2021             }
2022         }
2023     }
2024 }
2025