src/mscorlib/shared/System/Text/UnicodeEncoding.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 //
   6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
   7 //
   8
   9 using System;
  10 using System.Globalization;
  11 using System.Runtime.Serialization;
  12 using System.Diagnostics;
  13 using System.Diagnostics.Contracts;
  14
  15 namespace System.Text
  16 {
  17     public class UnicodeEncoding : Encoding
  18     {
   19         // Shared default instances, used by Encoding.BigEndianUnicode/Unicode for lazy initialization.
   20         // The initialization code will not be run until a static member of the class is referenced.
   21         internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
   22         internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);
   23
   24         [OptionalField(VersionAdded = 2)]
   25         internal bool isThrowException = false; // Copied from the constructor's throwOnInvalidBytes; when true, SetDefaultFallbacks installs exception fallbacks.
   26
   27         internal bool bigEndian = false;        // Constructor argument; true makes the constructor pass 1201 instead of 1200 to the base class.
   28         internal bool byteOrderMark = true;     // Constructor argument, stored as-is. NOTE(review): presumably controls preamble emission - confirm against the rest of the class.
   29
   30         // Unicode version 2.0 character size in bytes (GetByteCount starts from two bytes per char).
   31         public const int CharSize = 2;
  32
  33
  34         public UnicodeEncoding()
  35             : this(false, true)
  36         {
  37         }
  38
  39
  40         public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
  41             : this(bigEndian, byteOrderMark, false)
  42         {
  43         }
  44
  45
  46         public UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
  47             : base(bigEndian ? 1201 : 1200)  //Set the data item.
  48         {
  49             this.isThrowException = throwOnInvalidBytes;
  50             this.bigEndian = bigEndian;
  51             this.byteOrderMark = byteOrderMark;
  52
  53             // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
  54             if (this.isThrowException)
  55                 SetDefaultFallbacks();
  56         }
  57
  58         #region Serialization
  59         [OnDeserializing]
  60         private void OnDeserializing(StreamingContext ctx)
  61         {
  62             // In Everett it is false. Whidbey will overwrite this value.
  63             isThrowException = false;
  64         }
  65         #endregion Serialization
  66
  67         internal override void SetDefaultFallbacks()
  68         {
  69             // For UTF-X encodings, we use a replacement fallback with an empty string
  70             if (this.isThrowException)
  71             {
  72                 this.encoderFallback = EncoderFallback.ExceptionFallback;
  73                 this.decoderFallback = DecoderFallback.ExceptionFallback;
  74             }
  75             else
  76             {
  77                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
  78                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
  79             }
  80         }
  81
  82         // The following methods are copied from EncodingNLS.cs.
   83         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
  84         // These should be kept in sync for the following classes:
  85         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  86         //
  87
  88         // Returns the number of bytes required to encode a range of characters in
  89         // a character array.
  90         //
  91         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  92         // So if you fix this, fix the others.  Currently those include:
  93         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  94         // parent method is safe
  95
  96         public override unsafe int GetByteCount(char[] chars, int index, int count)
  97         {
  98             // Validate input parameters
  99             if (chars == null)
 100                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
 101
 102             if (index < 0 || count < 0)
 103                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
 104
 105             if (chars.Length - index < count)
 106                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
 107             Contract.EndContractBlock();
 108
 109             // If no input, return 0, avoid fixed empty array problem
 110             if (count == 0)
 111                 return 0;
 112
 113             // Just call the pointer version
 114             fixed (char* pChars = chars)
 115                 return GetByteCount(pChars + index, count, null);
 116         }
 117
 118         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 119         // So if you fix this, fix the others.  Currently those include:
 120         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 121         // parent method is safe
 122
 123         public override unsafe int GetByteCount(String s)
 124         {
 125             // Validate input
 126             if (s==null)
 127                 throw new ArgumentNullException("s");
 128             Contract.EndContractBlock();
 129
 130             fixed (char* pChars = s)
 131                 return GetByteCount(pChars, s.Length, null);
 132         }
 133
 134         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 135         // So if you fix this, fix the others.  Currently those include:
 136         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 137
 138         [CLSCompliant(false)]
 139         public override unsafe int GetByteCount(char* chars, int count)
 140         {
 141             // Validate Parameters
 142             if (chars == null)
 143                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
 144
 145             if (count < 0)
 146                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
 147             Contract.EndContractBlock();
 148
 149             // Call it with empty encoder
 150             return GetByteCount(chars, count, null);
 151         }
 152
 153         // Parent method is safe.
 154         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 155         // So if you fix this, fix the others.  Currently those include:
 156         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 157
 158         public override unsafe int GetBytes(String s, int charIndex, int charCount,
 159                                               byte[] bytes, int byteIndex)
 160         {
 161             if (s == null || bytes == null)
 162                 throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
 163
 164             if (charIndex < 0 || charCount < 0)
 165                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 166
 167             if (s.Length - charIndex < charCount)
 168                 throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
 169
 170             if (byteIndex < 0 || byteIndex > bytes.Length)
 171                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
 172             Contract.EndContractBlock();
 173
 174             int byteCount = bytes.Length - byteIndex;
 175
 176             // Fixed doesn't like 0 length arrays.
 177             if (bytes.Length == 0)
 178                 bytes = new byte[1];
 179
 180             fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
 181                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
 182         }
 183
 184         // Encodes a range of characters in a character array into a range of bytes
 185         // in a byte array. An exception occurs if the byte array is not large
 186         // enough to hold the complete encoding of the characters. The
 187         // GetByteCount method can be used to determine the exact number of
 188         // bytes that will be produced for a given range of characters.
 189         // Alternatively, the GetMaxByteCount method can be used to
 190         // determine the maximum number of bytes that will be produced for a given
 191         // number of characters, regardless of the actual character values.
 192         //
 193         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 194         // So if you fix this, fix the others.  Currently those include:
 195         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 196         // parent method is safe
 197
 198         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
 199                                                byte[] bytes, int byteIndex)
 200         {
 201             // Validate parameters
 202             if (chars == null || bytes == null)
 203                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
 204
 205             if (charIndex < 0 || charCount < 0)
 206                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 207
 208             if (chars.Length - charIndex < charCount)
 209                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
 210
 211             if (byteIndex < 0 || byteIndex > bytes.Length)
 212                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
 213             Contract.EndContractBlock();
 214
 215             // If nothing to encode return 0, avoid fixed problem
 216             if (charCount == 0)
 217                 return 0;
 218
 219             // Just call pointer version
 220             int byteCount = bytes.Length - byteIndex;
 221
 222             // Fixed doesn't like 0 length arrays.
 223             if (bytes.Length == 0)
 224                 bytes = new byte[1];
 225
 226             fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
 227                 // Remember that byteCount is # to decode, not size of array.
 228                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
 229         }
 230
 231         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 232         // So if you fix this, fix the others.  Currently those include:
 233         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 234
 235         [CLSCompliant(false)]
 236         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
 237         {
 238             // Validate Parameters
 239             if (bytes == null || chars == null)
 240                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
 241
 242             if (charCount < 0 || byteCount < 0)
 243                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 244             Contract.EndContractBlock();
 245
 246             return GetBytes(chars, charCount, bytes, byteCount, null);
 247         }
 248
 249         // Returns the number of characters produced by decoding a range of bytes
 250         // in a byte array.
 251         //
 252         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 253         // So if you fix this, fix the others.  Currently those include:
 254         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 255         // parent method is safe
 256
 257         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
 258         {
 259             // Validate Parameters
 260             if (bytes == null)
 261                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
 262
 263             if (index < 0 || count < 0)
 264                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
 265
 266             if (bytes.Length - index < count)
 267                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
 268             Contract.EndContractBlock();
 269
 270             // If no input just return 0, fixed doesn't like 0 length arrays
 271             if (count == 0)
 272                 return 0;
 273
 274             // Just call pointer version
 275             fixed (byte* pBytes = bytes)
 276                 return GetCharCount(pBytes + index, count, null);
 277         }
 278
 279         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 280         // So if you fix this, fix the others.  Currently those include:
 281         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 282
 283         [CLSCompliant(false)]
 284         public override unsafe int GetCharCount(byte* bytes, int count)
 285         {
 286             // Validate Parameters
 287             if (bytes == null)
 288                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
 289
 290             if (count < 0)
 291                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
 292             Contract.EndContractBlock();
 293
 294             return GetCharCount(bytes, count, null);
 295         }
 296
 297         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 298         // So if you fix this, fix the others.  Currently those include:
 299         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 300         // parent method is safe
 301
 302         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
 303                                               char[] chars, int charIndex)
 304         {
 305             // Validate Parameters
 306             if (bytes == null || chars == null)
 307                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
 308
 309             if (byteIndex < 0 || byteCount < 0)
 310                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 311
 312             if ( bytes.Length - byteIndex < byteCount)
 313                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
 314
 315             if (charIndex < 0 || charIndex > chars.Length)
 316                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
 317             Contract.EndContractBlock();
 318
 319             // If no input, return 0 & avoid fixed problem
 320             if (byteCount == 0)
 321                 return 0;
 322
 323             // Just call pointer version
 324             int charCount = chars.Length - charIndex;
 325
 326             // Fixed doesn't like 0 length arrays.
 327             if (chars.Length == 0)
 328                 chars = new char[1];
 329
 330             fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
 331                 // Remember that charCount is # to decode, not size of array
 332                 return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
 333         }
 334
 335         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 336         // So if you fix this, fix the others.  Currently those include:
 337         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 338
 339         [CLSCompliant(false)]
 340         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
 341         {
 342             // Validate Parameters
 343             if (bytes == null || chars == null)
 344                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
 345
 346             if (charCount < 0 || byteCount < 0)
 347                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 348             Contract.EndContractBlock();
 349
 350             return GetChars(bytes, byteCount, chars, charCount, null);
 351         }
 352
 353         // Returns a string containing the decoded representation of a range of
 354         // bytes in a byte array.
 355         //
 356         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 357         // So if you fix this, fix the others.  Currently those include:
 358         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 359         // parent method is safe
 360
 361         public override unsafe string GetString(byte[] bytes, int index, int count)
 362         {
 363             // Validate Parameters
 364             if (bytes == null)
 365                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
 366
 367             if (index < 0 || count < 0)
 368                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
 369
 370             if (bytes.Length - index < count)
 371                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
 372             Contract.EndContractBlock();
 373
 374             // Avoid problems with empty input buffer
 375             if (count == 0) return String.Empty;
 376
 377             fixed (byte* pBytes = bytes)
 378                 return String.CreateStringFromEncoding(
 379                     pBytes + index, count, this);
 380         }
 381
 382         //
 383         // End of standard methods copied from EncodingNLS.cs
 384         //
 385
 386         internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
 387         {
 388             Debug.Assert(chars != null, "[UnicodeEncoding.GetByteCount]chars!=null");
 389             Debug.Assert(count >= 0, "[UnicodeEncoding.GetByteCount]count >=0");
 390
 391             // Start by assuming each char gets 2 bytes
 392             int byteCount = count << 1;
 393
 394             // Check for overflow in byteCount
 395             // (If they were all invalid chars, this would actually be wrong,
 396             // but that's a ridiculously large # so we're not concerned about that case)
 397             if (byteCount < 0)
 398                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
 399
 400             char* charStart = chars;
 401             char* charEnd = chars + count;
 402             char charLeftOver = (char)0;
 403
 404             bool wasHereBefore = false;
 405
 406             // Need -1 to check 2 at a time.  If we have an even #, longChars will go
 407             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
 408             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
 409             ulong* longEnd = (ulong*)(charEnd - 3);
 410
 411             // For fallback we may need a fallback buffer
 412             EncoderFallbackBuffer fallbackBuffer = null;
 413             char* charsForFallback;
 414
 415             if (encoder != null)
 416             {
 417                 charLeftOver = encoder.charLeftOver;
 418
 419                 // Assume extra bytes to encode charLeftOver if it existed
 420                 if (charLeftOver > 0)
 421                     byteCount += 2;
 422
 423                 // We mustn't have left over fallback data when counting
 424                 if (encoder.InternalHasFallbackBuffer)
 425                 {
 426                     fallbackBuffer = encoder.FallbackBuffer;
 427                     if (fallbackBuffer.Remaining > 0)
 428                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
 429
 430                     // Set our internal fallback interesting things.
 431                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 432                 }
 433             }
 434
 435             char ch;
 436         TryAgain:
 437
 438             while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
 439             {
 440                 // First unwind any fallback
 441                 if (ch == 0)
 442                 {
 443                     // No fallback, maybe we can do it fast
 444 #if !NO_FAST_UNICODE_LOOP
 445 #if BIGENDIAN       // If endianess is backwards then each pair of bytes would be backwards.
 446                     if ( bigEndian &&
 447 #else
 448                     if (!bigEndian &&
 449 #endif // BIGENDIAN
 450
 451 #if BIT64           // 64 bit CPU needs to be long aligned for this to work.
 452                           charLeftOver == 0 && (unchecked((long)chars) & 7) == 0)
 453 #else
 454                           charLeftOver == 0 && (unchecked((int)chars) & 3) == 0)
 455 #endif
 456                     {
 457                         // Need new char* so we can check 4 at a time
 458                         ulong* longChars = (ulong*)chars;
 459
 460                         while (longChars < longEnd)
 461                         {
 462                             // See if we potentially have surrogates (0x8000 bit set)
 463                             // (We're either big endian on a big endian machine or little endian on
 464                             // a little endian machine so this'll work)
 465                             if ((0x8000800080008000 & *longChars) != 0)
 466                             {
 467                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
 468                                 // 5 bits looks like 11011, then its a high or low surrogate.
 469                                 // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
 470                                 // Note that we expect BMP characters to be more common than surrogates
 471                                 // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
 472                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
 473
 474                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
 475                                 // but no clue if they're high or low.
 476                                 // If each of the 4 characters are non-zero, then none are surrogates.
 477                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
 478                                     (uTemp & 0x0000FFFF00000000) == 0 ||
 479                                     (uTemp & 0x00000000FFFF0000) == 0 ||
 480                                     (uTemp & 0x000000000000FFFF) == 0)
 481                                 {
 482                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
 483                                     // or if there's 1 or 4 surrogates
 484
 485                                     // If they happen to be high/low/high/low, we may as well continue.  Check the next
 486                                     // bit to see if its set (low) or not (high) in the right pattern
 487 #if BIGENDIAN
 488                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
 489 #else
 490                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
 491 #endif
 492                                     {
 493                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
 494                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
 495
 496                                         // Drop out to the slow loop to resolve the surrogates
 497                                         break;
 498                                     }
 499                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
 500                                 }
 501                                 // else none are surrogates, so we can use them.
 502                             }
 503                             // else all < 0x8000 so we can use them
 504
 505                             // We already counted these four chars, go to next long.
 506                             longChars++;
 507                         }
 508
 509                         chars = (char*)longChars;
 510
 511                         if (chars >= charEnd)
 512                             break;
 513                     }
 514 #endif // !NO_FAST_UNICODE_LOOP
 515
 516                     // No fallback, just get next char
 517                     ch = *chars;
 518                     chars++;
 519                 }
 520                 else
 521                 {
 522                     // We weren't preallocating fallback space.
 523                     byteCount += 2;
 524                 }
 525
 526                 // Check for high or low surrogates
 527                 if (ch >= 0xd800 && ch <= 0xdfff)
 528                 {
 529                     // Was it a high surrogate?
 530                     if (ch <= 0xdbff)
 531                     {
 532                         // Its a high surrogate, if we already had a high surrogate do its fallback
 533                         if (charLeftOver > 0)
 534                         {
 535                             // Unwind the current character, this should be safe because we
 536                             // don't have leftover data in the fallback, so chars must have
 537                             // advanced already.
 538                             Debug.Assert(chars > charStart,
 539                                 "[UnicodeEncoding.GetByteCount]Expected chars to have advanced in unexpected high surrogate");
 540                             chars--;
 541
 542                             // If previous high surrogate deallocate 2 bytes
 543                             byteCount -= 2;
 544
 545                             // Fallback the previous surrogate
 546                             // Need to initialize fallback buffer?
 547                             if (fallbackBuffer == null)
 548                             {
 549                                 if (encoder == null)
 550                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 551                                 else
 552                                     fallbackBuffer = encoder.FallbackBuffer;
 553
 554                                 // Set our internal fallback interesting things.
 555                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 556                             }
 557
 558                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
 559                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 560                             chars = charsForFallback;
 561
 562                             // Now no high surrogate left over
 563                             charLeftOver = (char)0;
 564                             continue;
 565                         }
 566
 567                         // Remember this high surrogate
 568                         charLeftOver = ch;
 569                         continue;
 570                     }
 571
 572
 573                     // Its a low surrogate
 574                     if (charLeftOver == 0)
 575                     {
 576                         // Expected a previous high surrogate.
 577                         // Don't count this one (we'll count its fallback if necessary)
 578                         byteCount -= 2;
 579
 580                         // fallback this one
 581                         // Need to initialize fallback buffer?
 582                         if (fallbackBuffer == null)
 583                         {
 584                             if (encoder == null)
 585                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 586                             else
 587                                 fallbackBuffer = encoder.FallbackBuffer;
 588
 589                             // Set our internal fallback interesting things.
 590                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 591                         }
 592                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
 593                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
 594                         chars = charsForFallback;
 595                         continue;
 596                     }
 597
 598                     // Valid surrogate pair, add our charLeftOver
 599                     charLeftOver = (char)0;
 600                     continue;
 601                 }
 602                 else if (charLeftOver > 0)
 603                 {
 604                     // Expected a low surrogate, but this char is normal
 605
 606                     // Rewind the current character, fallback previous character.
 607                     // this should be safe because we don't have leftover data in the
 608                     // fallback, so chars must have advanced already.
 609                     Debug.Assert(chars > charStart,
 610                         "[UnicodeEncoding.GetByteCount]Expected chars to have advanced when expected low surrogate");
 611                     chars--;
 612
 613                     // fallback previous chars
 614                     // Need to initialize fallback buffer?
 615                     if (fallbackBuffer == null)
 616                     {
 617                         if (encoder == null)
 618                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 619                         else
 620                             fallbackBuffer = encoder.FallbackBuffer;
 621
 622                         // Set our internal fallback interesting things.
 623                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 624                     }
 625                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
 626                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 627                     chars = charsForFallback;
 628
 629                     // Ignore charLeftOver or throw
 630                     byteCount -= 2;
 631                     charLeftOver = (char)0;
 632
 633                     continue;
 634                 }
 635
 636                 // Ok we had something to add (already counted)
 637             }
 638
 639             // Don't allocate space for left over char
 640             if (charLeftOver > 0)
 641             {
 642                 byteCount -= 2;
 643
 644                 // If we have to flush, stick it in fallback and try again
 645                 if (encoder == null || encoder.MustFlush)
 646                 {
 647                     if (wasHereBefore)
 648                     {
 649                         // Throw it, using our complete character
 650                         throw new ArgumentException(
 651                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
 652                     }
 653                     else
 654                     {
 655                         // Need to initialize fallback buffer?
 656                         if (fallbackBuffer == null)
 657                         {
 658                             if (encoder == null)
 659                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 660                             else
 661                                 fallbackBuffer = encoder.FallbackBuffer;
 662
 663                             // Set our internal fallback interesting things.
 664                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 665                         }
 666                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
 667                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 668                         chars = charsForFallback;
 669                         charLeftOver = (char)0;
 670                         wasHereBefore = true;
 671                         goto TryAgain;
 672                     }
 673                 }
 674             }
 675
 676             // Shouldn't have anything in fallback buffer for GetByteCount
 677             // (don't have to check m_throwOnOverflow for count)
 678             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
 679                 "[UnicodeEncoding.GetByteCount]Expected empty fallback buffer at end");
 680
 681             // Don't remember fallbackBuffer.encoder for counting
 682             return byteCount;
 683         }
 684
 685         internal override unsafe int GetBytes(char* chars, int charCount,
 686                                                 byte* bytes, int byteCount, EncoderNLS encoder)
 687         {
                 // Encodes charCount UTF-16 chars starting at chars into the buffer at bytes,
                 // writing two bytes per char (high byte first when bigEndian is set, low byte
                 // first otherwise).  Output is bounded by byteCount.
                 //
                 //   chars, charCount - input chars (asserted non-null / non-negative).
                 //   bytes, byteCount - output buffer and its capacity in bytes (asserted likewise).
                 //   encoder          - optional stateful encoder, may be null.  When supplied, a
                 //                      pending high surrogate is read from encoder.charLeftOver on
                 //                      entry, and encoder.charLeftOver and encoder.m_charsUsed are
                 //                      written on exit.
                 //
                 // Surrogates are validated while copying: a high surrogate is parked in
                 // charLeftOver until its low surrogate arrives, and any unpaired surrogate is
                 // handed to the encoder fallback buffer through InternalFallback.  Chars produced
                 // by the fallback buffer are then pulled back through the same main loop.
                 //
                 // Returns the number of bytes written (bytes - byteStart).
                 // Throws ArgumentException when the encoder fallback buffer is non-empty on entry
                 // while encoder.m_throwOnOverflow is set, or when a trailing high surrogate is
                 // still pending after it has already been fallen back once.  ThrowBytesOverflow is
                 // called when the next char (or surrogate pair) does not fit in the output.
 688             Debug.Assert(chars != null, "[UnicodeEncoding.GetBytes]chars!=null");
 689             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetBytes]byteCount >=0");
 690             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetBytes]charCount >=0");
 691             Debug.Assert(bytes != null, "[UnicodeEncoding.GetBytes]bytes!=null");
 692
 693             char charLeftOver = (char)0; // Pending high surrogate waiting for its low surrogate (0 means none)
 694             char ch;
 695             bool wasHereBefore = false; // Set once the trailing high surrogate has been fallen back; a second time throws
 696
 697
 698             byte* byteEnd = bytes + byteCount;
 699             char* charEnd = chars + charCount;
 700             byte* byteStart = bytes;
 701             char* charStart = chars;
 702
 703             // For fallback we may need a fallback buffer (created lazily, only when first needed)
 704             EncoderFallbackBuffer fallbackBuffer = null;
 705             char* charsForFallback;
 706
 707             // Get our encoder, but don't clear it yet.
 708             if (encoder != null)
 709             {
 710                 charLeftOver = encoder.charLeftOver;
 711
 712                 // Left over fallback data is only an error here when encoder.m_throwOnOverflow is set
 713                 if (encoder.InternalHasFallbackBuffer)
 714                 {
 715                     // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
 716                     fallbackBuffer = encoder.FallbackBuffer;
 717                     if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
 718                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
 719
 720                     // Point the fallback buffer at the input range of this call.
                         // NOTE(review): this call passes false as the last argument while every other
                         // InternalInitialize call in GetBytes passes true; the meaning of that flag is
                         // not visible here - confirm the difference is intentional.
 721                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 722                 }
 723             }
 724
 725         TryAgain:
                 // Each iteration takes its next char from the fallback buffer when that yields a
                 // non-zero char, and from the input otherwise; the loop ends when both are exhausted.
 726             while (((ch = (fallbackBuffer == null) ?
 727                         (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) ||
 728                     chars < charEnd)
 729             {
 730                 // First unwind any fallback
 731                 if (ch == 0)
 732                 {
 733                     // No fallback char pending, so read from the input; maybe we can do it fast
 734 #if !NO_FAST_UNICODE_LOOP
 735 #if BIGENDIAN           // If endianess is backwards then each pair of bytes would be backwards.
 736                     if ( bigEndian &&
 737 #else
 738                     if (!bigEndian &&
 739 #endif // BIGENDIAN
 740 #if BIT64           // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned
 741                         (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
 742 #else
 743                         (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
 744 #endif // BIT64
 745                         charLeftOver == 0)
 746                     {
 747                         // longEnd is the exclusive limit for the start of a 4-char (ulong) block.
 748                         // The usable count is the smaller of the remaining chars and the remaining
 749                         // byte capacity expressed in chars ((byteEnd - bytes) >> 1).  Backing off by
 750                         // 3 chars keeps every block that starts below longEnd inside that range.
 751                         ulong* longEnd = (ulong*)(chars - 3 +
 752                                                   (((byteEnd - bytes) >> 1 < charEnd - chars) ?
 753                                                     (byteEnd - bytes) >> 1 : charEnd - chars));
 754
 755                         // View both buffers as ulong* so 4 chars are tested and copied per iteration
 756                         ulong* longChars = (ulong*)chars;
 757                         ulong* longBytes = (ulong*)bytes;
 758
 759                         while (longChars < longEnd)
 760                         {
 761                             // See if we potentially have surrogates (0x8000 bit set)
 762                             // (We're either big endian on a big endian machine or little endian on
 763                             // a little endian machine so this'll work)
 764                             if ((0x8000800080008000 & *longChars) != 0)
 765                             {
 766                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  A char
 767                                 // is a surrogate exactly when its top 5 bits are 11011.
 768                                 // Masking with f800 keeps those 5 bits and xor with d800 turns a match into 0,
 769                                 // so a 16-bit lane of uTemp that is all zero marks a surrogate.
 770                                 // Note that we expect BMP characters to be more common than surrogates.
 771                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
 772
 773                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
 774                                 // but no clue if they're high or low.
 775                                 // If each of the 4 characters are non-zero, then none are surrogates.
 776                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
 777                                     (uTemp & 0x0000FFFF00000000) == 0 ||
 778                                     (uTemp & 0x00000000FFFF0000) == 0 ||
 779                                     (uTemp & 0x000000000000FFFF) == 0)
 780                                 {
 781                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
 782                                     // or if there's 1 or 4 surrogates
 783
 784                                     // If they happen to be high/low/high/low, we may as well continue.  Masking with
 785                                     // fc00 also keeps the 0x0400 bit, which is set for low and clear for high surrogates
 786 #if BIGENDIAN
 787                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
 788 #else
 789                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
 790 #endif
 791                                     {
 792                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
 793                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
 794
 795                                         // Drop out to the slow loop to resolve the surrogates
 796                                         break;
 797                                     }
 798                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
 799                                 }
 800                                 // else none are surrogates, so we can use them.
 801                             }
 802                             // else all < 0x8000 so we can use them
 803
 804                             // We can use these 4 chars: copy them as a single ulong with no byte swapping.
 805                             *longBytes = *longChars;
 806                             longChars++;
 807                             longBytes++;
 808                         }
 809
                             // Publish the progress made by the fast loop back to the main pointers
 810                         chars = (char*)longChars;
 811                         bytes = (byte*)longBytes;
 812
 813                         if (chars >= charEnd)
 814                             break;
 815                     }
 816                     // Not aligned, but maybe we can still be somewhat faster
 817                     // Also somehow this optimizes the above loop?  It seems to cause something above
 818                     // to get enregistered, but I haven't figured out how to make that happen without this loop.
 819                     else if ((charLeftOver == 0) &&
 820 #if BIGENDIAN
 821                         bigEndian &&
 822 #else
 823                         !bigEndian &&
 824 #endif // BIGENDIAN
 825
 826 #if BIT64
 827                         (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) &&  // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
 828 #else
 829                         (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) &&  // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
 830 #endif // BIT64
 831                         (unchecked((int)(bytes)) & 1) == 0) // Output address must be even because it is written through a char* below
 832                     {
 833                         // Number of chars we may copy: the smaller of the remaining input chars and
                             // the remaining output capacity expressed in chars
 834                         long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ?
 835                                        (byteEnd - bytes) >> 1 : charEnd - chars;
 836
 837                         // Copy char by char instead of byte by byte
 838                         char* charOut = ((char*)bytes);     // a char* for our output
 839                         char* tempEnd = chars + iCount - 1; // Stop one char early: the loop reads chars[0] and chars[1]
 840
 841                         while (chars < tempEnd)
 842                         {
 843                             if (*chars >= (char)0xd800 && *chars <= (char)0xdfff)
 844                             {
 845                                 // A leading low surrogate is unpaired: leave it to the slow path for fallback
 846                                 if (*chars >= 0xdc00)
 847                                     break;
 848
 849                                 // break if next one's not a low surrogate (will do fallback)
 850                                 if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff)
 851                                     break;
 852
 853                                 // Valid high/low pair, both are copied by the two-char copy below
 854                             }
 855                             // If 2nd char is surrogate & this one isn't then only add one,
                                 // so that the surrogate is examined as chars[0] on the next iteration
 856                             else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff)
 857                             {
 858                                 *charOut = *chars;
 859                                 charOut++;
 860                                 chars++;
 861                                 continue;
 862                             }
 863
                                 // Copy two chars at once
 864                             *charOut = *chars;
 865                             *(charOut + 1) = *(chars + 1);
 866                             charOut += 2;
 867                             chars += 2;
 868                         }
 869
 870                         bytes = (byte*)charOut;
 871
 872                         if (chars >= charEnd)
 873                             break;
 874                     }
 875 #endif // !NO_FAST_UNICODE_LOOP
 876
 877                     // No fallback, just get next char
 878                     ch = *chars;
 879                     chars++;
 880                 }
 881
 882                 // Check for high or low surrogates
 883                 if (ch >= 0xd800 && ch <= 0xdfff)
 884                 {
 885                     // Was it a high surrogate?
 886                     if (ch <= 0xdbff)
 887                     {
 888                         // High surrogate: if one is already pending, the pending one is unpaired
 889                         if (charLeftOver > 0)
 890                         {
 891                             // Unwind the current character, this should be safe because we
 892                             // don't have leftover data in the fallback, so chars must have
 893                             // advanced already.  The current char is read again on a later iteration.
 894                             Debug.Assert(chars > charStart,
 895                                 "[UnicodeEncoding.GetBytes]Expected chars to have advanced in unexpected high surrogate");
 896                             chars--;
 897
 898                             // Fallback the previous surrogate
 899                             // Might need to create our fallback buffer
 900                             if (fallbackBuffer == null)
 901                             {
 902                                 if (encoder == null)
 903                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 904                                 else
 905                                     fallbackBuffer = encoder.FallbackBuffer;
 906
 907                                 // Point the fallback buffer at the input range of this call.
 908                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 909                             }
 910
 911                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
 912                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 913                             chars = charsForFallback;
 914
 915                             charLeftOver = (char)0;
 916                             continue;
 917                         }
 918
 919                         // Remember this high surrogate; nothing is written until its low surrogate arrives
 920                         charLeftOver = ch;
 921                         continue;
 922                     }
 923
 924                     // Low surrogate: without a pending high surrogate it is unpaired
 925                     if (charLeftOver == 0)
 926                     {
 927                         // We'll fall back this one
 928                         // Might need to create our fallback buffer
 929                         if (fallbackBuffer == null)
 930                         {
 931                             if (encoder == null)
 932                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 933                             else
 934                                 fallbackBuffer = encoder.FallbackBuffer;
 935
 936                             // Point the fallback buffer at the input range of this call.
 937                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 938                         }
 939
 940                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
 941                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
 942                         chars = charsForFallback;
 943                         continue;
 944                     }
 945
 946                     // Valid surrogate pair: make sure all 4 bytes fit before writing charLeftOver
 947                     if (bytes + 3 >= byteEnd)
 948                     {
 949                         // Not enough room to add this surrogate pair
 950                         if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
 951                         {
 952                             // These must have both been from the fallbacks.
 953                             // Both of these MUST have been from a fallback because if the 1st wasn't
 954                             // from a fallback, then a high surrogate followed by an illegal char
 955                             // would've caused the high surrogate to fall back.  If a high surrogate
 956                             // fell back, then it was consumed and both chars came from the fallback.
 957                             fallbackBuffer.MovePrevious();                     // Didn't use either fallback surrogate
 958                             fallbackBuffer.MovePrevious();
 959                         }
 960                         else
 961                         {
 962                             // If we don't have enough room, then either we should've advanced a while
 963                             // or we should have bytes==byteStart and throw below
 964                             Debug.Assert(chars > charStart + 1 || bytes == byteStart,
 965                                 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
 966                             chars -= 2;                                        // Didn't use either surrogate
 967                         }
 968                         ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
 969                         charLeftOver = (char)0;                             // we'll retry it later
 970                         break;                                               // Didn't throw, but stop 'til next time.
 971                     }
 972
                         // Write the pending high surrogate in the requested byte order
 973                     if (bigEndian)
 974                     {
 975                         *(bytes++) = (byte)(charLeftOver >> 8);
 976                         *(bytes++) = (byte)charLeftOver;
 977                     }
 978                     else
 979                     {
 980                         *(bytes++) = (byte)charLeftOver;
 981                         *(bytes++) = (byte)(charLeftOver >> 8);
 982                     }
 983
                         // Fall through: the low surrogate in ch is written by the common code below
 984                     charLeftOver = (char)0;
 985                 }
 986                 else if (charLeftOver > 0)
 987                 {
 988                     // Expected a low surrogate, but this char is normal
 989
 990                     // Rewind the current character, fallback previous character.
 991                     // this should be safe because we don't have leftover data in the
 992                     // fallback, so chars must have advanced already.
 993                     Debug.Assert(chars > charStart,
 994                         "[UnicodeEncoding.GetBytes]Expected chars to have advanced after expecting low surrogate");
 995                     chars--;
 996
 997                     // fallback previous chars
 998                     // Might need to create our fallback buffer
 999                     if (fallbackBuffer == null)
1000                     {
1001                         if (encoder == null)
1002                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
1003                         else
1004                             fallbackBuffer = encoder.FallbackBuffer;
1005
1006                         // Point the fallback buffer at the input range of this call.
1007                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
1008                     }
1009
1010                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1011                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1012                     chars = charsForFallback;
1013
1014                     // Ignore charLeftOver or throw
1015                     charLeftOver = (char)0;
1016                     continue;
1017                 }
1018
1019                 // Ok, we have a char to add; it needs 2 bytes of output
1020                 if (bytes + 1 >= byteEnd)
1021                 {
1022                     // Couldn't add this char
1023                     if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
1024                         fallbackBuffer.MovePrevious();                     // Not using this fallback char
1025                     else
1026                     {
1027                         // Lonely charLeftOver (from previous call) would've been caught up above,
1028                         // so this must be a case where we've already read an input char.
1029                         Debug.Assert(chars > charStart,
1030                             "[UnicodeEncoding.GetBytes]Expected chars to have advanced for failed fallback");
1031                         chars--;                                         // Not using this char
1032                     }
1033                     ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
1034                     break;                                               // didn't throw, just stop
1035                 }
1036
                     // Write ch in the requested byte order
1037                 if (bigEndian)
1038                 {
1039                     *(bytes++) = (byte)(ch >> 8);
1040                     *(bytes++) = (byte)ch;
1041                 }
1042                 else
1043                 {
1044                     *(bytes++) = (byte)ch;
1045                     *(bytes++) = (byte)(ch >> 8);
1046                 }
1047             }
1048
1049             // A high surrogate is still pending after the loop
1050             if (charLeftOver > 0)
1051             {
1052                 // If we are flushing (no encoder, or encoder.MustFlush) we need to fall this back
1053                 if (encoder == null || encoder.MustFlush)
1054                 {
1055                     if (wasHereBefore)
1056                     {
1057                         // Already fell back a trailing surrogate once: throw, using our complete character
1058                         throw new ArgumentException(
1059                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
1060                     }
1061                     else
1062                     {
1063                         // If we have to flush, stick it in fallback and try again
1064                         // Might need to create our fallback buffer
1065                         if (fallbackBuffer == null)
1066                         {
1067                             if (encoder == null)
1068                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
1069                             else
1070                                 fallbackBuffer = encoder.FallbackBuffer;
1071
1072                             // Point the fallback buffer at the input range of this call.
1073                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
1074                         }
1075
1076                         // If we're not flushing, this'll remember the left over character.
1077                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1078                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1079                         chars = charsForFallback;
1080
1081                         charLeftOver = (char)0;
1082                         wasHereBefore = true;
1083                         goto TryAgain; // Re-enter the main loop to encode whatever the fallback produced
1084                     }
1085                 }
1086             }
1087
1088             // Store the pending high surrogate (if any) and the number of chars consumed in the encoder
1089             if (encoder != null)
1090             {
1091                 encoder.charLeftOver = charLeftOver;
1092                 encoder.m_charsUsed = (int)(chars - charStart);
1093             }
1094
1095             // Remember charLeftOver if we must, or clear it if we're flushing
1096             // (charLeftOver should be 0 if we're flushing)
1097             Debug.Assert((encoder != null && !encoder.MustFlush) || charLeftOver == (char)0,
1098                 "[UnicodeEncoding.GetBytes] Expected no left over characters if flushing");
1099
1100             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
1101                 encoder == null || !encoder.m_throwOnOverflow,
1102                 "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting");
1103
1104             // We used to copy it fast, but this doesn't check for surrogates
1105             // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount);
1106
                 // Number of bytes written to the output buffer
1107             return (int)(bytes - byteStart);
1108         }
1109
1110         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
1111         {
1112             Debug.Assert(bytes != null, "[UnicodeEncoding.GetCharCount]bytes!=null");
1113             Debug.Assert(count >= 0, "[UnicodeEncoding.GetCharCount]count >=0");
1114
1115             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1116
1117             byte* byteEnd = bytes + count;
1118             byte* byteStart = bytes;
1119
1120             // Need last vars
1121             int lastByte = -1;
1122             char lastChar = (char)0;
1123
1124             // Start by assuming same # of chars as bytes
1125             int charCount = count >> 1;
1126
1127             // Need -1 to check 2 at a time.  If we have an even #, longBytes will go
1128             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longBytes
1129             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
1130             ulong* longEnd = (ulong*)(byteEnd - 7);
1131
1132             // For fallback we may need a fallback buffer
1133             DecoderFallbackBuffer fallbackBuffer = null;
1134
1135             if (decoder != null)
1136             {
1137                 lastByte = decoder.lastByte;
1138                 lastChar = decoder.lastChar;
1139
1140                 // Assume extra char if last char was around
1141                 if (lastChar > 0)
1142                     charCount++;
1143
1144                 // Assume extra char if extra last byte makes up odd # of input bytes
1145                 if (lastByte >= 0 && (count & 1) == 1)
1146                 {
1147                     charCount++;
1148                 }
1149
1150                 // Shouldn't have anything in fallback buffer for GetCharCount
1151                 // (don't have to check m_throwOnOverflow for count)
1152                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1153                     "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at start");
1154             }
1155
1156             while (bytes < byteEnd)
1157             {
1158                 // If we're aligned then maybe we can do it fast
1159                 // This'll hurt if we're unaligned because we'll always test but never be aligned
1160 #if !NO_FAST_UNICODE_LOOP
1161 #if BIGENDIAN
1162                 if (bigEndian &&
1163 #else // BIGENDIAN
1164                 if (!bigEndian &&
1165 #endif // BIGENDIAN
1166 #if BIT64 // win64 has to be long aligned
1167                     (unchecked((long)bytes) & 7) == 0 &&
1168 #else
1169                     (unchecked((int)bytes) & 3) == 0 &&
1170 #endif // BIT64
1171                     lastByte == -1 && lastChar == 0)
1172                 {
1173                     // Need new char* so we can check 4 at a time
1174                     ulong* longBytes = (ulong*)bytes;
1175
1176                     while (longBytes < longEnd)
1177                     {
1178                         // See if we potentially have surrogates (0x8000 bit set)
1179                         // (We're either big endian on a big endian machine or little endian on
1180                         // a little endian machine so this'll work)
1181                         if ((0x8000800080008000 & *longBytes) != 0)
1182                         {
1183                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1184                             // 5 bits looks like 11011, then its a high or low surrogate.
1185                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1186                             // Note that we expect BMP characters to be more common than surrogates
1187                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1188                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1189
1190                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1191                             // but no clue if they're high or low.
1192                             // If each of the 4 characters are non-zero, then none are surrogates.
1193                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1194                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1195                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1196                                 (uTemp & 0x000000000000FFFF) == 0)
1197                             {
1198                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1199                                 // or if there's 1 or 4 surrogates
1200
1201                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1202                                 // bit to see if its set (low) or not (high) in the right pattern
1203 #if BIGENDIAN
1204                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1205 #else
1206                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1207 #endif
1208                                 {
1209                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1210                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1211
1212                                     // Drop out to the slow loop to resolve the surrogates
1213                                     break;
1214                                 }
1215                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1216                             }
1217                             // else none are surrogates, so we can use them.
1218                         }
1219                         // else all < 0x8000 so we can use them
1220
1221                         // We can use these 4 chars.
1222                         longBytes++;
1223                     }
1224
1225                     bytes = (byte*)longBytes;
1226
1227                     if (bytes >= byteEnd)
1228                         break;
1229                 }
1230 #endif // !NO_FAST_UNICODE_LOOP
1231
1232                 // Get 1st byte
1233                 if (lastByte < 0)
1234                 {
1235                     lastByte = *bytes++;
1236                     if (bytes >= byteEnd) break;
1237                 }
1238
1239                 // Get full char
1240                 char ch;
1241                 if (bigEndian)
1242                 {
1243                     ch = (char)(lastByte << 8 | *(bytes++));
1244                 }
1245                 else
1246                 {
1247                     ch = (char)(*(bytes++) << 8 | lastByte);
1248                 }
1249                 lastByte = -1;
1250
1251                 // See if the char's valid
1252                 if (ch >= 0xd800 && ch <= 0xdfff)
1253                 {
1254                     // Was it a high surrogate?
1255                     if (ch <= 0xdbff)
1256                     {
1257                         // Its a high surrogate, if we had one then do fallback for previous one
1258                         if (lastChar > 0)
1259                         {
1260                             // Ignore previous bad high surrogate
1261                             charCount--;
1262
1263                             // Get fallback for previous high surrogate
1264                             // Note we have to reconstruct bytes because some may have been in decoder
1265                             byte[] byteBuffer = null;
1266                             if (bigEndian)
1267                             {
1268                                 byteBuffer = new byte[]
1269                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1270                             }
1271                             else
1272                             {
1273                                 byteBuffer = new byte[]
1274                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1275                             }
1276
1277                             if (fallbackBuffer == null)
1278                             {
1279                                 if (decoder == null)
1280                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1281                                 else
1282                                     fallbackBuffer = decoder.FallbackBuffer;
1283
1284                                 // Set our internal fallback interesting things.
1285                                 fallbackBuffer.InternalInitialize(byteStart, null);
1286                             }
1287
1288                             // Get fallback.
1289                             charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1290                         }
1291
1292                         // Ignore the last one which fell back already,
1293                         // and remember the new high surrogate
1294                         lastChar = ch;
1295                         continue;
1296                     }
1297
1298                     // Its a low surrogate
1299                     if (lastChar == 0)
1300                     {
1301                         // Expected a previous high surrogate
1302                         charCount--;
1303
1304                         // Get fallback for this low surrogate
1305                         // Note we have to reconstruct bytes because some may have been in decoder
1306                         byte[] byteBuffer = null;
1307                         if (bigEndian)
1308                         {
1309                             byteBuffer = new byte[]
1310                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1311                         }
1312                         else
1313                         {
1314                             byteBuffer = new byte[]
1315                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1316                         }
1317
1318                         if (fallbackBuffer == null)
1319                         {
1320                             if (decoder == null)
1321                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1322                             else
1323                                 fallbackBuffer = decoder.FallbackBuffer;
1324
1325                             // Set our internal fallback interesting things.
1326                             fallbackBuffer.InternalInitialize(byteStart, null);
1327                         }
1328
1329                         charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1330
1331                         // Ignore this one (we already did its fallback)
1332                         continue;
1333                     }
1334
1335                     // Valid surrogate pair, already counted.
1336                     lastChar = (char)0;
1337                 }
1338                 else if (lastChar > 0)
1339                 {
1340                     // Had a high surrogate, expected a low surrogate
1341                     // Uncount the last high surrogate
1342                     charCount--;
1343
1344                     // fall back the high surrogate.
1345                     byte[] byteBuffer = null;
1346                     if (bigEndian)
1347                     {
1348                         byteBuffer = new byte[]
1349                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1350                     }
1351                     else
1352                     {
1353                         byteBuffer = new byte[]
1354                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1355                     }
1356
1357                     if (fallbackBuffer == null)
1358                     {
1359                         if (decoder == null)
1360                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1361                         else
1362                             fallbackBuffer = decoder.FallbackBuffer;
1363
1364                         // Set our internal fallback interesting things.
1365                         fallbackBuffer.InternalInitialize(byteStart, null);
1366                     }
1367
1368                     // Already subtracted high surrogate
1369                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1370
1371                     // Not left over now, clear previous high surrogate and continue to add current char
1372                     lastChar = (char)0;
1373                 }
1374
1375                 // Valid char, already counted
1376             }
1377
1378             // Extra space if we can't use decoder
1379             if (decoder == null || decoder.MustFlush)
1380             {
1381                 if (lastChar > 0)
1382                 {
1383                     // No hanging high surrogates allowed, do fallback and remove count for it
1384                     charCount--;
1385                     byte[] byteBuffer = null;
1386                     if (bigEndian)
1387                     {
1388                         byteBuffer = new byte[]
1389                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1390                     }
1391                     else
1392                     {
1393                         byteBuffer = new byte[]
1394                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1395                     }
1396
1397                     if (fallbackBuffer == null)
1398                     {
1399                         if (decoder == null)
1400                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1401                         else
1402                             fallbackBuffer = decoder.FallbackBuffer;
1403
1404                         // Set our internal fallback interesting things.
1405                         fallbackBuffer.InternalInitialize(byteStart, null);
1406                     }
1407
1408                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1409
1410                     lastChar = (char)0;
1411                 }
1412
1413                 if (lastByte >= 0)
1414                 {
1415                     if (fallbackBuffer == null)
1416                     {
1417                         if (decoder == null)
1418                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1419                         else
1420                             fallbackBuffer = decoder.FallbackBuffer;
1421
1422                         // Set our internal fallback interesting things.
1423                         fallbackBuffer.InternalInitialize(byteStart, null);
1424                     }
1425
1426                     // No hanging odd bytes allowed if must flush
1427                     charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
1428                     lastByte = -1;
1429                 }
1430             }
1431
1432             // If we had a high surrogate left over, we can't count it
1433             if (lastChar > 0)
1434                 charCount--;
1435
1436             // Shouldn't have anything in fallback buffer for GetCharCount
1437             // (don't have to check m_throwOnOverflow for count)
1438             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1439                 "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at end");
1440
1441             return charCount;
1442         }
1443
1444         internal override unsafe int GetChars(byte* bytes, int byteCount,
1445                                                 char* chars, int charCount, DecoderNLS baseDecoder)
1446         {
1447             Debug.Assert(chars != null, "[UnicodeEncoding.GetChars]chars!=null");
1448             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetChars]byteCount >=0");
1449             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetChars]charCount >=0");
1450             Debug.Assert(bytes != null, "[UnicodeEncoding.GetChars]bytes!=null");
1451
1452             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1453
1454             // Need last vars
1455             int lastByte = -1;
1456             char lastChar = (char)0;
1457
1458             // Get our decoder (but don't clear it yet)
1459             if (decoder != null)
1460             {
1461                 lastByte = decoder.lastByte;
1462                 lastChar = decoder.lastChar;
1463
1464                 // Shouldn't have anything in fallback buffer for GetChars
1465                 // (don't have to check m_throwOnOverflow for chars)
1466                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1467                     "[UnicodeEncoding.GetChars]Expected empty fallback buffer at start");
1468             }
1469
1470             // For fallback we may need a fallback buffer
1471             DecoderFallbackBuffer fallbackBuffer = null;
1472             char* charsForFallback;
1473
1474             byte* byteEnd = bytes + byteCount;
1475             char* charEnd = chars + charCount;
1476             byte* byteStart = bytes;
1477             char* charStart = chars;
1478
1479             while (bytes < byteEnd)
1480             {
1481                 // If we're aligned then maybe we can do it fast
1482                 // This'll hurt if we're unaligned because we'll always test but never be aligned
1483 #if !NO_FAST_UNICODE_LOOP
1484 #if BIGENDIAN
1485                 if (bigEndian &&
1486 #else // BIGENDIAN
1487                 if (!bigEndian &&
1488 #endif // BIGENDIAN
1489 #if BIT64 // win64 has to be long aligned
1490                     (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
1491 #else
1492                     (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
1493 #endif // BIT64
1494                     lastByte == -1 && lastChar == 0)
1495                 {
1496                     // Back off 7 bytes so that the loop below (longBytes < longEnd) only runs while a
1497                     // full 8-byte ulong (4 chars) still fits inside the usable range, for both the
1498                     // read from bytes and the matching write to chars.
1499                     // The usable range is limited by the shorter of the char or byte buffers.
1500                     ulong* longEnd = (ulong*)(bytes - 7 +
1501                                                 (((byteEnd - bytes) >> 1 < charEnd - chars) ?
1502                                                   (byteEnd - bytes) : (charEnd - chars) << 1));
1503
1504                     // Need ulong* views of both buffers so we can check and copy 4 chars at a time
1505                     ulong* longBytes = (ulong*)bytes;
1506                     ulong* longChars = (ulong*)chars;
1507
1508                     while (longBytes < longEnd)
1509                     {
1510                         // See if we potentially have surrogates (0x8000 bit set)
1511                         // (We're either big endian on a big endian machine or little endian on
1512                         // a little endian machine so this'll work)
1513                         if ((0x8000800080008000 & *longBytes) != 0)
1514                         {
1515                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1516                             // 5 bits looks like 11011, then its a high or low surrogate.
1517                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1518                             // Note that we expect BMP characters to be more common than surrogates
1519                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1520                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1521
1522                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1523                             // but no clue if they're high or low.
1524                             // If each of the 4 characters are non-zero, then none are surrogates.
1525                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1526                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1527                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1528                                 (uTemp & 0x000000000000FFFF) == 0)
1529                             {
1530                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1531                                 // or if there's 1 or 4 surrogates
1532
1533                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1534                                 // bit to see if its set (low) or not (high) in the right pattern
1535 #if BIGENDIAN
1536                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1537 #else
1538                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1539 #endif
1540                                 {
1541                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1542                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1543
1544                                     // Drop out to the slow loop to resolve the surrogates
1545                                     break;
1546                                 }
1547                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1548                             }
1549                             // else none are surrogates, so we can use them.
1550                         }
1551                         // else all < 0x8000 so we can use them
1552
1553                         // We can use these 4 chars.
1554                         *longChars = *longBytes;
1555                         longBytes++;
1556                         longChars++;
1557                     }
1558
1559                     chars = (char*)longChars;
1560                     bytes = (byte*)longBytes;
1561
1562                     if (bytes >= byteEnd)
1563                         break;
1564                 }
1565 #endif // !NO_FAST_UNICODE_LOOP
1566
1567                 // Get 1st byte
1568                 if (lastByte < 0)
1569                 {
1570                     lastByte = *bytes++;
1571                     continue;
1572                 }
1573
1574                 // Get full char
1575                 char ch;
1576                 if (bigEndian)
1577                 {
1578                     ch = (char)(lastByte << 8 | *(bytes++));
1579                 }
1580                 else
1581                 {
1582                     ch = (char)(*(bytes++) << 8 | lastByte);
1583                 }
1584                 lastByte = -1;
1585
1586                 // See if the char's valid
1587                 if (ch >= 0xd800 && ch <= 0xdfff)
1588                 {
1589                     // Was it a high surrogate?
1590                     if (ch <= 0xdbff)
1591                     {
1592                         // Its a high surrogate, if we had one then do fallback for previous one
1593                         if (lastChar > 0)
1594                         {
1595                             // Get fallback for previous high surrogate
1596                             // Note we have to reconstruct bytes because some may have been in decoder
1597                             byte[] byteBuffer = null;
1598                             if (bigEndian)
1599                             {
1600                                 byteBuffer = new byte[]
1601                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1602                             }
1603                             else
1604                             {
1605                                 byteBuffer = new byte[]
1606                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1607                             }
1608
1609                             if (fallbackBuffer == null)
1610                             {
1611                                 if (decoder == null)
1612                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1613                                 else
1614                                     fallbackBuffer = decoder.FallbackBuffer;
1615
1616                                 // Set our internal fallback interesting things.
1617                                 fallbackBuffer.InternalInitialize(byteStart, charEnd);
1618                             }
1619
1620                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1621                             bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1622                             chars = charsForFallback;
1623
1624                             if (!fallbackResult)
1625                             {
1626                                 // couldn't fall back lonely surrogate
1627                                 // We either advanced bytes or chars should == charStart and throw below
1628                                 Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1629                                     "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (bad surrogate)");
1630                                 bytes -= 2;                                       // didn't use these 2 bytes
1631                                 fallbackBuffer.InternalReset();
1632                                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1633                                 break;                                          // couldn't fallback but didn't throw
1634                             }
1635                         }
1636
1637                         // Ignore the previous high surrogate which fell back already,
1638                         // yet remember the current high surrogate for next time.
1639                         lastChar = ch;
1640                         continue;
1641                     }
1642
1643                     // Its a low surrogate
1644                     if (lastChar == 0)
1645                     {
1646                         // Expected a previous high surrogate
1647                         // Get fallback for this low surrogate
1648                         // Note we have to reconstruct bytes because some may have been in decoder
1649                         byte[] byteBuffer = null;
1650                         if (bigEndian)
1651                         {
1652                             byteBuffer = new byte[]
1653                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1654                         }
1655                         else
1656                         {
1657                             byteBuffer = new byte[]
1658                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1659                         }
1660
1661                         if (fallbackBuffer == null)
1662                         {
1663                             if (decoder == null)
1664                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1665                             else
1666                                 fallbackBuffer = decoder.FallbackBuffer;
1667
1668                             // Set our internal fallback interesting things.
1669                             fallbackBuffer.InternalInitialize(byteStart, charEnd);
1670                         }
1671
1672                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1673                         bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1674                         chars = charsForFallback;
1675
1676                         if (!fallbackResult)
1677                         {
1678                             // couldn't fall back lonely surrogate
1679                             // We either advanced bytes or chars should == charStart and throw below
1680                             Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1681                                 "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (lonely surrogate)");
1682                             bytes -= 2;                                       // didn't use these 2 bytes
1683                             fallbackBuffer.InternalReset();
1684                             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1685                             break;                                          // couldn't fallback but didn't throw
1686                         }
1687
1688                         // Didn't throw, ignore this one (we already did its fallback)
1689                         continue;
1690                     }
1691
1692                     // Valid surrogate pair, add our lastChar (will need 2 chars)
1693                     if (chars >= charEnd - 1)
1694                     {
1695                         // couldn't find room for this surrogate pair
1696                         // We either advanced bytes or chars should == charStart and throw below
1697                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1698                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (surrogate pair)");
1699                         bytes -= 2;                                       // didn't use these 2 bytes
1700                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1701                         // Leave lastChar for next call to Convert()
1702                         break;                                          // couldn't fallback but didn't throw
1703                     }
1704
1705                     *chars++ = lastChar;
1706                     lastChar = (char)0;
1707                 }
1708                 else if (lastChar > 0)
1709                 {
1710                     // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
1711                     byte[] byteBuffer = null;
1712                     if (bigEndian)
1713                     {
1714                         byteBuffer = new byte[]
1715                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1716                     }
1717                     else
1718                     {
1719                         byteBuffer = new byte[]
1720                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1721                     }
1722
1723                     if (fallbackBuffer == null)
1724                     {
1725                         if (decoder == null)
1726                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1727                         else
1728                             fallbackBuffer = decoder.FallbackBuffer;
1729
1730                         // Set our internal fallback interesting things.
1731                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1732                     }
1733
1734                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1735                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1736                     chars = charsForFallback;
1737
1738                     if (!fallbackResult)
1739                     {
1740                         // couldn't fall back high surrogate, or char that would be next
1741                         // We either advanced bytes or chars should == charStart and throw below
1742                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1743                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (no low surrogate)");
1744                         bytes -= 2;                                       // didn't use these 2 bytes
1745                         fallbackBuffer.InternalReset();
1746                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1747                         break;                                          // couldn't fallback but didn't throw
1748                     }
1749
1750                     // Not left over now, clear previous high surrogate and continue to add current char
1751                     lastChar = (char)0;
1752                 }
1753
1754                 // Valid char, room for it?
1755                 if (chars >= charEnd)
1756                 {
1757                     // 2 bytes couldn't fall back
1758                     // We either advanced bytes or chars should == charStart and throw below
1759                     Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1760                         "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (normal)");
1761                     bytes -= 2;                                       // didn't use these bytes
1762                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1763                     break;                                          // couldn't fallback but didn't throw
1764                 }
1765
1766                 // add it
1767                 *chars++ = ch;
1768             }
1769
1770             // No decoder, or the decoder must flush: fall back any leftover high surrogate or odd byte
1771             if (decoder == null || decoder.MustFlush)
1772             {
1773                 if (lastChar > 0)
1774                 {
1775                     // No hanging high surrogates allowed, do fallback for it
1776                     byte[] byteBuffer = null;
1777                     if (bigEndian)
1778                     {
1779                         byteBuffer = new byte[]
1780                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1781                     }
1782                     else
1783                     {
1784                         byteBuffer = new byte[]
1785                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1786                     }
1787
1788                     if (fallbackBuffer == null)
1789                     {
1790                         if (decoder == null)
1791                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1792                         else
1793                             fallbackBuffer = decoder.FallbackBuffer;
1794
1795                         // Set our internal fallback interesting things.
1796                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1797                     }
1798
1799                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1800                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1801                     chars = charsForFallback;
1802
1803                     if (!fallbackResult)
1804                     {
1805                         // 2 bytes couldn't fall back
1806                         // We either advanced bytes or chars should == charStart and throw below
1807                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1808                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (decoder)");
1809                         bytes -= 2;                                       // didn't use these bytes
1810                         if (lastByte >= 0)
1811                             bytes--;                                    // had an extra last byte hanging around
1812                         fallbackBuffer.InternalReset();
1813                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1814                         // We'll remember these in our decoder though
1815                         bytes += 2;
1816                         if (lastByte >= 0)
1817                             bytes++;
1818                         goto End;
1819                     }
1820
1821                     // done with this one
1822                     lastChar = (char)0;
1823                 }
1824
1825                 if (lastByte >= 0)
1826                 {
1827                     if (fallbackBuffer == null)
1828                     {
1829                         if (decoder == null)
1830                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1831                         else
1832                             fallbackBuffer = decoder.FallbackBuffer;
1833
1834                         // Set our internal fallback interesting things.
1835                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1836                     }
1837
1838                     // No hanging odd bytes allowed if must flush
1839                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
1840                     bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback);
1841                     chars = charsForFallback;
1842
1843                     if (!fallbackResult)
1844                     {
1845                         // odd byte couldn't fall back
1846                         bytes--;                                        // didn't use this byte
1847                         fallbackBuffer.InternalReset();
1848                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1849                         // didn't throw, but we'll remember it in the decoder
1850                         bytes++;
1851                         goto End;
1852                     }
1853
1854                     // Didn't fail, clear buffer
1855                     lastByte = -1;
1856                 }
1857             }
1858
1859         End:
1860
1861             // Remember our decoder if we must
1862             if (decoder != null)
1863             {
1864                 Debug.Assert((decoder.MustFlush == false) || ((lastChar == (char)0) && (lastByte == -1)),
1865                     "[UnicodeEncoding.GetChars] Expected no left over chars or bytes if flushing"
1866                     //                    + " " + ((int)lastChar).ToString("X4") + " " + lastByte.ToString("X2")
1867                     );
1868
1869                 decoder.m_bytesUsed = (int)(bytes - byteStart);
1870                 decoder.lastChar = lastChar;
1871                 decoder.lastByte = lastByte;
1872             }
1873
1874             // Used to do this the old way
1875             // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);
1876
1877             // Shouldn't have anything in fallback buffer for GetChars
1878             // (don't have to check m_throwOnOverflow for count or chars)
1879             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1880                 "[UnicodeEncoding.GetChars]Expected empty fallback buffer at end");
1881
1882             return (int)(chars - charStart);
1883         }
1884
1885
1886         // Returns a new EncoderNLS constructed around this encoding instance;
1887         // every call hands back a separate encoder object.
1888         public override System.Text.Encoder GetEncoder()
1889             => new EncoderNLS(this);
1890
1891
1892         // Returns a new UnicodeEncoding.Decoder constructed around this encoding
1893         // instance; every call hands back a separate decoder object.
1894         public override System.Text.Decoder GetDecoder()
1895             => new UnicodeEncoding.Decoder(this);
1896
1897
1898         // Returns the byte order mark for this encoding: FE FF when big endian,
1899         // FF FE when little endian, or an empty array when no mark is emitted.
1900         public override byte[] GetPreamble()
1901         {
1902             // Byte order mark switched off: there is nothing to emit.
1903             if (!byteOrderMark)
1904                 return Array.Empty<Byte>();
1905             // A new array is built on every call so that a caller mutating the
1906             // result cannot corrupt a shared, cached preamble.
1907             return bigEndian
1908                 ? new byte[2] { 0xfe, 0xff }
1909                 : new byte[2] { 0xff, 0xfe };
1910         }
1911
1912
1913         // Worst-case number of bytes produced when encoding charCount characters.
1914         // Throws ArgumentOutOfRangeException if charCount is negative or if the
1915         // worst case does not fit in an Int32.
1916         public override int GetMaxByteCount(int charCount)
1917         {
1918             if (charCount < 0)
1919                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1920             Contract.EndContractBlock();
1921
1922             // Done in long arithmetic; the +1 covers a left over high surrogate.
1923             long maxBytes = (long)charCount + 1;
1924
1925             // Each of those chars may be replaced by several fallback chars.
1926             if (EncoderFallback.MaxCharCount > 1)
1927                 maxBytes *= EncoderFallback.MaxCharCount;
1928             maxBytes *= CharSize; // every char is written as CharSize (2) bytes
1929
1930             if (maxBytes > int.MaxValue)
1931                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
1932             return (int)maxBytes;
1933         }
1934
1935
1936         // Worst-case number of chars produced when decoding byteCount bytes.
1937         // Throws ArgumentOutOfRangeException if byteCount is negative or if the
1938         // worst case does not fit in an Int32.
1939         public override int GetMaxCharCount(int byteCount)
1940         {
1941             if (byteCount < 0)
1942                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1943             Contract.EndContractBlock();
1944
1945             // One char per byte pair, one more for an odd trailing byte, and one
1946             // more for a high surrogate that may be left over in the decoder.
1947             long maxChars = (long)(byteCount / 2) + (byteCount % 2) + 1;
1948
1949             // Invalid input (e.g. lone surrogates) may expand through the fallback.
1950             if (DecoderFallback.MaxCharCount > 1)
1951                 maxChars *= DecoderFallback.MaxCharCount;
1952
1953             if (maxChars > int.MaxValue)
1954                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
1955
1956             return (int)maxChars;
1957         }
1958
1959
1960         // Two UnicodeEncoding instances are equal when they agree on code page,
1961         // byte order mark, endianness and both fallbacks.
1962         public override bool Equals(Object value)
1963         {
1964             UnicodeEncoding other = value as UnicodeEncoding;
1965             if (other == null)
1966                 return false; // null, or not a UnicodeEncoding at all
1967
1968             // CodePage is checked explicitly: big endian is 1201, little endian 1200.
1969             // isThrowException is skipped; the fallback comparison stands in for it.
1970             if (CodePage != other.CodePage)
1971                 return false;
1972             if (byteOrderMark != other.byteOrderMark || bigEndian != other.bigEndian)
1973                 return false;
1974
1975             return EncoderFallback.Equals(other.EncoderFallback)
1976                 && DecoderFallback.Equals(other.DecoderFallback);
1977         }
1978
1979         // Sums the code page, both fallback hash codes, and one flag value each
1980         // for the byte order mark (4) and big endian (8) settings.
1981         public override int GetHashCode() =>
1982             CodePage + EncoderFallback.GetHashCode() + DecoderFallback.GetHashCode() +
1983             (byteOrderMark ? 4 : 0) + (bigEndian ? 8 : 0);
1984
1985         // Decoder that keeps state between calls: an odd trailing byte (lastByte)
1986         // and a pending char such as a left over high surrogate (lastChar). Both
1987         // fields are read and written back by UnicodeEncoding's GetChars.
1988         private sealed class Decoder : System.Text.DecoderNLS, ISerializable
1989         {
1990             // Sentinels: -1 means no byte is pending, '\0' means no char is pending.
1991             internal int lastByte = -1;
1992             internal char lastChar = '\0';
1993
1994             // Nothing to do here: the base constructor calls Reset.
1995             public Decoder(UnicodeEncoding encoding) : base(encoding) { }
1996
1997             // Serialization is not supported; the deserialization constructor
1998             // always throws PlatformNotSupportedException.
1999             internal Decoder(SerializationInfo info, StreamingContext context)
2000             {
2001                 throw new PlatformNotSupportedException();
2002             }
2003
2004             // ISerializable implementation; always throws as well.
2005             void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
2006             {
2007                 throw new PlatformNotSupportedException();
2008             }
2009
2010             // Discards any pending byte and char, then resets the fallback buffer
2011             // when one has been created.
2012             public override void Reset()
2013             {
2014                 lastChar = '\0';
2015                 lastByte = -1;
2016                 m_fallbackBuffer?.Reset();
2017             }
2018
2019             // True while a byte or a char is still pending from earlier input.
2020             internal override bool HasState
2021                 => lastByte != -1 || lastChar != '\0';
2022         }
2023     }
2024 }
2025