src/mscorlib/shared/System/Text/UnicodeEncoding.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 //
   6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
   7 //
   8
   9 using System;
  10 using System.Globalization;
  11 using System.Diagnostics;
  12 using System.Diagnostics.Contracts;
  13
  14 namespace System.Text
  15 {
  16     public class UnicodeEncoding : Encoding
  17     {
  18         // Used by Encoding.BigEndianUnicode/Unicode for lazy initialization
  19         // The initialization code will not be run until a static member of the class is referenced
  20         internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
  21         internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);
  22
  23         internal bool isThrowException = false;
  24
  25         internal bool bigEndian = false;
  26         internal bool byteOrderMark = true;
  27
  28         // Unicode version 2.0 character size in bytes
  29         public const int CharSize = 2;
  30
  31
  32         public UnicodeEncoding()
  33             : this(false, true)
  34         {
  35         }
  36
  37
  38         public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
  39             : this(bigEndian, byteOrderMark, false)
  40         {
  41         }
  42
  43
  44         public UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
  45             : base(bigEndian ? 1201 : 1200)  //Set the data item.
  46         {
  47             this.isThrowException = throwOnInvalidBytes;
  48             this.bigEndian = bigEndian;
  49             this.byteOrderMark = byteOrderMark;
  50
  51             // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
  52             if (this.isThrowException)
  53                 SetDefaultFallbacks();
  54         }
  55
  56         internal override void SetDefaultFallbacks()
  57         {
  58             // For UTF-X encodings, we use a replacement fallback with an empty string
  59             if (this.isThrowException)
  60             {
  61                 this.encoderFallback = EncoderFallback.ExceptionFallback;
  62                 this.decoderFallback = DecoderFallback.ExceptionFallback;
  63             }
  64             else
  65             {
  66                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
  67                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
  68             }
  69         }
  70
  71         // The following methods are copied from EncodingNLS.cs.
  72         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
  73         // These should be kept in sync for the following classes:
  74         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  75         //
  76
  77         // Returns the number of bytes required to encode a range of characters in
  78         // a character array.
  79         //
  80         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  81         // So if you fix this, fix the others.  Currently those include:
  82         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  83         // parent method is safe
  84
  85         public override unsafe int GetByteCount(char[] chars, int index, int count)
  86         {
  87             // Validate input parameters
  88             if (chars == null)
  89                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
  90
  91             if (index < 0 || count < 0)
  92                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
  93
  94             if (chars.Length - index < count)
  95                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
  96             Contract.EndContractBlock();
  97
  98             // If no input, return 0, avoid fixed empty array problem
  99             if (count == 0)
 100                 return 0;
 101
 102             // Just call the pointer version
 103             fixed (char* pChars = chars)
 104                 return GetByteCount(pChars + index, count, null);
 105         }
 106
 107         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 108         // So if you fix this, fix the others.  Currently those include:
 109         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 110         // parent method is safe
 111
 112         public override unsafe int GetByteCount(String s)
 113         {
 114             // Validate input
 115             if (s==null)
 116                 throw new ArgumentNullException("s");
 117             Contract.EndContractBlock();
 118
 119             fixed (char* pChars = s)
 120                 return GetByteCount(pChars, s.Length, null);
 121         }
 122
 123         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 124         // So if you fix this, fix the others.  Currently those include:
 125         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 126
 127         [CLSCompliant(false)]
 128         public override unsafe int GetByteCount(char* chars, int count)
 129         {
 130             // Validate Parameters
 131             if (chars == null)
 132                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
 133
 134             if (count < 0)
 135                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
 136             Contract.EndContractBlock();
 137
 138             // Call it with empty encoder
 139             return GetByteCount(chars, count, null);
 140         }
 141
 142         // Parent method is safe.
 143         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 144         // So if you fix this, fix the others.  Currently those include:
 145         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 146
 147         public override unsafe int GetBytes(String s, int charIndex, int charCount,
 148                                               byte[] bytes, int byteIndex)
 149         {
 150             if (s == null || bytes == null)
 151                 throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
 152
 153             if (charIndex < 0 || charCount < 0)
 154                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 155
 156             if (s.Length - charIndex < charCount)
 157                 throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
 158
 159             if (byteIndex < 0 || byteIndex > bytes.Length)
 160                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
 161             Contract.EndContractBlock();
 162
 163             int byteCount = bytes.Length - byteIndex;
 164
 165             // Fixed doesn't like 0 length arrays.
 166             if (bytes.Length == 0)
 167                 bytes = new byte[1];
 168
 169             fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
 170                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
 171         }
 172
 173         // Encodes a range of characters in a character array into a range of bytes
 174         // in a byte array. An exception occurs if the byte array is not large
 175         // enough to hold the complete encoding of the characters. The
 176         // GetByteCount method can be used to determine the exact number of
 177         // bytes that will be produced for a given range of characters.
 178         // Alternatively, the GetMaxByteCount method can be used to
 179         // determine the maximum number of bytes that will be produced for a given
 180         // number of characters, regardless of the actual character values.
 181         //
 182         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 183         // So if you fix this, fix the others.  Currently those include:
 184         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 185         // parent method is safe
 186
 187         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
 188                                                byte[] bytes, int byteIndex)
 189         {
 190             // Validate parameters
 191             if (chars == null || bytes == null)
 192                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
 193
 194             if (charIndex < 0 || charCount < 0)
 195                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 196
 197             if (chars.Length - charIndex < charCount)
 198                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
 199
 200             if (byteIndex < 0 || byteIndex > bytes.Length)
 201                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
 202             Contract.EndContractBlock();
 203
 204             // If nothing to encode return 0, avoid fixed problem
 205             if (charCount == 0)
 206                 return 0;
 207
 208             // Just call pointer version
 209             int byteCount = bytes.Length - byteIndex;
 210
 211             // Fixed doesn't like 0 length arrays.
 212             if (bytes.Length == 0)
 213                 bytes = new byte[1];
 214
 215             fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
 216                 // Remember that byteCount is # to decode, not size of array.
 217                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
 218         }
 219
 220         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 221         // So if you fix this, fix the others.  Currently those include:
 222         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 223
 224         [CLSCompliant(false)]
 225         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
 226         {
 227             // Validate Parameters
 228             if (bytes == null || chars == null)
 229                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
 230
 231             if (charCount < 0 || byteCount < 0)
 232                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 233             Contract.EndContractBlock();
 234
 235             return GetBytes(chars, charCount, bytes, byteCount, null);
 236         }
 237
 238         // Returns the number of characters produced by decoding a range of bytes
 239         // in a byte array.
 240         //
 241         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 242         // So if you fix this, fix the others.  Currently those include:
 243         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 244         // parent method is safe
 245
 246         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
 247         {
 248             // Validate Parameters
 249             if (bytes == null)
 250                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
 251
 252             if (index < 0 || count < 0)
 253                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
 254
 255             if (bytes.Length - index < count)
 256                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
 257             Contract.EndContractBlock();
 258
 259             // If no input just return 0, fixed doesn't like 0 length arrays
 260             if (count == 0)
 261                 return 0;
 262
 263             // Just call pointer version
 264             fixed (byte* pBytes = bytes)
 265                 return GetCharCount(pBytes + index, count, null);
 266         }
 267
 268         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 269         // So if you fix this, fix the others.  Currently those include:
 270         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 271
 272         [CLSCompliant(false)]
 273         public override unsafe int GetCharCount(byte* bytes, int count)
 274         {
 275             // Validate Parameters
 276             if (bytes == null)
 277                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
 278
 279             if (count < 0)
 280                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
 281             Contract.EndContractBlock();
 282
 283             return GetCharCount(bytes, count, null);
 284         }
 285
 286         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 287         // So if you fix this, fix the others.  Currently those include:
 288         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 289         // parent method is safe
 290
 291         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
 292                                               char[] chars, int charIndex)
 293         {
 294             // Validate Parameters
 295             if (bytes == null || chars == null)
 296                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
 297
 298             if (byteIndex < 0 || byteCount < 0)
 299                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 300
 301             if ( bytes.Length - byteIndex < byteCount)
 302                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
 303
 304             if (charIndex < 0 || charIndex > chars.Length)
 305                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
 306             Contract.EndContractBlock();
 307
 308             // If no input, return 0 & avoid fixed problem
 309             if (byteCount == 0)
 310                 return 0;
 311
 312             // Just call pointer version
 313             int charCount = chars.Length - charIndex;
 314
 315             // Fixed doesn't like 0 length arrays.
 316             if (chars.Length == 0)
 317                 chars = new char[1];
 318
 319             fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
 320                 // Remember that charCount is # to decode, not size of array
 321                 return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
 322         }
 323
 324         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 325         // So if you fix this, fix the others.  Currently those include:
 326         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 327
 328         [CLSCompliant(false)]
 329         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
 330         {
 331             // Validate Parameters
 332             if (bytes == null || chars == null)
 333                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
 334
 335             if (charCount < 0 || byteCount < 0)
 336                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
 337             Contract.EndContractBlock();
 338
 339             return GetChars(bytes, byteCount, chars, charCount, null);
 340         }
 341
 342         // Returns a string containing the decoded representation of a range of
 343         // bytes in a byte array.
 344         //
 345         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 346         // So if you fix this, fix the others.  Currently those include:
 347         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 348         // parent method is safe
 349
 350         public override unsafe string GetString(byte[] bytes, int index, int count)
 351         {
 352             // Validate Parameters
 353             if (bytes == null)
 354                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
 355
 356             if (index < 0 || count < 0)
 357                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
 358
 359             if (bytes.Length - index < count)
 360                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
 361             Contract.EndContractBlock();
 362
 363             // Avoid problems with empty input buffer
 364             if (count == 0) return String.Empty;
 365
 366             fixed (byte* pBytes = bytes)
 367                 return String.CreateStringFromEncoding(
 368                     pBytes + index, count, this);
 369         }
 370
 371         //
 372         // End of standard methods copied from EncodingNLS.cs
 373         //
 374
 375         internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
 376         {
 377             Debug.Assert(chars != null, "[UnicodeEncoding.GetByteCount]chars!=null");
 378             Debug.Assert(count >= 0, "[UnicodeEncoding.GetByteCount]count >=0");
 379
 380             // Start by assuming each char gets 2 bytes
 381             int byteCount = count << 1;
 382
 383             // Check for overflow in byteCount
 384             // (If they were all invalid chars, this would actually be wrong,
 385             // but that's a ridiculously large # so we're not concerned about that case)
 386             if (byteCount < 0)
 387                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
 388
 389             char* charStart = chars;
 390             char* charEnd = chars + count;
 391             char charLeftOver = (char)0;
 392
 393             bool wasHereBefore = false;
 394
 395             // Need -1 to check 2 at a time.  If we have an even #, longChars will go
 396             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
 397             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
 398             ulong* longEnd = (ulong*)(charEnd - 3);
 399
 400             // For fallback we may need a fallback buffer
 401             EncoderFallbackBuffer fallbackBuffer = null;
 402             char* charsForFallback;
 403
 404             if (encoder != null)
 405             {
 406                 charLeftOver = encoder._charLeftOver;
 407
 408                 // Assume extra bytes to encode charLeftOver if it existed
 409                 if (charLeftOver > 0)
 410                     byteCount += 2;
 411
 412                 // We mustn't have left over fallback data when counting
 413                 if (encoder.InternalHasFallbackBuffer)
 414                 {
 415                     fallbackBuffer = encoder.FallbackBuffer;
 416                     if (fallbackBuffer.Remaining > 0)
 417                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
 418
 419                     // Set our internal fallback interesting things.
 420                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 421                 }
 422             }
 423
 424             char ch;
 425         TryAgain:
 426
 427             while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
 428             {
 429                 // First unwind any fallback
 430                 if (ch == 0)
 431                 {
 432                     // No fallback, maybe we can do it fast
 433 #if !NO_FAST_UNICODE_LOOP
 434 #if BIGENDIAN       // If endianess is backwards then each pair of bytes would be backwards.
 435                     if ( bigEndian &&
 436 #else
 437                     if (!bigEndian &&
 438 #endif // BIGENDIAN
 439
 440 #if BIT64           // 64 bit CPU needs to be long aligned for this to work.
 441                           charLeftOver == 0 && (unchecked((long)chars) & 7) == 0)
 442 #else
 443                           charLeftOver == 0 && (unchecked((int)chars) & 3) == 0)
 444 #endif
 445                     {
 446                         // Need new char* so we can check 4 at a time
 447                         ulong* longChars = (ulong*)chars;
 448
 449                         while (longChars < longEnd)
 450                         {
 451                             // See if we potentially have surrogates (0x8000 bit set)
 452                             // (We're either big endian on a big endian machine or little endian on
 453                             // a little endian machine so that'll work)
 454                             if ((0x8000800080008000 & *longChars) != 0)
 455                             {
 456                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
 457                                 // 5 bits looks like 11011, then its a high or low surrogate.
 458                                 // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
 459                                 // Note that we expect BMP characters to be more common than surrogates
 460                                 // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
 461                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
 462
 463                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
 464                                 // but no clue if they're high or low.
 465                                 // If each of the 4 characters are non-zero, then none are surrogates.
 466                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
 467                                     (uTemp & 0x0000FFFF00000000) == 0 ||
 468                                     (uTemp & 0x00000000FFFF0000) == 0 ||
 469                                     (uTemp & 0x000000000000FFFF) == 0)
 470                                 {
 471                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
 472                                     // or if there's 1 or 4 surrogates
 473
 474                                     // If they happen to be high/low/high/low, we may as well continue.  Check the next
 475                                     // bit to see if its set (low) or not (high) in the right pattern
 476 #if BIGENDIAN
 477                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
 478 #else
 479                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
 480 #endif
 481                                     {
 482                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
 483                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
 484
 485                                         // Drop out to the slow loop to resolve the surrogates
 486                                         break;
 487                                     }
 488                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
 489                                 }
 490                                 // else none are surrogates, so we can use them.
 491                             }
 492                             // else all < 0x8000 so we can use them
 493
 494                             // We already counted these four chars, go to next long.
 495                             longChars++;
 496                         }
 497
 498                         chars = (char*)longChars;
 499
 500                         if (chars >= charEnd)
 501                             break;
 502                     }
 503 #endif // !NO_FAST_UNICODE_LOOP
 504
 505                     // No fallback, just get next char
 506                     ch = *chars;
 507                     chars++;
 508                 }
 509                 else
 510                 {
 511                     // We weren't preallocating fallback space.
 512                     byteCount += 2;
 513                 }
 514
 515                 // Check for high or low surrogates
 516                 if (ch >= 0xd800 && ch <= 0xdfff)
 517                 {
 518                     // Was it a high surrogate?
 519                     if (ch <= 0xdbff)
 520                     {
 521                         // Its a high surrogate, if we already had a high surrogate do its fallback
 522                         if (charLeftOver > 0)
 523                         {
 524                             // Unwind the current character, this should be safe because we
 525                             // don't have leftover data in the fallback, so chars must have
 526                             // advanced already.
 527                             Debug.Assert(chars > charStart,
 528                                 "[UnicodeEncoding.GetByteCount]Expected chars to have advanced in unexpected high surrogate");
 529                             chars--;
 530
 531                             // If previous high surrogate deallocate 2 bytes
 532                             byteCount -= 2;
 533
 534                             // Fallback the previous surrogate
 535                             // Need to initialize fallback buffer?
 536                             if (fallbackBuffer == null)
 537                             {
 538                                 if (encoder == null)
 539                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 540                                 else
 541                                     fallbackBuffer = encoder.FallbackBuffer;
 542
 543                                 // Set our internal fallback interesting things.
 544                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 545                             }
 546
 547                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
 548                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 549                             chars = charsForFallback;
 550
 551                             // Now no high surrogate left over
 552                             charLeftOver = (char)0;
 553                             continue;
 554                         }
 555
 556                         // Remember this high surrogate
 557                         charLeftOver = ch;
 558                         continue;
 559                     }
 560
 561
 562                     // Its a low surrogate
 563                     if (charLeftOver == 0)
 564                     {
 565                         // Expected a previous high surrogate.
 566                         // Don't count this one (we'll count its fallback if necessary)
 567                         byteCount -= 2;
 568
 569                         // fallback this one
 570                         // Need to initialize fallback buffer?
 571                         if (fallbackBuffer == null)
 572                         {
 573                             if (encoder == null)
 574                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 575                             else
 576                                 fallbackBuffer = encoder.FallbackBuffer;
 577
 578                             // Set our internal fallback interesting things.
 579                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 580                         }
 581                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 582                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
 583                         chars = charsForFallback;
 584                         continue;
 585                     }
 586
 587                     // Valid surrogate pair, add our charLeftOver
 588                     charLeftOver = (char)0;
 589                     continue;
 590                 }
 591                 else if (charLeftOver > 0)
 592                 {
 593                     // Expected a low surrogate, but this char is normal
 594
 595                     // Rewind the current character, fallback previous character.
 596                     // this should be safe because we don't have leftover data in the
 597                     // fallback, so chars must have advanced already.
 598                     Debug.Assert(chars > charStart,
 599                         "[UnicodeEncoding.GetByteCount]Expected chars to have advanced when expected low surrogate");
 600                     chars--;
 601
 602                     // fallback previous chars
 603                     // Need to initialize fallback buffer?
 604                     if (fallbackBuffer == null)
 605                     {
 606                         if (encoder == null)
 607                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 608                         else
 609                             fallbackBuffer = encoder.FallbackBuffer;
 610
 611                         // Set our internal fallback interesting things.
 612                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 613                     }
 614                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 615                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 616                     chars = charsForFallback;
 617
 618                     // Ignore charLeftOver or throw
 619                     byteCount -= 2;
 620                     charLeftOver = (char)0;
 621
 622                     continue;
 623                 }
 624
 625                 // Ok we had something to add (already counted)
 626             }
 627
 628             // Don't allocate space for left over char
 629             if (charLeftOver > 0)
 630             {
 631                 byteCount -= 2;
 632
 633                 // If we have to flush, stick it in fallback and try again
 634                 if (encoder == null || encoder.MustFlush)
 635                 {
 636                     if (wasHereBefore)
 637                     {
 638                         // Throw it, using our complete character
 639                         throw new ArgumentException(
 640                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
 641                     }
 642                     else
 643                     {
 644                         // Need to initialize fallback buffer?
 645                         if (fallbackBuffer == null)
 646                         {
 647                             if (encoder == null)
 648                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 649                             else
 650                                 fallbackBuffer = encoder.FallbackBuffer;
 651
 652                             // Set our internal fallback interesting things.
 653                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 654                         }
 655                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 656                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 657                         chars = charsForFallback;
 658                         charLeftOver = (char)0;
 659                         wasHereBefore = true;
 660                         goto TryAgain;
 661                     }
 662                 }
 663             }
 664
 665             // Shouldn't have anything in fallback buffer for GetByteCount
 666             // (don't have to check _throwOnOverflow for count)
 667             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
 668                 "[UnicodeEncoding.GetByteCount]Expected empty fallback buffer at end");
 669
 670             // Don't remember fallbackBuffer.encoder for counting
 671             return byteCount;
 672         }
 673
 674         internal override unsafe int GetBytes(char* chars, int charCount,
 675                                                 byte* bytes, int byteCount, EncoderNLS encoder)
 676         {
 677             Debug.Assert(chars != null, "[UnicodeEncoding.GetBytes]chars!=null");
 678             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetBytes]byteCount >=0");
 679             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetBytes]charCount >=0");
 680             Debug.Assert(bytes != null, "[UnicodeEncoding.GetBytes]bytes!=null");
 681
 682             char charLeftOver = (char)0;
 683             char ch;
 684             bool wasHereBefore = false;
 685
 686
 687             byte* byteEnd = bytes + byteCount;
 688             char* charEnd = chars + charCount;
 689             byte* byteStart = bytes;
 690             char* charStart = chars;
 691
 692             // For fallback we may need a fallback buffer
 693             EncoderFallbackBuffer fallbackBuffer = null;
 694             char* charsForFallback;
 695
 696             // Get our encoder, but don't clear it yet.
 697             if (encoder != null)
 698             {
 699                 charLeftOver = encoder._charLeftOver;
 700
 701                 // We mustn't have left over fallback data when counting
 702                 if (encoder.InternalHasFallbackBuffer)
 703                 {
 704                     // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
 705                     fallbackBuffer = encoder.FallbackBuffer;
 706                     if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
 707                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
 708
 709                     // Set our internal fallback interesting things.
 710                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 711                 }
 712             }
 713
 714         TryAgain:
 715             while (((ch = (fallbackBuffer == null) ?
 716                         (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) ||
 717                     chars < charEnd)
 718             {
 719                 // First unwind any fallback
 720                 if (ch == 0)
 721                 {
 722                     // No fallback, maybe we can do it fast
 723 #if !NO_FAST_UNICODE_LOOP
 724 #if BIGENDIAN           // If endianess is backwards then each pair of bytes would be backwards.
 725                     if ( bigEndian &&
 726 #else
 727                     if (!bigEndian &&
 728 #endif // BIGENDIAN
 729 #if BIT64           // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned
 730                         (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
 731 #else
 732                         (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
 733 #endif // BIT64
 734                         charLeftOver == 0)
 735                     {
 736                         // Need -1 to check 2 at a time.  If we have an even #, longChars will go
 737                         // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
 738                         // will go from longEnd - 1 long to longEnd. (Might not get to use this)
 739                         // We can only go iCount units (limited by shorter of char or byte buffers.
 740                         ulong* longEnd = (ulong*)(chars - 3 +
 741                                                   (((byteEnd - bytes) >> 1 < charEnd - chars) ?
 742                                                     (byteEnd - bytes) >> 1 : charEnd - chars));
 743
 744                         // Need new char* so we can check 4 at a time
 745                         ulong* longChars = (ulong*)chars;
 746                         ulong* longBytes = (ulong*)bytes;
 747
 748                         while (longChars < longEnd)
 749                         {
 750                             // See if we potentially have surrogates (0x8000 bit set)
 751                             // (We're either big endian on a big endian machine or little endian on
 752                             // a little endian machine so that'll work)
 753                             if ((0x8000800080008000 & *longChars) != 0)
 754                             {
 755                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
 756                                 // 5 bits looks like 11011, then its a high or low surrogate.
 757                                 // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
 758                                 // Note that we expect BMP characters to be more common than surrogates
 759                                 // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
 760                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
 761
 762                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
 763                                 // but no clue if they're high or low.
 764                                 // If each of the 4 characters are non-zero, then none are surrogates.
 765                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
 766                                     (uTemp & 0x0000FFFF00000000) == 0 ||
 767                                     (uTemp & 0x00000000FFFF0000) == 0 ||
 768                                     (uTemp & 0x000000000000FFFF) == 0)
 769                                 {
 770                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
 771                                     // or if there's 1 or 4 surrogates
 772
 773                                     // If they happen to be high/low/high/low, we may as well continue.  Check the next
 774                                     // bit to see if its set (low) or not (high) in the right pattern
 775 #if BIGENDIAN
 776                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
 777 #else
 778                                     if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
 779 #endif
 780                                     {
 781                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
 782                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
 783
 784                                         // Drop out to the slow loop to resolve the surrogates
 785                                         break;
 786                                     }
 787                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
 788                                 }
 789                                 // else none are surrogates, so we can use them.
 790                             }
 791                             // else all < 0x8000 so we can use them
 792
 793                             // We can use these 4 chars.
 794                             *longBytes = *longChars;
 795                             longChars++;
 796                             longBytes++;
 797                         }
 798
 799                         chars = (char*)longChars;
 800                         bytes = (byte*)longBytes;
 801
 802                         if (chars >= charEnd)
 803                             break;
 804                     }
 805                     // Not aligned, but maybe we can still be somewhat faster
 806                     // Also somehow this optimizes the above loop?  It seems to cause something above
 807                     // to get enregistered, but I haven't figured out how to make that happen without this loop.
 808                     else if ((charLeftOver == 0) &&
 809 #if BIGENDIAN
 810                         bigEndian &&
 811 #else
 812                         !bigEndian &&
 813 #endif // BIGENDIAN
 814
 815 #if BIT64
 816                         (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) &&  // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time
 817 #else
 818                         (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) &&  // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time
 819 #endif // BIT64
 820                         (unchecked((int)(bytes)) & 1) == 0)
 821                     {
 822                         // # to use
 823                         long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ?
 824                                        (byteEnd - bytes) >> 1 : charEnd - chars;
 825
 826                         // Need new char*
 827                         char* charOut = ((char*)bytes);     // a char* for our output
 828                         char* tempEnd = chars + iCount - 1; // Our end pointer
 829
 830                         while (chars < tempEnd)
 831                         {
 832                             if (*chars >= (char)0xd800 && *chars <= (char)0xdfff)
 833                             {
 834                                 // break for fallback for low surrogate
 835                                 if (*chars >= 0xdc00)
 836                                     break;
 837
 838                                 // break if next one's not a low surrogate (will do fallback)
 839                                 if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff)
 840                                     break;
 841
 842                                 // They both exist, use them
 843                             }
 844                             // If 2nd char is surrogate & this one isn't then only add one
 845                             else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff)
 846                             {
 847                                 *charOut = *chars;
 848                                 charOut++;
 849                                 chars++;
 850                                 continue;
 851                             }
 852
 853                             *charOut = *chars;
 854                             *(charOut + 1) = *(chars + 1);
 855                             charOut += 2;
 856                             chars += 2;
 857                         }
 858
 859                         bytes = (byte*)charOut;
 860
 861                         if (chars >= charEnd)
 862                             break;
 863                     }
 864 #endif // !NO_FAST_UNICODE_LOOP
 865
 866                     // No fallback, just get next char
 867                     ch = *chars;
 868                     chars++;
 869                 }
 870
 871                 // Check for high or low surrogates
 872                 if (ch >= 0xd800 && ch <= 0xdfff)
 873                 {
 874                     // Was it a high surrogate?
 875                     if (ch <= 0xdbff)
 876                     {
 877                         // Its a high surrogate, see if we already had a high surrogate
 878                         if (charLeftOver > 0)
 879                         {
 880                             // Unwind the current character, this should be safe because we
 881                             // don't have leftover data in the fallback, so chars must have
 882                             // advanced already.
 883                             Debug.Assert(chars > charStart,
 884                                 "[UnicodeEncoding.GetBytes]Expected chars to have advanced in unexpected high surrogate");
 885                             chars--;
 886
 887                             // Fallback the previous surrogate
 888                             // Might need to create our fallback buffer
 889                             if (fallbackBuffer == null)
 890                             {
 891                                 if (encoder == null)
 892                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 893                                 else
 894                                     fallbackBuffer = encoder.FallbackBuffer;
 895
 896                                 // Set our internal fallback interesting things.
 897                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 898                             }
 899
 900                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 901                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 902                             chars = charsForFallback;
 903
 904                             charLeftOver = (char)0;
 905                             continue;
 906                         }
 907
 908                         // Remember this high surrogate
 909                         charLeftOver = ch;
 910                         continue;
 911                     }
 912
 913                     // Its a low surrogate
 914                     if (charLeftOver == 0)
 915                     {
 916                         // We'll fall back this one
 917                         // Might need to create our fallback buffer
 918                         if (fallbackBuffer == null)
 919                         {
 920                             if (encoder == null)
 921                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 922                             else
 923                                 fallbackBuffer = encoder.FallbackBuffer;
 924
 925                             // Set our internal fallback interesting things.
 926                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 927                         }
 928
 929                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 930                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
 931                         chars = charsForFallback;
 932                         continue;
 933                     }
 934
 935                     // Valid surrogate pair, add our charLeftOver
 936                     if (bytes + 3 >= byteEnd)
 937                     {
 938                         // Not enough room to add this surrogate pair
 939                         if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
 940                         {
 941                             // These must have both been from the fallbacks.
 942                             // Both of these MUST have been from a fallback because if the 1st wasn't
 943                             // from a fallback, then a high surrogate followed by an illegal char
 944                             // would've caused the high surrogate to fall back.  If a high surrogate
 945                             // fell back, then it was consumed and both chars came from the fallback.
 946                             fallbackBuffer.MovePrevious();                     // Didn't use either fallback surrogate
 947                             fallbackBuffer.MovePrevious();
 948                         }
 949                         else
 950                         {
 951                             // If we don't have enough room, then either we should've advanced a while
 952                             // or we should have bytes==byteStart and throw below
 953                             Debug.Assert(chars > charStart + 1 || bytes == byteStart,
 954                                 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
 955                             chars -= 2;                                        // Didn't use either surrogate
 956                         }
 957                         ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
 958                         charLeftOver = (char)0;                             // we'll retry it later
 959                         break;                                               // Didn't throw, but stop 'til next time.
 960                     }
 961
 962                     if (bigEndian)
 963                     {
 964                         *(bytes++) = (byte)(charLeftOver >> 8);
 965                         *(bytes++) = (byte)charLeftOver;
 966                     }
 967                     else
 968                     {
 969                         *(bytes++) = (byte)charLeftOver;
 970                         *(bytes++) = (byte)(charLeftOver >> 8);
 971                     }
 972
 973                     charLeftOver = (char)0;
 974                 }
 975                 else if (charLeftOver > 0)
 976                 {
 977                     // Expected a low surrogate, but this char is normal
 978
 979                     // Rewind the current character, fallback previous character.
 980                     // this should be safe because we don't have leftover data in the
 981                     // fallback, so chars must have advanced already.
 982                     Debug.Assert(chars > charStart,
 983                         "[UnicodeEncoding.GetBytes]Expected chars to have advanced after expecting low surrogate");
 984                     chars--;
 985
 986                     // fallback previous chars
 987                     // Might need to create our fallback buffer
 988                     if (fallbackBuffer == null)
 989                     {
 990                         if (encoder == null)
 991                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 992                         else
 993                             fallbackBuffer = encoder.FallbackBuffer;
 994
 995                         // Set our internal fallback interesting things.
 996                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 997                     }
 998
 999                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1000                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1001                     chars = charsForFallback;
1002
1003                     // Ignore charLeftOver or throw
1004                     charLeftOver = (char)0;
1005                     continue;
1006                 }
1007
1008                 // Ok, we have a char to add
1009                 if (bytes + 1 >= byteEnd)
1010                 {
1011                     // Couldn't add this char
1012                     if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
1013                         fallbackBuffer.MovePrevious();                     // Not using this fallback char
1014                     else
1015                     {
1016                         // Lonely charLeftOver (from previous call) would've been caught up above,
1017                         // so this must be a case where we've already read an input char.
1018                         Debug.Assert(chars > charStart,
1019                             "[UnicodeEncoding.GetBytes]Expected chars to have advanced for failed fallback");
1020                         chars--;                                         // Not using this char
1021                     }
1022                     ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
1023                     break;                                               // didn't throw, just stop
1024                 }
1025
1026                 if (bigEndian)
1027                 {
1028                     *(bytes++) = (byte)(ch >> 8);
1029                     *(bytes++) = (byte)ch;
1030                 }
1031                 else
1032                 {
1033                     *(bytes++) = (byte)ch;
1034                     *(bytes++) = (byte)(ch >> 8);
1035                 }
1036             }
1037
1038             // Don't allocate space for left over char
1039             if (charLeftOver > 0)
1040             {
1041                 // If we aren't flushing we need to fall this back
1042                 if (encoder == null || encoder.MustFlush)
1043                 {
1044                     if (wasHereBefore)
1045                     {
1046                         // Throw it, using our complete character
1047                         throw new ArgumentException(
1048                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
1049                     }
1050                     else
1051                     {
1052                         // If we have to flush, stick it in fallback and try again
1053                         // Might need to create our fallback buffer
1054                         if (fallbackBuffer == null)
1055                         {
1056                             if (encoder == null)
1057                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
1058                             else
1059                                 fallbackBuffer = encoder.FallbackBuffer;
1060
1061                             // Set our internal fallback interesting things.
1062                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
1063                         }
1064
1065                         // If we're not flushing, that'll remember the left over character.
1066                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1067                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
1068                         chars = charsForFallback;
1069
1070                         charLeftOver = (char)0;
1071                         wasHereBefore = true;
1072                         goto TryAgain;
1073                     }
1074                 }
1075             }
1076
1077             // Not flushing, remember it in the encoder
1078             if (encoder != null)
1079             {
1080                 encoder._charLeftOver = charLeftOver;
1081                 encoder._charsUsed = (int)(chars - charStart);
1082             }
1083
1084             // Remember charLeftOver if we must, or clear it if we're flushing
1085             // (charLeftOver should be 0 if we're flushing)
1086             Debug.Assert((encoder != null && !encoder.MustFlush) || charLeftOver == (char)0,
1087                 "[UnicodeEncoding.GetBytes] Expected no left over characters if flushing");
1088
1089             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
1090                 encoder == null || !encoder._throwOnOverflow,
1091                 "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting");
1092
1093             // We used to copy it fast, but this doesn't check for surrogates
1094             // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount);
1095
1096             return (int)(bytes - byteStart);
1097         }
1098
1099         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
1100         {
1101             Debug.Assert(bytes != null, "[UnicodeEncoding.GetCharCount]bytes!=null");
1102             Debug.Assert(count >= 0, "[UnicodeEncoding.GetCharCount]count >=0");
1103
1104             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1105
1106             byte* byteEnd = bytes + count;
1107             byte* byteStart = bytes;
1108
1109             // Need last vars
1110             int lastByte = -1;
1111             char lastChar = (char)0;
1112
1113             // Start by assuming same # of chars as bytes
1114             int charCount = count >> 1;
1115
1116             // Need -1 to check 2 at a time.  If we have an even #, longBytes will go
1117             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longBytes
1118             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
1119             ulong* longEnd = (ulong*)(byteEnd - 7);
1120
1121             // For fallback we may need a fallback buffer
1122             DecoderFallbackBuffer fallbackBuffer = null;
1123
1124             if (decoder != null)
1125             {
1126                 lastByte = decoder.lastByte;
1127                 lastChar = decoder.lastChar;
1128
1129                 // Assume extra char if last char was around
1130                 if (lastChar > 0)
1131                     charCount++;
1132
1133                 // Assume extra char if extra last byte makes up odd # of input bytes
1134                 if (lastByte >= 0 && (count & 1) == 1)
1135                 {
1136                     charCount++;
1137                 }
1138
1139                 // Shouldn't have anything in fallback buffer for GetCharCount
1140                 // (don't have to check _throwOnOverflow for count)
1141                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1142                     "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at start");
1143             }
1144
1145             while (bytes < byteEnd)
1146             {
1147                 // If we're aligned then maybe we can do it fast
1148                 // That'll hurt if we're unaligned because we'll always test but never be aligned
1149 #if !NO_FAST_UNICODE_LOOP
1150 #if BIGENDIAN
1151                 if (bigEndian &&
1152 #else // BIGENDIAN
1153                 if (!bigEndian &&
1154 #endif // BIGENDIAN
1155 #if BIT64 // win64 has to be long aligned
1156                     (unchecked((long)bytes) & 7) == 0 &&
1157 #else
1158                     (unchecked((int)bytes) & 3) == 0 &&
1159 #endif // BIT64
1160                     lastByte == -1 && lastChar == 0)
1161                 {
1162                     // Need new char* so we can check 4 at a time
1163                     ulong* longBytes = (ulong*)bytes;
1164
1165                     while (longBytes < longEnd)
1166                     {
1167                         // See if we potentially have surrogates (0x8000 bit set)
1168                         // (We're either big endian on a big endian machine or little endian on
1169                         // a little endian machine so that'll work)
1170                         if ((0x8000800080008000 & *longBytes) != 0)
1171                         {
1172                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1173                             // 5 bits looks like 11011, then its a high or low surrogate.
1174                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1175                             // Note that we expect BMP characters to be more common than surrogates
1176                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1177                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1178
1179                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1180                             // but no clue if they're high or low.
1181                             // If each of the 4 characters are non-zero, then none are surrogates.
1182                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1183                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1184                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1185                                 (uTemp & 0x000000000000FFFF) == 0)
1186                             {
1187                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1188                                 // or if there's 1 or 4 surrogates
1189
1190                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1191                                 // bit to see if its set (low) or not (high) in the right pattern
1192 #if BIGENDIAN
1193                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1194 #else
1195                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1196 #endif
1197                                 {
1198                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1199                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1200
1201                                     // Drop out to the slow loop to resolve the surrogates
1202                                     break;
1203                                 }
1204                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1205                             }
1206                             // else none are surrogates, so we can use them.
1207                         }
1208                         // else all < 0x8000 so we can use them
1209
1210                         // We can use these 4 chars.
1211                         longBytes++;
1212                     }
1213
1214                     bytes = (byte*)longBytes;
1215
1216                     if (bytes >= byteEnd)
1217                         break;
1218                 }
1219 #endif // !NO_FAST_UNICODE_LOOP
1220
1221                 // Get 1st byte
1222                 if (lastByte < 0)
1223                 {
1224                     lastByte = *bytes++;
1225                     if (bytes >= byteEnd) break;
1226                 }
1227
1228                 // Get full char
1229                 char ch;
1230                 if (bigEndian)
1231                 {
1232                     ch = (char)(lastByte << 8 | *(bytes++));
1233                 }
1234                 else
1235                 {
1236                     ch = (char)(*(bytes++) << 8 | lastByte);
1237                 }
1238                 lastByte = -1;
1239
1240                 // See if the char's valid
1241                 if (ch >= 0xd800 && ch <= 0xdfff)
1242                 {
1243                     // Was it a high surrogate?
1244                     if (ch <= 0xdbff)
1245                     {
1246                         // Its a high surrogate, if we had one then do fallback for previous one
1247                         if (lastChar > 0)
1248                         {
1249                             // Ignore previous bad high surrogate
1250                             charCount--;
1251
1252                             // Get fallback for previous high surrogate
1253                             // Note we have to reconstruct bytes because some may have been in decoder
1254                             byte[] byteBuffer = null;
1255                             if (bigEndian)
1256                             {
1257                                 byteBuffer = new byte[]
1258                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1259                             }
1260                             else
1261                             {
1262                                 byteBuffer = new byte[]
1263                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1264                             }
1265
1266                             if (fallbackBuffer == null)
1267                             {
1268                                 if (decoder == null)
1269                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1270                                 else
1271                                     fallbackBuffer = decoder.FallbackBuffer;
1272
1273                                 // Set our internal fallback interesting things.
1274                                 fallbackBuffer.InternalInitialize(byteStart, null);
1275                             }
1276
1277                             // Get fallback.
1278                             charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1279                         }
1280
1281                         // Ignore the last one which fell back already,
1282                         // and remember the new high surrogate
1283                         lastChar = ch;
1284                         continue;
1285                     }
1286
1287                     // Its a low surrogate
1288                     if (lastChar == 0)
1289                     {
1290                         // Expected a previous high surrogate
1291                         charCount--;
1292
1293                         // Get fallback for this low surrogate
1294                         // Note we have to reconstruct bytes because some may have been in decoder
1295                         byte[] byteBuffer = null;
1296                         if (bigEndian)
1297                         {
1298                             byteBuffer = new byte[]
1299                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1300                         }
1301                         else
1302                         {
1303                             byteBuffer = new byte[]
1304                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1305                         }
1306
1307                         if (fallbackBuffer == null)
1308                         {
1309                             if (decoder == null)
1310                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1311                             else
1312                                 fallbackBuffer = decoder.FallbackBuffer;
1313
1314                             // Set our internal fallback interesting things.
1315                             fallbackBuffer.InternalInitialize(byteStart, null);
1316                         }
1317
1318                         charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1319
1320                         // Ignore this one (we already did its fallback)
1321                         continue;
1322                     }
1323
1324                     // Valid surrogate pair, already counted.
1325                     lastChar = (char)0;
1326                 }
1327                 else if (lastChar > 0)
1328                 {
1329                     // Had a high surrogate, expected a low surrogate
1330                     // Un-count the last high surrogate
1331                     charCount--;
1332
1333                     // fall back the high surrogate.
1334                     byte[] byteBuffer = null;
1335                     if (bigEndian)
1336                     {
1337                         byteBuffer = new byte[]
1338                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1339                     }
1340                     else
1341                     {
1342                         byteBuffer = new byte[]
1343                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1344                     }
1345
1346                     if (fallbackBuffer == null)
1347                     {
1348                         if (decoder == null)
1349                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1350                         else
1351                             fallbackBuffer = decoder.FallbackBuffer;
1352
1353                         // Set our internal fallback interesting things.
1354                         fallbackBuffer.InternalInitialize(byteStart, null);
1355                     }
1356
1357                     // Already subtracted high surrogate
1358                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1359
1360                     // Not left over now, clear previous high surrogate and continue to add current char
1361                     lastChar = (char)0;
1362                 }
1363
1364                 // Valid char, already counted
1365             }
1366
1367             // Extra space if we can't use decoder
1368             if (decoder == null || decoder.MustFlush)
1369             {
1370                 if (lastChar > 0)
1371                 {
1372                     // No hanging high surrogates allowed, do fallback and remove count for it
1373                     charCount--;
1374                     byte[] byteBuffer = null;
1375                     if (bigEndian)
1376                     {
1377                         byteBuffer = new byte[]
1378                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1379                     }
1380                     else
1381                     {
1382                         byteBuffer = new byte[]
1383                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1384                     }
1385
1386                     if (fallbackBuffer == null)
1387                     {
1388                         if (decoder == null)
1389                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1390                         else
1391                             fallbackBuffer = decoder.FallbackBuffer;
1392
1393                         // Set our internal fallback interesting things.
1394                         fallbackBuffer.InternalInitialize(byteStart, null);
1395                     }
1396
1397                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1398
1399                     lastChar = (char)0;
1400                 }
1401
1402                 if (lastByte >= 0)
1403                 {
1404                     if (fallbackBuffer == null)
1405                     {
1406                         if (decoder == null)
1407                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1408                         else
1409                             fallbackBuffer = decoder.FallbackBuffer;
1410
1411                         // Set our internal fallback interesting things.
1412                         fallbackBuffer.InternalInitialize(byteStart, null);
1413                     }
1414
1415                     // No hanging odd bytes allowed if must flush
1416                     charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
1417                     lastByte = -1;
1418                 }
1419             }
1420
1421             // If we had a high surrogate left over, we can't count it
1422             if (lastChar > 0)
1423                 charCount--;
1424
1425             // Shouldn't have anything in fallback buffer for GetCharCount
1426             // (don't have to check _throwOnOverflow for count)
1427             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1428                 "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at end");
1429
1430             return charCount;
1431         }
1432
1433         internal override unsafe int GetChars(byte* bytes, int byteCount,
1434                                                 char* chars, int charCount, DecoderNLS baseDecoder)
1435         {
1436             Debug.Assert(chars != null, "[UnicodeEncoding.GetChars]chars!=null");
1437             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetChars]byteCount >=0");
1438             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetChars]charCount >=0");
1439             Debug.Assert(bytes != null, "[UnicodeEncoding.GetChars]bytes!=null");
1440
1441             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1442
1443             // Need last vars
1444             int lastByte = -1;
1445             char lastChar = (char)0;
1446
1447             // Get our decoder (but don't clear it yet)
1448             if (decoder != null)
1449             {
1450                 lastByte = decoder.lastByte;
1451                 lastChar = decoder.lastChar;
1452
1453                 // Shouldn't have anything in fallback buffer for GetChars
1454                 // (don't have to check _throwOnOverflow for chars)
1455                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1456                     "[UnicodeEncoding.GetChars]Expected empty fallback buffer at start");
1457             }
1458
1459             // For fallback we may need a fallback buffer
1460             DecoderFallbackBuffer fallbackBuffer = null;
1461             char* charsForFallback;
1462
1463             byte* byteEnd = bytes + byteCount;
1464             char* charEnd = chars + charCount;
1465             byte* byteStart = bytes;
1466             char* charStart = chars;
1467
1468             while (bytes < byteEnd)
1469             {
1470                 // If we're aligned then maybe we can do it fast
1471                 // That'll hurt if we're unaligned because we'll always test but never be aligned
1472 #if !NO_FAST_UNICODE_LOOP
1473 #if BIGENDIAN
1474                 if (bigEndian &&
1475 #else // BIGENDIAN
1476                 if (!bigEndian &&
1477 #endif // BIGENDIAN
1478 #if BIT64 // win64 has to be long aligned
1479                     (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
1480 #else
1481                     (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
1482 #endif // BIT64
1483                     lastByte == -1 && lastChar == 0)
1484                 {
1485                     // Need -1 to check 2 at a time.  If we have an even #, longChars will go
1486                     // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
1487                     // will go from longEnd - 1 long to longEnd. (Might not get to use this)
1488                     // We can only go iCount units (limited by shorter of char or byte buffers.
1489                     ulong* longEnd = (ulong*)(bytes - 7 +
1490                                                 (((byteEnd - bytes) >> 1 < charEnd - chars) ?
1491                                                   (byteEnd - bytes) : (charEnd - chars) << 1));
1492
1493                     // Need new char* so we can check 4 at a time
1494                     ulong* longBytes = (ulong*)bytes;
1495                     ulong* longChars = (ulong*)chars;
1496
1497                     while (longBytes < longEnd)
1498                     {
1499                         // See if we potentially have surrogates (0x8000 bit set)
1500                         // (We're either big endian on a big endian machine or little endian on
1501                         // a little endian machine so that'll work)
1502                         if ((0x8000800080008000 & *longBytes) != 0)
1503                         {
1504                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1505                             // 5 bits looks like 11011, then its a high or low surrogate.
1506                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1507                             // Note that we expect BMP characters to be more common than surrogates
1508                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1509                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1510
1511                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1512                             // but no clue if they're high or low.
1513                             // If each of the 4 characters are non-zero, then none are surrogates.
1514                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1515                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1516                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1517                                 (uTemp & 0x000000000000FFFF) == 0)
1518                             {
1519                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1520                                 // or if there's 1 or 4 surrogates
1521
1522                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1523                                 // bit to see if its set (low) or not (high) in the right pattern
1524 #if BIGENDIAN
1525                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1526 #else
1527                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1528 #endif
1529                                 {
1530                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1531                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1532
1533                                     // Drop out to the slow loop to resolve the surrogates
1534                                     break;
1535                                 }
1536                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1537                             }
1538                             // else none are surrogates, so we can use them.
1539                         }
1540                         // else all < 0x8000 so we can use them
1541
1542                         // We can use these 4 chars.
1543                         *longChars = *longBytes;
1544                         longBytes++;
1545                         longChars++;
1546                     }
1547
1548                     chars = (char*)longChars;
1549                     bytes = (byte*)longBytes;
1550
1551                     if (bytes >= byteEnd)
1552                         break;
1553                 }
1554 #endif // !NO_FAST_UNICODE_LOOP
1555
1556                 // Get 1st byte
1557                 if (lastByte < 0)
1558                 {
1559                     lastByte = *bytes++;
1560                     continue;
1561                 }
1562
1563                 // Get full char
1564                 char ch;
1565                 if (bigEndian)
1566                 {
1567                     ch = (char)(lastByte << 8 | *(bytes++));
1568                 }
1569                 else
1570                 {
1571                     ch = (char)(*(bytes++) << 8 | lastByte);
1572                 }
1573                 lastByte = -1;
1574
1575                 // See if the char's valid
1576                 if (ch >= 0xd800 && ch <= 0xdfff)
1577                 {
1578                     // Was it a high surrogate?
1579                     if (ch <= 0xdbff)
1580                     {
1581                         // Its a high surrogate, if we had one then do fallback for previous one
1582                         if (lastChar > 0)
1583                         {
1584                             // Get fallback for previous high surrogate
1585                             // Note we have to reconstruct bytes because some may have been in decoder
1586                             byte[] byteBuffer = null;
1587                             if (bigEndian)
1588                             {
1589                                 byteBuffer = new byte[]
1590                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1591                             }
1592                             else
1593                             {
1594                                 byteBuffer = new byte[]
1595                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1596                             }
1597
1598                             if (fallbackBuffer == null)
1599                             {
1600                                 if (decoder == null)
1601                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1602                                 else
1603                                     fallbackBuffer = decoder.FallbackBuffer;
1604
1605                                 // Set our internal fallback interesting things.
1606                                 fallbackBuffer.InternalInitialize(byteStart, charEnd);
1607                             }
1608
1609                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1610                             bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1611                             chars = charsForFallback;
1612
1613                             if (!fallbackResult)
1614                             {
1615                                 // couldn't fall back lonely surrogate
1616                                 // We either advanced bytes or chars should == charStart and throw below
1617                                 Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1618                                     "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (bad surrogate)");
1619                                 bytes -= 2;                                       // didn't use these 2 bytes
1620                                 fallbackBuffer.InternalReset();
1621                                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1622                                 break;                                          // couldn't fallback but didn't throw
1623                             }
1624                         }
1625
1626                         // Ignore the previous high surrogate which fell back already,
1627                         // yet remember the current high surrogate for next time.
1628                         lastChar = ch;
1629                         continue;
1630                     }
1631
1632                     // Its a low surrogate
1633                     if (lastChar == 0)
1634                     {
1635                         // Expected a previous high surrogate
1636                         // Get fallback for this low surrogate
1637                         // Note we have to reconstruct bytes because some may have been in decoder
1638                         byte[] byteBuffer = null;
1639                         if (bigEndian)
1640                         {
1641                             byteBuffer = new byte[]
1642                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1643                         }
1644                         else
1645                         {
1646                             byteBuffer = new byte[]
1647                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1648                         }
1649
1650                         if (fallbackBuffer == null)
1651                         {
1652                             if (decoder == null)
1653                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1654                             else
1655                                 fallbackBuffer = decoder.FallbackBuffer;
1656
1657                             // Set our internal fallback interesting things.
1658                             fallbackBuffer.InternalInitialize(byteStart, charEnd);
1659                         }
1660
1661                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1662                         bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1663                         chars = charsForFallback;
1664
1665                         if (!fallbackResult)
1666                         {
1667                             // couldn't fall back lonely surrogate
1668                             // We either advanced bytes or chars should == charStart and throw below
1669                             Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1670                                 "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (lonely surrogate)");
1671                             bytes -= 2;                                       // didn't use these 2 bytes
1672                             fallbackBuffer.InternalReset();
1673                             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1674                             break;                                          // couldn't fallback but didn't throw
1675                         }
1676
1677                         // Didn't throw, ignore this one (we already did its fallback)
1678                         continue;
1679                     }
1680
1681                     // Valid surrogate pair, add our lastChar (will need 2 chars)
1682                     if (chars >= charEnd - 1)
1683                     {
1684                         // couldn't find room for this surrogate pair
1685                         // We either advanced bytes or chars should == charStart and throw below
1686                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1687                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (surrogate pair)");
1688                         bytes -= 2;                                       // didn't use these 2 bytes
1689                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1690                         // Leave lastChar for next call to Convert()
1691                         break;                                          // couldn't fallback but didn't throw
1692                     }
1693
1694                     *chars++ = lastChar;
1695                     lastChar = (char)0;
1696                 }
1697                 else if (lastChar > 0)
1698                 {
1699                     // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
1700                     byte[] byteBuffer = null;
1701                     if (bigEndian)
1702                     {
1703                         byteBuffer = new byte[]
1704                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1705                     }
1706                     else
1707                     {
1708                         byteBuffer = new byte[]
1709                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1710                     }
1711
1712                     if (fallbackBuffer == null)
1713                     {
1714                         if (decoder == null)
1715                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1716                         else
1717                             fallbackBuffer = decoder.FallbackBuffer;
1718
1719                         // Set our internal fallback interesting things.
1720                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1721                     }
1722
1723                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1724                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1725                     chars = charsForFallback;
1726
1727                     if (!fallbackResult)
1728                     {
1729                         // couldn't fall back high surrogate, or char that would be next
1730                         // We either advanced bytes or chars should == charStart and throw below
1731                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1732                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (no low surrogate)");
1733                         bytes -= 2;                                       // didn't use these 2 bytes
1734                         fallbackBuffer.InternalReset();
1735                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1736                         break;                                          // couldn't fallback but didn't throw
1737                     }
1738
1739                     // Not left over now, clear previous high surrogate and continue to add current char
1740                     lastChar = (char)0;
1741                 }
1742
1743                 // Valid char, room for it?
1744                 if (chars >= charEnd)
1745                 {
1746                     // 2 bytes couldn't fall back
1747                     // We either advanced bytes or chars should == charStart and throw below
1748                     Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1749                         "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (normal)");
1750                     bytes -= 2;                                       // didn't use these bytes
1751                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1752                     break;                                          // couldn't fallback but didn't throw
1753                 }
1754
1755                 // add it
1756                 *chars++ = ch;
1757             }
1758
1759             // Remember our decoder if we must
1760             if (decoder == null || decoder.MustFlush)
1761             {
1762                 if (lastChar > 0)
1763                 {
1764                     // No hanging high surrogates allowed, do fallback and remove count for it
1765                     byte[] byteBuffer = null;
1766                     if (bigEndian)
1767                     {
1768                         byteBuffer = new byte[]
1769                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1770                     }
1771                     else
1772                     {
1773                         byteBuffer = new byte[]
1774                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1775                     }
1776
1777                     if (fallbackBuffer == null)
1778                     {
1779                         if (decoder == null)
1780                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1781                         else
1782                             fallbackBuffer = decoder.FallbackBuffer;
1783
1784                         // Set our internal fallback interesting things.
1785                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1786                     }
1787
1788                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1789                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1790                     chars = charsForFallback;
1791
1792                     if (!fallbackResult)
1793                     {
1794                         // 2 bytes couldn't fall back
1795                         // We either advanced bytes or chars should == charStart and throw below
1796                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1797                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (decoder)");
1798                         bytes -= 2;                                       // didn't use these bytes
1799                         if (lastByte >= 0)
1800                             bytes--;                                    // had an extra last byte hanging around
1801                         fallbackBuffer.InternalReset();
1802                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1803                         // We'll remember these in our decoder though
1804                         bytes += 2;
1805                         if (lastByte >= 0)
1806                             bytes++;
1807                         goto End;
1808                     }
1809
1810                     // done with this one
1811                     lastChar = (char)0;
1812                 }
1813
1814                 if (lastByte >= 0)
1815                 {
1816                     if (fallbackBuffer == null)
1817                     {
1818                         if (decoder == null)
1819                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1820                         else
1821                             fallbackBuffer = decoder.FallbackBuffer;
1822
1823                         // Set our internal fallback interesting things.
1824                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1825                     }
1826
1827                     // No hanging odd bytes allowed if must flush
1828                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1829                     bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback);
1830                     chars = charsForFallback;
1831
1832                     if (!fallbackResult)
1833                     {
1834                         // odd byte couldn't fall back
1835                         bytes--;                                        // didn't use this byte
1836                         fallbackBuffer.InternalReset();
1837                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1838                         // didn't throw, but we'll remember it in the decoder
1839                         bytes++;
1840                         goto End;
1841                     }
1842
1843                     // Didn't fail, clear buffer
1844                     lastByte = -1;
1845                 }
1846             }
1847
1848         End:
1849
1850             // Remember our decoder if we must
1851             if (decoder != null)
1852             {
1853                 Debug.Assert((decoder.MustFlush == false) || ((lastChar == (char)0) && (lastByte == -1)),
1854                     "[UnicodeEncoding.GetChars] Expected no left over chars or bytes if flushing"
1855                     //                    + " " + ((int)lastChar).ToString("X4") + " " + lastByte.ToString("X2")
1856                     );
1857
1858                 decoder._bytesUsed = (int)(bytes - byteStart);
1859                 decoder.lastChar = lastChar;
1860                 decoder.lastByte = lastByte;
1861             }
1862
1863             // Used to do this the old way
1864             // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);
1865
1866             // Shouldn't have anything in fallback buffer for GetChars
1867             // (don't have to check _throwOnOverflow for count or chars)
1868             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1869                 "[UnicodeEncoding.GetChars]Expected empty fallback buffer at end");
1870
1871             return (int)(chars - charStart);
1872         }
1873
1874
1875         public override System.Text.Encoder GetEncoder()
1876         {
1877             return new EncoderNLS(this);
1878         }
1879
1880
1881         public override System.Text.Decoder GetDecoder()
1882         {
1883             return new UnicodeEncoding.Decoder(this);
1884         }
1885
1886
1887         public override byte[] GetPreamble()
1888         {
1889             if (byteOrderMark)
1890             {
1891                 // Note - we must allocate new byte[]'s here to prevent someone
1892                 // from modifying a cached byte[].
1893                 if (bigEndian)
1894                     return new byte[2] { 0xfe, 0xff };
1895                 else
1896                     return new byte[2] { 0xff, 0xfe };
1897             }
1898             return Array.Empty<Byte>();
1899         }
1900
1901
1902         public override int GetMaxByteCount(int charCount)
1903         {
1904             if (charCount < 0)
1905                 throw new ArgumentOutOfRangeException(nameof(charCount),
1906                      SR.ArgumentOutOfRange_NeedNonNegNum);
1907             Contract.EndContractBlock();
1908
1909             // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
1910             long byteCount = (long)charCount + 1;
1911
1912             if (EncoderFallback.MaxCharCount > 1)
1913                 byteCount *= EncoderFallback.MaxCharCount;
1914
1915             // 2 bytes per char
1916             byteCount <<= 1;
1917
1918             if (byteCount > 0x7fffffff)
1919                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
1920
1921             return (int)byteCount;
1922         }
1923
1924
1925         public override int GetMaxCharCount(int byteCount)
1926         {
1927             if (byteCount < 0)
1928                 throw new ArgumentOutOfRangeException(nameof(byteCount),
1929                      SR.ArgumentOutOfRange_NeedNonNegNum);
1930             Contract.EndContractBlock();
1931
1932             // long because byteCount could be biggest int.
1933             // 1 char per 2 bytes.  Round up in case 1 left over in decoder.
1934             // Round up using &1 in case byteCount is max size
1935             // Might also need an extra 1 if there's a left over high surrogate in the decoder.
1936             long charCount = (long)(byteCount >> 1) + (byteCount & 1) + 1;
1937
1938             // Don't forget fallback (in case they have a bunch of lonely surrogates or something bizarre like that)
1939             if (DecoderFallback.MaxCharCount > 1)
1940                 charCount *= DecoderFallback.MaxCharCount;
1941
1942             if (charCount > 0x7fffffff)
1943                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
1944
1945             return (int)charCount;
1946         }
1947
1948
1949         public override bool Equals(Object value)
1950         {
1951             UnicodeEncoding that = value as UnicodeEncoding;
1952             if (that != null)
1953             {
1954                 //
1955                 // Big Endian Unicode has different code page (1201) than small Endian one (1200),
1956                 // so we still have to check _codePage here.
1957                 //
1958                 return (CodePage == that.CodePage) &&
1959                         byteOrderMark == that.byteOrderMark &&
1960                         //                        isThrowException == that.isThrowException &&  // Same as Encoder/Decoder being exception fallbacks
1961                         bigEndian == that.bigEndian &&
1962                        (EncoderFallback.Equals(that.EncoderFallback)) &&
1963                        (DecoderFallback.Equals(that.DecoderFallback));
1964             }
1965             return (false);
1966         }
1967
1968         public override int GetHashCode()
1969         {
1970             return CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
1971                    (byteOrderMark ? 4 : 0) + (bigEndian ? 8 : 0);
1972         }
1973
1974         private sealed class Decoder : System.Text.DecoderNLS
1975         {
1976             internal int lastByte = -1;
1977             internal char lastChar = '\0';
1978
1979             public Decoder(UnicodeEncoding encoding) : base(encoding)
1980             {
1981                 // base calls reset
1982             }
1983
1984             public override void Reset()
1985             {
1986                 lastByte = -1;
1987                 lastChar = '\0';
1988                 if (_fallbackBuffer != null)
1989                     _fallbackBuffer.Reset();
1990             }
1991
1992             // Anything left in our decoder?
1993             internal override bool HasState
1994             {
1995                 get
1996                 {
1997                     return (this.lastByte != -1 || this.lastChar != '\0');
1998                 }
1999             }
2000         }
2001     }
2002 }
2003