src/mscorlib/shared/System/Globalization/TextInfo.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 ////////////////////////////////////////////////////////////////////////////
   6 //
   7 //
   8 //  Purpose:  This Class defines behaviors specific to a writing system.
   9 //            A writing system is the collection of scripts and
  10 //            orthographic rules required to represent a language as text.
  11 //
  12 //
  13 ////////////////////////////////////////////////////////////////////////////
  14
  15 using System.Diagnostics;
  16 using System.Runtime.Serialization;
  17 using System.Text;
  18
  19 namespace System.Globalization
  20 {
  21     public partial class TextInfo : ICloneable, IDeserializationCallback
  22     {
  23         private enum Tristate : byte
  24         {
  25             NotInitialized,
  26             True,
  27             False,
  28         }
  29
  30         private string _listSeparator;
  31         private bool _isReadOnly = false;
  32
  33         /*    _cultureName is the name of the creating culture.
  34               _cultureData is the data that backs this class.
  35               _textInfoName is the actual name of the textInfo (from cultureData.STEXTINFO)
  36                       In the desktop, when we call the sorting dll, it doesn't
  37                       know how to resolve custom locle names to sort ids so we have to have already resolved this.
  38         */
  39
  40         private readonly string _cultureName;      // Name of the culture that created this text info
  41         private readonly CultureData _cultureData; // Data record for the culture that made us, not for this textinfo
  42         private readonly string _textInfoName;     // Name of the text info we're using (ie: _cultureData.STEXTINFO)
  43
  44         private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized;
  45
  46         // _invariantMode is defined for the perf reason as accessing the instance field is faster than access the static property GlobalizationMode.Invariant
  47         private readonly bool _invariantMode = GlobalizationMode.Invariant;
  48
  49         // Invariant text info
  50         internal static TextInfo Invariant
  51         {
  52             get
  53             {
  54                 if (s_Invariant == null)
  55                     s_Invariant = new TextInfo(CultureData.Invariant);
  56                 return s_Invariant;
  57             }
  58         }
  59         internal volatile static TextInfo s_Invariant;
  60
  61         //////////////////////////////////////////////////////////////////////////
  62         ////
  63         ////  TextInfo Constructors
  64         ////
  65         ////  Implements CultureInfo.TextInfo.
  66         ////
  67         //////////////////////////////////////////////////////////////////////////
  68         internal TextInfo(CultureData cultureData)
  69         {
  70             // This is our primary data source, we don't need most of the rest of this
  71             _cultureData = cultureData;
  72             _cultureName = _cultureData.CultureName;
  73             _textInfoName = _cultureData.STEXTINFO;
  74
  75             FinishInitialization();
  76         }
  77
  78         void IDeserializationCallback.OnDeserialization(Object sender)
  79         {
  80             throw new PlatformNotSupportedException();
  81         }
  82
  83         //
  84         // Internal ordinal comparison functions
  85         //
  86
  87         internal static int GetHashCodeOrdinalIgnoreCase(string s)
  88         {
  89             // This is the same as an case insensitive hash for Invariant
  90             // (not necessarily true for sorting, but OK for casing & then we apply normal hash code rules)
  91             return Invariant.GetCaseInsensitiveHashCode(s);
  92         }
  93
  94         // Currently we don't have native functions to do this, so we do it the hard way
  95         internal static int IndexOfStringOrdinalIgnoreCase(string source, string value, int startIndex, int count)
  96         {
  97             if (count > source.Length || count < 0 || startIndex < 0 || startIndex > source.Length - count)
  98             {
  99                 return -1;
 100             }
 101
 102             return CultureInfo.InvariantCulture.CompareInfo.IndexOfOrdinal(source, value, startIndex, count, ignoreCase: true);
 103         }
 104
 105         // Currently we don't have native functions to do this, so we do it the hard way
 106         internal static int LastIndexOfStringOrdinalIgnoreCase(string source, string value, int startIndex, int count)
 107         {
 108             if (count > source.Length || count < 0 || startIndex < 0 || startIndex > source.Length - 1 || (startIndex - count + 1 < 0))
 109             {
 110                 return -1;
 111             }
 112
 113             return CultureInfo.InvariantCulture.CompareInfo.LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true);
 114         }
 115
 116         public virtual int ANSICodePage => _cultureData.IDEFAULTANSICODEPAGE;
 117
 118         public virtual int OEMCodePage => _cultureData.IDEFAULTOEMCODEPAGE;
 119
 120         public virtual int MacCodePage => _cultureData.IDEFAULTMACCODEPAGE;
 121
 122         public virtual int EBCDICCodePage => _cultureData.IDEFAULTEBCDICCODEPAGE;
 123
 124         // Just use the LCID from our text info name
 125         public int LCID => CultureInfo.GetCultureInfo(_textInfoName).LCID;
 126
 127         public string CultureName => _textInfoName;
 128
 129         public bool IsReadOnly => _isReadOnly;
 130
 131         //////////////////////////////////////////////////////////////////////////
 132         ////
 133         ////  Clone
 134         ////
 135         ////  Is the implementation of ICloneable.
 136         ////
 137         //////////////////////////////////////////////////////////////////////////
 138         public virtual object Clone()
 139         {
 140             object o = MemberwiseClone();
 141             ((TextInfo)o).SetReadOnlyState(false);
 142             return o;
 143         }
 144
 145         ////////////////////////////////////////////////////////////////////////
 146         //
 147         //  ReadOnly
 148         //
 149         //  Create a cloned readonly instance or return the input one if it is
 150         //  readonly.
 151         //
 152         ////////////////////////////////////////////////////////////////////////
 153         public static TextInfo ReadOnly(TextInfo textInfo)
 154         {
 155             if (textInfo == null) { throw new ArgumentNullException(nameof(textInfo)); }
 156             if (textInfo.IsReadOnly) { return textInfo; }
 157
 158             TextInfo clonedTextInfo = (TextInfo)(textInfo.MemberwiseClone());
 159             clonedTextInfo.SetReadOnlyState(true);
 160
 161             return clonedTextInfo;
 162         }
 163
 164         private void VerifyWritable()
 165         {
 166             if (_isReadOnly)
 167             {
 168                 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
 169             }
 170         }
 171
 172         internal void SetReadOnlyState(bool readOnly)
 173         {
 174             _isReadOnly = readOnly;
 175         }
 176
 177
 178         ////////////////////////////////////////////////////////////////////////
 179         //
 180         //  ListSeparator
 181         //
 182         //  Returns the string used to separate items in a list.
 183         //
 184         ////////////////////////////////////////////////////////////////////////
 185         public virtual string ListSeparator
 186         {
 187             get
 188             {
 189                 if (_listSeparator == null)
 190                 {
 191                     _listSeparator = _cultureData.SLIST;
 192                 }
 193                 return _listSeparator;
 194             }
 195
 196             set
 197             {
 198                 if (value == null)
 199                 {
 200                     throw new ArgumentNullException(nameof(value), SR.ArgumentNull_String);
 201                 }
 202                 VerifyWritable();
 203                 _listSeparator = value;
 204             }
 205         }
 206
 207         ////////////////////////////////////////////////////////////////////////
 208         //
 209         //  ToLower
 210         //
 211         //  Converts the character or string to lower case.  Certain locales
 212         //  have different casing semantics from the file systems in Win32.
 213         //
 214         ////////////////////////////////////////////////////////////////////////
 215         public unsafe virtual char ToLower(char c)
 216         {
 217             if (_invariantMode || (IsAscii(c) && IsAsciiCasingSameAsInvariant))
 218             {
 219                 return ToLowerAsciiInvariant(c);
 220             }
 221
 222             return ChangeCase(c, toUpper: false);
 223         }
 224
 225         public unsafe virtual string ToLower(string str)
 226         {
 227             if (str == null) { throw new ArgumentNullException(nameof(str)); }
 228
 229             if (_invariantMode)
 230             {
 231                 return ToLowerAsciiInvariant(str);
 232             }
 233
 234             return ChangeCase(str, toUpper: false);
 235         }
 236
 237         private unsafe string ToLowerAsciiInvariant(string s)
 238         {
 239             if (s.Length == 0)
 240             {
 241                 return string.Empty;
 242             }
 243
 244             fixed (char* pSource = s)
 245             {
 246                 int i = 0;
 247                 while (i < s.Length)
 248                 {
 249                     if ((uint)(pSource[i] - 'A') <= (uint)('Z' - 'A'))
 250                     {
 251                         break;
 252                     }
 253                     i++;
 254                 }
 255
 256                 if (i >= s.Length)
 257                 {
 258                     return s;
 259                 }
 260
 261                 string result = string.FastAllocateString(s.Length);
 262                 fixed (char* pResult = result)
 263                 {
 264                     for (int j = 0; j < i; j++)
 265                     {
 266                         pResult[j] = pSource[j];
 267                     }
 268
 269                     pResult[i] = (char)(pSource[i] | 0x20);
 270                     i++;
 271
 272                     while (i < s.Length)
 273                     {
 274                         pResult[i] = ToLowerAsciiInvariant(pSource[i]);
 275                         i++;
 276                     }
 277                 }
 278
 279                 return result;
 280             }
 281         }
 282
 283         private unsafe string ToUpperAsciiInvariant(string s)
 284         {
 285             if (s.Length == 0)
 286             {
 287                 return string.Empty;
 288             }
 289
 290             fixed (char* pSource = s)
 291             {
 292                 int i = 0;
 293                 while (i < s.Length)
 294                 {
 295                     if ((uint)(pSource[i] - 'a') <= (uint)('z' - 'a'))
 296                     {
 297                         break;
 298                     }
 299                     i++;
 300                 }
 301
 302                 if (i >= s.Length)
 303                 {
 304                     return s;
 305                 }
 306
 307                 string result = string.FastAllocateString(s.Length);
 308                 fixed (char* pResult = result)
 309                 {
 310                     for (int j = 0; j < i; j++)
 311                     {
 312                         pResult[j] = pSource[j];
 313                     }
 314
 315                     pResult[i] = (char)(pSource[i] & ~0x20);
 316                     i++;
 317
 318                     while (i < s.Length)
 319                     {
 320                         pResult[i] = ToUpperAsciiInvariant(pSource[i]);
 321                         i++;
 322                     }
 323                 }
 324
 325                 return result;
 326             }
 327         }
 328
 329         private static char ToLowerAsciiInvariant(char c)
 330         {
 331             if ((uint)(c - 'A') <= (uint)('Z' - 'A'))
 332             {
 333                 c = (char)(c | 0x20);
 334             }
 335             return c;
 336         }
 337
 338         ////////////////////////////////////////////////////////////////////////
 339         //
 340         //  ToUpper
 341         //
 342         //  Converts the character or string to upper case.  Certain locales
 343         //  have different casing semantics from the file systems in Win32.
 344         //
 345         ////////////////////////////////////////////////////////////////////////
 346         public unsafe virtual char ToUpper(char c)
 347         {
 348             if (_invariantMode || (IsAscii(c) && IsAsciiCasingSameAsInvariant))
 349             {
 350                 return ToUpperAsciiInvariant(c);
 351             }
 352
 353             return ChangeCase(c, toUpper: true);
 354         }
 355
 356         public unsafe virtual string ToUpper(string str)
 357         {
 358             if (str == null) { throw new ArgumentNullException(nameof(str)); }
 359
 360             if (_invariantMode)
 361             {
 362                 return ToUpperAsciiInvariant(str);
 363             }
 364
 365             return ChangeCase(str, toUpper: true);
 366         }
 367
 368         internal static char ToUpperAsciiInvariant(char c)
 369         {
 370             if ((uint)(c - 'a') <= (uint)('z' - 'a'))
 371             {
 372                 c = (char)(c & ~0x20);
 373             }
 374             return c;
 375         }
 376
 377         private static bool IsAscii(char c)
 378         {
 379             return c < 0x80;
 380         }
 381
 382         private bool IsAsciiCasingSameAsInvariant
 383         {
 384             get
 385             {
 386                 if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
 387                 {
 388                     _isAsciiCasingSameAsInvariant = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo.Compare("abcdefghijklmnopqrstuvwxyz",
 389                                                                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
 390                                                                              CompareOptions.IgnoreCase) == 0 ? Tristate.True : Tristate.False;
 391                 }
 392                 return _isAsciiCasingSameAsInvariant == Tristate.True;
 393             }
 394         }
 395
 396         // IsRightToLeft
 397         //
 398         // Returns true if the dominant direction of text and UI such as the relative position of buttons and scroll bars
 399         //
 400         public bool IsRightToLeft => _cultureData.IsRightToLeft;
 401
 402         ////////////////////////////////////////////////////////////////////////
 403         //
 404         //  Equals
 405         //
 406         //  Implements Object.Equals().  Returns a boolean indicating whether
 407         //  or not object refers to the same CultureInfo as the current instance.
 408         //
 409         ////////////////////////////////////////////////////////////////////////
 410         public override bool Equals(Object obj)
 411         {
 412             TextInfo that = obj as TextInfo;
 413
 414             if (that != null)
 415             {
 416                 return CultureName.Equals(that.CultureName);
 417             }
 418
 419             return false;
 420         }
 421
 422         ////////////////////////////////////////////////////////////////////////
 423         //
 424         //  GetHashCode
 425         //
 426         //  Implements Object.GetHashCode().  Returns the hash code for the
 427         //  CultureInfo.  The hash code is guaranteed to be the same for CultureInfo A
 428         //  and B where A.Equals(B) is true.
 429         //
 430         ////////////////////////////////////////////////////////////////////////
 431         public override int GetHashCode()
 432         {
 433             return CultureName.GetHashCode();
 434         }
 435
 436         ////////////////////////////////////////////////////////////////////////
 437         //
 438         //  ToString
 439         //
 440         //  Implements Object.ToString().  Returns a string describing the
 441         //  TextInfo.
 442         //
 443         ////////////////////////////////////////////////////////////////////////
 444         public override string ToString()
 445         {
 446             return "TextInfo - " + _cultureData.CultureName;
 447         }
 448
 449         //
 450         // Titlecasing:
 451         // -----------
 452         // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
 453         // and the rest of the letters are lowercase.  The choice of which words to titlecase in headings
 454         // and titles is dependent on language and local conventions.  For example, "The Merry Wives of Windor"
 455         // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
 456         // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
 457         // are not titlecased.  In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
 458         //
 459         // Moreover, the determination of what actually constitutes a word is language dependent, and this can
 460         // influence which letter or letters of a "word" are uppercased when titlecasing strings.  For example
 461         // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
 462         //
 463         public unsafe string ToTitleCase(string str)
 464         {
 465             if (str == null)
 466             {
 467                 throw new ArgumentNullException(nameof(str));
 468             }
 469             if (str.Length == 0)
 470             {
 471                 return str;
 472             }
 473
 474             StringBuilder result = new StringBuilder();
 475             string lowercaseData = null;
 476             // Store if the current culture is Dutch (special case)
 477             bool isDutchCulture = CultureName.StartsWith("nl-", StringComparison.OrdinalIgnoreCase);
 478
 479             for (int i = 0; i < str.Length; i++)
 480             {
 481                 UnicodeCategory charType;
 482                 int charLen;
 483
 484                 charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
 485                 if (char.CheckLetter(charType))
 486                 {
 487                     // Special case to check for Dutch specific titlecasing with "IJ" characters
 488                     // at the beginning of a word
 489                     if (isDutchCulture && i < str.Length - 1 && (str[i] == 'i' || str[i] == 'I') && (str[i+1] == 'j' || str[i+1] == 'J'))
 490                     {
 491                         result.Append("IJ");
 492                         i += 2;
 493                     }
 494                     else
 495                     {
 496                         // Do the titlecasing for the first character of the word.
 497                         i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
 498                     }
 499
 500                     //
 501                     // Convert the characters until the end of the this word
 502                     // to lowercase.
 503                     //
 504                     int lowercaseStart = i;
 505
 506                     //
 507                     // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
 508                     // This is in line with Word 2000 behavior of titlecasing.
 509                     //
 510                     bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);
 511                     // Use a loop to find all of the other letters following this letter.
 512                     while (i < str.Length)
 513                     {
 514                         charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
 515                         if (IsLetterCategory(charType))
 516                         {
 517                             if (charType == UnicodeCategory.LowercaseLetter)
 518                             {
 519                                 hasLowerCase = true;
 520                             }
 521                             i += charLen;
 522                         }
 523                         else if (str[i] == '\'')
 524                         {
 525                             i++;
 526                             if (hasLowerCase)
 527                             {
 528                                 if (lowercaseData == null)
 529                                 {
 530                                     lowercaseData = ToLower(str);
 531                                 }
 532                                 result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
 533                             }
 534                             else
 535                             {
 536                                 result.Append(str, lowercaseStart, i - lowercaseStart);
 537                             }
 538                             lowercaseStart = i;
 539                             hasLowerCase = true;
 540                         }
 541                         else if (!IsWordSeparator(charType))
 542                         {
 543                             // This category is considered to be part of the word.
 544                             // This is any category that is marked as false in wordSeprator array.
 545                             i+= charLen;
 546                         }
 547                         else
 548                         {
 549                             // A word separator. Break out of the loop.
 550                             break;
 551                         }
 552                     }
 553
 554                     int count = i - lowercaseStart;
 555
 556                     if (count > 0)
 557                     {
 558                         if (hasLowerCase)
 559                         {
 560                             if (lowercaseData == null)
 561                             {
 562                                 lowercaseData = ToLower(str);
 563                             }
 564                             result.Append(lowercaseData, lowercaseStart, count);
 565                         }
 566                         else
 567                         {
 568                             result.Append(str, lowercaseStart, count);
 569                         }
 570                     }
 571
 572                     if (i < str.Length)
 573                     {
 574                         // not a letter, just append it
 575                         i = AddNonLetter(ref result, ref str, i, charLen);
 576                     }
 577                 }
 578                 else
 579                 {
 580                     // not a letter, just append it
 581                     i = AddNonLetter(ref result, ref str, i, charLen);
 582                 }
 583             }
 584             return result.ToString();
 585         }
 586
 587         private static int AddNonLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
 588         {
 589             Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
 590             if (charLen == 2)
 591             {
 592                 // Surrogate pair
 593                 result.Append(input[inputIndex++]);
 594                 result.Append(input[inputIndex]);
 595             }
 596             else
 597             {
 598                 result.Append(input[inputIndex]);
 599             }
 600             return inputIndex;
 601         }
 602
 603         private int AddTitlecaseLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
 604         {
 605             Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
 606
 607             // for surrogate pairs do a simple ToUpper operation on the substring
 608             if (charLen == 2)
 609             {
 610                 // Surrogate pair
 611                 result.Append(ToUpper(input.Substring(inputIndex, charLen)));
 612                 inputIndex++;
 613             }
 614             else
 615             {
 616                 switch (input[inputIndex])
 617                 {
 618                     //
 619                     // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
 620                     case (char) 0x01C4:  // DZ with Caron -> Dz with Caron
 621                     case (char) 0x01C5:  // Dz with Caron -> Dz with Caron
 622                     case (char) 0x01C6:  // dz with Caron -> Dz with Caron
 623                         result.Append((char) 0x01C5);
 624                         break;
 625                     case (char) 0x01C7:  // LJ -> Lj
 626                     case (char) 0x01C8:  // Lj -> Lj
 627                     case (char) 0x01C9:  // lj -> Lj
 628                         result.Append((char) 0x01C8);
 629                         break;
 630                     case (char) 0x01CA:  // NJ -> Nj
 631                     case (char) 0x01CB:  // Nj -> Nj
 632                     case (char) 0x01CC:  // nj -> Nj
 633                         result.Append((char) 0x01CB);
 634                         break;
 635                     case (char) 0x01F1:  // DZ -> Dz
 636                     case (char) 0x01F2:  // Dz -> Dz
 637                     case (char) 0x01F3:  // dz -> Dz
 638                         result.Append((char) 0x01F2);
 639                         break;
 640                     default:
 641                         result.Append(ToUpper(input[inputIndex]));
 642                         break;
 643                 }
 644             }
 645             return inputIndex;
 646         }
 647
 648         //
 649         // Used in ToTitleCase():
 650         // When we find a starting letter, the following array decides if a category should be
 651         // considered as word seprator or not.
 652         //
 653         private const int c_wordSeparatorMask =
 654             /* false */ (0 <<  0) | // UppercaseLetter = 0,
 655             /* false */ (0 <<  1) | // LowercaseLetter = 1,
 656             /* false */ (0 <<  2) | // TitlecaseLetter = 2,
 657             /* false */ (0 <<  3) | // ModifierLetter = 3,
 658             /* false */ (0 <<  4) | // OtherLetter = 4,
 659             /* false */ (0 <<  5) | // NonSpacingMark = 5,
 660             /* false */ (0 <<  6) | // SpacingCombiningMark = 6,
 661             /* false */ (0 <<  7) | // EnclosingMark = 7,
 662             /* false */ (0 <<  8) | // DecimalDigitNumber = 8,
 663             /* false */ (0 <<  9) | // LetterNumber = 9,
 664             /* false */ (0 << 10) | // OtherNumber = 10,
 665             /* true  */ (1 << 11) | // SpaceSeparator = 11,
 666             /* true  */ (1 << 12) | // LineSeparator = 12,
 667             /* true  */ (1 << 13) | // ParagraphSeparator = 13,
 668             /* true  */ (1 << 14) | // Control = 14,
 669             /* true  */ (1 << 15) | // Format = 15,
 670             /* false */ (0 << 16) | // Surrogate = 16,
 671             /* false */ (0 << 17) | // PrivateUse = 17,
 672             /* true  */ (1 << 18) | // ConnectorPunctuation = 18,
 673             /* true  */ (1 << 19) | // DashPunctuation = 19,
 674             /* true  */ (1 << 20) | // OpenPunctuation = 20,
 675             /* true  */ (1 << 21) | // ClosePunctuation = 21,
 676             /* true  */ (1 << 22) | // InitialQuotePunctuation = 22,
 677             /* true  */ (1 << 23) | // FinalQuotePunctuation = 23,
 678             /* true  */ (1 << 24) | // OtherPunctuation = 24,
 679             /* true  */ (1 << 25) | // MathSymbol = 25,
 680             /* true  */ (1 << 26) | // CurrencySymbol = 26,
 681             /* true  */ (1 << 27) | // ModifierSymbol = 27,
 682             /* true  */ (1 << 28) | // OtherSymbol = 28,
 683             /* false */ (0 << 29);  // OtherNotAssigned = 29;
 684
 685         private static bool IsWordSeparator(UnicodeCategory category)
 686         {
 687             return (c_wordSeparatorMask & (1 << (int) category)) != 0;
 688         }
 689
 690         private static bool IsLetterCategory(UnicodeCategory uc)
 691         {
 692             return (uc == UnicodeCategory.UppercaseLetter
 693                  || uc == UnicodeCategory.LowercaseLetter
 694                  || uc == UnicodeCategory.TitlecaseLetter
 695                  || uc == UnicodeCategory.ModifierLetter
 696                  || uc == UnicodeCategory.OtherLetter);
 697         }
 698
 699         //
 700         // Get case-insensitive hash code for the specified string.
 701         //
 702         internal unsafe int GetCaseInsensitiveHashCode(string str)
 703         {
 704             // Validate inputs
 705             if (str == null)
 706             {
 707                 throw new ArgumentNullException(nameof(str));
 708             }
 709
 710             // This code assumes that ASCII casing is safe for whatever context is passed in.
 711             // this is true today, because we only ever call these methods on Invariant.  It would be ideal to refactor
 712             // these methods so they were correct by construction and we could only ever use Invariant.
 713
 714             uint hash = 5381;
 715             uint c;
 716
 717             // Note: We assume that str contains only ASCII characters until
 718             // we hit a non-ASCII character to optimize the common case.
 719             for (int i = 0; i < str.Length; i++)
 720             {
 721                 c = str[i];
 722                 if (c >= 0x80)
 723                 {
 724                     return GetCaseInsensitiveHashCodeSlow(str);
 725                 }
 726
 727                 // If we have a lowercase character, ANDing off 0x20
 728                 // will make it an uppercase character.
 729                 if ((c - 'a') <= ('z' - 'a'))
 730                 {
 731                     c = (uint)((int)c & ~0x20);
 732                 }
 733
 734                 hash = ((hash << 5) + hash) ^ c;
 735             }
 736
 737             return (int)hash;
 738         }
 739
 740         private unsafe int GetCaseInsensitiveHashCodeSlow(string str)
 741         {
 742             Debug.Assert(str != null);
 743
 744             string upper = ToUpper(str);
 745
 746             uint hash = 5381;
 747             uint c;
 748
 749             for (int i = 0; i < upper.Length; i++)
 750             {
 751                 c = upper[i];
 752                 hash = ((hash << 5) + hash) ^ c;
 753             }
 754
 755             return (int)hash;
 756         }
 757     }
 758 }