src/mscorlib/src/System/Globalization/TextInfo.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 ////////////////////////////////////////////////////////////////////////////
   6 //
   7 //
   8 //  Purpose:  This Class defines behaviors specific to a writing system.
   9 //            A writing system is the collection of scripts and
  10 //            orthographic rules required to represent a language as text.
  11 //
  12 //
  13 ////////////////////////////////////////////////////////////////////////////
  14
  15 using System.Security;
  16
  17 namespace System.Globalization {
  18     using System;
  19     using System.Text;
  20     using System.Threading;
  21     using System.Runtime;
  22     using System.Runtime.InteropServices;
  23     using System.Runtime.CompilerServices;
  24     using System.Runtime.Serialization;
  25     using System.Runtime.Versioning;
  26     using System.Security.Permissions;
  27     using System.Diagnostics.Contracts;
  28
  29
  30     [Serializable]
  31     [System.Runtime.InteropServices.ComVisible(true)]
  32     public partial class TextInfo : ICloneable, IDeserializationCallback
  33     {
  34         //--------------------------------------------------------------------//
  35         //                        Internal Information                        //
  36         //--------------------------------------------------------------------//
  37
  38
  39         //
  40         //  Variables.
  41         //
  42
  43         [OptionalField(VersionAdded = 2)]
  44         private String m_listSeparator;
  45
  46         [OptionalField(VersionAdded = 2)]
  47         private bool m_isReadOnly = false;
  48
  49         //
  50         // In Whidbey we had several names:
  51         //      m_win32LangID is the name of the culture, but only used for (de)serialization.
  52         //      customCultureName is the name of the creating custom culture (if custom)  In combination with m_win32LangID
  53         //              this is authoritative, i.e. when deserializing.
  54         //      m_cultureTableRecord was the data record of the creating culture.  (could have different name if custom)
  55         //      m_textInfoID is the LCID of the textinfo itself (no longer used)
  56         //      m_name is the culture name (from cultureinfo.name)
  57         //
  58         // In Silverlight/Arrowhead this is slightly different:
  59         //      m_cultureName is the name of the creating culture.  Note that we consider this authoritative,
  60         //              if the culture's textinfo changes when deserializing, then behavior may change.
  61         //              (ala Whidbey behavior).  This is the only string Arrowhead needs to serialize.
  62         //      m_cultureData is the data that backs this class.
  63         //      m_textInfoName  is the actual name of the textInfo (from cultureData.STEXTINFO)
  64         //              m_textInfoName can be the same as m_cultureName on Silverlight since the OS knows
  65         //              how to do the sorting. However in the desktop, when we call the sorting dll, it doesn't
  66         //              know how to resolve custom locale names to sort ids so we have to have already resolved this.
  67         //
  68
  69         [OptionalField(VersionAdded = 3)]
  70         private String                          m_cultureName;      // Name of the culture that created this text info
  71         [NonSerialized]private CultureData      m_cultureData;      // Data record for the culture that made us, not for this textinfo
  72         [NonSerialized]private String           m_textInfoName;     // Name of the text info we're using (ie: m_cultureData.STEXTINFO)
  73         [NonSerialized]private IntPtr           m_dataHandle;       // Sort handle
  74         [NonSerialized]private IntPtr           m_handleOrigin;
  75         [NonSerialized]private bool?            m_IsAsciiCasingSameAsInvariant;
  76
  77
  78         // Invariant text info
  79         internal static TextInfo Invariant
  80         {
  81             get
  82             {
  83                 if (s_Invariant == null)
  84                     s_Invariant = new TextInfo(CultureData.Invariant);
  85                 return s_Invariant;
  86             }
  87         }
  88         internal volatile static TextInfo s_Invariant;
  89
  90         ////////////////////////////////////////////////////////////////////////
  91         //
  92         //  TextInfo Constructors
  93         //
  94         //  Implements CultureInfo.TextInfo.
  95         //
  96         ////////////////////////////////////////////////////////////////////////
  97         internal TextInfo(CultureData cultureData)
  98         {
  99             // This is our primary data source, we don't need most of the rest of this
 100             this.m_cultureData = cultureData;
 101             this.m_cultureName = this.m_cultureData.CultureName;
 102             this.m_textInfoName = this.m_cultureData.STEXTINFO;
 103 #if !FEATURE_CORECLR
 104             IntPtr handleOrigin;
 105             this.m_dataHandle = CompareInfo.InternalInitSortHandle(m_textInfoName, out handleOrigin);
 106             this.m_handleOrigin = handleOrigin;
 107 #endif
 108         }
 109
 110         ////////////////////////////////////////////////////////////////////////
 111         //
 112         //  Serialization / Deserialization
 113         //
 114         //  Note that we have to respect the Whidbey behavior for serialization compatibility
 115         //
 116         ////////////////////////////////////////////////////////////////////////
 117
 118 #region Serialization
 119         // the following fields are defined to keep the compatibility with Whidbey.
 120         // don't change/remove the names/types of these fields.
 121         [OptionalField(VersionAdded = 2)]
 122         private string customCultureName;
 123
 124         // the following fields are defined to keep compatibility with Everett.
 125         // don't change/remove the names/types of these fields.
 126         [OptionalField(VersionAdded = 1)]
 127         internal int    m_nDataItem;
 128         [OptionalField(VersionAdded = 1)]
 129         internal bool   m_useUserOverride;
 130         [OptionalField(VersionAdded = 1)]
 131         internal int    m_win32LangID;
 132
 133
 134         [OnDeserializing]
 135         private void OnDeserializing(StreamingContext ctx)
 136         {
 137             // Clear these so we can check if we've fixed them yet
 138             this.m_cultureData = null;
 139             this.m_cultureName = null;
 140         }
 141
 142         private void OnDeserialized()
 143         {
 144             // this method will be called twice because of the support of IDeserializationCallback
 145             if (this.m_cultureData == null)
 146             {
 147                 if (this.m_cultureName == null)
 148                 {
 149                     // This is whidbey data, get it from customCultureName/win32langid
 150                     if (this.customCultureName != null)
 151                     {
 152                         // They gave a custom cultuer name, so use that
 153                         this.m_cultureName = this.customCultureName;
 154                     }
 155 #if FEATURE_USE_LCID
 156                     else
 157                     {
 158                         if (m_win32LangID == 0)
 159                         {
 160                             // m_cultureName and m_win32LangID are nulls which means we got uninitialized textinfo serialization stream.
 161                             // To be compatible with v2/3/3.5 we need to return ar-SA TextInfo in this case.
 162                             m_cultureName = "ar-SA";
 163                         }
 164                         else
 165                         {
 166                             // No custom culture, use the name from the LCID
 167                             m_cultureName = CultureInfo.GetCultureInfo(m_win32LangID).m_cultureData.CultureName;
 168                         }
 169                     }
 170 #endif
 171                 }
 172
 173                 // Get the text info name belonging to that culture
 174                 this.m_cultureData = CultureInfo.GetCultureInfo(m_cultureName).m_cultureData;
 175                 this.m_textInfoName = this.m_cultureData.STEXTINFO;
 176 #if !FEATURE_CORECLR
 177                 IntPtr handleOrigin;
 178                 this.m_dataHandle = CompareInfo.InternalInitSortHandle(m_textInfoName, out handleOrigin);
 179                 this.m_handleOrigin = handleOrigin;
 180 #endif
 181             }
 182         }
 183
 184
 185         [OnDeserialized]
 186         private void OnDeserialized(StreamingContext ctx)
 187         {
 188             OnDeserialized();
 189         }
 190
 191         [OnSerializing]
 192         private void OnSerializing(StreamingContext ctx)
 193         {
 194 #if !FEATURE_CORECLR
 195             // Initialize the fields Whidbey expects:
 196             // Whidbey expected this, so set it, but the value doesn't matter much
 197             this.m_useUserOverride = false;
 198 #endif // FEATURE_CORECLR
 199
 200             // Relabel our name since Whidbey expects it to be called customCultureName
 201             this.customCultureName = this.m_cultureName;
 202
 203 #if FEATURE_USE_LCID
 204             // Ignore the m_win32LangId because whidbey'll just get it by name if we make it the LOCALE_CUSTOM_UNSPECIFIED.
 205             this.m_win32LangID     = (CultureInfo.GetCultureInfo(m_cultureName)).LCID;
 206 #endif
 207         }
 208
 209 #endregion Serialization
 210
 211         //
 212         // Internal ordinal comparison functions
 213         //
 214         internal static int GetHashCodeOrdinalIgnoreCase(String s)
 215         {
 216             return GetHashCodeOrdinalIgnoreCase(s, false, 0);
 217         }
 218
 219         internal static int GetHashCodeOrdinalIgnoreCase(String s, bool forceRandomizedHashing, long additionalEntropy)
 220         {
 221             // This is the same as an case insensitive hash for Invariant
 222             // (not necessarily true for sorting, but OK for casing & then we apply normal hash code rules)
 223             return (Invariant.GetCaseInsensitiveHashCode(s, forceRandomizedHashing, additionalEntropy));
 224         }
 225
 226         [System.Security.SecuritySafeCritical]
 227         internal static unsafe bool TryFastFindStringOrdinalIgnoreCase(int searchFlags, String source, int startIndex, String value, int count, ref int foundIndex)
 228         {
 229             return InternalTryFindStringOrdinalIgnoreCase(searchFlags, source, count, startIndex, value, value.Length, ref foundIndex);
 230         }
 231
 232         // This function doesn't check arguments. Please do check in the caller.
 233         // The underlying unmanaged code will assert the sanity of arguments.
 234         [System.Security.SecuritySafeCritical]  // auto-generated
 235         internal static unsafe int CompareOrdinalIgnoreCase(String str1, String str2)
 236         {
 237             // Compare the whole string and ignore case.
 238             return InternalCompareStringOrdinalIgnoreCase(str1, 0, str2, 0, str1.Length, str2.Length);
 239         }
 240
 241         // This function doesn't check arguments. Please do check in the caller.
 242         // The underlying unmanaged code will assert the sanity of arguments.
 243         [System.Security.SecuritySafeCritical]  // auto-generated
 244         internal static unsafe int CompareOrdinalIgnoreCaseEx(String strA, int indexA, String strB, int indexB, int lengthA, int lengthB )
 245         {
 246             Contract.Assert(strA.Length >= indexA + lengthA,  "[TextInfo.CompareOrdinalIgnoreCaseEx] Caller should've validated strA.Length >= indexA + lengthA");
 247             Contract.Assert(strB.Length >= indexB + lengthB, "[TextInfo.CompareOrdinalIgnoreCaseEx]  Caller should've validated strB.Length >= indexB + lengthB");
 248             return InternalCompareStringOrdinalIgnoreCase(strA, indexA, strB, indexB, lengthA, lengthB);
 249         }
 250
 251         internal static int IndexOfStringOrdinalIgnoreCase(String source, String value, int startIndex, int count)
 252         {
 253             Contract.Assert(source != null, "[TextInfo.IndexOfStringOrdinalIgnoreCase] Caller should've validated source != null");
 254             Contract.Assert(value != null, "[TextInfo.IndexOfStringOrdinalIgnoreCase] Caller should've validated value != null");
 255             Contract.Assert(startIndex + count <= source.Length, "[TextInfo.IndexOfStringOrdinalIgnoreCase] Caller should've validated startIndex + count <= source.Length");
 256
 257             // We return 0 if both inputs are empty strings
 258             if (source.Length == 0 && value.Length == 0)
 259             {
 260                 return 0;
 261             }
 262
 263             // fast path
 264             int ret = -1;
 265             if (TryFastFindStringOrdinalIgnoreCase(Microsoft.Win32.Win32Native.FIND_FROMSTART, source, startIndex, value, count, ref ret))
 266                 return ret;
 267
 268             // the search space within [source] starts at offset [startIndex] inclusive and includes
 269             // [count] characters (thus the last included character is at index [startIndex + count -1]
 270             // [end] is the index of the next character after the search space
 271             // (it points past the end of the search space)
 272             int end = startIndex + count;
 273
 274             // maxStartIndex is the index beyond which we never *start* searching, inclusive; in other words;
 275             // a search could include characters beyond maxStartIndex, but we'd never begin a search at an
 276             // index strictly greater than maxStartIndex.
 277             int maxStartIndex = end - value.Length;
 278
 279             for (; startIndex <= maxStartIndex; startIndex++)
 280             {
 281                 // We should always have the same or more characters left to search than our actual pattern
 282                 Contract.Assert(end - startIndex >= value.Length);
 283                 // since this is an ordinal comparison, we can assume that the lengths must match
 284                 if (CompareOrdinalIgnoreCaseEx(source, startIndex, value, 0, value.Length, value.Length) == 0)
 285                 {
 286                     return startIndex;
 287                 }
 288             }
 289
 290             // Not found
 291             return -1;
 292         }
 293
 294         internal static int LastIndexOfStringOrdinalIgnoreCase(String source, String value, int startIndex, int count)
 295         {
 296             Contract.Assert(source != null, "[TextInfo.LastIndexOfStringOrdinalIgnoreCase] Caller should've validated source != null");
 297             Contract.Assert(value != null, "[TextInfo.LastIndexOfStringOrdinalIgnoreCase] Caller should've validated value != null");
 298             Contract.Assert(startIndex - count+1 >= 0, "[TextInfo.LastIndexOfStringOrdinalIgnoreCase] Caller should've validated startIndex - count+1 >= 0");
 299             Contract.Assert(startIndex <= source.Length, "[TextInfo.LastIndexOfStringOrdinalIgnoreCase] Caller should've validated startIndex <= source.Length");
 300
 301             // If value is Empty, the return value is startIndex
 302             if (value.Length == 0)
 303             {
 304                 return startIndex;
 305             }
 306
 307             // fast path
 308             int ret = -1;
 309             if (TryFastFindStringOrdinalIgnoreCase(Microsoft.Win32.Win32Native.FIND_FROMEND, source, startIndex, value, count, ref ret))
 310                 return ret;
 311
 312             // the search space within [source] ends at offset [startIndex] inclusive
 313             // and includes [count] characters
 314             // minIndex is the first included character and is at index [startIndex - count + 1]
 315             int minIndex = startIndex - count + 1;
 316
 317             // First place we can find it is start index - (value.length -1)
 318             if (value.Length > 0)
 319             {
 320                 startIndex -= (value.Length - 1);
 321             }
 322
 323             for (; startIndex >= minIndex; startIndex--)
 324             {
 325                 if (CompareOrdinalIgnoreCaseEx(source, startIndex, value, 0, value.Length, value.Length) == 0)
 326                 {
 327                     return startIndex;
 328                 }
 329             }
 330
 331             // Not found
 332             return -1;
 333         }
 334
 335
 336         ////////////////////////////////////////////////////////////////////////
 337         //
 338         //  CodePage
 339         //
 340         //  Returns the number of the code page used by this writing system.
 341         //  The type parameter can be any of the following values:
 342         //      ANSICodePage
 343         //      OEMCodePage
 344         //      MACCodePage
 345         //
 346         ////////////////////////////////////////////////////////////////////////
 347
 348
 349         public virtual int ANSICodePage
 350         {
 351             get
 352             {
 353                 return (this.m_cultureData.IDEFAULTANSICODEPAGE);
 354             }
 355         }
 356
 357
 358         public virtual int OEMCodePage
 359         {
 360             get
 361             {
 362                 return (this.m_cultureData.IDEFAULTOEMCODEPAGE);
 363             }
 364         }
 365
 366
 367         public virtual int MacCodePage
 368         {
 369             get
 370             {
 371                 return (this.m_cultureData.IDEFAULTMACCODEPAGE);
 372             }
 373         }
 374
 375
 376         public virtual int EBCDICCodePage
 377         {
 378             get
 379             {
 380                 return (this.m_cultureData.IDEFAULTEBCDICCODEPAGE);
 381             }
 382         }
 383
 384         ////////////////////////////////////////////////////////////////////////
 385         //
 386         //  LCID
 387         //
 388         //  We need a way to get an LCID from outside of the BCL. This prop is the way.
 389         //  NOTE: neutral cultures will cause GPS incorrect LCIDS from this
 390         //
 391         ////////////////////////////////////////////////////////////////////////
 392
 393 #if FEATURE_USE_LCID
 394         [System.Runtime.InteropServices.ComVisible(false)]
 395         public int LCID
 396         {
 397             get
 398             {
 399                 // Just use the LCID from our text info name
 400                 return CultureInfo.GetCultureInfo(this.m_textInfoName).LCID;
 401             }
 402         }
 403 #endif
 404         ////////////////////////////////////////////////////////////////////////
 405         //
 406         //  CultureName
 407         //
 408         //  The name of the culture associated with the current TextInfo.
 409         //
 410         ////////////////////////////////////////////////////////////////////////
 411         [System.Runtime.InteropServices.ComVisible(false)]
 412         public string CultureName
 413         {
 414             get
 415             {
 416                 return(this.m_textInfoName);
 417             }
 418         }
 419
 420         ////////////////////////////////////////////////////////////////////////
 421         //
 422         //  IsReadOnly
 423         //
 424         //  Detect if the object is readonly.
 425         //
 426         ////////////////////////////////////////////////////////////////////////
 427         [System.Runtime.InteropServices.ComVisible(false)]
 428         public bool IsReadOnly
 429         {
 430             get { return (m_isReadOnly); }
 431         }
 432
 433         ////////////////////////////////////////////////////////////////////////
 434         //
 435         //  Clone
 436         //
 437         //  Is the implementation of ICloneable.
 438         //
 439         ////////////////////////////////////////////////////////////////////////
 440         [System.Runtime.InteropServices.ComVisible(false)]
 441         public virtual Object Clone()
 442         {
 443             object o = MemberwiseClone();
 444             ((TextInfo) o).SetReadOnlyState(false);
 445             return (o);
 446         }
 447
 448         ////////////////////////////////////////////////////////////////////////
 449         //
 450         //  ReadOnly
 451         //
 452         //  Create a cloned readonly instance or return the input one if it is
 453         //  readonly.
 454         //
 455         ////////////////////////////////////////////////////////////////////////
 456         [System.Runtime.InteropServices.ComVisible(false)]
 457         public static TextInfo ReadOnly(TextInfo textInfo)
 458         {
 459             if (textInfo == null)       { throw new ArgumentNullException("textInfo"); }
 460             Contract.EndContractBlock();
 461             if (textInfo.IsReadOnly)    { return (textInfo); }
 462
 463             TextInfo clonedTextInfo = (TextInfo)(textInfo.MemberwiseClone());
 464             clonedTextInfo.SetReadOnlyState(true);
 465
 466             return (clonedTextInfo);
 467         }
 468
 469         private void VerifyWritable()
 470         {
 471             if (m_isReadOnly)
 472             {
 473                 throw new InvalidOperationException(Environment.GetResourceString("InvalidOperation_ReadOnly"));
 474             }
 475             Contract.EndContractBlock();
 476         }
 477
 478         internal void SetReadOnlyState(bool readOnly)
 479         {
 480             m_isReadOnly = readOnly;
 481         }
 482
 483
 484         ////////////////////////////////////////////////////////////////////////
 485         //
 486         //  ListSeparator
 487         //
 488         //  Returns the string used to separate items in a list.
 489         //
 490         ////////////////////////////////////////////////////////////////////////
 491
 492
 493         public virtual String ListSeparator
 494         {
 495             [System.Security.SecuritySafeCritical]  // auto-generated
 496             get
 497             {
 498                 if (m_listSeparator == null) {
 499                     m_listSeparator = this.m_cultureData.SLIST;
 500                 }
 501                 return (m_listSeparator);
 502             }
 503
 504             [System.Runtime.InteropServices.ComVisible(false)]
 505             set
 506             {
 507                 if (value == null)
 508                 {
 509                     throw new ArgumentNullException("value", Environment.GetResourceString("ArgumentNull_String"));
 510                 }
 511                 Contract.EndContractBlock();
 512                 VerifyWritable();
 513                 m_listSeparator = value;
 514             }
 515         }
 516
 517         ////////////////////////////////////////////////////////////////////////
 518         //
 519         //  ToLower
 520         //
 521         //  Converts the character or string to lower case.  Certain locales
 522         //  have different casing semantics from the file systems in Win32.
 523         //
 524         ////////////////////////////////////////////////////////////////////////
 525
 526         [System.Security.SecuritySafeCritical]  // auto-generated
 527         public unsafe virtual char ToLower(char c)
 528         {
 529             if(IsAscii(c) && IsAsciiCasingSameAsInvariant)
 530             {
 531                 return ToLowerAsciiInvariant(c);
 532             }
 533             return (InternalChangeCaseChar(this.m_dataHandle, this.m_handleOrigin, this.m_textInfoName, c, false));
 534         }
 535
 536         [System.Security.SecuritySafeCritical]  // auto-generated
 537         public unsafe virtual String ToLower(String str)
 538         {
 539             if (str == null) { throw new ArgumentNullException("str"); }
 540             Contract.EndContractBlock();
 541
 542             return InternalChangeCaseString(this.m_dataHandle, this.m_handleOrigin, this.m_textInfoName, str, false);
 543
 544         }
 545
 546         static private Char ToLowerAsciiInvariant(Char c)
 547         {
 548             if ('A' <= c && c <= 'Z')
 549             {
 550                 c = (Char)(c | 0x20);
 551             }
 552             return c;
 553         }
 554
 555         ////////////////////////////////////////////////////////////////////////
 556         //
 557         //  ToUpper
 558         //
 559         //  Converts the character or string to upper case.  Certain locales
 560         //  have different casing semantics from the file systems in Win32.
 561         //
 562         ////////////////////////////////////////////////////////////////////////
 563
 564         [System.Security.SecuritySafeCritical]  // auto-generated
 565         public unsafe virtual char ToUpper(char c)
 566         {
 567             if (IsAscii(c) && IsAsciiCasingSameAsInvariant)
 568             {
 569                 return ToUpperAsciiInvariant(c);
 570             }
 571             return (InternalChangeCaseChar(this.m_dataHandle, this.m_handleOrigin, this.m_textInfoName, c, true));
 572         }
 573
 574
 575         [System.Security.SecuritySafeCritical]  // auto-generated
 576         public unsafe virtual String ToUpper(String str)
 577         {
 578             if (str == null) { throw new ArgumentNullException("str"); }
 579             Contract.EndContractBlock();
 580             return InternalChangeCaseString(this.m_dataHandle, this.m_handleOrigin, this.m_textInfoName, str, true);
 581         }
 582
 583         static private Char ToUpperAsciiInvariant(Char c)
 584         {
 585             if ('a' <= c && c <= 'z')
 586             {
 587                 c = (Char)(c & ~0x20);
 588             }
 589             return c;
 590         }
 591
 592         static private bool IsAscii(Char c)
 593         {
 594             return c < 0x80;
 595         }
 596
 597         private bool IsAsciiCasingSameAsInvariant
 598         {
 599             get
 600             {
 601                 if (m_IsAsciiCasingSameAsInvariant == null)
 602                 {
 603                     m_IsAsciiCasingSameAsInvariant =
 604                         CultureInfo.GetCultureInfo(m_textInfoName).CompareInfo.Compare("abcdefghijklmnopqrstuvwxyz",
 605                                                                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
 606                                                                              CompareOptions.IgnoreCase) == 0;
 607                 }
 608                 return (bool)m_IsAsciiCasingSameAsInvariant;
 609             }
 610         }
 611
 612         ////////////////////////////////////////////////////////////////////////
 613         //
 614         //  Equals
 615         //
 616         //  Implements Object.Equals().  Returns a boolean indicating whether
 617         //  or not object refers to the same CultureInfo as the current instance.
 618         //
 619         ////////////////////////////////////////////////////////////////////////
 620
 621
 622         public override bool Equals(Object obj)
 623         {
 624             TextInfo that = obj as TextInfo;
 625
 626             if (that != null)
 627             {
 628                 return this.CultureName.Equals(that.CultureName);
 629             }
 630
 631             return (false);
 632         }
 633
 634
 635         ////////////////////////////////////////////////////////////////////////
 636         //
 637         //  GetHashCode
 638         //
 639         //  Implements Object.GetHashCode().  Returns the hash code for the
 640         //  CultureInfo.  The hash code is guaranteed to be the same for CultureInfo A
 641         //  and B where A.Equals(B) is true.
 642         //
 643         ////////////////////////////////////////////////////////////////////////
 644
 645
 646         public override int GetHashCode()
 647         {
 648             return (this.CultureName.GetHashCode());
 649         }
 650
 651
 652         ////////////////////////////////////////////////////////////////////////
 653         //
 654         //  ToString
 655         //
 656         //  Implements Object.ToString().  Returns a string describing the
 657         //  TextInfo.
 658         //
 659         ////////////////////////////////////////////////////////////////////////
 660
 661
 662         public override String ToString()
 663         {
 664             return ("TextInfo - " + this.m_cultureData.CultureName);
 665         }
 666
 667
 668         //
 669         // Titlecasing:
 670         // -----------
 671         // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
 672         // and the rest of the letters are lowercase.  The choice of which words to titlecase in headings
 673         // and titles is dependent on language and local conventions.  For example, "The Merry Wives of Windsor"
 674         // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
 675         // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
 676         // are not titlecased.  In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
 677         //
 678         // Moreover, the determination of what actually constitutes a word is language dependent, and this can
 679         // influence which letter or letters of a "word" are uppercased when titlecasing strings.  For example
 680         // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
 681         //
 682         //
 683         // Differences between UNICODE 5.0 and the .NET Framework:
 684         // -------------------------------------------------------------------------------------
 685         // The .NET Framework previously shipped a naive titlecasing implementation.  Every word is titlecased
 686         // regardless of language or orthographic practice.  Furthermore, apostrophe is always considered to be
 687         // a word joiner as used in English.  The longterm vision is to depend on the operating system for
 688         // titlecasing.  Windows 7 is expected to be the first release with this feature.  On the Macintosh side,
 689         // titlecasing is not available as of version 10.5 of the operating system.
 690         //
 691         public unsafe String ToTitleCase(String str)
 692         {
 693             if (str == null)
 694             {
 695                 throw new ArgumentNullException("str");
 696             }
 697             Contract.EndContractBlock();
 698             if (str.Length == 0)
 699             {
 700                 return (str);
 701             }
 702
 703             StringBuilder result = new StringBuilder();
 704             String lowercaseData = null;
 705
 706             for (int i = 0; i < str.Length; i++)
 707             {
 708                 UnicodeCategory charType;
 709                 int charLen;
 710
 711                 charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
 712                 if (Char.CheckLetter(charType))
 713                 {
 714                     // Do the titlecasing for the first character of the word.
 715                     i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
 716
 717                     //
 718                     // Convert the characters until the end of the this word
 719                     // to lowercase.
 720                     //
 721                     int lowercaseStart = i;
 722
 723                     //
 724                     // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
 725                     // This is in line with Word 2000 behavior of titlecasing.
 726                     //
 727                     bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);
 728                     // Use a loop to find all of the other letters following this letter.
 729                     while (i < str.Length)
 730                     {
 731                         charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
 732                         if (IsLetterCategory(charType))
 733                         {
 734                             if (charType == UnicodeCategory.LowercaseLetter)
 735                             {
 736                                 hasLowerCase = true;
 737                             }
 738                             i += charLen;
 739                         }
 740                         else if (str[i] == '\'')
 741                         {
 742                             i++;
 743                             if (hasLowerCase)
 744                             {
 745                                 if (lowercaseData == null)
 746                                 {
 747                                     lowercaseData = this.ToLower(str);
 748                                 }
 749                                 result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
 750                             }
 751                             else
 752                             {
 753                                 result.Append(str, lowercaseStart, i - lowercaseStart);
 754                             }
 755                             lowercaseStart = i;
 756                             hasLowerCase = true;
 757                         }
 758                         else if (!IsWordSeparator(charType))
 759                         {
 760                             // This category is considered to be part of the word.
 761                             // This is any category that is marked as false in wordSeprator array.
 762                             i+= charLen;
 763                         }
 764                         else
 765                         {
 766                             // A word separator. Break out of the loop.
 767                             break;
 768                         }
 769                     }
 770
 771                     int count = i - lowercaseStart;
 772
 773                     if (count>0)
 774                     {
 775                         if (hasLowerCase)
 776                         {
 777                             if (lowercaseData == null)
 778                             {
 779                                 lowercaseData = this.ToLower(str);
 780                             }
 781                             result.Append(lowercaseData, lowercaseStart, count);
 782                         }
 783                         else
 784                         {
 785                             result.Append(str, lowercaseStart, count);
 786                         }
 787                     }
 788
 789                     if (i < str.Length)
 790                     {
 791                         // not a letter, just append it
 792                         i = AddNonLetter(ref result, ref str, i, charLen);
 793                     }
 794                 }
 795                 else
 796                 {
 797                     // not a letter, just append it
 798                     i = AddNonLetter(ref result, ref str, i, charLen);
 799                 }
 800             }
 801             return (result.ToString());
 802         }
 803
 804         private static int AddNonLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen)
 805         {
 806             Contract.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
 807             if (charLen == 2)
 808             {
 809                 // Surrogate pair
 810                 result.Append(input[inputIndex++]);
 811                 result.Append(input[inputIndex]);
 812             }
 813             else
 814             {
 815                 result.Append(input[inputIndex]);
 816             }
 817             return inputIndex;
 818         }
 819
 820
 821         private int AddTitlecaseLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen)
 822         {
 823             Contract.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
 824
 825             // for surrogate pairs do a simple ToUpper operation on the substring
 826             if (charLen == 2)
 827             {
 828                 // Surrogate pair
 829                 result.Append( this.ToUpper(input.Substring(inputIndex, charLen)) );
 830                 inputIndex++;
 831             }
 832             else
 833             {
 834                 switch (input[inputIndex])
 835                 {
 836                     //
 837                     // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
 838                     case (char)0x01C4:  // DZ with Caron -> Dz with Caron
 839                     case (char)0x01C5:  // Dz with Caron -> Dz with Caron
 840                     case (char)0x01C6:  // dz with Caron -> Dz with Caron
 841                         result.Append( (char)0x01C5 );
 842                         break;
 843                     case (char)0x01C7:  // LJ -> Lj
 844                     case (char)0x01C8:  // Lj -> Lj
 845                     case (char)0x01C9:  // lj -> Lj
 846                         result.Append( (char)0x01C8 );
 847                         break;
 848                     case (char)0x01CA:  // NJ -> Nj
 849                     case (char)0x01CB:  // Nj -> Nj
 850                     case (char)0x01CC:  // nj -> Nj
 851                         result.Append( (char)0x01CB );
 852                         break;
 853                     case (char)0x01F1:  // DZ -> Dz
 854                     case (char)0x01F2:  // Dz -> Dz
 855                     case (char)0x01F3:  // dz -> Dz
 856                         result.Append( (char)0x01F2 );
 857                         break;
 858                     default:
 859                         result.Append( this.ToUpper(input[inputIndex]) );
 860                         break;
 861                 }
 862             }
 863             return inputIndex;
 864         }
 865
 866
 867         //
 868         // Used in ToTitleCase():
 869         // When we find a starting letter, the following array decides if a category should be
 870         // considered as word seprator or not.
 871         //
 872         private const int wordSeparatorMask =
 873             /* false */ (0 <<  0) | // UppercaseLetter = 0,
 874             /* false */ (0 <<  1) | // LowercaseLetter = 1,
 875             /* false */ (0 <<  2) | // TitlecaseLetter = 2,
 876             /* false */ (0 <<  3) | // ModifierLetter = 3,
 877             /* false */ (0 <<  4) | // OtherLetter = 4,
 878             /* false */ (0 <<  5) | // NonSpacingMark = 5,
 879             /* false */ (0 <<  6) | // SpacingCombiningMark = 6,
 880             /* false */ (0 <<  7) | // EnclosingMark = 7,
 881             /* false */ (0 <<  8) | // DecimalDigitNumber = 8,
 882             /* false */ (0 <<  9) | // LetterNumber = 9,
 883             /* false */ (0 << 10) | // OtherNumber = 10,
 884             /* true  */ (1 << 11) | // SpaceSeparator = 11,
 885             /* true  */ (1 << 12) | // LineSeparator = 12,
 886             /* true  */ (1 << 13) | // ParagraphSeparator = 13,
 887             /* true  */ (1 << 14) | // Control = 14,
 888             /* true  */ (1 << 15) | // Format = 15,
 889             /* false */ (0 << 16) | // Surrogate = 16,
 890             /* false */ (0 << 17) | // PrivateUse = 17,
 891             /* true  */ (1 << 18) | // ConnectorPunctuation = 18,
 892             /* true  */ (1 << 19) | // DashPunctuation = 19,
 893             /* true  */ (1 << 20) | // OpenPunctuation = 20,
 894             /* true  */ (1 << 21) | // ClosePunctuation = 21,
 895             /* true  */ (1 << 22) | // InitialQuotePunctuation = 22,
 896             /* true  */ (1 << 23) | // FinalQuotePunctuation = 23,
 897             /* true  */ (1 << 24) | // OtherPunctuation = 24,
 898             /* true  */ (1 << 25) | // MathSymbol = 25,
 899             /* true  */ (1 << 26) | // CurrencySymbol = 26,
 900             /* true  */ (1 << 27) | // ModifierSymbol = 27,
 901             /* true  */ (1 << 28) | // OtherSymbol = 28,
 902             /* false */ (0 << 29);  // OtherNotAssigned = 29;
 903
 904         private static bool IsWordSeparator(UnicodeCategory category)
 905         {
 906             return (wordSeparatorMask & (1 << (int)category)) != 0;
 907         }
 908
 909         private static bool IsLetterCategory(UnicodeCategory uc)
 910         {
 911             return (uc == UnicodeCategory.UppercaseLetter
 912                  || uc == UnicodeCategory.LowercaseLetter
 913                  || uc == UnicodeCategory.TitlecaseLetter
 914                  || uc == UnicodeCategory.ModifierLetter
 915                  || uc == UnicodeCategory.OtherLetter);
 916         }
 917
 918         // IsRightToLeft
 919         //
 920         // Returns true if the dominant direction of text and UI such as the relative position of buttons and scroll bars
 921         //
 922         [System.Runtime.InteropServices.ComVisible(false)]
 923         public bool IsRightToLeft
 924         {
 925             get
 926             {
 927                 return this.m_cultureData.IsRightToLeft;
 928             }
 929         }
 930
 931         /// <internalonly/>
 932         void IDeserializationCallback.OnDeserialization(Object sender)
 933         {
 934             OnDeserialized();
 935         }
 936
 937         //
 938         // Get case-insensitive hash code for the specified string.
 939         //
 940         // NOTENOTE: this is an internal function.  The caller should verify the string
 941         // is not null before calling this.  Currenlty, CaseInsensitiveHashCodeProvider
 942         // does that.
 943         //
 944         [System.Security.SecuritySafeCritical]  // auto-generated
 945         internal unsafe int GetCaseInsensitiveHashCode(String str)
 946         {
 947             return GetCaseInsensitiveHashCode(str, false, 0);
 948         }
 949
 950         [System.Security.SecuritySafeCritical]  // auto-generated
 951         internal unsafe int GetCaseInsensitiveHashCode(String str, bool forceRandomizedHashing, long additionalEntropy)
 952         {
 953             // Validate inputs
 954             if (str==null)
 955             {
 956                  throw new ArgumentNullException("str");
 957             }
 958             Contract.EndContractBlock();
 959
 960             // Return our result
 961             return (InternalGetCaseInsHash(this.m_dataHandle, this.m_handleOrigin, this.m_textInfoName, str, forceRandomizedHashing, additionalEntropy));
 962         }
 963
 964         // Change case (ToUpper/ToLower) -- COMNlsInfo::InternalChangeCaseChar
 965         [System.Security.SecurityCritical]  // auto-generated
 966         [MethodImplAttribute(MethodImplOptions.InternalCall)]
 967         private static unsafe extern char InternalChangeCaseChar(IntPtr handle, IntPtr handleOrigin, String localeName, char ch, bool isToUpper);
 968
 969         // Change case (ToUpper/ToLower) -- COMNlsInfo::InternalChangeCaseString
 970         [System.Security.SecurityCritical]  // auto-generated
 971         [MethodImplAttribute(MethodImplOptions.InternalCall)]
 972         private static unsafe extern String InternalChangeCaseString(IntPtr handle, IntPtr handleOrigin, String localeName, String str, bool isToUpper);
 973
 974         // Get case insensitive hash -- ComNlsInfo::InternalGetCaseInsHash
 975         [System.Security.SecurityCritical]  // auto-generated
 976         [MethodImplAttribute(MethodImplOptions.InternalCall)]
 977         private static unsafe extern int InternalGetCaseInsHash(IntPtr handle, IntPtr handleOrigin, String localeName, String str, bool forceRandomizedHashing, long additionalEntropy);
 978
 979         // Call ::CompareStringOrdinal -- ComNlsInfo::InternalCompareStringOrdinalIgnoreCase
 980         // Start at indexes and compare for length characters (or remainder of string if length == -1)
 981         [System.Security.SecurityCritical]  // auto-generated
 982         [DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode)]
 983         [SuppressUnmanagedCodeSecurity]
 984         private static unsafe extern int InternalCompareStringOrdinalIgnoreCase(String string1, int index1, String string2, int index2, int length1, int length2);
 985
 986         // ComNlsInfo::InternalTryFindStringOrdinalIgnoreCase attempts a faster IndexOf/LastIndexOf OrdinalIgnoreCase using a kernel function.
 987         // Returns true if FindStringOrdinal was handled, with foundIndex set to the target's index into the source
 988         // Returns false when FindStringOrdinal wasn't handled
 989         [System.Security.SecurityCritical]  // auto-generated
 990         [DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode)]
 991         [SuppressUnmanagedCodeSecurity]
 992         [return: MarshalAs(UnmanagedType.Bool)]
 993         private static unsafe extern bool InternalTryFindStringOrdinalIgnoreCase(int searchFlags, String source, int sourceCount, int startIndex, String target, int targetCount, ref int foundIndex);
 994     }
 995
 996 }
 997
 998