1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*============================================================
9 ** Purpose: This is the value class representing a Unicode character,
10 ** together with the static Char helper methods.
13 ===========================================================*/
15 using System.Diagnostics;
16 using System.Globalization;
17 using System.Runtime.InteropServices;
22 [StructLayout(LayoutKind.Sequential)]
23 [System.Runtime.CompilerServices.TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")]
24 public readonly struct Char : IComparable, IComparable<char>, IEquatable<char>, IConvertible
// The single character value wrapped by this instance.
29 private readonly char m_value; // Do not rename (binary serialization)
34 // The maximum character value (U+FFFF).
35 public const char MaxValue = (char)0xFFFF;
36 // The minimum character value (U+0000).
37 public const char MinValue = (char)0x00;
39 // Unicode category values from Unicode U+0000 ~ U+00FF. Store them in byte[] array to save space.
40 private static readonly byte[] s_categoryForLatin1 = {
41 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0000 - 0007
42 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0008 - 000F
43 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0010 - 0017
44 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0018 - 001F
45 (byte)UnicodeCategory.SpaceSeparator, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, // 0020 - 0027
46 (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.DashPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, // 0028 - 002F
47 (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, // 0030 - 0037
48 (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherPunctuation, // 0038 - 003F
49 (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0040 - 0047
50 (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0048 - 004F
51 (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0050 - 0057
52 (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.ConnectorPunctuation, // 0058 - 005F
53 (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0060 - 0067
54 (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0068 - 006F
55 (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0070 - 0077
56 (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.Control, // 0078 - 007F
57 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0080 - 0087
58 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0088 - 008F
59 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0090 - 0097
60 (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0098 - 009F
61 (byte)UnicodeCategory.SpaceSeparator, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.OtherSymbol, // 00A0 - 00A7
62 (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.InitialQuotePunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.DashPunctuation, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.ModifierSymbol, // 00A8 - 00AF
63 (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.OtherPunctuation, // 00B0 - 00B7
64 (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.FinalQuotePunctuation, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherPunctuation, // 00B8 - 00BF
65 (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 00C0 - 00C7
66 (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 00C8 - 00CF
67 (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.MathSymbol, // 00D0 - 00D7
68 (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00D8 - 00DF
69 (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00E0 - 00E7
70 (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00E8 - 00EF
71 (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.MathSymbol, // 00F0 - 00F7
72 (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00F8 - 00FF
75 // Return true for all characters below or equal U+00ff, which is ASCII + Latin-1 Supplement.
// True when ch is at most U+00FF (ASCII plus the Latin-1 Supplement block).
private static bool IsLatin1(char ch) => (uint)ch <= 0xFFu;
81 // Return true for all characters below or equal U+007f, which is ASCII.
// True when ch is at most U+007F (the ASCII range).
private static bool IsAscii(char ch) => (uint)ch <= 0x7Fu;
87 // Return the Unicode category for Unicode character <= 0x00ff.
// Looks up the Unicode category of ch in s_categoryForLatin1.
// ch must be at most U+00FF; this is enforced by a debug assertion only, so callers
// are expected to test IsLatin1(ch) first.
private static UnicodeCategory GetLatin1UnicodeCategory(char ch)
{
    // The guard is IsLatin1, so the message quotes the Latin-1 bound (00ff), not the ASCII bound.
    Debug.Assert(IsLatin1(ch), "char.GetLatin1UnicodeCategory(): ch should be <= 00ff");
    return (UnicodeCategory)s_categoryForLatin1[ch];
}
99 // Overridden Instance Methods
102 // Calculate a hashcode for a 2 byte Unicode character.
// Hash code: the 16-bit character value repeated in the low and high halves of the int.
public override int GetHashCode()
{
    int code = m_value;
    return code | (code << 16);
}
108 // Used for comparing two boxed Char objects.
// Compares this instance with a boxed value by comparing the wrapped character values.
// NOTE(review): this view omits the lines between the signature and the return. The visible
// return unboxes obj with a direct cast, so presumably a type check precedes it; confirm.
110 public override bool Equals(object obj)
116 return (m_value == ((char)obj).m_value);
// True when obj is the same character value as this instance.
[System.Runtime.Versioning.NonVersionable]
public bool Equals(char obj) => obj == m_value;
125 // Compares this object to another object, returning an integer that
126 // indicates the relationship.
127 // Returns a value less than zero if this object is less than value, zero if they are equal, and greater than zero otherwise.
128 // null is considered to be less than any instance.
129 // If object is not of type Char, this method throws an ArgumentException.
// Compares this instance with a boxed value; the result is the difference of the two character values.
// Throws ArgumentException (SR.Arg_MustBeChar) when value is not a char.
// NOTE(review): this view omits the lines between the signature and the type check; confirm
// how a null value is handled against the full file.
131 public int CompareTo(object value)
137 if (!(value is char))
139 throw new ArgumentException(SR.Arg_MustBeChar);
142 return (m_value - ((char)value).m_value);
// Returns the signed difference between this character value and value:
// negative when this instance is smaller, zero when equal, positive when larger.
public int CompareTo(char value)
{
    int difference = m_value - value;
    return difference;
}
150 // Overrides System.Object.ToString.
// Returns the string form of this character by delegating to the static char.ToString(char).
public override string ToString() => char.ToString(m_value);
// Same result as ToString(); the provider argument is not consulted.
public string ToString(IFormatProvider provider) => char.ToString(m_value);
162 // Formatting Methods
165 /*===================================ToString===================================
166 **This static method takes a character and returns the String representation of it.
167 ==============================================================================*/
168 // Provides a string representation of a character.
// Builds the string representation of c via string.CreateFromChar.
public static string ToString(char c)
{
    return string.CreateFromChar(c);
}
// Parses s into a char.
// Throws ArgumentNullException (naming s) and FormatException (SR.Format_NeedSingleChar).
// NOTE(review): this view omits the conditions guarding the two throws and the final return;
// presumably they are a null test and a length test. Confirm against the full file.
171 public static char Parse(string s)
175 throw new ArgumentNullException(nameof(s));
180 throw new FormatException(SR.Format_NeedSingleChar);
// NOTE(review): only the signature is visible in this view. Presumably this is the non-throwing
// counterpart of Parse, reporting success through the return value and the char through result; confirm.
185 public static bool TryParse(string s, out char result)
203 /*=================================ISDIGIT======================================
204 **A wrapper for char. Returns a boolean indicating whether **
205 **character c is considered to be a digit. **
206 ==============================================================================*/
207 // Determines whether a character is a digit.
// Reports whether c is a decimal digit: either the range test '0'..'9' or a
// CharUnicodeInfo category lookup compared with DecimalDigitNumber.
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
208 public static bool IsDigit(char c)
212 return (c >= '0' && c <= '9');
214 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.DecimalDigitNumber);
218 /*=================================CheckLetter=====================================
219 ** Check if the specified UnicodeCategory belongs to the letter categories.
220 ==============================================================================*/
// Tests uc against the five letter categories listed in the case labels.
// NOTE(review): this view omits the switch header and the return statements; confirm.
221 internal static bool CheckLetter(UnicodeCategory uc)
225 case (UnicodeCategory.UppercaseLetter):
226 case (UnicodeCategory.LowercaseLetter):
227 case (UnicodeCategory.TitlecaseLetter):
228 case (UnicodeCategory.ModifierLetter):
229 case (UnicodeCategory.OtherLetter):
235 /*=================================ISLETTER=====================================
236 **A wrapper for char. Returns a boolean indicating whether **
237 **character c is considered to be a letter. **
238 ==============================================================================*/
239 // Determines whether a character is a letter.
// Reports whether c is a letter, using a range test, the Latin-1 category table, or
// CharUnicodeInfo, each combined with CheckLetter where a category is involved.
// NOTE(review): this view omits the branch conditions. The first return only tests 'a'..'z',
// so presumably c is case-folded on an elided line; confirm against the full file.
240 public static bool IsLetter(char c)
247 return ((c >= 'a' && c <= 'z'));
249 return (CheckLetter(GetLatin1UnicodeCategory(c)));
251 return (CheckLetter(CharUnicodeInfo.GetUnicodeCategory(c)));
// White-space test for characters in the Latin-1 range, done by code point comparison.
// NOTE(review): this view shows only one operand of the boolean expression; the return keyword
// and the remaining operands are omitted. Confirm against the full file.
254 private static bool IsWhiteSpaceLatin1(char c)
256 // There are characters which belong to UnicodeCategory.Control but are considered as white spaces.
257 // We use code point comparisons for these characters here as a temporary fix.
259 // U+0009 = <control> HORIZONTAL TAB
260 // U+000a = <control> LINE FEED
261 // U+000b = <control> VERTICAL TAB
262 // U+000c = <control> FORM FEED
263 // U+000d = <control> CARRIAGE RETURN
264 // U+0085 = <control> NEXT LINE
265 // U+00a0 = NO-BREAK SPACE
// Single unsigned comparison for the range U+0009..U+000D: values below U+0009 make the
// subtraction negative, which the uint cast turns into a large number that fails the test.
268 (uint)(c - '\x0009') <= ('\x000d' - '\x0009') || // (c >= '\x0009' && c <= '\x000d')
273 /*===============================ISWHITESPACE===================================
274 **A wrapper for char. Returns a boolean indicating whether **
275 **character c is considered to be a whitespace character. **
276 ==============================================================================*/
277 // Determines whether a character is whitespace.
// Reports whether c is white space, via IsWhiteSpaceLatin1 or CharUnicodeInfo.IsWhiteSpace.
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
278 public static bool IsWhiteSpace(char c)
282 return (IsWhiteSpaceLatin1(c));
284 return CharUnicodeInfo.IsWhiteSpace(c);
288 /*===================================IsUpper====================================
289 **Arguments: c -- the character to be checked.
290 **Returns: True if c is an uppercase character.
291 ==============================================================================*/
292 // Determines whether a character is upper-case.
// Reports whether c is an upper-case letter, using the range 'A'..'Z', the Latin-1
// category table, or CharUnicodeInfo compared with UppercaseLetter.
// NOTE(review): this view omits the branch conditions that select between the three returns; confirm.
293 public static bool IsUpper(char c)
299 return (c >= 'A' && c <= 'Z');
301 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.UppercaseLetter);
303 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.UppercaseLetter);
306 /*===================================IsLower====================================
307 **Arguments: c -- the character to be checked.
308 **Returns: True if c is a lowercase character.
309 ==============================================================================*/
310 // Determines whether a character is lower-case.
// Reports whether c is a lower-case letter, using the range 'a'..'z', the Latin-1
// category table, or CharUnicodeInfo compared with LowercaseLetter.
// NOTE(review): this view omits the branch conditions that select between the three returns; confirm.
311 public static bool IsLower(char c)
317 return (c >= 'a' && c <= 'z');
319 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.LowercaseLetter);
321 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.LowercaseLetter);
// Tests uc against the seven punctuation categories listed in the case labels.
// NOTE(review): this view omits the switch header and the return statements; confirm.
324 internal static bool CheckPunctuation(UnicodeCategory uc)
328 case UnicodeCategory.ConnectorPunctuation:
329 case UnicodeCategory.DashPunctuation:
330 case UnicodeCategory.OpenPunctuation:
331 case UnicodeCategory.ClosePunctuation:
332 case UnicodeCategory.InitialQuotePunctuation:
333 case UnicodeCategory.FinalQuotePunctuation:
334 case UnicodeCategory.OtherPunctuation:
341 /*================================IsPunctuation=================================
342 **Arguments: c -- the character to be checked.
343 **Returns: True if c is a punctuation mark
344 ==============================================================================*/
345 // Determines whether a character is a punctuation mark.
// Reports whether c is punctuation: CheckPunctuation applied to the category from the
// Latin-1 table or from CharUnicodeInfo.
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
346 public static bool IsPunctuation(char c)
350 return (CheckPunctuation(GetLatin1UnicodeCategory(c)));
352 return (CheckPunctuation(CharUnicodeInfo.GetUnicodeCategory(c)));
355 /*=================================CheckLetterOrDigit=====================================
356 ** Check if the specified UnicodeCategory belongs to the letter or digit categories.
357 ==============================================================================*/
// Tests uc against the five letter categories plus DecimalDigitNumber listed in the case labels.
// NOTE(review): this view omits the switch header and the return statements; confirm.
358 internal static bool CheckLetterOrDigit(UnicodeCategory uc)
362 case UnicodeCategory.UppercaseLetter:
363 case UnicodeCategory.LowercaseLetter:
364 case UnicodeCategory.TitlecaseLetter:
365 case UnicodeCategory.ModifierLetter:
366 case UnicodeCategory.OtherLetter:
367 case UnicodeCategory.DecimalDigitNumber:
373 // Determines whether a character is a letter or a digit.
// Reports whether c is a letter or decimal digit: CheckLetterOrDigit applied to the
// category from the Latin-1 table or from CharUnicodeInfo.
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
374 public static bool IsLetterOrDigit(char c)
378 return (CheckLetterOrDigit(GetLatin1UnicodeCategory(c)));
380 return (CheckLetterOrDigit(CharUnicodeInfo.GetUnicodeCategory(c)));
383 /*===================================ToUpper====================================
385 ==============================================================================*/
386 // Converts a character to upper-case for the specified culture.
387 // <<Not fully implemented>>
// Converts c to upper case using the TextInfo of the supplied culture.
// Throws ArgumentNullException naming culture.
// NOTE(review): this view omits the condition guarding the throw; presumably a null test on culture. Confirm.
388 public static char ToUpper(char c, CultureInfo culture)
391 throw new ArgumentNullException(nameof(culture));
392 return culture.TextInfo.ToUpper(c);
395 /*=================================TOUPPER======================================
396 **A wrapper for char.ToUpperCase. Converts character c to its **
397 **uppercase equivalent. If c is already an uppercase character or is not an **
398 **alphabetic, nothing happens. **
399 ==============================================================================*/
400 // Converts a character to upper-case for the default culture.
// Converts c to upper case using the TextInfo of CultureInfo.CurrentCulture.
public static char ToUpper(char c)
{
    TextInfo currentTextInfo = CultureInfo.CurrentCulture.TextInfo;
    return currentTextInfo.ToUpper(c);
}
408 // Converts a character to upper-case for invariant culture.
// Converts c to upper case using the TextInfo of CultureInfo.InvariantCulture.
public static char ToUpperInvariant(char c)
{
    TextInfo invariantTextInfo = CultureInfo.InvariantCulture.TextInfo;
    return invariantTextInfo.ToUpper(c);
}
415 /*===================================ToLower====================================
417 ==============================================================================*/
418 // Converts a character to lower-case for the specified culture.
419 // <<Not fully implemented>>
// Converts c to lower case using the TextInfo of the supplied culture.
// Throws ArgumentNullException naming culture.
// NOTE(review): this view omits the condition guarding the throw; presumably a null test on culture. Confirm.
420 public static char ToLower(char c, CultureInfo culture)
423 throw new ArgumentNullException(nameof(culture));
424 return culture.TextInfo.ToLower(c);
427 /*=================================TOLOWER======================================
428 **A wrapper for char.ToLowerCase. Converts character c to its **
429 **lowercase equivalent. If c is already a lowercase character or is not an **
430 **alphabetic, nothing happens. **
431 ==============================================================================*/
432 // Converts a character to lower-case for the default culture.
// Converts c to lower case using the TextInfo of CultureInfo.CurrentCulture.
public static char ToLower(char c)
{
    TextInfo currentTextInfo = CultureInfo.CurrentCulture.TextInfo;
    return currentTextInfo.ToLower(c);
}
439 // Converts a character to lower-case for invariant culture.
// Converts c to lower case using the TextInfo of CultureInfo.InvariantCulture.
public static char ToLowerInvariant(char c)
{
    TextInfo invariantTextInfo = CultureInfo.InvariantCulture.TextInfo;
    return invariantTextInfo.ToLower(c);
}
447 // IConvertible implementation
// IConvertible: this type always reports TypeCode.Char.
public TypeCode GetTypeCode() => TypeCode.Char;
// Char to Boolean is not a supported conversion; always throws InvalidCastException.
bool IConvertible.ToBoolean(IFormatProvider provider)
{
    string message = SR.Format(SR.InvalidCast_FromTo, "Char", "Boolean");
    throw new InvalidCastException(message);
}
// NOTE(review): only the signature is visible in this view; presumably it returns the wrapped
// character unchanged. Confirm against the full file.
460 char IConvertible.ToChar(IFormatProvider provider)
// Delegates to Convert.ToSByte(char); the provider argument is not consulted.
sbyte IConvertible.ToSByte(IFormatProvider provider) => Convert.ToSByte(m_value);
// Delegates to Convert.ToByte(char); the provider argument is not consulted.
byte IConvertible.ToByte(IFormatProvider provider) => Convert.ToByte(m_value);
// Delegates to Convert.ToInt16(char); the provider argument is not consulted.
short IConvertible.ToInt16(IFormatProvider provider) => Convert.ToInt16(m_value);
// Delegates to Convert.ToUInt16(char); the provider argument is not consulted.
ushort IConvertible.ToUInt16(IFormatProvider provider) => Convert.ToUInt16(m_value);
// Delegates to Convert.ToInt32(char); the provider argument is not consulted.
int IConvertible.ToInt32(IFormatProvider provider) => Convert.ToInt32(m_value);
// Delegates to Convert.ToUInt32(char); the provider argument is not consulted.
uint IConvertible.ToUInt32(IFormatProvider provider) => Convert.ToUInt32(m_value);
// Delegates to Convert.ToInt64(char); the provider argument is not consulted.
long IConvertible.ToInt64(IFormatProvider provider) => Convert.ToInt64(m_value);
// Delegates to Convert.ToUInt64(char); the provider argument is not consulted.
ulong IConvertible.ToUInt64(IFormatProvider provider) => Convert.ToUInt64(m_value);
// Char to Single is not a supported conversion; always throws InvalidCastException.
float IConvertible.ToSingle(IFormatProvider provider)
{
    string message = SR.Format(SR.InvalidCast_FromTo, "Char", "Single");
    throw new InvalidCastException(message);
}
// Char to Double is not a supported conversion; always throws InvalidCastException.
double IConvertible.ToDouble(IFormatProvider provider)
{
    string message = SR.Format(SR.InvalidCast_FromTo, "Char", "Double");
    throw new InvalidCastException(message);
}
// Char to Decimal is not a supported conversion; always throws InvalidCastException.
decimal IConvertible.ToDecimal(IFormatProvider provider)
{
    string message = SR.Format(SR.InvalidCast_FromTo, "Char", "Decimal");
    throw new InvalidCastException(message);
}
// Char to DateTime is not a supported conversion; always throws InvalidCastException.
DateTime IConvertible.ToDateTime(IFormatProvider provider)
{
    string message = SR.Format(SR.InvalidCast_FromTo, "Char", "DateTime");
    throw new InvalidCastException(message);
}
// Converts this value to the requested type by handing a boxed IConvertible view of it
// to Convert.DefaultToType together with type and provider.
object IConvertible.ToType(Type type, IFormatProvider provider)
{
    IConvertible boxed = (IConvertible)this;
    return Convert.DefaultToType(boxed, type, provider);
}
// Reports whether c is a control character: the category from the Latin-1 table or
// from CharUnicodeInfo is compared with UnicodeCategory.Control.
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
529 public static bool IsControl(char c)
533 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.Control);
535 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.Control);
// String overload of IsControl for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch condition; confirm.
538 public static bool IsControl(string s, int index)
541 throw new ArgumentNullException(nameof(s));
542 if (((uint)index) >= ((uint)s.Length))
544 throw new ArgumentOutOfRangeException(nameof(index));
549 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.Control);
551 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.Control);
// String overload of IsDigit for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch condition; confirm.
555 public static bool IsDigit(string s, int index)
558 throw new ArgumentNullException(nameof(s));
559 if (((uint)index) >= ((uint)s.Length))
561 throw new ArgumentOutOfRangeException(nameof(index));
566 return (c >= '0' && c <= '9');
568 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.DecimalDigitNumber);
// String overload of IsLetter for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch conditions. The
// first return only tests 'a'..'z', so presumably c is case-folded on an elided line; confirm.
571 public static bool IsLetter(string s, int index)
574 throw new ArgumentNullException(nameof(s));
575 if (((uint)index) >= ((uint)s.Length))
577 throw new ArgumentOutOfRangeException(nameof(index));
585 return ((c >= 'a' && c <= 'z'));
587 return (CheckLetter(GetLatin1UnicodeCategory(c)));
589 return (CheckLetter(CharUnicodeInfo.GetUnicodeCategory(s, index)));
// String overload of IsLetterOrDigit for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch condition; confirm.
592 public static bool IsLetterOrDigit(string s, int index)
595 throw new ArgumentNullException(nameof(s));
596 if (((uint)index) >= ((uint)s.Length))
598 throw new ArgumentOutOfRangeException(nameof(index));
603 return CheckLetterOrDigit(GetLatin1UnicodeCategory(c));
605 return CheckLetterOrDigit(CharUnicodeInfo.GetUnicodeCategory(s, index));
// String overload of IsLower for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch conditions; confirm.
608 public static bool IsLower(string s, int index)
611 throw new ArgumentNullException(nameof(s));
612 if (((uint)index) >= ((uint)s.Length))
614 throw new ArgumentOutOfRangeException(nameof(index));
621 return (c >= 'a' && c <= 'z');
623 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.LowercaseLetter);
626 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.LowercaseLetter);
629 /*=================================CheckNumber=====================================
630 ** Check if the specified UnicodeCategory belongs to the number categories.
631 ==============================================================================*/
// Tests uc against the three number categories listed in the case labels.
// NOTE(review): this view omits the switch header and the return statements; confirm.
633 internal static bool CheckNumber(UnicodeCategory uc)
637 case (UnicodeCategory.DecimalDigitNumber):
638 case (UnicodeCategory.LetterNumber):
639 case (UnicodeCategory.OtherNumber):
// Reports whether c is a number, using the range '0'..'9', or CheckNumber applied to
// the category from the Latin-1 table or from CharUnicodeInfo.
// NOTE(review): this view omits the branch conditions that select between the three returns; confirm.
645 public static bool IsNumber(char c)
651 return (c >= '0' && c <= '9');
653 return (CheckNumber(GetLatin1UnicodeCategory(c)));
655 return (CheckNumber(CharUnicodeInfo.GetUnicodeCategory(c)));
// String overload of IsNumber for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch conditions; confirm.
658 public static bool IsNumber(string s, int index)
661 throw new ArgumentNullException(nameof(s));
662 if (((uint)index) >= ((uint)s.Length))
664 throw new ArgumentOutOfRangeException(nameof(index));
671 return (c >= '0' && c <= '9');
673 return (CheckNumber(GetLatin1UnicodeCategory(c)));
675 return (CheckNumber(CharUnicodeInfo.GetUnicodeCategory(s, index)));
678 ////////////////////////////////////////////////////////////////////////
682 // Determines if the given character is a punctuation character.
684 ////////////////////////////////////////////////////////////////////////
// String overload of IsPunctuation for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch condition; confirm.
686 public static bool IsPunctuation(string s, int index)
689 throw new ArgumentNullException(nameof(s));
690 if (((uint)index) >= ((uint)s.Length))
692 throw new ArgumentOutOfRangeException(nameof(index));
697 return (CheckPunctuation(GetLatin1UnicodeCategory(c)));
699 return (CheckPunctuation(CharUnicodeInfo.GetUnicodeCategory(s, index)));
703 /*================================= CheckSeparator ============================
704 ** Check if the specified UnicodeCategory belongs to the separator categories.
705 ==============================================================================*/
// Tests uc against the three separator categories listed in the case labels.
// NOTE(review): this view omits the switch header and the return statements; confirm.
707 internal static bool CheckSeparator(UnicodeCategory uc)
711 case UnicodeCategory.SpaceSeparator:
712 case UnicodeCategory.LineSeparator:
713 case UnicodeCategory.ParagraphSeparator:
// Separator test for Latin-1 characters: only U+0020 and U+00A0 (NO-BREAK SPACE) qualify.
private static bool IsSeparatorLatin1(char c)
{
    // There is no LineSeparator or ParagraphSeparator in the Latin-1 range,
    // so the two code points below are the complete set.
    switch (c)
    {
        case '\u0020':
        case '\u00a0':
            return true;
        default:
            return false;
    }
}
// Reports whether c is a separator, via IsSeparatorLatin1 or CheckSeparator applied to
// the category from CharUnicodeInfo.
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
726 public static bool IsSeparator(char c)
730 return (IsSeparatorLatin1(c));
732 return (CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(c)));
// String overload of IsSeparator for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch condition; confirm.
735 public static bool IsSeparator(string s, int index)
738 throw new ArgumentNullException(nameof(s));
739 if (((uint)index) >= ((uint)s.Length))
741 throw new ArgumentOutOfRangeException(nameof(index));
746 return (IsSeparatorLatin1(c));
748 return (CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(s, index)));
// True when c lies between HIGH_SURROGATE_START and LOW_SURROGATE_END inclusive,
// i.e. it is either half of a surrogate pair.
public static bool IsSurrogate(char c) => HIGH_SURROGATE_START <= c && c <= LOW_SURROGATE_END;
// String overload: applies IsSurrogate(char) to s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the condition guarding the first throw; presumably a null test on s. Confirm.
756 public static bool IsSurrogate(string s, int index)
760 throw new ArgumentNullException(nameof(s));
762 if (((uint)index) >= ((uint)s.Length))
764 throw new ArgumentOutOfRangeException(nameof(index));
766 return (IsSurrogate(s[index]));
769 /*================================= CheckSymbol ============================
770 ** Check if the specified UnicodeCategory belongs to the symbol categories.
771 ==============================================================================*/
// Tests uc against the four symbol categories listed in the case labels.
// NOTE(review): this view omits the switch header and the return statements; confirm.
773 internal static bool CheckSymbol(UnicodeCategory uc)
777 case (UnicodeCategory.MathSymbol):
778 case (UnicodeCategory.CurrencySymbol):
779 case (UnicodeCategory.ModifierSymbol):
780 case (UnicodeCategory.OtherSymbol):
// Reports whether c is a symbol: CheckSymbol applied to the category from the Latin-1
// table or from CharUnicodeInfo.
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
786 public static bool IsSymbol(char c)
790 return (CheckSymbol(GetLatin1UnicodeCategory(c)));
792 return (CheckSymbol(CharUnicodeInfo.GetUnicodeCategory(c)));
// String overload of IsSymbol for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch condition; confirm.
795 public static bool IsSymbol(string s, int index)
798 throw new ArgumentNullException(nameof(s));
799 if (((uint)index) >= ((uint)s.Length))
801 throw new ArgumentOutOfRangeException(nameof(index));
806 return (CheckSymbol(GetLatin1UnicodeCategory(c)));
808 return (CheckSymbol(CharUnicodeInfo.GetUnicodeCategory(s, index)));
// String overload of IsUpper for the character at s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the null test, the declaration of c and the branch conditions; confirm.
812 public static bool IsUpper(string s, int index)
815 throw new ArgumentNullException(nameof(s));
816 if (((uint)index) >= ((uint)s.Length))
818 throw new ArgumentOutOfRangeException(nameof(index));
825 return (c >= 'A' && c <= 'Z');
827 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.UppercaseLetter);
830 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.UppercaseLetter);
// String overload of IsWhiteSpace: Latin-1 characters go through IsWhiteSpaceLatin1,
// everything else through CharUnicodeInfo.IsWhiteSpace(s, index).
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the condition guarding the first throw; presumably a null test on s. Confirm.
833 public static bool IsWhiteSpace(string s, int index)
836 throw new ArgumentNullException(nameof(s));
837 if (((uint)index) >= ((uint)s.Length))
839 throw new ArgumentOutOfRangeException(nameof(index));
842 if (IsLatin1(s[index]))
844 return IsWhiteSpaceLatin1(s[index]);
847 return CharUnicodeInfo.IsWhiteSpace(s, index);
// Returns the Unicode category of c, from the Latin-1 table or from
// CharUnicodeInfo.GetUnicodeCategory (called with c widened to int).
// NOTE(review): this view omits the branch that selects between the two returns; confirm.
850 public static UnicodeCategory GetUnicodeCategory(char c)
854 return (GetLatin1UnicodeCategory(c));
856 return CharUnicodeInfo.GetUnicodeCategory((int)c);
// String overload: Latin-1 characters are looked up in the Latin-1 table, everything else
// goes through CharUnicodeInfo.InternalGetUnicodeCategory(s, index).
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the condition guarding the first throw; presumably a null test on s. Confirm.
859 public static UnicodeCategory GetUnicodeCategory(string s, int index)
862 throw new ArgumentNullException(nameof(s));
863 if (((uint)index) >= ((uint)s.Length))
865 throw new ArgumentOutOfRangeException(nameof(index));
867 if (IsLatin1(s[index]))
869 return (GetLatin1UnicodeCategory(s[index]));
871 return CharUnicodeInfo.InternalGetUnicodeCategory(s, index);
// Returns the numeric value of c as reported by CharUnicodeInfo.GetNumericValue.
public static double GetNumericValue(char c) => CharUnicodeInfo.GetNumericValue(c);
// String overload: forwards to CharUnicodeInfo.GetNumericValue(s, index) after validating the arguments.
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is outside s (the unsigned comparison also rejects negative values).
// NOTE(review): this view omits the condition guarding the first throw; presumably a null test on s. Confirm.
879 public static double GetNumericValue(string s, int index)
882 throw new ArgumentNullException(nameof(s));
883 if (((uint)index) >= ((uint)s.Length))
885 throw new ArgumentOutOfRangeException(nameof(index));
887 return CharUnicodeInfo.GetNumericValue(s, index);
891 /*================================= IsHighSurrogate ============================
892 ** Check if a char is a high surrogate.
893 ==============================================================================*/
// True when c lies within CharUnicodeInfo.HIGH_SURROGATE_START..HIGH_SURROGATE_END inclusive.
public static bool IsHighSurrogate(char c)
{
    bool outsideRange = c < CharUnicodeInfo.HIGH_SURROGATE_START || c > CharUnicodeInfo.HIGH_SURROGATE_END;
    return !outsideRange;
}
// String overload: applies IsHighSurrogate(char) to s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is negative or not less than s.Length.
// NOTE(review): this view omits the condition guarding the first throw; presumably a null test on s. Confirm.
899 public static bool IsHighSurrogate(string s, int index)
903 throw new ArgumentNullException(nameof(s));
905 if (index < 0 || index >= s.Length)
907 throw new ArgumentOutOfRangeException(nameof(index));
909 return (IsHighSurrogate(s[index]));
912 /*================================= IsLowSurrogate ============================
913 ** Check if a char is a low surrogate.
914 ==============================================================================*/
// True when c lies within CharUnicodeInfo.LOW_SURROGATE_START..LOW_SURROGATE_END inclusive.
public static bool IsLowSurrogate(char c)
{
    bool outsideRange = c < CharUnicodeInfo.LOW_SURROGATE_START || c > CharUnicodeInfo.LOW_SURROGATE_END;
    return !outsideRange;
}
// String overload: applies IsLowSurrogate(char) to s[index].
// Throws ArgumentNullException naming s, and ArgumentOutOfRangeException naming index when index
// is negative or not less than s.Length.
// NOTE(review): this view omits the condition guarding the first throw; presumably a null test on s. Confirm.
920 public static bool IsLowSurrogate(string s, int index)
924 throw new ArgumentNullException(nameof(s));
926 if (index < 0 || index >= s.Length)
928 throw new ArgumentOutOfRangeException(nameof(index));
930 return (IsLowSurrogate(s[index]));
/*================================= IsSurrogatePair ============================
** Check whether a surrogate pair starts at the specified index of the string.
==============================================================================*/
// Reports whether the two characters of s starting at position index form a
// surrogate pair (a high surrogate immediately followed by a low surrogate).
// Returns false when index is the last position in s, because there is no
// second character to complete a pair.
//
// Throws ArgumentNullException when s is null.
// Throws ArgumentOutOfRangeException when index is negative or not less than
// the length of s.
public static bool IsSurrogatePair(string s, int index)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    if (index < 0 || index >= s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // index < s.Length was established above, so index + 1 cannot overflow.
    if (index + 1 < s.Length)
    {
        return char.IsSurrogatePair(s[index], s[index + 1]);
    }

    return false;
}
// Reports whether the two arguments together form a surrogate pair: the first
// must be a high surrogate and the second a low surrogate.
public static bool IsSurrogatePair(char highSurrogate, char lowSurrogate)
    => char.IsHighSurrogate(highSurrogate) && char.IsLowSurrogate(lowSurrogate);
// The last code point of Unicode plane 0.
internal const int UNICODE_PLANE00_END = 0xFFFF;

// The first code point of Unicode plane 1. Plane 1 spans 0x010000 ~ 0x01FFFF.
internal const int UNICODE_PLANE01_START = 0x10000;

// The last code point of Unicode plane 16, which is also the largest code
// point value Unicode allows. Plane 16 spans 0x100000 ~ 0x10FFFF.
internal const int UNICODE_PLANE16_END = 0x10FFFF;

// Inclusive bounds of the surrogate code point range (0xD800 ~ 0xDFFF),
// expressed as int so they can be compared against UTF-32 values directly.
internal const int HIGH_SURROGATE_START = 0xD800;
internal const int LOW_SURROGATE_END = 0xDFFF;
/*================================= ConvertFromUtf32 ============================
** Convert a UTF-32 code point into a UTF-16 string: a single char for a BMP
** code point, or a surrogate pair for a supplementary code point.
==============================================================================*/
// Converts the UTF-32 code point utf32 into its UTF-16 string form: a
// one-character string for a BMP code point, or a two-character surrogate
// pair for a supplementary code point.
//
// Throws ArgumentOutOfRangeException when utf32 is negative, greater than
// UNICODE_PLANE16_END, or inside the surrogate range
// HIGH_SURROGATE_START ~ LOW_SURROGATE_END.
public static string ConvertFromUtf32(int utf32)
{
    // Values in U+00D800 ~ U+00DFFF are surrogate code points. They are
    // rejected here together with values outside U+000000 ~ U+10FFFF.
    if ((utf32 < 0 || utf32 > UNICODE_PLANE16_END) || (utf32 >= HIGH_SURROGATE_START && utf32 <= LOW_SURROGATE_END))
    {
        throw new ArgumentOutOfRangeException(nameof(utf32), SR.ArgumentOutOfRange_InvalidUTF32);
    }

    if (utf32 < UNICODE_PLANE01_START)
    {
        // This is a BMP character.
        return char.ToString((char)utf32);
    }

    // The pointer arithmetic below is only legal inside an unsafe context.
    unsafe
    {
        // This is a supplementary character. Convert it to a surrogate pair in UTF-16:
        // the offset from U+10000 is split into its upper part (quotient by 0x400) and
        // lower part (remainder by 0x400), each rebased onto the matching surrogate range.
        utf32 -= UNICODE_PLANE01_START;
        uint surrogate = 0; // a 32-bit local doubles as stack storage for two 16-bit chars
        char* address = (char*)&surrogate;
        address[0] = (char)((utf32 / 0x400) + (int)CharUnicodeInfo.HIGH_SURROGATE_START);
        address[1] = (char)((utf32 % 0x400) + (int)CharUnicodeInfo.LOW_SURROGATE_START);
        return new string(address, 0, 2);
    }
}
/*=============================ConvertToUtf32===================================
** Convert a surrogate pair to a UTF-32 value.
==============================================================================*/
// Combines a high surrogate and a low surrogate into the UTF-32 code point
// they encode.
//
// Throws ArgumentOutOfRangeException when highSurrogate is not a high
// surrogate or lowSurrogate is not a low surrogate. The high surrogate is
// validated first.
public static int ConvertToUtf32(char highSurrogate, char lowSurrogate)
{
    if (!IsHighSurrogate(highSurrogate))
    {
        throw new ArgumentOutOfRangeException(nameof(highSurrogate), SR.ArgumentOutOfRange_InvalidHighSurrogate);
    }

    if (!IsLowSurrogate(lowSurrogate))
    {
        throw new ArgumentOutOfRangeException(nameof(lowSurrogate), SR.ArgumentOutOfRange_InvalidLowSurrogate);
    }

    // Each surrogate contributes its offset from the start of its range; the
    // high offset is scaled by 0x400 and the sum is rebased onto plane 1.
    int highOffset = highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
    int lowOffset = lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
    return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
}
/*=============================ConvertToUtf32===================================
** Convert a character or a surrogate pair starting at index of the specified string
** to a UTF-32 value.
** The char pointed to by index should be the start of a surrogate pair or a BMP character.
** This method throws if a high surrogate is not followed by a low surrogate.
** This method throws if a low surrogate is seen without a preceding high surrogate.
==============================================================================*/
// Returns the UTF-32 code point encoded at position index of s: either the
// BMP character found there, or the supplementary code point formed by the
// surrogate pair that starts there.
//
// Throws ArgumentNullException when s is null.
// Throws ArgumentOutOfRangeException when index is negative or not less than
// the length of s.
// Throws ArgumentException when s[index] is a low surrogate, or a high
// surrogate that is not immediately followed by a low surrogate.
public static int ConvertToUtf32(string s, int index)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    if (index < 0 || index >= s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
    }

    // Offset of s[index] from the start of the surrogate range. Offsets
    // 0 ~ 0x3ff are high surrogates, 0x400 ~ 0x7ff are low surrogates, and
    // anything else is not a surrogate at all.
    int highOffset = (int)s[index] - CharUnicodeInfo.HIGH_SURROGATE_START;
    if (highOffset < 0 || highOffset > 0x7ff)
    {
        // Not a high surrogate or low surrogate. Generate the UTF32 value for the BMP character.
        return (int)s[index];
    }

    if (highOffset > 0x3ff)
    {
        // Found a low surrogate at the character pointed to by index.
        throw new ArgumentException(SR.Format(SR.Argument_InvalidLowSurrogate, index), nameof(s));
    }

    if (index >= s.Length - 1)
    {
        // Found a high surrogate at the end of the string.
        throw new ArgumentException(SR.Format(SR.Argument_InvalidHighSurrogate, index), nameof(s));
    }

    int lowOffset = (int)s[index + 1] - CharUnicodeInfo.LOW_SURROGATE_START;
    if (lowOffset < 0 || lowOffset > 0x3ff)
    {
        // The high surrogate is not followed by a low surrogate.
        throw new ArgumentException(SR.Format(SR.Argument_InvalidHighSurrogate, index), nameof(s));
    }

    // Found a complete surrogate pair.
    return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
}