1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*============================================================
** Purpose: This is the value class representing a Unicode character
** (a single UTF-16 code unit), together with the static Char helper methods.
13 ===========================================================*/
15 using System.Diagnostics;
16 using System.Globalization;
17 using System.Runtime.InteropServices;
22 [StructLayout(LayoutKind.Sequential)]
23 [System.Runtime.CompilerServices.TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")]
24 public readonly struct Char : IComparable, IComparable<char>, IEquatable<char>, IConvertible
29 private readonly char m_value; // Do not rename (binary serialization)
34 // The maximum character value.
35 public const char MaxValue = (char)0xFFFF;
36 // The minimum character value.
37 public const char MinValue = (char)0x00;
// Unicode general categories for U+0000 ~ U+00FF, one byte per code point, indexed by code point.
// Stored as bytes rather than UnicodeCategory values to save space; returning `new byte[] { ... }`
// from a ReadOnlySpan<byte> getter uses the C# compiler's optimization for static byte[] data.
// The entries follow the Unicode Character Database. In particular U+00A7 and U+00B6 are
// OtherPunctuation, U+00AA and U+00BA are OtherLetter, and U+00AD (SOFT HYPHEN) is Format.
private static ReadOnlySpan<byte> CategoryForLatin1 => new byte[]
{
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0000 - 0007
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0008 - 000F
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0010 - 0017
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0018 - 001F
    (byte)UnicodeCategory.SpaceSeparator, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, // 0020 - 0027
    (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.DashPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, // 0028 - 002F
    (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, // 0030 - 0037
    (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherPunctuation, // 0038 - 003F
    (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0040 - 0047
    (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0048 - 004F
    (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0050 - 0057
    (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.ConnectorPunctuation, // 0058 - 005F
    (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0060 - 0067
    (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0068 - 006F
    (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0070 - 0077
    (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.Control, // 0078 - 007F
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0080 - 0087
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0088 - 008F
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0090 - 0097
    (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0098 - 009F
    (byte)UnicodeCategory.SpaceSeparator, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.OtherPunctuation, // 00A0 - 00A7
    (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.OtherLetter, (byte)UnicodeCategory.InitialQuotePunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.Format, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.ModifierSymbol, // 00A8 - 00AF
    (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, // 00B0 - 00B7
    (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherLetter, (byte)UnicodeCategory.FinalQuotePunctuation, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherPunctuation, // 00B8 - 00BF
    (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 00C0 - 00C7
    (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 00C8 - 00CF
    (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.MathSymbol, // 00D0 - 00D7
    (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00D8 - 00DF
    (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00E0 - 00E7
    (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00E8 - 00EF
    (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.MathSymbol, // 00F0 - 00F7
    (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00F8 - 00FF
};
// True for every character at or below U+00FF, i.e. ASCII plus the Latin-1 Supplement block.
private static bool IsLatin1(char ch) => (uint)ch <= 0xFFu;
// True for every character at or below U+007F, i.e. the ASCII range.
private static bool IsAscii(char ch) => (uint)ch <= 0x7Fu;
// Return the Unicode category for a Unicode character <= 0x00ff by indexing CategoryForLatin1.
// Callers must establish IsLatin1(ch) first: the table only covers U+0000 ~ U+00FF, and the
// precondition is checked with a debug-only assert.
private static UnicodeCategory GetLatin1UnicodeCategory(char ch)
{
    Debug.Assert(IsLatin1(ch), "char.GetLatin1UnicodeCategory(): ch should be <= 00ff");
    return (UnicodeCategory)CategoryForLatin1[(int)ch];
}
// Overridden Instance Methods
// Hash code for a 2 byte Unicode character: the 16-bit value is placed in the low half of the
// int and OR-ed with a copy shifted into the high half.
public override int GetHashCode()
{
    int codeUnit = m_value;
    return codeUnit | (codeUnit << 16);
}
108 // Used for comparing two boxed Char objects.
110 public override bool Equals(object obj)
116 return (m_value == ((char)obj).m_value);
// Strongly typed equality: true when obj holds the same value as this instance.
[System.Runtime.Versioning.NonVersionable]
public bool Equals(char obj) => m_value == obj;
// Compares this object to another object, returning an integer that
// indicates the relationship.
// Returns a value less than zero if this object is less than value, zero if
// they are equal, and a value greater than zero if this object is greater.
// null is considered to be less than any instance.
// If object is not of type Char, this method throws an ArgumentException.
131 public int CompareTo(object value)
137 if (!(value is char))
139 throw new ArgumentException(SR.Arg_MustBeChar);
142 return (m_value - ((char)value).m_value);
// Strongly typed comparison: the difference between this value and value, so the result is
// negative when this instance is smaller, zero when equal, and positive when larger.
public int CompareTo(char value) => m_value - value;
// Overrides System.Object.ToString by forwarding this value to the static char.ToString(char).
public override string ToString() => char.ToString(m_value);
// IConvertible-style overload. The provider argument is not consulted; the result is the same
// as the parameterless ToString().
public string ToString(IFormatProvider provider) => char.ToString(m_value);
162 // Formatting Methods
/*===================================ToString===================================
**This static method takes a character and returns the String representation of it.
==============================================================================*/
// Provides a string representation of a character by delegating to string.CreateFromChar.
public static string ToString(char c)
{
    return string.CreateFromChar(c);
}
171 public static char Parse(string s)
175 throw new ArgumentNullException(nameof(s));
180 throw new FormatException(SR.Format_NeedSingleChar);
185 public static bool TryParse(string s, out char result)
203 /*=================================ISDIGIT======================================
204 **A wrapper for char. Returns a boolean indicating whether **
205 **character c is considered to be a digit. **
206 ==============================================================================*/
207 // Determines whether a character is a digit.
208 public static bool IsDigit(char c)
212 return IsInRange(c, '0', '9');
214 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.DecimalDigitNumber);
// True when min <= c <= max (callers pass min <= max). A single unsigned comparison covers both
// bounds: when c < min the subtraction is negative, and the uint cast turns it into a value
// larger than any possible (max - min).
private static bool IsInRange(char c, char min, char max)
{
    uint offset = (uint)(c - min);
    uint width = (uint)(max - min);
    return offset <= width;
}
// UnicodeCategory overload of the range test: true when min <= c <= max, using the same
// single unsigned comparison as the char overload.
private static bool IsInRange(UnicodeCategory c, UnicodeCategory min, UnicodeCategory max)
{
    uint offset = (uint)(c - min);
    uint width = (uint)(max - min);
    return offset <= width;
}
/*=================================CheckLetter=====================================
** Check if the specified UnicodeCategory belongs to the letter categories,
** i.e. lies between UppercaseLetter and OtherLetter inclusive.
==============================================================================*/
internal static bool CheckLetter(UnicodeCategory uc)
    => IsInRange(uc, UnicodeCategory.UppercaseLetter, UnicodeCategory.OtherLetter);
229 /*=================================ISLETTER=====================================
230 **A wrapper for char. Returns a boolean indicating whether **
231 **character c is considered to be a letter. **
232 ==============================================================================*/
233 // Determines whether a character is a letter.
234 public static bool IsLetter(char c)
241 return IsInRange(c, 'a', 'z');
243 return (CheckLetter(GetLatin1UnicodeCategory(c)));
245 return (CheckLetter(CharUnicodeInfo.GetUnicodeCategory(c)));
248 private static bool IsWhiteSpaceLatin1(char c)
250 // There are characters which belong to UnicodeCategory.Control but are considered as white spaces.
251 // We use code point comparisons for these characters here as a temporary fix.
253 // U+0009 = <control> HORIZONTAL TAB
254 // U+000a = <control> LINE FEED
255 // U+000b = <control> VERTICAL TAB
// U+000c = <control> FORM FEED
257 // U+000d = <control> CARRIAGE RETURN
258 // U+0085 = <control> NEXT LINE
259 // U+00a0 = NO-BREAK SPACE
262 (uint)(c - '\x0009') <= ('\x000d' - '\x0009') || // (c >= '\x0009' && c <= '\x000d')
267 /*===============================ISWHITESPACE===================================
268 **A wrapper for char. Returns a boolean indicating whether **
269 **character c is considered to be a whitespace character. **
270 ==============================================================================*/
271 // Determines whether a character is whitespace.
272 public static bool IsWhiteSpace(char c)
276 return (IsWhiteSpaceLatin1(c));
278 return CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(c));
282 /*===================================IsUpper====================================
**Arguments: c -- the character to be checked.
284 **Returns: True if c is an uppercase character.
285 ==============================================================================*/
286 // Determines whether a character is upper-case.
287 public static bool IsUpper(char c)
293 return IsInRange(c, 'A', 'Z');
295 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.UppercaseLetter);
297 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.UppercaseLetter);
300 /*===================================IsLower====================================
**Arguments: c -- the character to be checked.
**Returns: True if c is a lowercase character.
303 ==============================================================================*/
304 // Determines whether a character is lower-case.
305 public static bool IsLower(char c)
311 return IsInRange(c, 'a', 'z');
313 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.LowercaseLetter);
315 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.LowercaseLetter);
// Check if the specified UnicodeCategory belongs to the punctuation categories,
// i.e. lies between ConnectorPunctuation and OtherPunctuation inclusive.
internal static bool CheckPunctuation(UnicodeCategory uc)
    => IsInRange(uc, UnicodeCategory.ConnectorPunctuation, UnicodeCategory.OtherPunctuation);
324 /*================================IsPunctuation=================================
**Arguments: c -- the character to be checked.
**Returns: True if c is a punctuation mark
327 ==============================================================================*/
328 // Determines whether a character is a punctuation mark.
329 public static bool IsPunctuation(char c)
333 return (CheckPunctuation(GetLatin1UnicodeCategory(c)));
335 return (CheckPunctuation(CharUnicodeInfo.GetUnicodeCategory(c)));
/*=================================CheckLetterOrDigit=====================================
** Check if the specified UnicodeCategory belongs to the letter or digit categories:
** any category accepted by CheckLetter, or DecimalDigitNumber.
==============================================================================*/
internal static bool CheckLetterOrDigit(UnicodeCategory uc)
{
    if (CheckLetter(uc))
    {
        return true;
    }

    return uc == UnicodeCategory.DecimalDigitNumber;
}
346 // Determines whether a character is a letter or a digit.
347 public static bool IsLetterOrDigit(char c)
351 return (CheckLetterOrDigit(GetLatin1UnicodeCategory(c)));
353 return (CheckLetterOrDigit(CharUnicodeInfo.GetUnicodeCategory(c)));
/*===================================ToUpper====================================
** Converts c to upper case using the TextInfo of the supplied culture.
==============================================================================*/
// Converts a character to upper-case for the specified culture.
// Throws ArgumentNullException when culture is null.
public static char ToUpper(char c, CultureInfo culture)
{
    TextInfo casing = (culture ?? throw new ArgumentNullException(nameof(culture))).TextInfo;
    return casing.ToUpper(c);
}
/*=================================TOUPPER======================================
**Converts character c to its uppercase equivalent using the current culture. **
**If c is already an uppercase character or is not alphabetic, nothing       **
**happens.                                                                   **
==============================================================================*/
// Converts a character to upper-case for the default (current) culture.
public static char ToUpper(char c)
{
    TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
    return casing.ToUpper(c);
}
// Converts a character to upper-case using the invariant culture's TextInfo, so the result
// does not depend on the current culture.
public static char ToUpperInvariant(char c)
{
    TextInfo casing = CultureInfo.InvariantCulture.TextInfo;
    return casing.ToUpper(c);
}
/*===================================ToLower====================================
** Converts c to lower case using the TextInfo of the supplied culture.
==============================================================================*/
// Converts a character to lower-case for the specified culture.
// Throws ArgumentNullException when culture is null.
public static char ToLower(char c, CultureInfo culture)
{
    TextInfo casing = (culture ?? throw new ArgumentNullException(nameof(culture))).TextInfo;
    return casing.ToLower(c);
}
/*=================================TOLOWER======================================
**Converts character c to its lowercase equivalent using the current culture. **
**If c is already a lowercase character or is not alphabetic, nothing        **
**happens.                                                                   **
==============================================================================*/
// Converts a character to lower-case for the default (current) culture.
public static char ToLower(char c)
{
    TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
    return casing.ToLower(c);
}
// Converts a character to lower-case using the invariant culture's TextInfo, so the result
// does not depend on the current culture.
public static char ToLowerInvariant(char c)
{
    TextInfo casing = CultureInfo.InvariantCulture.TextInfo;
    return casing.ToLower(c);
}
420 // IConvertible implementation
// IConvertible.GetTypeCode: a Char always reports TypeCode.Char.
public TypeCode GetTypeCode() => TypeCode.Char;
// Char -> Boolean is not a supported conversion: always throws InvalidCastException.
bool IConvertible.ToBoolean(IFormatProvider provider)
    => throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Boolean"));
433 char IConvertible.ToChar(IFormatProvider provider)
// Char -> SByte, delegated to Convert.ToSByte(char). The provider is not used.
sbyte IConvertible.ToSByte(IFormatProvider provider) => Convert.ToSByte(m_value);
// Char -> Byte, delegated to Convert.ToByte(char). The provider is not used.
byte IConvertible.ToByte(IFormatProvider provider) => Convert.ToByte(m_value);
// Char -> Int16, delegated to Convert.ToInt16(char). The provider is not used.
short IConvertible.ToInt16(IFormatProvider provider) => Convert.ToInt16(m_value);
// Char -> UInt16, delegated to Convert.ToUInt16(char). The provider is not used.
ushort IConvertible.ToUInt16(IFormatProvider provider) => Convert.ToUInt16(m_value);
// Char -> Int32, delegated to Convert.ToInt32(char). The provider is not used.
int IConvertible.ToInt32(IFormatProvider provider) => Convert.ToInt32(m_value);
// Char -> UInt32, delegated to Convert.ToUInt32(char). The provider is not used.
uint IConvertible.ToUInt32(IFormatProvider provider) => Convert.ToUInt32(m_value);
// Char -> Int64, delegated to Convert.ToInt64(char). The provider is not used.
long IConvertible.ToInt64(IFormatProvider provider) => Convert.ToInt64(m_value);
// Char -> UInt64, delegated to Convert.ToUInt64(char). The provider is not used.
ulong IConvertible.ToUInt64(IFormatProvider provider) => Convert.ToUInt64(m_value);
// Char -> Single is not a supported conversion: always throws InvalidCastException.
float IConvertible.ToSingle(IFormatProvider provider)
    => throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Single"));
// Char -> Double is not a supported conversion: always throws InvalidCastException.
double IConvertible.ToDouble(IFormatProvider provider)
    => throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Double"));
// Char -> Decimal is not a supported conversion: always throws InvalidCastException.
decimal IConvertible.ToDecimal(IFormatProvider provider)
    => throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Decimal"));
// Char -> DateTime is not a supported conversion: always throws InvalidCastException.
DateTime IConvertible.ToDateTime(IFormatProvider provider)
    => throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "DateTime"));
// Generic conversion entry point: boxes this value as IConvertible and hands it, together with
// the requested type and provider, to Convert.DefaultToType.
object IConvertible.ToType(Type type, IFormatProvider provider)
{
    IConvertible self = this;
    return Convert.DefaultToType(self, type, provider);
}
502 public static bool IsControl(char c)
506 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.Control);
508 return (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.Control);
511 public static bool IsControl(string s, int index)
514 throw new ArgumentNullException(nameof(s));
515 if (((uint)index) >= ((uint)s.Length))
517 throw new ArgumentOutOfRangeException(nameof(index));
522 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.Control);
524 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.Control);
528 public static bool IsDigit(string s, int index)
531 throw new ArgumentNullException(nameof(s));
532 if (((uint)index) >= ((uint)s.Length))
534 throw new ArgumentOutOfRangeException(nameof(index));
539 return IsInRange(c, '0', '9');
541 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.DecimalDigitNumber);
544 public static bool IsLetter(string s, int index)
547 throw new ArgumentNullException(nameof(s));
548 if (((uint)index) >= ((uint)s.Length))
550 throw new ArgumentOutOfRangeException(nameof(index));
558 return IsInRange(c, 'a', 'z');
560 return (CheckLetter(GetLatin1UnicodeCategory(c)));
562 return (CheckLetter(CharUnicodeInfo.GetUnicodeCategory(s, index)));
565 public static bool IsLetterOrDigit(string s, int index)
568 throw new ArgumentNullException(nameof(s));
569 if (((uint)index) >= ((uint)s.Length))
571 throw new ArgumentOutOfRangeException(nameof(index));
576 return CheckLetterOrDigit(GetLatin1UnicodeCategory(c));
578 return CheckLetterOrDigit(CharUnicodeInfo.GetUnicodeCategory(s, index));
581 public static bool IsLower(string s, int index)
584 throw new ArgumentNullException(nameof(s));
585 if (((uint)index) >= ((uint)s.Length))
587 throw new ArgumentOutOfRangeException(nameof(index));
594 return IsInRange(c, 'a', 'z');
596 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.LowercaseLetter);
599 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.LowercaseLetter);
/*=================================CheckNumber=====================================
** Check if the specified UnicodeCategory belongs to the number categories,
** i.e. lies between DecimalDigitNumber and OtherNumber inclusive.
==============================================================================*/
internal static bool CheckNumber(UnicodeCategory uc)
    => IsInRange(uc, UnicodeCategory.DecimalDigitNumber, UnicodeCategory.OtherNumber);
611 public static bool IsNumber(char c)
617 return IsInRange(c, '0', '9');
619 return (CheckNumber(GetLatin1UnicodeCategory(c)));
621 return (CheckNumber(CharUnicodeInfo.GetUnicodeCategory(c)));
624 public static bool IsNumber(string s, int index)
627 throw new ArgumentNullException(nameof(s));
628 if (((uint)index) >= ((uint)s.Length))
630 throw new ArgumentOutOfRangeException(nameof(index));
637 return IsInRange(c, '0', '9');
639 return (CheckNumber(GetLatin1UnicodeCategory(c)));
641 return (CheckNumber(CharUnicodeInfo.GetUnicodeCategory(s, index)));
644 ////////////////////////////////////////////////////////////////////////
648 // Determines if the given character is a punctuation character.
650 ////////////////////////////////////////////////////////////////////////
652 public static bool IsPunctuation(string s, int index)
655 throw new ArgumentNullException(nameof(s));
656 if (((uint)index) >= ((uint)s.Length))
658 throw new ArgumentOutOfRangeException(nameof(index));
663 return (CheckPunctuation(GetLatin1UnicodeCategory(c)));
665 return (CheckPunctuation(CharUnicodeInfo.GetUnicodeCategory(s, index)));
/*================================= CheckSeparator ============================
** Check if the specified UnicodeCategory belongs to the separator categories,
** i.e. lies between SpaceSeparator and ParagraphSeparator inclusive.
==============================================================================*/
internal static bool CheckSeparator(UnicodeCategory uc)
    => IsInRange(uc, UnicodeCategory.SpaceSeparator, UnicodeCategory.ParagraphSeparator);
// Separator test specialised for the Latin-1 range. Only two characters qualify there:
// U+0020 = SPACE and U+00a0 = NO-BREAK SPACE. There is no LineSeparator or ParagraphSeparator
// in the Latin 1 range.
private static bool IsSeparatorLatin1(char c)
{
    switch (c)
    {
        case '\x0020':
        case '\x00a0':
            return true;
        default:
            return false;
    }
}
685 public static bool IsSeparator(char c)
689 return (IsSeparatorLatin1(c));
691 return (CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(c)));
694 public static bool IsSeparator(string s, int index)
697 throw new ArgumentNullException(nameof(s));
698 if (((uint)index) >= ((uint)s.Length))
700 throw new ArgumentOutOfRangeException(nameof(index));
705 return (IsSeparatorLatin1(c));
707 return (CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(s, index)));
// True when c is any surrogate code unit, high or low: the tested range runs from
// CharUnicodeInfo.HIGH_SURROGATE_START through CharUnicodeInfo.LOW_SURROGATE_END.
public static bool IsSurrogate(char c)
    => IsInRange(c, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END);
// String overload of IsSurrogate: tests the char at s[index].
// Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when index is
// outside [0, s.Length).
public static bool IsSurrogate(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // One unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char candidate = s[index];
    return IsSurrogate(candidate);
}
/*================================= CheckSymbol ============================
** Check if the specified UnicodeCategory belongs to the symbol categories,
** i.e. lies between MathSymbol and OtherSymbol inclusive.
==============================================================================*/
internal static bool CheckSymbol(UnicodeCategory uc)
    => IsInRange(uc, UnicodeCategory.MathSymbol, UnicodeCategory.OtherSymbol);
737 public static bool IsSymbol(char c)
741 return (CheckSymbol(GetLatin1UnicodeCategory(c)));
743 return (CheckSymbol(CharUnicodeInfo.GetUnicodeCategory(c)));
746 public static bool IsSymbol(string s, int index)
749 throw new ArgumentNullException(nameof(s));
750 if (((uint)index) >= ((uint)s.Length))
752 throw new ArgumentOutOfRangeException(nameof(index));
757 return (CheckSymbol(GetLatin1UnicodeCategory(c)));
759 return (CheckSymbol(CharUnicodeInfo.GetUnicodeCategory(s, index)));
763 public static bool IsUpper(string s, int index)
766 throw new ArgumentNullException(nameof(s));
767 if (((uint)index) >= ((uint)s.Length))
769 throw new ArgumentOutOfRangeException(nameof(index));
776 return IsInRange(c, 'A', 'Z');
778 return (GetLatin1UnicodeCategory(c) == UnicodeCategory.UppercaseLetter);
781 return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.UppercaseLetter);
// String overload of IsWhiteSpace: classifies the char at s[index].
// Latin-1 characters are answered by IsWhiteSpaceLatin1; any other character counts as white
// space when CheckSeparator accepts its Unicode category.
// Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when index is
// outside [0, s.Length).
public static bool IsWhiteSpace(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // One unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    return IsLatin1(ch)
        ? IsWhiteSpaceLatin1(ch)
        : CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(s, index));
}
801 public static UnicodeCategory GetUnicodeCategory(char c)
805 return (GetLatin1UnicodeCategory(c));
807 return CharUnicodeInfo.GetUnicodeCategory((int)c);
// String overload of GetUnicodeCategory: returns the category of the character at s[index].
// Latin-1 characters are looked up through GetLatin1UnicodeCategory; everything else is
// delegated to CharUnicodeInfo.InternalGetUnicodeCategory(s, index).
// Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when index is
// outside [0, s.Length).
public static UnicodeCategory GetUnicodeCategory(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // One unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    return IsLatin1(ch)
        ? GetLatin1UnicodeCategory(ch)
        : CharUnicodeInfo.InternalGetUnicodeCategory(s, index);
}
// Numeric value associated with c, as reported by CharUnicodeInfo.GetNumericValue(char).
public static double GetNumericValue(char c) => CharUnicodeInfo.GetNumericValue(c);
// String overload of GetNumericValue: validates the arguments, then delegates to
// CharUnicodeInfo.GetNumericValue(s, index).
// Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when index is
// outside [0, s.Length).
public static double GetNumericValue(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // One unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    double numeric = CharUnicodeInfo.GetNumericValue(s, index);
    return numeric;
}
/*================================= IsHighSurrogate ============================
** Check if a char is a high surrogate, i.e. lies between
** CharUnicodeInfo.HIGH_SURROGATE_START and CharUnicodeInfo.HIGH_SURROGATE_END inclusive.
==============================================================================*/
public static bool IsHighSurrogate(char c)
    => IsInRange(c, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END);
// String overload of IsHighSurrogate: tests the char at s[index].
// Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when index is
// outside [0, s.Length).
public static bool IsHighSurrogate(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // One unsigned comparison rejects both negative and too-large indices, matching the
    // bounds check used by the other (string, int) overloads in this type.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char candidate = s[index];
    return IsHighSurrogate(candidate);
}
/*================================= IsLowSurrogate ============================
** Check if a char is a low surrogate, i.e. lies between
** CharUnicodeInfo.LOW_SURROGATE_START and CharUnicodeInfo.LOW_SURROGATE_END inclusive.
==============================================================================*/
public static bool IsLowSurrogate(char c)
    => IsInRange(c, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END);
// String overload of IsLowSurrogate: tests the char at s[index].
// Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when index is
// outside [0, s.Length).
public static bool IsLowSurrogate(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // One unsigned comparison rejects both negative and too-large indices, matching the
    // bounds check used by the other (string, int) overloads in this type.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char candidate = s[index];
    return IsLowSurrogate(candidate);
}
/*================================= IsSurrogatePair ============================
** Report whether the two chars starting at s[index] form a surrogate pair.
** Returns false when s[index] is the last char of the string.
** Throws ArgumentNullException for a null string and
** ArgumentOutOfRangeException for an index outside the string.
==============================================================================*/
public static bool IsSurrogatePair(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // One unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // A pair needs a second char after index; without one the answer is false.
    int next = index + 1;
    return next < s.Length && IsSurrogatePair(s[index], s[next]);
}
// Returns true only when highSurrogate is a high surrogate and lowSurrogate
// is a low surrogate. The low surrogate is not examined if the first check fails.
public static bool IsSurrogatePair(char highSurrogate, char lowSurrogate)
{
    if (!IsHighSurrogate(highSurrogate))
    {
        return false;
    }

    return IsLowSurrogate(lowSurrogate);
}
// The end codepoint for Unicode plane 0 (0x0000 ~ 0xffff).
internal const int UNICODE_PLANE00_END = 0xFFFF;
// The starting codepoint for Unicode plane 1. Plane 1 contains 0x010000 ~ 0x01ffff.
internal const int UNICODE_PLANE01_START = 0x010000;
// The end codepoint for Unicode plane 16. This is the maximum code point value allowed for Unicode.
// Plane 16 contains 0x100000 ~ 0x10ffff.
internal const int UNICODE_PLANE16_END = 0x10FFFF;
/*================================= ConvertFromUtf32 ============================
** Convert a UTF-32 code point into its UTF-16 string form: a single char for
** a BMP code point, a surrogate pair for a supplementary code point.
** Throws ArgumentOutOfRangeException when utf32 is negative, greater than
** UNICODE_PLANE16_END, or inside the surrogate range U+D800 ~ U+DFFF.
==============================================================================*/
public static string ConvertFromUtf32(int utf32)
{
    // Code points in the surrogate range U+D800 ~ U+DFFF have no standalone
    // UTF-16 encoding, so they are rejected together with values above the
    // last Unicode plane. The unsigned comparison also rejects negative input.
    if (((uint)utf32 > UNICODE_PLANE16_END) || (utf32 >= CharUnicodeInfo.HIGH_SURROGATE_START && utf32 <= CharUnicodeInfo.LOW_SURROGATE_END))
    {
        throw new ArgumentOutOfRangeException(nameof(utf32), SR.ArgumentOutOfRange_InvalidUTF32);
    }

    if (utf32 < UNICODE_PLANE01_START)
    {
        // This is a BMP character: it fits in one UTF-16 code unit.
        return char.ToString((char)utf32);
    }

    // This is a supplementary character. Convert it to a surrogate pair in UTF-16.
    // The offset from the start of plane 1 is split into a quotient and a
    // remainder by 0x400; the quotient selects the high surrogate and the
    // remainder the low surrogate.
    int offset = utf32 - UNICODE_PLANE01_START;

    // Two chars of stack space, written through a bounds-checked span rather
    // than an unsafe pointer into a local.
    Span<char> pair = stackalloc char[2];
    pair[0] = (char)((offset / 0x400) + (int)CharUnicodeInfo.HIGH_SURROGATE_START);
    pair[1] = (char)((offset % 0x400) + (int)CharUnicodeInfo.LOW_SURROGATE_START);
    return new string(pair);
}
/*=============================ConvertToUtf32===================================
** Combine a high surrogate and a low surrogate into one UTF-32 value.
** Throws ArgumentOutOfRangeException when highSurrogate is not a high
** surrogate or lowSurrogate is not a low surrogate (checked in that order).
==============================================================================*/
public static int ConvertToUtf32(char highSurrogate, char lowSurrogate)
{
    if (!IsHighSurrogate(highSurrogate))
    {
        throw new ArgumentOutOfRangeException(nameof(highSurrogate), SR.ArgumentOutOfRange_InvalidHighSurrogate);
    }

    if (!IsLowSurrogate(lowSurrogate))
    {
        throw new ArgumentOutOfRangeException(nameof(lowSurrogate), SR.ArgumentOutOfRange_InvalidLowSurrogate);
    }

    // Offsets of each surrogate from the start of its own range.
    int highOffset = highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
    int lowOffset = lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;

    // The high offset is scaled by 0x400, the low offset is added, and the
    // result is rebased onto the first supplementary plane.
    return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
}
/*=============================ConvertToUtf32===================================
** Convert the character or surrogate pair starting at s[index] to its UTF-32
** value.
** The char pointed to by index must begin a surrogate pair or be a BMP character.
** This method throws if a high surrogate is not followed by a low surrogate.
** This method throws if a low surrogate is found at index.
==============================================================================*/
public static int ConvertToUtf32(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
    }

    char first = s[index];

    // Distance from the start of the surrogate block. 0x000 ~ 0x3ff is a high
    // surrogate and 0x400 ~ 0x7ff is a low surrogate.
    int highOffset = first - CharUnicodeInfo.HIGH_SURROGATE_START;
    if (highOffset < 0 || highOffset > 0x7ff)
    {
        // Not a high surrogate or low surrogate. Generate the UTF32 value for the BMP character.
        return first;
    }

    if (highOffset > 0x3ff)
    {
        // Found a low surrogate at the character pointed to by index.
        throw new ArgumentException(SR.Format(SR.Argument_InvalidLowSurrogate, index), nameof(s));
    }

    if (index >= s.Length - 1)
    {
        // Found a high surrogate at the end of the string.
        throw new ArgumentException(SR.Format(SR.Argument_InvalidHighSurrogate, index), nameof(s));
    }

    int lowOffset = s[index + 1] - CharUnicodeInfo.LOW_SURROGATE_START;
    if (lowOffset < 0 || lowOffset > 0x3ff)
    {
        // The high surrogate is not followed by a low surrogate.
        throw new ArgumentException(SR.Format(SR.Argument_InvalidHighSurrogate, index), nameof(s));
    }

    // Found a well-formed pair.
    return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
}