1 /****************************************************************************
3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/
6 ** This file is part of the QtCore module of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** GNU Lesser General Public License Usage
10 ** This file may be used under the terms of the GNU Lesser General Public
11 ** License version 2.1 as published by the Free Software Foundation and
12 ** appearing in the file LICENSE.LGPL included in the packaging of this
13 ** file. Please review the following information to ensure the GNU Lesser
14 ** General Public License version 2.1 requirements will be met:
15 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
17 ** In addition, as a special exception, Nokia gives you certain additional
18 ** rights. These rights are described in the Nokia Qt LGPL Exception
19 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
21 ** GNU General Public License Usage
22 ** Alternatively, this file may be used under the terms of the GNU General
23 ** Public License version 3.0 as published by the Free Software Foundation
24 ** and appearing in the file LICENSE.GPL included in the packaging of this
25 ** file. Please review the following information to ensure the GNU General
26 ** Public License version 3.0 requirements will be met:
27 ** http://www.gnu.org/copyleft/gpl.html.
30 ** Alternatively, this file may be used in accordance with the terms and
31 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
// Don't define these while compiling this module, or USERS of Qt will
// not be able to link. Each conditional is closed by its own #endif.
#ifdef QT_NO_CAST_FROM_ASCII
#  undef QT_NO_CAST_FROM_ASCII
#endif
#ifdef QT_NO_CAST_TO_ASCII
#  undef QT_NO_CAST_TO_ASCII
#endif
52 #include "qdatastream.h"
54 #include "qunicodetables_p.h"
55 #include "qunicodetables.cpp"
// Turns a small enumerator value (e.g. a QChar::Category) into a single-bit
// mask so that several values can be tested with one bitwise AND.
#define FLAG(bit) (1 << (bit))
63 \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
65 \ingroup string-processing
67 This class is only useful to construct a QChar from an 8-bit character.
69 \sa QChar, QLatin1String, QString
73 \fn const char QLatin1Char::toLatin1() const
75 Converts a Latin-1 character to an 8-bit ASCII representation of the character.
79 \fn const ushort QLatin1Char::unicode() const
81 Converts a Latin-1 character to a 16-bit-encoded Unicode representation
86 \fn QLatin1Char::QLatin1Char(char c)
88 Constructs a Latin-1 character for \a c. This constructor should be
89 used when the encoding of the input character is known to be Latin-1.
94 \brief The QChar class provides a 16-bit Unicode character.
96 \ingroup string-processing
99 In Qt, Unicode characters are 16-bit entities without any markup
100 or structure. This class represents such an entity. It is
101 lightweight, so it can be used everywhere. Most compilers treat
102 it like a \c{unsigned short}.
104 QChar provides a full complement of testing/classification
105 functions, converting to and from other formats, converting from
106 composed to decomposed Unicode, and trying to compare and
107 case-convert if you ask it to.
109 The classification functions include functions like those in the
110 standard C++ header \<cctype\> (formerly \<ctype.h\>), but
111 operating on the full range of Unicode characters. They all
112 return true if the character is a certain type of character;
113 otherwise they return false. These classification functions are
114 isNull() (returns true if the character is '\\0'), isPrint()
115 (true if the character is any sort of printable character,
116 including whitespace), isPunct() (any sort of punctuation),
117 isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
118 sort of numeric character, not just 0-9), isLetterOrNumber(), and
119 isDigit() (decimal digits). All of these are wrappers around
120 category() which returns the Unicode-defined category of each
123 QChar also provides direction(), which indicates the "natural"
124 writing direction of this character. The joining() function
125 indicates how the character joins with its neighbors (needed
126 mostly for Arabic) and finally hasMirrored(), which indicates
127 whether the character needs to be mirrored when it is printed in
128 its "unnatural" writing direction.
130 Composed Unicode characters (like \a ring) can be converted to
131 decomposed Unicode ("a" followed by "ring above") by using decomposition().
133 In Unicode, comparison is not necessarily possible and case
134 conversion is very difficult at best. Unicode, covering the
135 "entire" world, also includes most of the world's case and
136 sorting problems. operator==() and friends will do comparison
137 based purely on the numeric Unicode value (code point) of the
138 characters, and toUpper() and toLower() will do case changes when
139 the character has a well-defined uppercase/lowercase equivalent.
140 For locale-dependent comparisons, use QString::localeAwareCompare().
142 The conversion functions include unicode() (to a scalar),
143 toLatin1() (to scalar, but converts all non-Latin-1 characters to
144 0), row() (gives the Unicode row), cell() (gives the Unicode
145 cell), digitValue() (gives the integer value of any of the
146 numerous digit characters), and a host of constructors.
148 QChar provides constructors and cast operators that make it easy
149 to convert to and from traditional 8-bit \c{char}s. If you
150 defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
151 explained in the QString documentation, you will need to
152 explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
153 to construct a QChar from an 8-bit \c char, and you will need to
154 call toAscii() or toLatin1() to get the 8-bit value back.
156 \sa Unicode, QString, QLatin1Char
160 \enum QChar::UnicodeVersion
162 Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
163 introduced a certain character.
165 \value Unicode_1_1 Version 1.1
166 \value Unicode_2_0 Version 2.0
167 \value Unicode_2_1_2 Version 2.1.2
168 \value Unicode_3_0 Version 3.0
169 \value Unicode_3_1 Version 3.1
170 \value Unicode_3_2 Version 3.2
171 \value Unicode_4_0 Version 4.0
172 \value Unicode_4_1 Version 4.1
173 \value Unicode_5_0 Version 5.0
174 \value Unicode_Unassigned The value is not assigned to any character
175 in version 5.0 of Unicode.
177 \sa unicodeVersion(), currentUnicodeVersion()
181 \enum QChar::Category
183 This enum maps the Unicode character categories.
185 The following categories are normative in Unicode:
187 \value Mark_NonSpacing Unicode class name Mn
189 \value Mark_SpacingCombining Unicode class name Mc
191 \value Mark_Enclosing Unicode class name Me
193 \value Number_DecimalDigit Unicode class name Nd
195 \value Number_Letter Unicode class name Nl
197 \value Number_Other Unicode class name No
199 \value Separator_Space Unicode class name Zs
201 \value Separator_Line Unicode class name Zl
203 \value Separator_Paragraph Unicode class name Zp
205 \value Other_Control Unicode class name Cc
207 \value Other_Format Unicode class name Cf
209 \value Other_Surrogate Unicode class name Cs
211 \value Other_PrivateUse Unicode class name Co
213 \value Other_NotAssigned Unicode class name Cn
216 The following categories are informative in Unicode:
218 \value Letter_Uppercase Unicode class name Lu
220 \value Letter_Lowercase Unicode class name Ll
222 \value Letter_Titlecase Unicode class name Lt
224 \value Letter_Modifier Unicode class name Lm
226 \value Letter_Other Unicode class name Lo
228 \value Punctuation_Connector Unicode class name Pc
230 \value Punctuation_Dash Unicode class name Pd
232 \value Punctuation_Open Unicode class name Ps
234 \value Punctuation_Close Unicode class name Pe
236 \value Punctuation_InitialQuote Unicode class name Pi
238 \value Punctuation_FinalQuote Unicode class name Pf
240 \value Punctuation_Other Unicode class name Po
242 \value Symbol_Math Unicode class name Sm
244 \value Symbol_Currency Unicode class name Sc
246 \value Symbol_Modifier Unicode class name Sk
248 \value Symbol_Other Unicode class name So
254 \enum QChar::Direction
256 This enum type defines the Unicode direction attributes. See the
257 \l{http://www.unicode.org/}{Unicode Standard} for a description
260 In order to conform to C/C++ naming conventions "Dir" is prepended
261 to the codes used in the Unicode Standard.
287 \enum QChar::Decomposition
289 This enum type defines the Unicode decomposition attributes. See
290 the \l{http://www.unicode.org/}{Unicode Standard} for a
291 description of the values.
293 \value NoDecomposition
318 This enum type defines the Unicode joining attributes. See the
319 \l{http://www.unicode.org/}{Unicode Standard} for a description
331 \enum QChar::CombiningClass
335 This enum type defines names for some of the Unicode combining
336 classes. See the \l{http://www.unicode.org/}{Unicode Standard}
337 for a description of the values.
339 \value Combining_Above
340 \value Combining_AboveAttached
341 \value Combining_AboveLeft
342 \value Combining_AboveLeftAttached
343 \value Combining_AboveRight
344 \value Combining_AboveRightAttached
345 \value Combining_Below
346 \value Combining_BelowAttached
347 \value Combining_BelowLeft
348 \value Combining_BelowLeftAttached
349 \value Combining_BelowRight
350 \value Combining_BelowRightAttached
351 \value Combining_DoubleAbove
352 \value Combining_DoubleBelow
353 \value Combining_IotaSubscript
354 \value Combining_Left
355 \value Combining_LeftAttached
356 \value Combining_Right
357 \value Combining_RightAttached
361 \enum QChar::SpecialCharacter
363 \value Null A QChar with this value isNull().
364 \value Nbsp Non-breaking space.
365 \value ReplacementCharacter The character shown when a font has no glyph
366 for a certain codepoint. A special question mark character is often
367 used. Codecs use this codepoint when input data cannot be
368 represented in Unicode.
369 \value ObjectReplacementCharacter Used to represent an object such as an
370 image when such objects cannot be presented.
372 \value ByteOrderSwapped
373 \value ParagraphSeparator
378 \fn void QChar::setCell(uchar cell)
383 \fn void QChar::setRow(uchar row)
390 Constructs a null QChar ('\\0').
396 \fn QChar::QChar(QLatin1Char ch)
398 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
402 \fn QChar::QChar(SpecialCharacter ch)
404 Constructs a QChar for the predefined character value \a ch.
408 \fn QChar::QChar(char ch)
410 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
414 \fn QChar::QChar(uchar ch)
416 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
420 \fn QChar::QChar(uchar cell, uchar row)
422 Constructs a QChar for Unicode cell \a cell in row \a row.
428 \fn QChar::QChar(ushort code)
430 Constructs a QChar for the character with Unicode code point \a code.
434 \fn QChar::QChar(short code)
436 Constructs a QChar for the character with Unicode code point \a code.
440 \fn QChar::QChar(uint code)
442 Constructs a QChar for the character with Unicode code point \a code.
446 \fn QChar::QChar(int code)
448 Constructs a QChar for the character with Unicode code point \a code.
452 \fn bool QChar::isNull() const
454 Returns true if the character is the Unicode character 0x0000
455 ('\\0'); otherwise returns false.
459 \fn uchar QChar::cell() const
461 Returns the cell (least significant byte) of the Unicode character.
467 \fn uchar QChar::row() const
469 Returns the row (most significant byte) of the Unicode character.
475 Returns true if the character is a printable character; otherwise
476 returns false. This is any character not of category Cc or Cn.
478 Note that this gives no indication of whether the character is
479 available in a particular font.
481 bool QChar::isPrint() const
483 const int test = FLAG(Other_Control) |
484 FLAG(Other_NotAssigned);
485 return !(FLAG(qGetProp(ucs)->category) & test);
489 \fn bool QChar::isSpace() const
491 Returns true if the character is a separator character
492 (Separator_* categories or certain code points from Other_Control category);
493 otherwise returns false.
500 bool QChar::isSpace(ushort ucs2)
502 const int test = FLAG(Separator_Space) |
503 FLAG(Separator_Line) |
504 FLAG(Separator_Paragraph);
505 return FLAG(qGetProp(ucs2)->category) & test;
509 Returns true if the character is a mark (Mark_* categories);
510 otherwise returns false.
512 See QChar::Category for more information regarding marks.
514 bool QChar::isMark() const
516 const int test = FLAG(Mark_NonSpacing) |
517 FLAG(Mark_SpacingCombining) |
518 FLAG(Mark_Enclosing);
519 return FLAG(qGetProp(ucs)->category) & test;
523 Returns true if the character is a punctuation mark (Punctuation_*
524 categories); otherwise returns false.
526 bool QChar::isPunct() const
528 const int test = FLAG(Punctuation_Connector) |
529 FLAG(Punctuation_Dash) |
530 FLAG(Punctuation_Open) |
531 FLAG(Punctuation_Close) |
532 FLAG(Punctuation_InitialQuote) |
533 FLAG(Punctuation_FinalQuote) |
534 FLAG(Punctuation_Other);
535 return FLAG(qGetProp(ucs)->category) & test;
539 \fn bool QChar::isLetter() const
541 Returns true if the character is a letter (Letter_* categories);
542 otherwise returns false.
549 bool QChar::isLetter(ushort ucs2)
551 const int test = FLAG(Letter_Uppercase) |
552 FLAG(Letter_Lowercase) |
553 FLAG(Letter_Titlecase) |
554 FLAG(Letter_Modifier) |
556 return FLAG(qGetProp(ucs2)->category) & test;
560 Returns true if the character is a number (Number_* categories,
561 not just 0-9); otherwise returns false.
565 bool QChar::isNumber() const
567 const int test = FLAG(Number_DecimalDigit) |
568 FLAG(Number_Letter) |
570 return FLAG(qGetProp(ucs)->category) & test;
574 \fn bool QChar::isLetterOrNumber() const
576 Returns true if the character is a letter or number (Letter_* or
577 Number_* categories); otherwise returns false.
584 bool QChar::isLetterOrNumber(ushort ucs2)
586 const int test = FLAG(Letter_Uppercase) |
587 FLAG(Letter_Lowercase) |
588 FLAG(Letter_Titlecase) |
589 FLAG(Letter_Modifier) |
591 FLAG(Number_DecimalDigit) |
592 FLAG(Number_Letter) |
594 return FLAG(qGetProp(ucs2)->category) & test;
598 \fn bool QChar::isDigit() const
600 Returns true if the character is a decimal digit
601 (Number_DecimalDigit); otherwise returns false.
608 bool QChar::isDigit(ushort ucs2)
610 return (qGetProp(ucs2)->category == Number_DecimalDigit);
614 Returns true if the character is a symbol (Symbol_* categories);
615 otherwise returns false.
617 bool QChar::isSymbol() const
619 const int test = FLAG(Symbol_Math) |
620 FLAG(Symbol_Currency) |
621 FLAG(Symbol_Modifier) |
623 return FLAG(qGetProp(ucs)->category) & test;
627 \fn bool QChar::isHighSurrogate() const
629 Returns true if the QChar is the high part of a UTF16 surrogate
630 (i.e. if its code point is in the range [0xd800..0xdbff]).
634 \fn bool QChar::isLowSurrogate() const
636 Returns true if the QChar is the low part of a UTF16 surrogate
637 (i.e. if its code point is in the range [0xdc00..0xdfff]).
641 \fn static bool QChar::isHighSurrogate(uint ucs4)
644 Returns true if the UCS-4-encoded character specified by \a ucs4
645 is the high part of a UTF16 surrogate
646 (i.e. if its code point is in the range [0xd800..0xdbff]).
650 \fn static bool QChar::isLowSurrogate(uint ucs4)
653 Returns true if the UCS-4-encoded character specified by \a ucs4
654 is the low part of a UTF16 surrogate
655 (i.e. if its code point is in the range [0xdc00..0xdfff]).
659 \fn static bool QChar::requiresSurrogates(uint ucs4)
661 Returns true if the UCS-4-encoded character specified by \a ucs4
662 can be split into the high and low parts of a UTF16 surrogate
663 (i.e. if its code point is greater than or equal to 0x10000).
667 \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
669 Converts a UTF16 surrogate pair with the given \a high and \a low values
670 to its UCS-4-encoded code point.
674 \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
677 Converts a UTF16 surrogate pair (\a high, \a low) to its UCS-4-encoded code point.
681 \fn static ushort QChar::highSurrogate(uint ucs4)
683 Returns the high surrogate part of a UCS-4-encoded code point.
684 The returned result is undefined if \a ucs4 is smaller than 0x10000.
688 \fn static ushort QChar::lowSurrogate(uint ucs4)
690 Returns the low surrogate part of a UCS-4-encoded code point.
691 The returned result is undefined if \a ucs4 is smaller than 0x10000.
695 Returns the numeric value of the digit, or -1 if the character is not a digit.
697 int QChar::digitValue() const
699 return qGetProp(ucs)->digitValue;
704 Returns the numeric value of the digit, specified by the UCS-2-encoded
705 character, \a ucs2, or -1 if the character is not a digit.
707 int QChar::digitValue(ushort ucs2)
709 return qGetProp(ucs2)->digitValue;
714 Returns the numeric value of the digit specified by the UCS-4-encoded
715 character, \a ucs4, or -1 if the character is not a digit.
717 int QChar::digitValue(uint ucs4)
719 if (ucs4 > UNICODE_LAST_CODEPOINT)
721 return qGetProp(ucs4)->digitValue;
725 Returns the character's category.
727 QChar::Category QChar::category() const
729 return (QChar::Category) qGetProp(ucs)->category;
734 Returns the category of the UCS-4-encoded character specified by \a ucs4.
736 QChar::Category QChar::category(uint ucs4)
738 if (ucs4 > UNICODE_LAST_CODEPOINT)
739 return QChar::Other_NotAssigned;
740 return (QChar::Category) qGetProp(ucs4)->category;
745 Returns the category of the UCS-2-encoded character specified by \a ucs2.
747 QChar::Category QChar::category(ushort ucs2)
749 return (QChar::Category) qGetProp(ucs2)->category;
754 Returns the character's direction.
756 QChar::Direction QChar::direction() const
758 return (QChar::Direction) qGetProp(ucs)->direction;
763 Returns the direction of the UCS-4-encoded character specified by \a ucs4.
765 QChar::Direction QChar::direction(uint ucs4)
767 if (ucs4 > UNICODE_LAST_CODEPOINT)
769 return (QChar::Direction) qGetProp(ucs4)->direction;
774 Returns the direction of the UCS-2-encoded character specified by \a ucs2.
776 QChar::Direction QChar::direction(ushort ucs2)
778 return (QChar::Direction) qGetProp(ucs2)->direction;
782 Returns information about the joining properties of the character
783 (needed for certain languages such as Arabic).
785 QChar::Joining QChar::joining() const
787 return (QChar::Joining) qGetProp(ucs)->joining;
792 Returns information about the joining properties of the UCS-4-encoded
793 character specified by \a ucs4 (needed for certain languages such as Arabic).
795 QChar::Joining QChar::joining(uint ucs4)
797 if (ucs4 > UNICODE_LAST_CODEPOINT)
798 return QChar::OtherJoining;
799 return (QChar::Joining) qGetProp(ucs4)->joining;
804 Returns information about the joining properties of the UCS-2-encoded
805 character specified by \a ucs2 (needed for certain languages such as Arabic).
807 QChar::Joining QChar::joining(ushort ucs2)
809 return (QChar::Joining) qGetProp(ucs2)->joining;
813 Returns true if the character should be reversed if the text
814 direction is reversed; otherwise returns false.
816 Same as (ch.mirroredChar() != ch).
820 bool QChar::hasMirrored() const
822 return qGetProp(ucs)->mirrorDiff != 0;
826 \fn bool QChar::isLower() const
828 Returns true if the character is a lowercase letter, i.e.
829 category() is Letter_Lowercase.
831 \sa isUpper(), toLower(), toUpper()
835 \fn bool QChar::isUpper() const
837 Returns true if the character is an uppercase letter, i.e.
838 category() is Letter_Uppercase.
840 \sa isLower(), toUpper(), toLower()
844 \fn bool QChar::isTitleCase() const
846 Returns true if the character is a titlecase letter, i.e.
847 category() is Letter_Titlecase.
849 \sa isLower(), toUpper(), toLower(), toTitleCase()
853 Returns the mirrored character if this character is a mirrored
854 character; otherwise returns the character itself.
858 QChar QChar::mirroredChar() const
860 return ucs + qGetProp(ucs)->mirrorDiff;
865 Returns the mirrored character if the UCS-4-encoded character specified
866 by \a ucs4 is a mirrored character; otherwise returns the character itself.
870 uint QChar::mirroredChar(uint ucs4)
872 if (ucs4 > UNICODE_LAST_CODEPOINT)
874 return ucs4 + qGetProp(ucs4)->mirrorDiff;
879 Returns the mirrored character if the UCS-2-encoded character specified
880 by \a ucs2 is a mirrored character; otherwise returns the character itself.
884 ushort QChar::mirroredChar(ushort ucs2)
886 return ucs2 + qGetProp(ucs2)->mirrorDiff;
// Constants for algorithmic Hangul syllable (de)composition.
// SBase/LBase/VBase/TBase are the first code points of the syllable block and
// of the leading-consonant, vowel and trailing-consonant jamo ranges (TBase is
// one below the first real trailing consonant, so index 0 means "no T").
// The counts satisfy SCount == LCount * VCount * TCount.
enum {
    Hangul_SBase = 0xac00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11a7,
    Hangul_SCount = 11172,
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,
    Hangul_NCount = 21*28
};
902 // buffer has to have a length of 3. It's needed for Hangul decomposition
903 static const unsigned short * QT_FASTCALL decompositionHelper
904 (uint ucs4, int *length, int *tag, unsigned short *buffer)
907 if (ucs4 > UNICODE_LAST_CODEPOINT)
909 if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
910 int SIndex = ucs4 - Hangul_SBase;
911 buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
912 buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
913 buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
914 *length = buffer[2] == Hangul_TBase ? 2 : 3;
915 *tag = QChar::Canonical;
919 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
922 const unsigned short *decomposition = uc_decomposition_map+index;
923 *tag = (*decomposition) & 0xff;
924 *length = (*decomposition) >> 8;
925 return decomposition+1;
929 Decomposes a character into it's constituent parts. Returns an empty string
930 if no decomposition exists.
932 QString QChar::decomposition() const
934 return decomposition(ucs);
939 Decomposes the UCS-4-encoded character specified by \a ucs4 into it's
940 constituent parts. Returns an empty string if no decomposition exists.
942 QString QChar::decomposition(uint ucs4)
944 unsigned short buffer[3];
947 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
948 return QString::fromUtf16(d, length);
952 Returns the tag defining the composition of the character. Returns
953 QChar::NoDecomposition if no decomposition exists.
955 QChar::Decomposition QChar::decompositionTag() const
957 return decompositionTag(ucs);
962 Returns the tag defining the composition of the UCS-4-encoded character
963 specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
965 QChar::Decomposition QChar::decompositionTag(uint ucs4)
967 if (ucs4 > UNICODE_LAST_CODEPOINT)
968 return QChar::NoDecomposition;
969 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
971 return QChar::NoDecomposition;
972 return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
976 Returns the combining class for the character as defined in the
977 Unicode standard. This is mainly useful as a positioning hint for
978 marks attached to a base character.
980 The Qt text rendering engine uses this information to correctly
981 position non-spacing marks around a base character.
983 unsigned char QChar::combiningClass() const
985 return (unsigned char) qGetProp(ucs)->combiningClass;
990 Returns the combining class for the UCS-4-encoded character specified by
991 \a ucs4, as defined in the Unicode standard.
993 unsigned char QChar::combiningClass(uint ucs4)
995 if (ucs4 > UNICODE_LAST_CODEPOINT)
997 return (unsigned char) qGetProp(ucs4)->combiningClass;
1002 Returns the combining class for the UCS-2-encoded character specified by
1003 \a ucs2, as defined in the Unicode standard.
1005 unsigned char QChar::combiningClass(ushort ucs2)
1007 return (unsigned char) qGetProp(ucs2)->combiningClass;
1011 Returns the Unicode version that introduced this character.
1013 QChar::UnicodeVersion QChar::unicodeVersion() const
1015 return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
1020 Returns the Unicode version that introduced the character specified in
1021 its UCS-4-encoded form as \a ucs4.
1023 QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
1025 if (ucs4 > UNICODE_LAST_CODEPOINT)
1026 return QChar::Unicode_Unassigned;
1027 return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
1032 Returns the Unicode version that introduced the character specified in
1033 its UCS-2-encoded form as \a ucs2.
1035 QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
1037 return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
1041 Returns the most recent supported Unicode version.
1043 QChar::UnicodeVersion QChar::currentUnicodeVersion()
1045 return UNICODE_DATA_VERSION;
1049 Returns the lowercase equivalent if the character is uppercase or titlecase;
1050 otherwise returns the character itself.
1052 QChar QChar::toLower() const
1054 const QUnicodeTables::Properties *p = qGetProp(ucs);
1055 if (!p->lowerCaseSpecial)
1056 return ucs + p->lowerCaseDiff;
1062 Returns the lowercase equivalent of the UCS-4-encoded character specified
1063 by \a ucs4 if the character is uppercase or titlecase; otherwise returns
1064 the character itself.
1066 uint QChar::toLower(uint ucs4)
1068 if (ucs4 > UNICODE_LAST_CODEPOINT)
1070 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1071 if (!p->lowerCaseSpecial)
1072 return ucs4 + p->lowerCaseDiff;
1078 Returns the lowercase equivalent of the UCS-2-encoded character specified
1079 by \a ucs2 if the character is uppercase or titlecase; otherwise returns
1080 the character itself.
1082 ushort QChar::toLower(ushort ucs2)
1084 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1085 if (!p->lowerCaseSpecial)
1086 return ucs2 + p->lowerCaseDiff;
1091 Returns the uppercase equivalent if the character is lowercase or titlecase;
1092 otherwise returns the character itself.
1094 QChar QChar::toUpper() const
1096 const QUnicodeTables::Properties *p = qGetProp(ucs);
1097 if (!p->upperCaseSpecial)
1098 return ucs + p->upperCaseDiff;
1104 Returns the uppercase equivalent of the UCS-4-encoded character specified
1105 by \a ucs4 if the character is lowercase or titlecase; otherwise returns
1106 the character itself.
1108 uint QChar::toUpper(uint ucs4)
1110 if (ucs4 > UNICODE_LAST_CODEPOINT)
1112 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1113 if (!p->upperCaseSpecial)
1114 return ucs4 + p->upperCaseDiff;
1120 Returns the uppercase equivalent of the UCS-2-encoded character specified
1121 by \a ucs2 if the character is lowercase or titlecase; otherwise returns
1122 the character itself.
1124 ushort QChar::toUpper(ushort ucs2)
1126 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1127 if (!p->upperCaseSpecial)
1128 return ucs2 + p->upperCaseDiff;
1133 Returns the title case equivalent if the character is lowercase or uppercase;
1134 otherwise returns the character itself.
1136 QChar QChar::toTitleCase() const
1138 const QUnicodeTables::Properties *p = qGetProp(ucs);
1139 if (!p->titleCaseSpecial)
1140 return ucs + p->titleCaseDiff;
1146 Returns the title case equivalent of the UCS-4-encoded character specified
1147 by \a ucs4 if the character is lowercase or uppercase; otherwise returns
1148 the character itself.
1150 uint QChar::toTitleCase(uint ucs4)
1152 if (ucs4 > UNICODE_LAST_CODEPOINT)
1154 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1155 if (!p->titleCaseSpecial)
1156 return ucs4 + p->titleCaseDiff;
1162 Returns the title case equivalent of the UCS-2-encoded character specified
1163 by \a ucs2 if the character is lowercase or uppercase; otherwise returns
1164 the character itself.
1166 ushort QChar::toTitleCase(ushort ucs2)
1168 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1169 if (!p->titleCaseSpecial)
1170 return ucs2 + p->titleCaseDiff;
1175 static inline uint foldCase(const ushort *ch, const ushort *start)
1178 if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
1179 c = QChar::surrogateToUcs4(*(ch - 1), c);
1180 return *ch + qGetProp(c)->caseFoldDiff;
1183 static inline uint foldCase(uint ch, uint &last)
1186 if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
1187 c = QChar::surrogateToUcs4(last, c);
1189 return ch + qGetProp(c)->caseFoldDiff;
1192 static inline ushort foldCase(ushort ch)
1194 return ch + qGetProp(ch)->caseFoldDiff;
1198 Returns the case folded equivalent of the character. For most Unicode characters this
1199 is the same as toLowerCase().
1201 QChar QChar::toCaseFolded() const
1203 return ucs + qGetProp(ucs)->caseFoldDiff;
1208 Returns the case folded equivalent of the UCS-4-encoded character specified
1209 by \a ucs4. For most Unicode characters this is the same as toLowerCase().
1211 uint QChar::toCaseFolded(uint ucs4)
1213 if (ucs4 > UNICODE_LAST_CODEPOINT)
1215 return ucs4 + qGetProp(ucs4)->caseFoldDiff;
1220 Returns the case folded equivalent of the UCS-2-encoded character specified
1221 by \a ucs2. For most Unicode characters this is the same as toLowerCase().
1223 ushort QChar::toCaseFolded(ushort ucs2)
1225 return ucs2 + qGetProp(ucs2)->caseFoldDiff;
1229 \fn char QChar::toLatin1() const
1231 Returns the Latin-1 character equivalent to the QChar, or 0. This
1232 is mainly useful for non-internationalized software.
1234 \sa toAscii(), unicode()
1238 \fn char QChar::toAscii() const
1240 Returns the Latin-1 character value of the QChar, or 0 if the character is not
1243 The main purpose of this function is to preserve ASCII characters used
1244 in C strings. This is mainly useful for developers of non-internationalized
1247 \note It is not possible to distinguish a non-Latin 1 character from an ASCII 0
1248 (NUL) character. Prefer to use unicode(), which does not have this ambiguity.
1250 \sa toLatin1(), unicode()
1254 \fn QChar QChar::fromAscii(char)
1256 Converts the ASCII character \a c to its equivalent QChar. This
1257 is mainly useful for non-internationalized software.
1259 An alternative is to use QLatin1Char.
1261 \sa fromLatin1(), unicode()
1264 #ifndef QT_NO_DATASTREAM
1268 Writes the char \a chr to the stream \a out.
1270 \sa {Serializing Qt Data Types}
1272 QDataStream &operator<<(QDataStream &out, QChar chr)
1274 out << quint16(chr.unicode());
1281 Reads a char from the stream \a in into char \a chr.
1283 \sa {Serializing Qt Data Types}
1285 QDataStream &operator>>(QDataStream &in, QChar &chr)
1289 chr.unicode() = ushort(u);
1292 #endif // QT_NO_DATASTREAM
1295 \fn ushort & QChar::unicode()
1297 Returns a reference to the numeric Unicode value of the QChar.
1301 \fn ushort QChar::unicode() const
1306 /*****************************************************************************
1307 Documentation of QChar related functions
1308 *****************************************************************************/
1311 \fn bool operator==(QChar c1, QChar c2)
1315 Returns true if \a c1 and \a c2 are the same Unicode character;
1316 otherwise returns false.
1320 \fn int operator!=(QChar c1, QChar c2)
1324 Returns true if \a c1 and \a c2 are not the same Unicode
1325 character; otherwise returns false.
1329 \fn int operator<=(QChar c1, QChar c2)
1333 Returns true if the numeric Unicode value of \a c1 is less than
1334 or equal to that of \a c2; otherwise returns false.
1338 \fn int operator>=(QChar c1, QChar c2)
1342 Returns true if the numeric Unicode value of \a c1 is greater than
1343 or equal to that of \a c2; otherwise returns false.
1347 \fn int operator<(QChar c1, QChar c2)
1351 Returns true if the numeric Unicode value of \a c1 is less than
1352 that of \a c2; otherwise returns false.
1356 \fn int operator>(QChar c1, QChar c2)
1360 Returns true if the numeric Unicode value of \a c1 is greater than
1361 that of \a c2; otherwise returns false.
1365 // ---------------------------------------------------------------------------
// Replaces characters of the string with their Unicode decomposition, in place.
// The string is scanned backwards from its end down to index `from`.
//   str       - string to modify (NOTE(review): `s` below is presumably a
//               reference to *str declared on a line not shown -- confirm)
//   canonical - when true, only decompositions tagged QChar::Canonical are applied
//   version   - characters newer than this Unicode version, or unassigned, are
//               tested for and (presumably) left untouched
//   from      - index of the first UTF-16 unit that takes part in the scan
1368 static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
// Scratch space handed to decompositionHelper().
1370 unsigned short buffer[3];
// utf16 points at the first unit, uc starts one past the last unit.
1374 const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
1375 const unsigned short *uc = utf16 + s.length();
1376 while (uc != utf16 + from) {
// Step back one UTF-16 unit; if it is a low surrogate preceded by a high
// surrogate, combine both into a single UCS-4 code point.
1377 uint ucs4 = *(--uc);
1378 if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
1379 ushort high = *(uc - 1);
1380 if (QChar(high).isHighSurrogate()) {
1382 ucs4 = QChar::surrogateToUcs4(high, ucs4);
// Characters outside the requested Unicode version are tested for here
// (the action taken is on a line not shown; presumably `continue`).
1385 const QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
1386 if (v > version || v == QChar::Unicode_Unassigned)
// d is null when there is no decomposition; length and tag are filled in by the helper.
1390 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
1391 if (!d || (canonical && tag != QChar::Canonical))
// Replace the one or two UTF-16 units of this code point with the decomposition.
1394 int pos = uc - utf16;
1395 s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
1396 // the replace invalidates the pointers; re-fetch them and resume at the end of the new text so it is decomposed too
1397 utf16 = reinterpret_cast<unsigned short *>(s.data());
1398 uc = utf16 + pos + length;
// Orders a bare UTF-16 value before a UCS2Pair by comparing it with the pair's
// u1 member; used by the qBinaryFind() lookup in ligatureHelper(), which
// searches an array of UCS2Pair with a ushort key.
1408 inline bool operator<(ushort u1, const UCS2Pair &ligature)
1409 { return u1 < ligature.u1; }
// Mirror overload: orders a UCS2Pair before a bare UTF-16 value by comparing
// the pair's u1 member with it, so both argument orders are available.
1410 inline bool operator<(const UCS2Pair &ligature, ushort u1)
1411 { return ligature.u1 < u1; }
// Looks up the composed form of the UTF-16 unit u1 followed by u2. Three
// strategies are tried in order: Hangul L+V arithmetic, Hangul LV+T arithmetic,
// then a search of uc_ligature_map keyed on u2.
// The caller in composeHelper() treats a zero result as "no ligature".
// NOTE(review): several return statements are on lines not shown in this excerpt.
1413 static ushort ligatureHelper(ushort u1, ushort u2)
// Hangul leading consonant + vowel -> LV syllable.
1416 int LIndex = u1 - Hangul_LBase;
1417 if (0 <= LIndex && LIndex < Hangul_LCount) {
1418 int VIndex = u2 - Hangul_VBase;
1419 if (0 <= VIndex && VIndex < Hangul_VCount)
1420 return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
// Hangul LV syllable (SIndex is a multiple of TCount) + trailing consonant.
1424 int SIndex = u1 - Hangul_SBase;
1425 if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
1426 int TIndex = u2 - Hangul_TBase;
// NOTE(review): both bounds are inclusive here, so TIndex == 0 and
// TIndex == Hangul_TCount are accepted. The Unicode Hangul composition
// algorithm uses 0 < TIndex < TCount; this looks like an off-by-one on both
// ends -- confirm against the (hidden) return statement and fix if so.
1427 if (0 <= TIndex && TIndex <= Hangul_TCount)
// Generic case: u2 selects a slot in uc_ligature_map; 0xffff means u2 never
// appears as the second half of a ligature.
1431 const unsigned short index = GET_LIGATURE_INDEX(u2);
1432 if (index == 0xffff)
// The slot starts with an entry count, followed by that many UCS2Pair records.
1434 const unsigned short *ligatures = uc_ligature_map+index;
1435 ushort length = *ligatures++;
// Binary search for u1; relies on the operator<(ushort, UCS2Pair) overloads above.
1437 const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
1438 const UCS2Pair *r = qBinaryFind(data, data + length, u1);
1439 if (r != data + length)
// Composes character sequences of the string in place, starting at index
// `from`: a starter and a following character are replaced by their ligature
// whenever ligatureHelper() reports one.
//   str     - string to modify (NOTE(review): `s` below is presumably a
//             reference to *str declared on a line not shown -- confirm)
//   version - characters newer than this Unicode version, or unassigned, are
//             excluded from composition
//   from    - index of the first UTF-16 unit to consider
1446 static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
// Fewer than two units after `from`: tested for here (action on a hidden line).
1450 if (s.length() - from < 2)
1453 // the loop can partly ignore high Unicode as all ligatures are in the BMP
// Combining class of the previously examined character.
1455 int lastCombining = 0;
1457 while (pos < s.length()) {
// Fetch the next code point, joining a surrogate pair into one UCS-4 value.
1458 uint uc = s.at(pos).unicode();
1459 if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
1460 ushort low = s.at(pos+1).unicode();
1461 if (QChar(low).isLowSurrogate()) {
1462 uc = QChar::surrogateToUcs4(uc, low);
1466 const QUnicodeTables::Properties *p = qGetProp(uc);
1467 if (p->unicodeVersion > version || p->unicodeVersion == QChar::Unicode_Unassigned) {
1468 starter = -1; // to prevent starter == pos - 1
1473 int combining = p->combiningClass;
// NOTE(review): if starter was set to -1 above and `combining > lastCombining`
// holds, s.at(starter) below is evaluated with index -1. Lines hidden from this
// excerpt may rule that out -- confirm.
1474 if (starter == pos - 1 || combining > lastCombining) {
1475 // allowed to form ligature with S
// uc is narrowed to ushort by ligatureHelper()'s parameter type.
1476 QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
1477 if (ligature.unicode()) {
// A non-zero result replaces the starter with the composed character.
1478 s[starter] = ligature;
1485 lastCombining = combining;
1491 static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
1494 const int l = s.length()-1;
1498 uint u1 = s.at(pos).unicode();
1499 if (QChar(u1).isHighSurrogate()) {
1500 ushort low = s.at(p2).unicode();
1501 if (QChar(low).isLowSurrogate()) {
1502 u1 = QChar::surrogateToUcs4(u1, low);
1508 uint u2 = s.at(p2).unicode();
1509 if (QChar(u2).isHighSurrogate() && p2 < l) {
1510 ushort low = s.at(p2+1).unicode();
1511 if (QChar(low).isLowSurrogate()) {
1512 u2 = QChar::surrogateToUcs4(u2, low);
1519 const QUnicodeTables::Properties *p = qGetProp(u2);
1520 if (p->unicodeVersion <= version && p->unicodeVersion != QChar::Unicode_Unassigned)
1521 c2 = p->combiningClass;
1530 const QUnicodeTables::Properties *p = qGetProp(u1);
1531 if (p->unicodeVersion <= version && p->unicodeVersion != QChar::Unicode_Unassigned)
1532 c1 = p->combiningClass;
1536 QChar *uc = s.data();
1538 // exchange characters
1539 if (!QChar::requiresSurrogates(u2)) {
1542 uc[p++] = QChar::highSurrogate(u2);
1543 uc[p++] = QChar::lowSurrogate(u2);
1545 if (!QChar::requiresSurrogates(u1)) {
1548 uc[p++] = QChar::highSurrogate(u1);
1549 uc[p++] = QChar::lowSurrogate(u1);
1553 if (pos > 0 && s.at(pos).isLowSurrogate())
1557 if (QChar::requiresSurrogates(u1))