1 /****************************************************************************
3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/
6 ** This file is part of the QtCore module of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** GNU Lesser General Public License Usage
10 ** This file may be used under the terms of the GNU Lesser General Public
11 ** License version 2.1 as published by the Free Software Foundation and
12 ** appearing in the file LICENSE.LGPL included in the packaging of this
13 ** file. Please review the following information to ensure the GNU Lesser
14 ** General Public License version 2.1 requirements will be met:
15 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
17 ** In addition, as a special exception, Nokia gives you certain additional
18 ** rights. These rights are described in the Nokia Qt LGPL Exception
19 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
21 ** GNU General Public License Usage
22 ** Alternatively, this file may be used under the terms of the GNU General
23 ** Public License version 3.0 as published by the Free Software Foundation
24 ** and appearing in the file LICENSE.GPL included in the packaging of this
25 ** file. Please review the following information to ensure the GNU General
26 ** Public License version 3.0 requirements will be met:
27 ** http://www.gnu.org/copyleft/gpl.html.
30 ** Alternatively, this file may be used in accordance with the terms and
31 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
42 // Don't define it while compiling this module, or USERS of Qt will
43 // not be able to link.
44 #ifdef QT_NO_CAST_FROM_ASCII
45 # undef QT_NO_CAST_FROM_ASCII
47 #ifdef QT_NO_CAST_TO_ASCII
48 # undef QT_NO_CAST_TO_ASCII
52 #include "qdatastream.h"
53 #include "qtextcodec.h"
55 #include "qunicodetables_p.h"
56 #include "qunicodetables.cpp"
60 #ifndef QT_NO_CODEC_FOR_C_STRINGS
61 # ifdef QT_NO_TEXTCODEC
62 # define QT_NO_CODEC_FOR_C_STRINGS
66 #define FLAG(x) (1 << (x))
70 \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
72 \ingroup string-processing
74 This class is only useful to avoid the codec for C strings business
75 in the QChar(ch) constructor. You can avoid it by writing QChar(ch, 0).
77 \sa QChar, QLatin1String, QString
81 \fn const char QLatin1Char::toLatin1() const
83 Converts a Latin-1 character to an 8-bit ASCII representation of the character.
87 \fn const ushort QLatin1Char::unicode() const
89 Converts a Latin-1 character to a 16-bit-encoded Unicode representation
94 \fn QLatin1Char::QLatin1Char(char c)
96 Constructs a Latin-1 character for \a c. This constructor should be
97 used when the encoding of the input character is known to be Latin-1.
102 \brief The QChar class provides a 16-bit Unicode character.
104 \ingroup string-processing
107 In Qt, Unicode characters are 16-bit entities without any markup
108 or structure. This class represents such an entity. It is
109 lightweight, so it can be used everywhere. Most compilers treat
110 it like a \c{unsigned short}.
112 QChar provides a full complement of testing/classification
113 functions, converting to and from other formats, converting from
114 composed to decomposed Unicode, and trying to compare and
115 case-convert if you ask it to.
117 The classification functions include functions like those in the
118 standard C++ header \<cctype\> (formerly \<ctype.h\>), but
119 operating on the full range of Unicode characters. They all
120 return true if the character is a certain type of character;
121 otherwise they return false. These classification functions are
122 isNull() (returns true if the character is '\\0'), isPrint()
123 (true if the character is any sort of printable character,
124 including whitespace), isPunct() (any sort of punctuation),
125 isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
126 sort of numeric character, not just 0-9), isLetterOrNumber(), and
127 isDigit() (decimal digits). All of these are wrappers around
128 category(), which returns the Unicode-defined category of each
131 QChar also provides direction(), which indicates the "natural"
132 writing direction of this character. The joining() function
133 indicates how the character joins with its neighbors (needed
134 mostly for Arabic) and finally hasMirrored(), which indicates
135 whether the character needs to be mirrored when it is printed in
136 its "unnatural" writing direction.
138 Composed Unicode characters (like \a ring) can be converted to
139 decomposed Unicode ("a" followed by "ring above") by using decomposition().
141 In Unicode, comparison is not necessarily possible and case
142 conversion is very difficult at best. Unicode, covering the
143 "entire" world, also includes most of the world's case and
144 sorting problems. operator==() and friends will do comparison
145 based purely on the numeric Unicode value (code point) of the
146 characters, and toUpper() and toLower() will do case changes when
147 the character has a well-defined uppercase/lowercase equivalent.
148 For locale-dependent comparisons, use QString::localeAwareCompare().
150 The conversion functions include unicode() (to a scalar),
151 toLatin1() (to scalar, but converts all non-Latin-1 characters to
152 0), row() (gives the Unicode row), cell() (gives the Unicode
153 cell), digitValue() (gives the integer value of any of the
154 numerous digit characters), and a host of constructors.
156 QChar provides constructors and cast operators that make it easy
157 to convert to and from traditional 8-bit \c{char}s. If you
158 defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
159 explained in the QString documentation, you will need to
160 explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
161 to construct a QChar from an 8-bit \c char, and you will need to
162 call toAscii() or toLatin1() to get the 8-bit value back.
164 \sa Unicode, QString, QLatin1Char
168 \enum QChar::UnicodeVersion
170 Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
171 introduced a certain character.
173 \value Unicode_1_1 Version 1.1
174 \value Unicode_2_0 Version 2.0
175 \value Unicode_2_1_2 Version 2.1.2
176 \value Unicode_3_0 Version 3.0
177 \value Unicode_3_1 Version 3.1
178 \value Unicode_3_2 Version 3.2
179 \value Unicode_4_0 Version 4.0
180 \value Unicode_4_1 Version 4.1
181 \value Unicode_5_0 Version 5.0
182 \value Unicode_Unassigned The value is not assigned to any character
183 in version 5.0 of Unicode.
185 \sa unicodeVersion(), currentUnicodeVersion()
189 \enum QChar::Category
191 This enum maps the Unicode character categories.
193 The following categories are normative in Unicode:
195 \value Mark_NonSpacing Unicode class name Mn
197 \value Mark_SpacingCombining Unicode class name Mc
199 \value Mark_Enclosing Unicode class name Me
201 \value Number_DecimalDigit Unicode class name Nd
203 \value Number_Letter Unicode class name Nl
205 \value Number_Other Unicode class name No
207 \value Separator_Space Unicode class name Zs
209 \value Separator_Line Unicode class name Zl
211 \value Separator_Paragraph Unicode class name Zp
213 \value Other_Control Unicode class name Cc
215 \value Other_Format Unicode class name Cf
217 \value Other_Surrogate Unicode class name Cs
219 \value Other_PrivateUse Unicode class name Co
221 \value Other_NotAssigned Unicode class name Cn
224 The following categories are informative in Unicode:
226 \value Letter_Uppercase Unicode class name Lu
228 \value Letter_Lowercase Unicode class name Ll
230 \value Letter_Titlecase Unicode class name Lt
232 \value Letter_Modifier Unicode class name Lm
234 \value Letter_Other Unicode class name Lo
236 \value Punctuation_Connector Unicode class name Pc
238 \value Punctuation_Dash Unicode class name Pd
240 \value Punctuation_Open Unicode class name Ps
242 \value Punctuation_Close Unicode class name Pe
244 \value Punctuation_InitialQuote Unicode class name Pi
246 \value Punctuation_FinalQuote Unicode class name Pf
248 \value Punctuation_Other Unicode class name Po
250 \value Symbol_Math Unicode class name Sm
252 \value Symbol_Currency Unicode class name Sc
254 \value Symbol_Modifier Unicode class name Sk
256 \value Symbol_Other Unicode class name So
262 \enum QChar::Direction
264 This enum type defines the Unicode direction attributes. See the
265 \l{http://www.unicode.org/}{Unicode Standard} for a description
268 In order to conform to C/C++ naming conventions "Dir" is prepended
269 to the codes used in the Unicode Standard.
295 \enum QChar::Decomposition
297 This enum type defines the Unicode decomposition attributes. See
298 the \l{http://www.unicode.org/}{Unicode Standard} for a
299 description of the values.
301 \value NoDecomposition
326 This enum type defines the Unicode joining attributes. See the
327 \l{http://www.unicode.org/}{Unicode Standard} for a description
339 \enum QChar::CombiningClass
343 This enum type defines names for some of the Unicode combining
344 classes. See the \l{http://www.unicode.org/}{Unicode Standard}
345 for a description of the values.
347 \value Combining_Above
348 \value Combining_AboveAttached
349 \value Combining_AboveLeft
350 \value Combining_AboveLeftAttached
351 \value Combining_AboveRight
352 \value Combining_AboveRightAttached
353 \value Combining_Below
354 \value Combining_BelowAttached
355 \value Combining_BelowLeft
356 \value Combining_BelowLeftAttached
357 \value Combining_BelowRight
358 \value Combining_BelowRightAttached
359 \value Combining_DoubleAbove
360 \value Combining_DoubleBelow
361 \value Combining_IotaSubscript
362 \value Combining_Left
363 \value Combining_LeftAttached
364 \value Combining_Right
365 \value Combining_RightAttached
369 \enum QChar::SpecialCharacter
371 \value Null A QChar with this value isNull().
372 \value Nbsp Non-breaking space.
373 \value ReplacementCharacter The character shown when a font has no glyph
374 for a certain codepoint. A special question mark character is often
375 used. Codecs use this codepoint when input data cannot be
376 represented in Unicode.
377 \value ObjectReplacementCharacter Used to represent an object such as an
378 image when such objects cannot be presented.
380 \value ByteOrderSwapped
381 \value ParagraphSeparator
386 \fn void QChar::setCell(uchar cell)
391 \fn void QChar::setRow(uchar row)
398 Constructs a null QChar ('\\0').
404 \fn QChar::QChar(QLatin1Char ch)
406 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
410 \fn QChar::QChar(SpecialCharacter ch)
412 Constructs a QChar for the predefined character value \a ch.
416 \fn QChar::QChar(char ch)
418 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
422 \fn QChar::QChar(uchar ch)
424 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
428 \fn QChar::QChar(uchar cell, uchar row)
430 Constructs a QChar for Unicode cell \a cell in row \a row.
436 \fn QChar::QChar(ushort code)
438 Constructs a QChar for the character with Unicode code point \a code.
442 \fn QChar::QChar(short code)
444 Constructs a QChar for the character with Unicode code point \a code.
448 \fn QChar::QChar(uint code)
450 Constructs a QChar for the character with Unicode code point \a code.
454 \fn QChar::QChar(int code)
456 Constructs a QChar for the character with Unicode code point \a code.
460 \fn bool QChar::isNull() const
462 Returns true if the character is the Unicode character 0x0000
463 ('\\0'); otherwise returns false.
467 \fn uchar QChar::cell() const
469 Returns the cell (least significant byte) of the Unicode character.
475 \fn uchar QChar::row() const
477 Returns the row (most significant byte) of the Unicode character.
483 Returns true if the character is a printable character; otherwise
484 returns false. This is any character not of category Cc or Cn.
486 Note that this gives no indication of whether the character is
487 available in a particular font.
489 bool QChar::isPrint() const
491 const int test = FLAG(Other_Control) |
492 FLAG(Other_NotAssigned);
493 return !(FLAG(qGetProp(ucs)->category) & test);
497 \fn bool QChar::isSpace() const
499 Returns true if the character is a separator character
500 (Separator_* categories or certain code points from Other_Control category);
501 otherwise returns false.
508 bool QChar::isSpace(ushort ucs2)
510 const int test = FLAG(Separator_Space) |
511 FLAG(Separator_Line) |
512 FLAG(Separator_Paragraph);
513 return FLAG(qGetProp(ucs2)->category) & test;
517 Returns true if the character is a mark (Mark_* categories);
518 otherwise returns false.
520 See QChar::Category for more information regarding marks.
522 bool QChar::isMark() const
524 const int test = FLAG(Mark_NonSpacing) |
525 FLAG(Mark_SpacingCombining) |
526 FLAG(Mark_Enclosing);
527 return FLAG(qGetProp(ucs)->category) & test;
531 Returns true if the character is a punctuation mark (Punctuation_*
532 categories); otherwise returns false.
534 bool QChar::isPunct() const
536 const int test = FLAG(Punctuation_Connector) |
537 FLAG(Punctuation_Dash) |
538 FLAG(Punctuation_Open) |
539 FLAG(Punctuation_Close) |
540 FLAG(Punctuation_InitialQuote) |
541 FLAG(Punctuation_FinalQuote) |
542 FLAG(Punctuation_Other);
543 return FLAG(qGetProp(ucs)->category) & test;
547 \fn bool QChar::isLetter() const
549 Returns true if the character is a letter (Letter_* categories);
550 otherwise returns false.
557 bool QChar::isLetter(ushort ucs2)
559 const int test = FLAG(Letter_Uppercase) |
560 FLAG(Letter_Lowercase) |
561 FLAG(Letter_Titlecase) |
562 FLAG(Letter_Modifier) |
564 return FLAG(qGetProp(ucs2)->category) & test;
568 Returns true if the character is a number (Number_* categories,
569 not just 0-9); otherwise returns false.
573 bool QChar::isNumber() const
575 const int test = FLAG(Number_DecimalDigit) |
576 FLAG(Number_Letter) |
578 return FLAG(qGetProp(ucs)->category) & test;
582 \fn bool QChar::isLetterOrNumber() const
584 Returns true if the character is a letter or number (Letter_* or
585 Number_* categories); otherwise returns false.
592 bool QChar::isLetterOrNumber(ushort ucs2)
594 const int test = FLAG(Letter_Uppercase) |
595 FLAG(Letter_Lowercase) |
596 FLAG(Letter_Titlecase) |
597 FLAG(Letter_Modifier) |
599 FLAG(Number_DecimalDigit) |
600 FLAG(Number_Letter) |
602 return FLAG(qGetProp(ucs2)->category) & test;
606 \fn bool QChar::isDigit() const
608 Returns true if the character is a decimal digit
609 (Number_DecimalDigit); otherwise returns false.
616 bool QChar::isDigit(ushort ucs2)
618 return (qGetProp(ucs2)->category == Number_DecimalDigit);
622 Returns true if the character is a symbol (Symbol_* categories);
623 otherwise returns false.
625 bool QChar::isSymbol() const
627 const int test = FLAG(Symbol_Math) |
628 FLAG(Symbol_Currency) |
629 FLAG(Symbol_Modifier) |
631 return FLAG(qGetProp(ucs)->category) & test;
635 \fn bool QChar::isHighSurrogate() const
637 Returns true if the QChar is the high part of a UTF16 surrogate
638 (i.e. if its code point is in the range [0xd800..0xdbff]).
642 \fn bool QChar::isLowSurrogate() const
644 Returns true if the QChar is the low part of a UTF16 surrogate
645 (i.e. if its code point is in the range [0xdc00..0xdfff]).
649 \fn static bool QChar::isHighSurrogate(uint ucs4)
652 Returns true if the UCS-4-encoded character specified by \a ucs4
653 is the high part of a UTF16 surrogate
654 (i.e. if its code point is in the range [0xd800..0xdbff]).
658 \fn static bool QChar::isLowSurrogate(uint ucs4)
661 Returns true if the UCS-4-encoded character specified by \a ucs4
662 is the low part of a UTF16 surrogate
663 (i.e. if its code point is in the range [0xdc00..0xdfff]).
667 \fn static bool QChar::requiresSurrogates(uint ucs4)
669 Returns true if the UCS-4-encoded character specified by \a ucs4
670 can be split into the high and low parts of a UTF16 surrogate
671 (i.e. if its code point is greater than or equal to 0x10000).
675 \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
677 Converts a UTF16 surrogate pair with the given \a high and \a low values
678 to its UCS-4-encoded code point.
682 \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
685 Converts a UTF16 surrogate pair (\a high, \a low) to its UCS-4-encoded code point.
689 \fn static ushort QChar::highSurrogate(uint ucs4)
691 Returns the high surrogate part of a UCS-4-encoded code point.
692 The returned result is undefined if \a ucs4 is smaller than 0x10000.
696 \fn static ushort QChar::lowSurrogate(uint ucs4)
698 Returns the low surrogate part of a UCS-4-encoded code point.
699 The returned result is undefined if \a ucs4 is smaller than 0x10000.
703 Returns the numeric value of the digit, or -1 if the character is not a digit.
705 int QChar::digitValue() const
707 return qGetProp(ucs)->digitValue;
712 Returns the numeric value of the digit, specified by the UCS-2-encoded
713 character, \a ucs2, or -1 if the character is not a digit.
715 int QChar::digitValue(ushort ucs2)
717 return qGetProp(ucs2)->digitValue;
722 Returns the numeric value of the digit specified by the UCS-4-encoded
723 character, \a ucs4, or -1 if the character is not a digit.
725 int QChar::digitValue(uint ucs4)
727 if (ucs4 > UNICODE_LAST_CODEPOINT)
729 return qGetProp(ucs4)->digitValue;
733 Returns the character's category.
735 QChar::Category QChar::category() const
737 return (QChar::Category) qGetProp(ucs)->category;
742 Returns the category of the UCS-4-encoded character specified by \a ucs4.
744 QChar::Category QChar::category(uint ucs4)
746 if (ucs4 > UNICODE_LAST_CODEPOINT)
747 return QChar::Other_NotAssigned;
748 return (QChar::Category) qGetProp(ucs4)->category;
753 Returns the category of the UCS-2-encoded character specified by \a ucs2.
755 QChar::Category QChar::category(ushort ucs2)
757 return (QChar::Category) qGetProp(ucs2)->category;
762 Returns the character's direction.
764 QChar::Direction QChar::direction() const
766 return (QChar::Direction) qGetProp(ucs)->direction;
771 Returns the direction of the UCS-4-encoded character specified by \a ucs4.
773 QChar::Direction QChar::direction(uint ucs4)
775 if (ucs4 > UNICODE_LAST_CODEPOINT)
777 return (QChar::Direction) qGetProp(ucs4)->direction;
782 Returns the direction of the UCS-2-encoded character specified by \a ucs2.
784 QChar::Direction QChar::direction(ushort ucs2)
786 return (QChar::Direction) qGetProp(ucs2)->direction;
790 Returns information about the joining properties of the character
791 (needed for certain languages such as Arabic).
793 QChar::Joining QChar::joining() const
795 return (QChar::Joining) qGetProp(ucs)->joining;
800 Returns information about the joining properties of the UCS-4-encoded
801 character specified by \a ucs4 (needed for certain languages such as Arabic).
803 QChar::Joining QChar::joining(uint ucs4)
805 if (ucs4 > UNICODE_LAST_CODEPOINT)
806 return QChar::OtherJoining;
807 return (QChar::Joining) qGetProp(ucs4)->joining;
812 Returns information about the joining properties of the UCS-2-encoded
813 character specified by \a ucs2 (needed for certain languages such as Arabic).
815 QChar::Joining QChar::joining(ushort ucs2)
817 return (QChar::Joining) qGetProp(ucs2)->joining;
821 Returns true if the character should be reversed if the text
822 direction is reversed; otherwise returns false.
824 Same as (ch.mirroredChar() != ch).
828 bool QChar::hasMirrored() const
830 return qGetProp(ucs)->mirrorDiff != 0;
834 \fn bool QChar::isLower() const
836 Returns true if the character is a lowercase letter, i.e.
837 category() is Letter_Lowercase.
839 \sa isUpper(), toLower(), toUpper()
843 \fn bool QChar::isUpper() const
845 Returns true if the character is an uppercase letter, i.e.
846 category() is Letter_Uppercase.
848 \sa isLower(), toUpper(), toLower()
852 \fn bool QChar::isTitleCase() const
854 Returns true if the character is a titlecase letter, i.e.
855 category() is Letter_Titlecase.
857 \sa isLower(), toUpper(), toLower(), toTitleCase()
861 Returns the mirrored character if this character is a mirrored
862 character; otherwise returns the character itself.
866 QChar QChar::mirroredChar() const
868 return ucs + qGetProp(ucs)->mirrorDiff;
873 Returns the mirrored character if the UCS-4-encoded character specified
874 by \a ucs4 is a mirrored character; otherwise returns the character itself.
878 uint QChar::mirroredChar(uint ucs4)
880 if (ucs4 > UNICODE_LAST_CODEPOINT)
882 return ucs4 + qGetProp(ucs4)->mirrorDiff;
887 Returns the mirrored character if the UCS-2-encoded character specified
888 by \a ucs2 is a mirrored character; otherwise returns the character itself.
892 ushort QChar::mirroredChar(ushort ucs2)
894 return ucs2 + qGetProp(ucs2)->mirrorDiff;
899 Hangul_SBase = 0xac00,
900 Hangul_LBase = 0x1100,
901 Hangul_VBase = 0x1161,
902 Hangul_TBase = 0x11a7,
903 Hangul_SCount = 11172,
907 Hangul_NCount = 21*28
910 // buffer has to have a length of 3. It's needed for Hangul decomposition
911 static const unsigned short * QT_FASTCALL decompositionHelper
912 (uint ucs4, int *length, int *tag, unsigned short *buffer)
915 if (ucs4 > UNICODE_LAST_CODEPOINT)
917 if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
918 int SIndex = ucs4 - Hangul_SBase;
919 buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
920 buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
921 buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
922 *length = buffer[2] == Hangul_TBase ? 2 : 3;
923 *tag = QChar::Canonical;
927 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
930 const unsigned short *decomposition = uc_decomposition_map+index;
931 *tag = (*decomposition) & 0xff;
932 *length = (*decomposition) >> 8;
933 return decomposition+1;
937 Decomposes a character into its constituent parts. Returns an empty string
938 if no decomposition exists.
940 QString QChar::decomposition() const
942 return decomposition(ucs);
947 Decomposes the UCS-4-encoded character specified by \a ucs4 into its
948 constituent parts. Returns an empty string if no decomposition exists.
950 QString QChar::decomposition(uint ucs4)
952 unsigned short buffer[3];
955 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
956 return QString::fromUtf16(d, length);
960 Returns the tag defining the decomposition of the character. Returns
961 QChar::NoDecomposition if no decomposition exists.
963 QChar::Decomposition QChar::decompositionTag() const
965 return decompositionTag(ucs);
970 Returns the tag defining the decomposition of the UCS-4-encoded character
971 specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
973 QChar::Decomposition QChar::decompositionTag(uint ucs4)
975 if (ucs4 > UNICODE_LAST_CODEPOINT)
976 return QChar::NoDecomposition;
977 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
979 return QChar::NoDecomposition;
980 return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
984 Returns the combining class for the character as defined in the
985 Unicode standard. This is mainly useful as a positioning hint for
986 marks attached to a base character.
988 The Qt text rendering engine uses this information to correctly
989 position non-spacing marks around a base character.
991 unsigned char QChar::combiningClass() const
993 return (unsigned char) qGetProp(ucs)->combiningClass;
998 Returns the combining class for the UCS-4-encoded character specified by
999 \a ucs4, as defined in the Unicode standard.
1001 unsigned char QChar::combiningClass(uint ucs4)
1003 if (ucs4 > UNICODE_LAST_CODEPOINT)
1005 return (unsigned char) qGetProp(ucs4)->combiningClass;
1010 Returns the combining class for the UCS-2-encoded character specified by
1011 \a ucs2, as defined in the Unicode standard.
1013 unsigned char QChar::combiningClass(ushort ucs2)
1015 return (unsigned char) qGetProp(ucs2)->combiningClass;
1019 Returns the Unicode version that introduced this character.
1021 QChar::UnicodeVersion QChar::unicodeVersion() const
1023 return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
1028 Returns the Unicode version that introduced the character specified in
1029 its UCS-4-encoded form as \a ucs4.
1031 QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
1033 if (ucs4 > UNICODE_LAST_CODEPOINT)
1034 return QChar::Unicode_Unassigned;
1035 return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
1040 Returns the Unicode version that introduced the character specified in
1041 its UCS-2-encoded form as \a ucs2.
1043 QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
1045 return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
1049 Returns the most recent supported Unicode version.
1051 QChar::UnicodeVersion QChar::currentUnicodeVersion()
1053 return UNICODE_DATA_VERSION;
1057 Returns the lowercase equivalent if the character is uppercase or titlecase;
1058 otherwise returns the character itself.
1060 QChar QChar::toLower() const
1062 const QUnicodeTables::Properties *p = qGetProp(ucs);
1063 if (!p->lowerCaseSpecial)
1064 return ucs + p->lowerCaseDiff;
1070 Returns the lowercase equivalent of the UCS-4-encoded character specified
1071 by \a ucs4 if the character is uppercase or titlecase; otherwise returns
1072 the character itself.
1074 uint QChar::toLower(uint ucs4)
1076 if (ucs4 > UNICODE_LAST_CODEPOINT)
1078 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1079 if (!p->lowerCaseSpecial)
1080 return ucs4 + p->lowerCaseDiff;
1086 Returns the lowercase equivalent of the UCS-2-encoded character specified
1087 by \a ucs2 if the character is uppercase or titlecase; otherwise returns
1088 the character itself.
1090 ushort QChar::toLower(ushort ucs2)
1092 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1093 if (!p->lowerCaseSpecial)
1094 return ucs2 + p->lowerCaseDiff;
1099 Returns the uppercase equivalent if the character is lowercase or titlecase;
1100 otherwise returns the character itself.
1102 QChar QChar::toUpper() const
1104 const QUnicodeTables::Properties *p = qGetProp(ucs);
1105 if (!p->upperCaseSpecial)
1106 return ucs + p->upperCaseDiff;
1112 Returns the uppercase equivalent of the UCS-4-encoded character specified
1113 by \a ucs4 if the character is lowercase or titlecase; otherwise returns
1114 the character itself.
1116 uint QChar::toUpper(uint ucs4)
1118 if (ucs4 > UNICODE_LAST_CODEPOINT)
1120 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1121 if (!p->upperCaseSpecial)
1122 return ucs4 + p->upperCaseDiff;
1128 Returns the uppercase equivalent of the UCS-2-encoded character specified
1129 by \a ucs2 if the character is lowercase or titlecase; otherwise returns
1130 the character itself.
1132 ushort QChar::toUpper(ushort ucs2)
1134 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1135 if (!p->upperCaseSpecial)
1136 return ucs2 + p->upperCaseDiff;
1141 Returns the title case equivalent if the character is lowercase or uppercase;
1142 otherwise returns the character itself.
1144 QChar QChar::toTitleCase() const
1146 const QUnicodeTables::Properties *p = qGetProp(ucs);
1147 if (!p->titleCaseSpecial)
1148 return ucs + p->titleCaseDiff;
1154 Returns the title case equivalent of the UCS-4-encoded character specified
1155 by \a ucs4 if the character is lowercase or uppercase; otherwise returns
1156 the character itself.
1158 uint QChar::toTitleCase(uint ucs4)
1160 if (ucs4 > UNICODE_LAST_CODEPOINT)
1162 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1163 if (!p->titleCaseSpecial)
1164 return ucs4 + p->titleCaseDiff;
1170 Returns the title case equivalent of the UCS-2-encoded character specified
1171 by \a ucs2 if the character is lowercase or uppercase; otherwise returns
1172 the character itself.
1174 ushort QChar::toTitleCase(ushort ucs2)
1176 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1177 if (!p->titleCaseSpecial)
1178 return ucs2 + p->titleCaseDiff;
1183 static inline uint foldCase(const ushort *ch, const ushort *start)
1186 if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
1187 c = QChar::surrogateToUcs4(*(ch - 1), c);
1188 return *ch + qGetProp(c)->caseFoldDiff;
1191 static inline uint foldCase(uint ch, uint &last)
1194 if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
1195 c = QChar::surrogateToUcs4(last, c);
1197 return ch + qGetProp(c)->caseFoldDiff;
1200 static inline ushort foldCase(ushort ch)
1202 return ch + qGetProp(ch)->caseFoldDiff;
1206 Returns the case folded equivalent of the character. For most Unicode characters this
1207 is the same as toLower().
1209 QChar QChar::toCaseFolded() const
1211 return ucs + qGetProp(ucs)->caseFoldDiff;
1216 Returns the case folded equivalent of the UCS-4-encoded character specified
1217 by \a ucs4. For most Unicode characters this is the same as toLower().
1219 uint QChar::toCaseFolded(uint ucs4)
1221 if (ucs4 > UNICODE_LAST_CODEPOINT)
1223 return ucs4 + qGetProp(ucs4)->caseFoldDiff;
1228 Returns the case folded equivalent of the UCS-2-encoded character specified
1229 by \a ucs2. For most Unicode characters this is the same as toLower().
1231 ushort QChar::toCaseFolded(ushort ucs2)
1233 return ucs2 + qGetProp(ucs2)->caseFoldDiff;
1237 \fn char QChar::toLatin1() const
1239 Returns the Latin-1 character equivalent to the QChar, or 0. This
1240 is mainly useful for non-internationalized software.
1242 \sa toAscii(), unicode()
1246 \fn char QChar::toAscii() const
1248 Returns the Latin-1 character value of the QChar, or 0 if the character is not
1251 The main purpose of this function is to preserve ASCII characters used
1252 in C strings. This is mainly useful for developers of non-internationalized
1255 \note It is not possible to distinguish a non-Latin 1 character from an ASCII 0
1256 (NUL) character. Prefer to use unicode(), which does not have this ambiguity.
1258 \sa toLatin1(), unicode()
1262 \fn QChar QChar::fromAscii(char)
1264 Converts the ASCII character \a c to its equivalent QChar. This
1265 is mainly useful for non-internationalized software.
1267 An alternative is to use QLatin1Char.
1269 \sa fromLatin1(), unicode()
1272 #ifndef QT_NO_DATASTREAM
1276 Writes the char \a chr to the stream \a out.
1278 \sa {Serializing Qt Data Types}
1280 QDataStream &operator<<(QDataStream &out, QChar chr)
1282 out << quint16(chr.unicode());
1289 Reads a char from the stream \a in into char \a chr.
1291 \sa {Serializing Qt Data Types}
1293 QDataStream &operator>>(QDataStream &in, QChar &chr)
1297 chr.unicode() = ushort(u);
1300 #endif // QT_NO_DATASTREAM
1303 \fn ushort & QChar::unicode()
1305 Returns a reference to the numeric Unicode value of the QChar.
1309 \fn ushort QChar::unicode() const
1314 /*****************************************************************************
1315 Documentation of QChar related functions
1316 *****************************************************************************/
1319 \fn bool operator==(QChar c1, QChar c2)
1323 Returns true if \a c1 and \a c2 are the same Unicode character;
1324 otherwise returns false.
1328 \fn int operator!=(QChar c1, QChar c2)
1332 Returns true if \a c1 and \a c2 are not the same Unicode
1333 character; otherwise returns false.
1337 \fn int operator<=(QChar c1, QChar c2)
1341 Returns true if the numeric Unicode value of \a c1 is less than
1342 or equal to that of \a c2; otherwise returns false.
1346 \fn int operator>=(QChar c1, QChar c2)
1350 Returns true if the numeric Unicode value of \a c1 is greater than
1351 or equal to that of \a c2; otherwise returns false.
1355 \fn int operator<(QChar c1, QChar c2)
1359 Returns true if the numeric Unicode value of \a c1 is less than
1360 that of \a c2; otherwise returns false.
1364 \fn int operator>(QChar c1, QChar c2)
1368 Returns true if the numeric Unicode value of \a c1 is greater than
1369 that of \a c2; otherwise returns false.
1373 // ---------------------------------------------------------------------------
// Replaces characters of the string with their decompositions, in place,
// walking backwards from the end of the string towards index `from`.
//   str       - the string being modified (s below presumably aliases *str;
//               its declaration is not visible in this excerpt).
//   canonical - when true, a decomposition is only accepted if its tag is
//               QChar::Canonical.
//   version   - compared against each code point's Unicode version (see the
//               check inside the loop).
//   from      - offset, in UTF-16 code units, at which the backward scan stops.
1376 static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
// Scratch buffer handed to decompositionHelper().
1378 unsigned short buffer[3];
1382 const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
// Start one past the last code unit; the loop pre-decrements before reading.
1383 const unsigned short *uc = utf16 + s.length();
1384 while (uc != utf16 + from) {
1385 uint ucs4 = *(--uc);
// A low surrogate preceded by a high surrogate is combined into one code point.
// NOTE(review): the lower bound tested here is the buffer start, not
// utf16 + from -- confirm `from` can never point at the low half of a pair.
1386 if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
1387 ushort high = *(uc - 1);
1388 if (QChar(high).isHighSurrogate()) {
1390 ucs4 = QChar::surrogateToUcs4(high, ucs4);
// Version filter: code points newer than `version`, or unassigned, match this
// test (the guarded statement is not visible here; presumably they are skipped).
1393 const QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
1394 if (v > version || v == QChar::Unicode_Unassigned)
// d points at the decomposition (null if there is none); length and tag are
// filled in by the helper.
1398 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
// Matches when there is no decomposition, or a non-canonical one while only
// canonical decompositions were requested.
1399 if (!d || (canonical && tag != QChar::Canonical))
// Remember the offset: the raw pointers become stale after replace().
1402 int pos = uc - utf16;
// Swap the code point (2 code units if it needs surrogates, else 1) for its
// decomposition.
1403 s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
1404 // the replace invalidates the pointers, and decomposition is applied recursively:
// re-derive the base pointer and resume from the end of the inserted text so
// that the replacement itself is scanned again.
1405 utf16 = reinterpret_cast<unsigned short *>(s.data());
1406 uc = utf16 + pos + length;
// Ordering of a bare code unit against a UCS2Pair: only the pair's u1 member
// takes part in the comparison.
1416 inline bool operator<(ushort u1, const UCS2Pair &ligature)
1417 { return u1 < ligature.u1; }
// Mirror overload: orders a UCS2Pair against a bare code unit, again using
// only the pair's u1 member.
1418 inline bool operator<(const UCS2Pair &ligature, ushort u1)
1419 { return ligature.u1 < u1; }
// Looks up the composition of the pair (u1, u2). Three sources are tried in
// order:
//   1. Hangul leading consonant (L) + vowel (V)  -> LV syllable,
//   2. Hangul LV syllable + trailing consonant (T) -> LVT syllable,
//   3. the ligature table entry selected by u2, searched for u1.
// Callers treat a zero result as "no composition".
1421 static ushort ligatureHelper(ushort u1, ushort u2)
// --- Hangul L + V ---
1424 int LIndex = u1 - Hangul_LBase;
1425 if (0 <= LIndex && LIndex < Hangul_LCount) {
1426 int VIndex = u2 - Hangul_VBase;
1427 if (0 <= VIndex && VIndex < Hangul_VCount)
1428 return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
// --- Hangul LV + T ---
// Only syllables without a trailing consonant qualify: those sit at multiples
// of Hangul_TCount from Hangul_SBase.
1432 int SIndex = u1 - Hangul_SBase;
1433 if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
1434 int TIndex = u2 - Hangul_TBase;
// TIndex must lie strictly between 0 and Hangul_TCount. Index 0 stands for
// "no trailing consonant", so accepting it would absorb u2 without changing
// u1; index Hangul_TCount would land on the *next* LV syllable. The previous
// inclusive bounds (0 <= TIndex && TIndex <= Hangul_TCount) allowed both.
// This matches the Hangul composition algorithm of the Unicode Standard
// (assuming the composed syllable is u1 + TIndex, as in that algorithm).
1435 if (0 < TIndex && TIndex < Hangul_TCount)
// --- Table-driven ligatures ---
// 0xffff marks "u2 has no ligature entries".
1439 const unsigned short index = GET_LIGATURE_INDEX(u2);
1440 if (index == 0xffff)
// The entry starts with a count, followed by that many UCS2Pair records.
1442 const unsigned short *ligatures = uc_ligature_map+index;
1443 ushort length = *ligatures++;
1445 const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
// Binary search on the pairs' u1 member (see the operator< overloads for
// UCS2Pair); a result equal to the end pointer means "not found".
1446 const UCS2Pair *r = qBinaryFind(data, data + length, u1);
1447 if (r != data + length)
// Composes characters of the string in place, scanning forward from index
// `from`: each character is offered to ligatureHelper() together with the
// current starter, and a non-zero result overwrites the starter.
//   str     - the string being modified (s below presumably aliases *str; its
//             declaration is not visible in this excerpt).
//   version - code points whose Unicode version is newer than this, or that
//             are unassigned, never take part in a composition.
//   from    - offset, in UTF-16 code units, at which scanning starts.
1454 static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
// Fewer than two code units from `from` onwards: this test guards the early
// exit (the guarded statement is not visible in this excerpt).
1458 if (s.length() - from < 2)
1461 // the loop can partly ignore high Unicode as all ligatures are in the BMP
// Combining class of the previously examined character.
1463 int lastCombining = 0;
1465 while (pos < s.length()) {
1466 uint uc = s.at(pos).unicode();
// Merge a high/low surrogate pair into one code point for the property lookup.
1467 if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
1468 ushort low = s.at(pos+1).unicode();
1469 if (QChar(low).isLowSurrogate()) {
1470 uc = QChar::surrogateToUcs4(uc, low);
1474 const QUnicodeTables::Properties *p = qGetProp(uc);
// Characters outside the requested Unicode version invalidate the starter.
1475 if (p->unicodeVersion > version || p->unicodeVersion == QChar::Unicode_Unassigned) {
1476 starter = -1; // to prevent starter == pos - 1
1481 int combining = p->combiningClass;
// A composition is attempted when the character directly follows the starter
// or its combining class is higher than the previous character's.
// Fix: also require a valid starter. After the reset to -1 above, the
// `combining > lastCombining` alternative could still be true, and
// s.at(starter) below would then be called with index -1.
1482 if ((starter == pos - 1 || combining > lastCombining) && starter >= 0) {
1483 // allowed to form ligature with S
1484 QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
// A zero result means the pair does not compose.
1485 if (ligature.unicode()) {
1486 s[starter] = ligature;
1493 lastCombining = combining;
1499 static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
1502 const int l = s.length()-1;
1506 uint u1 = s.at(pos).unicode();
1507 if (QChar(u1).isHighSurrogate()) {
1508 ushort low = s.at(p2).unicode();
1509 if (QChar(low).isLowSurrogate()) {
1510 u1 = QChar::surrogateToUcs4(u1, low);
1516 uint u2 = s.at(p2).unicode();
1517 if (QChar(u2).isHighSurrogate() && p2 < l) {
1518 ushort low = s.at(p2+1).unicode();
1519 if (QChar(low).isLowSurrogate()) {
1520 u2 = QChar::surrogateToUcs4(u2, low);
1527 const QUnicodeTables::Properties *p = qGetProp(u2);
1528 if (p->unicodeVersion <= version && p->unicodeVersion != QChar::Unicode_Unassigned)
1529 c2 = p->combiningClass;
1538 const QUnicodeTables::Properties *p = qGetProp(u1);
1539 if (p->unicodeVersion <= version && p->unicodeVersion != QChar::Unicode_Unassigned)
1540 c1 = p->combiningClass;
1544 QChar *uc = s.data();
1546 // exchange characters
1547 if (!QChar::requiresSurrogates(u2)) {
1550 uc[p++] = QChar::highSurrogate(u2);
1551 uc[p++] = QChar::lowSurrogate(u2);
1553 if (!QChar::requiresSurrogates(u1)) {
1556 uc[p++] = QChar::highSurrogate(u1);
1557 uc[p++] = QChar::lowSurrogate(u1);
1561 if (pos > 0 && s.at(pos).isLowSurrogate())
1565 if (QChar::requiresSurrogates(u1))