2 * Copyright (c) 2020 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali/devel-api/text-abstraction/script.h>
24 namespace TextAbstraction
/// Upper bound (exclusive) of the white space range: every code point below 0x21 counts as white space.
constexpr unsigned int WHITE_SPACE_THRESHOLD{0x21u};

// Code points that start a new paragraph.
constexpr unsigned int CHAR_LF{0x000Au};  ///< NL Line feed, new line.
constexpr unsigned int CHAR_VT{0x000Bu};  ///< Vertical tab.
constexpr unsigned int CHAR_FF{0x000Cu};  ///< NP Form feed, new page.
constexpr unsigned int CHAR_CR{0x000Du};  ///< Carriage return, new line.
constexpr unsigned int CHAR_NEL{0x0085u}; ///< Next line.
constexpr unsigned int CHAR_LS{0x2028u};  ///< Line separator.
constexpr unsigned int CHAR_PS{0x2029u};  ///< Paragraph separator.

// Thin space, zero width characters and directional marks (0x2009 - 0x200F).
constexpr unsigned int CHAR_TS{0x2009u};   ///< Thin Space.
constexpr unsigned int CHAR_ZWS{0x200Bu};  ///< Zero width space.
constexpr unsigned int CHAR_ZWNJ{0x200Cu}; ///< Zero width non joiner.
constexpr unsigned int CHAR_ZWJ{0x200Du};  ///< Zero width joiner.
constexpr unsigned int CHAR_LTRM{0x200Eu}; ///< Left to Right Mark.
constexpr unsigned int CHAR_RTLM{0x200Fu}; ///< Right to Left Mark.
45 // Latin script: It contains punctuation characters and symbols which are not part of the latin script. https://en.wikipedia.org/wiki/Latin_script_in_Unicode
46 // 0x0000 - 0x007f C0 Controls and Basic Latin
48 // ASCII digits (not part of LATIN script):
51 // ASCII punctuation and symbols (not part of LATIN script):
57 // Controls (not part of LATIN script):
60 // 0x0080 - 0x00ff C1 Controls and Latin-1 Supplement
62 // Controls (not part of LATIN script):
65 // Punctuations and symbols (not part of LATIN script):
68 // Mathematical operators (not part of LATIN script):
72 // 0x0100 - 0x017f Latin Extended-A
73 // 0x0180 - 0x024f Latin Extended-B
74 // 0x0250 - 0x02af IPA Extensions
75 // 0x02b0 - 0x02ff Spacing Modifier Letters
77 // Punctuation (not part of LATIN script):
80 // 0x1d00 - 0x1d7f Phonetic Extensions
82 // Uralic Phonetic (not part of LATIN script):
85 // Subscripts and superscripts
90 // 0x1d80 - 0x1dbf Phonetic Extensions Supplement
92 // 0x1DBF (subscript or superscript. Not part of LATIN script )
94 // 0x1e00 - 0x1eff Latin Extended Additional
95 // 0x2070 - 0x209f Superscripts and Subscripts
97 // 0x2070 (not part of LATIN script)
98 // 0x2074 - 0x207E (not part of LATIN script)
100 // 0x2100 - 0x214f Letterlike symbols (not part of LATIN script)
102 // 0x212A - 0x212B (are part of LATIN script)
103 // 0x2132 (are part of LATIN script)
104 // 0x214E (are part of LATIN script)
106 // 0x2150 - 0x2189 Number Forms
108 // 0x2150 - 0x215F Fractions (not part of LATIN script)
109 // 0x2189 Fractions (not part of LATIN script)
111 // 0x2c60 - 0x2c7f Latin Extended-C
112 // 0xa720 - 0xa7ff Latin Extended-D
114 // 0xA720 - 0xA721 Uralic Phonetic (not part of LATIN script)
115 // 0xA788 (not part of LATIN script)
116 // 0xA789 - 0xA78A Budu (not part of LATIN script)
118 // 0xab30 - 0xab6f Latin Extended-E
120 // 0xfb00 - 0xfb06 Latin Alphabetic Presentation Forms
121 // 0xff00 - 0xffef Halfwidth and Fullwidth Forms
123 // 0xFF00 - 0xFF20 HWFW Symbols (not part of LATIN script)
124 // 0xFF3B - 0xFF40 HWFW Symbols (not part of LATIN script)
125 // 0xFF5B - 0xFFEF HWFW Symbols (not part of LATIN script)
128 // 0x0900 - 0x097f Devanagari
129 // 0x0980 - 0x09ff Bengali
130 // 0x0a00 - 0x0a7f Gurmukhi
131 // 0x0a80 - 0x0aff Gujarati
132 // 0x0b00 - 0x0b7f Oriya
133 // 0x0b80 - 0x0bff Tamil
134 // 0x0c00 - 0x0c7f Telugu
135 // 0x0c80 - 0x0cff Kannada
136 // 0x0d00 - 0x0d7f Malayalam
139 // 0x0d80 - 0x0dff Sinhala
142 // 0x0600 - 0x06ff Arabic
143 // 0x0750 - 0x077f Arabic Supplement
144 // 0x08A0 - 0x08ff Arabic Extended-A
145 // 0xfb50 - 0xfdff Arabic Presentation Forms-A
146 // 0xfe70 - 0xfeff Arabic Presentation Forms-B
147 // 0x1ee00 - 0x1eeff Arabic Mathematical Alphabetic Symbols
149 // CJK (Chinese, Japanese and Korean) and Vietnamese script.
150 // 0x2e80 - 0x2eff CJK Radicals Supplement
151 // 0x2f00 - 0x2fdf Kangxi Radicals
152 // 0x3000 - 0x303f CJK Symbols and Punctuation
153 // 0x3200 - 0x32ff Enclosed CJK Letters and Months
154 // 0x3400 - 0x4dbf CJK Unified Ideographs Extension A
155 // 0x4e00 - 0x62ff CJK Unified Ideographs
156 // 0x6300 - 0x77ff CJK Unified Ideographs
157 // 0x7800 - 0x8cff CJK Unified Ideographs
158 // 0x8d00 - 0x9fff CJK Unified Ideographs
159 // 0x20000 - 0x215ff CJK Unified Ideographs Extension B
160 // 0x21600 - 0x230ff CJK Unified Ideographs Extension B
161 // 0x23100 - 0x245ff CJK Unified Ideographs Extension B
162 // 0x24600 - 0x260ff CJK Unified Ideographs Extension B
163 // 0x26100 - 0x275ff CJK Unified Ideographs Extension B
164 // 0x27600 - 0x290ff CJK Unified Ideographs Extension B
165 // 0x29100 - 0x2a6df CJK Unified Ideographs Extension B
166 // 0x2a700 - 0x2b73f CJK Unified Ideographs Extension C
167 // 0x2b740 - 0x2b81f CJK Unified Ideographs Extension D
170 // 0x3040 - 0x309f Hiragana
171 // 0x30a0 - 0x30ff Katakana
174 // 0x1100 - 0x11ff Hangul jamo
175 // 0x3130 - 0x318f Hangul Compatibility Jamo
176 // 0xa960 - 0xa97f Hangul Jamo Extended-A
177 // 0xac00 - 0xd7af Hangul Syllables
178 // 0xd7b0 - 0xd7ff Hangul Jamo Extended-B
181 // 0x3100 - 0x312f Bopomofo
182 // 0x31a0 - 0x31bf Bopomofo Extended
185 // 0x1780 - 0x17ff Khmer
186 // 0x19e0 - 0x19ff Khmer Symbols
189 // 0x0e80 - 0x0eff Lao
192 // 0x0e00 - 0x0e7f Thai
195 // 0x1000 - 0x109f Myanmar
198 // 0x0591 - 0x05f4 Hebrew
199 // 0xfb1d - 0xfb4f Hebrew subset of Alphabetic Presentation Forms
202 // 0x0400 - 0x04ff Cyrillic
203 // 0x0500 - 0x052f Cyrillic supplement
204 // 0x2de0 - 0x2dff Cyrillic Extended-A
205 // 0xa640 - 0xa69f Cyrillic Extended-B
208 // 0x10a0 - 0x10ff Georgian
209 // 0x2d00 - 0x2d2f Georgian supplement
212 // 0x0370 - 0x03ff Greek & Coptic
213 // 0x1f00 - 0x1fff Greek Extended
216 // 0x0530 - 0x058f Armenian
217 // 0xfb13 - 0xfb17 Armenian subset of Alphabetic Presentation Forms
220 // 0xa980 - 0xa9fd Javanese
223 // 0x1b80 - 0x1bbf Sundanese
224 // 0x1cc0 - 0x1ccf Sundanese supplement
226 // Ge'ez script (Ethiopic)
227 // 0x1200 - 0x137f Ethiopic
228 // 0x1380 - 0x139f Ethiopic supplement
229 // 0x2d80 - 0x2ddf Ethiopic Extended
230 // 0xab00 - 0xab2f Ethiopic Extended-A
233 // 0x1700 - 0x171f Baybayin
236 // 0x1c50 - 0x1c7f Ol Chiki
239 // 0xabc0 - 0xabff Meetei Mayek
240 // 0xaae0 - 0xaaff Meetei Mayek Extensions
242 // The Emoji which map to standardized Unicode characters
243 // 1. Emoticons ( 1F601 - 1F64F )
244 // 2. Dingbats ( 2700 - 27BF )
245 // 3. Transport and map symbols ( 1F680 - 1F6C0 )
246 // 4. Enclosed characters ( 24C2 - 1F251 )
247 // 5. Uncategorized :-S
248 // 6. Additional Emoticons ( 1F600 - 1F636 )
249 // 6b. Additional transport and map symbols ( 1F680 - 1F6FF ): http://unicode.org/charts/PDF/U1F680.pdf
250 // 6c. Other additional symbols ( 1F30D - 1F567 )
251 // 7. Supplemental Symbols and Pictographs ( 1F900–1F9FF ): http://unicode.org/charts/PDF/U1F900.pdf
253 // Symbols. Work around for these symbols.
/// Classifies a code point from the lowest range handled by this file by testing it
/// against a chain of code point ranges; `script` starts as UNKNOWN.
/// Within this file it is only called for character <= 0x077f (see GetScriptTillBengali).
/// NOTE(review): most branch bodies and the return statement are not visible in this view;
/// only the ASCII_DIGITS, C1_CONTROLS and EMOJI assignments can be confirmed here.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` after the chain of tests - TODO confirm.
267 /// character <= 0x077f
268 inline Script GetScriptTillArabicSupplement(Character character)
270 Script script = UNKNOWN;
// ASCII digits are tested first, ahead of the wider "<= 0x007E" test that also contains them.
272 if( ( 0x0030 <= character ) && ( character <= 0x0039 ) )
274 script = ASCII_DIGITS;
276 else if( character <= 0x007E )
// Four punctuation / symbol sub-ranges of this block are singled out (assigned values not visible here).
278 if( ( 0x0020 <= character ) && ( character <= 0x002F ) )
282 else if( ( 0x003A <= character ) && ( character <= 0x0040 ) )
286 else if( ( 0x005B <= character ) && ( character <= 0x0060 ) )
290 else if( ( 0x007B <= character ) && ( character <= 0x007E ) )
299 else if( ( 0x007F <= character ) && ( character <= 0x009F ) )
301 // 0x007F is actually part of C0 Controls and Basic Latin. However, it is the last and only control character of its block
302 // and the following characters of the next block are consecutive.
303 script = C1_CONTROLS;
305 else if( ( 0x00A0 <= character ) && ( character <= 0x00BF ) )
// Inside 0x00A0 - 0x00BF only the copyright and registered signs are classified as EMOJI here.
307 if( character == 0x00A9 )
309 script = EMOJI; // 5. Uncategorized: copyright sign
311 else if( character == 0x00AE )
313 script = EMOJI; // 5. Uncategorized: registered sign
// 0x00D7 and 0x00F7 are tested before the 0x00C0 - 0x02ff range that contains them,
// so that range test never sees them.
320 else if( character == 0x00D7 )
324 else if( character == 0x00F7 )
328 else if( ( 0x00C0 <= character ) && ( character <= 0x02ff ) )
// 0x02B9 - 0x02BF is a sub-range that gets separate treatment (assigned value not visible here).
330 if( ( 0x02B9 <= character ) && ( character <= 0x02BF ) )
// The remaining ranges match the table at the top of the file: Greek & Coptic, Cyrillic,
// Cyrillic supplement, Armenian, Hebrew, Arabic and Arabic Supplement.
339 else if( ( 0x0370 <= character ) && ( character <= 0x03ff ) )
343 else if( ( 0x0400 <= character ) && ( character <= 0x04ff ) )
347 else if( ( 0x0500 <= character ) && ( character <= 0x052f ) )
351 else if( ( 0x0530 <= character ) && ( character <= 0x058f ) )
355 else if( ( 0x0591 <= character ) && ( character <= 0x05f4 ) )
359 else if( ( 0x0600 <= character ) && ( character <= 0x06ff ) )
363 else if( ( 0x0750 <= character ) && ( character <= 0x077f ) )
/// Classifies a code point no greater than 0x09ff.
/// Code points up to 0x077f are delegated to GetScriptTillArabicSupplement(); the
/// remaining tests cover 0x08A0 - 0x08ff, 0x0900 - 0x097f and 0x0980 - 0x09ff.
/// NOTE(review): the else joining both halves, the values assigned by the range tests and
/// the return statement are not visible in this view.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` (UNKNOWN when nothing matched) - TODO confirm.
371 /// character <= 0x09ff
372 inline Script GetScriptTillBengali(Character character)
374 Script script = UNKNOWN;
376 if( character <= 0x077f )
378 script = GetScriptTillArabicSupplement(character);
// Ranges listed in the table at the top of the file as Arabic Extended-A, Devanagari and Bengali.
382 if( ( 0x08A0 <= character ) && ( character <= 0x08ff ) )
386 else if( ( 0x0900 <= character ) && ( character <= 0x097f ) )
390 else if( ( 0x0980 <= character ) && ( character <= 0x09ff ) )
/// Classifies a code point in the range 0x09ff < character <= 0x0cff.
/// The range is split at 0x0b7f so that at most three range tests run for any input.
/// NOTE(review): the values assigned by each test, the else between both halves and the
/// return statement are not visible in this view.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` (UNKNOWN when nothing matched) - TODO confirm.
399 /// 0x09ff < character <= 0x0cff
400 inline Script GetScriptBetweenBengaliAndKannada( Character character )
402 Script script = UNKNOWN;
404 if( character <= 0x0b7f )
// Lower half: ranges listed at the top of the file as Gurmukhi, Gujarati and Oriya.
406 if( ( 0x0a00 <= character ) && ( character <= 0x0a7f ) )
410 else if( ( 0x0a80 <= character ) && ( character <= 0x0aff ) )
414 else if( ( 0x0b00 <= character ) && ( character <= 0x0b7f ) )
// Upper half: ranges listed at the top of the file as Tamil, Telugu and Kannada.
421 if( ( 0x0b80 <= character ) && ( character <= 0x0bff ) )
425 else if( ( 0x0c00 <= character ) && ( character <= 0x0c7f ) )
429 else if( ( 0x0c80 <= character ) && ( character <= 0x0cff ) )
/// Classifies a code point in the range 0x0cff < character <= 0x1eff by testing it against
/// a chain of code point ranges; `script` starts as UNKNOWN.
/// NOTE(review): only the PHONETIC_SS assignments are visible in this view; the values
/// assigned by the other branches and the return statement are not.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` after the chain of tests - TODO confirm.
438 /// 0x0cff < character <= 0x1eff
439 inline Script GetScriptBetweenKannadaAndLatinExtendedAdditional(Character character)
441 Script script = UNKNOWN;
// The ranges below follow the table at the top of the file: Malayalam, Sinhala, Thai, Lao,
// Myanmar, Georgian, Hangul jamo, Ethiopic, Ethiopic supplement, Baybayin, Khmer,
// Khmer Symbols, Sundanese, Ol Chiki and Sundanese supplement.
443 if( ( 0x0d00 <= character ) && ( character <= 0x0d7f ) )
447 else if( ( 0x0d80 <= character ) && ( character <= 0x0dff ) )
451 else if( ( 0x0e00 <= character ) && ( character <= 0x0e7f ) )
455 else if( ( 0x0e80 <= character ) && ( character <= 0x0eff ) )
459 else if( ( 0x1000 <= character ) && ( character <= 0x109f ) )
463 else if( ( 0x10a0 <= character ) && ( character <= 0x10ff ) )
467 else if( ( 0x1100 <= character ) && ( character <= 0x11ff ) )
471 else if( ( 0x1200 <= character ) && ( character <= 0x137f ) )
475 else if( ( 0x1380 <= character ) && ( character <= 0x139f ) )
479 else if( ( 0x1700 <= character ) && ( character <= 0x171f ) )
483 else if( ( 0x1780 <= character ) && ( character <= 0x17ff ) )
487 else if( ( 0x19e0 <= character ) && ( character <= 0x19ff ) )
491 else if( ( 0x1b80 <= character ) && ( character <= 0x1bbf ) )
495 else if( ( 0x1c50 <= character ) && ( character <= 0x1c7f ) )
499 else if( ( 0x1cc0 <= character ) && ( character <= 0x1ccf ) )
// 0x1d00 - 0x1eff spans Phonetic Extensions through Latin Extended Additional; the
// sub-ranges and single code points below are singled out inside it.
503 else if( ( 0x1d00 <= character ) && ( character <= 0x1eff ) )
505 if( ( 0x1D26 <= character ) && ( character <= 0x1D2B ) )
// Subscripts and superscripts of the phonetic blocks are classified as PHONETIC_SS.
509 else if( ( 0x1D5D <= character ) && ( character <= 0x1D61 ) )
511 script = PHONETIC_SS;
513 else if( ( 0x1D66 <= character ) && ( character <= 0x1D6A ) )
515 script = PHONETIC_SS;
517 else if( character == 0x1D78 )
519 script = PHONETIC_SS;
521 else if( character == 0x1DBF)
523 script = PHONETIC_SS;
/// Classifies a code point in the range 0x1eff < character <= 0x2c7f by testing it against
/// a chain of ranges and single code points; `script` starts as UNKNOWN.
/// The order of the tests matters: single code points are tested before the wider ranges
/// that contain them.
/// NOTE(review): only the EMOJI, LETTER_LIKE and FRACTIONS_NF assignments are visible in this
/// view; the values assigned by the other branches and the return statement are not.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` after the chain of tests - TODO confirm.
534 /// 0x1eff < character <= 0x2c7f
535 inline Script GetScriptBetweenLatinExtendedAdditionalAndLatinExtendedC(Character character)
537 Script script = UNKNOWN;
// Range listed at the top of the file as Greek Extended.
539 if( ( 0x1f00 <= character ) && ( character <= 0x1fff ) )
543 else if( character == 0x203c )
545 script = EMOJI; // 5. Uncategorized: double exclamation mark
547 else if( character == 0x2049 )
549 script = EMOJI; // 5. Uncategorized: exclamation question mark
// Superscripts and Subscripts block; 0x2070 and 0x2074 - 0x207E get separate treatment inside it.
551 else if( ( 0x2070 <= character ) && ( character <= 0x209f ) )
553 if( character == 0x2070 )
557 else if( ( 0x2074 <= character ) && ( character <= 0x207E ) )
566 else if( character == 0x20e3 )
568 script = EMOJI; // 5. Uncategorized: combining enclosing keycap
// 0x2122 and 0x2139 are tested before the 0x2100 - 0x2189 range that contains them,
// so they are classified as EMOJI and never reach that range test.
570 else if( character == 0x2122 )
572 script = EMOJI; // 5. Uncategorized: trade mark sign
574 else if( character == 0x2139 )
576 script = EMOJI; // 5. Uncategorized: information source
// Letterlike symbols (0x2100 - 0x214f) and Number Forms (0x2150 - 0x2189).
578 else if( ( 0x2100 <= character ) && ( character <= 0x2189 ) )
580 if( ( 0x2100 <= character ) && ( character <= 0x214f ) )
// 0x212A - 0x212B, 0x2132 and 0x214E are the code points the table at the top of the file
// lists as part of the LATIN script.
582 if( ( 0x212A <= character ) && ( character <= 0x212B ) )
586 else if( character == 0x2132 )
590 else if( character == 0x214E )
596 script = LETTER_LIKE;
599 else if( ( 0x2150 <= character ) && ( character <= 0x215F ) )
601 script = FRACTIONS_NF;
603 else if( character == 0x2189 )
605 script = FRACTIONS_NF;
// Individual symbols tested before the 0x2194 - 0x2B55 range that contains them
// (assigned values not visible here).
613 else if( ( 0x25cb == character ) ||
614 ( 0x25cf == character ) ||
615 ( 0x25a1 == character ) )
619 else if( 0x25a0 == character )
623 else if( ( 0x2664 == character ) ||
624 ( 0x2661 == character ) ||
625 ( 0x2662 == character ) ||
626 ( 0x2667 == character ) )
630 else if( ( 0x2606 == character ) ||
631 ( 0x25aa == character ) )
635 else if( 0x262a == character )
639 // U+2194 5. Uncategorized: left right arrow
640 // U+2B55 5. Uncategorized: heavy large circle
641 else if( ( 0x2194 <= character ) && ( character <= 0x2B55 ) )
// Range listed at the top of the file as Latin Extended-C.
645 else if( ( 0x2c60 <= character ) && ( character <= 0x2c7f ) )
/// Classifies a code point in the range 0x0cff < character <= 0x2c7f by delegating to one
/// of two helpers, split at 0x1eff.
/// NOTE(review): the else between both calls and the return statement are not visible in
/// this view.
/// @param character The code point to classify.
/// @return Presumably the value produced by the selected helper - TODO confirm.
653 /// 0x0cff < character <= 0x2c7f
654 inline Script GetScriptBetweenKannadaAndLatinExtendedC(Character character)
656 Script script = UNKNOWN;
658 if( character <= 0x1eff )
660 script = GetScriptBetweenKannadaAndLatinExtendedAdditional(character);
// Code points above 0x1eff: the callee documents its range as 0x1eff < character <= 0x2c7f.
664 script = GetScriptBetweenLatinExtendedAdditionalAndLatinExtendedC(character);
/// Classifies a code point in the range 0x2c7f < character <= 0xa7ff by testing it against
/// a chain of code point ranges; `script` starts as UNKNOWN.
/// NOTE(review): only the NON_LATIN_LED assignments are visible in this view; the values
/// assigned by the other branches and the return statement are not.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` after the chain of tests - TODO confirm.
670 /// 0x2c7f < character <= 0xa7ff
671 inline Script GetScriptBetweenLatinExtendedCAndLatinExtendedD(Character character)
673 Script script = UNKNOWN;
// Ranges listed at the top of the file as Georgian supplement, Ethiopic Extended and
// Cyrillic Extended-A.
675 if( ( 0x2d00 <= character ) && ( character <= 0x2d2f ) )
679 else if( ( 0x2d80 <= character ) && ( character <= 0x2ddf ) )
683 else if( ( 0x2de0 <= character ) && ( character <= 0x2dff ) )
// CJK radicals, symbols and punctuation, kana, Bopomofo and Hangul compatibility jamo.
687 else if( ( 0x2e80 <= character ) && ( character <= 0x2eff ) )
691 else if( ( 0x2f00 <= character ) && ( character <= 0x2fdf ) )
695 else if( ( 0x3000 <= character ) && ( character <= 0x303f ) )
699 else if( ( 0x3040 <= character ) && ( character <= 0x309f ) )
703 else if( ( 0x30a0 <= character ) && ( character <= 0x30ff ) )
707 else if( ( 0x3100 <= character ) && ( character <= 0x312f ) )
711 else if( ( 0x3130 <= character ) && ( character <= 0x318f ) )
715 else if( ( 0x31a0 <= character ) && ( character <= 0x31bf ) )
719 else if( ( 0x3200 <= character ) && ( character <= 0x32ff ) )
// CJK Unified Ideographs Extension A followed by the four CJK Unified Ideographs
// sub-ranges, which together cover 0x4e00 - 0x9fff without gaps.
723 else if( ( 0x3400 <= character ) && ( character <= 0x4dbf ) )
727 else if( ( 0x4e00 <= character ) && ( character <= 0x62ff ) )
731 else if( ( 0x6300 <= character ) && ( character <= 0x77ff ) )
735 else if( ( 0x7800 <= character ) && ( character <= 0x8cff ) )
739 else if( ( 0x8d00 <= character ) && ( character <= 0x9fff ) )
// Range listed at the top of the file as Cyrillic Extended-B.
743 else if( ( 0xa640 <= character ) && ( character <= 0xa69f ) )
// Latin Extended-D; the code points the table at the top of the file marks as not part of
// the LATIN script are singled out inside it.
747 else if( ( 0xa720 <= character ) && ( character <= 0xa7ff ) )
749 if( character == 0xA720 )
753 else if( character == 0xA721 )
757 else if( character == 0xA788 )
759 script = NON_LATIN_LED;
761 else if( character == 0xA789 )
763 script = NON_LATIN_LED;
765 else if( character == 0xA78A )
767 script = NON_LATIN_LED;
/// Classifies a code point in the range 0x2c7f < character <= 0xfdff.
/// `script` is first set to the result of GetScriptBetweenLatinExtendedCAndLatinExtendedD(),
/// whose highest tested code point is 0xa7ff; the tests below then cover ranges from 0xa960 up.
/// NOTE(review): the values assigned by the range tests and the return statement are not
/// visible in this view.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` after the chain of tests - TODO confirm.
778 /// 0x2c7f < character <= 0xfdff
779 inline Script GetScriptBetweenLatinExtendedCAndArabicPresentationFormsA(Character character)
781 Script script = GetScriptBetweenLatinExtendedCAndLatinExtendedD(character);
// Ranges listed at the top of the file as Hangul Jamo Extended-A, Javanese,
// Ethiopic Extended-A, Latin Extended-E, Meetei Mayek Extensions and Meetei Mayek.
// The ranges are disjoint, so 0xaae0 - 0xaaff being tested after 0xab30 - 0xab6f is harmless.
783 if( ( 0xa960 <= character ) && ( character <= 0xa97f ) )
787 else if( ( 0xa980 <= character ) && ( character <= 0xa9fd ) )
791 else if( ( 0xab00 <= character ) && ( character <= 0xab2f ) )
795 else if( ( 0xab30 <= character ) && ( character <= 0xab6f ) )
799 else if( ( 0xaae0 <= character ) && ( character <= 0xaaff ) )
803 else if( ( 0xabc0 <= character ) && ( character <= 0xabff ) )
// Hangul Syllables and Hangul Jamo Extended-B.
807 else if( ( 0xac00 <= character ) && ( character <= 0xd7af ) )
811 else if( ( 0xd7b0 <= character ) && ( character <= 0xd7ff ) )
// Alphabetic Presentation Forms (Latin, Armenian and Hebrew subsets) and
// Arabic Presentation Forms-A.
815 else if( ( 0xfb00 <= character ) && ( character <= 0xfb06 ) )
819 else if( ( 0xfb13 <= character ) && ( character <= 0xfb17 ) )
823 else if( ( 0xfb1d <= character ) && ( character <= 0xfb4f ) )
827 else if( ( 0xfb50 <= character ) && ( character <= 0xfdff ) )
/// Classifies a code point above 0xfdff by testing it against a chain of code point ranges;
/// `script` starts as UNKNOWN.
/// NOTE(review): none of the values assigned by the branches, nor the return statement, are
/// visible in this view.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` after the chain of tests - TODO confirm.
835 /// character > 0xfdff
836 inline Script GetScriptAboveArabicPresentationFormsA(Character character)
838 Script script = UNKNOWN;
// Range listed at the top of the file as Arabic Presentation Forms-B.
840 if( ( 0xfe70 <= character ) && ( character <= 0xfeff ) )
// Halfwidth and Fullwidth Forms; the three sub-ranges are the ones the table at the top of
// the file marks as symbols that are not part of the LATIN script.
844 else if( ( 0xff00 <= character ) && ( character <= 0xffef ) )
846 if( ( 0xFF00 <= character ) && ( character <= 0xFF20 ) )
850 else if( ( 0xFF3B <= character ) && ( character <= 0xFF40 ) )
854 else if( ( 0xFF5B <= character ) && ( character <= 0xFFEF ) )
// Range listed at the top of the file as Arabic Mathematical Alphabetic Symbols.
863 else if( ( 0x1ee00 <= character ) && ( character <= 0x1eeff ) )
867 // U+1f170 4. Enclosed characters: negative squared latin capital letter A
868 // U+1f6ff 6b. Additional transport and map symbols
869 else if( ( 0x1f170 <= character ) && ( character <= 0x1f6ff ) )
873 // 7. Supplemental Symbols and Pictographs
874 else if( ( 0x1f900 <= character ) && ( character <= 0x1f9ff ) )
// CJK Unified Ideographs Extension B (seven consecutive sub-ranges), then Extensions C and D.
878 else if( ( 0x20000 <= character ) && ( character <= 0x215ff ) )
882 else if( ( 0x21600 <= character ) && ( character <= 0x230ff ) )
886 else if( ( 0x23100 <= character ) && ( character <= 0x245ff ) )
890 else if( ( 0x24600 <= character ) && ( character <= 0x260ff ) )
894 else if( ( 0x26100 <= character ) && ( character <= 0x275ff ) )
898 else if( ( 0x27600 <= character ) && ( character <= 0x290ff ) )
902 else if( ( 0x29100 <= character ) && ( character <= 0x2a6df ) )
906 else if( ( 0x2a700 <= character ) && ( character <= 0x2b73f ) )
910 else if( ( 0x2b740 <= character ) && ( character <= 0x2b81f ) )
/// Classifies a code point above 0x2c7f by delegating to one of two helpers, split at 0xfdff.
/// NOTE(review): the else between both calls and the return statement are not visible in
/// this view.
/// @param character The code point to classify.
/// @return Presumably the value produced by the selected helper - TODO confirm.
918 /// character > 0x2c7f
919 inline Script GetScriptAboveLatinExtendedC(Character character)
921 Script script = UNKNOWN;
923 if( character <= 0xfdff )
925 script = GetScriptBetweenLatinExtendedCAndArabicPresentationFormsA(character);
// Code points above 0xfdff: the callee documents its range as character > 0xfdff.
929 script = GetScriptAboveArabicPresentationFormsA(character);
937 bool IsRightToLeftScript( Script script )
939 return ( ( ARABIC == script ) ||
940 ( HEBREW == script ) );
/// Entry point of the script lookup: decides which script a code point belongs to.
/// IsCommonScript() is tested first; every other code point is routed by magnitude to one
/// of four helpers, using the thresholds 0x09ff, 0x0cff and 0x2c7f.
/// NOTE(review): the value assigned when IsCommonScript() is true, the else branches and the
/// return statement are not visible in this view.
/// @param character The code point to classify.
/// @return Presumably the value held by `script` (initialised to UNKNOWN) - TODO confirm.
943 Script GetCharacterScript( Character character )
945 Script script = UNKNOWN;
947 if( IsCommonScript( character ) )
951 else if( character <= 0x0cff )
953 if( character <= 0x09ff )
955 script = GetScriptTillBengali(character);
// 0x09ff < character <= 0x0cff
959 script = GetScriptBetweenBengaliAndKannada(character);
964 if( character <= 0x2c7f )
// 0x0cff < character <= 0x2c7f
966 script = GetScriptBetweenKannadaAndLatinExtendedC(character);
// character > 0x2c7f
970 script = GetScriptAboveLatinExtendedC(character);
977 bool IsWhiteSpace(Character character)
979 return character < WHITE_SPACE_THRESHOLD;
982 bool IsNewParagraph(Character character)
984 return ((CHAR_LF == character) ||
985 (CHAR_VT == character) ||
986 (CHAR_FF == character) ||
987 (CHAR_CR == character) ||
988 (CHAR_NEL == character) ||
989 (CHAR_LS == character) ||
990 (CHAR_PS == character));
993 bool IsZeroWidthNonJoiner(Character character)
995 return CHAR_ZWNJ == character;
998 bool IsZeroWidthJoiner(Character character)
1000 return CHAR_ZWJ == character;
1003 bool IsZeroWidthSpace(Character character)
1005 return CHAR_ZWS == character;
1008 bool IsLeftToRightMark(Character character)
1010 return CHAR_LTRM == character;
1013 bool IsRightToLeftMark(Character character)
1015 return CHAR_RTLM == character;
1018 bool IsThinSpace(Character character)
1020 return CHAR_TS == character;
1023 bool IsCommonScript(Character character)
1025 return (IsWhiteSpace(character) ||
1026 IsZeroWidthNonJoiner(character) ||
1027 IsZeroWidthJoiner(character) ||
1028 IsZeroWidthSpace(character) ||
1029 IsLeftToRightMark(character) ||
1030 IsRightToLeftMark(character) ||
1031 IsThinSpace(character) ||
1032 IsNewParagraph(character));
1035 bool HasLigatureMustBreak(Script script)
1037 return ((LATIN == script) ||
1038 (ARABIC == script));
1041 } // namespace TextAbstraction