10ec9526c65a6b6c4530577c7d8bcadca6f31cc5
[platform/core/uifw/dali-adaptor.git] / dali / devel-api / text-abstraction / script.cpp
1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17
18 // FILE HEADER
19 #include <dali/devel-api/text-abstraction/script.h>
20
21 namespace Dali
22 {
23 namespace TextAbstraction
24 {
namespace
{
/// Every character whose code point is strictly below this value is treated as a white space.
const unsigned int WHITE_SPACE_THRESHOLD = 0x0021;

// Characters that start a new paragraph (see IsNewParagraph()), ordered by code point.
const unsigned int CHAR_LF  = 0x000A; ///< Line feed (NL), new line.
const unsigned int CHAR_VT  = 0x000B; ///< Vertical tab.
const unsigned int CHAR_FF  = 0x000C; ///< Form feed (NP), new page.
const unsigned int CHAR_CR  = 0x000D; ///< Carriage return, new line.
const unsigned int CHAR_NEL = 0x0085; ///< Next line.
const unsigned int CHAR_LS  = 0x2028; ///< Line separator.
const unsigned int CHAR_PS  = 0x2029; ///< Paragraph separator.

// Spacing, joining and direction characters, ordered by code point.
const unsigned int CHAR_TS   = 0x2009; ///< Thin space.
const unsigned int CHAR_ZWS  = 0x200B; ///< Zero width space.
const unsigned int CHAR_ZWNJ = 0x200C; ///< Zero width non joiner.
const unsigned int CHAR_ZWJ  = 0x200D; ///< Zero width joiner.
const unsigned int CHAR_LTRM = 0x200E; ///< Left to right mark.
const unsigned int CHAR_RTLM = 0x200F; ///< Right to left mark.
} // namespace
43
44 bool IsRightToLeftScript(Script script)
45 {
46   return ((ARABIC == script) ||
47           (HEBREW == script));
48 }
49
50 Script GetCharacterScript(Character character)
51 {
52   // Latin script:   It contains punctuation characters and symbols which are not part of the latin script. https://en.wikipedia.org/wiki/Latin_script_in_Unicode
53   // 0x0000 - 0x007f C0 Controls and Basic Latin
54   //
55   //                 ASCII digits (not part of LATIN script):
56   //                 0x0030 - 0x0039
57   //
58   //                 ASCII punctuation and symbols (not part of LATIN script):
59   //                 0x0020 - 0x002F
60   //                 0x003A - 0x0040
61   //                 0x005B - 0x0060
62   //                 0x007B - 0x007E
63   //
64   //                 Controls (not part of LATIN script):
65   //                 0x007F
66   //
67   // 0x0080 - 0x00ff C1 Controls and Latin-1 Supplement
68   //
69   //                 Controls (not part of LATIN script):
70   //                 0x0080 - 0x009F
71   //
72   //                 Punctuations and symbols (not part of LATIN script):
73   //                 0x00A0 - 0x00BF
74   //
75   //                 Mathematical operators (not part of LATIN script):
76   //                 0x00D7
77   //                 0x00F7
78   //
79   // 0x0100 - 0x017f Latin Extended-A
80   // 0x0180 - 0x024f Latin Extended-B
81   // 0x0250 - 0x02af IPA Extensions
82   // 0x02b0 - 0x02ff Spacing Modifier Letters
83   //
84   //                 Punctuation (not part of LATIN script):
85   //                 0x02B9 - 0x02BF
86   //
87   // 0x1d00 - 0x1d7f Phonetic Extensions
88   //
89   //                 Uralic Phonetic (not part of LATIN script):
90   //                 0x1D26 - 0x1D2B
91   //
92   //                 Subscripts and superscripts
93   //                 0x1D5D - 0x1D61
94   //                 0x1D66 - 0x1D6A
95   //                 0x1D78
96   //
97   // 0x1d80 - 0x1dbf Phonetic Extensions Supplement
98   //
99   //                 0x1DBF (subscript or superscript. Not part of LATIN script )
100   //
101   // 0x1e00 - 0x1eff Latin Extended Additional
102   // 0x2070 - 0x209f Superscripts and Subscripts
103   //
104   //                 0x2070          (not part of LATIN script)
105   //                 0x2074 - 0x207E (not part of LATIN script)
106   //
107   // 0x2100 - 0x214f Letterlike symbols (not part of LATIN script)
108   //
109   //                 0x212A - 0x212B (are part of LATIN script)
110   //                 0x2132          (are part of LATIN script)
111   //                 0x214E          (are part of LATIN script)
112   //
113   // 0x2150 - 0x2189 Number Forms
114   //
115   //                 0x2150 - 0x215F Fractions (not part of LATIN script)
116   //                 0x2189          Fractions (not part of LATIN script)
117   //
118   // 0x2c60 - 0x2c7f Latin Extended-C
119   // 0xa720 - 0xa7ff Latin Extended-D
120   //
121   //                 0xA720 - 0xA721 Uralic Phonetic (not part of LATIN script)
122   //                 0xA788          (not part of LATIN script)
123   //                 0xA789 - 0xA78A Budu (not part of LATIN script)
124   //
125   // 0xab30 - 0xab6f Latin Extended-E
126   //
127   // 0xfb00 - 0xfb06 Latin Alphabetic Presentation Forms
128   // 0xff00 - 0xffef Halfwidth and Fullwidth Forms
129   //
130   //                 0xFF00 - 0xFF20 HWFW Symbols (not part of LATIN script)
131   //                 0xFF3B - 0xFF40 HWFW Symbols (not part of LATIN script)
132   //                 0xFF5B - 0xFFEF HWFW Symbols (not part of LATIN script)
133
134   // Brahmic scripts:
135   // 0x0900 - 0x097f Devanagari
136   // 0x0980 - 0x09ff Bengali
137   // 0x0a00 - 0x0a7f Gurmukhi
138   // 0x0a80 - 0x0aff Gujarati
139   // 0x0b00 - 0x0b7f Oriya
140   // 0x0b80 - 0x0bff Tamil
141   // 0x0c00 - 0x0c7f Telugu
142   // 0x0c80 - 0x0cff Kannada
143   // 0x0d00 - 0x0d7f Malayalam
144
145   // Sinhala script.
146   // 0x0d80 - 0x0dff Sinhala
147
148   // Arabic script.
149   // 0x0600 - 0x06ff Arabic
150   // 0x0750 - 0x077f Arabic Supplement
151   // 0x08A0 - 0x08ff Arabic Extended-A
152   // 0xfb50 - 0xfdff Arabic Presentation Forms-A
153   // 0xfe70 - 0xfeff Arabic Presentation Forms-B
154   // 0x1ee00 - 0x1eeff Arabic Mathematical Alphabetic Symbols
155
156   // CJK (Chinese, Japanese and Korean) and Vietnamese script.
157   // 0x2e80 - 0x2eff CJK Radicals Supplement
158   // 0x2f00 - 0x2fdf Kangxi Radicals
159   // 0x3000 - 0x303f CJK Symbols and Punctuation
160   // 0x3200 - 0x32ff Enclosed CJK Letters and Months
161   // 0x3400 - 0x4dbf CJK Unified Ideographs Extension A
162   // 0x4e00 - 0x62ff CJK Unified Ideographs
163   // 0x6300 - 0x77ff CJK Unified Ideographs
164   // 0x7800 - 0x8cff CJK Unified Ideographs
165   // 0x8d00 - 0x9fff CJK Unified Ideographs
166   // 0x20000 - 0x215ff CJK Unified Ideographs Extension B
167   // 0x21600 - 0x230ff CJK Unified Ideographs Extension B
168   // 0x23100 - 0x245ff CJK Unified Ideographs Extension B
169   // 0x24600 - 0x260ff CJK Unified Ideographs Extension B
170   // 0x26100 - 0x275ff CJK Unified Ideographs Extension B
171   // 0x27600 - 0x290ff CJK Unified Ideographs Extension B
172   // 0x29100 - 0x2a6df CJK Unified Ideographs Extension B
173   // 0x2a700 - 0x2b73f CJK Unified Ideographs Extension C
174   // 0x2b740 - 0x2b81f CJK Unified Ideographs Extension D
175
176   // Japanese scripts.
177   // 0x3040 - 0x309f Hiragana
178   // 0x30a0 - 0x30ff Katakana
179
180   // Hangul script
181   // 0x1100 - 0x11ff Hangul jamo
182   // 0x3130 - 0x318f Hangul Compatibility Jamo
183   // 0xa960 - 0xa97f Hangul Jamo Extended-A
184   // 0xac00 - 0xd7af Hangul Syllables
185   // 0xd7b0 - 0xd7ff Hangul Jamo Extended-B
186
187   // Bopomofo script
188   // 0x3100 - 0x312f Bopomofo
189   // 0x31a0 - 0x31bf Bopomofo Extended
190
191   // Khmer script
192   // 0x1780 - 0x17ff Khmer
193   // 0x19e0 - 0x19ff Khmer Symbols
194
195   // Lao script
196   // 0x0e80 - 0x0eff Lao
197
198   // Thai script
199   // 0x0e00 - 0x0e7f Thai
200
201   // Burmese script
202   // 0x1000 - 0x109f Myanmar
203
204   // Hebrew script
205   // 0x0591 - 0x05f4 Hebrew
206   // 0xfb1d - 0xfb4f Hebrew subset of Alphabetic Presentation Forms
207
208   // Cyrillic script
209   // 0x0400 - 0x04ff Cyrillic
210   // 0x0500 - 0x052f Cyrillic suplement
211   // 0x2de0 - 0x2dff Cyrillic Extended-A
212   // 0xa640 - 0xa69f Cyrillic Extended-B
213
214   // Georgian script
215   // 0x10a0 - 0x10ff Georgian
216   // 0x2d00 - 0x2d2f Georgian suplement
217
218   // Greek script
219   // 0x0370 - 0x03ff Greek & Coptic
220   // 0x1f00 - 0x1fff Greek Extended
221
222   // Armenian script
223   // 0x0530 - 0x058f Armenian
224   // 0xfb13 - 0xfb17 Armenian subset of Alphabetic prefentation forms
225
226   // Javanese script
227   // 0xa980 - 0xa9fd Javanese
228
229   // Sundanese script
230   // 0x1b80 - 0x1bbf Sundanese
231   // 0x1cc0 - 0x1ccf Sundanese supplement
232
233   // Ge'ez script (Ethiopic)
234   // 0x1200 - 0x137f Ethiopic
235   // 0x1380 - 0x139f Ethiopic supplement
236   // 0x2d80 - 0x2ddf Ethiopic Extended
237   // 0xab00 - 0xab2f Ethiopic Extended-A
238
239   // Baybayin Script
240   // 0x1700 - 0x171f Baybayin
241
242   // Ol Chiki Script
243   // 0x1c50 - 0x1c7f Ol Chiki
244
245   // Meitei Script
246   // 0xabc0 - 0xabff Meetei Mayek
247   // 0xaae0 - 0xaaff Meetei Mayek Extensions
248
249   // The Emoji which map to standardized Unicode characters
250   // 1. Emoticons ( 1F601 - 1F64F )
251   // 2. Dingbats ( 2700 - 27BF )
252   // 3. Transport and map symbols ( 1F680 - 1F6C0 )
253   // 4. Enclosed characters ( 24C2 - 1F251 )
254   // 5. Uncategorized :-S
255   // 6. Additional Emoticons ( 1F600 - 1F636 )
256   // 6b. Additional transport and map symbols ( 1F680 - 1F6FF ): http://unicode.org/charts/PDF/U1F680.pdf
257   // 6c. Other additional symbols ( 1F30D - 1F567 )
258   // 7. Supplemental Symbols and Pictographs ( 1F900–1F9FF ): http://unicode.org/charts/PDF/U1F900.pdf
259
260   // Symbols. Work around for these symbols.
261   // 0x25cb
262   // 0x25cf
263   // 0x25a1
264   // 0x25a0
265   // 0x2664
266   // 0x2661
267   // 0x2662
268   // 0x2667
269   // 0x2606
270   // 0x25aa
271   // 0x262a
272
273   if(IsCommonScript(character))
274   {
275     return COMMON;
276   }
277
278   if(character <= 0x0cff)
279   {
280     if(character <= 0x09ff)
281     {
282       if(character <= 0x077f)
283       {
284         if((0x0030 <= character) && (character <= 0x0039))
285         {
286           return ASCII_DIGITS;
287         }
288         if(character <= 0x007E)
289         {
290           if((0x0020 <= character) && (character <= 0x002F))
291           {
292             return ASCII_PS;
293           }
294           if((0x003A <= character) && (character <= 0x0040))
295           {
296             return ASCII_PS;
297           }
298           if((0x005B <= character) && (character <= 0x0060))
299           {
300             return ASCII_PS;
301           }
302           if((0x007B <= character) && (character <= 0x007E))
303           {
304             return ASCII_PS;
305           }
306         }
307         if((0x007F <= character) && (character <= 0x009F))
308         {
309           // 0x007F is actually part of C0 Controls and Basic Latin. However, is the last and only control character of its block
310           // and the following characters of the next block are consecutive.
311           return C1_CONTROLS;
312         }
313         if((0x00A0 <= character) && (character <= 0x00BF))
314         {
315           if(character == 0x00A9)
316           {
317             return EMOJI; // 5. Uncategorized: copyright sign
318           }
319           if(character == 0x00AE)
320           {
321             return EMOJI; // 5. Uncategorized: registered sign
322           }
323
324           return C1_PS;
325         }
326         if(character == 0x00D7)
327         {
328           return C1_MATH;
329         }
330         if(character == 0x00F7)
331         {
332           return C1_MATH;
333         }
334         if(character <= 0x02ff)
335         {
336           if((0x02B9 <= character) && (character <= 0x02BF))
337           {
338             return SML_P;
339           }
340
341           return LATIN;
342         }
343         if((0x0370 <= character) && (character <= 0x03ff))
344         {
345           return GREEK;
346         }
347         if((0x0400 <= character) && (character <= 0x04ff))
348         {
349           return CYRILLIC;
350         }
351         if((0x0500 <= character) && (character <= 0x052f))
352         {
353           return CYRILLIC;
354         }
355         if((0x0530 <= character) && (character <= 0x058f))
356         {
357           return ARMENIAN;
358         }
359         if((0x0591 <= character) && (character <= 0x05f4))
360         {
361           return HEBREW;
362         }
363         if((0x0600 <= character) && (character <= 0x06ff))
364         {
365           return ARABIC;
366         }
367         if((0x0750 <= character) && (character <= 0x077f))
368         {
369           return ARABIC;
370         }
371       }
372       else // > 0x077f
373       {
374         if((0x08A0 <= character) && (character <= 0x08ff))
375         {
376           return ARABIC;
377         }
378         if((0x0900 <= character) && (character <= 0x097f))
379         {
380           return DEVANAGARI;
381         }
382         if((0x0980 <= character) && (character <= 0x09ff))
383         {
384           return BENGALI;
385         }
386       }
387     }
388     else // > 0x09ff
389     {
390       if(character <= 0x0b7f)
391       {
392         if((0x0a00 <= character) && (character <= 0x0a7f))
393         {
394           return GURMUKHI;
395         }
396         if((0x0a80 <= character) && (character <= 0x0aff))
397         {
398           return GUJARATI;
399         }
400         if((0x0b00 <= character) && (character <= 0x0b7f))
401         {
402           return ORIYA;
403         }
404       }
405       else // > 0x0b7f
406       {
407         if((0x0b80 <= character) && (character <= 0x0bff))
408         {
409           return TAMIL;
410         }
411         if((0x0c00 <= character) && (character <= 0x0c7f))
412         {
413           return TELUGU;
414         }
415         if((0x0c80 <= character) && (character <= 0x0cff))
416         {
417           return KANNADA;
418         }
419       }
420     }
421   }
422   else // > 0x0cff
423   {
424     if(character <= 0x2c7f)
425     {
426       if(character <= 0x1eff)
427       {
428         if((0x0d00 <= character) && (character <= 0x0d7f))
429         {
430           return MALAYALAM;
431         }
432         if((0x0d80 <= character) && (character <= 0x0dff))
433         {
434           return SINHALA;
435         }
436         if((0x0e00 <= character) && (character <= 0x0e7f))
437         {
438           return THAI;
439         }
440         if((0x0e80 <= character) && (character <= 0x0eff))
441         {
442           return LAO;
443         }
444         if((0x1000 <= character) && (character <= 0x109f))
445         {
446           return BURMESE;
447         }
448         if((0x10a0 <= character) && (character <= 0x10ff))
449         {
450           return GEORGIAN;
451         }
452         if((0x1100 <= character) && (character <= 0x11ff))
453         {
454           return HANGUL;
455         }
456         if((0x1200 <= character) && (character <= 0x137f))
457         {
458           return GEEZ;
459         }
460         if((0x1380 <= character) && (character <= 0x139f))
461         {
462           return GEEZ;
463         }
464         if((0x1700 <= character) && (character <= 0x171f))
465         {
466           return BAYBAYIN;
467         }
468         if((0x1780 <= character) && (character <= 0x17ff))
469         {
470           return KHMER;
471         }
472         if((0x19e0 <= character) && (character <= 0x19ff))
473         {
474           return KHMER;
475         }
476         if((0x1b80 <= character) && (character <= 0x1bbf))
477         {
478           return SUNDANESE;
479         }
480         if((0x1c50 <= character) && (character <= 0x1c7f))
481         {
482           return OL_CHIKI;
483         }
484         if((0x1cc0 <= character) && (character <= 0x1ccf))
485         {
486           return SUNDANESE;
487         }
488         if((0x1d00 <= character) && (character <= 0x1eff))
489         {
490           if((0x1D26 <= character) && (character <= 0x1D2B))
491           {
492             return PHONETIC_U;
493           }
494           if((0x1D5D <= character) && (character <= 0x1D61))
495           {
496             return PHONETIC_SS;
497           }
498           if((0x1D66 <= character) && (character <= 0x1D6A))
499           {
500             return PHONETIC_SS;
501           }
502           if(character == 0x1D78)
503           {
504             return PHONETIC_SS;
505           }
506           if(character == 0x1DBF)
507           {
508             return PHONETIC_SS;
509           }
510
511           return LATIN;
512         }
513       }
514       else // > 0x1eff
515       {
516         if((0x1f00 <= character) && (character <= 0x1fff))
517         {
518           return GREEK;
519         }
520         if(character == 0x203c)
521         {
522           return EMOJI; // 5. Uncategorized: double exclamation mark
523         }
524         if(character == 0x2049)
525         {
526           return EMOJI; // 5. Uncategorized: exclamation question mark
527         }
528         if((0x2070 <= character) && (character <= 0x209f))
529         {
530           if(character == 0x2070)
531           {
532             return NUMERIC_SS;
533           }
534           if((0x2074 <= character) && (character <= 0x207E))
535           {
536             return NUMERIC_SS;
537           }
538
539           return LATIN;
540         }
541         if(character == 0x20e3)
542         {
543           return EMOJI; // 5. Uncategorized: combining enclosing keycap
544         }
545         if(character == 0x2122)
546         {
547           return EMOJI; // 5. Uncategorized: trade mark sign
548         }
549         if(character == 0x2139)
550         {
551           return EMOJI; // 5. Uncategorized: information source
552         }
553         if((0x2100 <= character) && (character <= 0x2189))
554         {
555           if((0x2100 <= character) && (character <= 0x214f))
556           {
557             if((0x212A <= character) && (character <= 0x212B))
558             {
559               return LATIN;
560             }
561             if(character == 0x2132)
562             {
563               return LATIN;
564             }
565             if(character == 0x214E)
566             {
567               return LATIN;
568             }
569
570             return LETTER_LIKE;
571           }
572           if((0x2150 <= character) && (character <= 0x215F))
573           {
574             return FRACTIONS_NF;
575           }
576           if(character == 0x2189)
577           {
578             return FRACTIONS_NF;
579           }
580
581           return LATIN;
582         }
583
584         // Symbols
585         if((0x25cb == character) ||
586            (0x25cf == character) ||
587            (0x25a1 == character))
588         {
589           return SYMBOLS1;
590         }
591
592         if(0x25a0 == character)
593         {
594           return SYMBOLS2;
595         }
596
597         if((0x2664 == character) ||
598            (0x2661 == character) ||
599            (0x2662 == character) ||
600            (0x2667 == character))
601         {
602           return SYMBOLS3;
603         }
604
605         if((0x2606 == character) ||
606            (0x25aa == character))
607         {
608           return SYMBOLS4;
609         }
610
611         if(0x262a == character)
612         {
613           return SYMBOLS5;
614         }
615
616         // U+2194 5. Uncategorized: left right arrow
617         // U+2B55 5. Uncategorized: heavy large circle
618         if((0x2194 <= character) && (character <= 0x2B55))
619         {
620           return EMOJI;
621         }
622         if((0x2c60 <= character) && (character <= 0x2c7f))
623         {
624           return LATIN;
625         }
626       }
627     }
628     else // > 0x2c7f
629     {
630       if(character <= 0xfdff)
631       {
632         if((0x2d00 <= character) && (character <= 0x2d2f))
633         {
634           return GEORGIAN;
635         }
636         if((0x2d80 <= character) && (character <= 0x2ddf))
637         {
638           return GEEZ;
639         }
640         if((0x2de0 <= character) && (character <= 0x2dff))
641         {
642           return CYRILLIC;
643         }
644         if((0x2e80 <= character) && (character <= 0x2eff))
645         {
646           return CJK;
647         }
648         if((0x2f00 <= character) && (character <= 0x2fdf))
649         {
650           return CJK;
651         }
652         if((0x3000 <= character) && (character <= 0x303f))
653         {
654           return CJK;
655         }
656         if((0x3040 <= character) && (character <= 0x309f))
657         {
658           return HIRAGANA;
659         }
660         if((0x30a0 <= character) && (character <= 0x30ff))
661         {
662           return KATAKANA;
663         }
664         if((0x3100 <= character) && (character <= 0x312f))
665         {
666           return BOPOMOFO;
667         }
668         if((0x3130 <= character) && (character <= 0x318f))
669         {
670           return HANGUL;
671         }
672         if((0x31a0 <= character) && (character <= 0x31bf))
673         {
674           return BOPOMOFO;
675         }
676         if((0x3200 <= character) && (character <= 0x32ff))
677         {
678           return CJK;
679         }
680         if((0x3400 <= character) && (character <= 0x4dbf))
681         {
682           return CJK;
683         }
684         if((0x4e00 <= character) && (character <= 0x62ff))
685         {
686           return CJK;
687         }
688         if((0x6300 <= character) && (character <= 0x77ff))
689         {
690           return CJK;
691         }
692         if((0x7800 <= character) && (character <= 0x8cff))
693         {
694           return CJK;
695         }
696         if((0x8d00 <= character) && (character <= 0x9fff))
697         {
698           return CJK;
699         }
700         if((0xa640 <= character) && (character <= 0xa69f))
701         {
702           return CYRILLIC;
703         }
704         if((0xa720 <= character) && (character <= 0xa7ff))
705         {
706           if(character == 0xA720)
707           {
708             return PHONETIC_U;
709           }
710           if(character == 0xA721)
711           {
712             return PHONETIC_U;
713           }
714           if(character == 0xA788)
715           {
716             return NON_LATIN_LED;
717           }
718           if(character == 0xA789)
719           {
720             return NON_LATIN_LED;
721           }
722           if(character == 0xA78A)
723           {
724             return NON_LATIN_LED;
725           }
726
727           return LATIN;
728         }
729         if((0xa960 <= character) && (character <= 0xa97f))
730         {
731           return HANGUL;
732         }
733         if((0xa980 <= character) && (character <= 0xa9fd))
734         {
735           return JAVANESE;
736         }
737         if((0xab00 <= character) && (character <= 0xab2f))
738         {
739           return GEEZ;
740         }
741         if((0xab30 <= character) && (character <= 0xab6f))
742         {
743           return LATIN;
744         }
745         if((0xaae0 <= character) && (character <= 0xaaff))
746         {
747           return MEITEI;
748         }
749         if((0xabc0 <= character) && (character <= 0xabff))
750         {
751           return MEITEI;
752         }
753         if((0xac00 <= character) && (character <= 0xd7af))
754         {
755           return HANGUL;
756         }
757         if((0xd7b0 <= character) && (character <= 0xd7ff))
758         {
759           return HANGUL;
760         }
761         if((0xfb00 <= character) && (character <= 0xfb06))
762         {
763           return LATIN;
764         }
765         if((0xfb13 <= character) && (character <= 0xfb17))
766         {
767           return ARMENIAN;
768         }
769         if((0xfb1d <= character) && (character <= 0xfb4f))
770         {
771           return HEBREW;
772         }
773         if((0xfb50 <= character) && (character <= 0xfdff))
774         {
775           return ARABIC;
776         }
777       }
778       else // > 0xfdff
779       {
780         if((0xfe70 <= character) && (character <= 0xfeff))
781         {
782           return ARABIC;
783         }
784         if((0xff00 <= character) && (character <= 0xffef))
785         {
786           if((0xFF00 <= character) && (character <= 0xFF20))
787           {
788             return HWFW_S;
789           }
790           if((0xFF3B <= character) && (character <= 0xFF40))
791           {
792             return HWFW_S;
793           }
794           if((0xFF5B <= character) && (character <= 0xFFEF))
795           {
796             return HWFW_S;
797           }
798
799           return LATIN;
800         }
801         if((0x1ee00 <= character) && (character <= 0x1eeff))
802         {
803           return ARABIC;
804         }
805         // U+1f170 4. Enclosed characters: negative squared latin capital letter A
806         // U+1f6ff 6b. Additional transport and map symbols
807         if((0x1f170 <= character) && (character <= 0x1f6ff))
808         {
809           return EMOJI;
810         }
811         // 7. Supplemental Symbols and Pictographs
812         if((0x1f900 <= character) && (character <= 0x1f9ff))
813         {
814           return EMOJI;
815         }
816         if((0x20000 <= character) && (character <= 0x215ff))
817         {
818           return CJK;
819         }
820         if((0x21600 <= character) && (character <= 0x230ff))
821         {
822           return CJK;
823         }
824         if((0x23100 <= character) && (character <= 0x245ff))
825         {
826           return CJK;
827         }
828         if((0x24600 <= character) && (character <= 0x260ff))
829         {
830           return CJK;
831         }
832         if((0x26100 <= character) && (character <= 0x275ff))
833         {
834           return CJK;
835         }
836         if((0x27600 <= character) && (character <= 0x290ff))
837         {
838           return CJK;
839         }
840         if((0x29100 <= character) && (character <= 0x2a6df))
841         {
842           return CJK;
843         }
844         if((0x2a700 <= character) && (character <= 0x2b73f))
845         {
846           return CJK;
847         }
848         if((0x2b740 <= character) && (character <= 0x2b81f))
849         {
850           return CJK;
851         }
852       }
853     }
854   }
855
856   return UNKNOWN;
857 }
858
859 bool IsWhiteSpace(Character character)
860 {
861   return character < WHITE_SPACE_THRESHOLD;
862 }
863
864 bool IsNewParagraph(Character character)
865 {
866   return ((CHAR_LF == character) ||
867           (CHAR_VT == character) ||
868           (CHAR_FF == character) ||
869           (CHAR_CR == character) ||
870           (CHAR_NEL == character) ||
871           (CHAR_LS == character) ||
872           (CHAR_PS == character));
873 }
874
875 bool IsZeroWidthNonJoiner(Character character)
876 {
877   return CHAR_ZWNJ == character;
878 }
879
880 bool IsZeroWidthJoiner(Character character)
881 {
882   return CHAR_ZWJ == character;
883 }
884
885 bool IsZeroWidthSpace(Character character)
886 {
887   return CHAR_ZWS == character;
888 }
889
890 bool IsLeftToRightMark(Character character)
891 {
892   return CHAR_LTRM == character;
893 }
894
895 bool IsRightToLeftMark(Character character)
896 {
897   return CHAR_RTLM == character;
898 }
899
900 bool IsThinSpace(Character character)
901 {
902   return CHAR_TS == character;
903 }
904
905 bool IsCommonScript(Character character)
906 {
907   return (IsWhiteSpace(character) ||
908           IsZeroWidthNonJoiner(character) ||
909           IsZeroWidthJoiner(character) ||
910           IsZeroWidthSpace(character) ||
911           IsLeftToRightMark(character) ||
912           IsRightToLeftMark(character) ||
913           IsThinSpace(character) ||
914           IsNewParagraph(character));
915 }
916
917 bool HasLigatureMustBreak(Script script)
918 {
919   return ((LATIN == script) ||
920           (ARABIC == script));
921 }
922
923 } // namespace TextAbstraction
924
925 } // namespace Dali