Fix Unicode Byte Order Mark being misinterpreted as an Arabic character. 01/318501/3
author ANZ1217 <chihun.jeong@samsung.com>
Fri, 17 Jan 2025 08:59:14 +0000 (17:59 +0900)
committer ANZ1217 <chihun.jeong@samsung.com>
Fri, 17 Jan 2025 10:37:30 +0000 (19:37 +0900)
U+FEFF (\ufeff) is the Unicode BOM (Byte Order Mark) character.

Change-Id: I0cdcb738c7cefe259125bf481fc4cbb10baf8ec3

dali/devel-api/text-abstraction/script.cpp

index bac6f8df9d08561b53d08a2bb7cd83e33faa11d6..1a7c7a3a871a5267f305b6391b8c61d3a5edf927 100644 (file)
@@ -40,6 +40,7 @@ constexpr unsigned int CHAR_ZWJ  = 0x200D; ///< Zero width joiner.
 constexpr unsigned int CHAR_LTRM = 0x200E; ///< Left to Right Mark.
 constexpr unsigned int CHAR_RTLM = 0x200F; ///< Right to Left Mark.
 constexpr unsigned int CHAR_TS   = 0x2009; ///< Thin Space.
+constexpr unsigned int CHAR_BOM  = 0xFEFF; ///< Byte Order Mark.
 
 // Latin script:   It contains punctuation characters and symbols which are not part of the latin script. https://en.wikipedia.org/wiki/Latin_script_in_Unicode
 // 0x0000 - 0x007f C0 Controls and Basic Latin
@@ -142,7 +143,7 @@ constexpr unsigned int CHAR_TS   = 0x2009; ///< Thin Space.
 // 0x0750 - 0x077f Arabic Supplement
 // 0x08A0 - 0x08ff Arabic Extended-A
 // 0xfb50 - 0xfdff Arabic Presentation Forms-A
-// 0xfe70 - 0xfeff Arabic Presentation Forms-B
+// 0xfe70 - 0xfefe Arabic Presentation Forms-B
 // 0x1ee00 - 0x1eeff Arabic Mathematical Alphabetic Symbols
 
 // CJK (Chinese, Japanese and Korean) and Vietnamese script.
@@ -1040,6 +1041,11 @@ bool IsThinSpace(Character character)
   return CHAR_TS == character;
 }
 
+bool IsByteOrderMark(Character character)
+{
+  return CHAR_BOM == character; // True only when the character equals CHAR_BOM (0xFEFF, the Byte Order Mark).
+}
+
 bool IsCommonScript(Character character)
 {
   return (IsWhiteSpace(character) ||
@@ -1049,7 +1055,8 @@ bool IsCommonScript(Character character)
           IsLeftToRightMark(character) ||
           IsRightToLeftMark(character) ||
           IsThinSpace(character) ||
-          IsNewParagraph(character));
+          IsNewParagraph(character)) ||
+          IsByteOrderMark(character);
 }
 
 bool HasLigatureMustBreak(Script script)