Scripts added.

author Victor Cebollada <v.cebollada@samsung.com>

Fri, 22 May 2015 14:17:05 +0000 (15:17 +0100)

committer Víctor Cebollada <v.cebollada@samsung.com>

Thu, 28 May 2015 06:32:54 +0000 (23:32 -0700)
author Victor Cebollada <v.cebollada@samsung.com>
Fri, 22 May 2015 14:17:05 +0000 (15:17 +0100)
committer Víctor Cebollada <v.cebollada@samsung.com>
Thu, 28 May 2015 06:32:54 +0000 (23:32 -0700)
diff --git a/text/dali/devel-api/text-abstraction/script.cpp b/text/dali/devel-api/text-abstraction/script.cpp

index 4985049..fe817d6 100644 (file)
--- a/text/dali/devel-api/text-abstraction/script.cpp
+++ b/text/dali/devel-api/text-abstraction/script.cpp
@@ -139,6 +139,24 @@ Script GetCharacterScript( Character character )
    // 0x0591 - 0x05f4 Hebrew
    // 0xfb1d - 0xfb4f Hebrew subset of Alphabetic Presentation Forms
  
+  // Cyrillic script
+  // 0x0400 - 0x04ff Cyrillic
+  // 0x0500 - 0x052f Cyrillic Supplement
+  // 0x2de0 - 0x2dff Cyrillic Extended-A
+  // 0xa640 - 0xa69f Cyrillic Extended-B
+
+  // Georgian script
+  // 0x10a0 - 0x10ff Georgian
+  // 0x2d00 - 0x2d2f Georgian Supplement
+
+  // Greek script
+  // 0x0370 - 0x03ff Greek & Coptic
+  // 0x1f00 - 0x1fff Greek Extended
+
+  // Armenian script
+  // 0x0530 - 0x058f Armenian
+  // 0xfb13 - 0xfb17 Armenian subset of Alphabetic Presentation Forms
+
    // The Emoji which map to standardized Unicode characters
    // 1. Emoticons ( 1F601 - 1F64F )
    // 2. Dingbats ( 2702 - 27B0 )
@@ -167,6 +185,22 @@ Script GetCharacterScript( Character character )
          {
            return LATIN;
          }
+        if( ( 0x0370 <= character ) && ( character <= 0x03ff ) )
+        {
+          return GREEK;
+        }
+        if( ( 0x0400 <= character ) && ( character <= 0x04ff ) )
+        {
+          return CYRILLIC;
+        }
+        if( ( 0x0500 <= character ) && ( character <= 0x052f ) )
+        {
+          return CYRILLIC;
+        }
+        if( ( 0x0530 <= character ) && ( character <= 0x058f ) )
+        {
+          return ARMENIAN;
+        }
          if( ( 0x0591 <= character ) && ( character <= 0x05f4 ) )
          {
            return HEBREW;
@@ -256,6 +290,10 @@ Script GetCharacterScript( Character character )
          {
            return BURMESE;
          }
+        if( ( 0x10a0 <= character ) && ( character <= 0x10ff ) )
+        {
+          return GEORGIAN;
+        }
          if( ( 0x1100 <= character ) && ( character <= 0x11ff ) )
          {
            return HANGUL;
@@ -275,6 +313,10 @@ Script GetCharacterScript( Character character )
        }
        else // > 0x1eff
        {
+        if( ( 0x1f00 <= character ) && ( character <= 0x1fff ) )
+        {
+          return GREEK;
+        }
          if( character == 0x203c )
          {
            return EMOJI; // 5. Uncategorized: double exclamation mark
@@ -319,6 +361,14 @@ Script GetCharacterScript( Character character )
      {
        if( character <= 0xfdff )
        {
+        if( ( 0x2d00 <= character ) && ( character <= 0x2d2f ) )
+        {
+          return GEORGIAN;
+        }
+        if( ( 0x2de0 <= character ) && ( character <= 0x2dff ) )
+        {
+          return CYRILLIC;
+        }
          if( ( 0x2e80 <= character ) && ( character <= 0x2eff ) )
          {
            return CJK;
@@ -367,6 +417,10 @@ Script GetCharacterScript( Character character )
          {
            return CJK;
          }
+        if( ( 0xa640 <= character ) && ( character <= 0xa69f ) )
+        {
+          return CYRILLIC;
+        }
          if( ( 0xa720 <= character ) && ( character <= 0xa7ff ) )
          {
            return LATIN;
@@ -391,6 +445,10 @@ Script GetCharacterScript( Character character )
          {
            return LATIN;
          }
+        if( ( 0xfb13 <= character ) && ( character <= 0xfb17 ) )
+        {
+          return ARMENIAN;
+        }
          if( ( 0xfb1d <= character ) && ( character <= 0xfb4f ) )
          {
            return HEBREW;
diff --git a/text/dali/devel-api/text-abstraction/script.h b/text/dali/devel-api/text-abstraction/script.h

index 3f33208..9341f98 100644 (file)
--- a/text/dali/devel-api/text-abstraction/script.h
+++ b/text/dali/devel-api/text-abstraction/script.h
@@ -36,55 +36,77 @@ namespace TextAbstraction
   */
  enum Script
  {
+  CYRILLIC,   ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
+  GREEK,      ///< The Greek script. Used by Greek.
    LATIN,      ///< The latin script. Used by many western languages and others around the world.
+
    ARABIC,     ///< The arabic script. Used by Arab and Urdu among others.
-  DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
+  HEBREW,     ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
+
+  ARMENIAN,   ///< The Armenian script. Used by Armenian.
+  GEORGIAN,   ///< The Georgian script. Used by Georgian.
+
+  CJK,        ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
+  HANGUL,     ///< The Hangul jamo script. Used by Korean.
+  HIRAGANA,   ///< The Hiragana script. Used by the Japanese.
+  KATAKANA,   ///< The Katakana script. Used by the Japanese.
+
    BENGALI,    ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
-  GURMUKHI,   ///< The Gurmukhi script. Used by Punjabi.
+  BURMESE,    ///< The Burmese script. Used by the Burmese (Myanmar) language.
+  DEVANAGARI, ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
    GUJARATI,   ///< The Gujarati script. Used by Gujarati.
-  ORIYA,      ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
-  TAMIL,      ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
-  TELUGU,     ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+  GURMUKHI,   ///< The Gurmukhi script. Used by Punjabi.
    KANNADA,    ///< The Kannada script. Used by Kannada and Tulu.
    MALAYALAM,  ///< The Malayalam script. Used by Malayalam.
+  ORIYA,      ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
    SINHALA,    ///< The Sinhala script. Used by Sinhala and Pali.
-  CJK,        ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
-  HANGUL,     ///< The Hangul jamo script. Used by Korean.
-  KHMER,      ///< The Khmer script. Used by the Khmer language.
+  TAMIL,      ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
+  TELUGU,     ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+
    LAO,        ///< The Lao script. Used by the Lao language.
    THAI,       ///< The Thai script. Used by the Thai language
-  BURMESE,    ///< The Burmese script. Used by the Burmese (Myanmar) language.
-  HEBREW,     ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
-  HIRAGANA,   ///< The Hiragana script. Used by the Japanese.
-  KATAKANA,   ///< The Katakana script. Used by the Japanese.
+  KHMER,      ///< The Khmer script. Used by the Khmer language.
+
    EMOJI,      ///< The Emoji which map to standardized Unicode characters.
+
    UNKNOWN     ///< The script is unknown.
  };
  
  const char* const ScriptName[] =
  {
+  "CYRILLIC",   ///< The Cyrillic script. Used by Russian, Bulgarian, Ukrainian, Macedonian, ...
+  "GREEK",      ///< The Greek script. Used by Greek.
    "LATIN",      ///< The latin script. Used by many western languages and others around the world.
+
    "ARABIC",     ///< The arabic script. Used by Arab and Urdu among others.
-  "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
+  "HEBREW",     ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
+
+  "ARMENIAN",   ///< The Armenian script. Used by Armenian.
+  "GEORGIAN",   ///< The Georgian script. Used by Georgian.
+
+  "CJK",        ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
+  "HANGUL",     ///< The Hangul jamo script. Used by Korean.
+  "HIRAGANA",   ///< The Hiragana script. Used by the Japanese.
+  "KATAKANA",   ///< The Katakana script. Used by the Japanese.
+
    "BENGALI",    ///< The Bengali script. Used by Bangla, Assamese, Bishnupriya Manipuri, Daphla, Garo, Hallam, Khasi, Mizo, Munda, Naga, Rian, and Santali.
-  "GURMUKHI",   ///< The Gurmukhi script. Used by Punjabi.
+  "BURMESE",    ///< The Burmese script. Used by the Burmese (Myanmar) language.
+  "DEVANAGARI", ///< The devanagari script. Used by Hindi, Marathi, Sindhi, Nepali and Sanskrit.
    "GUJARATI",   ///< The Gujarati script. Used by Gujarati.
-  "ORIYA",      ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
-  "TAMIL",      ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
-  "TELUGU",     ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+  "GURMUKHI",   ///< The Gurmukhi script. Used by Punjabi.
    "KANNADA",    ///< The Kannada script. Used by Kannada and Tulu.
    "MALAYALAM",  ///< The Malayalam script. Used by Malayalam.
+  "ORIYA",      ///< The Oriya script. Used by Oriya (Odia), Khondi, and Santali.
    "SINHALA",    ///< The Sinhala script. Used by Sinhala and Pali.
-  "CJK",        ///< The CJK script. Used by Chinese, Japanese, Korean and Vietnamese(old writing system).
-  "HANGUL",     ///< The Hangul jamo script. Used by Korean.
-  "KHMER",      ///< The Khmer script. Used by the Khmer language.
+  "TAMIL",      ///< The Tamil script. Used by Tamil, Badaga, and Saurashtra.
+  "TELUGU",     ///< The Telugu script. Used by Telugu, Gondi, and Lambadi.
+
    "LAO",        ///< The Lao script. Used by the Lao language.
    "THAI",       ///< The Thai script. Used by the Thai language
-  "BURMESE",    ///< The Burmese script. Used by the Burmese (Myanmar) language.
-  "HEBREW",     ///< The Hebrew script. Used by the Hebrew, Yiddish, Ladino, and Judeo-Arabic.
-  "HIRAGANA",   ///< The Hiragana script. Used by the Japanese.
-  "KATAKANA",   ///< The Katakana script. Used by the Japanese.
+  "KHMER",      ///< The Khmer script. Used by the Khmer language.
+
    "EMOJI",      ///< The Emoji which map to standardized Unicode characters.
+
    "UNKNOWN"     ///< The script is unknown.
  };
  
diff --git a/text/dali/internal/text-abstraction/shaping-impl.cpp b/text/dali/internal/text-abstraction/shaping-impl.cpp

index 6f7d5c9..fd6e8dc 100644 (file)
--- a/text/dali/internal/text-abstraction/shaping-impl.cpp
+++ b/text/dali/internal/text-abstraction/shaping-impl.cpp
@@ -45,24 +45,38 @@ const float        FROM_266 = 1.0f / 64.0f;
  
  const hb_script_t SCRIPT_TO_HARFBUZZ[] =
  {
+  HB_SCRIPT_CYRILLIC,
+  HB_SCRIPT_GREEK,
    HB_SCRIPT_LATIN,
+
    HB_SCRIPT_ARABIC,
-  HB_SCRIPT_DEVANAGARI,
+  HB_SCRIPT_HEBREW,
+
+  HB_SCRIPT_ARMENIAN,
+  HB_SCRIPT_GEORGIAN,
+
+  HB_SCRIPT_HAN,
+  HB_SCRIPT_HANGUL,
+  HB_SCRIPT_HIRAGANA,
+  HB_SCRIPT_KATAKANA,
+
    HB_SCRIPT_BENGALI,
-  HB_SCRIPT_GURMUKHI,
+  HB_SCRIPT_MYANMAR,
+  HB_SCRIPT_DEVANAGARI,
    HB_SCRIPT_GUJARATI,
-  HB_SCRIPT_ORIYA,
-  HB_SCRIPT_TAMIL,
-  HB_SCRIPT_TELUGU,
+  HB_SCRIPT_GURMUKHI,
    HB_SCRIPT_KANNADA,
    HB_SCRIPT_MALAYALAM,
+  HB_SCRIPT_ORIYA,
    HB_SCRIPT_SINHALA,
-  HB_SCRIPT_HAN,
-  HB_SCRIPT_HANGUL,
-  HB_SCRIPT_KHMER,
+  HB_SCRIPT_TAMIL,
+  HB_SCRIPT_TELUGU,
+
    HB_SCRIPT_LAO,
    HB_SCRIPT_THAI,
-  HB_SCRIPT_MYANMAR,
+  HB_SCRIPT_KHMER,
+
+  HB_SCRIPT_UNKNOWN, // EMOJI
    HB_SCRIPT_UNKNOWN
  };
author	Victor Cebollada <v.cebollada@samsung.com>
	Fri, 22 May 2015 14:17:05 +0000 (15:17 +0100)
committer	Víctor Cebollada <v.cebollada@samsung.com>
	Thu, 28 May 2015 06:32:54 +0000 (23:32 -0700)
text/dali/devel-api/text-abstraction/script.cpp		patch \| blob \| history
text/dali/devel-api/text-abstraction/script.h		patch \| blob \| history
text/dali/internal/text-abstraction/shaping-impl.cpp		patch \| blob \| history