Supported XHTML entities (Named & Numeric (hex/decimal)) in Markup Language. 04/140504/14
author saritarawat <sarita.rawat@samsung.com>
Tue, 25 Jul 2017 07:23:48 +0000 (12:53 +0530)
committer Heeyong Song <heeyong.song@samsung.com>
Wed, 11 Oct 2017 04:49:42 +0000 (13:49 +0900)
Added additional functionalities in ProcessMarkupString function to support XHTML entities.
New function has been added to get UTF 8 text from Named & Numeric entities

Change-Id: Ie47cf9274cd56e0f9c8d2448f0e7e812176ed1ed

automated-tests/src/dali-toolkit-internal/utc-Dali-Text-Markup.cpp [changed mode: 0644->0755]
dali-toolkit/internal/file.list [changed mode: 0644->0755]
dali-toolkit/internal/text/markup-processor.cpp [changed mode: 0644->0755]
dali-toolkit/internal/text/xhtml-entities.cpp [new file with mode: 0755]
dali-toolkit/internal/text/xhtml-entities.h [new file with mode: 0755]
docs/content/images/text-controls/SpecialCharacter1.png [new file with mode: 0755]
docs/content/images/text-controls/SpecialCharacters.png [new file with mode: 0755]
docs/content/images/text-controls/XHTML_entity.png [new file with mode: 0755]
docs/content/shared-javascript-and-cpp-documentation/markup-style.md [changed mode: 0644->0755]

old mode 100644 (file)
new mode 100755 (executable)
index 87db667..7007170
@@ -166,6 +166,34 @@ namespace
     return true;
   }
 
+  ///////////////////////////////////////////////////////////
+
+
+  // One test case for XHTMLEntityToUTF8Test().
+  struct XHTMLEntityToUTF8Data
+  {
+    // Label printed when the case is run.
+    std::string description;
+    // Mark-up text handed to ProcessMarkupString().
+    std::string xHTMLEntityString;
+    // Text the processed mark-up is compared against.
+    std::string expectedString;
+  };
+
+  bool XHTMLEntityToUTF8Test( const XHTMLEntityToUTF8Data& data )
+  {
+    std::cout << "  testing " << data.description << std::endl;
+
+    Vector<ColorRun> colorRuns;
+    Vector<FontDescriptionRun> fontRuns;
+    MarkupProcessData markupProcessData( colorRuns, fontRuns );
+    ProcessMarkupString( data.xHTMLEntityString, markupProcessData );
+
+    if( markupProcessData.markupProcessedText != data.expectedString )
+    {
+      std::cout << "  different output string : " << markupProcessData.markupProcessedText << ", expected : " << data.expectedString << " " << std::endl;
+      return false;
+    }
+
+    return true;
+  }
+
 } // namespace
 
 int UtcDaliTextTokenComparison(void)
@@ -451,3 +479,54 @@ int UtcDaliTextVector2ToString(void)
   tet_result(TET_PASS);
   END_TEST;
 }
+
+int UtcDaliTextXHTMLEntityToUTF8(void)
+{
+  tet_infoline(" UtcDaliTextXHTMLEntityToUTF8");
+  const XHTMLEntityToUTF8Data data[] =
+  {
+    {
+      "Text Without XHTML Entity",
+      "Checking XHTML Entitities",
+      "Checking XHTML Entitities"
+    },
+    {
+      "Text With XHTML Entity in Numeric form",
+      "Checking Numeric Entitities &#x26; &#x27; &#x3C; &#x3E; &#xA1; &#xA2; &#xA3; &#xA4; &#xA5; &#xA6; &#xA7; &#xA8; &#xA9; &#xAA; &#xAB; &#xAC; &#xAD; &#xAE; &#xAF; &#xB0; &#xB1; &#xB2; &#xB3; &#xB4; &#xB5; &#xB6; &#xB7; &#xB8; &#xB9; &#xBA; &#xBB; &#xBC; &#xBD; &#xBE; &#xBF; &#xC0; &#xC1; &#xC2; &#xC3; &#xC4; &#xC5; &#xE6; &#xC7; &#xC8; &#xC9; &#xCA; &#xCB; &#xCC; &#xCD; &#xCE; &#xCF; &#xF0; &#xD1; &#xD2; &#xD3; &#xD4; &#xD5; &#xD6; &#xD7; &#xD8; &#xD9; &#xDA; &#xDB; &#xDD; &#xFE; &#xDF; &#xE0; &#xE1; &#xE2; &#xE3; &#xE4; &#xE5; &#xE6; &#xE7; &#xE8; &#xE9; &#xEA; &#xEB; &#xEC; &#xED; &#xEE; &#xEF; &#xF0; &#xF1; &#xF2; &#xF3; &#xF4; &#xF5; &#xF6; &#xF7; &#xF8; &#xF9; &#xFA; &#xFB; &#xFC; &#xFD; &#xFE; &#xFF; &#x3B1; &#x3B2; &#x3B3; &#x3B4; &#x3B5; &#x3B6; &#x3B7; &#x3B8; &#x3B9; &#x3BA; &#x3BB; &#x3BC; &#x3BD; &#x3BE; &#x3BF; &#x3C0; &#x3C1; &#x3C3; &#x3C4; &#x3C5; &#x3C6; &#x3C7; &#x3C8; &#x3C9; &#x2026; &#x20AC; &#x2190; &#x2191; &#x2192; &#x2193; &#x2194; &#x2190; &#x2192; &#x2200; &#x2203; &#x2207; &#x220F; &#x2211; &#x2227; &#x2228; &#x222B; &#x2260; &#x2261; &#x2295; &#x22A5; &#x2020; &#x2021; &#x2022; ",
+      "Checking Numeric Entitities & ' < > ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ À Á Â Ã Ä Å æ Ç È É Ê Ë Ì Í Î Ï ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ý þ ß à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ σ τ υ φ χ ψ ω … € ← ↑ → ↓ ↔ ← → ∀ ∃ ∇ ∏ ∑ ∧ ∨ ∫ ≠ ≡ ⊕ ⊥ † ‡ • "
+    },
+    {
+      "Text With XHTML Named Entities",
+      "Checking Named Entitities &amp; &apos; &lt; &gt; &iexcl; &cent; &pound; &curren; &yen; &brvbar; &sect; &uml; &copy; &ordf; &laquo; &not; &shy; &reg; &macr; &deg; &plusmn; &sup2; &sup3; &acute; &micro; &para; &middot; &cedil; &sup1; &ordm; &raquo; &frac14; &frac12; &frac34; &iquest; &Agrave; &Aacute; &Acirc; &Atilde; &Auml; &Aring; &aelig; &Ccedil; &Egrave; &Eacute; &Ecirc; &Euml; &Igrave; &Iacute; &Icirc; &Iuml; &eth; &Ntilde; &Ograve; &Oacute; &Ocirc; &Otilde; &Ouml; &times; &Oslash; &Ugrave; &Uacute; &Ucirc; &Yacute; &thorn; &szlig; &agrave; &aacute; &acirc; &atilde; &auml; &aring; &aelig; &ccedil; &egrave; &eacute; &ecirc; &euml; &igrave; &iacute; &icirc; &iuml; &eth; &ntilde; &ograve; &oacute; &ocirc; &otilde; &ouml; &divide; &oslash; &ugrave; &uacute; &ucirc; &uuml; &yacute; &thorn; &yuml; &alpha; &beta; &gamma; &delta; &epsilon; &zeta; &eta; &theta; &iota; &kappa; &lambda; &mu; &nu; &xi; &omicron; &pi; &rho; &sigma; &tau; &upsilon; &phi; &chi; &psi; &omega; &hellip; &euro; &larr; &uarr; &rarr; &darr; &harr; &larr; &rarr; &forall; &exist; &nabla; &prod; &sum; &and; &or; &int; &ne; &equiv; &oplus; &perp; &dagger; &Dagger; &bull; ",
+      "Checking Named Entitities & ' < > ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ À Á Â Ã Ä Å æ Ç È É Ê Ë Ì Í Î Ï ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ý þ ß à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ σ τ υ φ χ ψ ω … € ← ↑ → ↓ ↔ ← → ∀ ∃ ∇ ∏ ∑ ∧ ∨ ∫ ≠ ≡ ⊕ ⊥ † ‡ • "
+    },
+    {
+      "Testing of < special character",
+      "Testing of < special character",
+      "Testing of "
+    },
+    {
+      "Testing of & special character",
+      "Testing of & special character",
+      "Testing of "
+    },
+    {
+      "Testing of & < > special character",
+      "Testing of \\& \\< \\> special character",
+      "Testing of & < > special character"
+    }
+  };
+  const unsigned int numberOfTests = 6u;
+
+  for( unsigned int index = 0u; index < numberOfTests; ++index )
+  {
+    ToolkitTestApplication application;
+    if( !XHTMLEntityToUTF8Test( data[index] ) )
+    {
+      tet_result(TET_FAIL);
+    }
+  }
+
+  tet_result(TET_PASS);
+  END_TEST;
+}
old mode 100644 (file)
new mode 100755 (executable)
index 6622d01..3fa035e
@@ -153,4 +153,5 @@ toolkit_src_files = \
    $(toolkit_src_dir)/transition-effects/cube-transition-fold-effect-impl.cpp \
    $(toolkit_src_dir)/transition-effects/cube-transition-wave-effect-impl.cpp \
    $(toolkit_src_dir)/scripting/script-impl.cpp \
-   $(toolkit_src_dir)/scripting/script-plugin-proxy.cpp
+   $(toolkit_src_dir)/scripting/script-plugin-proxy.cpp \
+   $(toolkit_src_dir)/text/xhtml-entities.cpp
old mode 100644 (file)
new mode 100755 (executable)
index 6ea6352..5bbe5ad
 // FILE HEADER
 #include <dali-toolkit/internal/text/markup-processor.h>
 
+// EXTERNAL INCLUDES
+#include <climits>  // for ULONG_MAX
+#include <dali/integration-api/debug.h>
+
 // INTERNAL INCLUDES
 #include <dali-toolkit/internal/text/character-set-conversion.h>
 #include <dali-toolkit/internal/text/markup-processor-color.h>
 #include <dali-toolkit/internal/text/markup-processor-font.h>
 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
+#include <dali-toolkit/internal/text/xhtml-entities.h>
+
+
 
 namespace Dali
 {
@@ -53,12 +60,26 @@ const char EQUAL          = '=';
 const char QUOTATION_MARK = '\'';
 const char SLASH          = '/';
 const char BACK_SLASH     = '\\';
+const char AMPERSAND      = '&';
+const char HASH           = '#';
+const char SEMI_COLON     = ';';
+const char CHAR_ARRAY_END = '\0';
+const char HEX_CODE       = 'x';
 
 const char WHITE_SPACE    = 0x20; // ASCII value of the white space.
 
+// Range 1 0x0u < XHTML_DECIMAL_ENTITY_RANGE <= 0xD7FFu
+// Range 2 0xE000u <= XHTML_DECIMAL_ENTITY_RANGE <= 0xFFFDu
+// Range 3 0x10000u <= XHTML_DECIMAL_ENTITY_RANGE <= 0x10FFFFu
+const unsigned long XHTML_DECIMAL_ENTITY_RANGE[] = { 0x0u, 0xD7FFu, 0xE000u, 0xFFFDu, 0x10000u, 0x10FFFFu };
+
 const unsigned int MAX_NUM_OF_ATTRIBUTES =  5u; ///< The font tag has the 'family', 'size' 'weight', 'width' and 'slant' attrubutes.
 const unsigned int DEFAULT_VECTOR_SIZE   = 16u; ///< Default size of run vectors.
 
+#if defined(DEBUG_ENABLED)
+Debug::Filter* gLogFilter = Debug::Filter::New(Debug::NoLogging, true, "LOG_MARKUP_PROCESSOR");
+#endif
+
 /**
  * @brief Struct used to retrieve the style runs from the mark-up string.
  */
@@ -354,6 +375,88 @@ bool IsTag( const char*& markupStringBuffer,
   return isTag;
 }
 
+/**
+ * @brief Returns length of XHTML entity by parsing the text. It also determines if it is XHTML entity or not.
+ *
+ * @param[in] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
+ * @param[in] markupStringEndBuffer Pointing to end of mark-up string buffer.
+ *
+ * @return Length of markupText in case of XHTML entity otherwise return 0.
+ */
+unsigned int GetXHTMLEntityLength( const char*& markupStringBuffer,
+                                   const char* const markupStringEndBuffer )
+{
+  char character = *markupStringBuffer;
+  if( AMPERSAND == character ) // '&'
+  {
+    // if the iterator is pointing to a '&' character, then check for ';' to find end to XHTML entity.
+    ++markupStringBuffer;
+    if( markupStringBuffer < markupStringEndBuffer )
+    {
+      unsigned int len = 1u;
+      for( ; markupStringBuffer < markupStringEndBuffer ; ++markupStringBuffer )
+      {
+        character = *markupStringBuffer;
+        ++len;
+        if( SEMI_COLON == character ) // ';'
+        {
+          // found end of XHTML entity
+          ++markupStringBuffer;
+          return len;
+        }
+        else if( ( AMPERSAND == character ) || ( BACK_SLASH == character ) || ( LESS_THAN == character ))
+        {
+          return 0;
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/**
+ * @brief It parses a XHTML string which has hex/decimal entity and fill its corresponging utf-8 string.
+ *
+ * @param[in] markupText The mark-up text buffer.
+ * @param[out] utf-8 text Corresponding to markup Text
+ *
+ * @return true if string is successfully parsed otherwise false
+ */
+bool XHTMLNumericEntityToUtf8 ( const char* markupText, char* utf8 )
+{
+  bool result = false;
+
+  if( NULL != markupText )
+  {
+    bool isHex = false;
+
+    // check if hex or decimal entity
+    if( ( CHAR_ARRAY_END != *markupText ) && ( HEX_CODE == *markupText ) )
+    {
+      isHex = true;
+      ++markupText;
+    }
+
+    char* end = NULL;
+    unsigned long l = strtoul( markupText, &end, ( isHex ? 16 : 10 ) );  // l contains UTF-32 code in case of correct XHTML entity
+
+    // check for valid XHTML numeric entities (between '#' or "#x" and ';')
+    if( ( l > 0 ) && ( l < ULONG_MAX ) && ( *end == SEMI_COLON ) ) // in case wrong XHTML entity is set eg. "&#23abcdefs;" in that case *end will be 'a'
+    {
+      /* characters XML 1.1 permits */
+      if( ( ( XHTML_DECIMAL_ENTITY_RANGE[0] < l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[1] ) ) ||
+        ( ( XHTML_DECIMAL_ENTITY_RANGE[2] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[3] ) ) ||
+        ( ( XHTML_DECIMAL_ENTITY_RANGE[4] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[5] ) ) )
+      {
+        // Convert UTF32 code to UTF8
+        Utf32ToUtf8( reinterpret_cast<const uint32_t* const>( &l ), 1, reinterpret_cast<uint8_t*>( utf8 ) );
+        result = true;
+       }
+    }
+  }
+  return result;
+}
+
 } // namespace
 
 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
@@ -547,32 +650,84 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma
         }
       } // <outline></outline>
     }  // end if( IsTag() )
-    else
+    else if( markupStringBuffer < markupStringEndBuffer )
     {
       unsigned char character = *markupStringBuffer;
+      const char* markupBuffer = markupStringBuffer;
+      unsigned char count = GetUtf8Length( character );
+      char utf8[8];
 
       if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
       {
-        // Adding < or > special character.
+        // Adding < , >  or & special character.
         const unsigned char nextCharacter = *( markupStringBuffer + 1u );
-        if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) )
+        if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) || ( AMPERSAND == nextCharacter ) )
         {
           character = nextCharacter;
           ++markupStringBuffer;
+
+          count = GetUtf8Length( character );
+          markupBuffer = markupStringBuffer;
         }
       }
+      else   // checking if conatins XHTML entity or not
+      {
+        const unsigned int len =  GetXHTMLEntityLength( markupStringBuffer, markupStringEndBuffer);
 
-      const unsigned char numberOfBytes = GetUtf8Length( character );
+        // Parse markupStringTxt if it contains XHTML Entity between '&' and ';'
+        if( len > 0 )
+        {
+          char* entityCode = NULL;
+          bool result = false;
+          count = 0;
 
-      markupProcessData.markupProcessedText.push_back( character );
-      for( unsigned char i = 1u; i < numberOfBytes; ++i )
-      {
-        ++markupStringBuffer;
-        markupProcessData.markupProcessedText.push_back( *markupStringBuffer );
+          // Checking if XHTML Numeric Entity
+          if( HASH == *( markupBuffer + 1u ) )
+          {
+            entityCode = &utf8[0];
+            // markupBuffer is currently pointing to '&'. By adding 2u to markupBuffer it will point to numeric string by skipping "&#'
+            result = XHTMLNumericEntityToUtf8( ( markupBuffer + 2u ), entityCode );
+          }
+          else    // Checking if XHTML Named Entity
+          {
+            entityCode = const_cast<char*> ( NamedEntityToUtf8( markupBuffer, len ) );
+            result = ( entityCode != NULL );
+          }
+          if ( result )
+          {
+            markupBuffer = entityCode; //utf8 text assigned to markupBuffer
+            character = markupBuffer[0];
+          }
+          else
+          {
+            DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not valid XHTML entity : (%.*s) \n", len, markupBuffer );
+            markupBuffer = NULL;
+          }
+        }
+        else    // in case string conatins Start of XHTML Entity('&') but not its end character(';')
+        {
+          if( character == AMPERSAND )
+          {
+            markupBuffer = NULL;
+            DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not Well formed XHTML content \n" );
+          }
+        }
       }
 
-      ++characterIndex;
-      ++markupStringBuffer;
+      if( markupBuffer != NULL )
+      {
+        const unsigned char numberOfBytes = GetUtf8Length( character );
+        markupProcessData.markupProcessedText.push_back( character );
+
+        for( unsigned char i = 1u; i < numberOfBytes; ++i )
+        {
+          ++markupBuffer;
+          markupProcessData.markupProcessedText.push_back( *markupBuffer );
+        }
+
+        ++characterIndex;
+        markupStringBuffer += count;
+      }
     }
   }
 
diff --git a/dali-toolkit/internal/text/xhtml-entities.cpp b/dali-toolkit/internal/text/xhtml-entities.cpp
new file mode 100755 (executable)
index 0000000..3ee7215
--- /dev/null
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// EXTERNAL INCLUDES
+#include <cstring> // for strlen()
+
+// FILE HEADER
+#include "xhtml-entities.h"
+
+namespace Dali
+{
+
+namespace Toolkit
+{
+
+namespace Text
+{
+
+namespace
+{
+/**
+ * One row of the named-entity lookup table: an XHTML named entity paired with
+ * the UTF-8 byte sequence it stands for.
+ */
+struct XHTMLEntityLookup
+{
+    const char* const entityName;  // XHTML named entity as written in mark-up, including the leading '&' and trailing ';'
+    const char* const entityCode;  // Corresponding UTF-8 bytes
+};
+
+/* Table of the XHTML named entities supported in DALi.
+ *
+ * Each row pairs a named entity (the key) with
+ * its UTF-8 encoding (the value).
+ */
+const XHTMLEntityLookup XHTMLEntityLookupTable[] =
+  {
+  { "&quot;\0"    ,"\x22\0" },
+  { "&amp;\0"     ,"\x26\0" },
+  { "&apos;\0"    ,"\x27\0" },
+  { "&lt;\0"      ,"\x3c\0" },
+  { "&gt;\0"      ,"\x3e\0" },
+  { "&nbsp;\0"    ,"\xc2\xa0\0" },
+  { "&iexcl;\0"   ,"\xc2\xa1\0" },
+  { "&cent;\0"    ,"\xc2\xa2\0" },
+  { "&pound;\0"   ,"\xc2\xa3\0" },
+  { "&curren;\0"  ,"\xc2\xa4\0" },
+  { "&yen;\0"     ,"\xc2\xa5\0" },
+  { "&brvbar;\0"  ,"\xc2\xa6\0" },
+  { "&sect;\0"    ,"\xc2\xa7\0" },
+  { "&uml;\0"     ,"\xc2\xa8\0" },
+  { "&copy;\0"    ,"\xc2\xa9\0" },
+  { "&ordf;\0"    ,"\xc2\xaa\0" },
+  { "&laquo;\0"   ,"\xc2\xab\0" },
+  { "&not;\0"     ,"\xc2\xac\0" },
+  { "&shy;\0"     ,"\xc2\xad\0" },
+  { "&reg;\0"     ,"\xc2\xae\0" },
+  { "&macr;\0"    ,"\xc2\xaf\0" },
+  { "&deg;\0"     ,"\xc2\xb0\0" },
+  { "&plusmn;\0"  ,"\xc2\xb1\0" },
+  { "&sup2;\0"    ,"\xc2\xb2\0" },
+  { "&sup3;\0"    ,"\xc2\xb3\0" },
+  { "&acute;\0"   ,"\xc2\xb4\0" },
+  { "&micro;\0"   ,"\xc2\xb5\0" },
+  { "&para;\0"    ,"\xc2\xb6\0" },
+  { "&middot;\0"  ,"\xc2\xb7\0" },
+  { "&cedil;\0"   ,"\xc2\xb8\0" },
+  { "&sup1;\0"    ,"\xc2\xb9\0" },
+  { "&ordm;\0"    ,"\xc2\xba\0" },
+  { "&raquo;\0"   ,"\xc2\xbb\0" },
+  { "&frac14;\0"  ,"\xc2\xbc\0" },
+  { "&frac12;\0"  ,"\xc2\xbd\0" },
+  { "&frac34;\0"  ,"\xc2\xbe\0" },
+  { "&iquest;\0"  ,"\xc2\xbf\0" },
+  { "&Agrave;\0"  ,"\xc3\x80\0" },
+  { "&Aacute;\0"  ,"\xc3\x81\0" },
+  { "&Acirc;\0"   ,"\xc3\x82\0" },
+  { "&Atilde;\0"  ,"\xc3\x83\0" },
+  { "&Auml;\0"    ,"\xc3\x84\0" },
+  { "&Aring;\0"   ,"\xc3\x85\0" },
+  { "&AElig;\0"   ,"\xc3\x86\0" },
+  { "&Ccedil;\0"  ,"\xc3\x87\0" },
+  { "&Egrave;\0"  ,"\xc3\x88\0" },
+  { "&Eacute;\0"  ,"\xc3\x89\0" },
+  { "&Ecirc;\0"   ,"\xc3\x8a\0" },
+  { "&Euml;\0"    ,"\xc3\x8b\0" },
+  { "&Igrave;\0"  ,"\xc3\x8c\0" },
+  { "&Iacute;\0"  ,"\xc3\x8d\0" },
+  { "&Icirc;\0"   ,"\xc3\x8e\0" },
+  { "&Iuml;\0"    ,"\xc3\x8f\0" },
+  { "&ETH;\0"     ,"\xc3\x90\0" },
+  { "&Ntilde;\0"  ,"\xc3\x91\0" },
+  { "&Ograve;\0"  ,"\xc3\x92\0" },
+  { "&Oacute;\0"  ,"\xc3\x93\0" },
+  { "&Ocirc;\0"   ,"\xc3\x94\0" },
+  { "&Otilde;\0"  ,"\xc3\x95\0" },
+  { "&Ouml;\0"    ,"\xc3\x96\0" },
+  { "&times;\0"   ,"\xc3\x97\0" },
+  { "&Oslash;\0"  ,"\xc3\x98\0" },
+  { "&Ugrave;\0"  ,"\xc3\x99\0" },
+  { "&Uacute;\0"  ,"\xc3\x9a\0" },
+  { "&Ucirc;\0"   ,"\xc3\x9b\0" },
+  { "&Uuml;\0"    ,"\xc3\x9c\0" },
+  { "&Yacute;\0"  ,"\xc3\x9d\0" },
+  { "&THORN;\0"   ,"\xc3\x9e\0" },
+  { "&szlig;\0"   ,"\xc3\x9f\0" },
+  { "&agrave;\0"  ,"\xc3\xa0\0" },
+  { "&aacute;\0"  ,"\xc3\xa1\0" },
+  { "&acirc;\0"   ,"\xc3\xa2\0" },
+  { "&atilde;\0"  ,"\xc3\xa3\0" },
+  { "&auml;\0"    ,"\xc3\xa4\0" },
+  { "&aring;\0"   ,"\xc3\xa5\0" },
+  { "&aelig;\0"   ,"\xc3\xa6\0" },
+  { "&ccedil;\0"  ,"\xc3\xa7\0" },
+  { "&egrave;\0"  ,"\xc3\xa8\0" },
+  { "&eacute;\0"  ,"\xc3\xa9\0" },
+  { "&ecirc;\0"   ,"\xc3\xaa\0" },
+  { "&euml;\0"    ,"\xc3\xab\0" },
+  { "&igrave;\0"  ,"\xc3\xac\0" },
+  { "&iacute;\0"  ,"\xc3\xad\0" },
+  { "&icirc;\0"   ,"\xc3\xae\0" },
+  { "&iuml;\0"    ,"\xc3\xaf\0" },
+  { "&eth;\0"     ,"\xc3\xb0\0" },
+  { "&ntilde;\0"  ,"\xc3\xb1\0" },
+  { "&ograve;\0"  ,"\xc3\xb2\0" },
+  { "&oacute;\0"  ,"\xc3\xb3\0" },
+  { "&ocirc;\0"   ,"\xc3\xb4\0" },
+  { "&otilde;\0"  ,"\xc3\xb5\0" },
+  { "&ouml;\0"    ,"\xc3\xb6\0" },
+  { "&divide;\0"  ,"\xc3\xb7\0" },
+  { "&oslash;\0"  ,"\xc3\xb8\0" },
+  { "&ugrave;\0"  ,"\xc3\xb9\0" },
+  { "&uacute;\0"  ,"\xc3\xba\0" },
+  { "&ucirc;\0"   ,"\xc3\xbb\0" },
+  { "&uuml;\0"    ,"\xc3\xbc\0" },
+  { "&yacute;\0"  ,"\xc3\xbd\0" },
+  { "&thorn;\0"   ,"\xc3\xbe\0" },
+  { "&yuml;\0"    ,"\xc3\xbf\0" },
+  { "&OElig;\0"   ,"\xc5\x92\0" },
+  { "&oelig;\0"   ,"\xc5\x93\0" },
+  { "&Scaron;\0"  ,"\xc5\xa0\0" },
+  { "&scaron;\0"  ,"\xc5\xa1\0" },
+  { "&Yuml;\0"    ,"\xc5\xb8\0" },
+  { "&fnof;\0"    ,"\xc6\x92\0" },
+  { "&circ;\0"    ,"\xcb\x86\0" },
+  { "&tilde;\0"   ,"\xcb\x9c\0" },
+  { "&Alpha;\0"   ,"\xce\x91\0" },
+  { "&Beta;\0"    ,"\xce\x92\0" },
+  { "&Gamma;\0"   ,"\xce\x93\0" },
+  { "&Delta;\0"   ,"\xce\x94\0" },
+  { "&Epsilon;\0" ,"\xce\x95\0" },
+  { "&Zeta;\0"    ,"\xce\x96\0" },
+  { "&Eta;\0"     ,"\xce\x97\0" },
+  { "&Theta;\0"   ,"\xce\x98\0" },
+  { "&Iota;\0"    ,"\xce\x99\0" },
+  { "&Kappa;\0"   ,"\xce\x9a\0" },
+  { "&Lambda;\0"  ,"\xce\x9b\0" },
+  { "&Mu;\0"      ,"\xce\x9c\0" },
+  { "&Nu;\0"      ,"\xce\x9d\0" },
+  { "&Xi;\0"      ,"\xce\x9e\0" },
+  { "&Omicron;\0" ,"\xce\x9f\0" },
+  { "&Pi;\0"      ,"\xce\xa0\0" },
+  { "&Rho;\0"     ,"\xce\xa1\0" },
+  { "&Sigma;\0"   ,"\xce\xa3\0" },
+  { "&Tau;\0"     ,"\xce\xa4\0" },
+  { "&Upsilon;\0" ,"\xce\xa5\0" },
+  { "&Phi;\0"     ,"\xce\xa6\0" },
+  { "&Chi;\0"     ,"\xce\xa7\0" },
+  { "&Psi;\0"     ,"\xce\xa8\0" },
+  { "&Omega;\0"   ,"\xce\xa9\0" },
+  { "&alpha;\0"   ,"\xce\xb1\0" },
+  { "&beta;\0"    ,"\xce\xb2\0" },
+  { "&gamma;\0"   ,"\xce\xb3\0" },
+  { "&delta;\0"   ,"\xce\xb4\0" },
+  { "&epsilon;\0" ,"\xce\xb5\0" },
+  { "&zeta;\0"    ,"\xce\xb6\0" },
+  { "&eta;\0"     ,"\xce\xb7\0" },
+  { "&theta;\0"   ,"\xce\xb8\0" },
+  { "&iota;\0"    ,"\xce\xb9\0" },
+  { "&kappa;\0"   ,"\xce\xba\0" },
+  { "&lambda;\0"  ,"\xce\xbb\0" },
+  { "&mu;\0"      ,"\xce\xbc\0" },
+  { "&nu;\0"      ,"\xce\xbd\0" },
+  { "&xi;\0"      ,"\xce\xbe\0" },
+  { "&omicron;\0" ,"\xce\xbf\0" },
+  { "&pi;\0"      ,"\xcf\x80\0" },
+  { "&rho;\0"     ,"\xcf\x81\0" },
+  { "&sigmaf;\0"  ,"\xcf\x82\0" },
+  { "&sigma;\0"   ,"\xcf\x83\0" },
+  { "&tau;\0"     ,"\xcf\x84\0" },
+  { "&upsilon;\0" ,"\xcf\x85\0" },
+  { "&phi;\0"     ,"\xcf\x86\0" },
+  { "&chi;\0"     ,"\xcf\x87\0" },
+  { "&psi;\0"     ,"\xcf\x88\0" },
+  { "&omega;\0"   ,"\xcf\x89\0" },
+  { "&thetasym;\0","\xcf\x91\0" },
+  { "&upsih;\0"   ,"\xcf\x92\0" },
+  { "&piv;\0"     ,"\xcf\x96\0" },
+  { "&ensp;\0"    ,"\xe2\x80\x82\0" },
+  { "&emsp;\0"    ,"\xe2\x80\x83\0" },
+  { "&thinsp;\0"  ,"\xe2\x80\x89\0" },
+  { "&zwnj;\0"    ,"\xe2\x80\x8c\0" },
+  { "&zwj;\0"     ,"\xe2\x80\x8d\0" },
+  { "&lrm;\0"     ,"\xe2\x80\x8e\0" },
+  { "&rlm;\0"     ,"\xe2\x80\x8f\0" },
+  { "&ndash;\0"   ,"\xe2\x80\x93\0" },
+  { "&mdash;\0"   ,"\xe2\x80\x94\0" },
+  { "&lsquo;\0"   ,"\xe2\x80\x98\0" },
+  { "&rsquo;\0"   ,"\xe2\x80\x99\0" },
+  { "&sbquo;\0"   ,"\xe2\x80\x9a\0" },
+  { "&ldquo;\0"   ,"\xe2\x80\x9c\0" },
+  { "&rdquo;\0"   ,"\xe2\x80\x9d\0" },
+  { "&bdquo;\0"   ,"\xe2\x80\x9e\0" },
+  { "&dagger;\0"  ,"\xe2\x80\xa0\0" },
+  { "&Dagger;\0"  ,"\xe2\x80\xa1\0" },
+  { "&bull;\0"    ,"\xe2\x80\xa2\0" },
+  { "&hellip;\0"  ,"\xe2\x80\xa6\0" },
+  { "&permil;\0"  ,"\xe2\x80\xb0\0" },
+  { "&prime;\0"   ,"\xe2\x80\xb2\0" },
+  { "&Prime;\0"   ,"\xe2\x80\xb3\0" },
+  { "&lsaquo;\0"  ,"\xe2\x80\xb9\0" },
+  { "&rsaquo;\0"  ,"\xe2\x80\xba\0" },
+  { "&oline;\0"   ,"\xe2\x80\xbe\0" },
+  { "&frasl;\0"   ,"\xe2\x81\x84\0" },
+  { "&euro;\0"    ,"\xe2\x82\xac\0" },
+  { "&image;\0"   ,"\xe2\x84\x91\0" },
+  { "&weierp;\0"  ,"\xe2\x84\x98\0" },
+  { "&real;\0"    ,"\xe2\x84\x9c\0" },
+  { "&trade;\0"   ,"\xe2\x84\xa2\0" },
+  { "&alefsym;\0" ,"\xe2\x84\xb5\0" },
+  { "&larr;\0"    ,"\xe2\x86\x90\0" },
+  { "&uarr;\0"    ,"\xe2\x86\x91\0" },
+  { "&rarr;\0"    ,"\xe2\x86\x92\0" },
+  { "&darr;\0"    ,"\xe2\x86\x93\0" },
+  { "&harr;\0"    ,"\xe2\x86\x94\0" },
+  { "&crarr;\0"   ,"\xe2\x86\xb5\0" },
+  { "&lArr;\0"    ,"\xe2\x87\x90\0" },
+  { "&uArr;\0"    ,"\xe2\x87\x91\0" },
+  { "&rArr;\0"    ,"\xe2\x87\x92\0" },
+  { "&dArr;\0"    ,"\xe2\x87\x93\0" },
+  { "&hArr;\0"    ,"\xe2\x87\x94\0" },
+  { "&forall;\0"  ,"\xe2\x88\x80\0" },
+  { "&part;\0"    ,"\xe2\x88\x82\0" },
+  { "&exist;\0"   ,"\xe2\x88\x83\0" },
+  { "&empty;\0"   ,"\xe2\x88\x85\0" },
+  { "&nabla;\0"   ,"\xe2\x88\x87\0" },
+  { "&isin;\0"    ,"\xe2\x88\x88\0" },
+  { "&notin;\0"   ,"\xe2\x88\x89\0" },
+  { "&ni;\0"      ,"\xe2\x88\x8b\0" },
+  { "&prod;\0"    ,"\xe2\x88\x8f\0" },
+  { "&sum;\0"     ,"\xe2\x88\x91\0" },
+  { "&minus;\0"   ,"\xe2\x88\x92\0" },
+  { "&lowast;\0"  ,"\xe2\x88\x97\0" },
+  { "&radic;\0"   ,"\xe2\x88\x9a\0" },
+  { "&prop;\0"    ,"\xe2\x88\x9d\0" },
+  { "&infin;\0"   ,"\xe2\x88\x9e\0" },
+  { "&ang;\0"     ,"\xe2\x88\xa0\0" },
+  { "&and;\0"     ,"\xe2\x88\xa7\0" },
+  { "&or;\0"      ,"\xe2\x88\xa8\0" },
+  { "&cap;\0"     ,"\xe2\x88\xa9\0" },
+  { "&cup;\0"     ,"\xe2\x88\xaa\0" },
+  { "&int;\0"     ,"\xe2\x88\xab\0" },
+  { "&there4;\0"  ,"\xe2\x88\xb4\0" },
+  { "&sim;\0"     ,"\xe2\x88\xbc\0" },
+  { "&cong;\0"    ,"\xe2\x89\x85\0" },
+  { "&asymp;\0"   ,"\xe2\x89\x88\0" },
+  { "&ne;\0"      ,"\xe2\x89\xa0\0" },
+  { "&equiv;\0"   ,"\xe2\x89\xa1\0" },
+  { "&le;\0"      ,"\xe2\x89\xa4\0" },
+  { "&ge;\0"      ,"\xe2\x89\xa5\0" },
+  { "&sub;\0"     ,"\xe2\x8a\x82\0" },
+  { "&sup;\0"     ,"\xe2\x8a\x83\0" },
+  { "&nsub;\0"    ,"\xe2\x8a\x84\0" },
+  { "&sube;\0"    ,"\xe2\x8a\x86\0" },
+  { "&supe;\0"    ,"\xe2\x8a\x87\0" },
+  { "&oplus;\0"   ,"\xe2\x8a\x95\0" },
+  { "&otimes;\0"  ,"\xe2\x8a\x97\0" },
+  { "&perp;\0"    ,"\xe2\x8a\xa5\0" },
+  { "&sdot;\0"    ,"\xe2\x8b\x85\0" },
+  { "&lceil;\0"   ,"\xe2\x8c\x88\0" },
+  { "&rceil;\0"   ,"\xe2\x8c\x89\0" },
+  { "&lfloor;\0"  ,"\xe2\x8c\x8a\0" },
+  { "&rfloor;\0"  ,"\xe2\x8c\x8b\0" },
+  { "&loz;\0"     ,"\xe2\x97\x8a\0" },
+  { "&spades;\0"  ,"\xe2\x99\xa0\0" },
+  { "&clubs;\0"   ,"\xe2\x99\xa3\0" },
+  { "&hearts;\0"  ,"\xe2\x99\xa5\0" },
+  { "&diams;\0"   ,"\xe2\x99\xa6\0" },
+  { "&lang;\0"    ,"\xe2\x9f\xa8\0" },
+  { "&rang;\0"    ,"\xe2\x9f\xa9\0" }
+};
+
+const std::size_t XHTMLENTITY_LOOKUP_COUNT = (sizeof( XHTMLEntityLookupTable))/ (sizeof(XHTMLEntityLookup));
+
+} // unnamed namespace
+
+const char* const  NamedEntityToUtf8( const char* const markupText, unsigned int len )
+{
+  // finding if given XHTML named entity is supported or not
+  for( size_t i = 0; i < XHTMLENTITY_LOOKUP_COUNT ; ++i )
+  {
+    unsigned int entityLen = strlen(XHTMLEntityLookupTable[i].entityName);
+    if( len == entityLen )
+    {
+      if( strncmp( markupText, XHTMLEntityLookupTable[i].entityName, len )  == 0 )  // if named Entity found in table
+        {
+          return XHTMLEntityLookupTable[i].entityCode;
+        }
+    }
+  }
+  return NULL;
+}
+
+} // namespace  Text
+
+} // namespace  Toolkit
+
+} // namespace  Dali
diff --git a/dali-toolkit/internal/text/xhtml-entities.h b/dali-toolkit/internal/text/xhtml-entities.h
new file mode 100755 (executable)
index 0000000..c09d5cd
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef DALI_TOOLKIT_TEXT_XHTML_ENTITIES_H
+#define DALI_TOOLKIT_TEXT_XHTML_ENTITIES_H
+
+/*
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+namespace Dali
+{
+
+namespace Toolkit
+{
+
+namespace Text
+{
+/**
+ * @brief Retrieves UTF8 entity code for corresponding XHTML named Entity.
+ *
+ * @param[in] markupText The XHTML named entity.
+ * @param[in] len Length of markupText.
+ *
+ * @return pointer to UTF8 entity code if namedEntity found in table otherwise NULL
+ */
+const char* const NamedEntityToUtf8( const char* const markupText, unsigned int len );
+
+} // namespace Text
+
+} // namespace Toolkit
+
+} // namespace Dali
+
+#endif // DALI_TOOLKIT_TEXT_XHTML_ENTITIES_H
diff --git a/docs/content/images/text-controls/SpecialCharacter1.png b/docs/content/images/text-controls/SpecialCharacter1.png
new file mode 100755 (executable)
index 0000000..6515a8b
Binary files /dev/null and b/docs/content/images/text-controls/SpecialCharacter1.png differ
diff --git a/docs/content/images/text-controls/SpecialCharacters.png b/docs/content/images/text-controls/SpecialCharacters.png
new file mode 100755 (executable)
index 0000000..0e9ecca
Binary files /dev/null and b/docs/content/images/text-controls/SpecialCharacters.png differ
diff --git a/docs/content/images/text-controls/XHTML_entity.png b/docs/content/images/text-controls/XHTML_entity.png
new file mode 100755 (executable)
index 0000000..b656a5f
Binary files /dev/null and b/docs/content/images/text-controls/XHTML_entity.png differ
old mode 100644 (file)
new mode 100755 (executable)
index 1893273..5697959
@@ -126,4 +126,56 @@ field.SetProperty( TextLabel::Property::TEXT, "<font family='SamsungSans' weight
 field.text = "<font family='SamsungSans' weight='bold'>Hello world</font>";
 ~~~
 
+## XHTML ENTITIES
+
+Single characters can be embedded into text using character entity references. These references have a numeric value as well as a named value.
+You can use either one of them.
+
+XHTML ENTITIES Format:
+- Named reference : "&entity_name;" (i.e. an ampersand, the entity name, and then a semi-colon).
+- Numeric reference:
+- a. Decimal reference : "&#decimal_code;" (i.e. an ampersand, a hash symbol (which signals that a number reference is coming), the character's number in decimal, and then a semi-colon)
+- b. Hex reference     : "&#xhex-code;" (i.e. an ampersand, a hash symbol (which signals that a number reference is coming), an x (which indicates that the number is in hex), the character's number in hex, and then a semi-colon)
+
+
+~~~{.cpp}
+// C++
+field.SetProperty( TextLabel::Property::TEXT, "Named Entity: &amp;  Numeric Entity: Decimal Entity: &#9827;  Hex Entity: &#x2660;" );
+~~~
+
+![ ](XHTML_entity.png)
+
+## SPECIAL CHARACTERS HANDLING IN MARKUP
+
+Three special characters are supported :
+- < : Less Than. It means beginning of tag.
+- > : Greater Than. It means end of tag.
+- & : Ampersand. It means beginning of XHTML Entity.
+
+> The usage of "&" in markup style changed from Tizen 4.0.
+"To display a special character as regular text, prepend it with a backslash (written as two backslashes in a C++ string literal)."
+
+Below are some examples
+
+~~~{.cpp}
+// C++ ( Wrong usage to print text "Testing of < special character" )
+field.SetProperty( TextLabel::Property::TEXT, "Testing of < special character" );
+~~~
+
+![ ](SpecialCharacter1.png)
+
+~~~{.cpp}
+// C++ ( Wrong usage to print text "Testing of & special character" )
+field.SetProperty( TextLabel::Property::TEXT, "Testing of & special character" );
+~~~
+
+![ ](SpecialCharacter1.png)
+
+~~~{.cpp}
+// C++ ( Correct usage to print text "Testing of & < > special characters" )
+field.SetProperty( TextLabel::Property::TEXT, "Testing of \\& \\< \\> special characters" );
+~~~
+
+![ ](SpecialCharacters.png)
+
 */