dali-toolkit/internal/text/markup-processor.cpp

   1 /*
   2  * Copyright (c) 2021 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // FILE HEADER
  19 #include <dali-toolkit/internal/text/markup-processor.h>
  20
  21 // EXTERNAL INCLUDES
  22 #include <dali/integration-api/debug.h>
  23 #include <climits> // for ULONG_MAX
  24 #include <functional>
  25
  26 // INTERNAL INCLUDES
  27 #include <dali-toolkit/internal/text/character-set-conversion.h>
  28 #include <dali-toolkit/internal/text/markup-processor-anchor.h>
  29 #include <dali-toolkit/internal/text/markup-processor-color.h>
  30 #include <dali-toolkit/internal/text/markup-processor-embedded-item.h>
  31 #include <dali-toolkit/internal/text/markup-processor-font.h>
  32 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
  33 #include <dali-toolkit/internal/text/xhtml-entities.h>
  34
  35 namespace Dali
  36 {
  37 namespace Toolkit
  38 {
  39 namespace Text
  40 {
  41 namespace
  42 {
  43 // HTML-ISH tag and attribute constants.
  44 // Note they must be lower case in order to make the comparison to work
  45 // as the parser converts all the read tags to lower case.
  46 const std::string XHTML_COLOR_TAG("color");
  47 const std::string XHTML_FONT_TAG("font");
  48 const std::string XHTML_B_TAG("b");
  49 const std::string XHTML_I_TAG("i");
  50 const std::string XHTML_U_TAG("u");
  51 const std::string XHTML_SHADOW_TAG("shadow");
  52 const std::string XHTML_GLOW_TAG("glow");
  53 const std::string XHTML_OUTLINE_TAG("outline");
  54 const std::string XHTML_ITEM_TAG("item");
  55 const std::string XHTML_ANCHOR_TAG("a");
  56
  57 const char LESS_THAN      = '<';
  58 const char GREATER_THAN   = '>';
  59 const char EQUAL          = '=';
  60 const char QUOTATION_MARK = '\'';
  61 const char SLASH          = '/';
  62 const char BACK_SLASH     = '\\';
  63 const char AMPERSAND      = '&';
  64 const char HASH           = '#';
  65 const char SEMI_COLON     = ';';
  66 const char CHAR_ARRAY_END = '\0';
  67 const char HEX_CODE       = 'x';
  68
  69 const char WHITE_SPACE = 0x20; // ASCII value of the white space.
  70
  71 // Range 1 0x0u < XHTML_DECIMAL_ENTITY_RANGE <= 0xD7FFu
  72 // Range 2 0xE000u < XHTML_DECIMAL_ENTITY_RANGE <= 0xFFFDu
  73 // Range 3 0x10000u < XHTML_DECIMAL_ENTITY_RANGE <= 0x10FFFFu
  74 const unsigned long XHTML_DECIMAL_ENTITY_RANGE[] = {0x0u, 0xD7FFu, 0xE000u, 0xFFFDu, 0x10000u, 0x10FFFFu};
  75
  76 const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u;  ///< The font tag has the 'family', 'size' 'weight', 'width' and 'slant' attrubutes.
  77 const unsigned int DEFAULT_VECTOR_SIZE   = 16u; ///< Default size of run vectors.
  78
  79 #if defined(DEBUG_ENABLED)
  80 Debug::Filter* gLogFilter = Debug::Filter::New(Debug::NoLogging, true, "LOG_MARKUP_PROCESSOR");
  81 #endif
  82
  83 /**
  84  * @brief Struct used to retrieve the style runs from the mark-up string.
  85  */
  86 struct StyleStack
  87 {
  88   typedef VectorBase::SizeType RunIndex;
  89
  90   Vector<RunIndex> stack;    ///< Use a vector as a style stack. Stores the indices pointing where the run is stored inside the logical model.
  91   unsigned int     topIndex; ///< Points the top of the stack.
  92
  93   StyleStack()
  94   : stack(),
  95     topIndex(0u)
  96   {
  97     stack.Resize(DEFAULT_VECTOR_SIZE);
  98   }
  99
 100   void Push(RunIndex index)
 101   {
 102     // Check if there is space inside the style stack.
 103     const VectorBase::SizeType size = stack.Count();
 104     if(topIndex >= size)
 105     {
 106       // Resize the style stack.
 107       stack.Resize(2u * size);
 108     }
 109
 110     // Set the run index in the top of the stack.
 111     *(stack.Begin() + topIndex) = index;
 112
 113     // Reposition the pointer to the top of the stack.
 114     ++topIndex;
 115   }
 116
 117   RunIndex Pop()
 118   {
 119     // Pop the top of the stack.
 120     --topIndex;
 121     return *(stack.Begin() + topIndex);
 122   }
 123 };
 124
 125 /**
 126  * @brief Initializes a font run description to its defaults.
 127  *
 128  * @param[in,out] fontRun The font description run to initialize.
 129  */
 130 void Initialize(FontDescriptionRun& fontRun)
 131 {
 132   fontRun.characterRun.characterIndex     = 0u;
 133   fontRun.characterRun.numberOfCharacters = 0u;
 134   fontRun.familyName                      = NULL;
 135   fontRun.familyLength                    = 0u;
 136   fontRun.weight                          = TextAbstraction::FontWeight::NORMAL;
 137   fontRun.width                           = TextAbstraction::FontWidth::NORMAL;
 138   fontRun.slant                           = TextAbstraction::FontSlant::NORMAL;
 139   fontRun.size                            = 0u;
 140   fontRun.familyDefined                   = false;
 141   fontRun.weightDefined                   = false;
 142   fontRun.widthDefined                    = false;
 143   fontRun.slantDefined                    = false;
 144   fontRun.sizeDefined                     = false;
 145 }
 146
 147 /**
 148  * @brief Initializes a color run description to its defaults.
 149  *
 150  * @param[in,out] colorRun The font description run to initialize.
 151  */
 152 void Initialize(ColorRun& colorRun)
 153 {
 154   colorRun.characterRun.characterIndex     = 0u;
 155   colorRun.characterRun.numberOfCharacters = 0u;
 156 }
 157
 158 /**
 159  * @brief Initializes a underlined character run to its defaults.
 160  *
 161  * @param[in,out] underlinedCharacterRun The underelined character run to initialize.
 162  */
 163 void Initialize(UnderlinedCharacterRun& underlinedCharacterRun)
 164 {
 165   underlinedCharacterRun.characterRun.characterIndex     = 0u;
 166   underlinedCharacterRun.characterRun.numberOfCharacters = 0u;
 167 }
 168
 169 /**
 170  * @brief Splits the tag string into the tag name and its attributes.
 171  *
 172  * The attributes are stored in a vector in the tag.
 173  *
 174  * @param[in,out] tag The tag.
 175  */
 176 void ParseAttributes(Tag& tag)
 177 {
 178   if(tag.buffer == NULL)
 179   {
 180     return;
 181   }
 182
 183   tag.attributes.Resize(MAX_NUM_OF_ATTRIBUTES);
 184
 185   // Find first the tag name.
 186   bool isQuotationOpen = false;
 187
 188   const char*       tagBuffer    = tag.buffer;
 189   const char* const tagEndBuffer = tagBuffer + tag.length;
 190   tag.length                     = 0u;
 191   for(; tagBuffer < tagEndBuffer; ++tagBuffer)
 192   {
 193     const char character = *tagBuffer;
 194     if(WHITE_SPACE < character)
 195     {
 196       ++tag.length;
 197     }
 198     else
 199     {
 200       // Stops counting the length of the tag when a white space is found.
 201       // @note a white space is the WHITE_SPACE character and anything below as 'tab', 'return' or 'control characters'.
 202       break;
 203     }
 204   }
 205   SkipWhiteSpace(tagBuffer, tagEndBuffer);
 206
 207   // Find the attributes.
 208   unsigned int attributeIndex = 0u;
 209   const char*  nameBuffer     = NULL;
 210   const char*  valueBuffer    = NULL;
 211   Length       nameLength     = 0u;
 212   Length       valueLength    = 0u;
 213
 214   bool   addToNameValue     = true;
 215   Length numberOfWhiteSpace = 0u;
 216   for(; tagBuffer < tagEndBuffer; ++tagBuffer)
 217   {
 218     const char character = *tagBuffer;
 219     if((WHITE_SPACE >= character) && !isQuotationOpen)
 220     {
 221       if(NULL != valueBuffer)
 222       {
 223         // Remove white spaces at the end of the value.
 224         valueLength -= numberOfWhiteSpace;
 225       }
 226
 227       if((NULL != nameBuffer) && (NULL != valueBuffer))
 228       {
 229         // Every time a white space is found, a new attribute is created and stored in the attributes vector.
 230         Attribute& attribute = *(tag.attributes.Begin() + attributeIndex);
 231         ++attributeIndex;
 232
 233         attribute.nameBuffer  = nameBuffer;
 234         attribute.valueBuffer = valueBuffer;
 235         attribute.nameLength  = nameLength;
 236         attribute.valueLength = valueLength;
 237
 238         nameBuffer  = NULL;
 239         valueBuffer = NULL;
 240         nameLength  = 0u;
 241         valueLength = 0u;
 242
 243         addToNameValue = true; // next read characters will be added to the name.
 244       }
 245     }
 246     else if(EQUAL == character) // '='
 247     {
 248       addToNameValue = false; // next read characters will be added to the value.
 249       SkipWhiteSpace(tagBuffer, tagEndBuffer);
 250     }
 251     else if(QUOTATION_MARK == character) // '\''
 252     {
 253       // Do not add quotation marks to neither name nor value.
 254       isQuotationOpen = !isQuotationOpen;
 255
 256       if(isQuotationOpen)
 257       {
 258         ++tagBuffer;
 259         SkipWhiteSpace(tagBuffer, tagEndBuffer);
 260         --tagBuffer;
 261       }
 262     }
 263     else
 264     {
 265       // Adds characters to the name or the value.
 266       if(addToNameValue)
 267       {
 268         if(NULL == nameBuffer)
 269         {
 270           nameBuffer = tagBuffer;
 271         }
 272         ++nameLength;
 273       }
 274       else
 275       {
 276         if(isQuotationOpen)
 277         {
 278           if(WHITE_SPACE >= character)
 279           {
 280             ++numberOfWhiteSpace;
 281           }
 282           else
 283           {
 284             numberOfWhiteSpace = 0u;
 285           }
 286         }
 287         if(NULL == valueBuffer)
 288         {
 289           valueBuffer = tagBuffer;
 290         }
 291         ++valueLength;
 292       }
 293     }
 294   }
 295
 296   if(NULL != valueBuffer)
 297   {
 298     // Remove white spaces at the end of the value.
 299     valueLength -= numberOfWhiteSpace;
 300   }
 301
 302   if((NULL != nameBuffer) && (NULL != valueBuffer))
 303   {
 304     // Checks if the last attribute needs to be added.
 305     Attribute& attribute = *(tag.attributes.Begin() + attributeIndex);
 306     ++attributeIndex;
 307
 308     attribute.nameBuffer  = nameBuffer;
 309     attribute.valueBuffer = valueBuffer;
 310     attribute.nameLength  = nameLength;
 311     attribute.valueLength = valueLength;
 312   }
 313
 314   // Resize the vector of attributes.
 315   tag.attributes.Resize(attributeIndex);
 316 }
 317
 318 /**
 319  * @brief It parses a tag and its attributes if the given iterator @e it is pointing at a tag beginning.
 320  *
 321  * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
 322  * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
 323  * @param[out] tag The tag with its attributes.
 324  *
 325  * @return @e true if the iterator @e it is pointing a mark-up tag. Otherwise @e false.
 326  */
 327 bool IsTag(const char*&      markupStringBuffer,
 328            const char* const markupStringEndBuffer,
 329            Tag&              tag)
 330 {
 331   bool isTag              = false;
 332   bool isQuotationOpen    = false;
 333   bool attributesFound    = false;
 334   tag.isEndTag            = false;
 335   bool isPreviousLessThan = false;
 336   bool isPreviousSlash    = false;
 337
 338   const char character = *markupStringBuffer;
 339   if(LESS_THAN == character) // '<'
 340   {
 341     tag.buffer         = NULL;
 342     tag.length         = 0u;
 343     isPreviousLessThan = true;
 344
 345     // if the iterator is pointing to a '<' character, then check if it's a mark-up tag is needed.
 346     ++markupStringBuffer;
 347     if(markupStringBuffer < markupStringEndBuffer)
 348     {
 349       SkipWhiteSpace(markupStringBuffer, markupStringEndBuffer);
 350
 351       for(; (!isTag) && (markupStringBuffer < markupStringEndBuffer); ++markupStringBuffer)
 352       {
 353         const char character = *markupStringBuffer;
 354
 355         if(!isQuotationOpen && (SLASH == character)) // '/'
 356         {
 357           if(isPreviousLessThan)
 358           {
 359             tag.isEndTag = true;
 360           }
 361           else
 362           {
 363             // if the tag has a '/' it may be an end tag.
 364             isPreviousSlash = true;
 365           }
 366
 367           isPreviousLessThan = false;
 368           if((markupStringBuffer + 1u < markupStringEndBuffer) && (WHITE_SPACE >= *(markupStringBuffer + 1u)))
 369           {
 370             ++markupStringBuffer;
 371             SkipWhiteSpace(markupStringBuffer, markupStringEndBuffer);
 372             --markupStringBuffer;
 373           }
 374         }
 375         else if(GREATER_THAN == character) // '>'
 376         {
 377           isTag = true;
 378           if(isPreviousSlash)
 379           {
 380             tag.isEndTag = true;
 381           }
 382
 383           isPreviousSlash    = false;
 384           isPreviousLessThan = false;
 385         }
 386         else if(QUOTATION_MARK == character)
 387         {
 388           isQuotationOpen = !isQuotationOpen;
 389           ++tag.length;
 390
 391           isPreviousSlash    = false;
 392           isPreviousLessThan = false;
 393         }
 394         else if(WHITE_SPACE >= character) // ' '
 395         {
 396           // If the tag contains white spaces then it may have attributes.
 397           if(!isQuotationOpen)
 398           {
 399             attributesFound = true;
 400           }
 401           ++tag.length;
 402         }
 403         else
 404         {
 405           if(NULL == tag.buffer)
 406           {
 407             tag.buffer = markupStringBuffer;
 408           }
 409
 410           // If it's not any of the 'special' characters then just add it to the tag string.
 411           ++tag.length;
 412
 413           isPreviousSlash    = false;
 414           isPreviousLessThan = false;
 415         }
 416       }
 417     }
 418
 419     // If the tag string has white spaces, then parse the attributes is needed.
 420     if(attributesFound)
 421     {
 422       ParseAttributes(tag);
 423     }
 424   }
 425
 426   return isTag;
 427 }
 428
 429 /**
 430  * @brief Returns length of XHTML entity by parsing the text. It also determines if it is XHTML entity or not.
 431  *
 432  * @param[in] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
 433  * @param[in] markupStringEndBuffer Pointing to end of mark-up string buffer.
 434  *
 435  * @return Length of markupText in case of XHTML entity otherwise return 0.
 436  */
 437 unsigned int GetXHTMLEntityLength(const char*&      markupStringBuffer,
 438                                   const char* const markupStringEndBuffer)
 439 {
 440   char character = *markupStringBuffer;
 441   if(AMPERSAND == character) // '&'
 442   {
 443     // if the iterator is pointing to a '&' character, then check for ';' to find end to XHTML entity.
 444     ++markupStringBuffer;
 445     if(markupStringBuffer < markupStringEndBuffer)
 446     {
 447       unsigned int len = 1u;
 448       for(; markupStringBuffer < markupStringEndBuffer; ++markupStringBuffer)
 449       {
 450         character = *markupStringBuffer;
 451         ++len;
 452         if(SEMI_COLON == character) // ';'
 453         {
 454           // found end of XHTML entity
 455           ++markupStringBuffer;
 456           return len;
 457         }
 458         else if((AMPERSAND == character) || (BACK_SLASH == character) || (LESS_THAN == character))
 459         {
 460           return 0;
 461         }
 462       }
 463     }
 464   }
 465   return 0;
 466 }
 467
 468 /**
 469  * @brief It parses a XHTML string which has hex/decimal entity and fill its corresponging utf-8 string.
 470  *
 471  * @param[in] markupText The mark-up text buffer.
 472  * @param[out] utf-8 text Corresponding to markup Text
 473  *
 474  * @return true if string is successfully parsed otherwise false
 475  */
 476 bool XHTMLNumericEntityToUtf8(const char* markupText, char* utf8)
 477 {
 478   bool result = false;
 479
 480   if(NULL != markupText)
 481   {
 482     bool isHex = false;
 483
 484     // check if hex or decimal entity
 485     if((CHAR_ARRAY_END != *markupText) && (HEX_CODE == *markupText))
 486     {
 487       isHex = true;
 488       ++markupText;
 489     }
 490
 491     char*         end = NULL;
 492     unsigned long l   = strtoul(markupText, &end, (isHex ? 16 : 10)); // l contains UTF-32 code in case of correct XHTML entity
 493
 494     // check for valid XHTML numeric entities (between '#' or "#x" and ';')
 495     if((l > 0) && (l < ULONG_MAX) && (*end == SEMI_COLON)) // in case wrong XHTML entity is set eg. "&#23abcdefs;" in that case *end will be 'a'
 496     {
 497       /* characters XML 1.1 permits */
 498       if(((XHTML_DECIMAL_ENTITY_RANGE[0] < l) && (l <= XHTML_DECIMAL_ENTITY_RANGE[1])) ||
 499          ((XHTML_DECIMAL_ENTITY_RANGE[2] <= l) && (l <= XHTML_DECIMAL_ENTITY_RANGE[3])) ||
 500          ((XHTML_DECIMAL_ENTITY_RANGE[4] <= l) && (l <= XHTML_DECIMAL_ENTITY_RANGE[5])))
 501       {
 502         // Convert UTF32 code to UTF8
 503         Utf32ToUtf8(reinterpret_cast<const uint32_t* const>(&l), 1, reinterpret_cast<uint8_t*>(utf8));
 504         result = true;
 505       }
 506     }
 507   }
 508   return result;
 509 }
 510
 511 /**
 512  * @brief Processes a particular tag for the required run (color-run, font-run or underlined-character-run).
 513  *
 514  * @tparam RunType Whether ColorRun , FontDescriptionRun or UnderlinedCharacterRun
 515  *
 516  * @param[in/out] runsContainer The container containing all the runs
 517  * @param[in/out] styleStack The style stack
 518  * @param[in] tag The tag we are currently processing
 519  * @param[in] characterIndex The current character index
 520  * @param[in/out] runIndex The run index
 521  * @param[in/out] tagReference The tagReference we should increment/decrement
 522  * @param[in] parameterSettingFunction This function will be called to set run specific parameters
 523  */
 524 template<typename RunType>
 525 void ProcessTagForRun(
 526   Vector<RunType>&                          runsContainer,
 527   StyleStack&                               styleStack,
 528   const Tag&                                tag,
 529   const CharacterIndex                      characterIndex,
 530   StyleStack::RunIndex&                     runIndex,
 531   int&                                      tagReference,
 532   std::function<void(const Tag&, RunType&)> parameterSettingFunction)
 533 {
 534   if(!tag.isEndTag)
 535   {
 536     // Create a new run.
 537     RunType run;
 538     Initialize(run);
 539
 540     // Fill the run with the parameters.
 541     run.characterRun.characterIndex = characterIndex;
 542     parameterSettingFunction(tag, run);
 543
 544     // Push the run in the logical model.
 545     runsContainer.PushBack(run);
 546
 547     // Push the index of the run into the stack.
 548     styleStack.Push(runIndex);
 549
 550     // Point the next free run.
 551     ++runIndex;
 552
 553     // Increase reference
 554     ++tagReference;
 555   }
 556   else
 557   {
 558     if(tagReference > 0)
 559     {
 560       // Pop the top of the stack and set the number of characters of the run.
 561       RunType& run                        = *(runsContainer.Begin() + styleStack.Pop());
 562       run.characterRun.numberOfCharacters = characterIndex - run.characterRun.characterIndex;
 563       --tagReference;
 564     }
 565   }
 566 }
 567
 568 /**
 569  * @brief Processes the item tag
 570  *
 571  * @param[in/out] markupProcessData The markup process data
 572  * @param[in] tag The current tag
 573  * @param[in/out] characterIndex The current character index
 574  */
 575 void ProcessItemTag(
 576   MarkupProcessData& markupProcessData,
 577   const Tag          tag,
 578   CharacterIndex&    characterIndex)
 579 {
 580   if(tag.isEndTag)
 581   {
 582     // Create an embedded item instance.
 583     EmbeddedItem item;
 584     item.characterIndex = characterIndex;
 585     ProcessEmbeddedItem(tag, item);
 586
 587     markupProcessData.items.PushBack(item);
 588
 589     // Insert white space character that will be replaced by the item.
 590     markupProcessData.markupProcessedText.append(1u, WHITE_SPACE);
 591     ++characterIndex;
 592   }
 593 }
 594
 595 /**
 596  * @brief Processes the anchor tag
 597  *
 598  * @param[in/out] markupProcessData The markup process data
 599  * @param[in] tag The current tag
 600  * @param[in/out] characterIndex The current character index
 601  */
 602 void ProcessAnchorTag(
 603   MarkupProcessData& markupProcessData,
 604   const Tag          tag,
 605   CharacterIndex&    characterIndex)
 606 {
 607   if(!tag.isEndTag)
 608   {
 609     // Create an anchor instance.
 610     Anchor anchor;
 611     anchor.startIndex = characterIndex;
 612     anchor.endIndex   = 0u;
 613     ProcessAnchor(tag, anchor);
 614     markupProcessData.anchors.PushBack(anchor);
 615   }
 616   else
 617   {
 618     // Update end index.
 619     unsigned int count = markupProcessData.anchors.Count();
 620     if(count > 0)
 621     {
 622       markupProcessData.anchors[count - 1].endIndex = characterIndex;
 623     }
 624   }
 625 }
 626
 627 /**
 628  * @brief Resizes the model's vectors
 629  *
 630  * @param[in/out] markupProcessData The markup process data
 631  * @param[in] fontRunIndex The font run index
 632  * @param[in] colorRunIndex The color run index
 633  * @param[in] underlinedCharacterRunIndex The underlined character run index
 634  */
 635 void ResizeModelVectors(MarkupProcessData& markupProcessData, const StyleStack::RunIndex fontRunIndex, const StyleStack::RunIndex colorRunIndex, const StyleStack::RunIndex underlinedCharacterRunIndex)
 636 {
 637   markupProcessData.fontRuns.Resize(fontRunIndex);
 638   markupProcessData.colorRuns.Resize(colorRunIndex);
 639   markupProcessData.underlinedCharacterRuns.Resize(underlinedCharacterRunIndex);
 640
 641 #ifdef DEBUG_ENABLED
 642   for(unsigned int i = 0; i < colorRunIndex; ++i)
 643   {
 644     ColorRun& run = markupProcessData.colorRuns[i];
 645     DALI_LOG_INFO(gLogFilter, Debug::Verbose, "run[%d] index: %d, length: %d, color %f,%f,%f,%f\n", i, run.characterRun.characterIndex, run.characterRun.numberOfCharacters, run.color.r, run.color.g, run.color.b, run.color.a);
 646   }
 647 #endif
 648 }
 649
 650 /**
 651  * @brief Processes the markup string buffer
 652  *
 653  * @param[in/out] markupProcessData The markup process data
 654  * @param[in/out] markupStringBuffer The markup string buffer pointer
 655  * @param[in] markupStringEndBuffer The markup string end buffer pointer
 656  * @param[in/out] characterIndex The current character index
 657  */
 658 void ProcessMarkupStringBuffer(
 659   MarkupProcessData& markupProcessData,
 660   const char*&       markupStringBuffer,
 661   const char* const  markupStringEndBuffer,
 662   CharacterIndex&    characterIndex)
 663 {
 664   unsigned char character    = *markupStringBuffer;
 665   const char*   markupBuffer = markupStringBuffer;
 666   unsigned char count        = GetUtf8Length(character);
 667   char          utf8[8];
 668
 669   if((BACK_SLASH == character) && (markupStringBuffer + 1u < markupStringEndBuffer))
 670   {
 671     // Adding < , >  or & special character.
 672     const unsigned char nextCharacter = *(markupStringBuffer + 1u);
 673     if((LESS_THAN == nextCharacter) || (GREATER_THAN == nextCharacter) || (AMPERSAND == nextCharacter))
 674     {
 675       character = nextCharacter;
 676       ++markupStringBuffer;
 677
 678       count        = GetUtf8Length(character);
 679       markupBuffer = markupStringBuffer;
 680     }
 681   }
 682   else // checking if contains XHTML entity or not
 683   {
 684     const unsigned int len = GetXHTMLEntityLength(markupStringBuffer, markupStringEndBuffer);
 685
 686     // Parse markupStringTxt if it contains XHTML Entity between '&' and ';'
 687     if(len > 0)
 688     {
 689       char* entityCode = NULL;
 690       bool  result     = false;
 691       count            = 0;
 692
 693       // Checking if XHTML Numeric Entity
 694       if(HASH == *(markupBuffer + 1u))
 695       {
 696         entityCode = &utf8[0];
 697         // markupBuffer is currently pointing to '&'. By adding 2u to markupBuffer it will point to numeric string by skipping "&#'
 698         result = XHTMLNumericEntityToUtf8((markupBuffer + 2u), entityCode);
 699       }
 700       else // Checking if XHTML Named Entity
 701       {
 702         entityCode = const_cast<char*>(NamedEntityToUtf8(markupBuffer, len));
 703         result     = (entityCode != NULL);
 704       }
 705       if(result)
 706       {
 707         markupBuffer = entityCode; //utf8 text assigned to markupBuffer
 708         character    = markupBuffer[0];
 709       }
 710       else
 711       {
 712         DALI_LOG_INFO(gLogFilter, Debug::Verbose, "Not valid XHTML entity : (%.*s) \n", len, markupBuffer);
 713         markupBuffer = NULL;
 714       }
 715     }
 716     else // in case string conatins Start of XHTML Entity('&') but not its end character(';')
 717     {
 718       if(character == AMPERSAND)
 719       {
 720         markupBuffer = NULL;
 721         DALI_LOG_INFO(gLogFilter, Debug::Verbose, "Not Well formed XHTML content \n");
 722       }
 723     }
 724   }
 725
 726   if(markupBuffer != NULL)
 727   {
 728     const unsigned char numberOfBytes = GetUtf8Length(character);
 729     markupProcessData.markupProcessedText.push_back(character);
 730
 731     for(unsigned char i = 1u; i < numberOfBytes; ++i)
 732     {
 733       ++markupBuffer;
 734       markupProcessData.markupProcessedText.push_back(*markupBuffer);
 735     }
 736
 737     ++characterIndex;
 738     markupStringBuffer += count;
 739   }
 740 }
 741
 742 } // namespace
 743
 744 void ProcessMarkupString(const std::string& markupString, MarkupProcessData& markupProcessData)
 745 {
 746   DALI_LOG_INFO(gLogFilter, Debug::Verbose, "markupString: %s\n", markupString.c_str());
 747
 748   // Reserve space for the plain text.
 749   const Length markupStringSize = markupString.size();
 750   markupProcessData.markupProcessedText.reserve(markupStringSize);
 751
 752   // Stores a struct with the index to the first character of the run, the type of run and its parameters.
 753   StyleStack styleStack;
 754
 755   // Points the next free position in the vector of runs.
 756   StyleStack::RunIndex colorRunIndex                = 0u;
 757   StyleStack::RunIndex fontRunIndex                 = 0u;
 758   StyleStack::RunIndex underlinedCharacterRunIndex  = 0u;
 759
 760   // check tag reference
 761   int colorTagReference = 0u;
 762   int fontTagReference  = 0u;
 763   int iTagReference     = 0u;
 764   int bTagReference     = 0u;
 765   int uTagReference     = 0u;
 766
 767   // Give an initial default value to the model's vectors.
 768   markupProcessData.colorRuns.Reserve(DEFAULT_VECTOR_SIZE);
 769   markupProcessData.fontRuns.Reserve(DEFAULT_VECTOR_SIZE);
 770   markupProcessData.underlinedCharacterRuns.Reserve(DEFAULT_VECTOR_SIZE);
 771
 772   // Get the mark-up string buffer.
 773   const char*       markupStringBuffer    = markupString.c_str();
 774   const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
 775
 776   Tag            tag;
 777   CharacterIndex characterIndex = 0u;
 778   for(; markupStringBuffer < markupStringEndBuffer;)
 779   {
 780     tag.attributes.Clear();
 781     if(IsTag(markupStringBuffer,
 782              markupStringEndBuffer,
 783              tag))
 784     {
 785       if(TokenComparison(XHTML_COLOR_TAG, tag.buffer, tag.length))
 786       {
 787         ProcessTagForRun<ColorRun>(
 788           markupProcessData.colorRuns, styleStack, tag, characterIndex, colorRunIndex, colorTagReference, [](const Tag& tag, ColorRun& run) { ProcessColorTag(tag, run); });
 789       } // <color></color>
 790       else if(TokenComparison(XHTML_I_TAG, tag.buffer, tag.length))
 791       {
 792         ProcessTagForRun<FontDescriptionRun>(
 793           markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, iTagReference, [](const Tag&, FontDescriptionRun& fontRun) {
 794             fontRun.slant        = TextAbstraction::FontSlant::ITALIC;
 795             fontRun.slantDefined = true;
 796           });
 797       } // <i></i>
 798       else if(TokenComparison(XHTML_U_TAG, tag.buffer, tag.length))
 799       {
 800         ProcessTagForRun<UnderlinedCharacterRun>(
 801           markupProcessData.underlinedCharacterRuns, styleStack, tag, characterIndex, underlinedCharacterRunIndex, uTagReference, [](const Tag& tag, UnderlinedCharacterRun& run) {  });
 802       }  // <u></u>
 803       else if(TokenComparison(XHTML_B_TAG, tag.buffer, tag.length))
 804       {
 805         ProcessTagForRun<FontDescriptionRun>(
 806           markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, bTagReference, [](const Tag&, FontDescriptionRun& fontRun) {
 807             fontRun.weight        = TextAbstraction::FontWeight::BOLD;
 808             fontRun.weightDefined = true;
 809           });
 810       } // <b></b>
 811       else if(TokenComparison(XHTML_FONT_TAG, tag.buffer, tag.length))
 812       {
 813         ProcessTagForRun<FontDescriptionRun>(
 814           markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, fontTagReference, [](const Tag& tag, FontDescriptionRun& fontRun) { ProcessFontTag(tag, fontRun); });
 815       } // <font></font>
 816       else if(TokenComparison(XHTML_ANCHOR_TAG, tag.buffer, tag.length))
 817       {
 818         /* Anchor */
 819         ProcessAnchorTag(markupProcessData, tag, characterIndex);
 820         /* Color */
 821         ProcessTagForRun<ColorRun>(
 822           markupProcessData.colorRuns, styleStack, tag, characterIndex, colorRunIndex, colorTagReference, [](const Tag& tag, ColorRun& run) {
 823             run.color = Color::BLUE;
 824             ProcessColorTag(tag, run);
 825           });
 826         /* TODO - underline */
 827       } // <a href=https://www.tizen.org>tizen</a>
 828       else if(TokenComparison(XHTML_SHADOW_TAG, tag.buffer, tag.length))
 829       {
 830         // TODO: If !tag.isEndTag, then create a new shadow run.
 831         //       else Pop the top of the stack and set the number of characters of the run.
 832       } // <shadow></shadow>
 833       else if(TokenComparison(XHTML_GLOW_TAG, tag.buffer, tag.length))
 834       {
 835         // TODO: If !tag.isEndTag, then create a new glow run.
 836         //       else Pop the top of the stack and set the number of characters of the run.
 837       } // <glow></glow>
 838       else if(TokenComparison(XHTML_OUTLINE_TAG, tag.buffer, tag.length))
 839       {
 840         // TODO: If !tag.isEndTag, then create a new outline run.
 841         //       else Pop the top of the stack and set the number of characters of the run.
 842       } // <outline></outline>
 843       else if(TokenComparison(XHTML_ITEM_TAG, tag.buffer, tag.length))
 844       {
 845         ProcessItemTag(markupProcessData, tag, characterIndex);
 846       }
 847     } // end if( IsTag() )
 848     else if(markupStringBuffer < markupStringEndBuffer)
 849     {
 850       ProcessMarkupStringBuffer(markupProcessData, markupStringBuffer, markupStringEndBuffer, characterIndex);
 851     }
 852   }
 853
 854   // Resize the model's vectors.
 855   ResizeModelVectors(markupProcessData, fontRunIndex, colorRunIndex, underlinedCharacterRunIndex);
 856 }
 857
 858 } // namespace Text
 859
 860 } // namespace Toolkit
 861
 862 } // namespace Dali