dali-toolkit/internal/text/markup-processor.cpp

   1 /*
   2  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // FILE HEADER
  19 #include <dali-toolkit/internal/text/markup-processor.h>
  20
  21 // INTERNAL INCLUDES
  22 #include <dali-toolkit/internal/text/character-set-conversion.h>
  23 #include <dali-toolkit/internal/text/markup-processor-color.h>
  24 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
  25
  26 namespace Dali
  27 {
  28
  29 namespace Toolkit
  30 {
  31
  32 namespace Text
  33 {
  34
  35 namespace
  36 {
  37 // HTML-ISH tag and attribute constants.
  38 // Note they must be lower case in order to make the comparison to work
  39 // as the parser converts all the read tags to lower case.
  40 const std::string XHTML_COLOR_TAG("color");
  41 const std::string XHTML_FONT_TAG("font");
  42 const std::string XHTML_B_TAG("b");
  43 const std::string XHTML_I_TAG("i");
  44 const std::string XHTML_U_TAG("u");
  45 const std::string XHTML_SHADOW_TAG("shadow");
  46 const std::string XHTML_GLOW_TAG("glow");
  47 const std::string XHTML_OUTLINE_TAG("outline");
  48
  49 const char LESS_THAN         = '<';
  50 const char GREATER_THAN      = '>';
  51 const char EQUAL             = '=';
  52 const char QUOTATION_MARK    = '\'';
  53 const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character  CR
  54 const char LINE_SEPARATOR_LF = 0x0A; // New line character         LF
  55 const char SLASH             = '/';
  56 const char BACK_SLASH        = '\\';
  57
  58 const char WHITE_SPACE       = 0x20; // ASCII value of the white space.
  59
  60 const unsigned int MAX_NUM_OF_ATTRIBUTES =  5u; ///< The font tag has the 'family', 'size' 'weight', 'width' and 'slant' attrubutes.
  61
  62 const unsigned int DEFAULT_VECTOR_SIZE   = 16u; ///< Default size of run vectors.
  63
  64 /**
  65  * @brief Struct used to retrieve the style runs from the mark-up string.
  66  */
  67 struct StyleStack
  68 {
  69   typedef VectorBase::SizeType RunIndex;
  70
  71   Vector<RunIndex>  stack;    ///< Use a vector as a style stack. Stores the indices pointing where the run is stored inside the logical model.
  72   unsigned int topIndex; ///< Points the top of the stack.
  73
  74   StyleStack()
  75   : stack(),
  76     topIndex( 0u )
  77   {
  78     stack.Resize( DEFAULT_VECTOR_SIZE );
  79   }
  80
  81   void Push( RunIndex index )
  82   {
  83     // Check if there is space inside the style stack.
  84     const VectorBase::SizeType size = stack.Count();
  85     if( topIndex >= size )
  86     {
  87       // Resize the style stack.
  88       stack.Resize( 2u * size );
  89     }
  90
  91     // Set the run index in the top of the stack.
  92     *( stack.Begin() + topIndex ) = index;
  93
  94     // Reposition the pointer to the top of the stack.
  95     ++topIndex;
  96   }
  97
  98   RunIndex Pop()
  99   {
 100     // Pop the top of the stack.
 101     --topIndex;
 102     return *( stack.Begin() + topIndex );
 103   }
 104 };
 105
 106 /**
 107  * @brief Splits the tag string into the tag name and its attributes.
 108  *
 109  * The attributes are stored in a vector in the tag.
 110  *
 111  * @param[in,out] tag The tag.
 112  */
 113 void ParseAttributes( Tag& tag )
 114 {
 115   tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
 116
 117   // Find first the tag name.
 118   bool isQuotationOpen = false;
 119
 120   const char* tagBuffer = tag.buffer;
 121   const char* const tagEndBuffer = tagBuffer + tag.length;
 122   tag.length = 0u;
 123   for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
 124   {
 125     const char character = *tagBuffer;
 126     if( WHITE_SPACE < character )
 127     {
 128       ++tag.length;
 129     }
 130     else
 131     {
 132       // Stops counting the length of the tag when a white space is found.
 133       // @note a white space is the WHITE_SPACE character and anything below as 'tab', 'return' or 'control characters'.
 134       break;
 135     }
 136   }
 137   SkipWhiteSpace( tagBuffer, tagEndBuffer );
 138
 139   // Find the attributes.
 140   unsigned int attributeIndex = 0u;
 141   const char* nameBuffer = NULL;
 142   const char* valueBuffer = NULL;
 143   Length nameLength = 0u;
 144   Length valueLength = 0u;
 145
 146   bool addToNameValue = true;
 147   Length numberOfWhiteSpace = 0u;
 148   for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
 149   {
 150     const char character = *tagBuffer;
 151     if( ( WHITE_SPACE >= character ) && !isQuotationOpen )
 152     {
 153       if( NULL != valueBuffer )
 154       {
 155         // Remove white spaces at the end of the value.
 156         valueLength -= numberOfWhiteSpace;
 157       }
 158
 159       if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
 160       {
 161         // Every time a white space is found, a new attribute is created and stored in the attributes vector.
 162         Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
 163         ++attributeIndex;
 164
 165         attribute.nameBuffer = nameBuffer;
 166         attribute.valueBuffer = valueBuffer;
 167         attribute.nameLength = nameLength;
 168         attribute.valueLength = valueLength;
 169
 170         nameBuffer = NULL;
 171         valueBuffer = NULL;
 172         nameLength = 0u;
 173         valueLength = 0u;
 174
 175         addToNameValue = true; // next read characters will be added to the name.
 176       }
 177     }
 178     else if( EQUAL == character ) // '='
 179     {
 180       addToNameValue = false; // next read characters will be added to the value.
 181       SkipWhiteSpace( tagBuffer, tagEndBuffer );
 182     }
 183     else if( QUOTATION_MARK == character ) // '\''
 184     {
 185       // Do not add quotation marks to neither name nor value.
 186       isQuotationOpen = !isQuotationOpen;
 187
 188       if( isQuotationOpen )
 189       {
 190         ++tagBuffer;
 191         SkipWhiteSpace( tagBuffer, tagEndBuffer );
 192         --tagBuffer;
 193       }
 194     }
 195     else
 196     {
 197       // Adds characters to the name or the value.
 198       if( addToNameValue )
 199       {
 200         if( NULL == nameBuffer )
 201         {
 202           nameBuffer = tagBuffer;
 203         }
 204         ++nameLength;
 205       }
 206       else
 207       {
 208         if( isQuotationOpen )
 209         {
 210           if( WHITE_SPACE >= character )
 211           {
 212             ++numberOfWhiteSpace;
 213           }
 214           else
 215           {
 216             numberOfWhiteSpace = 0u;
 217           }
 218         }
 219         if( NULL == valueBuffer )
 220         {
 221           valueBuffer = tagBuffer;
 222         }
 223         ++valueLength;
 224       }
 225     }
 226   }
 227
 228   if( NULL != valueBuffer )
 229   {
 230     // Remove white spaces at the end of the value.
 231     valueLength -= numberOfWhiteSpace;
 232   }
 233
 234   if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
 235   {
 236     // Checks if the last attribute needs to be added.
 237     Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
 238     ++attributeIndex;
 239
 240     attribute.nameBuffer = nameBuffer;
 241     attribute.valueBuffer = valueBuffer;
 242     attribute.nameLength = nameLength;
 243     attribute.valueLength = valueLength;
 244   }
 245
 246   // Resize the vector of attributes.
 247   tag.attributes.Resize( attributeIndex );
 248 }
 249
 250 /**
 251  * @brief It parses a tag and its attributes if the given iterator @e it is pointing at a tag beginning.
 252  *
 253  * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
 254  * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
 255  * @param[out] tag The tag with its attributes.
 256  *
 257  * @return @e true if the iterator @e it is pointing a mark-up tag. Otherwise @e false.
 258  */
 259 bool IsTag( const char*& markupStringBuffer,
 260             const char* const markupStringEndBuffer,
 261             Tag& tag )
 262 {
 263   bool isTag = false;
 264   bool isQuotationOpen = false;
 265   bool attributesFound = false;
 266   tag.isEndTag = false;
 267
 268   const char character = *markupStringBuffer;
 269   if( LESS_THAN == character ) // '<'
 270   {
 271     tag.buffer = NULL;
 272     tag.length = 0u;
 273
 274     // if the iterator is pointing to a '<' character, then check if it's a mark-up tag is needed.
 275     ++markupStringBuffer;
 276     if( markupStringBuffer < markupStringEndBuffer )
 277     {
 278       SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
 279
 280       for( ; ( !isTag ) && ( markupStringBuffer < markupStringEndBuffer ); ++markupStringBuffer )
 281       {
 282         const char character = *markupStringBuffer;
 283
 284         if( SLASH == character ) // '/'
 285         {
 286           // if the tag has a '/' then it's an end or empty tag.
 287           tag.isEndTag = true;
 288
 289           if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) && ( !isQuotationOpen ) )
 290           {
 291             ++markupStringBuffer;
 292             SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
 293             --markupStringBuffer;
 294           }
 295         }
 296         else if( GREATER_THAN == character ) // '>'
 297         {
 298           isTag = true;
 299         }
 300         else if( QUOTATION_MARK == character )
 301         {
 302           isQuotationOpen = !isQuotationOpen;
 303           ++tag.length;
 304         }
 305         else if( WHITE_SPACE >= character ) // ' '
 306         {
 307           // If the tag contains white spaces then it may have attributes.
 308           if( !isQuotationOpen )
 309           {
 310             attributesFound = true;
 311           }
 312           ++tag.length;
 313         }
 314         else
 315         {
 316           if( NULL == tag.buffer )
 317           {
 318             tag.buffer = markupStringBuffer;
 319           }
 320
 321           // If it's not any of the 'special' characters then just add it to the tag string.
 322           ++tag.length;
 323         }
 324       }
 325     }
 326
 327     // If the tag string has white spaces, then parse the attributes is needed.
 328     if( attributesFound )
 329     {
 330       ParseAttributes( tag );
 331     }
 332   }
 333
 334   return isTag;
 335 }
 336
 337 } // namespace
 338
 339 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
 340 {
 341   // Reserve space for the plain text.
 342   const Length markupStringSize = markupString.size();
 343   markupProcessData.markupProcessedText.reserve( markupStringSize );
 344
 345   // Stores a struct with the index to the first character of the run, the type of run and its parameters.
 346   StyleStack styleStack;
 347
 348   // Points the next free position in the vector of runs.
 349   StyleStack::RunIndex colorRunIndex = 0u;
 350
 351   // Give an initial default value to the model's vectors.
 352   markupProcessData.colorRuns.Reserve( DEFAULT_VECTOR_SIZE );
 353
 354   // Get the mark-up string buffer.
 355   const char* markupStringBuffer = markupString.c_str();
 356   const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
 357
 358   Tag tag;
 359   CharacterIndex characterIndex = 0u;
 360   for( ; markupStringBuffer < markupStringEndBuffer; )
 361   {
 362     if( IsTag( markupStringBuffer,
 363                markupStringEndBuffer,
 364                tag ) )
 365     {
 366       if( TokenComparison( XHTML_COLOR_TAG, tag.buffer, tag.length ) )
 367       {
 368         if( !tag.isEndTag )
 369         {
 370           // Create a new color run.
 371           ColorRun colorRun;
 372           colorRun.characterRun.numberOfCharacters = 0u;
 373
 374           // Set the start character index.
 375           colorRun.characterRun.characterIndex = characterIndex;
 376
 377           // Fill the run with the attributes.
 378           ProcessColorTag( tag, colorRun );
 379
 380           // Push the color run in the logical model.
 381           markupProcessData.colorRuns.PushBack( colorRun );
 382
 383           // Push the index of the run into the stack.
 384           styleStack.Push( colorRunIndex );
 385
 386           // Point the next color run.
 387           ++colorRunIndex;
 388         }
 389         else
 390         {
 391           // Pop the top of the stack and set the number of characters of the run.
 392           ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() );
 393           colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex;
 394         }
 395       } // <color></color>
 396       else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
 397       {
 398         if( !tag.isEndTag )
 399         {
 400           // Create a new font run.
 401         }
 402         else
 403         {
 404           // Pop the top of the stack and set the number of characters of the run.
 405         }
 406       } // <i></i>
 407       else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
 408       {
 409         if( !tag.isEndTag )
 410         {
 411           // Create a new underline run.
 412         }
 413         else
 414         {
 415           // Pop the top of the stack and set the number of characters of the run.
 416         }
 417       } // <u></u>
 418       else if( TokenComparison( XHTML_B_TAG, tag.buffer, tag.length ) )
 419       {
 420         if( !tag.isEndTag )
 421         {
 422           // Create a new font run.
 423         }
 424         else
 425         {
 426           // Pop the top of the stack and set the number of characters of the run.
 427         }
 428       } // <b></b>
 429       else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
 430       {
 431         if( !tag.isEndTag )
 432         {
 433           // Create a new font run.
 434         }
 435         else
 436         {
 437           // Pop the top of the stack and set the number of characters of the run.
 438         }
 439       } // <font></font>
 440       else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
 441       {
 442         if( !tag.isEndTag )
 443         {
 444           // Create a new shadow run.
 445         }
 446         else
 447         {
 448           // Pop the top of the stack and set the number of characters of the run.
 449         }
 450       } // <shadow></shadow>
 451       else if( TokenComparison( XHTML_GLOW_TAG, tag.buffer, tag.length ) )
 452       {
 453         if( !tag.isEndTag )
 454         {
 455           // Create a new glow run.
 456         }
 457         else
 458         {
 459           // Pop the top of the stack and set the number of characters of the run.
 460         }
 461       } // <glow></glow>
 462       else if( TokenComparison( XHTML_OUTLINE_TAG, tag.buffer, tag.length ) )
 463       {
 464         if( !tag.isEndTag )
 465         {
 466           // Create a new outline run.
 467         }
 468         else
 469         {
 470           // Pop the top of the stack and set the number of characters of the run.
 471         }
 472       } // <outline></outline>
 473     }  // end if( IsTag() )
 474     else
 475     {
 476       unsigned char character = *markupStringBuffer;
 477
 478       if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
 479       {
 480         // Adding < or > special character.
 481         const unsigned char nextCharacter = *( markupStringBuffer + 1u );
 482         if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) )
 483         {
 484           character = nextCharacter;
 485           ++markupStringBuffer;
 486         }
 487       }
 488       else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
 489       {
 490         // Replacing CR+LF end line by LF.
 491         if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) )
 492         {
 493           character = LINE_SEPARATOR_LF;
 494           ++markupStringBuffer;
 495         }
 496       }
 497
 498       const unsigned char numberOfBytes = GetUtf8Length( character );
 499
 500       markupProcessData.markupProcessedText.push_back( character );
 501       for( unsigned char i = 1u; i < numberOfBytes; ++i )
 502       {
 503         ++markupStringBuffer;
 504         markupProcessData.markupProcessedText.push_back( *markupStringBuffer );
 505       }
 506
 507       ++characterIndex;
 508       ++markupStringBuffer;
 509     }
 510   }
 511
 512   // Resize the model's vectors.
 513   if( 0u == colorRunIndex )
 514   {
 515     markupProcessData.colorRuns.Clear();
 516   }
 517   else
 518   {
 519     markupProcessData.colorRuns.Resize( colorRunIndex );
 520   }
 521 }
 522
 523 } // namespace Text
 524
 525 } // namespace Toolkit
 526
 527 } // namespace Dali