dali-toolkit/internal/text/markup-processor.cpp

   1 /*
   2  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // FILE HEADER
  19 #include <dali-toolkit/internal/text/markup-processor.h>
  20
  21 // INTERNAL INCLUDES
  22 #include <dali-toolkit/internal/text/character-set-conversion.h>
  23 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
  24
  25 namespace Dali
  26 {
  27
  28 namespace Toolkit
  29 {
  30
  31 namespace Text
  32 {
  33
  34 namespace
  35 {
  36 // HTML-ISH tag and attribute constants.
  37 // Note they must be lower case in order to make the comparison to work
  38 // as the parser converts all the read tags to lower case.
  39 const std::string XHTML_COLOR_TAG("color");
  40 const std::string XHTML_FONT_TAG("font");
  41 const std::string XHTML_B_TAG("b");
  42 const std::string XHTML_I_TAG("i");
  43 const std::string XHTML_U_TAG("u");
  44 const std::string XHTML_SHADOW_TAG("shadow");
  45 const std::string XHTML_GLOW_TAG("glow");
  46 const std::string XHTML_OUTLINE_TAG("outline");
  47
  48 const char LESS_THAN         = '<';
  49 const char GREATER_THAN      = '>';
  50 const char EQUAL             = '=';
  51 const char QUOTATION_MARK    = '\'';
  52 const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character  CR
  53 const char LINE_SEPARATOR_LF = 0x0A; // New line character         LF
  54 const char SLASH             = '/';
  55 const char BACK_SLASH        = '\\';
  56
  57 const char WHITE_SPACE       = 0x20; // ASCII value of the white space.
  58
  59 const unsigned int MAX_NUM_OF_ATTRIBUTES =  5u; ///< The font tag has the 'family', 'size' 'weight', 'width' and 'slant' attrubutes.
  60
  61 /**
  62  * @brief Splits the tag string into the tag name and its attributes.
  63  *
  64  * The attributes are stored in a vector in the tag.
  65  *
  66  * @param[in,out] tag The tag.
  67  */
  68 void ParseAttributes( Tag& tag )
  69 {
  70   tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
  71
  72   // Find first the tag name.
  73   bool isQuotationOpen = false;
  74
  75   const char* tagBuffer = tag.buffer;
  76   const char* const tagEndBuffer = tagBuffer + tag.length;
  77   tag.length = 0u;
  78   for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
  79   {
  80     const char character = *tagBuffer;
  81     if( WHITE_SPACE < character )
  82     {
  83       ++tag.length;
  84     }
  85     else
  86     {
  87       // Stops counting the length of the tag when a white space is found.
  88       // @note a white space is the WHITE_SPACE character and anything below as 'tab', 'return' or 'control characters'.
  89       break;
  90     }
  91   }
  92   SkipWhiteSpace( tagBuffer, tagEndBuffer );
  93
  94   // Find the attributes.
  95   unsigned int attributeIndex = 0u;
  96   const char* nameBuffer = NULL;
  97   const char* valueBuffer = NULL;
  98   Length nameLength = 0u;
  99   Length valueLength = 0u;
 100
 101   bool addToNameValue = true;
 102   Length numberOfWhiteSpace = 0u;
 103   for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
 104   {
 105     const char character = *tagBuffer;
 106     if( ( WHITE_SPACE >= character ) && !isQuotationOpen )
 107     {
 108       if( NULL != valueBuffer )
 109       {
 110         // Remove white spaces at the end of the value.
 111         valueLength -= numberOfWhiteSpace;
 112       }
 113
 114       if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
 115       {
 116         // Every time a white space is found, a new attribute is created and stored in the attributes vector.
 117         Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
 118         ++attributeIndex;
 119
 120         attribute.nameBuffer = nameBuffer;
 121         attribute.valueBuffer = valueBuffer;
 122         attribute.nameLength = nameLength;
 123         attribute.valueLength = valueLength;
 124
 125         nameBuffer = NULL;
 126         valueBuffer = NULL;
 127         nameLength = 0u;
 128         valueLength = 0u;
 129
 130         addToNameValue = true; // next read characters will be added to the name.
 131       }
 132     }
 133     else if( EQUAL == character ) // '='
 134     {
 135       addToNameValue = false; // next read characters will be added to the value.
 136       SkipWhiteSpace( tagBuffer, tagEndBuffer );
 137     }
 138     else if( QUOTATION_MARK == character ) // '\''
 139     {
 140       // Do not add quotation marks to neither name nor value.
 141       isQuotationOpen = !isQuotationOpen;
 142
 143       if( isQuotationOpen )
 144       {
 145         ++tagBuffer;
 146         SkipWhiteSpace( tagBuffer, tagEndBuffer );
 147         --tagBuffer;
 148       }
 149     }
 150     else
 151     {
 152       // Adds characters to the name or the value.
 153       if( addToNameValue )
 154       {
 155         if( NULL == nameBuffer )
 156         {
 157           nameBuffer = tagBuffer;
 158         }
 159         ++nameLength;
 160       }
 161       else
 162       {
 163         if( isQuotationOpen )
 164         {
 165           if( WHITE_SPACE >= character )
 166           {
 167             ++numberOfWhiteSpace;
 168           }
 169           else
 170           {
 171             numberOfWhiteSpace = 0u;
 172           }
 173         }
 174         if( NULL == valueBuffer )
 175         {
 176           valueBuffer = tagBuffer;
 177         }
 178         ++valueLength;
 179       }
 180     }
 181   }
 182
 183   if( NULL != valueBuffer )
 184   {
 185     // Remove white spaces at the end of the value.
 186     valueLength -= numberOfWhiteSpace;
 187   }
 188
 189   if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
 190   {
 191     // Checks if the last attribute needs to be added.
 192     Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
 193     ++attributeIndex;
 194
 195     attribute.nameBuffer = nameBuffer;
 196     attribute.valueBuffer = valueBuffer;
 197     attribute.nameLength = nameLength;
 198     attribute.valueLength = valueLength;
 199   }
 200
 201   // Resize the vector of attributes.
 202   tag.attributes.Resize( attributeIndex );
 203 }
 204
 205 /**
 206  * @brief It parses a tag and its attributes if the given iterator @e it is pointing at a tag beginning.
 207  *
 208  * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
 209  * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
 210  * @param[out] tag The tag with its attributes.
 211  *
 212  * @return @e true if the iterator @e it is pointing a mark-up tag. Otherwise @e false.
 213  */
 214 bool IsTag( const char*& markupStringBuffer,
 215             const char* const markupStringEndBuffer,
 216             Tag& tag )
 217 {
 218   bool isTag = false;
 219   bool isQuotationOpen = false;
 220   bool attributesFound = false;
 221   tag.isEndTag = false;
 222
 223   const char character = *markupStringBuffer;
 224   if( LESS_THAN == character ) // '<'
 225   {
 226     tag.buffer = NULL;
 227     tag.length = 0u;
 228
 229     // if the iterator is pointing to a '<' character, then check if it's a mark-up tag is needed.
 230     ++markupStringBuffer;
 231     if( markupStringBuffer < markupStringEndBuffer )
 232     {
 233       SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
 234
 235       for( ; ( !isTag ) && ( markupStringBuffer < markupStringEndBuffer ); ++markupStringBuffer )
 236       {
 237         const char character = *markupStringBuffer;
 238
 239         if( SLASH == character ) // '/'
 240         {
 241           // if the tag has a '/' then it's an end or empty tag.
 242           tag.isEndTag = true;
 243
 244           if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) && ( !isQuotationOpen ) )
 245           {
 246             ++markupStringBuffer;
 247             SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
 248             --markupStringBuffer;
 249           }
 250         }
 251         else if( GREATER_THAN == character ) // '>'
 252         {
 253           isTag = true;
 254         }
 255         else if( QUOTATION_MARK == character )
 256         {
 257           isQuotationOpen = !isQuotationOpen;
 258           ++tag.length;
 259         }
 260         else if( WHITE_SPACE >= character ) // ' '
 261         {
 262           // If the tag contains white spaces then it may have attributes.
 263           if( !isQuotationOpen )
 264           {
 265             attributesFound = true;
 266           }
 267           ++tag.length;
 268         }
 269         else
 270         {
 271           if( NULL == tag.buffer )
 272           {
 273             tag.buffer = markupStringBuffer;
 274           }
 275
 276           // If it's not any of the 'special' characters then just add it to the tag string.
 277           ++tag.length;
 278         }
 279       }
 280     }
 281
 282     // If the tag string has white spaces, then parse the attributes is needed.
 283     if( attributesFound )
 284     {
 285       ParseAttributes( tag );
 286     }
 287   }
 288
 289   return isTag;
 290 }
 291
 292 } // namespace
 293
 294 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
 295 {
 296   // Reserve space for the plain text.
 297   const Length markupStringSize = markupString.size();
 298   markupProcessData.markupProcessedText.reserve( markupStringSize );
 299
 300   // Get the mark-up string buffer.
 301   const char* markupStringBuffer = markupString.c_str();
 302   const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
 303
 304   Tag tag;
 305   CharacterIndex characterIndex = 0u;
 306   for( ; markupStringBuffer < markupStringEndBuffer; )
 307   {
 308     if( IsTag( markupStringBuffer,
 309                markupStringEndBuffer,
 310                tag ) )
 311     {
 312       if( TokenComparison( XHTML_COLOR_TAG, tag.buffer, tag.length ) )
 313       {
 314         if( !tag.isEndTag )
 315         {
 316           // Create a new color run.
 317         }
 318         else
 319         {
 320           // Pop the top of the stack and set the number of characters of the run.
 321         }
 322       } // <color></color>
 323       else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
 324       {
 325         if( !tag.isEndTag )
 326         {
 327           // Create a new font run.
 328         }
 329         else
 330         {
 331           // Pop the top of the stack and set the number of characters of the run.
 332         }
 333       } // <i></i>
 334       else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
 335       {
 336         if( !tag.isEndTag )
 337         {
 338           // Create a new underline run.
 339         }
 340         else
 341         {
 342           // Pop the top of the stack and set the number of characters of the run.
 343         }
 344       } // <u></u>
 345       else if( TokenComparison( XHTML_B_TAG, tag.buffer, tag.length ) )
 346       {
 347         if( !tag.isEndTag )
 348         {
 349           // Create a new font run.
 350         }
 351         else
 352         {
 353           // Pop the top of the stack and set the number of characters of the run.
 354         }
 355       } // <b></b>
 356       else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
 357       {
 358         if( !tag.isEndTag )
 359         {
 360           // Create a new font run.
 361         }
 362         else
 363         {
 364           // Pop the top of the stack and set the number of characters of the run.
 365         }
 366       } // <font></font>
 367       else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
 368       {
 369         if( !tag.isEndTag )
 370         {
 371           // Create a new shadow run.
 372         }
 373         else
 374         {
 375           // Pop the top of the stack and set the number of characters of the run.
 376         }
 377       } // <shadow></shadow>
 378       else if( TokenComparison( XHTML_GLOW_TAG, tag.buffer, tag.length ) )
 379       {
 380         if( !tag.isEndTag )
 381         {
 382           // Create a new glow run.
 383         }
 384         else
 385         {
 386           // Pop the top of the stack and set the number of characters of the run.
 387         }
 388       } // <glow></glow>
 389       else if( TokenComparison( XHTML_OUTLINE_TAG, tag.buffer, tag.length ) )
 390       {
 391         if( !tag.isEndTag )
 392         {
 393           // Create a new outline run.
 394         }
 395         else
 396         {
 397           // Pop the top of the stack and set the number of characters of the run.
 398         }
 399       } // <outline></outline>
 400     }  // end if( IsTag() )
 401     else
 402     {
 403       unsigned char character = *markupStringBuffer;
 404
 405       if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
 406       {
 407         // Adding < or > special character.
 408         const unsigned char nextCharacter = *( markupStringBuffer + 1u );
 409         if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) )
 410         {
 411           character = nextCharacter;
 412           ++markupStringBuffer;
 413         }
 414       }
 415       else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
 416       {
 417         // Replacing CR+LF end line by LF.
 418         if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) )
 419         {
 420           character = LINE_SEPARATOR_LF;
 421           ++markupStringBuffer;
 422         }
 423       }
 424
 425       const unsigned char numberOfBytes = GetUtf8Length( character );
 426
 427       markupProcessData.markupProcessedText.push_back( character );
 428       for( unsigned char i = 1u; i < numberOfBytes; ++i )
 429       {
 430         ++markupStringBuffer;
 431         markupProcessData.markupProcessedText.push_back( *markupStringBuffer );
 432       }
 433
 434       ++characterIndex;
 435       ++markupStringBuffer;
 436     }
 437   }
 438
 439   // Resize the model's vectors.
 440 }
 441
 442 } // namespace Text
 443
 444 } // namespace Toolkit
 445
 446 } // namespace Dali