2 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali-toolkit/internal/text/markup-processor.h>
22 #include <climits> // for ULONG_MAX
23 #include <dali/integration-api/debug.h>
26 #include <dali-toolkit/internal/text/character-set-conversion.h>
27 #include <dali-toolkit/internal/text/markup-processor-color.h>
28 #include <dali-toolkit/internal/text/markup-processor-embedded-item.h>
29 #include <dali-toolkit/internal/text/markup-processor-font.h>
30 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
31 #include <dali-toolkit/internal/text/xhtml-entities.h>
44 // HTML-ISH tag and attribute constants.
45 // Note: they must be lower case for the comparison to work,
46 // as the parser converts all the read tags to lower case.
47 const std::string XHTML_COLOR_TAG("color");
48 const std::string XHTML_FONT_TAG("font");
49 const std::string XHTML_B_TAG("b");
50 const std::string XHTML_I_TAG("i");
51 const std::string XHTML_U_TAG("u");
52 const std::string XHTML_SHADOW_TAG("shadow");
53 const std::string XHTML_GLOW_TAG("glow");
54 const std::string XHTML_OUTLINE_TAG("outline");
55 const std::string XHTML_ITEM_TAG("item");
// Single characters with a special meaning for the mark-up parser.
57 const char LESS_THAN = '<';
58 const char GREATER_THAN = '>';
59 const char EQUAL = '=';
60 const char QUOTATION_MARK = '\'';
61 const char SLASH = '/';
62 const char BACK_SLASH = '\\';
63 const char AMPERSAND = '&';
64 const char HASH = '#';
65 const char SEMI_COLON = ';';
66 const char CHAR_ARRAY_END = '\0';
67 const char HEX_CODE = 'x';
69 const char WHITE_SPACE = 0x20; // ASCII value of the white space.
// Accepted code points for numeric entities, stored as consecutive { lower, upper } pairs.
// Despite the name, the same ranges are applied to both decimal and hexadecimal entities.
71 // Range 1: 0x0u < code point <= 0xD7FFu
72 // Range 2: 0xE000u <= code point <= 0xFFFDu
73 // Range 3: 0x10000u <= code point <= 0x10FFFFu
74 const unsigned long XHTML_DECIMAL_ENTITY_RANGE[] = { 0x0u, 0xD7FFu, 0xE000u, 0xFFFDu, 0x10000u, 0x10FFFFu };
76 const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size', 'weight', 'width' and 'slant' attributes.
77 const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of run vectors.
// Log filter for this file; only defined when DEBUG_ENABLED is set.
79 #if defined(DEBUG_ENABLED)
80 Debug::Filter* gLogFilter = Debug::Filter::New(Debug::NoLogging, true, "LOG_MARKUP_PROCESSOR");
84 * @brief Struct used to retrieve the style runs from the mark-up string.
// Type of the indices stored in the stack.
88 typedef VectorBase::SizeType RunIndex;
90 Vector<RunIndex> stack; ///< Use a vector as a style stack. Stores the indices pointing where the run is stored inside the logical model.
91 unsigned int topIndex; ///< Points the top of the stack.
// Pre-size the stack so that the first pushes do not need to grow it.
97 stack.Resize( DEFAULT_VECTOR_SIZE );
// Stores the given run index in the slot addressed by topIndex.
100 void Push( RunIndex index )
102 // Check if there is space inside the style stack.
103 const VectorBase::SizeType size = stack.Count();
104 if( topIndex >= size )
106 // The stack is full: double its size.
107 stack.Resize( 2u * size );
110 // Set the run index in the top of the stack.
111 *( stack.Begin() + topIndex ) = index;
113 // Reposition the pointer to the top of the stack.
119 // Pop the top of the stack.
// NOTE(review): no check against an empty stack is visible here; an end tag without
// a matching start tag would presumably read outside the valid range. Confirm against callers.
121 return *( stack.Begin() + topIndex );
126 * @brief Initializes a font run description to its defaults.
128 * @param[in,out] fontRun The font description run to initialize.
130 void Initialize( FontDescriptionRun& fontRun )
// The run starts out covering no characters.
132 fontRun.characterRun.characterIndex = 0u;
133 fontRun.characterRun.numberOfCharacters = 0u;
// No family name buffer is attached yet.
134 fontRun.familyName = NULL;
135 fontRun.familyLength = 0u;
// Weight, width and slant default to their NORMAL values.
136 fontRun.weight = TextAbstraction::FontWeight::NORMAL;
137 fontRun.width = TextAbstraction::FontWidth::NORMAL;
138 fontRun.slant = TextAbstraction::FontSlant::NORMAL;
// None of the attributes has been explicitly defined yet; callers set the
// matching flag when they fill in an attribute.
140 fontRun.familyDefined = false;
141 fontRun.weightDefined = false;
142 fontRun.widthDefined = false;
143 fontRun.slantDefined = false;
144 fontRun.sizeDefined = false;
148 * @brief Splits the tag string into the tag name and its attributes.
150 * The attributes are stored in a vector in the tag.
152 * @param[in,out] tag The tag.
154 void ParseAttributes( Tag& tag )
// Nothing to parse if the tag has no buffer.
156 if( tag.buffer == NULL )
// Make room for the maximum number of attributes; the vector is shrunk to the
// number actually found at the end of the function.
161 tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
163 // First find the tag name.
164 bool isQuotationOpen = false;
166 const char* tagBuffer = tag.buffer;
167 const char* const tagEndBuffer = tagBuffer + tag.length;
169 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
171 const char character = *tagBuffer;
// NOTE(review): 'character' is a plain char. Where char is signed, bytes >= 0x80
// compare below WHITE_SPACE and are treated as white space. Confirm this is intended.
172 if( WHITE_SPACE < character )
178 // Stops counting the length of the tag when a white space is found.
179 // @note A white space is the WHITE_SPACE character and anything below it, such as 'tab', 'return' or other control characters.
183 SkipWhiteSpace( tagBuffer, tagEndBuffer );
185 // Find the attributes.
186 unsigned int attributeIndex = 0u;
187 const char* nameBuffer = NULL;
188 const char* valueBuffer = NULL;
189 Length nameLength = 0u;
190 Length valueLength = 0u;
192 bool addToNameValue = true;
193 Length numberOfWhiteSpace = 0u;
194 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
196 const char character = *tagBuffer;
// A white space outside quotation marks ends the current attribute.
197 if( ( WHITE_SPACE >= character ) && !isQuotationOpen )
199 if( NULL != valueBuffer )
201 // Remove white spaces at the end of the value.
202 valueLength -= numberOfWhiteSpace;
205 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
207 // Once both a name and a value have been read, the attribute is stored in the attributes vector.
// NOTE(review): the vector holds MAX_NUM_OF_ATTRIBUTES elements and no bound check on
// attributeIndex is visible here. Confirm that a tag with more attributes cannot write past the end.
208 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
211 attribute.nameBuffer = nameBuffer;
212 attribute.valueBuffer = valueBuffer;
213 attribute.nameLength = nameLength;
214 attribute.valueLength = valueLength;
221 addToNameValue = true; // next read characters will be added to the name.
224 else if( EQUAL == character ) // '='
226 addToNameValue = false; // next read characters will be added to the value.
227 SkipWhiteSpace( tagBuffer, tagEndBuffer );
229 else if( QUOTATION_MARK == character ) // '\''
231 // Quotation marks are added to neither the name nor the value.
232 isQuotationOpen = !isQuotationOpen;
234 if( isQuotationOpen )
// Skip the white spaces that follow an opening quotation mark.
237 SkipWhiteSpace( tagBuffer, tagEndBuffer );
243 // Adds characters to the name or the value.
246 if( NULL == nameBuffer )
// Remember where the name starts.
248 nameBuffer = tagBuffer;
254 if( isQuotationOpen )
// Count the white spaces at the end of a quoted value so that they can be
// subtracted from the value's length later.
256 if( WHITE_SPACE >= character )
258 ++numberOfWhiteSpace;
262 numberOfWhiteSpace = 0u;
265 if( NULL == valueBuffer )
// Remember where the value starts.
267 valueBuffer = tagBuffer;
// The end of the tag has been reached: flush the attribute being read, if any.
274 if( NULL != valueBuffer )
276 // Remove white spaces at the end of the value.
277 valueLength -= numberOfWhiteSpace;
280 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
282 // Checks if the last attribute needs to be added.
283 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
286 attribute.nameBuffer = nameBuffer;
287 attribute.valueBuffer = valueBuffer;
288 attribute.nameLength = nameLength;
289 attribute.valueLength = valueLength;
292 // Shrink the vector of attributes to the number of attributes found.
293 tag.attributes.Resize( attributeIndex );
297 * @brief Parses a tag and its attributes if @p markupStringBuffer is pointing at the beginning of a tag.
299 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing at the current character.
300 * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
301 * @param[out] tag The tag with its attributes.
303 * @return @e true if @p markupStringBuffer is pointing at a mark-up tag. Otherwise @e false.
305 bool IsTag( const char*& markupStringBuffer,
306 const char* const markupStringEndBuffer,
// Parsing state.
310 bool isQuotationOpen = false;
311 bool attributesFound = false;
312 tag.isEndTag = false;
313 bool isPreviousLessThan = false;
314 bool isPreviousSlash = false;
// Only a '<' character can start a tag.
316 const char character = *markupStringBuffer;
317 if( LESS_THAN == character ) // '<'
321 isPreviousLessThan = true;
323 // The iterator is pointing at a '<' character, so check whether it starts a mark-up tag.
324 ++markupStringBuffer;
325 if( markupStringBuffer < markupStringEndBuffer )
327 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
// Read characters until the tag is closed or the end of the buffer is reached.
329 for( ; ( !isTag ) && ( markupStringBuffer < markupStringEndBuffer ); ++markupStringBuffer )
331 const char character = *markupStringBuffer;
333 if( !isQuotationOpen && ( SLASH == character ) ) // '/'
335 if (isPreviousLessThan)
341 // If the tag has a '/' it may be an end tag.
342 isPreviousSlash = true;
345 isPreviousLessThan = false;
// Skip the white spaces that follow the '/'. The iterator is stepped back by one
// because the loop increments it again.
346 if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) )
348 ++markupStringBuffer;
349 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
350 --markupStringBuffer;
353 else if( GREATER_THAN == character ) // '>'
361 isPreviousSlash = false;
362 isPreviousLessThan = false;
364 else if( QUOTATION_MARK == character )
// Toggle the quotation state.
366 isQuotationOpen = !isQuotationOpen;
369 isPreviousSlash = false;
370 isPreviousLessThan = false;
372 else if( WHITE_SPACE >= character ) // ' '
374 // If the tag contains white spaces then it may have attributes.
375 if( !isQuotationOpen )
377 attributesFound = true;
383 if( NULL == tag.buffer )
// Remember where the tag string starts.
385 tag.buffer = markupStringBuffer;
388 // If it's not any of the 'special' characters then just add it to the tag string.
391 isPreviousSlash = false;
392 isPreviousLessThan = false;
397 // If the tag string has white spaces, then its attributes need to be parsed.
398 if( attributesFound )
400 ParseAttributes( tag );
408 * @brief Returns the length of the XHTML entity found by parsing the text. It also determines whether the text is an XHTML entity or not.
410 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing at the current character.
411 * @param[in] markupStringEndBuffer Pointer to the end of the mark-up string buffer.
413 * @return The length of the mark-up text if it is an XHTML entity, otherwise 0.
415 unsigned int GetXHTMLEntityLength( const char*& markupStringBuffer,
416 const char* const markupStringEndBuffer )
// Only a '&' character can start an entity.
418 char character = *markupStringBuffer;
419 if( AMPERSAND == character ) // '&'
421 // The iterator is pointing at a '&' character, so look for the ';' that ends the XHTML entity.
422 ++markupStringBuffer;
423 if( markupStringBuffer < markupStringEndBuffer )
// The length starts at one to account for the '&' already consumed.
425 unsigned int len = 1u;
426 for( ; markupStringBuffer < markupStringEndBuffer ; ++markupStringBuffer )
428 character = *markupStringBuffer;
430 if( SEMI_COLON == character ) // ';'
432 // Found the end of the XHTML entity; step past the ';'.
433 ++markupStringBuffer;
// A second '&', a '\' or a '<' cannot be part of an entity.
436 else if( ( AMPERSAND == character ) || ( BACK_SLASH == character ) || ( LESS_THAN == character ))
447 * @brief Parses an XHTML string which holds a hex/decimal numeric entity and fills in its corresponding utf-8 string.
449 * @param[in] markupText The mark-up text buffer.
450 * @param[out] utf8 The utf-8 text corresponding to the mark-up text.
452 * @return @e true if the string is successfully parsed, otherwise @e false.
454 bool XHTMLNumericEntityToUtf8 ( const char* markupText, char* utf8 )
458 if( NULL != markupText )
462 // Check whether it's a hex or a decimal entity.
// NOTE(review): only a lower case 'x' is recognised as the hexadecimal prefix.
463 if( ( CHAR_ARRAY_END != *markupText ) && ( HEX_CODE == *markupText ) )
// strtoul() leaves 'end' pointing at the first character it could not convert.
470 unsigned long l = strtoul( markupText, &end, ( isHex ? 16 : 10 ) ); // l contains the UTF-32 code in case of a correct XHTML entity
472 // Check for valid XHTML numeric entities (between '#' or "#x" and ';').
473 if( ( l > 0 ) && ( l < ULONG_MAX ) && ( *end == SEMI_COLON ) ) // for a wrong XHTML entity, e.g. "abcdefs;", *end will be 'a' rather than ';'
475 /* characters XML 1.1 permits */
476 if( ( ( XHTML_DECIMAL_ENTITY_RANGE[0] < l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[1] ) ) ||
477 ( ( XHTML_DECIMAL_ENTITY_RANGE[2] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[3] ) ) ||
478 ( ( XHTML_DECIMAL_ENTITY_RANGE[4] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[5] ) ) )
480 // Convert the UTF32 code to UTF8.
// NOTE(review): 'l' is an unsigned long, but its address is reinterpreted as a uint32_t*.
// Where unsigned long is wider than 32 bits this reads the low word only on little-endian
// targets. Copying 'l' into a uint32_t and passing its address would avoid the dependency.
481 Utf32ToUtf8( reinterpret_cast<const uint32_t* const>( &l ), 1, reinterpret_cast<uint8_t*>( utf8 ) );
// Walks the mark-up string, appending the plain text to markupProcessData.markupProcessedText
// and storing the color runs, font runs and embedded items found in the tags.
491 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
493 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "markupString: %s\n", markupString.c_str() );
495 // Reserve space for the plain text.
496 const Length markupStringSize = markupString.size();
497 markupProcessData.markupProcessedText.reserve( markupStringSize );
499 // Stores a struct with the index to the first character of the run, the type of run and its parameters.
500 StyleStack styleStack;
502 // Points the next free position in the vector of runs.
503 StyleStack::RunIndex colorRunIndex = 0u;
504 StyleStack::RunIndex fontRunIndex = 0u;
506 // Give an initial default value to the model's vectors.
507 markupProcessData.colorRuns.Reserve( DEFAULT_VECTOR_SIZE );
508 markupProcessData.fontRuns.Reserve( DEFAULT_VECTOR_SIZE );
510 // Get the mark-up string buffer.
511 const char* markupStringBuffer = markupString.c_str();
512 const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
// Index of the current character; it is stored as the start index of each new run.
515 CharacterIndex characterIndex = 0u;
516 for( ; markupStringBuffer < markupStringEndBuffer; )
// Discard the attributes of the previously parsed tag.
518 tag.attributes.Clear();
519 if( IsTag( markupStringBuffer,
520 markupStringEndBuffer,
523 if( TokenComparison( XHTML_COLOR_TAG, tag.buffer, tag.length ) )
527 // Create a new color run.
529 colorRun.characterRun.numberOfCharacters = 0u;
531 // Set the start character index.
532 colorRun.characterRun.characterIndex = characterIndex;
534 // Fill the run with the attributes.
535 ProcessColorTag( tag, colorRun );
537 // Push the color run in the logical model.
538 markupProcessData.colorRuns.PushBack( colorRun );
540 // Push the index of the run into the stack.
541 styleStack.Push( colorRunIndex );
543 // Point the next color run.
548 // Pop the top of the stack and set the number of characters of the run.
// NOTE(review): a single stack is shared by color and font runs, so the popped index is
// only correct when the tags are properly nested. Confirm how malformed mark-up is handled.
549 ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() );
550 colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex;
553 else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
557 // Create a new font run.
558 FontDescriptionRun fontRun;
559 Initialize( fontRun );
561 // Fill the run with the parameters.
562 fontRun.characterRun.characterIndex = characterIndex;
563 fontRun.slant = TextAbstraction::FontSlant::ITALIC;
564 fontRun.slantDefined = true;
566 // Push the font run in the logical model.
567 markupProcessData.fontRuns.PushBack( fontRun );
569 // Push the index of the run into the stack.
570 styleStack.Push( fontRunIndex );
572 // Point the next free font run.
577 // Pop the top of the stack and set the number of characters of the run.
578 FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
579 fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
582 else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
586 // Create a new underline run.
590 // Pop the top of the stack and set the number of characters of the run.
593 else if( TokenComparison( XHTML_B_TAG, tag.buffer, tag.length ) )
597 // Create a new font run.
598 FontDescriptionRun fontRun;
599 Initialize( fontRun );
601 // Fill the run with the parameters.
602 fontRun.characterRun.characterIndex = characterIndex;
603 fontRun.weight = TextAbstraction::FontWeight::BOLD;
604 fontRun.weightDefined = true;
606 // Push the font run in the logical model.
607 markupProcessData.fontRuns.PushBack( fontRun );
609 // Push the index of the run into the stack.
610 styleStack.Push( fontRunIndex );
612 // Point the next free font run.
617 // Pop the top of the stack and set the number of characters of the run.
618 FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
619 fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
622 else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
626 // Create a new font run.
627 FontDescriptionRun fontRun;
628 Initialize( fontRun );
630 // Fill the run with the parameters.
631 fontRun.characterRun.characterIndex = characterIndex;
633 ProcessFontTag( tag, fontRun );
635 // Push the font run in the logical model.
636 markupProcessData.fontRuns.PushBack( fontRun );
638 // Push the index of the run into the stack.
639 styleStack.Push( fontRunIndex );
641 // Point the next free font run.
646 // Pop the top of the stack and set the number of characters of the run.
647 FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
648 fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
651 else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
655 // Create a new shadow run.
659 // Pop the top of the stack and set the number of characters of the run.
661 } // <shadow></shadow>
662 else if( TokenComparison( XHTML_GLOW_TAG, tag.buffer, tag.length ) )
666 // Create a new glow run.
670 // Pop the top of the stack and set the number of characters of the run.
673 else if( TokenComparison( XHTML_OUTLINE_TAG, tag.buffer, tag.length ) )
677 // Create a new outline run.
681 // Pop the top of the stack and set the number of characters of the run.
683 } // <outline></outline>
684 else if (TokenComparison(XHTML_ITEM_TAG, tag.buffer, tag.length))
688 // Create an embedded item instance.
690 item.characterIndex = characterIndex;
691 ProcessEmbeddedItem(tag, item);
693 markupProcessData.items.PushBack(item);
695 // Insert white space character that will be replaced by the item.
696 markupProcessData.markupProcessedText.append( 1u, WHITE_SPACE );
700 } // end if( IsTag() )
701 else if( markupStringBuffer < markupStringEndBuffer )
// Not a tag: copy one character of text, resolving escape sequences and XHTML entities.
// 'count' is the number of bytes the mark-up iterator advances by at the end of this branch;
// 'markupBuffer' points at the bytes that are copied to the processed text.
703 unsigned char character = *markupStringBuffer;
704 const char* markupBuffer = markupStringBuffer;
705 unsigned char count = GetUtf8Length( character );
708 if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
710 // Adding < , > or & special character.
711 const unsigned char nextCharacter = *( markupStringBuffer + 1u );
712 if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) || ( AMPERSAND == nextCharacter ) )
// Drop the back slash and keep the escaped character.
714 character = nextCharacter;
715 ++markupStringBuffer;
717 count = GetUtf8Length( character );
718 markupBuffer = markupStringBuffer;
721 else // checking if it contains an XHTML entity or not
723 const unsigned int len = GetXHTMLEntityLength( markupStringBuffer, markupStringEndBuffer);
725 // Parse markupStringTxt if it contains XHTML Entity between '&' and ';'
728 char* entityCode = NULL;
732 // Checking if XHTML Numeric Entity
733 if( HASH == *( markupBuffer + 1u ) )
735 entityCode = &utf8[0];
736 // markupBuffer is currently pointing to '&'. Adding 2u to markupBuffer makes it point to the numeric string, skipping "&#".
737 result = XHTMLNumericEntityToUtf8( ( markupBuffer + 2u ), entityCode );
739 else // Checking if XHTML Named Entity
// The const is cast away only so that both branches can share 'entityCode'.
741 entityCode = const_cast<char*> ( NamedEntityToUtf8( markupBuffer, len ) );
742 result = ( entityCode != NULL );
746 markupBuffer = entityCode; //utf8 text assigned to markupBuffer
747 character = markupBuffer[0];
751 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not valid XHTML entity : (%.*s) \n", len, markupBuffer );
755 else // in case the string contains the start of an XHTML entity ('&') but not its end character (';')
757 if( character == AMPERSAND )
760 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not Well formed XHTML content \n" );
765 if( markupBuffer != NULL )
// Append the first byte of the character, then its remaining bytes.
767 const unsigned char numberOfBytes = GetUtf8Length( character );
768 markupProcessData.markupProcessedText.push_back( character );
770 for( unsigned char i = 1u; i < numberOfBytes; ++i )
773 markupProcessData.markupProcessedText.push_back( *markupBuffer );
// Advance the mark-up iterator past the bytes consumed in this branch.
777 markupStringBuffer += count;
782 // Resize the model's vectors.
783 if( 0u == fontRunIndex )
785 markupProcessData.fontRuns.Clear();
789 markupProcessData.fontRuns.Resize( fontRunIndex );
792 if( 0u == colorRunIndex )
794 markupProcessData.colorRuns.Clear();
798 markupProcessData.colorRuns.Resize( colorRunIndex );
// Log every color run that has been created.
801 for( unsigned int i=0; i<colorRunIndex; ++i )
803 ColorRun& run = markupProcessData.colorRuns[i];
804 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "run[%d] index: %d, length: %d, color %f,%f,%f,%f\n", i, run.characterRun.characterIndex, run.characterRun.numberOfCharacters, run.color.r, run.color.g, run.color.b, run.color.a );
812 } // namespace Toolkit