2 * Copyright (c) 2020 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali-toolkit/internal/text/markup-processor.h>
22 #include <climits> // for ULONG_MAX
24 #include <dali/integration-api/debug.h>
27 #include <dali-toolkit/internal/text/character-set-conversion.h>
28 #include <dali-toolkit/internal/text/markup-processor-color.h>
29 #include <dali-toolkit/internal/text/markup-processor-embedded-item.h>
30 #include <dali-toolkit/internal/text/markup-processor-font.h>
31 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
32 #include <dali-toolkit/internal/text/xhtml-entities.h>
45 // HTML-ISH tag and attribute constants.
46 // Note they must be lower case in order to make the comparison to work
47 // as the parser converts all the read tags to lower case.
// These names are compared against the parsed tag with TokenComparison() in ProcessMarkupString().
48 const std::string XHTML_COLOR_TAG("color");
49 const std::string XHTML_FONT_TAG("font");
50 const std::string XHTML_B_TAG("b");
51 const std::string XHTML_I_TAG("i");
52 const std::string XHTML_U_TAG("u");
53 const std::string XHTML_SHADOW_TAG("shadow");
54 const std::string XHTML_GLOW_TAG("glow");
55 const std::string XHTML_OUTLINE_TAG("outline");
56 const std::string XHTML_ITEM_TAG("item");
// Single characters that have a special meaning for the mark-up parser.
58 const char LESS_THAN = '<';
59 const char GREATER_THAN = '>';
60 const char EQUAL = '=';
// The quotation mark recognised by the parser is the single quote.
61 const char QUOTATION_MARK = '\'';
62 const char SLASH = '/';
// A back slash in front of '<', '>' or '&' makes ProcessMarkupStringBuffer() emit that character as plain text.
63 const char BACK_SLASH = '\\';
// '&' opens an XHTML entity, '#' right after it selects the numeric form, ';' closes the entity.
64 const char AMPERSAND = '&';
65 const char HASH = '#';
66 const char SEMI_COLON = ';';
67 const char CHAR_ARRAY_END = '\0';
// Leading character of a hexadecimal numeric entity, tested in XHTMLNumericEntityToUtf8().
68 const char HEX_CODE = 'x';
70 const char WHITE_SPACE = 0x20; // ASCII value of the white space.
// Code point ranges accepted by XHTMLNumericEntityToUtf8(), stored as consecutive (lower, upper) pairs.
// Only the first lower bound is exclusive; the others are tested with '<='.
72 // Range 1: 0x0u < code point <= 0xD7FFu
73 // Range 2: 0xE000u <= code point <= 0xFFFDu
74 // Range 3: 0x10000u <= code point <= 0x10FFFFu
75 const unsigned long XHTML_DECIMAL_ENTITY_RANGE[] = { 0x0u, 0xD7FFu, 0xE000u, 0xFFFDu, 0x10000u, 0x10FFFFu };
// ParseAttributes() resizes the attribute vector of a tag to this value before filling it.
77 const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size', 'weight', 'width' and 'slant' attributes.
78 const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of run vectors.
// Log filter handed to the DALI_LOG_INFO calls in this file; it is only declared when DEBUG_ENABLED is defined.
// NOTE(review): the matching #endif is not visible in this listing - confirm against the original file.
80 #if defined(DEBUG_ENABLED)
81 Debug::Filter* gLogFilter = Debug::Filter::New(Debug::NoLogging, true, "LOG_MARKUP_PROCESSOR");
85 * @brief Struct used to retrieve the style runs from the mark-up string.
// Stack of run indices, kept in a Dali Vector together with an explicit index of the top element.
// NOTE(review): the struct header, the constructor signature, the Pop() signature and the statements that
// move topIndex are not visible in this listing (the embedded line numbers skip) - confirm against the original file.
89 typedef VectorBase::SizeType RunIndex;
91 Vector<RunIndex> stack; ///< Use a vector as a style stack. Stores the indices pointing where the run is stored inside the logical model.
92 unsigned int topIndex; ///< Points the top of the stack.
// Gives the backing vector DEFAULT_VECTOR_SIZE elements up front.
98 stack.Resize( DEFAULT_VECTOR_SIZE );
// Stores the given run index at position topIndex, growing the vector first when needed.
101 void Push( RunIndex index )
103 // Check if there is space inside the style stack.
104 const VectorBase::SizeType size = stack.Count();
105 if( topIndex >= size )
107 // Resize the style stack.
// The vector is grown to twice its current element count.
108 stack.Resize( 2u * size );
111 // Set the run index in the top of the stack.
112 *( stack.Begin() + topIndex ) = index;
114 // Reposition the pointer to the top of the stack.
120 // Pop the top of the stack.
// Returns the element stored at position topIndex.
// NOTE(review): presumably topIndex is decremented just before this line; no empty-stack check is visible here - confirm.
122 return *( stack.Begin() + topIndex );
127 * @brief Initializes a font run description to its defaults.
129 * @param[in,out] fontRun The font description run to initialize.
// Resets the font run: empty character range, no family name, NORMAL weight, width and slant,
// and every '...Defined' flag cleared.
131 void Initialize( FontDescriptionRun& fontRun )
133 fontRun.characterRun.characterIndex = 0u;
134 fontRun.characterRun.numberOfCharacters = 0u;
135 fontRun.familyName = NULL;
136 fontRun.familyLength = 0u;
137 fontRun.weight = TextAbstraction::FontWeight::NORMAL;
138 fontRun.width = TextAbstraction::FontWidth::NORMAL;
139 fontRun.slant = TextAbstraction::FontSlant::NORMAL;
// NOTE(review): sizeDefined is cleared below but no assignment of a size value is visible;
// the listing skips a line here - confirm against the original file.
141 fontRun.familyDefined = false;
142 fontRun.weightDefined = false;
143 fontRun.widthDefined = false;
144 fontRun.slantDefined = false;
145 fontRun.sizeDefined = false;
149 * @brief Initializes a color run description to its defaults.
151 * @param[in,out] colorRun The color run to initialize.
// Resets the character range of the color run to an empty range starting at index 0.
// The visible lines do not assign the color value itself.
153 void Initialize( ColorRun& colorRun )
155 colorRun.characterRun.characterIndex = 0u;
156 colorRun.characterRun.numberOfCharacters = 0u;
160 * @brief Splits the tag string into the tag name and its attributes.
162 * The attributes are stored in a vector in the tag.
164 * @param[in,out] tag The tag.
// Walks the raw tag text [tag.buffer, tag.buffer + tag.length): first the tag name, then
// name='value' pairs, which are stored as (pointer, length) views into the same buffer.
// NOTE(review): this listing omits lines (braces, 'else' branches, counters such as the
// increments of nameLength / valueLength / attributeIndex) - confirm details against the original file.
166 void ParseAttributes( Tag& tag )
// Guard for a tag without any text.
168 if( tag.buffer == NULL )
// Make room for the maximum number of attributes; the vector is shrunk to the real count at the end.
// NOTE(review): attributes are written at index attributeIndex and no check against
// MAX_NUM_OF_ATTRIBUTES is visible in this listing - confirm that a tag with more attributes cannot write past the vector.
173 tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
175 // Find first the tag name.
176 bool isQuotationOpen = false;
178 const char* tagBuffer = tag.buffer;
179 const char* const tagEndBuffer = tagBuffer + tag.length;
181 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
183 const char character = *tagBuffer;
// Anything greater than WHITE_SPACE still belongs to the tag name.
184 if( WHITE_SPACE < character )
190 // Stops counting the length of the tag when a white space is found.
191 // @note a white space is the WHITE_SPACE character and anything below as 'tab', 'return' or 'control characters'.
// Move to the first character after the white space that follows the tag name.
195 SkipWhiteSpace( tagBuffer, tagEndBuffer );
197 // Find the attributes.
198 unsigned int attributeIndex = 0u;
199 const char* nameBuffer = NULL;
200 const char* valueBuffer = NULL;
201 Length nameLength = 0u;
202 Length valueLength = 0u;
// True while characters belong to an attribute name, false once '=' has been read.
204 bool addToNameValue = true;
// Counts white space characters read inside an open quotation since the last other character.
205 Length numberOfWhiteSpace = 0u;
206 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
208 const char character = *tagBuffer;
// White space outside a quotation terminates the current attribute.
209 if( ( WHITE_SPACE >= character ) && !isQuotationOpen )
211 if( NULL != valueBuffer )
213 // Remove white spaces at the end of the value.
214 valueLength -= numberOfWhiteSpace;
// Only a complete name / value pair is stored.
217 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
219 // Every time a white space is found, a new attribute is created and stored in the attributes vector.
220 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
223 attribute.nameBuffer = nameBuffer;
224 attribute.valueBuffer = valueBuffer;
225 attribute.nameLength = nameLength;
226 attribute.valueLength = valueLength;
233 addToNameValue = true; // next read characters will be added to the name.
236 else if( EQUAL == character ) // '='
238 addToNameValue = false; // next read characters will be added to the value.
239 SkipWhiteSpace( tagBuffer, tagEndBuffer );
241 else if( QUOTATION_MARK == character ) // '\''
243 // Quotation marks are added to neither the name nor the value.
244 isQuotationOpen = !isQuotationOpen;
// Right after an opening quotation mark SkipWhiteSpace() is called again.
246 if( isQuotationOpen )
249 SkipWhiteSpace( tagBuffer, tagEndBuffer );
255 // Adds characters to the name or the value.
// The first character of a name fixes where the name view starts.
258 if( NULL == nameBuffer )
260 nameBuffer = tagBuffer;
// Inside a quotation the trailing white space is counted so that it can be removed from the value length later.
266 if( isQuotationOpen )
268 if( WHITE_SPACE >= character )
270 ++numberOfWhiteSpace;
274 numberOfWhiteSpace = 0u;
// The first character of a value fixes where the value view starts.
277 if( NULL == valueBuffer )
279 valueBuffer = tagBuffer;
// After the loop: same handling as above for an attribute that reaches the end of the tag text.
286 if( NULL != valueBuffer )
288 // Remove white spaces at the end of the value.
289 valueLength -= numberOfWhiteSpace;
292 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
294 // Checks if the last attribute needs to be added.
295 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
298 attribute.nameBuffer = nameBuffer;
299 attribute.valueBuffer = valueBuffer;
300 attribute.nameLength = nameLength;
301 attribute.valueLength = valueLength;
304 // Resize the vector of attributes.
// Shrinks the vector to the value of attributeIndex.
305 tag.attributes.Resize( attributeIndex );
309 * @brief Parses a tag and its attributes if @p markupStringBuffer points at the beginning of a tag.
311 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
312 * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
313 * @param[out] tag The tag with its attributes.
315 * @return @e true if @p markupStringBuffer points at a mark-up tag. Otherwise @e false.
// Tests whether the text at markupStringBuffer is a tag and, while scanning it, records the tag text in 'tag'.
// markupStringBuffer is taken by reference and is advanced while scanning.
// NOTE(review): this listing omits lines (the 'tag' parameter, the isTag flag, braces, the bodies of the
// '/' and '>' branches, the return statement) - confirm details against the original file.
317 bool IsTag( const char*& markupStringBuffer,
318 const char* const markupStringEndBuffer,
// Scanner state: inside a quotation, white space seen (attributes may follow), and what the previous character was.
322 bool isQuotationOpen = false;
323 bool attributesFound = false;
324 tag.isEndTag = false;
325 bool isPreviousLessThan = false;
326 bool isPreviousSlash = false;
// Nothing is scanned unless the current character is '<'.
328 const char character = *markupStringBuffer;
329 if( LESS_THAN == character ) // '<'
333 isPreviousLessThan = true;
335 // The iterator points at a '<' character, so check whether a mark-up tag follows.
336 ++markupStringBuffer;
337 if( markupStringBuffer < markupStringEndBuffer )
// White space directly after '<' is skipped.
339 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
// Scan until the tag has been recognised (isTag) or the end of the mark-up string is reached.
341 for( ; ( !isTag ) && ( markupStringBuffer < markupStringEndBuffer ); ++markupStringBuffer )
343 const char character = *markupStringBuffer;
// A '/' only counts when it is outside a quotation.
345 if( !isQuotationOpen && ( SLASH == character ) ) // '/'
// NOTE(review): presumably a '/' directly after '<' marks an end tag; the branch body is not visible - confirm.
347 if (isPreviousLessThan)
353 // if the tag has a '/' it may be an end tag.
354 isPreviousSlash = true;
357 isPreviousLessThan = false;
// If white space follows the '/', skip the whole run and step back one character so that the
// loop increment lands on the first character after the white space.
358 if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) )
360 ++markupStringBuffer;
361 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
362 --markupStringBuffer;
// NOTE(review): the statements that run when '>' is found are only partly visible - confirm.
365 else if( GREATER_THAN == character ) // '>'
373 isPreviousSlash = false;
374 isPreviousLessThan = false;
// A quotation mark toggles the quotation state.
376 else if( QUOTATION_MARK == character )
378 isQuotationOpen = !isQuotationOpen;
381 isPreviousSlash = false;
382 isPreviousLessThan = false;
384 else if( WHITE_SPACE >= character ) // ' '
386 // If the tag contains white spaces then it may have attributes.
387 if( !isQuotationOpen )
389 attributesFound = true;
// Any other character: the first one fixes where the tag text starts.
395 if( NULL == tag.buffer )
397 tag.buffer = markupStringBuffer;
400 // If it's not any of the 'special' characters then just add it to the tag string.
403 isPreviousSlash = false;
404 isPreviousLessThan = false;
409 // If the tag string has white spaces, then the attributes need to be parsed.
410 if( attributesFound )
412 ParseAttributes( tag );
420 * @brief Returns length of XHTML entity by parsing the text. It also determines if it is XHTML entity or not.
422 * @param[in] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
423 * @param[in] markupStringEndBuffer Pointing to end of mark-up string buffer.
425 * @return Length of markupText in case of XHTML entity otherwise return 0.
// Scans from an '&' towards the closing ';' of an XHTML entity.
// markupStringBuffer is taken by reference and is advanced while scanning.
// NOTE(review): the statements that update 'len' and the return statements are not visible in this listing - confirm.
427 unsigned int GetXHTMLEntityLength( const char*& markupStringBuffer,
428 const char* const markupStringEndBuffer )
// Nothing is scanned unless the current character is '&'.
430 char character = *markupStringBuffer;
431 if( AMPERSAND == character ) // '&'
433 // if the iterator is pointing to a '&' character, then check for ';' to find end to XHTML entity.
434 ++markupStringBuffer;
435 if( markupStringBuffer < markupStringEndBuffer )
// The count starts at 1 (the '&' has already been consumed).
437 unsigned int len = 1u;
438 for( ; markupStringBuffer < markupStringEndBuffer ; ++markupStringBuffer )
440 character = *markupStringBuffer;
442 if( SEMI_COLON == character ) // ';'
444 // found end of XHTML entity
// Step past the ';'.
445 ++markupStringBuffer;
// Another '&', a back slash or a '<' is handled separately from ordinary entity characters.
// NOTE(review): presumably the scan is abandoned here; the branch body is not visible - confirm.
448 else if( ( AMPERSAND == character ) || ( BACK_SLASH == character ) || ( LESS_THAN == character ))
459 * @brief Parses an XHTML string that holds a hex/decimal entity and fills in its corresponding utf-8 string.
461 * @param[in] markupText The mark-up text buffer.
462 * @param[out] utf8 The utf-8 text corresponding to the mark-up text.
464 * @return true if string is successfully parsed otherwise false
// Converts the digits of a numeric entity (the text after "&#") into utf-8 written to 'utf8'.
// NOTE(review): the declarations of the result flag, 'isHex' and 'end', and the return statement are not
// visible in this listing - confirm against the original file.
466 bool XHTMLNumericEntityToUtf8 ( const char* markupText, char* utf8 )
470 if( NULL != markupText )
474 // check if hex or decimal entity
// A leading 'x' selects the hexadecimal form.
475 if( ( CHAR_ARRAY_END != *markupText ) && ( HEX_CODE == *markupText ) )
// strtoul() stores in 'end' the address of the first character it did not convert.
482 unsigned long l = strtoul( markupText, &end, ( isHex ? 16 : 10 ) ); // l contains UTF-32 code in case of correct XHTML entity
484 // check for valid XHTML numeric entities (between '#' or "#x" and ';')
// 0 and ULONG_MAX are rejected, and the number has to be followed directly by ';'.
485 if( ( l > 0 ) && ( l < ULONG_MAX ) && ( *end == SEMI_COLON ) ) // in case wrong XHTML entity is set eg. "abcdefs;" in that case *end will be 'a'
487 /* characters XML 1.1 permits */
// The code point has to fall inside one of the three (lower, upper) pairs of XHTML_DECIMAL_ENTITY_RANGE.
488 if( ( ( XHTML_DECIMAL_ENTITY_RANGE[0] < l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[1] ) ) ||
489 ( ( XHTML_DECIMAL_ENTITY_RANGE[2] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[3] ) ) ||
490 ( ( XHTML_DECIMAL_ENTITY_RANGE[4] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[5] ) ) )
492 // Convert UTF32 code to UTF8
// NOTE(review): 'l' is an unsigned long but is read through a uint32_t pointer. Where unsigned long is wider
// than 32 bits this only yields the code point if the low-order bytes are stored first - confirm for the supported targets.
// NOTE(review): the capacity 'utf8' must provide is not visible here - confirm against the caller.
493 Utf32ToUtf8( reinterpret_cast<const uint32_t* const>( &l ), 1, reinterpret_cast<uint8_t*>( utf8 ) );
502 * @brief Processes a particular tag for the required run (color-run or font-run).
504 * @tparam RunType Whether ColorRun or FontDescriptionRun
506 * @param[in,out] runsContainer The container containing all the runs
507 * @param[in,out] styleStack The style stack
508 * @param[in] tag The tag we are currently processing
509 * @param[in] characterIndex The current character index
510 * @param[in,out] runIndex The run index
511 * @param[in,out] tagReference The tagReference we should increment/decrement
512 * @param[in] parameterSettingFunction This function will be called to set run specific parameters
// Shared handling of the tags that open and close a run: the visible lines create and register a run,
// and later close a run by popping its index from the style stack.
// NOTE(review): the 'tag' and 'tagReference' parameters, the branch on the kind of tag, the creation of 'run'
// and the updates of runIndex / tagReference are not visible in this listing - confirm against the original file.
// NOTE(review): std::function is used here but <functional> is not among the visible includes - confirm it is included.
514 template <typename RunType>
515 void ProcessTagForRun(
516 Vector<RunType>& runsContainer,
517 StyleStack& styleStack,
519 const CharacterIndex characterIndex,
520 StyleStack::RunIndex& runIndex,
522 std::function<void (const Tag&, RunType&)> parameterSettingFunction)
530 // Fill the run with the parameters.
// The run starts at the current character; the callback sets the run specific values.
531 run.characterRun.characterIndex = characterIndex;
532 parameterSettingFunction(tag, run);
534 // Push the run in the logical model.
535 runsContainer.PushBack(run);
537 // Push the index of the run into the stack.
538 styleStack.Push(runIndex);
540 // Point the next free run.
543 // Increase reference
// A run is only closed while tagReference is positive.
548 if( tagReference > 0 )
550 // Pop the top of the stack and set the number of characters of the run.
551 RunType& run = *( runsContainer.Begin() + styleStack.Pop() );
// The length of the run is the distance from its first character to the current character.
552 run.characterRun.numberOfCharacters = characterIndex - run.characterRun.characterIndex;
559 * @brief Processes the item tag
561 * @param[in,out] markupProcessData The markup process data
562 * @param[in] tag The current tag
563 * @param[in,out] characterIndex The current character index
// Parameter list and body of the item tag handler (called as ProcessItemTag() from ProcessMarkupString()).
// NOTE(review): the first line of the signature, the 'tag' parameter, the declaration of 'item' and any
// condition around the body are not visible in this listing - confirm against the original file.
566 MarkupProcessData& markupProcessData,
568 CharacterIndex& characterIndex)
572 // Create an embedded item instance.
// The item is placed at the current character; ProcessEmbeddedItem() receives the tag and the item.
574 item.characterIndex = characterIndex;
575 ProcessEmbeddedItem(tag, item);
577 markupProcessData.items.PushBack(item);
579 // Insert white space character that will be replaced by the item.
// Exactly one WHITE_SPACE character is appended to the processed text.
580 markupProcessData.markupProcessedText.append( 1u, WHITE_SPACE );
586 * @brief Resizes the model's vectors
588 * @param[in,out] markupProcessData The markup process data
589 * @param[in] fontRunIndex The font run index
590 * @param[in] colorRunIndex The color run index
// Resizes the font and color run vectors to the given run indices, then logs every color run.
592 void ResizeModelVectors(MarkupProcessData& markupProcessData, const StyleStack::RunIndex fontRunIndex, const StyleStack::RunIndex colorRunIndex)
594 markupProcessData.fontRuns.Resize( fontRunIndex );
595 markupProcessData.colorRuns.Resize( colorRunIndex );
// NOTE(review): the listing skips lines around this loop; presumably it is compiled only in debug builds,
// as gLogFilter is declared under DEBUG_ENABLED - confirm.
598 for( unsigned int i=0; i<colorRunIndex; ++i )
600 ColorRun& run = markupProcessData.colorRuns[i];
601 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "run[%d] index: %d, length: %d, color %f,%f,%f,%f\n", i, run.characterRun.characterIndex, run.characterRun.numberOfCharacters, run.color.r, run.color.g, run.color.b, run.color.a );
607 * @brief Processes the markup string buffer
609 * @param[in,out] markupProcessData The markup process data
610 * @param[in,out] markupStringBuffer The markup string buffer pointer
611 * @param[in] markupStringEndBuffer The markup string end buffer pointer
612 * @param[in,out] characterIndex The current character index
// Handles text that is not a tag: resolves a back slash escape or an XHTML entity if there is one,
// appends the resulting bytes to markupProcessedText and advances markupStringBuffer.
// NOTE(review): this listing omits lines (braces, the declarations of 'utf8' and 'result', some conditions,
// the update of characterIndex) - confirm details against the original file.
614 void ProcessMarkupStringBuffer(
615 MarkupProcessData& markupProcessData,
616 const char*& markupStringBuffer,
617 const char* const markupStringEndBuffer,
618 CharacterIndex& characterIndex)
// 'character' is the first byte to emit, 'markupBuffer' points at the bytes to copy from,
// 'count' is how far the input is advanced at the end.
620 unsigned char character = *markupStringBuffer;
621 const char* markupBuffer = markupStringBuffer;
622 unsigned char count = GetUtf8Length( character );
// A back slash that is not the last character may be an escape.
625 if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
627 // Adding < , > or & special character.
628 const unsigned char nextCharacter = *( markupStringBuffer + 1u );
629 if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) || ( AMPERSAND == nextCharacter ) )
// The escaped character replaces the back slash and the input moves past the back slash.
631 character = nextCharacter;
632 ++markupStringBuffer;
634 count = GetUtf8Length( character );
635 markupBuffer = markupStringBuffer;
638 else // checking if contains XHTML entity or not
// GetXHTMLEntityLength() advances markupStringBuffer while it scans.
640 const unsigned int len = GetXHTMLEntityLength( markupStringBuffer, markupStringEndBuffer);
642 // Parse markupStringTxt if it contains XHTML Entity between '&' and ';'
645 char* entityCode = NULL;
649 // Checking if XHTML Numeric Entity
650 if( HASH == *( markupBuffer + 1u ) )
// The numeric conversion writes its output into the local 'utf8' buffer.
652 entityCode = &utf8[0];
653 // markupBuffer is currently pointing to '&'. By adding 2u to markupBuffer it will point to numeric string by skipping "&#"
654 result = XHTMLNumericEntityToUtf8( ( markupBuffer + 2u ), entityCode );
656 else // Checking if XHTML Named Entity
// A NULL result from NamedEntityToUtf8() is treated as a failed conversion.
658 entityCode = const_cast<char*> ( NamedEntityToUtf8( markupBuffer, len ) );
659 result = ( entityCode != NULL );
// From here on the bytes are copied from the converted entity text.
663 markupBuffer = entityCode; //utf8 text assigned to markupBuffer
664 character = markupBuffer[0];
668 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not valid XHTML entity : (%.*s) \n", len, markupBuffer );
672 else // in case string contains Start of XHTML Entity('&') but not its end character(';')
674 if( character == AMPERSAND )
677 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not Well formed XHTML content \n" );
// Append the character: its first byte, then the remaining bytes of its utf-8 sequence.
682 if( markupBuffer != NULL )
684 const unsigned char numberOfBytes = GetUtf8Length( character );
685 markupProcessData.markupProcessedText.push_back( character );
687 for( unsigned char i = 1u; i < numberOfBytes; ++i )
690 markupProcessData.markupProcessedText.push_back( *markupBuffer );
// Advance the input by 'count' bytes.
694 markupStringBuffer += count;
// Entry point: walks the mark-up string once, dispatches every recognised tag to its handler and passes
// everything else to ProcessMarkupStringBuffer(), which fills markupProcessData.markupProcessedText.
// NOTE(review): this listing omits lines (braces, the declaration of 'tag', the last argument of IsTag(),
// the closing of the lambdas) - confirm details against the original file.
700 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
702 DALI_LOG_INFO( gLogFilter, Debug::Verbose, "markupString: %s\n", markupString.c_str() );
704 // Reserve space for the plain text.
705 const Length markupStringSize = markupString.size();
706 markupProcessData.markupProcessedText.reserve( markupStringSize );
708 // Stores a struct with the index to the first character of the run, the type of run and its parameters.
// One stack is shared by the color and the font runs.
709 StyleStack styleStack;
711 // Points the next free position in the vector of runs.
712 StyleStack::RunIndex colorRunIndex = 0u;
713 StyleStack::RunIndex fontRunIndex = 0u;
715 // check tag reference
// One counter per tag name, handed to ProcessTagForRun() for that tag.
716 int colorTagReference = 0u;
717 int fontTagReference = 0u;
718 int iTagReference = 0u;
719 int bTagReference = 0u;
721 // Give an initial default value to the model's vectors.
722 markupProcessData.colorRuns.Reserve( DEFAULT_VECTOR_SIZE );
723 markupProcessData.fontRuns.Reserve( DEFAULT_VECTOR_SIZE );
725 // Get the mark-up string buffer.
726 const char* markupStringBuffer = markupString.c_str();
727 const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
730 CharacterIndex characterIndex = 0u;
// The loop has no increment: IsTag() and ProcessMarkupStringBuffer() advance markupStringBuffer.
731 for( ; markupStringBuffer < markupStringEndBuffer; )
// The attributes of the previous tag are discarded before the next one is parsed.
733 tag.attributes.Clear();
734 if( IsTag( markupStringBuffer,
735 markupStringEndBuffer,
// <color>: color run, parameters set by ProcessColorTag().
738 if( TokenComparison( XHTML_COLOR_TAG, tag.buffer, tag.length ) )
740 ProcessTagForRun<ColorRun>(
741 markupProcessData.colorRuns, styleStack, tag, characterIndex, colorRunIndex, colorTagReference,
742 [] (const Tag& tag, ColorRun& run) { ProcessColorTag( tag, run ); });
// <i>: font run with an italic slant.
744 else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
746 ProcessTagForRun<FontDescriptionRun>(
747 markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, iTagReference,
748 [] (const Tag&, FontDescriptionRun& fontRun)
750 fontRun.slant = TextAbstraction::FontSlant::ITALIC;
751 fontRun.slantDefined = true;
// <u>: recognised, no handling is visible.
754 else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
756 // TODO: If !tag.isEndTag, then create a new underline run.
757 // else Pop the top of the stack and set the number of characters of the run.
// <b>: font run with a bold weight.
759 else if( TokenComparison( XHTML_B_TAG, tag.buffer, tag.length ) )
761 ProcessTagForRun<FontDescriptionRun>(
762 markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, bTagReference,
763 [] (const Tag&, FontDescriptionRun& fontRun)
765 fontRun.weight = TextAbstraction::FontWeight::BOLD;
766 fontRun.weightDefined = true;
// <font>: font run, parameters set by ProcessFontTag().
769 else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
771 ProcessTagForRun<FontDescriptionRun>(
772 markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, fontTagReference,
773 [] (const Tag& tag, FontDescriptionRun& fontRun) { ProcessFontTag( tag, fontRun ); });
// <shadow>, <glow> and <outline>: recognised, no handling is visible.
775 else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
777 // TODO: If !tag.isEndTag, then create a new shadow run.
778 // else Pop the top of the stack and set the number of characters of the run.
779 } // <shadow></shadow>
780 else if( TokenComparison( XHTML_GLOW_TAG, tag.buffer, tag.length ) )
782 // TODO: If !tag.isEndTag, then create a new glow run.
783 // else Pop the top of the stack and set the number of characters of the run.
785 else if( TokenComparison( XHTML_OUTLINE_TAG, tag.buffer, tag.length ) )
787 // TODO: If !tag.isEndTag, then create a new outline run.
788 // else Pop the top of the stack and set the number of characters of the run.
789 } // <outline></outline>
// <item>: embedded item.
790 else if (TokenComparison(XHTML_ITEM_TAG, tag.buffer, tag.length))
792 ProcessItemTag(markupProcessData, tag, characterIndex);
794 } // end if( IsTag() )
// Not a tag: the end is checked again before the text is processed, as IsTag() may have advanced the pointer.
795 else if( markupStringBuffer < markupStringEndBuffer )
797 ProcessMarkupStringBuffer(markupProcessData, markupStringBuffer, markupStringEndBuffer, characterIndex);
801 // Resize the model's vectors.
802 ResizeModelVectors(markupProcessData, fontRunIndex, colorRunIndex);
807 } // namespace Toolkit