2 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali-toolkit/internal/text/markup-processor.h>
22 #include <dali/integration-api/debug.h>
23 #include <climits> // for ULONG_MAX
27 #include <dali-toolkit/internal/text/character-set-conversion.h>
28 #include <dali-toolkit/internal/text/markup-processor-color.h>
29 #include <dali-toolkit/internal/text/markup-processor-embedded-item.h>
30 #include <dali-toolkit/internal/text/markup-processor-font.h>
31 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
32 #include <dali-toolkit/internal/text/xhtml-entities.h>
// File-level constants used by the mark-up parser below: tag names, the single
// characters the scanner reacts to, numeric-entity bounds and vector sizes.
42 // HTML-ISH tag and attribute constants.
43 // Note they must be lower case for the comparison to work,
44 // as the parser converts all the read tags to lower case.
45 const std::string XHTML_COLOR_TAG("color");
46 const std::string XHTML_FONT_TAG("font");
47 const std::string XHTML_B_TAG("b");
48 const std::string XHTML_I_TAG("i");
49 const std::string XHTML_U_TAG("u");
50 const std::string XHTML_SHADOW_TAG("shadow");
51 const std::string XHTML_GLOW_TAG("glow");
52 const std::string XHTML_OUTLINE_TAG("outline");
53 const std::string XHTML_ITEM_TAG("item");
// Single characters with a special meaning for the tag / entity scanners.
55 const char LESS_THAN = '<';
56 const char GREATER_THAN = '>';
57 const char EQUAL = '=';
58 const char QUOTATION_MARK = '\'';
59 const char SLASH = '/';
60 const char BACK_SLASH = '\\';
61 const char AMPERSAND = '&';
62 const char HASH = '#';
63 const char SEMI_COLON = ';';
64 const char CHAR_ARRAY_END = '\0';
65 const char HEX_CODE = 'x';
// Every character less than or equal to WHITE_SPACE is treated as white space by the scanners.
67 const char WHITE_SPACE = 0x20; // ASCII value of the white space.
// Bounds, stored as consecutive (low, high) pairs, of the code points accepted
// for numeric XHTML entities. The comparisons below mirror the checks made in
// XHTMLNumericEntityToUtf8: the first lower bound is exclusive, the others inclusive.
69 // Range 1: 0x0u < code <= 0xD7FFu
70 // Range 2: 0xE000u <= code <= 0xFFFDu
71 // Range 3: 0x10000u <= code <= 0x10FFFFu
72 const unsigned long XHTML_DECIMAL_ENTITY_RANGE[] = {0x0u, 0xD7FFu, 0xE000u, 0xFFFDu, 0x10000u, 0x10FFFFu};
74 const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size', 'weight', 'width' and 'slant' attributes.
75 const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of run vectors.
// Log filter handed to DALI_LOG_INFO in this file; only defined when DEBUG_ENABLED is set.
77 #if defined(DEBUG_ENABLED)
78 Debug::Filter* gLogFilter = Debug::Filter::New(Debug::NoLogging, true, "LOG_MARKUP_PROCESSOR");
82 * @brief Struct used to retrieve the style runs from the mark-up string.
// Members of the style stack: a growable vector of run indices plus the
// position of the current top. Push stores an index at topIndex; Pop reads
// the index found at topIndex.
86 typedef VectorBase::SizeType RunIndex;
88 Vector<RunIndex> stack; ///< Use a vector as a style stack. Stores the indices pointing where the run is stored inside the logical model.
89 unsigned int topIndex; ///< Points the top of the stack.
// Pre-size the storage so the first DEFAULT_VECTOR_SIZE pushes need no reallocation.
95 stack.Resize(DEFAULT_VECTOR_SIZE);
// Stores @p index at the top of the stack, growing the storage when needed.
98 void Push(RunIndex index)
100 // Check if there is space inside the style stack.
101 const VectorBase::SizeType size = stack.Count();
104 // Resize the style stack.
// The storage grows by doubling its current element count.
105 stack.Resize(2u * size);
108 // Set the run index in the top of the stack.
109 *(stack.Begin() + topIndex) = index;
111 // Reposition the pointer to the top of the stack.
117 // Pop the top of the stack.
// NOTE(review): no emptiness check is visible before this read; confirm that
// callers never pop more often than they push.
119 return *(stack.Begin() + topIndex);
124 * @brief Initializes a font run description to its defaults.
126 * @param[in,out] fontRun The font description run to initialize.
// Resets a font description run: empty character range, no family name,
// NORMAL weight / width / slant and every "defined" flag cleared.
128 void Initialize(FontDescriptionRun& fontRun)
// The run initially covers no characters.
130 fontRun.characterRun.characterIndex = 0u;
131 fontRun.characterRun.numberOfCharacters = 0u;
// No family name buffer is attached yet.
132 fontRun.familyName = NULL;
133 fontRun.familyLength = 0u;
134 fontRun.weight = TextAbstraction::FontWeight::NORMAL;
135 fontRun.width = TextAbstraction::FontWidth::NORMAL;
136 fontRun.slant = TextAbstraction::FontSlant::NORMAL;
// Nothing has been specified by the mark-up yet, so all the flags start as false.
138 fontRun.familyDefined = false;
139 fontRun.weightDefined = false;
140 fontRun.widthDefined = false;
141 fontRun.slantDefined = false;
142 fontRun.sizeDefined = false;
146 * @brief Initializes a color run description to its defaults.
148 * @param[in,out] colorRun The color run to initialize.
// Resets the character range of a color run so that it covers no characters.
// Only the range is reset in the visible code; the color value is not touched here.
150 void Initialize(ColorRun& colorRun)
152 colorRun.characterRun.characterIndex = 0u;
153 colorRun.characterRun.numberOfCharacters = 0u;
157 * @brief Splits the tag string into the tag name and its attributes.
159 * The attributes are stored in a vector in the tag.
161 * @param[in,out] tag The tag.
// Splits the raw tag text [tag.buffer, tag.buffer + tag.length) into the tag
// name followed by name/value attribute pairs. Attributes are written into
// tag.attributes as pointer + length views into the tag buffer, and the
// vector is finally shrunk to the number of attributes found.
163 void ParseAttributes(Tag& tag)
// Nothing to parse when the tag has no buffer attached.
165 if(tag.buffer == NULL)
// Reserve the maximum number of attribute slots up front; the vector is resized
// to the real count at the end of the function.
// NOTE(review): no bound check of attributeIndex against MAX_NUM_OF_ATTRIBUTES is
// visible below; confirm a tag with more attributes cannot write past the vector.
170 tag.attributes.Resize(MAX_NUM_OF_ATTRIBUTES);
172 // Find first the tag name.
173 bool isQuotationOpen = false;
// Scan cursor and one-past-the-end pointer of the tag text.
175 const char* tagBuffer = tag.buffer;
176 const char* const tagEndBuffer = tagBuffer + tag.length;
// First pass: walk over the characters of the tag name.
178 for(; tagBuffer < tagEndBuffer; ++tagBuffer)
180 const char character = *tagBuffer;
// Characters greater than WHITE_SPACE belong to the tag name.
181 if(WHITE_SPACE < character)
187 // Stops counting the length of the tag when a white space is found.
188 // @note a white space is the WHITE_SPACE character and anything below as 'tab', 'return' or 'control characters'.
// Move the cursor to the first character after the tag name's trailing white space.
192 SkipWhiteSpace(tagBuffer, tagEndBuffer);
194 // Find the attributes.
// State of the attribute being read: start pointers and lengths of its name and value.
195 unsigned int attributeIndex = 0u;
196 const char* nameBuffer = NULL;
197 const char* valueBuffer = NULL;
198 Length nameLength = 0u;
199 Length valueLength = 0u;
// addToNameValue selects whether read characters extend the name (true) or the value (false).
201 bool addToNameValue = true;
// Number of consecutive white spaces seen at the current end of the value.
202 Length numberOfWhiteSpace = 0u;
// Second pass: walk over the attribute list.
203 for(; tagBuffer < tagEndBuffer; ++tagBuffer)
205 const char character = *tagBuffer;
// A white space outside quotation marks terminates the current attribute.
206 if((WHITE_SPACE >= character) && !isQuotationOpen)
208 if(NULL != valueBuffer)
210 // Remove white spaces at the end of the value.
211 valueLength -= numberOfWhiteSpace;
// Only store the attribute when both a name and a value have been read.
214 if((NULL != nameBuffer) && (NULL != valueBuffer))
216 // Every time a white space is found, a new attribute is created and stored in the attributes vector.
217 Attribute& attribute = *(tag.attributes.Begin() + attributeIndex);
220 attribute.nameBuffer = nameBuffer;
221 attribute.valueBuffer = valueBuffer;
222 attribute.nameLength = nameLength;
223 attribute.valueLength = valueLength;
230 addToNameValue = true; // next read characters will be added to the name.
233 else if(EQUAL == character) // '='
235 addToNameValue = false; // next read characters will be added to the value.
236 SkipWhiteSpace(tagBuffer, tagEndBuffer);
238 else if(QUOTATION_MARK == character) // '\''
240 // Quotation marks are added to neither the name nor the value.
// Toggle the quoted state; white space inside quotes does not end the attribute.
241 isQuotationOpen = !isQuotationOpen;
246 SkipWhiteSpace(tagBuffer, tagEndBuffer);
252 // Adds characters to the name or the value.
// The name starts at the first character read for it.
255 if(NULL == nameBuffer)
257 nameBuffer = tagBuffer;
// Track the white spaces read so far at the end of the value; a non white space
// character resets the count.
265 if(WHITE_SPACE >= character)
267 ++numberOfWhiteSpace;
271 numberOfWhiteSpace = 0u;
// The value starts at the first character read for it.
274 if(NULL == valueBuffer)
276 valueBuffer = tagBuffer;
// After the loop: finish the attribute that was still being read, if any.
283 if(NULL != valueBuffer)
285 // Remove white spaces at the end of the value.
286 valueLength -= numberOfWhiteSpace;
289 if((NULL != nameBuffer) && (NULL != valueBuffer))
291 // Checks if the last attribute needs to be added.
292 Attribute& attribute = *(tag.attributes.Begin() + attributeIndex);
295 attribute.nameBuffer = nameBuffer;
296 attribute.valueBuffer = valueBuffer;
297 attribute.nameLength = nameLength;
298 attribute.valueLength = valueLength;
301 // Resize the vector of attributes.
// Shrinks the vector from MAX_NUM_OF_ATTRIBUTES to the number of attributes stored.
302 tag.attributes.Resize(attributeIndex);
306 * @brief It parses a tag and its attributes if the given buffer pointer @e markupStringBuffer is pointing at the beginning of a tag.
308 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing at the current character.
309 * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
310 * @param[out] tag The tag with its attributes.
312 * @return @e true if @e markupStringBuffer is pointing at a mark-up tag. Otherwise @e false.
// Checks whether the text at markupStringBuffer starts a mark-up tag and, if
// so, records where the tag text starts (tag.buffer), whether it is an end tag
// (tag.isEndTag) and, when white space was seen inside the tag, parses its
// attributes. The buffer pointer is taken by reference and advanced while scanning.
// NOTE(review): no visible code restores markupStringBuffer when the text turns
// out not to be a tag; confirm what position callers expect in that case.
314 bool IsTag(const char*& markupStringBuffer,
315 const char* const markupStringEndBuffer,
// Scanner state. isPreviousLessThan / isPreviousSlash remember the previous
// significant character so that '/' can be interpreted by its position.
319 bool isQuotationOpen = false;
320 bool attributesFound = false;
321 tag.isEndTag = false;
322 bool isPreviousLessThan = false;
323 bool isPreviousSlash = false;
325 const char character = *markupStringBuffer;
326 if(LESS_THAN == character) // '<'
330 isPreviousLessThan = true;
332 // If the iterator is pointing to a '<' character, then checking whether it's a mark-up tag is needed.
333 ++markupStringBuffer;
334 if(markupStringBuffer < markupStringEndBuffer)
// White space between '<' and the tag name is ignored.
336 SkipWhiteSpace(markupStringBuffer, markupStringEndBuffer);
// Scan until the tag has been recognised or the end of the mark-up string is reached.
338 for(; (!isTag) && (markupStringBuffer < markupStringEndBuffer); ++markupStringBuffer)
340 const char character = *markupStringBuffer;
// A '/' outside quotation marks is structural rather than part of a value.
342 if(!isQuotationOpen && (SLASH == character)) // '/'
344 if(isPreviousLessThan)
350 // if the tag has a '/' it may be an end tag.
351 isPreviousSlash = true;
354 isPreviousLessThan = false;
// Skip the white space following the '/'. The cursor is stepped back by one
// afterwards because the for loop increments it again.
355 if((markupStringBuffer + 1u < markupStringEndBuffer) && (WHITE_SPACE >= *(markupStringBuffer + 1u)))
357 ++markupStringBuffer;
358 SkipWhiteSpace(markupStringBuffer, markupStringEndBuffer);
359 --markupStringBuffer;
362 else if(GREATER_THAN == character) // '>'
370 isPreviousSlash = false;
371 isPreviousLessThan = false;
373 else if(QUOTATION_MARK == character)
// Toggle the quoted state.
375 isQuotationOpen = !isQuotationOpen;
378 isPreviousSlash = false;
379 isPreviousLessThan = false;
381 else if(WHITE_SPACE >= character) // ' '
383 // If the tag contains white spaces then it may have attributes.
386 attributesFound = true;
// The tag text starts at the first ordinary character that is read.
392 if(NULL == tag.buffer)
394 tag.buffer = markupStringBuffer;
397 // If it's not any of the 'special' characters then just add it to the tag string.
400 isPreviousSlash = false;
401 isPreviousLessThan = false;
406 // If the tag string has white spaces, then parsing the attributes is needed.
409 ParseAttributes(tag);
417 * @brief Returns the length of the XHTML entity by parsing the text. It also determines whether it is an XHTML entity or not.
419 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing at the current character.
420 * @param[in] markupStringEndBuffer Pointer to the end of the mark-up string buffer.
422 * @return Length of markupText in case of an XHTML entity, otherwise 0.
// Scans from an '&' towards the terminating ';' of an XHTML entity. The buffer
// pointer is taken by reference and is advanced while scanning.
424 unsigned int GetXHTMLEntityLength(const char*& markupStringBuffer,
425 const char* const markupStringEndBuffer)
427 char character = *markupStringBuffer;
428 if(AMPERSAND == character) // '&'
430 // if the iterator is pointing to a '&' character, then check for ';' to find end to XHTML entity.
431 ++markupStringBuffer;
432 if(markupStringBuffer < markupStringEndBuffer)
// The length starts at one: the '&' itself has already been consumed.
434 unsigned int len = 1u;
435 for(; markupStringBuffer < markupStringEndBuffer; ++markupStringBuffer)
437 character = *markupStringBuffer;
439 if(SEMI_COLON == character) // ';'
441 // found end of XHTML entity
// Step over the ';' so the cursor is left right after the entity.
442 ++markupStringBuffer;
// Another '&', a back slash or a '<' before the ';' gets separate handling from
// ordinary entity characters.
445 else if((AMPERSAND == character) || (BACK_SLASH == character) || (LESS_THAN == character))
456 * @brief It parses an XHTML string which has a hex/decimal entity and fills its corresponding utf-8 string.
458 * @param[in] markupText The mark-up text buffer.
459 * @param[out] utf8 The utf-8 text corresponding to the mark-up text.
461 * @return true if the string is successfully parsed, otherwise false.
// Converts the numeric part of an XHTML entity (the text that follows the
// leading ampersand and hash, up to the ';') into UTF-8 written to @p utf8.
463 bool XHTMLNumericEntityToUtf8(const char* markupText, char* utf8)
467 if(NULL != markupText)
471 // check if hex or decimal entity
// A leading 'x' marks a hexadecimal entity; otherwise the number is decimal.
472 if((CHAR_ARRAY_END != *markupText) && (HEX_CODE == *markupText))
// strtoul stops at the first character that is not a digit of the chosen base
// and stores its address in end.
479 unsigned long l = strtoul(markupText, &end, (isHex ? 16 : 10)); // l contains UTF-32 code in case of correct XHTML entity
481 // check for valid XHTML numeric entities (between '#' or "#x" and ';')
// Zero (nothing parsed) and ULONG_MAX (strtoul's out-of-range result) are rejected,
// and the number must be immediately followed by ';'.
482 if((l > 0) && (l < ULONG_MAX) && (*end == SEMI_COLON)) // in case wrong XHTML entity is set eg. "abcdefs;" in that case *end will be 'a'
484 /* characters XML 1.1 permits */
// Accept only code points inside one of the three XHTML_DECIMAL_ENTITY_RANGE pairs.
485 if(((XHTML_DECIMAL_ENTITY_RANGE[0] < l) && (l <= XHTML_DECIMAL_ENTITY_RANGE[1])) ||
486 ((XHTML_DECIMAL_ENTITY_RANGE[2] <= l) && (l <= XHTML_DECIMAL_ENTITY_RANGE[3])) ||
487 ((XHTML_DECIMAL_ENTITY_RANGE[4] <= l) && (l <= XHTML_DECIMAL_ENTITY_RANGE[5])))
489 // Convert UTF32 code to UTF8
// NOTE(review): l is an unsigned long viewed through a uint32_t pointer. Where
// unsigned long is wider than 32 bits this reads only the first four bytes of l,
// which is the low half on little-endian targets only; confirm the supported platforms.
490 Utf32ToUtf8(reinterpret_cast<const uint32_t* const>(&l), 1, reinterpret_cast<uint8_t*>(utf8));
499 * @brief Processes a particular tag for the required run (color-run or font-run).
501 * @tparam RunType Whether ColorRun or FontDescriptionRun
503 * @param[in,out] runsContainer The container containing all the runs
504 * @param[in,out] styleStack The style stack
505 * @param[in] tag The tag we are currently processing
506 * @param[in] characterIndex The current character index
507 * @param[in,out] runIndex The run index
508 * @param[in,out] tagReference The tagReference we should increment/decrement
509 * @param[in] parameterSettingFunction This function will be called to set run specific parameters
// Shared handling of a tag that creates or closes a run of type RunType.
// One path creates a run starting at characterIndex, lets
// parameterSettingFunction fill the run specific values, appends it to
// runsContainer and pushes its index on styleStack. The other path pops the
// index of a run from styleStack and sets that run's character count.
// (The conditions selecting each path are not visible in this excerpt.)
511 template<typename RunType>
512 void ProcessTagForRun(
513 Vector<RunType>& runsContainer,
514 StyleStack& styleStack,
516 const CharacterIndex characterIndex,
517 StyleStack::RunIndex& runIndex,
519 std::function<void(const Tag&, RunType&)> parameterSettingFunction)
527 // Fill the run with the parameters.
// The run starts at the current character; its length is set when it is closed.
528 run.characterRun.characterIndex = characterIndex;
529 parameterSettingFunction(tag, run);
531 // Push the run in the logical model.
532 runsContainer.PushBack(run);
534 // Push the index of the run into the stack.
535 styleStack.Push(runIndex);
537 // Point the next free run.
540 // Increase reference
547 // Pop the top of the stack and set the number of characters of the run.
// NOTE(review): the popped index is applied to this runsContainer, while the caller
// passes one styleStack for both color and font runs; confirm that interleaved tags
// cannot pop an index that was pushed for the other container.
548 RunType& run = *(runsContainer.Begin() + styleStack.Pop());
// The run covers the characters from its start up to the current character index.
549 run.characterRun.numberOfCharacters = characterIndex - run.characterRun.characterIndex;
556 * @brief Processes the item tag
558 * @param[in,out] markupProcessData The markup process data
559 * @param[in] tag The current tag
560 * @param[in,out] characterIndex The current character index
// Parameters and body of the item-tag handler: records an embedded item at the
// current character index and appends a placeholder white space to the processed text.
563 MarkupProcessData& markupProcessData,
565 CharacterIndex& characterIndex)
569 // Create an embedded item instance.
// The item is anchored at the current character position.
571 item.characterIndex = characterIndex;
// Fill the item from the attributes of the tag.
572 ProcessEmbeddedItem(tag, item);
574 markupProcessData.items.PushBack(item);
576 // Insert white space character that will be replaced by the item.
577 markupProcessData.markupProcessedText.append(1u, WHITE_SPACE);
583 * @brief Resizes the model's vectors
585 * @param[in,out] markupProcessData The markup process data
586 * @param[in] fontRunIndex The font run index
587 * @param[in] colorRunIndex The color run index
// Resizes the font and color run vectors to the given run counts and logs
// every color run through the debug log filter.
589 void ResizeModelVectors(MarkupProcessData& markupProcessData, const StyleStack::RunIndex fontRunIndex, const StyleStack::RunIndex colorRunIndex)
591 markupProcessData.fontRuns.Resize(fontRunIndex);
592 markupProcessData.colorRuns.Resize(colorRunIndex);
// Log each color run: its index, character range and RGBA components.
595 for(unsigned int i = 0; i < colorRunIndex; ++i)
597 ColorRun& run = markupProcessData.colorRuns[i];
// NOTE(review): i is unsigned but is printed with %d; the character index and count
// are presumably unsigned as well. Confirm the format specifiers.
598 DALI_LOG_INFO(gLogFilter, Debug::Verbose, "run[%d] index: %d, length: %d, color %f,%f,%f,%f\n", i, run.characterRun.characterIndex, run.characterRun.numberOfCharacters, run.color.r, run.color.g, run.color.b, run.color.a);
604 * @brief Processes the markup string buffer
606 * @param[in,out] markupProcessData The markup process data
607 * @param[in,out] markupStringBuffer The markup string buffer pointer
608 * @param[in] markupStringEndBuffer The markup string end buffer pointer
609 * @param[in,out] characterIndex The current character index
// Handles one character of plain (non-tag) mark-up text: a back slash escaped
// '<', '>' or '&', an XHTML entity (numeric or named), or an ordinary UTF-8
// character. The resulting bytes are appended to markupProcessedText and the
// mark-up buffer pointer is advanced.
611 void ProcessMarkupStringBuffer(
612 MarkupProcessData& markupProcessData,
613 const char*& markupStringBuffer,
614 const char* const markupStringEndBuffer,
615 CharacterIndex& characterIndex)
// character: first byte of what will be emitted; markupBuffer: where the bytes to
// emit are read from; count: number of bytes the mark-up buffer is advanced by at the end.
617 unsigned char character = *markupStringBuffer;
618 const char* markupBuffer = markupStringBuffer;
619 unsigned char count = GetUtf8Length(character);
// A back slash followed by at least one more character may be an escape sequence.
622 if((BACK_SLASH == character) && (markupStringBuffer + 1u < markupStringEndBuffer))
624 // Adding < , > or & special character.
625 const unsigned char nextCharacter = *(markupStringBuffer + 1u);
626 if((LESS_THAN == nextCharacter) || (GREATER_THAN == nextCharacter) || (AMPERSAND == nextCharacter))
// Drop the back slash and emit the escaped character instead.
628 character = nextCharacter;
629 ++markupStringBuffer;
631 count = GetUtf8Length(character);
632 markupBuffer = markupStringBuffer;
635 else // checking if contains XHTML entity or not
// GetXHTMLEntityLength advances markupStringBuffer; markupBuffer still points at
// the start of the candidate entity.
637 const unsigned int len = GetXHTMLEntityLength(markupStringBuffer, markupStringEndBuffer);
639 // Parse markupStringTxt if it contains XHTML Entity between '&' and ';'
642 char* entityCode = NULL;
646 // Checking if XHTML Numeric Entity
647 if(HASH == *(markupBuffer + 1u))
// The numeric entity is decoded into the local utf8 buffer.
649 entityCode = &utf8[0];
650 // markupBuffer is currently pointing to '&'. By adding 2u to markupBuffer it will point to numeric string by skipping "&#"
651 result = XHTMLNumericEntityToUtf8((markupBuffer + 2u), entityCode);
653 else // Checking if XHTML Named Entity
// A NULL result from NamedEntityToUtf8 is treated as failure.
655 entityCode = const_cast<char*>(NamedEntityToUtf8(markupBuffer, len));
656 result = (entityCode != NULL);
660 markupBuffer = entityCode; //utf8 text assigned to markupBuffer
661 character = markupBuffer[0];
// NOTE(review): len is an unsigned int, while the precision argument of %.*s is
// expected to be an int; confirm this is acceptable for DALI_LOG_INFO.
665 DALI_LOG_INFO(gLogFilter, Debug::Verbose, "Not valid XHTML entity : (%.*s) \n", len, markupBuffer);
669 else // in case string contains Start of XHTML Entity('&') but not its end character(';')
671 if(character == AMPERSAND)
674 DALI_LOG_INFO(gLogFilter, Debug::Verbose, "Not Well formed XHTML content \n");
// Emit the character only when there is a valid source buffer to read it from.
679 if(markupBuffer != NULL)
// Append the first byte, then the remaining bytes of the UTF-8 sequence.
681 const unsigned char numberOfBytes = GetUtf8Length(character);
682 markupProcessData.markupProcessedText.push_back(character);
684 for(unsigned char i = 1u; i < numberOfBytes; ++i)
687 markupProcessData.markupProcessedText.push_back(*markupBuffer);
// NOTE(review): count is derived from the lead byte only; confirm a truncated
// UTF-8 sequence at the end of the string cannot move the pointer past markupStringEndBuffer.
691 markupStringBuffer += count;
// Entry point: walks the mark-up string, dispatching recognised tags to their
// handlers and passing all other text to ProcessMarkupStringBuffer, then
// resizes the run vectors of markupProcessData to the number of runs created.
697 void ProcessMarkupString(const std::string& markupString, MarkupProcessData& markupProcessData)
699 DALI_LOG_INFO(gLogFilter, Debug::Verbose, "markupString: %s\n", markupString.c_str());
701 // Reserve space for the plain text.
// The size of the mark-up string is used as the capacity for the processed text.
702 const Length markupStringSize = markupString.size();
703 markupProcessData.markupProcessedText.reserve(markupStringSize);
705 // Stores a struct with the index to the first character of the run, the type of run and its parameters.
// A single stack is handed to every ProcessTagForRun call below, for both color and font runs.
706 StyleStack styleStack;
708 // Points the next free position in the vector of runs.
709 StyleStack::RunIndex colorRunIndex = 0u;
710 StyleStack::RunIndex fontRunIndex = 0u;
712 // check tag reference
// One counter per tag kind, passed to ProcessTagForRun as its tagReference.
713 int colorTagReference = 0u;
714 int fontTagReference = 0u;
715 int iTagReference = 0u;
716 int bTagReference = 0u;
718 // Give an initial default value to the model's vectors.
719 markupProcessData.colorRuns.Reserve(DEFAULT_VECTOR_SIZE);
720 markupProcessData.fontRuns.Reserve(DEFAULT_VECTOR_SIZE);
722 // Get the mark-up string buffer.
723 const char* markupStringBuffer = markupString.c_str();
724 const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
// Index of the current character in the processed text.
727 CharacterIndex characterIndex = 0u;
// The loop has no increment expression: the buffer pointer is advanced by IsTag
// and by ProcessMarkupStringBuffer, which both take it by reference.
728 for(; markupStringBuffer < markupStringEndBuffer;)
// Drop the attributes of the previously parsed tag.
730 tag.attributes.Clear();
731 if(IsTag(markupStringBuffer,
732 markupStringEndBuffer,
// Dispatch on the tag name.
735 if(TokenComparison(XHTML_COLOR_TAG, tag.buffer, tag.length))
737 ProcessTagForRun<ColorRun>(
738 markupProcessData.colorRuns, styleStack, tag, characterIndex, colorRunIndex, colorTagReference, [](const Tag& tag, ColorRun& run) { ProcessColorTag(tag, run); });
740 else if(TokenComparison(XHTML_I_TAG, tag.buffer, tag.length))
// The italic tag is stored as a font run with the slant set to ITALIC.
742 ProcessTagForRun<FontDescriptionRun>(
743 markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, iTagReference, [](const Tag&, FontDescriptionRun& fontRun) {
744 fontRun.slant = TextAbstraction::FontSlant::ITALIC;
745 fontRun.slantDefined = true;
748 else if(TokenComparison(XHTML_U_TAG, tag.buffer, tag.length))
750 // TODO: If !tag.isEndTag, then create a new underline run.
751 // else Pop the top of the stack and set the number of characters of the run.
753 else if(TokenComparison(XHTML_B_TAG, tag.buffer, tag.length))
// The bold tag is stored as a font run with the weight set to BOLD.
755 ProcessTagForRun<FontDescriptionRun>(
756 markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, bTagReference, [](const Tag&, FontDescriptionRun& fontRun) {
757 fontRun.weight = TextAbstraction::FontWeight::BOLD;
758 fontRun.weightDefined = true;
761 else if(TokenComparison(XHTML_FONT_TAG, tag.buffer, tag.length))
763 ProcessTagForRun<FontDescriptionRun>(
764 markupProcessData.fontRuns, styleStack, tag, characterIndex, fontRunIndex, fontTagReference, [](const Tag& tag, FontDescriptionRun& fontRun) { ProcessFontTag(tag, fontRun); });
766 else if(TokenComparison(XHTML_SHADOW_TAG, tag.buffer, tag.length))
768 // TODO: If !tag.isEndTag, then create a new shadow run.
769 // else Pop the top of the stack and set the number of characters of the run.
770 } // <shadow></shadow>
771 else if(TokenComparison(XHTML_GLOW_TAG, tag.buffer, tag.length))
773 // TODO: If !tag.isEndTag, then create a new glow run.
774 // else Pop the top of the stack and set the number of characters of the run.
776 else if(TokenComparison(XHTML_OUTLINE_TAG, tag.buffer, tag.length))
778 // TODO: If !tag.isEndTag, then create a new outline run.
779 // else Pop the top of the stack and set the number of characters of the run.
780 } // <outline></outline>
781 else if(TokenComparison(XHTML_ITEM_TAG, tag.buffer, tag.length))
783 ProcessItemTag(markupProcessData, tag, characterIndex);
785 } // end if( IsTag() )
// Not a tag: treat the text as character data, provided IsTag has not already
// consumed the rest of the buffer.
786 else if(markupStringBuffer < markupStringEndBuffer)
788 ProcessMarkupStringBuffer(markupProcessData, markupStringBuffer, markupStringEndBuffer, characterIndex);
792 // Resize the model's vectors.
793 ResizeModelVectors(markupProcessData, fontRunIndex, colorRunIndex);
798 } // namespace Toolkit