2 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali-toolkit/internal/text/markup-processor.h>
22 #include <dali-toolkit/internal/text/character-set-conversion.h>
23 #include <dali-toolkit/internal/text/markup-processor-color.h>
24 #include <dali-toolkit/internal/text/markup-processor-font.h>
25 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
38 // HTML-ISH tag and attribute constants.
39 // Note they must be lower case for the comparison to work,
40 // as the parser converts all the tags it reads to lower case.
// Names of the mark-up tags that the parsed tag names are compared against.
41 const std::string XHTML_COLOR_TAG("color");
42 const std::string XHTML_FONT_TAG("font");
43 const std::string XHTML_B_TAG("b");
44 const std::string XHTML_I_TAG("i");
45 const std::string XHTML_U_TAG("u");
46 const std::string XHTML_SHADOW_TAG("shadow");
47 const std::string XHTML_GLOW_TAG("glow");
48 const std::string XHTML_OUTLINE_TAG("outline");
// Characters with a special meaning for the mark-up parser.
50 const char LESS_THAN = '<';
51 const char GREATER_THAN = '>';
52 const char EQUAL = '=';
// The quotation mark is the single quote; it toggles the 'quotation open' state while tags and attributes are parsed.
53 const char QUOTATION_MARK = '\'';
54 const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character CR
55 const char LINE_SEPARATOR_LF = 0x0A; // New line character LF
56 const char SLASH = '/';
// A back slash followed by '<' or '>' adds that character to the text instead of starting a tag.
57 const char BACK_SLASH = '\\';
59 const char WHITE_SPACE = 0x20; // ASCII value of the white space. Any character with a lower or equal value is treated as a white space.
61 const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size', 'weight', 'width' and 'slant' attributes.
62 const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of the run vectors and of the style stack.
65 * @brief Struct used to retrieve the style runs from the mark-up string.
// Type of the indices stored in the stack.
69 typedef VectorBase::SizeType RunIndex;
71 Vector<RunIndex> stack; ///< Use a vector as a style stack. Stores the indices pointing where the run is stored inside the logical model.
72 unsigned int topIndex; ///< Points to the top of the stack.
// Give the stack an initial size so that run indices can be written straight into it.
78 stack.Resize( DEFAULT_VECTOR_SIZE );
// Stores the given run index at the position pointed to by topIndex, growing the storage first if it is full.
81 void Push( RunIndex index )
83 // Check if there is space inside the style stack.
84 const VectorBase::SizeType size = stack.Count();
85 if( topIndex >= size )
87 // The style stack is full: double its size.
88 stack.Resize( 2u * size );
91 // Set the run index in the top of the stack.
92 *( stack.Begin() + topIndex ) = index;
94 // Reposition the pointer to the top of the stack.
100 // Pop the top of the stack.
// NOTE(review): no guard against an empty stack is visible here; confirm that an end tag without a matching start tag cannot reach this point.
102 return *( stack.Begin() + topIndex );
107 * @brief Splits the tag string into the tag name and its attributes.
109 * The attributes are stored in a vector in the tag.
111 * @param[in,out] tag The tag.
// Scans the tag's buffer: first the tag name, then every name='value' pair, which is stored in tag.attributes.
// The attributes only point into the tag's buffer (pointer + length); no characters are copied here.
113 void ParseAttributes( Tag& tag )
// Make room for the maximum number of attributes. The vector is resized to attributeIndex once the parsing ends.
115 tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
117 // Find first the tag name.
118 bool isQuotationOpen = false;
120 const char* tagBuffer = tag.buffer;
121 const char* const tagEndBuffer = tagBuffer + tag.length;
123 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
125 const char character = *tagBuffer;
126 if( WHITE_SPACE < character )
132 // Stops counting the length of the tag when a white space is found.
133 // @note a white space is the WHITE_SPACE character and anything below it, such as 'tab', 'return' or other control characters.
// Move to the first character after the white spaces that follow the tag name.
137 SkipWhiteSpace( tagBuffer, tagEndBuffer );
139 // Find the attributes.
140 unsigned int attributeIndex = 0u;
// Start and length of the name and of the value of the attribute currently being read.
141 const char* nameBuffer = NULL;
142 const char* valueBuffer = NULL;
143 Length nameLength = 0u;
144 Length valueLength = 0u;
146 bool addToNameValue = true;
// Number of consecutive white spaces read at the end of a quoted value; used to trim the value.
147 Length numberOfWhiteSpace = 0u;
148 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
150 const char character = *tagBuffer;
// A white space outside quotation marks ends the current attribute.
151 if( ( WHITE_SPACE >= character ) && !isQuotationOpen )
153 if( NULL != valueBuffer )
155 // Remove white spaces at the end of the value.
156 valueLength -= numberOfWhiteSpace;
159 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
161 // When a white space ends a complete name/value pair, the attribute is stored in the attributes vector.
// NOTE(review): no check that attributeIndex is below MAX_NUM_OF_ATTRIBUTES is visible before this write; confirm that a tag with more attributes cannot write out of bounds.
162 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
165 attribute.nameBuffer = nameBuffer;
166 attribute.valueBuffer = valueBuffer;
167 attribute.nameLength = nameLength;
168 attribute.valueLength = valueLength;
175 addToNameValue = true; // next read characters will be added to the name.
178 else if( EQUAL == character ) // '='
180 addToNameValue = false; // next read characters will be added to the value.
// White spaces between the '=' and the value are not part of the value.
181 SkipWhiteSpace( tagBuffer, tagEndBuffer );
183 else if( QUOTATION_MARK == character ) // '\''
185 // Quotation marks are added to neither the name nor the value.
186 isQuotationOpen = !isQuotationOpen;
188 if( isQuotationOpen )
// White spaces right after the opening quotation mark are not part of the value.
191 SkipWhiteSpace( tagBuffer, tagEndBuffer );
197 // Adds characters to the name or the value.
// The name starts at the first character added to it.
200 if( NULL == nameBuffer )
202 nameBuffer = tagBuffer;
208 if( isQuotationOpen )
// Count the white spaces inside the quotation marks; the count is reset by any other character, so only the trailing ones are kept.
210 if( WHITE_SPACE >= character )
212 ++numberOfWhiteSpace;
216 numberOfWhiteSpace = 0u;
// The value starts at the first character added to it.
219 if( NULL == valueBuffer )
221 valueBuffer = tagBuffer;
// The end of the tag has been reached: the last attribute is not followed by a white space, so it is handled here.
228 if( NULL != valueBuffer )
230 // Remove white spaces at the end of the value.
231 valueLength -= numberOfWhiteSpace;
234 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
236 // Checks if the last attribute needs to be added.
237 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
240 attribute.nameBuffer = nameBuffer;
241 attribute.valueBuffer = valueBuffer;
242 attribute.nameLength = nameLength;
243 attribute.valueLength = valueLength;
246 // Resize the vector of attributes.
247 tag.attributes.Resize( attributeIndex );
251 * @brief Parses a tag and its attributes if @p markupStringBuffer points at the beginning of a tag.
253 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing at the current character.
254 * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
255 * @param[out] tag The tag with its attributes.
257 * @return @e true if @p markupStringBuffer is pointing at a mark-up tag. Otherwise @e false.
// Checks whether the mark-up string buffer points at a tag and, while scanning it, fills the given tag.
// The buffer pointer is taken by reference and is advanced while the tag is read.
259 bool IsTag( const char*& markupStringBuffer,
260 const char* const markupStringEndBuffer,
264 bool isQuotationOpen = false;
265 bool attributesFound = false;
266 tag.isEndTag = false;
268 const char character = *markupStringBuffer;
269 if( LESS_THAN == character ) // '<'
274 // If the iterator is pointing to a '<' character, then checking whether it's a mark-up tag is needed.
275 ++markupStringBuffer;
276 if( markupStringBuffer < markupStringEndBuffer )
// White spaces between the '<' and the tag name are ignored.
278 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
// Read characters until the tag is complete or the end of the mark-up string is reached.
280 for( ; ( !isTag ) && ( markupStringBuffer < markupStringEndBuffer ); ++markupStringBuffer )
282 const char character = *markupStringBuffer;
284 if( SLASH == character ) // '/'
286 // if the tag has a '/' then it's an end or empty tag.
// White spaces following the '/' (outside quotation marks) are skipped. The pointer is then stepped back one position because the loop increments it again.
289 if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) && ( !isQuotationOpen ) )
291 ++markupStringBuffer;
292 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
293 --markupStringBuffer;
296 else if( GREATER_THAN == character ) // '>'
300 else if( QUOTATION_MARK == character )
// Track whether the following characters are inside quotation marks.
302 isQuotationOpen = !isQuotationOpen;
305 else if( WHITE_SPACE >= character ) // ' '
307 // If the tag contains white spaces then it may have attributes.
308 if( !isQuotationOpen )
310 attributesFound = true;
// The tag's buffer starts at the first character that is not one of the 'special' characters.
316 if( NULL == tag.buffer )
318 tag.buffer = markupStringBuffer;
321 // If it's not any of the 'special' characters then just add it to the tag string.
327 // If the tag string has white spaces, then parsing the attributes is needed.
328 if( attributesFound )
330 ParseAttributes( tag );
// Processes the given mark-up string: the characters outside the tags are appended to
// markupProcessData.markupProcessedText and the color and font tags are converted into
// runs stored in markupProcessData.colorRuns and markupProcessData.fontRuns.
339 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
341 // Reserve space for the plain text.
342 const Length markupStringSize = markupString.size();
343 markupProcessData.markupProcessedText.reserve( markupStringSize );
345 // Stack with the indices of the runs that have been opened; an index is pushed when a run is created and popped to set the run's number of characters.
346 StyleStack styleStack;
348 // Point to the next free position in the vectors of runs.
349 StyleStack::RunIndex colorRunIndex = 0u;
350 StyleStack::RunIndex fontRunIndex = 0u;
352 // Reserve an initial default capacity for the model's vectors.
353 markupProcessData.colorRuns.Reserve( DEFAULT_VECTOR_SIZE );
354 markupProcessData.fontRuns.Reserve( DEFAULT_VECTOR_SIZE );
356 // Get the mark-up string buffer.
357 const char* markupStringBuffer = markupString.c_str();
358 const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
// Index used as the first character of the runs created below.
361 CharacterIndex characterIndex = 0u;
362 for( ; markupStringBuffer < markupStringEndBuffer; )
364 if( IsTag( markupStringBuffer,
365 markupStringEndBuffer,
368 if( TokenComparison( XHTML_COLOR_TAG, tag.buffer, tag.length ) )
372 // Create a new color run.
374 colorRun.characterRun.numberOfCharacters = 0u;
376 // Set the start character index.
377 colorRun.characterRun.characterIndex = characterIndex;
379 // Fill the run with the attributes.
380 ProcessColorTag( tag, colorRun );
382 // Push the color run in the logical model.
383 markupProcessData.colorRuns.PushBack( colorRun );
385 // Push the index of the run into the stack.
386 styleStack.Push( colorRunIndex );
388 // Point the next color run.
393 // Pop the top of the stack and set the number of characters of the run.
394 ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() );
395 colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex;
398 else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
402 // Create a new font run.
403 FontDescriptionRun fontRun;
404 fontRun.characterRun.numberOfCharacters = 0u;
406 // Fill the run with the parameters.
407 fontRun.characterRun.characterIndex = characterIndex;
408 fontRun.slant = TextAbstraction::FontSlant::ITALIC;
// Only the slant is defined by the italic tag.
410 fontRun.familyName = NULL;
411 fontRun.familyDefined = false;
412 fontRun.weightDefined = false;
413 fontRun.widthDefined = false;
414 fontRun.slantDefined = true;
415 fontRun.sizeDefined = false;
417 // Push the font run in the logical model.
418 markupProcessData.fontRuns.PushBack( fontRun );
420 // Push the index of the run into the stack.
421 styleStack.Push( fontRunIndex );
423 // Point the next free font run.
428 // Pop the top of the stack and set the number of characters of the run.
429 FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
430 fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
433 else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
// NOTE(review): only comments are visible in this branch; underline runs appear not to be implemented. Confirm.
437 // Create a new underline run.
441 // Pop the top of the stack and set the number of characters of the run.
444 else if( TokenComparison( XHTML_B_TAG, tag.buffer, tag.length ) )
448 // Create a new font run.
449 FontDescriptionRun fontRun;
450 fontRun.characterRun.numberOfCharacters = 0u;
452 // Fill the run with the parameters.
453 fontRun.characterRun.characterIndex = characterIndex;
455 fontRun.weight = TextAbstraction::FontWeight::BOLD;
// Only the weight is defined by the bold tag.
457 fontRun.familyName = NULL;
458 fontRun.familyDefined = false;
459 fontRun.weightDefined = true;
460 fontRun.widthDefined = false;
461 fontRun.slantDefined = false;
462 fontRun.sizeDefined = false;
464 // Push the font run in the logical model.
465 markupProcessData.fontRuns.PushBack( fontRun );
467 // Push the index of the run into the stack.
468 styleStack.Push( fontRunIndex );
470 // Point the next free font run.
475 // Pop the top of the stack and set the number of characters of the run.
476 FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
477 fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
480 else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
484 // Create a new font run.
485 FontDescriptionRun fontRun;
486 fontRun.characterRun.numberOfCharacters = 0u;
488 // Fill the run with the parameters.
489 fontRun.characterRun.characterIndex = characterIndex;
// Nothing is defined by default; the run is then passed to ProcessFontTag() together with the tag.
491 fontRun.familyName = NULL;
492 fontRun.familyDefined = false;
493 fontRun.weightDefined = false;
494 fontRun.widthDefined = false;
495 fontRun.slantDefined = false;
496 fontRun.sizeDefined = false;
498 ProcessFontTag( tag, fontRun );
500 // Push the font run in the logical model.
501 markupProcessData.fontRuns.PushBack( fontRun );
503 // Push the index of the run into the stack.
504 styleStack.Push( fontRunIndex );
506 // Point the next free font run.
511 // Pop the top of the stack and set the number of characters of the run.
512 FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
513 fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
516 else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
// NOTE(review): only comments are visible in this branch; shadow runs appear not to be implemented. Confirm.
520 // Create a new shadow run.
524 // Pop the top of the stack and set the number of characters of the run.
526 } // <shadow></shadow>
527 else if( TokenComparison( XHTML_GLOW_TAG, tag.buffer, tag.length ) )
// NOTE(review): only comments are visible in this branch; glow runs appear not to be implemented. Confirm.
531 // Create a new glow run.
535 // Pop the top of the stack and set the number of characters of the run.
538 else if( TokenComparison( XHTML_OUTLINE_TAG, tag.buffer, tag.length ) )
// NOTE(review): only comments are visible in this branch; outline runs appear not to be implemented. Confirm.
542 // Create a new outline run.
546 // Pop the top of the stack and set the number of characters of the run.
548 } // <outline></outline>
549 } // end if( IsTag() )
// Not a tag: the character is added to the processed text.
552 unsigned char character = *markupStringBuffer;
554 if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
556 // A back slash followed by '<' or '>' adds that special character to the text; the back slash itself is dropped.
557 const unsigned char nextCharacter = *( markupStringBuffer + 1u );
558 if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) )
560 character = nextCharacter;
561 ++markupStringBuffer;
564 else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
566 // Replacing CR+LF end line by LF.
567 if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) )
569 character = LINE_SEPARATOR_LF;
570 ++markupStringBuffer;
// Presumably the number of bytes of the UTF-8 character that starts with the given byte; verify against GetUtf8Length().
574 const unsigned char numberOfBytes = GetUtf8Length( character );
576 markupProcessData.markupProcessedText.push_back( character );
// Copy the remaining bytes of the character.
// NOTE(review): the pointer is advanced here without being compared against markupStringEndBuffer; confirm that a truncated multi-byte sequence at the end of the string cannot read past the buffer.
577 for( unsigned char i = 1u; i < numberOfBytes; ++i )
579 ++markupStringBuffer;
580 markupProcessData.markupProcessedText.push_back( *markupStringBuffer );
584 ++markupStringBuffer;
588 // Resize the model's vectors to the number of runs counted, or clear them if there are none.
589 if( 0u == fontRunIndex )
591 markupProcessData.fontRuns.Clear();
595 markupProcessData.fontRuns.Resize( fontRunIndex );
598 if( 0u == colorRunIndex )
600 markupProcessData.colorRuns.Clear();
604 markupProcessData.colorRuns.Resize( colorRunIndex );
610 } // namespace Toolkit