2 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali-toolkit/internal/text/markup-processor.h>
22 #include <dali-toolkit/internal/text/character-set-conversion.h>
23 #include <dali-toolkit/internal/text/markup-processor-color.h>
24 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
37 // HTML-ISH tag and attribute constants.
38 // Note they must be lower case for the comparison to work,
39 // as the parser converts all the read tags to lower case.
40 const std::string XHTML_COLOR_TAG("color");
41 const std::string XHTML_FONT_TAG("font");
42 const std::string XHTML_B_TAG("b");
43 const std::string XHTML_I_TAG("i");
44 const std::string XHTML_U_TAG("u");
45 const std::string XHTML_SHADOW_TAG("shadow");
46 const std::string XHTML_GLOW_TAG("glow");
47 const std::string XHTML_OUTLINE_TAG("outline");
// Single characters with a special meaning for the tag and attribute parsers below.
49 const char LESS_THAN = '<';
50 const char GREATER_THAN = '>';
51 const char EQUAL = '=';
52 const char QUOTATION_MARK = '\''; // Single quote; toggles the 'quotation open' state while parsing.
53 const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character CR
54 const char LINE_SEPARATOR_LF = 0x0A; // New line character LF
55 const char SLASH = '/';
56 const char BACK_SLASH = '\\';
58 const char WHITE_SPACE = 0x20; // ASCII value of the white space. Any character less than or equal to it is treated as white space by the parsers.
60 const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size', 'weight', 'width' and 'slant' attributes.
62 const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of run vectors.
65 * @brief Struct used to retrieve the style runs from the mark-up string.
// Index type used to refer to a run stored in one of the run vectors.
69 typedef VectorBase::SizeType RunIndex;
71 Vector<RunIndex> stack; ///< Use a vector as a style stack. Stores the indices pointing to where each run is stored inside the logical model.
72 unsigned int topIndex; ///< Points to the top of the stack (the slot the next Push() writes to).
// Pre-size the stack so that Push() has room before it needs to grow the vector.
78 stack.Resize( DEFAULT_VECTOR_SIZE );
81 void Push( RunIndex index )
83 // Check if there is space inside the style stack.
84 const VectorBase::SizeType size = stack.Count();
85 if( topIndex >= size )
87 // The stack is full: double its size.
88 stack.Resize( 2u * size );
91 // Set the run index in the top of the stack.
92 *( stack.Begin() + topIndex ) = index;
94 // Reposition the pointer to the top of the stack.
100 // Pop the top of the stack.
// NOTE(review): no emptiness check is visible here; confirm callers never pop more entries than they pushed.
102 return *( stack.Begin() + topIndex );
107 * @brief Splits the tag string into the tag name and its attributes.
109 * The attributes are stored in a vector in the tag.
111 * @param[in,out] tag The tag.
113 void ParseAttributes( Tag& tag )
// Make room for the maximum number of attributes; the vector is resized to attributeIndex at the end.
115 tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
117 // First find the tag name.
118 bool isQuotationOpen = false;
120 const char* tagBuffer = tag.buffer;
121 const char* const tagEndBuffer = tagBuffer + tag.length;
123 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
125 const char character = *tagBuffer;
126 if( WHITE_SPACE < character )
132 // Stops counting the length of the tag when a white space is found.
133 // @note a white space is the WHITE_SPACE character and anything below it, such as 'tab', 'return' or other control characters.
// Move to the first character after the white spaces that follow the tag name.
137 SkipWhiteSpace( tagBuffer, tagEndBuffer );
139 // Find the attributes.
140 unsigned int attributeIndex = 0u;
// Start and length of the name and the value of the attribute being read. They point inside the tag's buffer; nothing is copied.
141 const char* nameBuffer = NULL;
142 const char* valueBuffer = NULL;
143 Length nameLength = 0u;
144 Length valueLength = 0u;
146 bool addToNameValue = true;
147 Length numberOfWhiteSpace = 0u;
148 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
150 const char character = *tagBuffer;
// A white space outside of a quotation ends the current attribute.
151 if( ( WHITE_SPACE >= character ) && !isQuotationOpen )
153 if( NULL != valueBuffer )
155 // Remove white spaces at the end of the value.
156 valueLength -= numberOfWhiteSpace;
159 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
161 // Every time a white space is found, a new attribute is created and stored in the attributes vector.
// NOTE(review): no check of attributeIndex against MAX_NUM_OF_ATTRIBUTES is visible here; confirm a tag with more attributes cannot write past the end of the vector.
162 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
165 attribute.nameBuffer = nameBuffer;
166 attribute.valueBuffer = valueBuffer;
167 attribute.nameLength = nameLength;
168 attribute.valueLength = valueLength;
175 addToNameValue = true; // next read characters will be added to the name.
178 else if( EQUAL == character ) // '='
180 addToNameValue = false; // next read characters will be added to the value.
181 SkipWhiteSpace( tagBuffer, tagEndBuffer );
183 else if( QUOTATION_MARK == character ) // '\''
185 // Do not add quotation marks to either the name or the value.
186 isQuotationOpen = !isQuotationOpen;
188 if( isQuotationOpen )
// An opening quotation mark was read: skip the white spaces that follow it.
191 SkipWhiteSpace( tagBuffer, tagEndBuffer );
197 // Adds characters to the name or the value.
200 if( NULL == nameBuffer )
// Remember where the name starts.
202 nameBuffer = tagBuffer;
208 if( isQuotationOpen )
// Inside a quotation, count consecutive white spaces so trailing ones can be removed from the value.
210 if( WHITE_SPACE >= character )
212 ++numberOfWhiteSpace;
216 numberOfWhiteSpace = 0u;
219 if( NULL == valueBuffer )
// Remember where the value starts.
221 valueBuffer = tagBuffer;
// The end of the tag has been reached: handle the attribute that was being read, if any.
228 if( NULL != valueBuffer )
230 // Remove white spaces at the end of the value.
231 valueLength -= numberOfWhiteSpace;
234 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
236 // Checks if the last attribute needs to be added.
// NOTE(review): same unchecked index as above; confirm attributeIndex is still inside the vector here.
237 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
240 attribute.nameBuffer = nameBuffer;
241 attribute.valueBuffer = valueBuffer;
242 attribute.nameLength = nameLength;
243 attribute.valueLength = valueLength;
246 // Resize the vector of attributes.
247 tag.attributes.Resize( attributeIndex );
251 * @brief It parses a tag and its attributes if the given buffer pointer @e markupStringBuffer is pointing at a tag beginning.
253 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a pointer to the current character.
254 * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
255 * @param[out] tag The tag with its attributes.
257 * @return @e true if @e markupStringBuffer is pointing to a mark-up tag. Otherwise @e false.
259 bool IsTag( const char*& markupStringBuffer,
260 const char* const markupStringEndBuffer,
264 bool isQuotationOpen = false;
265 bool attributesFound = false;
266 tag.isEndTag = false;
// NOTE(review): the pointer is dereferenced without a bounds check here; the caller visible in this file only calls IsTag() while the pointer is before the end of the buffer.
268 const char character = *markupStringBuffer;
269 if( LESS_THAN == character ) // '<'
274 // If the buffer is pointing to a '<' character, checking whether it is a mark-up tag is needed.
275 ++markupStringBuffer;
276 if( markupStringBuffer < markupStringEndBuffer )
// White spaces between the '<' and the tag's first character are ignored.
278 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
// Read characters until the tag is closed or the end of the buffer is reached.
280 for( ; ( !isTag ) && ( markupStringBuffer < markupStringEndBuffer ); ++markupStringBuffer )
282 const char character = *markupStringBuffer;
284 if( SLASH == character ) // '/'
286 // if the tag has a '/' then it's an end or empty tag.
// If the '/' is followed by white spaces (outside of a quotation), skip them.
289 if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) && ( !isQuotationOpen ) )
291 ++markupStringBuffer;
292 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
// Step back one character, as the for loop increments the pointer again.
293 --markupStringBuffer;
296 else if( GREATER_THAN == character ) // '>'
300 else if( QUOTATION_MARK == character )
// Keep track of whether the characters being read are inside a quotation.
302 isQuotationOpen = !isQuotationOpen;
305 else if( WHITE_SPACE >= character ) // ' '
307 // If the tag contains white spaces then it may have attributes.
308 if( !isQuotationOpen )
310 attributesFound = true;
316 if( NULL == tag.buffer )
// Remember where the tag string starts inside the mark-up string.
318 tag.buffer = markupStringBuffer;
321 // If it's not any of the 'special' characters then just add it to the tag string.
327 // If the tag string has white spaces, then parsing the attributes is needed.
328 if( attributesFound )
330 ParseAttributes( tag );
339 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
341 // Reserve space for the plain text.
// The mark-up string's size is used as the capacity to reserve for the processed text.
342 const Length markupStringSize = markupString.size();
343 markupProcessData.markupProcessedText.reserve( markupStringSize );
345 // Stores the indices of the runs whose tags have been opened.
346 StyleStack styleStack;
348 // Points to the next free position in the vector of runs.
349 StyleStack::RunIndex colorRunIndex = 0u;
351 // Give an initial default capacity to the model's vectors.
352 markupProcessData.colorRuns.Reserve( DEFAULT_VECTOR_SIZE );
354 // Get the mark-up string buffer.
355 const char* markupStringBuffer = markupString.c_str();
356 const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
// Character index used as the start of a new run and to compute a run's number of characters when it is closed.
359 CharacterIndex characterIndex = 0u;
// The pointer is advanced inside the loop's body (and by IsTag(), which takes it by reference).
360 for( ; markupStringBuffer < markupStringEndBuffer; )
362 if( IsTag( markupStringBuffer,
363 markupStringEndBuffer,
366 if( TokenComparison( XHTML_COLOR_TAG, tag.buffer, tag.length ) )
370 // Create a new color run.
372 colorRun.characterRun.numberOfCharacters = 0u;
374 // Set the start character index.
375 colorRun.characterRun.characterIndex = characterIndex;
377 // Fill the run with the attributes.
378 ProcessColorTag( tag, colorRun );
380 // Push the color run in the logical model.
381 markupProcessData.colorRuns.PushBack( colorRun );
383 // Push the index of the run into the stack.
384 styleStack.Push( colorRunIndex );
386 // Point to the next color run.
391 // Pop the top of the stack and set the number of characters of the run.
// NOTE(review): no check that the stack is non-empty is visible here; confirm an end tag without a matching start tag cannot reach this Pop().
392 ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() );
393 colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex;
// The branches for the remaining tags contain only comments in the code visible here.
396 else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
400 // Create a new font run.
404 // Pop the top of the stack and set the number of characters of the run.
407 else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
411 // Create a new underline run.
415 // Pop the top of the stack and set the number of characters of the run.
418 else if( TokenComparison( XHTML_B_TAG, tag.buffer, tag.length ) )
422 // Create a new font run.
426 // Pop the top of the stack and set the number of characters of the run.
429 else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
433 // Create a new font run.
437 // Pop the top of the stack and set the number of characters of the run.
440 else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
444 // Create a new shadow run.
448 // Pop the top of the stack and set the number of characters of the run.
450 } // <shadow></shadow>
451 else if( TokenComparison( XHTML_GLOW_TAG, tag.buffer, tag.length ) )
455 // Create a new glow run.
459 // Pop the top of the stack and set the number of characters of the run.
462 else if( TokenComparison( XHTML_OUTLINE_TAG, tag.buffer, tag.length ) )
466 // Create a new outline run.
470 // Pop the top of the stack and set the number of characters of the run.
472 } // <outline></outline>
473 } // end if( IsTag() )
// Plain text handling: the character read here is appended to the processed text below.
476 unsigned char character = *markupStringBuffer;
478 if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
480 // Adding < or > special character.
481 const unsigned char nextCharacter = *( markupStringBuffer + 1u );
482 if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) )
// Keep the escaped character and skip the back slash.
484 character = nextCharacter;
485 ++markupStringBuffer;
488 else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
490 // Replacing CR+LF end line by LF.
491 if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) )
493 character = LINE_SEPARATOR_LF;
494 ++markupStringBuffer;
// presumably the number of bytes of the utf-8 sequence that starts with this character -- confirm against GetUtf8Length().
498 const unsigned char numberOfBytes = GetUtf8Length( character );
500 markupProcessData.markupProcessedText.push_back( character );
// Copy the remaining bytes of the character.
// NOTE(review): the pointer is not checked against markupStringEndBuffer inside this loop; confirm a truncated multi-byte sequence at the end of the string cannot be read past the buffer.
501 for( unsigned char i = 1u; i < numberOfBytes; ++i )
503 ++markupStringBuffer;
504 markupProcessData.markupProcessedText.push_back( *markupStringBuffer );
508 ++markupStringBuffer;
512 // Resize the model's vectors.
513 if( 0u == colorRunIndex )
// No color run index was assigned: leave the vector of color runs empty.
515 markupProcessData.colorRuns.Clear();
519 markupProcessData.colorRuns.Resize( colorRunIndex );
525 } // namespace Toolkit