2 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali-toolkit/internal/text/markup-processor.h>
22 #include <dali-toolkit/internal/text/character-set-conversion.h>
23 #include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
36 // HTML-ISH tag and attribute constants.
37 // Note they must be lower case for the comparison to work,
38 // as the parser converts all the read tags to lower case.
// Each name below is compared against the parsed tag name in ProcessMarkupString().
39 const std::string XHTML_COLOR_TAG("color");
40 const std::string XHTML_FONT_TAG("font");
41 const std::string XHTML_B_TAG("b");
42 const std::string XHTML_I_TAG("i");
43 const std::string XHTML_U_TAG("u");
44 const std::string XHTML_SHADOW_TAG("shadow");
45 const std::string XHTML_GLOW_TAG("glow");
46 const std::string XHTML_OUTLINE_TAG("outline");
// Single characters given a special meaning by the parsing code in this file.
48 const char LESS_THAN = '<';
49 const char GREATER_THAN = '>';
50 const char EQUAL = '=';
// Note: the quotation mark recognised by the parser is the single quote (0x27), not the double quote.
51 const char QUOTATION_MARK = '\'';
52 const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character CR
53 const char LINE_SEPARATOR_LF = 0x0A; // New line character LF
54 const char SLASH = '/';
55 const char BACK_SLASH = '\\';
// Characters are tested with <= or >= against this value, so every character
// at or below 0x20 (tab, CR, LF, control characters) is handled as white space.
57 const char WHITE_SPACE = 0x20; // ASCII value of the white space.
// ParseAttributes() resizes the attribute vector of a tag to this value before parsing.
59 const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size', 'weight', 'width' and 'slant' attributes.
62 * @brief Splits the tag string into the tag name and its attributes.
64 * The attributes are stored in a vector in the tag.
66 * @param[in,out] tag The tag.
// Parses the text of a tag in place. The tag name is scanned first; each following
// name=value pair is then stored in tag.attributes as pointers and lengths that
// refer to the tag buffer (no characters are copied in the visible code).
68 void ParseAttributes( Tag& tag )
// Pre-size the vector to its maximum. It is resized again to the number of
// attributes actually stored at the end of this function.
// NOTE(review): no check of attributeIndex against MAX_NUM_OF_ATTRIBUTES is visible
// in this view. Confirm that a tag with more attributes cannot index past the vector.
70 tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
72 // First find the tag name.
// Tracks whether the scan is currently between two quotation marks.
73 bool isQuotationOpen = false;
// On entry tag.buffer and tag.length delimit the whole tag text (name plus attributes).
75 const char* tagBuffer = tag.buffer;
76 const char* const tagEndBuffer = tagBuffer + tag.length;
78 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
80 const char character = *tagBuffer;
// Any character above the space character belongs to the tag name.
// NOTE(review): character is a plain char. Where char is signed, bytes >= 0x80
// compare below WHITE_SPACE and are treated as white space. Confirm that only
// ASCII is expected in tag names.
81 if( WHITE_SPACE < character )
87 // Stops counting the length of the tag when a white space is found.
88 // @note a white space is the WHITE_SPACE character and anything below as 'tab', 'return' or 'control characters'.
// Move to the first character of the first attribute, if there is one.
92 SkipWhiteSpace( tagBuffer, tagEndBuffer );
94 // Find the attributes.
// Index of the next free slot in tag.attributes.
95 unsigned int attributeIndex = 0u;
// Start of the name and of the value of the attribute being read; NULL until its first character is seen.
96 const char* nameBuffer = NULL;
97 const char* valueBuffer = NULL;
98 Length nameLength = 0u;
99 Length valueLength = 0u;
// True while characters belong to the attribute name; false once the equal sign has been read.
101 bool addToNameValue = true;
// Counts white spaces read inside an open quotation. It is subtracted from
// valueLength so that trailing white spaces are not part of the value.
102 Length numberOfWhiteSpace = 0u;
103 for( ; tagBuffer < tagEndBuffer; ++tagBuffer )
105 const char character = *tagBuffer;
// A white space outside a quotation ends the current attribute.
106 if( ( WHITE_SPACE >= character ) && !isQuotationOpen )
108 if( NULL != valueBuffer )
110 // Remove white spaces at the end of the value.
111 valueLength -= numberOfWhiteSpace;
// Only a complete name and value pair is stored.
114 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
116 // Every time a white space is found, a new attribute is created and stored in the attributes vector.
117 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
120 attribute.nameBuffer = nameBuffer;
121 attribute.valueBuffer = valueBuffer;
122 attribute.nameLength = nameLength;
123 attribute.valueLength = valueLength;
130 addToNameValue = true; // next read characters will be added to the name.
133 else if( EQUAL == character ) // '='
135 addToNameValue = false; // next read characters will be added to the value.
136 SkipWhiteSpace( tagBuffer, tagEndBuffer );
138 else if( QUOTATION_MARK == character ) // '\''
140 // Do not add quotation marks to either the name or the value.
141 isQuotationOpen = !isQuotationOpen;
// When a quotation has just been opened, skip the white spaces that follow the mark.
143 if( isQuotationOpen )
146 SkipWhiteSpace( tagBuffer, tagEndBuffer );
152 // Adds characters to the name or the value.
// The name starts at the first ordinary character read for it.
155 if( NULL == nameBuffer )
157 nameBuffer = tagBuffer;
// Inside a quotation, white spaces are counted so that a trailing run can be trimmed later.
163 if( isQuotationOpen )
165 if( WHITE_SPACE >= character )
167 ++numberOfWhiteSpace;
// Resets the counter (the branch this belongs to is not visible in this view).
171 numberOfWhiteSpace = 0u;
// The value starts at the first character read for it.
174 if( NULL == valueBuffer )
176 valueBuffer = tagBuffer;
// The tag text may end without a trailing white space, so the attribute still
// being read when the loop finishes is trimmed and stored here.
183 if( NULL != valueBuffer )
185 // Remove white spaces at the end of the value.
186 valueLength -= numberOfWhiteSpace;
189 if( ( NULL != nameBuffer ) && ( NULL != valueBuffer ) )
191 // Checks if the last attribute needs to be added.
192 Attribute& attribute = *( tag.attributes.Begin() + attributeIndex );
195 attribute.nameBuffer = nameBuffer;
196 attribute.valueBuffer = valueBuffer;
197 attribute.nameLength = nameLength;
198 attribute.valueLength = valueLength;
201 // Resize the vector of attributes.
// attributeIndex is presumably the number of attributes stored (its increments are not visible in this view).
202 tag.attributes.Resize( attributeIndex );
206 * @brief It parses a tag and its attributes if the given iterator @e it is pointing at a tag beginning.
208 * @param[in,out] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character.
209 * @param[in] markupStringEndBuffer Pointer to one character after the end of the mark-up string buffer.
210 * @param[out] tag The tag with its attributes.
212 * @return @e true if the iterator @e it is pointing at a mark-up tag. Otherwise @e false.
// Scans the characters that follow a less-than sign and records where the tag text
// starts in tag.buffer. ParseAttributes() is called when the tag text contains an
// unquoted white space. Not every parameter of the signature is visible in this view.
214 bool IsTag( const char*& markupStringBuffer,
215 const char* const markupStringEndBuffer,
// Tracks whether the scan is currently between two quotation marks.
219 bool isQuotationOpen = false;
// Set when an unquoted white space is read inside the tag.
220 bool attributesFound = false;
221 tag.isEndTag = false;
// NOTE(review): the current character is read with no visible bounds check.
// Callers presumably guarantee markupStringBuffer < markupStringEndBuffer. Confirm.
223 const char character = *markupStringBuffer;
224 if( LESS_THAN == character ) // '<'
229 // if the iterator is pointing to a '<' character, then a check is needed to see whether it is a mark-up tag.
// Step over the less-than sign.
230 ++markupStringBuffer;
231 if( markupStringBuffer < markupStringEndBuffer )
// Leading white spaces between the less-than sign and the tag text are ignored.
233 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
// Reads characters until isTag becomes true or the buffer ends.
// The declaration of isTag and the place where it is set are not visible in this view.
235 for( ; ( !isTag ) && ( markupStringBuffer < markupStringEndBuffer ); ++markupStringBuffer )
237 const char character = *markupStringBuffer;
239 if( SLASH == character ) // '/'
241 // if the tag has a '/' then it's an end or empty tag.
// If the slash is followed by a white space outside a quotation, the white spaces
// are skipped. The pointer is then moved one step back so that the increment of
// the loop lands on the first character after them.
244 if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) && ( !isQuotationOpen ) )
246 ++markupStringBuffer;
247 SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
248 --markupStringBuffer;
// The handling of the greater-than sign is not visible in this view; presumably it completes the tag.
251 else if( GREATER_THAN == character ) // '>'
255 else if( QUOTATION_MARK == character )
257 isQuotationOpen = !isQuotationOpen;
260 else if( WHITE_SPACE >= character ) // ' '
262 // If the tag contains white spaces then it may have attributes.
// White spaces inside a quotation do not count.
263 if( !isQuotationOpen )
265 attributesFound = true;
// The tag text starts at the first character that reaches this point while tag.buffer is still NULL.
271 if( NULL == tag.buffer )
273 tag.buffer = markupStringBuffer;
276 // If it's not any of the 'special' characters then just add it to the tag string.
282 // If the tag string has white spaces, then the attributes need to be parsed.
283 if( attributesFound )
285 ParseAttributes( tag );
// Walks the mark-up string once. Tags recognised by IsTag() are dispatched on their
// name. Every other character is appended to markupProcessData.markupProcessedText
// after escape and line-ending handling.
294 void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
296 // Reserve space for the plain text.
// The capacity requested is the size of the mark-up string in bytes.
297 const Length markupStringSize = markupString.size();
298 markupProcessData.markupProcessedText.reserve( markupStringSize );
300 // Get the mark-up string buffer.
301 const char* markupStringBuffer = markupString.c_str();
302 const char* const markupStringEndBuffer = markupStringBuffer + markupStringSize;
// Presumably the index of the next character of the processed text.
// Its updates are not visible in this view. TODO confirm.
305 CharacterIndex characterIndex = 0u;
// The loop has no increment expression: the pointer is advanced in the body and by IsTag().
306 for( ; markupStringBuffer < markupStringEndBuffer; )
308 if( IsTag( markupStringBuffer,
309 markupStringEndBuffer,
// Dispatch on the tag name. The bodies of the branches are not visible in this view;
// the remaining comments indicate a run is created for an opening tag and the
// matching run is popped from a stack for a closing tag.
312 if( TokenComparison( XHTML_COLOR_TAG, tag.buffer, tag.length ) )
316 // Create a new color run.
320 // Pop the top of the stack and set the number of characters of the run.
323 else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
327 // Create a new font run.
331 // Pop the top of the stack and set the number of characters of the run.
334 else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
338 // Create a new underline run.
342 // Pop the top of the stack and set the number of characters of the run.
345 else if( TokenComparison( XHTML_B_TAG, tag.buffer, tag.length ) )
349 // Create a new font run.
353 // Pop the top of the stack and set the number of characters of the run.
356 else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
360 // Create a new font run.
364 // Pop the top of the stack and set the number of characters of the run.
367 else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
371 // Create a new shadow run.
375 // Pop the top of the stack and set the number of characters of the run.
377 } // <shadow></shadow>
378 else if( TokenComparison( XHTML_GLOW_TAG, tag.buffer, tag.length ) )
382 // Create a new glow run.
386 // Pop the top of the stack and set the number of characters of the run.
389 else if( TokenComparison( XHTML_OUTLINE_TAG, tag.buffer, tag.length ) )
393 // Create a new outline run.
397 // Pop the top of the stack and set the number of characters of the run.
399 } // <outline></outline>
400 } // end if( IsTag() )
// Handling of a character that is copied to the processed text.
// It is read as unsigned char before being passed to GetUtf8Length().
403 unsigned char character = *markupStringBuffer;
// A back slash followed by a less-than or greater-than sign is an escape:
// the back slash is dropped and the sign is emitted as plain text.
// A back slash before any other character is emitted unchanged.
405 if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
407 // Adding < or > special character.
408 const unsigned char nextCharacter = *( markupStringBuffer + 1u );
409 if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) )
411 character = nextCharacter;
412 ++markupStringBuffer;
// A CR that is not followed by LF is emitted unchanged.
415 else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
417 // Replacing CR+LF end line by LF.
418 if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) )
420 character = LINE_SEPARATOR_LF;
421 ++markupStringBuffer;
// Number of bytes of the UTF-8 sequence that starts with this byte, as reported by GetUtf8Length().
425 const unsigned char numberOfBytes = GetUtf8Length( character );
// Emit the first byte, then copy the remaining bytes of the sequence.
// NOTE(review): the copy loop below advances markupStringBuffer without a visible
// check against markupStringEndBuffer. Confirm that a truncated multi-byte sequence
// at the end of the string cannot be read past the buffer.
427 markupProcessData.markupProcessedText.push_back( character );
428 for( unsigned char i = 1u; i < numberOfBytes; ++i )
430 ++markupStringBuffer;
431 markupProcessData.markupProcessedText.push_back( *markupStringBuffer );
// Move to the byte that follows the character just emitted.
435 ++markupStringBuffer;
439 // Resize the model's vectors.
444 } // namespace Toolkit