X-Git-Url: http://review.tizen.org/git/?p=platform%2Fcore%2Fuifw%2Fdali-toolkit.git;a=blobdiff_plain;f=dali-toolkit%2Finternal%2Ftext%2Fmarkup-processor.cpp;h=dac4f922ef7905abd0408fba82c13f3112300c35;hp=51a1efbfd41a573a638b3ec199dae366c55a17e4;hb=d48e9b8b161367ad699c0352dfeb0128832bbe59;hpb=49fabc565606e00c95baacb41f009de2a532a4da diff --git a/dali-toolkit/internal/text/markup-processor.cpp b/dali-toolkit/internal/text/markup-processor.cpp old mode 100644 new mode 100755 index 51a1efb..dac4f92 --- a/dali-toolkit/internal/text/markup-processor.cpp +++ b/dali-toolkit/internal/text/markup-processor.cpp @@ -18,10 +18,17 @@ // FILE HEADER #include +// EXTERNAL INCLUDES +#include // for ULONG_MAX +#include + // INTERNAL INCLUDES #include #include +#include +#include #include +#include namespace Dali { @@ -45,22 +52,34 @@ const std::string XHTML_U_TAG("u"); const std::string XHTML_SHADOW_TAG("shadow"); const std::string XHTML_GLOW_TAG("glow"); const std::string XHTML_OUTLINE_TAG("outline"); - -const char LESS_THAN = '<'; -const char GREATER_THAN = '>'; -const char EQUAL = '='; -const char QUOTATION_MARK = '\''; -const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character CR -const char LINE_SEPARATOR_LF = 0x0A; // New line character LF -const char SLASH = '/'; -const char BACK_SLASH = '\\'; - -const char WHITE_SPACE = 0x20; // ASCII value of the white space. +const std::string XHTML_ITEM_TAG("item"); + +const char LESS_THAN = '<'; +const char GREATER_THAN = '>'; +const char EQUAL = '='; +const char QUOTATION_MARK = '\''; +const char SLASH = '/'; +const char BACK_SLASH = '\\'; +const char AMPERSAND = '&'; +const char HASH = '#'; +const char SEMI_COLON = ';'; +const char CHAR_ARRAY_END = '\0'; +const char HEX_CODE = 'x'; + +const char WHITE_SPACE = 0x20; // ASCII value of the white space. + +// Range 1 0x0u < XHTML_DECIMAL_ENTITY_RANGE <= 0xD7FFu +// Range 2 0xE000u < XHTML_DECIMAL_ENTITY_RANGE <= 0xFFFDu +// Range 3 0x10000u < XHTML_DECIMAL_ENTITY_RANGE <= 0x10FFFFu +const unsigned long XHTML_DECIMAL_ENTITY_RANGE[] = { 0x0u, 0xD7FFu, 0xE000u, 0xFFFDu, 0x10000u, 0x10FFFFu }; const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size' 'weight', 'width' and 'slant' attrubutes. - const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of run vectors. +#if defined(DEBUG_ENABLED) +Debug::Filter* gLogFilter = Debug::Filter::New(Debug::NoLogging, true, "LOG_MARKUP_PROCESSOR"); +#endif + /** * @brief Struct used to retrieve the style runs from the mark-up string. */ @@ -104,6 +123,28 @@ struct StyleStack }; /** + * @brief Initializes a font run description to its defaults. + * + * @param[in,out] fontRun The font description run to initialize. + */ +void Initialize( FontDescriptionRun& fontRun ) +{ + fontRun.characterRun.characterIndex = 0u; + fontRun.characterRun.numberOfCharacters = 0u; + fontRun.familyName = NULL; + fontRun.familyLength = 0u; + fontRun.weight = TextAbstraction::FontWeight::NORMAL; + fontRun.width = TextAbstraction::FontWidth::NORMAL; + fontRun.slant = TextAbstraction::FontSlant::NORMAL; + fontRun.size = 0u; + fontRun.familyDefined = false; + fontRun.weightDefined = false; + fontRun.widthDefined = false; + fontRun.slantDefined = false; + fontRun.sizeDefined = false; +} + +/** * @brief Splits the tag string into the tag name and its attributes. * * The attributes are stored in a vector in the tag. @@ -112,6 +153,11 @@ struct StyleStack */ void ParseAttributes( Tag& tag ) { + if( tag.buffer == NULL ) + { + return; + } + tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES ); // Find first the tag name. @@ -264,12 +310,15 @@ bool IsTag( const char*& markupStringBuffer, bool isQuotationOpen = false; bool attributesFound = false; tag.isEndTag = false; + bool isPreviousLessThan = false; + bool isPreviousSlash = false; const char character = *markupStringBuffer; if( LESS_THAN == character ) // '<' { tag.buffer = NULL; tag.length = 0u; + isPreviousLessThan = true; // if the iterator is pointing to a '<' character, then check if it's a mark-up tag is needed. ++markupStringBuffer; @@ -281,12 +330,20 @@ bool IsTag( const char*& markupStringBuffer, { const char character = *markupStringBuffer; - if( SLASH == character ) // '/' + if( !isQuotationOpen && ( SLASH == character ) ) // '/' { - // if the tag has a '/' then it's an end or empty tag. - tag.isEndTag = true; + if (isPreviousLessThan) + { + tag.isEndTag = true; + } + else + { + // if the tag has a '/' it may be an end tag. + isPreviousSlash = true; + } - if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) && ( !isQuotationOpen ) ) + isPreviousLessThan = false; + if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) ) { ++markupStringBuffer; SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer ); @@ -296,11 +353,21 @@ bool IsTag( const char*& markupStringBuffer, else if( GREATER_THAN == character ) // '>' { isTag = true; + if (isPreviousSlash) + { + tag.isEndTag = true; + } + + isPreviousSlash = false; + isPreviousLessThan = false; } else if( QUOTATION_MARK == character ) { isQuotationOpen = !isQuotationOpen; ++tag.length; + + isPreviousSlash = false; + isPreviousLessThan = false; } else if( WHITE_SPACE >= character ) // ' ' { @@ -320,6 +387,9 @@ bool IsTag( const char*& markupStringBuffer, // If it's not any of the 'special' characters then just add it to the tag string. ++tag.length; + + isPreviousSlash = false; + isPreviousLessThan = false; } } } @@ -334,10 +404,94 @@ bool IsTag( const char*& markupStringBuffer, return isTag; } +/** + * @brief Returns length of XHTML entity by parsing the text. It also determines if it is XHTML entity or not. + * + * @param[in] markupStringBuffer The mark-up string buffer. It's a const iterator pointing the current character. + * @param[in] markupStringEndBuffer Pointing to end of mark-up string buffer. + * + * @return Length of markupText in case of XHTML entity otherwise return 0. + */ +unsigned int GetXHTMLEntityLength( const char*& markupStringBuffer, + const char* const markupStringEndBuffer ) +{ + char character = *markupStringBuffer; + if( AMPERSAND == character ) // '&' + { + // if the iterator is pointing to a '&' character, then check for ';' to find end to XHTML entity. + ++markupStringBuffer; + if( markupStringBuffer < markupStringEndBuffer ) + { + unsigned int len = 1u; + for( ; markupStringBuffer < markupStringEndBuffer ; ++markupStringBuffer ) + { + character = *markupStringBuffer; + ++len; + if( SEMI_COLON == character ) // ';' + { + // found end of XHTML entity + ++markupStringBuffer; + return len; + } + else if( ( AMPERSAND == character ) || ( BACK_SLASH == character ) || ( LESS_THAN == character )) + { + return 0; + } + } + } + } + return 0; +} + +/** + * @brief It parses a XHTML string which has hex/decimal entity and fill its corresponging utf-8 string. + * + * @param[in] markupText The mark-up text buffer. + * @param[out] utf-8 text Corresponding to markup Text + * + * @return true if string is successfully parsed otherwise false + */ +bool XHTMLNumericEntityToUtf8 ( const char* markupText, char* utf8 ) +{ + bool result = false; + + if( NULL != markupText ) + { + bool isHex = false; + + // check if hex or decimal entity + if( ( CHAR_ARRAY_END != *markupText ) && ( HEX_CODE == *markupText ) ) + { + isHex = true; + ++markupText; + } + + char* end = NULL; + unsigned long l = strtoul( markupText, &end, ( isHex ? 16 : 10 ) ); // l contains UTF-32 code in case of correct XHTML entity + + // check for valid XHTML numeric entities (between '#' or "#x" and ';') + if( ( l > 0 ) && ( l < ULONG_MAX ) && ( *end == SEMI_COLON ) ) // in case wrong XHTML entity is set eg. "abcdefs;" in that case *end will be 'a' + { + /* characters XML 1.1 permits */ + if( ( ( XHTML_DECIMAL_ENTITY_RANGE[0] < l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[1] ) ) || + ( ( XHTML_DECIMAL_ENTITY_RANGE[2] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[3] ) ) || + ( ( XHTML_DECIMAL_ENTITY_RANGE[4] <= l ) && ( l <= XHTML_DECIMAL_ENTITY_RANGE[5] ) ) ) + { + // Convert UTF32 code to UTF8 + Utf32ToUtf8( reinterpret_cast( &l ), 1, reinterpret_cast( utf8 ) ); + result = true; + } + } + } + return result; +} + } // namespace void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData ) { + DALI_LOG_INFO( gLogFilter, Debug::Verbose, "markupString: %s\n", markupString.c_str() ); + // Reserve space for the plain text. const Length markupStringSize = markupString.size(); markupProcessData.markupProcessedText.reserve( markupStringSize ); @@ -347,9 +501,17 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma // Points the next free position in the vector of runs. StyleStack::RunIndex colorRunIndex = 0u; + StyleStack::RunIndex fontRunIndex = 0u; + + // check tag reference + int colorTagReference = 0u; + int fontTagReference = 0u; + int iTagReference = 0u; + int bTagReference = 0u; // Give an initial default value to the model's vectors. markupProcessData.colorRuns.Reserve( DEFAULT_VECTOR_SIZE ); + markupProcessData.fontRuns.Reserve( DEFAULT_VECTOR_SIZE ); // Get the mark-up string buffer. const char* markupStringBuffer = markupString.c_str(); @@ -359,6 +521,7 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma CharacterIndex characterIndex = 0u; for( ; markupStringBuffer < markupStringEndBuffer; ) { + tag.attributes.Clear(); if( IsTag( markupStringBuffer, markupStringEndBuffer, tag ) ) @@ -385,12 +548,19 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma // Point the next color run. ++colorRunIndex; + + // Increase reference + ++colorTagReference; } else { - // Pop the top of the stack and set the number of characters of the run. - ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() ); - colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex; + if( colorTagReference > 0 ) + { + // Pop the top of the stack and set the number of characters of the run. + ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() ); + colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex; + --colorTagReference; + } } } // else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) ) @@ -398,10 +568,35 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma if( !tag.isEndTag ) { // Create a new font run. + FontDescriptionRun fontRun; + Initialize( fontRun ); + + // Fill the run with the parameters. + fontRun.characterRun.characterIndex = characterIndex; + fontRun.slant = TextAbstraction::FontSlant::ITALIC; + fontRun.slantDefined = true; + + // Push the font run in the logical model. + markupProcessData.fontRuns.PushBack( fontRun ); + + // Push the index of the run into the stack. + styleStack.Push( fontRunIndex ); + + // Point the next free font run. + ++fontRunIndex; + + // Increase reference + ++iTagReference; } else { - // Pop the top of the stack and set the number of characters of the run. + if( iTagReference > 0 ) + { + // Pop the top of the stack and set the number of characters of the run. + FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() ); + fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex; + --iTagReference; + } } } // else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) ) @@ -420,10 +615,35 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma if( !tag.isEndTag ) { // Create a new font run. + FontDescriptionRun fontRun; + Initialize( fontRun ); + + // Fill the run with the parameters. + fontRun.characterRun.characterIndex = characterIndex; + fontRun.weight = TextAbstraction::FontWeight::BOLD; + fontRun.weightDefined = true; + + // Push the font run in the logical model. + markupProcessData.fontRuns.PushBack( fontRun ); + + // Push the index of the run into the stack. + styleStack.Push( fontRunIndex ); + + // Point the next free font run. + ++fontRunIndex; + + // Increase reference + ++bTagReference; } else { - // Pop the top of the stack and set the number of characters of the run. + if( bTagReference > 0 ) + { + // Pop the top of the stack and set the number of characters of the run. + FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() ); + fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex; + --bTagReference; + } } } // else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) ) @@ -431,10 +651,35 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma if( !tag.isEndTag ) { // Create a new font run. + FontDescriptionRun fontRun; + Initialize( fontRun ); + + // Fill the run with the parameters. + fontRun.characterRun.characterIndex = characterIndex; + + ProcessFontTag( tag, fontRun ); + + // Push the font run in the logical model. + markupProcessData.fontRuns.PushBack( fontRun ); + + // Push the index of the run into the stack. + styleStack.Push( fontRunIndex ); + + // Point the next free font run. + ++fontRunIndex; + + // Increase reference + ++fontTagReference; } else { - // Pop the top of the stack and set the number of characters of the run. + if( fontTagReference > 0 ) + { + // Pop the top of the stack and set the number of characters of the run. + FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() ); + fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex; + --fontTagReference; + } } } // else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) ) @@ -470,46 +715,114 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma // Pop the top of the stack and set the number of characters of the run. } } // + else if (TokenComparison(XHTML_ITEM_TAG, tag.buffer, tag.length)) + { + if (tag.isEndTag) + { + // Create an embedded item instance. + EmbeddedItem item; + item.characterIndex = characterIndex; + ProcessEmbeddedItem(tag, item); + + markupProcessData.items.PushBack(item); + + // Insert white space character that will be replaced by the item. + markupProcessData.markupProcessedText.append( 1u, WHITE_SPACE ); + ++characterIndex; + } + } } // end if( IsTag() ) - else + else if( markupStringBuffer < markupStringEndBuffer ) { unsigned char character = *markupStringBuffer; + const char* markupBuffer = markupStringBuffer; + unsigned char count = GetUtf8Length( character ); + char utf8[8]; if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) ) { - // Adding < or > special character. + // Adding < , > or & special character. const unsigned char nextCharacter = *( markupStringBuffer + 1u ); - if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) ) + if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) || ( AMPERSAND == nextCharacter ) ) { character = nextCharacter; ++markupStringBuffer; + + count = GetUtf8Length( character ); + markupBuffer = markupStringBuffer; } } - else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) ) + else // checking if conatins XHTML entity or not { - // Replacing CR+LF end line by LF. - if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) ) + const unsigned int len = GetXHTMLEntityLength( markupStringBuffer, markupStringEndBuffer); + + // Parse markupStringTxt if it contains XHTML Entity between '&' and ';' + if( len > 0 ) { - character = LINE_SEPARATOR_LF; - ++markupStringBuffer; + char* entityCode = NULL; + bool result = false; + count = 0; + + // Checking if XHTML Numeric Entity + if( HASH == *( markupBuffer + 1u ) ) + { + entityCode = &utf8[0]; + // markupBuffer is currently pointing to '&'. By adding 2u to markupBuffer it will point to numeric string by skipping "&#' + result = XHTMLNumericEntityToUtf8( ( markupBuffer + 2u ), entityCode ); + } + else // Checking if XHTML Named Entity + { + entityCode = const_cast ( NamedEntityToUtf8( markupBuffer, len ) ); + result = ( entityCode != NULL ); + } + if ( result ) + { + markupBuffer = entityCode; //utf8 text assigned to markupBuffer + character = markupBuffer[0]; + } + else + { + DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not valid XHTML entity : (%.*s) \n", len, markupBuffer ); + markupBuffer = NULL; + } + } + else // in case string conatins Start of XHTML Entity('&') but not its end character(';') + { + if( character == AMPERSAND ) + { + markupBuffer = NULL; + DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not Well formed XHTML content \n" ); + } } } - const unsigned char numberOfBytes = GetUtf8Length( character ); - - markupProcessData.markupProcessedText.push_back( character ); - for( unsigned char i = 1u; i < numberOfBytes; ++i ) + if( markupBuffer != NULL ) { - ++markupStringBuffer; - markupProcessData.markupProcessedText.push_back( *markupStringBuffer ); - } + const unsigned char numberOfBytes = GetUtf8Length( character ); + markupProcessData.markupProcessedText.push_back( character ); + + for( unsigned char i = 1u; i < numberOfBytes; ++i ) + { + ++markupBuffer; + markupProcessData.markupProcessedText.push_back( *markupBuffer ); + } - ++characterIndex; - ++markupStringBuffer; + ++characterIndex; + markupStringBuffer += count; + } } } // Resize the model's vectors. + if( 0u == fontRunIndex ) + { + markupProcessData.fontRuns.Clear(); + } + else + { + markupProcessData.fontRuns.Resize( fontRunIndex ); + } + if( 0u == colorRunIndex ) { markupProcessData.colorRuns.Clear(); @@ -517,6 +830,14 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma else { markupProcessData.colorRuns.Resize( colorRunIndex ); + +#ifdef DEBUG_ENABLED + for( unsigned int i=0; i