// FILE HEADER
#include <dali-toolkit/internal/text/markup-processor.h>
+// EXTERNAL INCLUDES
+#include <climits> // for ULONG_MAX
+#include <dali/integration-api/debug.h>
+
// INTERNAL INCLUDES
#include <dali-toolkit/internal/text/character-set-conversion.h>
#include <dali-toolkit/internal/text/markup-processor-color.h>
+#include <dali-toolkit/internal/text/markup-processor-embedded-item.h>
#include <dali-toolkit/internal/text/markup-processor-font.h>
#include <dali-toolkit/internal/text/markup-processor-helper-functions.h>
+#include <dali-toolkit/internal/text/xhtml-entities.h>
namespace Dali
{
const std::string XHTML_SHADOW_TAG("shadow");
const std::string XHTML_GLOW_TAG("glow");
const std::string XHTML_OUTLINE_TAG("outline");
-
-const char LESS_THAN = '<';
-const char GREATER_THAN = '>';
-const char EQUAL = '=';
-const char QUOTATION_MARK = '\'';
-const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character CR
-const char LINE_SEPARATOR_LF = 0x0A; // New line character LF
-const char SLASH = '/';
-const char BACK_SLASH = '\\';
-
-const char WHITE_SPACE = 0x20; // ASCII value of the white space.
+const std::string XHTML_ITEM_TAG("item");
+
+const char LESS_THAN = '<';
+const char GREATER_THAN = '>';
+const char EQUAL = '=';
+const char QUOTATION_MARK = '\'';
+const char SLASH = '/';
+const char BACK_SLASH = '\\';
+const char AMPERSAND = '&';
+const char HASH = '#';
+const char SEMI_COLON = ';';
+const char CHAR_ARRAY_END = '\0';
+const char HEX_CODE = 'x';
+
+const char WHITE_SPACE = 0x20; // ASCII value of the white space.
+
+// Code point ranges accepted for XHTML numeric entities, stored as consecutive ( lower, upper ) pairs.
+// Range 1: 0x0u < code <= 0xD7FFu (the lower bound is exclusive, so zero is rejected)
+// Range 2: 0xE000u <= code <= 0xFFFDu
+// Range 3: 0x10000u <= code <= 0x10FFFFu
+const unsigned long XHTML_DECIMAL_ENTITY_RANGE[] = { 0x0u, 0xD7FFu, 0xE000u, 0xFFFDu, 0x10000u, 0x10FFFFu };
const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size', 'weight', 'width' and 'slant' attributes.
const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of run vectors.
+#if defined(DEBUG_ENABLED)
+Debug::Filter* gLogFilter = Debug::Filter::New(Debug::NoLogging, true, "LOG_MARKUP_PROCESSOR");
+#endif
+
/**
* @brief Struct used to retrieve the style runs from the mark-up string.
*/
};
/**
+ * @brief Resets every field of a font description run to its neutral state.
+ *
+ * After the call the run covers no characters, has no family name, uses the
+ * normal weight, width and slant, has a zero size and has all of its
+ * 'defined' flags cleared.
+ *
+ * @param[in,out] fontRun The font description run to reset.
+ */
+void Initialize( FontDescriptionRun& fontRun )
+{
+  // Nothing has been set explicitly yet, so clear every 'defined' flag.
+  fontRun.sizeDefined = false;
+  fontRun.slantDefined = false;
+  fontRun.widthDefined = false;
+  fontRun.weightDefined = false;
+  fontRun.familyDefined = false;
+
+  // Neutral font attributes.
+  fontRun.size = 0u;
+  fontRun.slant = TextAbstraction::FontSlant::NORMAL;
+  fontRun.width = TextAbstraction::FontWidth::NORMAL;
+  fontRun.weight = TextAbstraction::FontWeight::NORMAL;
+
+  // No family name is attached to the run.
+  fontRun.familyLength = 0u;
+  fontRun.familyName = NULL;
+
+  // The run starts at the first character and is empty.
+  fontRun.characterRun.numberOfCharacters = 0u;
+  fontRun.characterRun.characterIndex = 0u;
+}
+
+/**
* @brief Splits the tag string into the tag name and its attributes.
*
* The attributes are stored in a vector in the tag.
*/
void ParseAttributes( Tag& tag )
{
+ if( tag.buffer == NULL )
+ {
+ return;
+ }
+
tag.attributes.Resize( MAX_NUM_OF_ATTRIBUTES );
// Find first the tag name.
bool isQuotationOpen = false;
bool attributesFound = false;
tag.isEndTag = false;
+ bool isPreviousLessThan = false;
+ bool isPreviousSlash = false;
const char character = *markupStringBuffer;
if( LESS_THAN == character ) // '<'
{
tag.buffer = NULL;
tag.length = 0u;
+ isPreviousLessThan = true;
// if the iterator is pointing to a '<' character, then check if it's a mark-up tag is needed.
++markupStringBuffer;
{
const char character = *markupStringBuffer;
- if( SLASH == character ) // '/'
+ if( !isQuotationOpen && ( SLASH == character ) ) // '/'
{
- // if the tag has a '/' then it's an end or empty tag.
- tag.isEndTag = true;
+ if (isPreviousLessThan)
+ {
+ tag.isEndTag = true;
+ }
+ else
+ {
+ // if the tag has a '/' it may be an end tag.
+ isPreviousSlash = true;
+ }
- if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) && ( !isQuotationOpen ) )
+ isPreviousLessThan = false;
+ if( ( markupStringBuffer + 1u < markupStringEndBuffer ) && ( WHITE_SPACE >= *( markupStringBuffer + 1u ) ) )
{
++markupStringBuffer;
SkipWhiteSpace( markupStringBuffer, markupStringEndBuffer );
else if( GREATER_THAN == character ) // '>'
{
isTag = true;
+ if (isPreviousSlash)
+ {
+ tag.isEndTag = true;
+ }
+
+ isPreviousSlash = false;
+ isPreviousLessThan = false;
}
else if( QUOTATION_MARK == character )
{
isQuotationOpen = !isQuotationOpen;
++tag.length;
+
+ isPreviousSlash = false;
+ isPreviousLessThan = false;
}
else if( WHITE_SPACE >= character ) // ' '
{
// If it's not any of the 'special' characters then just add it to the tag string.
++tag.length;
+
+ isPreviousSlash = false;
+ isPreviousLessThan = false;
}
}
}
return isTag;
}
+/**
+ * @brief Measures the XHTML entity that starts at the current position of the mark-up string.
+ *
+ * An entity starts with '&' and finishes at the first ';'. The scan gives up if another '&',
+ * a '\' or a '<' shows up before the ';', or if the end of the buffer is reached first.
+ *
+ * @param[in,out] markupStringBuffer Iterator pointing to the current character. It is left untouched if that
+ *                                   character is not '&'. Otherwise it is moved just past the ';' on success,
+ *                                   or left where the scan stopped on failure.
+ * @param[in] markupStringEndBuffer Pointer to the end of the mark-up string buffer.
+ *
+ * @return The number of characters of the entity, counting both the '&' and the ';', or 0 if there is no entity.
+ */
+unsigned int GetXHTMLEntityLength( const char*& markupStringBuffer,
+                                   const char* const markupStringEndBuffer )
+{
+  // Only an ampersand can open an entity.
+  if( AMPERSAND != *markupStringBuffer )
+  {
+    return 0u;
+  }
+
+  // Step over the '&'; it is the first character counted.
+  ++markupStringBuffer;
+  unsigned int entityLength = 1u;
+
+  while( markupStringBuffer < markupStringEndBuffer )
+  {
+    const char current = *markupStringBuffer;
+    ++entityLength;
+
+    if( SEMI_COLON == current ) // ';'
+    {
+      // End of the entity: leave the iterator just after the ';'.
+      ++markupStringBuffer;
+      return entityLength;
+    }
+
+    const bool isInterrupted = ( AMPERSAND == current ) || ( BACK_SLASH == current ) || ( LESS_THAN == current );
+    if( isInterrupted )
+    {
+      // Not an entity: the iterator stays on the character that interrupted the scan.
+      return 0u;
+    }
+
+    ++markupStringBuffer;
+  }
+
+  // The buffer finished before any ';' was found.
+  return 0u;
+}
+
+/**
+ * @brief Converts an XHTML numeric (decimal or hexadecimal) entity into its UTF-8 encoding.
+ *
+ * @param[in] markupText Points just past the "&#" of the entity, i.e. at the optional 'x' followed by the
+ *                       digits and the terminating ';'.
+ * @param[out] utf8 Buffer that receives the UTF-8 bytes of the decoded character. It is not written to
+ *                  if the entity is rejected.
+ *
+ * @return true if the entity is well formed and its code lies in one of the ranges of
+ *         XHTML_DECIMAL_ENTITY_RANGE, otherwise false.
+ */
+bool XHTMLNumericEntityToUtf8 ( const char* markupText, char* utf8 )
+{
+  if( ( NULL == markupText ) || ( NULL == utf8 ) )
+  {
+    return false;
+  }
+
+  // A leading 'x' selects hexadecimal digits, otherwise the digits are decimal.
+  const bool isHex = ( HEX_CODE == *markupText );
+  if( isHex )
+  {
+    ++markupText;
+  }
+
+  char* end = NULL;
+  const unsigned long code = strtoul( markupText, &end, ( isHex ? 16 : 10 ) ); // UTF-32 code in case of a correct XHTML entity
+
+  // The digits must run right up to the ';'. For a wrong entity such as "23abcdefs;" *end will be 'a'.
+  if( SEMI_COLON != *end )
+  {
+    return false;
+  }
+
+  /* characters XML 1.1 permits */
+  // The range test also rejects zero (no digits parsed) and ULONG_MAX (overflow reported by strtoul).
+  const bool isPermitted = ( ( XHTML_DECIMAL_ENTITY_RANGE[0] < code ) && ( code <= XHTML_DECIMAL_ENTITY_RANGE[1] ) ) ||
+                           ( ( XHTML_DECIMAL_ENTITY_RANGE[2] <= code ) && ( code <= XHTML_DECIMAL_ENTITY_RANGE[3] ) ) ||
+                           ( ( XHTML_DECIMAL_ENTITY_RANGE[4] <= code ) && ( code <= XHTML_DECIMAL_ENTITY_RANGE[5] ) );
+  if( !isPermitted )
+  {
+    return false;
+  }
+
+  // Narrow to exactly 32 bits before handing a pointer to the converter. 'unsigned long' may be wider
+  // than uint32_t, so reading it through a uint32_t pointer would pick up the wrong half of the value
+  // on 64-bit big-endian targets and would also break the aliasing rules.
+  const uint32_t utf32 = static_cast<uint32_t>( code );
+  Utf32ToUtf8( &utf32, 1, reinterpret_cast<uint8_t*>( utf8 ) );
+  return true;
+}
+
} // namespace
void ProcessMarkupString( const std::string& markupString, MarkupProcessData& markupProcessData )
{
+ DALI_LOG_INFO( gLogFilter, Debug::Verbose, "markupString: %s\n", markupString.c_str() );
+
// Reserve space for the plain text.
const Length markupStringSize = markupString.size();
markupProcessData.markupProcessedText.reserve( markupStringSize );
StyleStack::RunIndex colorRunIndex = 0u;
StyleStack::RunIndex fontRunIndex = 0u;
+ // check tag reference
+ int colorTagReference = 0u;
+ int fontTagReference = 0u;
+ int iTagReference = 0u;
+ int bTagReference = 0u;
+
// Give an initial default value to the model's vectors.
markupProcessData.colorRuns.Reserve( DEFAULT_VECTOR_SIZE );
markupProcessData.fontRuns.Reserve( DEFAULT_VECTOR_SIZE );
CharacterIndex characterIndex = 0u;
for( ; markupStringBuffer < markupStringEndBuffer; )
{
+ tag.attributes.Clear();
if( IsTag( markupStringBuffer,
markupStringEndBuffer,
tag ) )
// Point the next color run.
++colorRunIndex;
+
+ // Increase reference
+ ++colorTagReference;
}
else
{
- // Pop the top of the stack and set the number of characters of the run.
- ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() );
- colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex;
+ if( colorTagReference > 0 )
+ {
+ // Pop the top of the stack and set the number of characters of the run.
+ ColorRun& colorRun = *( markupProcessData.colorRuns.Begin() + styleStack.Pop() );
+ colorRun.characterRun.numberOfCharacters = characterIndex - colorRun.characterRun.characterIndex;
+ --colorTagReference;
+ }
}
} // <color></color>
else if( TokenComparison( XHTML_I_TAG, tag.buffer, tag.length ) )
{
// Create a new font run.
FontDescriptionRun fontRun;
- fontRun.characterRun.numberOfCharacters = 0u;
+ Initialize( fontRun );
// Fill the run with the parameters.
fontRun.characterRun.characterIndex = characterIndex;
fontRun.slant = TextAbstraction::FontSlant::ITALIC;
-
- fontRun.familyName = NULL;
- fontRun.familyDefined = false;
- fontRun.weightDefined = false;
- fontRun.widthDefined = false;
fontRun.slantDefined = true;
- fontRun.sizeDefined = false;
// Push the font run in the logical model.
markupProcessData.fontRuns.PushBack( fontRun );
// Point the next free font run.
++fontRunIndex;
+
+ // Increase reference
+ ++iTagReference;
}
else
{
- // Pop the top of the stack and set the number of characters of the run.
- FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
- fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
+ if( iTagReference > 0 )
+ {
+ // Pop the top of the stack and set the number of characters of the run.
+ FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
+ fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
+ --iTagReference;
+ }
}
} // <i></i>
else if( TokenComparison( XHTML_U_TAG, tag.buffer, tag.length ) )
{
// Create a new font run.
FontDescriptionRun fontRun;
- fontRun.characterRun.numberOfCharacters = 0u;
+ Initialize( fontRun );
// Fill the run with the parameters.
fontRun.characterRun.characterIndex = characterIndex;
-
fontRun.weight = TextAbstraction::FontWeight::BOLD;
-
- fontRun.familyName = NULL;
- fontRun.familyDefined = false;
fontRun.weightDefined = true;
- fontRun.widthDefined = false;
- fontRun.slantDefined = false;
- fontRun.sizeDefined = false;
// Push the font run in the logical model.
markupProcessData.fontRuns.PushBack( fontRun );
// Point the next free font run.
++fontRunIndex;
+
+ // Increase reference
+ ++bTagReference;
}
else
{
- // Pop the top of the stack and set the number of characters of the run.
- FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
- fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
+ if( bTagReference > 0 )
+ {
+ // Pop the top of the stack and set the number of characters of the run.
+ FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
+ fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
+ --bTagReference;
+ }
}
} // <b></b>
else if( TokenComparison( XHTML_FONT_TAG, tag.buffer, tag.length ) )
{
// Create a new font run.
FontDescriptionRun fontRun;
- fontRun.characterRun.numberOfCharacters = 0u;
+ Initialize( fontRun );
// Fill the run with the parameters.
fontRun.characterRun.characterIndex = characterIndex;
- fontRun.familyName = NULL;
- fontRun.familyDefined = false;
- fontRun.weightDefined = false;
- fontRun.widthDefined = false;
- fontRun.slantDefined = false;
- fontRun.sizeDefined = false;
-
ProcessFontTag( tag, fontRun );
// Push the font run in the logical model.
// Point the next free font run.
++fontRunIndex;
+
+ // Increase reference
+ ++fontTagReference;
}
else
{
- // Pop the top of the stack and set the number of characters of the run.
- FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
- fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
+ if( fontTagReference > 0 )
+ {
+ // Pop the top of the stack and set the number of characters of the run.
+ FontDescriptionRun& fontRun = *( markupProcessData.fontRuns.Begin() + styleStack.Pop() );
+ fontRun.characterRun.numberOfCharacters = characterIndex - fontRun.characterRun.characterIndex;
+ --fontTagReference;
+ }
}
} // <font></font>
else if( TokenComparison( XHTML_SHADOW_TAG, tag.buffer, tag.length ) )
// Pop the top of the stack and set the number of characters of the run.
}
} // <outline></outline>
+ else if (TokenComparison(XHTML_ITEM_TAG, tag.buffer, tag.length))
+ {
+ if (tag.isEndTag)
+ {
+ // Create an embedded item instance.
+ EmbeddedItem item;
+ item.characterIndex = characterIndex;
+ ProcessEmbeddedItem(tag, item);
+
+ markupProcessData.items.PushBack(item);
+
+ // Insert white space character that will be replaced by the item.
+ markupProcessData.markupProcessedText.append( 1u, WHITE_SPACE );
+ ++characterIndex;
+ }
+ }
} // end if( IsTag() )
- else
+ else if( markupStringBuffer < markupStringEndBuffer )
{
unsigned char character = *markupStringBuffer;
+ const char* markupBuffer = markupStringBuffer;
+ unsigned char count = GetUtf8Length( character );
+ char utf8[8];
if( ( BACK_SLASH == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
{
- // Adding < or > special character.
+ // Adding < , > or & special character.
const unsigned char nextCharacter = *( markupStringBuffer + 1u );
- if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) )
+ if( ( LESS_THAN == nextCharacter ) || ( GREATER_THAN == nextCharacter ) || ( AMPERSAND == nextCharacter ) )
{
character = nextCharacter;
++markupStringBuffer;
+
+ count = GetUtf8Length( character );
+ markupBuffer = markupStringBuffer;
}
}
- else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
+ else // checking if conatins XHTML entity or not
{
- // Replacing CR+LF end line by LF.
- if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) )
+ const unsigned int len = GetXHTMLEntityLength( markupStringBuffer, markupStringEndBuffer);
+
+ // Parse markupStringTxt if it contains XHTML Entity between '&' and ';'
+ if( len > 0 )
{
- character = LINE_SEPARATOR_LF;
- ++markupStringBuffer;
+ char* entityCode = NULL;
+ bool result = false;
+ count = 0;
+
+ // Checking if XHTML Numeric Entity
+ if( HASH == *( markupBuffer + 1u ) )
+ {
+ entityCode = &utf8[0];
+ // markupBuffer is currently pointing to '&'. By adding 2u to markupBuffer it will point to numeric string by skipping "&#'
+ result = XHTMLNumericEntityToUtf8( ( markupBuffer + 2u ), entityCode );
+ }
+ else // Checking if XHTML Named Entity
+ {
+ entityCode = const_cast<char*> ( NamedEntityToUtf8( markupBuffer, len ) );
+ result = ( entityCode != NULL );
+ }
+ if ( result )
+ {
+ markupBuffer = entityCode; //utf8 text assigned to markupBuffer
+ character = markupBuffer[0];
+ }
+ else
+ {
+ DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not valid XHTML entity : (%.*s) \n", len, markupBuffer );
+ markupBuffer = NULL;
+ }
+ }
+ else // in case string conatins Start of XHTML Entity('&') but not its end character(';')
+ {
+ if( character == AMPERSAND )
+ {
+ markupBuffer = NULL;
+ DALI_LOG_INFO( gLogFilter, Debug::Verbose, "Not Well formed XHTML content \n" );
+ }
}
}
- const unsigned char numberOfBytes = GetUtf8Length( character );
-
- markupProcessData.markupProcessedText.push_back( character );
- for( unsigned char i = 1u; i < numberOfBytes; ++i )
+ if( markupBuffer != NULL )
{
- ++markupStringBuffer;
- markupProcessData.markupProcessedText.push_back( *markupStringBuffer );
- }
+ const unsigned char numberOfBytes = GetUtf8Length( character );
+ markupProcessData.markupProcessedText.push_back( character );
- ++characterIndex;
- ++markupStringBuffer;
+ for( unsigned char i = 1u; i < numberOfBytes; ++i )
+ {
+ ++markupBuffer;
+ markupProcessData.markupProcessedText.push_back( *markupBuffer );
+ }
+
+ ++characterIndex;
+ markupStringBuffer += count;
+ }
}
}
else
{
markupProcessData.colorRuns.Resize( colorRunIndex );
+
+#ifdef DEBUG_ENABLED
+ for( unsigned int i=0; i<colorRunIndex; ++i )
+ {
+ ColorRun& run = markupProcessData.colorRuns[i];
+ DALI_LOG_INFO( gLogFilter, Debug::Verbose, "run[%d] index: %d, length: %d, color %f,%f,%f,%f\n", i, run.characterRun.characterIndex, run.characterRun.numberOfCharacters, run.color.r, run.color.g, run.color.b, run.color.a );
+ }
+#endif
}
}