From: Victor Cebollada Date: Tue, 24 Feb 2015 15:07:35 +0000 (+0000) Subject: Fixes for multi-language. X-Git-Tag: new_text_0.1~36 X-Git-Url: http://review.tizen.org/git/?p=platform%2Fcore%2Fuifw%2Fdali-toolkit.git;a=commitdiff_plain;h=d83ef802ea8a21da2707c0053267064f6c1e8af7 Fixes for multi-language. White spaces are detected as LATIN. Do not change the script if there is a white space. Use the line break info to get the '\n' characters. Change-Id: I18a4ea47a7dbc351a18c249aa72e2b269ee39b81 Signed-off-by: Victor Cebollada --- diff --git a/dali-toolkit/internal/text/multi-language-support-impl.cpp b/dali-toolkit/internal/text/multi-language-support-impl.cpp index e0053c8..f6a9a22 100644 --- a/dali-toolkit/internal/text/multi-language-support-impl.cpp +++ b/dali-toolkit/internal/text/multi-language-support-impl.cpp @@ -18,17 +18,17 @@ // CLASS HEADER #include -// INTERNAL INCLUDES +// EXTERNAL INCLUDES +#include +#include #include #include + +// INTERNAL INCLUDES #include #include #include #include -#include - -// EXTERNAL INCLUDES -#include namespace Dali { @@ -116,6 +116,24 @@ Script GetScript( Length index, return script; } +/** + * @brief Whether the character is valid for all scripts. i.e. the white space. + * + * @param[in] character The character. + * + * @return @e true if the character is valid for all scripts. + */ +bool IsValidForAllScripts( Character character ) +{ + return ( IsWhiteSpace( character ) || + IsZeroWidthNonJoiner( character ) || + IsZeroWidthJoiner( character ) || + IsZeroWidthSpace( character ) || + IsLeftToRightMark( character ) || + IsRightToLeftMark( character ) || + IsThinSpace( character ) ); +} + bool ValidateFontsPerScript::FindValidFont( FontId fontId ) const { for( Vector::ConstIterator it = mValidFonts.Begin(), @@ -184,6 +202,7 @@ Text::MultilanguageSupport MultilanguageSupport::Get() } void MultilanguageSupport::SetScripts( const Vector& text, + const Vector& lineBreakInfo, Vector& scripts ) { const Length numberOfCharacters = text.Count(); @@ -194,8 +213,6 @@ void MultilanguageSupport::SetScripts( const Vector& text, return; } - // Traverse all characters and set the scripts. - // Stores the current script run. ScriptRun currentScriptRun; currentScriptRun.characterRun.characterIndex = 0u; @@ -205,35 +222,92 @@ void MultilanguageSupport::SetScripts( const Vector& text, // Reserve some space to reduce the number of reallocations. scripts.Reserve( numberOfCharacters << 2u ); - for( Length index = 0u; index < numberOfCharacters; ++index ) - { - const Character character = *( text.Begin() + index ); + // Whether the first valid script need to be set. + bool firstValidScript = true; - Script script = GetCharacterScript( character ); + // Whether the first valid script is a right to left script. + bool isParagraphRTL = false; - if( TextAbstraction::UNKNOWN == script ) + // Count the number of characters which are valid for all scripts. i.e. white spaces or '\n'. + Length numberOfAllScriptCharacters = 0u; + + // Pointers to the text and break info buffers. + const Character* textBuffer = text.Begin(); + const LineBreakInfo* breakInfoBuffer = lineBreakInfo.Begin(); + + // Traverse all characters and set the scripts. + for( Length index = 0u; index < numberOfCharacters; ++index ) + { + Character character = *( textBuffer + index ); + LineBreakInfo breakInfo = *( breakInfoBuffer + index ); + + // Some characters (like white spaces) are valid for many scripts. The rules to set a script + // for them are: + // - If they are at the begining of a paragraph they get the script of the first character with + // a defined script. If they are at the end, they get the script of the last one. + // - If they are between two scripts with the same direction, they get the script of the previous + // character with a defined script. If the two scripts have different directions, they get the + // script of the first character of the paragraph with a defined script. + + // Skip those characters valid for many scripts like white spaces or '\n'. + bool endOfText = index == numberOfCharacters; + while( !endOfText && + IsValidForAllScripts( character ) ) { - if( IsZeroWidthNonJoiner( character ) || - IsZeroWidthJoiner( character ) || - IsZeroWidthSpace( character ) || - IsLeftToRightMark( character ) || - IsRightToLeftMark( character ) || - IsThinSpace( character ) ) + // Count all these characters to be added into a script. + ++numberOfAllScriptCharacters; + + if( TextAbstraction::LINE_MUST_BREAK == breakInfo ) { - // Keep previous script if the character is a zero width joiner or a zero width non joiner. - script = currentScriptRun.script; + // The next character is a new paragraph. + // Know when there is a new paragraph is needed because if there is a white space + // between two scripts with different directions, it is added to the script with + // the same direction than the first script of the paragraph. + firstValidScript = true; + isParagraphRTL = false; } - else + + // Get the next character. + ++index; + endOfText = index == numberOfCharacters; + if( !endOfText ) { - script = TextAbstraction::LATIN; - DALI_ASSERT_DEBUG( !"MultilanguageSupport::SetScripts. Unkown script!" ); + character = *( textBuffer + index ); + breakInfo = *( breakInfoBuffer + index ); } } + if( endOfText ) + { + // Last characters of the text are 'white spaces'. + // There is nothing else to do. Just add the remaining characters to the last script after this bucle. + break; + } + + // Get the script of the character. + Script script = GetCharacterScript( character ); + + // Check if it is the first character of a paragraph. + if( firstValidScript && + ( TextAbstraction::UNKNOWN != script ) ) + { + // Sets the direction of the first valid script. + isParagraphRTL = ( TextAbstraction::ARABIC == script ); + firstValidScript = false; + } + if( script != currentScriptRun.script ) { // Current run needs to be stored and a new one initialized. + if( isParagraphRTL != ( TextAbstraction::ARABIC == script ) ) + { + // Current script has different direction than the first script of the paragraph. + // All the previously skipped characters need to be added to the previous script before it's stored. + currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters; + numberOfAllScriptCharacters = 0u; + } + if( 0u != currentScriptRun.characterRun.numberOfCharacters ) { // Store the script run. @@ -242,16 +316,38 @@ void MultilanguageSupport::SetScripts( const Vector& text, // Initialize the new one. currentScriptRun.characterRun.characterIndex = currentScriptRun.characterRun.characterIndex + currentScriptRun.characterRun.numberOfCharacters; - currentScriptRun.characterRun.numberOfCharacters = 0u; + currentScriptRun.characterRun.numberOfCharacters = numberOfAllScriptCharacters; // Adds the white spaces which are at the begining of the script. currentScriptRun.script = script; + numberOfAllScriptCharacters = 0u; + } + else + { + // Adds white spaces between characters. + currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters; + numberOfAllScriptCharacters = 0u; + } + + if( TextAbstraction::LINE_MUST_BREAK == breakInfo ) + { + // The next character is a new paragraph. + firstValidScript = true; + isParagraphRTL = false; } // Add one more character to the run. ++currentScriptRun.characterRun.numberOfCharacters; } + // Add remaining characters into the last script. + currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters; if( 0u != currentScriptRun.characterRun.numberOfCharacters ) { + if( TextAbstraction::UNKNOWN == currentScriptRun.script ) + { + // There are only white spaces in the last script. Set the latin script. + currentScriptRun.script = TextAbstraction::LATIN; + } + // Store the last run. scripts.PushBack( currentScriptRun ); } diff --git a/dali-toolkit/internal/text/multi-language-support-impl.h b/dali-toolkit/internal/text/multi-language-support-impl.h index a424cd0..643dea3 100644 --- a/dali-toolkit/internal/text/multi-language-support-impl.h +++ b/dali-toolkit/internal/text/multi-language-support-impl.h @@ -94,6 +94,7 @@ public: * @copydoc Dali::MultilanguageSupport::SetScripts() */ void SetScripts( const Vector& text, + const Vector& lineBreakInfo, Vector& scripts ); /** * @copydoc Dali::MultilanguageSupport::ValidateFonts() diff --git a/dali-toolkit/public-api/text/multi-language-support.cpp b/dali-toolkit/public-api/text/multi-language-support.cpp index 205b39b..b9edd83 100644 --- a/dali-toolkit/public-api/text/multi-language-support.cpp +++ b/dali-toolkit/public-api/text/multi-language-support.cpp @@ -49,9 +49,11 @@ MultilanguageSupport MultilanguageSupport::Get() } void MultilanguageSupport::SetScripts( const Vector& text, + const Vector& lineBreakInfo, Vector& scripts ) { GetImplementation( *this ).SetScripts( text, + lineBreakInfo, scripts ); } diff --git a/dali-toolkit/public-api/text/multi-language-support.h b/dali-toolkit/public-api/text/multi-language-support.h index 7fb7f58..531fdad 100644 --- a/dali-toolkit/public-api/text/multi-language-support.h +++ b/dali-toolkit/public-api/text/multi-language-support.h @@ -80,10 +80,20 @@ public: * * Scripts are used to validate and set default fonts and to shape the text in further steps. * + * Some characters (like white spaces) are valid for many scripts. The rules to set a script + * for them are: + * - If they are at the begining of a paragraph they get the script of the first character with + * a defined script. If they are at the end, they get the script of the last one. + * - If they are between two scripts with the same direction, they get the script of the previous + * character with a defined script. If the two scripts have different directions, they get the + * script of the first character of the paragraph with a defined script. + * * @param[in] text Vector of UTF-32 characters. + * @param[in] lineBreakInfo Vector with the line break info. * @param[out] scripts Vector containing the script runs for the whole text. */ void SetScripts( const Vector& text, + const Vector& lineBreakInfo, Vector& scripts ); /** diff --git a/dali-toolkit/public-api/text/script.cpp b/dali-toolkit/public-api/text/script.cpp index 5cd8c3f..c320a7e 100644 --- a/dali-toolkit/public-api/text/script.cpp +++ b/dali-toolkit/public-api/text/script.cpp @@ -29,6 +29,14 @@ namespace Text namespace { +const unsigned int WHITE_SPACE_THRESHOLD = 0x21; ///< All characters below 0x21 are considered white spaces. +const unsigned int CHAR_FL = 0x000A; ///< NL Line feed, new line. +const unsigned int CHAR_VT = 0x000B; ///< Vertical tab. +const unsigned int CHAR_FF = 0x000C; ///< NP Form feed, new page. +const unsigned int CHAR_NEL = 0x0085; ///< Next line. +const unsigned int CHAR_LS = 0x2028; ///< Line separator. +const unsigned int CHAR_PS = 0x2029; ///< Paragraph separator + const unsigned int CHAR_ZWS = 0x200B; ///< Zero width space. const unsigned int CHAR_ZWNJ = 0x200C; ///< Zero width non joiner. const unsigned int CHAR_ZWJ = 0x200D; ///< Zero width joiner. @@ -119,7 +127,6 @@ Script GetCharacterScript( Character character ) // Burmese script // 0x1000 - 0x109f Myanmar - if( character <= 0x0cff ) { if( character <= 0x09ff ) @@ -374,6 +381,21 @@ Script GetCharacterScript( Character character ) return TextAbstraction::UNKNOWN; } +bool IsWhiteSpace( Character character ) +{ + return character < WHITE_SPACE_THRESHOLD; +} + +bool IsNewParagraph( Character character ) +{ + return ( ( CHAR_FL == character ) || + ( CHAR_VT == character ) || + ( CHAR_FF == character ) || + ( CHAR_NEL == character ) || + ( CHAR_LS == character ) || + ( CHAR_PS == character ) ); +} + bool IsZeroWidthNonJoiner( Character character ) { return CHAR_ZWNJ == character; diff --git a/dali-toolkit/public-api/text/script.h b/dali-toolkit/public-api/text/script.h index e3f4c63..7ab8d89 100644 --- a/dali-toolkit/public-api/text/script.h +++ b/dali-toolkit/public-api/text/script.h @@ -40,6 +40,24 @@ namespace Text Script GetCharacterScript( Character character ); /** + * @brief Whether the character is a white space. + * + * @param[in] character The character. + * + * @return @e true if the character is a white space. + */ +bool IsWhiteSpace( Character character ); + +/** + * @brief Whether the character is a new paragraph character. + * + * @param[in] character The character. + * + * @return @e true if the character is a new paragraph character. + */ +bool IsNewParagraph( Character character ); + +/** * @brief Whether the character is a zero width non joiner. * * @param[in] character The character. diff --git a/dali-toolkit/public-api/text/text-controller.cpp b/dali-toolkit/public-api/text/text-controller.cpp index ceae8ca..d759497 100644 --- a/dali-toolkit/public-api/text/text-controller.cpp +++ b/dali-toolkit/public-api/text/text-controller.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,8 @@ struct Controller::Impl { Impl() : mNewText(), - mOperations( NO_OPERATION ) + mOperations( NO_OPERATION ), + mControlSize() { mLogicalModel = LogicalModel::New(); mVisualModel = VisualModel::New(); @@ -67,6 +69,8 @@ struct Controller::Impl TextAbstraction::FontClient mFontClient; OperationsMask mOperations; + + Size mControlSize; }; ControllerPtr Controller::New() @@ -91,14 +95,14 @@ bool Controller::Relayout( const Vector2& size ) bool viewUpdated = false; - if( size != mControlSize ) + if( size != mImpl->mControlSize ) { viewUpdated = DoRelayout( size, mImpl->mOperations ); // Do not re-do any operation until something changes. mImpl->mOperations = NO_OPERATION; - mControlSize = size; + mImpl->mControlSize = size; } return viewUpdated; @@ -132,6 +136,21 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations ) text.clear(); } + Vector lineBreakInfo; + if( GET_LINE_BREAKS & operations ) + { + // Retrieves the line break info. The line break info is used to split the text in 'paragraphs' to + // calculate the bidirectional info for each 'paragraph'. + // It's also used to layout the text (where it should be a new line) or to shape the text (text in different lines + // is not shaped together). + lineBreakInfo.Resize( characterCount, TextAbstraction::LINE_NO_BREAK ); + + SetLineBreakInfo( utf32Characters, + lineBreakInfo ); + + mImpl->mLogicalModel->SetLineBreakInfo( lineBreakInfo.Begin(), characterCount ); + } + const bool getScripts = GET_SCRIPTS & operations; const bool validateFonts = VALIDATE_FONTS & operations; @@ -147,6 +166,7 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations ) { // Retrieves the scripts used in the text. multilanguageSupport.SetScripts( utf32Characters, + lineBreakInfo, scripts ); // Sets the scripts into the model. @@ -166,17 +186,6 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations ) } } - Vector lineBreakInfo; - if( GET_LINE_BREAKS & operations ) - { - // Retrieves the line break info. The line break info is used to split the text in 'paragraphs' to - // calculate the bidirectional info for each 'paragraph'. - // It's also used to layout the text (where it should be a new line) or to shape the text (text in different lines - // is not shaped together). - lineBreakInfo.Resize( characterCount, TextAbstraction::LINE_NO_BREAK ); - mImpl->mLogicalModel->SetLineBreakInfo( lineBreakInfo.Begin(), characterCount ); - } - Vector glyphs; Vector characterIndices; Vector charactersPerGlyph; @@ -210,7 +219,7 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations ) mImpl->mVisualModel->GetGlyphs( glyphs.Begin(), 0u, numberOfGlyphs ); - + mImpl->mVisualModel->GetGlyphToCharacterMap( characterIndices.Begin(), 0u, numberOfGlyphs ); @@ -306,8 +315,7 @@ Controller::~Controller() } Controller::Controller() -: mImpl( NULL ), - mControlSize() +: mImpl( NULL ) { mImpl = new Controller::Impl(); } diff --git a/dali-toolkit/public-api/text/text-controller.h b/dali-toolkit/public-api/text/text-controller.h index 25ad189..92bc417 100644 --- a/dali-toolkit/public-api/text/text-controller.h +++ b/dali-toolkit/public-api/text/text-controller.h @@ -67,7 +67,7 @@ private: GET_GLYPH_METRICS = 0x40, LAYOUT = 0x80, REORDER = 0x100, - ALIGNEMENT = 0x200, + ALIGNMENT = 0x200, RENDER = 0x400, ALL_OPERATIONS = 0xFFF }; @@ -156,8 +156,6 @@ private: struct Impl; Impl* mImpl; - - Size mControlSize; }; } // namespace Text