Fixes for multi-language.

author Victor Cebollada <v.cebollada@samsung.com>

Tue, 24 Feb 2015 15:07:35 +0000 (15:07 +0000)

committer Victor Cebollada <v.cebollada@samsung.com>

Thu, 26 Feb 2015 16:58:44 +0000 (16:58 +0000)
author Victor Cebollada <v.cebollada@samsung.com>
Tue, 24 Feb 2015 15:07:35 +0000 (15:07 +0000)
committer Victor Cebollada <v.cebollada@samsung.com>
Thu, 26 Feb 2015 16:58:44 +0000 (16:58 +0000)
diff --git a/dali-toolkit/internal/text/multi-language-support-impl.cpp b/dali-toolkit/internal/text/multi-language-support-impl.cpp

index e0053c8..f6a9a22 100644 (file)
--- a/dali-toolkit/internal/text/multi-language-support-impl.cpp
+++ b/dali-toolkit/internal/text/multi-language-support-impl.cpp
@@ -18,17 +18,17 @@
  // CLASS HEADER
  #include <dali-toolkit/internal/text/multi-language-support-impl.h>
  
-// INTERNAL INCLUDES
+// EXTERNAL INCLUDES
+#include <memory.h>
+#include <dali/integration-api/debug.h>
  #include <dali/public-api/adaptor-framework/singleton-service.h>
  #include <dali/public-api/text-abstraction/font-client.h>
+
+// INTERNAL INCLUDES
  #include <dali-toolkit/public-api/text/logical-model.h>
  #include <dali-toolkit/public-api/text/font-run.h>
  #include <dali-toolkit/public-api/text/script.h>
  #include <dali-toolkit/public-api/text/script-run.h>
-#include <dali/integration-api/debug.h>
-
-// EXTERNAL INCLUDES
-#include <memory.h>
  
  namespace Dali
  {
@@ -116,6 +116,24 @@ Script GetScript( Length index,
    return script;
  }
  
+/**
+ * @brief Whether the character is valid for all scripts, e.g. white spaces, zero width characters or direction marks.
+ *
+ * @param[in] character The character.
+ *
+ * @return @e true if the character is valid for all scripts.
+ */
+bool IsValidForAllScripts( Character character )
+{
+  return ( IsWhiteSpace( character )         ||
+           IsZeroWidthNonJoiner( character ) ||
+           IsZeroWidthJoiner( character )    ||
+           IsZeroWidthSpace( character )     ||
+           IsLeftToRightMark( character )    ||
+           IsRightToLeftMark( character )    ||
+           IsThinSpace( character ) );
+}
+
  bool ValidateFontsPerScript::FindValidFont( FontId fontId ) const
  {
    for( Vector<FontId>::ConstIterator it = mValidFonts.Begin(),
@@ -184,6 +202,7 @@ Text::MultilanguageSupport MultilanguageSupport::Get()
  }
  
  void MultilanguageSupport::SetScripts( const Vector<Character>& text,
+                                       const Vector<LineBreakInfo>& lineBreakInfo,
                                         Vector<ScriptRun>& scripts )
  {
    const Length numberOfCharacters = text.Count();
@@ -194,8 +213,6 @@ void MultilanguageSupport::SetScripts( const Vector<Character>& text,
      return;
    }
  
-  // Traverse all characters and set the scripts.
-
    // Stores the current script run.
    ScriptRun currentScriptRun;
    currentScriptRun.characterRun.characterIndex = 0u;
@@ -205,35 +222,92 @@ void MultilanguageSupport::SetScripts( const Vector<Character>& text,
    // Reserve some space to reduce the number of reallocations.
    scripts.Reserve( numberOfCharacters << 2u );
  
-  for( Length index = 0u; index < numberOfCharacters; ++index )
-  {
-    const Character character = *( text.Begin() + index );
+  // Whether the first valid script needs to be set.
+  bool firstValidScript = true;
  
-    Script script = GetCharacterScript( character );
+  // Whether the first valid script is a right to left script.
+  bool isParagraphRTL = false;
  
-    if( TextAbstraction::UNKNOWN == script )
+  // Counts the number of characters which are valid for all scripts, e.g. white spaces or '\n'.
+  Length numberOfAllScriptCharacters = 0u;
+
+  // Pointers to the text and break info buffers.
+  const Character* textBuffer = text.Begin();
+  const LineBreakInfo* breakInfoBuffer = lineBreakInfo.Begin();
+
+  // Traverse all characters and set the scripts.
+  for( Length index = 0u; index < numberOfCharacters; ++index )
+  {
+    Character character = *( textBuffer + index );
+    LineBreakInfo breakInfo = *( breakInfoBuffer + index );
+
+    // Some characters (like white spaces) are valid for many scripts. The rules to set a script
+    // for them are:
+    // - If they are at the beginning of a paragraph they get the script of the first character with
+    //   a defined script. If they are at the end, they get the script of the last one.
+    // - If they are between two scripts with the same direction, they get the script of the previous
+    //   character with a defined script. If the two scripts have different directions, they get the
+    //   script of the first character of the paragraph with a defined script.
+
+    // Skip those characters valid for many scripts like white spaces or '\n'.
+    bool endOfText = index == numberOfCharacters;
+    while( !endOfText &&
+           IsValidForAllScripts( character ) )
      {
-      if( IsZeroWidthNonJoiner( character ) ||
-          IsZeroWidthJoiner( character ) ||
-          IsZeroWidthSpace( character ) ||
-          IsLeftToRightMark( character ) ||
-          IsRightToLeftMark( character ) ||
-          IsThinSpace( character ) )
+      // Count all these characters to be added into a script.
+      ++numberOfAllScriptCharacters;
+
+      if( TextAbstraction::LINE_MUST_BREAK == breakInfo )
        {
-        // Keep previous script if the character is a zero width joiner or a zero width non joiner.
-        script = currentScriptRun.script;
+        // The next character is a new paragraph.
+        // Knowing when a new paragraph starts is needed because if there is a white space
+        // between two scripts with different directions, it is added to the script with
+        // the same direction as the first script of the paragraph.
+        firstValidScript = true;
+        isParagraphRTL = false;
        }
-      else
+
+      // Get the next character.
+      ++index;
+      endOfText = index == numberOfCharacters;
+      if( !endOfText )
        {
-        script = TextAbstraction::LATIN;
-        DALI_ASSERT_DEBUG( !"MultilanguageSupport::SetScripts. Unkown script!" );
+        character = *( textBuffer + index );
+        breakInfo = *( breakInfoBuffer + index );
        }
      }
  
+    if( endOfText )
+    {
+      // Last characters of the text are 'white spaces'.
+      // There is nothing else to do. Just add the remaining characters to the last script after this loop.
+      break;
+    }
+
+    // Get the script of the character.
+    Script script = GetCharacterScript( character );
+
+    // Check if it is the first character of a paragraph.
+    if( firstValidScript &&
+        ( TextAbstraction::UNKNOWN != script ) )
+    {
+      // Sets the direction of the first valid script.
+      isParagraphRTL = ( TextAbstraction::ARABIC == script );
+      firstValidScript = false;
+    }
+
      if( script != currentScriptRun.script )
      {
        // Current run needs to be stored and a new one initialized.
  
+      if( isParagraphRTL != ( TextAbstraction::ARABIC == script ) )
+      {
+        // Current script has different direction than the first script of the paragraph.
+        // All the previously skipped characters need to be added to the previous script before it's stored.
+        currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters;
+        numberOfAllScriptCharacters = 0u;
+      }
+
        if( 0u != currentScriptRun.characterRun.numberOfCharacters )
        {
          // Store the script run.
@@ -242,16 +316,38 @@ void MultilanguageSupport::SetScripts( const Vector<Character>& text,
  
        // Initialize the new one.
        currentScriptRun.characterRun.characterIndex = currentScriptRun.characterRun.characterIndex + currentScriptRun.characterRun.numberOfCharacters;
-      currentScriptRun.characterRun.numberOfCharacters = 0u;
+      currentScriptRun.characterRun.numberOfCharacters = numberOfAllScriptCharacters; // Adds the white spaces which are at the beginning of the script.
        currentScriptRun.script = script;
+      numberOfAllScriptCharacters = 0u;
+    }
+    else
+    {
+      // Adds white spaces between characters.
+      currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters;
+      numberOfAllScriptCharacters = 0u;
+    }
+
+    if( TextAbstraction::LINE_MUST_BREAK == breakInfo )
+    {
+      // The next character is a new paragraph.
+      firstValidScript = true;
+      isParagraphRTL = false;
      }
  
      // Add one more character to the run.
      ++currentScriptRun.characterRun.numberOfCharacters;
    }
  
+  // Add remaining characters into the last script.
+  currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters;
    if( 0u != currentScriptRun.characterRun.numberOfCharacters )
    {
+    if( TextAbstraction::UNKNOWN == currentScriptRun.script )
+    {
+      // There are only white spaces in the last script. Set the latin script.
+      currentScriptRun.script = TextAbstraction::LATIN;
+    }
+
      // Store the last run.
      scripts.PushBack( currentScriptRun );
    }
diff --git a/dali-toolkit/internal/text/multi-language-support-impl.h b/dali-toolkit/internal/text/multi-language-support-impl.h

index a424cd0..643dea3 100644 (file)
--- a/dali-toolkit/internal/text/multi-language-support-impl.h
+++ b/dali-toolkit/internal/text/multi-language-support-impl.h
@@ -94,6 +94,7 @@ public:
     * @copydoc Dali::MultilanguageSupport::SetScripts()
     */
    void SetScripts( const Vector<Character>& text,
+                   const Vector<LineBreakInfo>& lineBreakInfo,
                     Vector<ScriptRun>& scripts );
    /**
     * @copydoc Dali::MultilanguageSupport::ValidateFonts()
diff --git a/dali-toolkit/public-api/text/multi-language-support.cpp b/dali-toolkit/public-api/text/multi-language-support.cpp

index 205b39b..b9edd83 100644 (file)
--- a/dali-toolkit/public-api/text/multi-language-support.cpp
+++ b/dali-toolkit/public-api/text/multi-language-support.cpp
@@ -49,9 +49,11 @@ MultilanguageSupport MultilanguageSupport::Get()
  }
  
  void MultilanguageSupport::SetScripts( const Vector<Character>& text,
+                                       const Vector<LineBreakInfo>& lineBreakInfo,
                                         Vector<ScriptRun>& scripts )
  {
    GetImplementation( *this ).SetScripts( text,
+                                         lineBreakInfo,
                                           scripts );
  }
  
diff --git a/dali-toolkit/public-api/text/multi-language-support.h b/dali-toolkit/public-api/text/multi-language-support.h

index 7fb7f58..531fdad 100644 (file)
--- a/dali-toolkit/public-api/text/multi-language-support.h
+++ b/dali-toolkit/public-api/text/multi-language-support.h
@@ -80,10 +80,20 @@ public:
     *
     * Scripts are used to validate and set default fonts and to shape the text in further steps.
     *
+   * Some characters (like white spaces) are valid for many scripts. The rules to set a script
+   * for them are:
+   * - If they are at the beginning of a paragraph they get the script of the first character with
+   *   a defined script. If they are at the end, they get the script of the last one.
+   * - If they are between two scripts with the same direction, they get the script of the previous
+   *   character with a defined script. If the two scripts have different directions, they get the
+   *   script of the first character of the paragraph with a defined script.
+   *
     * @param[in] text Vector of UTF-32 characters.
+   * @param[in] lineBreakInfo Vector with the line break info. It is read once per character of @p text, so it must have at least as many items.
     * @param[out] scripts Vector containing the script runs for the whole text.
     */
    void SetScripts( const Vector<Character>& text,
+                   const Vector<LineBreakInfo>& lineBreakInfo,
                     Vector<ScriptRun>& scripts );
  
    /**
diff --git a/dali-toolkit/public-api/text/script.cpp b/dali-toolkit/public-api/text/script.cpp

index 5cd8c3f..c320a7e 100644 (file)
--- a/dali-toolkit/public-api/text/script.cpp
+++ b/dali-toolkit/public-api/text/script.cpp
@@ -29,6 +29,14 @@ namespace Text
  
  namespace
  {
+const unsigned int WHITE_SPACE_THRESHOLD  = 0x21; ///< All characters below 0x21 are considered white spaces.
+const unsigned int CHAR_FL   = 0x000A; ///< NL Line feed, new line.
+const unsigned int CHAR_VT   = 0x000B; ///< Vertical tab.
+const unsigned int CHAR_FF   = 0x000C; ///< NP Form feed, new page.
+const unsigned int CHAR_NEL  = 0x0085; ///< Next line.
+const unsigned int CHAR_LS   = 0x2028; ///< Line separator.
+const unsigned int CHAR_PS   = 0x2029; ///< Paragraph separator
+
  const unsigned int CHAR_ZWS  = 0x200B; ///< Zero width space.
  const unsigned int CHAR_ZWNJ = 0x200C; ///< Zero width non joiner.
  const unsigned int CHAR_ZWJ  = 0x200D; ///< Zero width joiner.
@@ -119,7 +127,6 @@ Script GetCharacterScript( Character character )
    // Burmese script
    // 0x1000 - 0x109f Myanmar
  
-
    if( character <= 0x0cff )
    {
      if( character <= 0x09ff )
@@ -374,6 +381,21 @@ Script GetCharacterScript( Character character )
    return TextAbstraction::UNKNOWN;
  }
  
+bool IsWhiteSpace( Character character )
+{
+  return character < WHITE_SPACE_THRESHOLD;
+}
+
+bool IsNewParagraph( Character character )
+{
+  return ( ( CHAR_FL == character )  ||
+           ( CHAR_VT == character )  ||
+           ( CHAR_FF == character )  ||
+           ( CHAR_NEL == character ) ||
+           ( CHAR_LS == character )  ||
+           ( CHAR_PS == character ) );
+}
+
  bool IsZeroWidthNonJoiner( Character character )
  {
    return CHAR_ZWNJ == character;
diff --git a/dali-toolkit/public-api/text/script.h b/dali-toolkit/public-api/text/script.h

index e3f4c63..7ab8d89 100644 (file)
--- a/dali-toolkit/public-api/text/script.h
+++ b/dali-toolkit/public-api/text/script.h
@@ -40,6 +40,24 @@ namespace Text
  Script GetCharacterScript( Character character );
  
  /**
+ * @brief Whether the character is a white space.
+ *
+ * @param[in] character The character.
+ *
+ * @return @e true if the character is a white space.
+ */
+bool IsWhiteSpace( Character character );
+
+/**
+ * @brief Whether the character is a new paragraph character.
+ *
+ * @param[in] character The character.
+ *
+ * @return @e true if the character is a new paragraph character.
+ */
+bool IsNewParagraph( Character character );
+
+/**
   * @brief Whether the character is a zero width non joiner.
   *
   * @param[in] character The character.
diff --git a/dali-toolkit/public-api/text/text-controller.cpp b/dali-toolkit/public-api/text/text-controller.cpp

index ceae8ca..d759497 100644 (file)
--- a/dali-toolkit/public-api/text/text-controller.cpp
+++ b/dali-toolkit/public-api/text/text-controller.cpp
@@ -24,6 +24,7 @@
  #include <dali-toolkit/public-api/text/logical-model.h>
  #include <dali-toolkit/public-api/text/multi-language-support.h>
  #include <dali-toolkit/public-api/text/script-run.h>
+#include <dali-toolkit/public-api/text/segmentation.h>
  #include <dali-toolkit/public-api/text/shaper.h>
  #include <dali-toolkit/public-api/text/text-view.h>
  #include <dali-toolkit/public-api/text/visual-model.h>
@@ -45,7 +46,8 @@ struct Controller::Impl
  {
    Impl()
    : mNewText(),
-    mOperations( NO_OPERATION )
+    mOperations( NO_OPERATION ),
+    mControlSize()
    {
      mLogicalModel = LogicalModel::New();
      mVisualModel  = VisualModel::New();
@@ -67,6 +69,8 @@ struct Controller::Impl
    TextAbstraction::FontClient mFontClient;
  
    OperationsMask mOperations;
+
+  Size mControlSize;
  };
  
  ControllerPtr Controller::New()
@@ -91,14 +95,14 @@ bool Controller::Relayout( const Vector2& size )
  
    bool viewUpdated = false;
  
-  if( size != mControlSize )
+  if( size != mImpl->mControlSize )
    {
      viewUpdated = DoRelayout( size, mImpl->mOperations );
  
      // Do not re-do any operation until something changes.
      mImpl->mOperations = NO_OPERATION;
  
-    mControlSize = size;
+    mImpl->mControlSize = size;
    }
  
    return viewUpdated;
@@ -132,6 +136,21 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
      text.clear();
    }
  
+  Vector<LineBreakInfo> lineBreakInfo;
+  if( GET_LINE_BREAKS & operations )
+  {
+    // Retrieves the line break info. The line break info is used to split the text in 'paragraphs' to
+    // calculate the bidirectional info for each 'paragraph'.
+    // It's also used to layout the text (where it should be a new line) or to shape the text (text in different lines
+    // is not shaped together).
+    lineBreakInfo.Resize( characterCount, TextAbstraction::LINE_NO_BREAK );
+
+    SetLineBreakInfo( utf32Characters,
+                      lineBreakInfo );
+
+    mImpl->mLogicalModel->SetLineBreakInfo( lineBreakInfo.Begin(), characterCount );
+  }
+
    const bool getScripts = GET_SCRIPTS & operations;
    const bool validateFonts = VALIDATE_FONTS & operations;
  
@@ -147,6 +166,7 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
      {
        // Retrieves the scripts used in the text.
        multilanguageSupport.SetScripts( utf32Characters,
+                                       lineBreakInfo,
                                         scripts );
  
        // Sets the scripts into the model.
@@ -166,17 +186,6 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
      }
    }
  
-  Vector<LineBreakInfo> lineBreakInfo;
-  if( GET_LINE_BREAKS & operations )
-  {
-    // Retrieves the line break info. The line break info is used to split the text in 'paragraphs' to
-    // calculate the bidirectional info for each 'paragraph'.
-    // It's also used to layout the text (where it should be a new line) or to shape the text (text in different lines
-    // is not shaped together).
-    lineBreakInfo.Resize( characterCount, TextAbstraction::LINE_NO_BREAK );
-    mImpl->mLogicalModel->SetLineBreakInfo( lineBreakInfo.Begin(), characterCount );
-  }
-
    Vector<GlyphInfo> glyphs;
    Vector<CharacterIndex> characterIndices;
    Vector<Length> charactersPerGlyph;
@@ -210,7 +219,7 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
        mImpl->mVisualModel->GetGlyphs( glyphs.Begin(),
                                        0u,
                                        numberOfGlyphs );
-      
+
        mImpl->mVisualModel->GetGlyphToCharacterMap( characterIndices.Begin(),
                                                     0u,
                                                     numberOfGlyphs );
@@ -306,8 +315,7 @@ Controller::~Controller()
  }
  
  Controller::Controller()
-: mImpl( NULL ),
-  mControlSize()
+: mImpl( NULL )
  {
    mImpl = new Controller::Impl();
  }
diff --git a/dali-toolkit/public-api/text/text-controller.h b/dali-toolkit/public-api/text/text-controller.h

index 25ad189..92bc417 100644 (file)
--- a/dali-toolkit/public-api/text/text-controller.h
+++ b/dali-toolkit/public-api/text/text-controller.h
@@ -67,7 +67,7 @@ private:
      GET_GLYPH_METRICS = 0x40,
      LAYOUT            = 0x80,
      REORDER           = 0x100,
-    ALIGNEMENT        = 0x200,
+    ALIGNMENT         = 0x200,
      RENDER            = 0x400,
      ALL_OPERATIONS    = 0xFFF
    };
@@ -156,8 +156,6 @@ private:
  
    struct Impl;
    Impl* mImpl;
-
-  Size mControlSize;
  };
  
  } // namespace Text
author	Victor Cebollada <v.cebollada@samsung.com>
	Tue, 24 Feb 2015 15:07:35 +0000 (15:07 +0000)
committer	Victor Cebollada <v.cebollada@samsung.com>
	Thu, 26 Feb 2015 16:58:44 +0000 (16:58 +0000)
dali-toolkit/internal/text/multi-language-support-impl.cpp		patch \| blob \| history
dali-toolkit/internal/text/multi-language-support-impl.h		patch \| blob \| history
dali-toolkit/public-api/text/multi-language-support.cpp		patch \| blob \| history
dali-toolkit/public-api/text/multi-language-support.h		patch \| blob \| history
dali-toolkit/public-api/text/script.cpp		patch \| blob \| history
dali-toolkit/public-api/text/script.h		patch \| blob \| history
dali-toolkit/public-api/text/text-controller.cpp		patch \| blob \| history
dali-toolkit/public-api/text/text-controller.h		patch \| blob \| history