From: Victor Cebollada <v.cebollada@samsung.com>
Date: Tue, 24 Feb 2015 15:07:35 +0000 (+0000)
Subject: Fixes for multi-language.
X-Git-Tag: new_text_0.1~36
X-Git-Url: http://review.tizen.org/git/?p=platform%2Fcore%2Fuifw%2Fdali-toolkit.git;a=commitdiff_plain;h=d83ef802ea8a21da2707c0053267064f6c1e8af7

Fixes for multi-language.
White spaces are detected as LATIN. Do not change the script if there is a white space.
Use the line break info to get the '\n' characters.

Change-Id: I18a4ea47a7dbc351a18c249aa72e2b269ee39b81
Signed-off-by: Victor Cebollada <v.cebollada@samsung.com>
---

diff --git a/dali-toolkit/internal/text/multi-language-support-impl.cpp b/dali-toolkit/internal/text/multi-language-support-impl.cpp
index e0053c8..f6a9a22 100644
--- a/dali-toolkit/internal/text/multi-language-support-impl.cpp
+++ b/dali-toolkit/internal/text/multi-language-support-impl.cpp
@@ -18,17 +18,17 @@
 // CLASS HEADER
 #include <dali-toolkit/internal/text/multi-language-support-impl.h>
 
-// INTERNAL INCLUDES
+// EXTERNAL INCLUDES
+#include <memory.h>
+#include <dali/integration-api/debug.h>
 #include <dali/public-api/adaptor-framework/singleton-service.h>
 #include <dali/public-api/text-abstraction/font-client.h>
+
+// INTERNAL INCLUDES
 #include <dali-toolkit/public-api/text/logical-model.h>
 #include <dali-toolkit/public-api/text/font-run.h>
 #include <dali-toolkit/public-api/text/script.h>
 #include <dali-toolkit/public-api/text/script-run.h>
-#include <dali/integration-api/debug.h>
-
-// EXTERNAL INCLUDES
-#include <memory.h>
 
 namespace Dali
 {
@@ -116,6 +116,24 @@ Script GetScript( Length index,
   return script;
 }
 
+/**
+ * @brief Whether the character is valid for all scripts, e.g. a white space.
+ *
+ * @param[in] character The character.
+ *
+ * @return @e true if the character is valid for all scripts.
+ */
+bool IsValidForAllScripts( Character character )
+{
+  return ( IsWhiteSpace( character )         ||
+           IsZeroWidthNonJoiner( character ) ||
+           IsZeroWidthJoiner( character )    ||
+           IsZeroWidthSpace( character )     ||
+           IsLeftToRightMark( character )    ||
+           IsRightToLeftMark( character )    ||
+           IsThinSpace( character ) );
+}
+
 bool ValidateFontsPerScript::FindValidFont( FontId fontId ) const
 {
   for( Vector<FontId>::ConstIterator it = mValidFonts.Begin(),
@@ -184,6 +202,7 @@ Text::MultilanguageSupport MultilanguageSupport::Get()
 }
 
 void MultilanguageSupport::SetScripts( const Vector<Character>& text,
+                                       const Vector<LineBreakInfo>& lineBreakInfo,
                                        Vector<ScriptRun>& scripts )
 {
   const Length numberOfCharacters = text.Count();
@@ -194,8 +213,6 @@ void MultilanguageSupport::SetScripts( const Vector<Character>& text,
     return;
   }
 
-  // Traverse all characters and set the scripts.
-
   // Stores the current script run.
   ScriptRun currentScriptRun;
   currentScriptRun.characterRun.characterIndex = 0u;
@@ -205,35 +222,92 @@ void MultilanguageSupport::SetScripts( const Vector<Character>& text,
   // Reserve some space to reduce the number of reallocations.
   scripts.Reserve( numberOfCharacters << 2u );
 
-  for( Length index = 0u; index < numberOfCharacters; ++index )
-  {
-    const Character character = *( text.Begin() + index );
+  // Whether the first valid script needs to be set.
+  bool firstValidScript = true;
 
-    Script script = GetCharacterScript( character );
+  // Whether the first valid script is a right to left script.
+  bool isParagraphRTL = false;
 
-    if( TextAbstraction::UNKNOWN == script )
+  // Counts the characters which are valid for all scripts, e.g. white spaces or '\n'.
+  Length numberOfAllScriptCharacters = 0u;
+
+  // Pointers to the text and break info buffers.
+  const Character* textBuffer = text.Begin();
+  const LineBreakInfo* breakInfoBuffer = lineBreakInfo.Begin();
+
+  // Traverse all characters and set the scripts.
+  for( Length index = 0u; index < numberOfCharacters; ++index )
+  {
+    Character character = *( textBuffer + index );
+    LineBreakInfo breakInfo = *( breakInfoBuffer + index );
+
+    // Some characters (like white spaces) are valid for many scripts. The rules to set a script
+    // for them are:
+    // - If they are at the beginning of a paragraph they get the script of the first character with
+    //   a defined script. If they are at the end, they get the script of the last one.
+    // - If they are between two scripts with the same direction, they get the script of the previous
+    //   character with a defined script. If the two scripts have different directions, they get the
+    //   script of the first character of the paragraph with a defined script.
+
+    // Skip those characters valid for many scripts like white spaces or '\n'.
+    bool endOfText = index == numberOfCharacters;
+    while( !endOfText &&
+           IsValidForAllScripts( character ) )
     {
-      if( IsZeroWidthNonJoiner( character ) ||
-          IsZeroWidthJoiner( character ) ||
-          IsZeroWidthSpace( character ) ||
-          IsLeftToRightMark( character ) ||
-          IsRightToLeftMark( character ) ||
-          IsThinSpace( character ) )
+      // Count all these characters to be added into a script.
+      ++numberOfAllScriptCharacters;
+
+      if( TextAbstraction::LINE_MUST_BREAK == breakInfo )
       {
-        // Keep previous script if the character is a zero width joiner or a zero width non joiner.
-        script = currentScriptRun.script;
+        // The next character is a new paragraph.
+        // Knowing where a new paragraph starts is needed because, if there is a white space
+        // between two scripts with different directions, it is added to the script with
+        // the same direction as the first script of the paragraph.
+        firstValidScript = true;
+        isParagraphRTL = false;
       }
-      else
+
+      // Get the next character.
+      ++index;
+      endOfText = index == numberOfCharacters;
+      if( !endOfText )
       {
-        script = TextAbstraction::LATIN;
-        DALI_ASSERT_DEBUG( !"MultilanguageSupport::SetScripts. Unkown script!" );
+        character = *( textBuffer + index );
+        breakInfo = *( breakInfoBuffer + index );
       }
     }
 
+    if( endOfText )
+    {
+      // Last characters of the text are 'white spaces'.
+      // There is nothing else to do. Just add the remaining characters to the last script after this loop.
+      break;
+    }
+
+    // Get the script of the character.
+    Script script = GetCharacterScript( character );
+
+    // Check if it is the first character of a paragraph.
+    if( firstValidScript &&
+        ( TextAbstraction::UNKNOWN != script ) )
+    {
+      // Sets the direction of the first valid script.
+      isParagraphRTL = ( TextAbstraction::ARABIC == script );
+      firstValidScript = false;
+    }
+
     if( script != currentScriptRun.script )
     {
       // Current run needs to be stored and a new one initialized.
 
+      if( isParagraphRTL != ( TextAbstraction::ARABIC == script ) )
+      {
+        // Current script has different direction than the first script of the paragraph.
+        // All the previously skipped characters need to be added to the previous script before it's stored.
+        currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters;
+        numberOfAllScriptCharacters = 0u;
+      }
+
       if( 0u != currentScriptRun.characterRun.numberOfCharacters )
       {
         // Store the script run.
@@ -242,16 +316,38 @@ void MultilanguageSupport::SetScripts( const Vector<Character>& text,
 
       // Initialize the new one.
       currentScriptRun.characterRun.characterIndex = currentScriptRun.characterRun.characterIndex + currentScriptRun.characterRun.numberOfCharacters;
-      currentScriptRun.characterRun.numberOfCharacters = 0u;
+      currentScriptRun.characterRun.numberOfCharacters = numberOfAllScriptCharacters; // Adds the white spaces which are at the beginning of the script.
       currentScriptRun.script = script;
+      numberOfAllScriptCharacters = 0u;
+    }
+    else
+    {
+      // Adds white spaces between characters.
+      currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters;
+      numberOfAllScriptCharacters = 0u;
+    }
+
+    if( TextAbstraction::LINE_MUST_BREAK == breakInfo )
+    {
+      // The next character is a new paragraph.
+      firstValidScript = true;
+      isParagraphRTL = false;
     }
 
     // Add one more character to the run.
     ++currentScriptRun.characterRun.numberOfCharacters;
   }
 
+  // Add remaining characters into the last script.
+  currentScriptRun.characterRun.numberOfCharacters += numberOfAllScriptCharacters;
   if( 0u != currentScriptRun.characterRun.numberOfCharacters )
   {
+    if( TextAbstraction::UNKNOWN == currentScriptRun.script )
+    {
+      // There are only white spaces in the last script. Set the latin script.
+      currentScriptRun.script = TextAbstraction::LATIN;
+    }
+
     // Store the last run.
     scripts.PushBack( currentScriptRun );
   }
diff --git a/dali-toolkit/internal/text/multi-language-support-impl.h b/dali-toolkit/internal/text/multi-language-support-impl.h
index a424cd0..643dea3 100644
--- a/dali-toolkit/internal/text/multi-language-support-impl.h
+++ b/dali-toolkit/internal/text/multi-language-support-impl.h
@@ -94,6 +94,7 @@ public:
    * @copydoc Dali::MultilanguageSupport::SetScripts()
    */
   void SetScripts( const Vector<Character>& text,
+                   const Vector<LineBreakInfo>& lineBreakInfo,
                    Vector<ScriptRun>& scripts );
   /**
    * @copydoc Dali::MultilanguageSupport::ValidateFonts()
diff --git a/dali-toolkit/public-api/text/multi-language-support.cpp b/dali-toolkit/public-api/text/multi-language-support.cpp
index 205b39b..b9edd83 100644
--- a/dali-toolkit/public-api/text/multi-language-support.cpp
+++ b/dali-toolkit/public-api/text/multi-language-support.cpp
@@ -49,9 +49,11 @@ MultilanguageSupport MultilanguageSupport::Get()
 }
 
 void MultilanguageSupport::SetScripts( const Vector<Character>& text,
+                                       const Vector<LineBreakInfo>& lineBreakInfo,
                                        Vector<ScriptRun>& scripts )
 {
   GetImplementation( *this ).SetScripts( text,
+                                         lineBreakInfo,
                                          scripts );
 }
 
diff --git a/dali-toolkit/public-api/text/multi-language-support.h b/dali-toolkit/public-api/text/multi-language-support.h
index 7fb7f58..531fdad 100644
--- a/dali-toolkit/public-api/text/multi-language-support.h
+++ b/dali-toolkit/public-api/text/multi-language-support.h
@@ -80,10 +80,20 @@ public:
    *
    * Scripts are used to validate and set default fonts and to shape the text in further steps.
    *
+   * Some characters (like white spaces) are valid for many scripts. The rules to set a script
+   * for them are:
+   * - If they are at the beginning of a paragraph they get the script of the first character with
+   *   a defined script. If they are at the end, they get the script of the last one.
+   * - If they are between two scripts with the same direction, they get the script of the previous
+   *   character with a defined script. If the two scripts have different directions, they get the
+   *   script of the first character of the paragraph with a defined script.
+   *
    * @param[in] text Vector of UTF-32 characters.
+   * @param[in] lineBreakInfo Vector with the line break info.
    * @param[out] scripts Vector containing the script runs for the whole text.
    */
   void SetScripts( const Vector<Character>& text,
+                   const Vector<LineBreakInfo>& lineBreakInfo,
                    Vector<ScriptRun>& scripts );
 
   /**
diff --git a/dali-toolkit/public-api/text/script.cpp b/dali-toolkit/public-api/text/script.cpp
index 5cd8c3f..c320a7e 100644
--- a/dali-toolkit/public-api/text/script.cpp
+++ b/dali-toolkit/public-api/text/script.cpp
@@ -29,6 +29,14 @@ namespace Text
 
 namespace
 {
+const unsigned int WHITE_SPACE_THRESHOLD  = 0x21; ///< All characters below 0x21 are considered white spaces.
+const unsigned int CHAR_FL   = 0x000A; ///< NL Line feed, new line.
+const unsigned int CHAR_VT   = 0x000B; ///< Vertical tab.
+const unsigned int CHAR_FF   = 0x000C; ///< NP Form feed, new page.
+const unsigned int CHAR_NEL  = 0x0085; ///< Next line.
+const unsigned int CHAR_LS   = 0x2028; ///< Line separator.
+const unsigned int CHAR_PS   = 0x2029; ///< Paragraph separator
+
 const unsigned int CHAR_ZWS  = 0x200B; ///< Zero width space.
 const unsigned int CHAR_ZWNJ = 0x200C; ///< Zero width non joiner.
 const unsigned int CHAR_ZWJ  = 0x200D; ///< Zero width joiner.
@@ -119,7 +127,6 @@ Script GetCharacterScript( Character character )
   // Burmese script
   // 0x1000 - 0x109f Myanmar
 
-
   if( character <= 0x0cff )
   {
     if( character <= 0x09ff )
@@ -374,6 +381,21 @@ Script GetCharacterScript( Character character )
   return TextAbstraction::UNKNOWN;
 }
 
+bool IsWhiteSpace( Character character )
+{
+  return character < WHITE_SPACE_THRESHOLD;
+}
+
+bool IsNewParagraph( Character character )
+{
+  return ( ( CHAR_FL == character )  ||
+           ( CHAR_VT == character )  ||
+           ( CHAR_FF == character )  ||
+           ( CHAR_NEL == character ) ||
+           ( CHAR_LS == character )  ||
+           ( CHAR_PS == character ) );
+}
+
 bool IsZeroWidthNonJoiner( Character character )
 {
   return CHAR_ZWNJ == character;
diff --git a/dali-toolkit/public-api/text/script.h b/dali-toolkit/public-api/text/script.h
index e3f4c63..7ab8d89 100644
--- a/dali-toolkit/public-api/text/script.h
+++ b/dali-toolkit/public-api/text/script.h
@@ -40,6 +40,24 @@ namespace Text
 Script GetCharacterScript( Character character );
 
 /**
+ * @brief Whether the character is a white space.
+ *
+ * @param[in] character The character.
+ *
+ * @return @e true if the character is a white space.
+ */
+bool IsWhiteSpace( Character character );
+
+/**
+ * @brief Whether the character is a new paragraph character.
+ *
+ * @param[in] character The character.
+ *
+ * @return @e true if the character is a new paragraph character.
+ */
+bool IsNewParagraph( Character character );
+
+/**
  * @brief Whether the character is a zero width non joiner.
  *
  * @param[in] character The character.
diff --git a/dali-toolkit/public-api/text/text-controller.cpp b/dali-toolkit/public-api/text/text-controller.cpp
index ceae8ca..d759497 100644
--- a/dali-toolkit/public-api/text/text-controller.cpp
+++ b/dali-toolkit/public-api/text/text-controller.cpp
@@ -24,6 +24,7 @@
 #include <dali-toolkit/public-api/text/logical-model.h>
 #include <dali-toolkit/public-api/text/multi-language-support.h>
 #include <dali-toolkit/public-api/text/script-run.h>
+#include <dali-toolkit/public-api/text/segmentation.h>
 #include <dali-toolkit/public-api/text/shaper.h>
 #include <dali-toolkit/public-api/text/text-view.h>
 #include <dali-toolkit/public-api/text/visual-model.h>
@@ -45,7 +46,8 @@ struct Controller::Impl
 {
   Impl()
   : mNewText(),
-    mOperations( NO_OPERATION )
+    mOperations( NO_OPERATION ),
+    mControlSize()
   {
     mLogicalModel = LogicalModel::New();
     mVisualModel  = VisualModel::New();
@@ -67,6 +69,8 @@ struct Controller::Impl
   TextAbstraction::FontClient mFontClient;
 
   OperationsMask mOperations;
+
+  Size mControlSize;
 };
 
 ControllerPtr Controller::New()
@@ -91,14 +95,14 @@ bool Controller::Relayout( const Vector2& size )
 
   bool viewUpdated = false;
 
-  if( size != mControlSize )
+  if( size != mImpl->mControlSize )
   {
     viewUpdated = DoRelayout( size, mImpl->mOperations );
 
     // Do not re-do any operation until something changes.
     mImpl->mOperations = NO_OPERATION;
 
-    mControlSize = size;
+    mImpl->mControlSize = size;
   }
 
   return viewUpdated;
@@ -132,6 +136,21 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
     text.clear();
   }
 
+  Vector<LineBreakInfo> lineBreakInfo;
+  if( GET_LINE_BREAKS & operations )
+  {
+    // Retrieves the line break info. The line break info is used to split the text in 'paragraphs' to
+    // calculate the bidirectional info for each 'paragraph'.
+    // It's also used to layout the text (where it should be a new line) or to shape the text (text in different lines
+    // is not shaped together).
+    lineBreakInfo.Resize( characterCount, TextAbstraction::LINE_NO_BREAK );
+
+    SetLineBreakInfo( utf32Characters,
+                      lineBreakInfo );
+
+    mImpl->mLogicalModel->SetLineBreakInfo( lineBreakInfo.Begin(), characterCount );
+  }
+
   const bool getScripts = GET_SCRIPTS & operations;
   const bool validateFonts = VALIDATE_FONTS & operations;
 
@@ -147,6 +166,7 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
     {
       // Retrieves the scripts used in the text.
       multilanguageSupport.SetScripts( utf32Characters,
+                                       lineBreakInfo,
                                        scripts );
 
       // Sets the scripts into the model.
@@ -166,17 +186,6 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
     }
   }
 
-  Vector<LineBreakInfo> lineBreakInfo;
-  if( GET_LINE_BREAKS & operations )
-  {
-    // Retrieves the line break info. The line break info is used to split the text in 'paragraphs' to
-    // calculate the bidirectional info for each 'paragraph'.
-    // It's also used to layout the text (where it should be a new line) or to shape the text (text in different lines
-    // is not shaped together).
-    lineBreakInfo.Resize( characterCount, TextAbstraction::LINE_NO_BREAK );
-    mImpl->mLogicalModel->SetLineBreakInfo( lineBreakInfo.Begin(), characterCount );
-  }
-
   Vector<GlyphInfo> glyphs;
   Vector<CharacterIndex> characterIndices;
   Vector<Length> charactersPerGlyph;
@@ -210,7 +219,7 @@ bool Controller::DoRelayout( const Vector2& size, OperationsMask operations )
       mImpl->mVisualModel->GetGlyphs( glyphs.Begin(),
                                       0u,
                                       numberOfGlyphs );
-      
+
       mImpl->mVisualModel->GetGlyphToCharacterMap( characterIndices.Begin(),
                                                    0u,
                                                    numberOfGlyphs );
@@ -306,8 +315,7 @@ Controller::~Controller()
 }
 
 Controller::Controller()
-: mImpl( NULL ),
-  mControlSize()
+: mImpl( NULL )
 {
   mImpl = new Controller::Impl();
 }
diff --git a/dali-toolkit/public-api/text/text-controller.h b/dali-toolkit/public-api/text/text-controller.h
index 25ad189..92bc417 100644
--- a/dali-toolkit/public-api/text/text-controller.h
+++ b/dali-toolkit/public-api/text/text-controller.h
@@ -67,7 +67,7 @@ private:
     GET_GLYPH_METRICS = 0x40,
     LAYOUT            = 0x80,
     REORDER           = 0x100,
-    ALIGNEMENT        = 0x200,
+    ALIGNMENT         = 0x200,
     RENDER            = 0x400,
     ALL_OPERATIONS    = 0xFFF
   };
@@ -156,8 +156,6 @@ private:
 
   struct Impl;
   Impl* mImpl;
-
-  Size mControlSize;
 };
 
 } // namespace Text