From 4d39084f8e8479ee521dcaae09aa94e76d965d98 Mon Sep 17 00:00:00 2001
From: Victor Cebollada <v.cebollada@samsung.com>
Date: Thu, 16 Jul 2015 13:54:46 +0100
Subject: [PATCH] Convert CR or CR+LF to LF.

Change-Id: Icd14045ddd7f74f690fcacbffe8ad33ba1f5fbd2
Signed-off-by: Victor Cebollada <v.cebollada@samsung.com>
---
 .../utc-Dali-Text-CharacterSetConversion.cpp       | 20 +++++++++++------
 .../internal/text/character-set-conversion.cpp     | 25 ++++++++++++++++++++--
 .../internal/text/character-set-conversion.h       |  5 +++++
 dali-toolkit/internal/text/markup-processor.cpp    | 25 ++++++----------------
 .../text-field.md                                  |  2 ++
 .../text-label.md                                  |  2 ++
 6 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp b/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp
index 9f816ef..fd77e0d 100644
--- a/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp
+++ b/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp
@@ -221,9 +221,10 @@ int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void)
   tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes");
 
   unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
-  unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // ÙØ±Ø­Ø¨Ø§ Ø¨Ø§ÙØ¹Ø§ÙÙ
-  unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // à¤¹à¥à¤²à¥ à¤µà¤°à¥à¤²à¥à¤¡
-  unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
+  unsigned int utf32_02[] = { 0xA, 0x20, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0xA, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Expected output: LF + " Hello" + LF + " World" (CR and CR+LF both converted to LF)
+  unsigned int utf32_03[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
+  unsigned int utf32_04[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
+  unsigned int utf32_05[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
 
   const Utf8ToUtf32Data data[] =
   {
@@ -233,22 +234,27 @@ int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void)
       utf32_01,
     },
     {
+      "Latin script with 'CR' and 'CR'+'LF'",
+      "\xd Hello\xd\xa World",
+      utf32_02,
+    },
+    {
       "Arabic script",
       "ÙØ±Ø­Ø¨Ø§ Ø¨Ø§ÙØ¹Ø§ÙÙ",
-      utf32_02,
+      utf32_03,
     },
     {
       "Devanagari script",
       "à¤¹à¥à¤²à¥ à¤µà¤°à¥à¤²à¥à¤¡",
-      utf32_03,
+      utf32_04,
     },
     {
       "Emojis",
       "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
-      utf32_04,
+      utf32_05,
     },
   };
-  const unsigned int numberOfTests = 4u;
+  const unsigned int numberOfTests = 5u;
 
   for( unsigned int index = 0u; index < numberOfTests; ++index )
   {
diff --git a/dali-toolkit/internal/text/character-set-conversion.cpp b/dali-toolkit/internal/text/character-set-conversion.cpp
index ac8b317..bd72dc1 100644
--- a/dali-toolkit/internal/text/character-set-conversion.cpp
+++ b/dali-toolkit/internal/text/character-set-conversion.cpp
@@ -69,6 +69,9 @@ namespace
     U0, U0, U0, U0,                         // Non valid.
     U0, U0, U0, U0,                         // Non valid.
   };
+
+  const uint8_t CR = 0xd;
+  const uint8_t LF = 0xa;
 } // namespace
 
 uint8_t GetUtf8Length( uint8_t utf8LeadByte )
@@ -135,8 +138,26 @@ uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf3
     {
       case U1:
       {
-        *utf32++ = leadByte;
-        begin++;
+        if( CR == leadByte )
+        {
+          // Replace CR+LF or CR by LF
+          *utf32++ = LF;
+
+          // Skip the CR, then look ahead: if the next byte is a LF, skip it as well.
+          ++begin;
+          if( begin < end )
+          {
+            if( LF == *begin )
+            {
+              ++begin;
+            }
+          }
+        }
+        else
+        {
+          *utf32++ = leadByte;
+          begin++;
+        }
         break;
       }
 
diff --git a/dali-toolkit/internal/text/character-set-conversion.h b/dali-toolkit/internal/text/character-set-conversion.h
index 730016b..e2306fe 100644
--- a/dali-toolkit/internal/text/character-set-conversion.h
+++ b/dali-toolkit/internal/text/character-set-conversion.h
@@ -65,6 +65,11 @@ uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCha
  *
  * The @p utf32 buffer needs to be big enough to store all the characters.
  *
+ * If the text contains a single 'CR' character or a pair 'CR'+'LF', they are replaced by a 'LF'.
+ *
+ * @note GetNumberOfUtf8Characters() does not convert 'CR' or 'CR'+'LF' to 'LF', so the number of
+ * characters it returns may be higher than the number of characters returned by this function.
+ *
  * @param[in] utf8 The pointer to the UTF8 array.
  * @param[in] length The length of the UTF8 array.
  * @param[out] utf32 The pointer to the UTF32 array.
diff --git a/dali-toolkit/internal/text/markup-processor.cpp b/dali-toolkit/internal/text/markup-processor.cpp
index 53ccf0b..f8ecde3 100644
--- a/dali-toolkit/internal/text/markup-processor.cpp
+++ b/dali-toolkit/internal/text/markup-processor.cpp
@@ -47,16 +47,14 @@ const std::string XHTML_SHADOW_TAG("shadow");
 const std::string XHTML_GLOW_TAG("glow");
 const std::string XHTML_OUTLINE_TAG("outline");
 
-const char LESS_THAN         = '<';
-const char GREATER_THAN      = '>';
-const char EQUAL             = '=';
-const char QUOTATION_MARK    = '\'';
-const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character  CR
-const char LINE_SEPARATOR_LF = 0x0A; // New line character         LF
-const char SLASH             = '/';
-const char BACK_SLASH        = '\\';
+const char LESS_THAN      = '<';
+const char GREATER_THAN   = '>';
+const char EQUAL          = '=';
+const char QUOTATION_MARK = '\'';
+const char SLASH          = '/';
+const char BACK_SLASH     = '\\';
 
-const char WHITE_SPACE       = 0x20; // ASCII value of the white space.
+const char WHITE_SPACE    = 0x20; // ASCII value of the white space.
 
 const unsigned int MAX_NUM_OF_ATTRIBUTES =  5u; ///< The font tag has the 'family', 'size' 'weight', 'width' and 'slant' attrubutes.
 const unsigned int DEFAULT_VECTOR_SIZE   = 16u; ///< Default size of run vectors.
@@ -561,15 +559,6 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma
           ++markupStringBuffer;
         }
       }
-      else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) )
-      {
-        // Replacing CR+LF end line by LF.
-        if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) )
-        {
-          character = LINE_SEPARATOR_LF;
-          ++markupStringBuffer;
-        }
-      }
 
       const unsigned char numberOfBytes = GetUtf8Length( character );
 
diff --git a/docs/content/shared-javascript-and-cpp-documentation/text-field.md b/docs/content/shared-javascript-and-cpp-documentation/text-field.md
index c06c434..76cea4c 100644
--- a/docs/content/shared-javascript-and-cpp-documentation/text-field.md
+++ b/docs/content/shared-javascript-and-cpp-documentation/text-field.md
@@ -13,6 +13,8 @@ Before any text has been entered, the TextField can display some placeholder tex
 An alternative placeholder can be displayed when the TextField has keyboard focus.
 For example a TextField used to enter a username could initially show "Unknown Name", and then show "Enter Name." when the cursor is shown.
 
+Note: a lone *CR* character or a *CR+LF* pair of new line characters is replaced by a single *LF*.
+
 ~~~{.cpp}
 // C++
 
diff --git a/docs/content/shared-javascript-and-cpp-documentation/text-label.md b/docs/content/shared-javascript-and-cpp-documentation/text-label.md
index 13d2ab0..5247e92 100644
--- a/docs/content/shared-javascript-and-cpp-documentation/text-label.md
+++ b/docs/content/shared-javascript-and-cpp-documentation/text-label.md
@@ -12,6 +12,8 @@ Text labels are lightweight, non-editable and do not respond to user input.
 
 To display a TextLabel the TEXT property must be set using a UTF-8 string.
 
+Note: a lone *CR* character or a *CR+LF* pair of new line characters is replaced by a single *LF*.
+
 ~~~{.cpp}
 // C++
 
-- 
2.7.4