From 4d39084f8e8479ee521dcaae09aa94e76d965d98 Mon Sep 17 00:00:00 2001 From: Victor Cebollada Date: Thu, 16 Jul 2015 13:54:46 +0100 Subject: [PATCH] Convert CR or CR+LF to LF. Change-Id: Icd14045ddd7f74f690fcacbffe8ad33ba1f5fbd2 Signed-off-by: Victor Cebollada --- .../utc-Dali-Text-CharacterSetConversion.cpp | 20 +++++++++++------ .../internal/text/character-set-conversion.cpp | 25 ++++++++++++++++++++-- .../internal/text/character-set-conversion.h | 5 +++++ dali-toolkit/internal/text/markup-processor.cpp | 25 ++++++---------------- .../text-field.md | 2 ++ .../text-label.md | 2 ++ 6 files changed, 52 insertions(+), 27 deletions(-) diff --git a/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp b/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp index 9f816ef..fd77e0d 100644 --- a/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp +++ b/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp @@ -221,9 +221,10 @@ int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void) tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes"); unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World - unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم - unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड - unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis + unsigned int utf32_02[] = { 0xA, 0x20, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0xA, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World + CR and CR+LF + unsigned int utf32_03[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم + unsigned int utf32_04[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 
0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड + unsigned int utf32_05[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis const Utf8ToUtf32Data data[] = { @@ -233,22 +234,27 @@ int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void) utf32_01, }, { + "Latin script with 'CR' and 'CR'+'LF'", + "\xd Hello\xd\xa World", + utf32_02, + }, + { "Arabic script", "مرحبا بالعالم", - utf32_02, + utf32_03, }, { "Devanagari script", "हैलो वर्ल्ड", - utf32_03, + utf32_04, }, { "Emojis", "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84", - utf32_04, + utf32_05, }, }; - const unsigned int numberOfTests = 4u; + const unsigned int numberOfTests = 5u; for( unsigned int index = 0u; index < numberOfTests; ++index ) { diff --git a/dali-toolkit/internal/text/character-set-conversion.cpp b/dali-toolkit/internal/text/character-set-conversion.cpp index ac8b317..bd72dc1 100644 --- a/dali-toolkit/internal/text/character-set-conversion.cpp +++ b/dali-toolkit/internal/text/character-set-conversion.cpp @@ -69,6 +69,9 @@ namespace U0, U0, U0, U0, // Non valid. U0, U0, U0, U0, // Non valid. }; + + const uint8_t CR = 0xd; + const uint8_t LF = 0xa; } // namespace uint8_t GetUtf8Length( uint8_t utf8LeadByte ) @@ -135,8 +138,26 @@ uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf3 { case U1: { - *utf32++ = leadByte; - begin++; + if( CR == leadByte ) + { + // Replace CR+LF or CR by LF + *utf32++ = LF; + + // Look ahead if the next one is a LF. 
+ ++begin; + if( begin < end ) + { + if( LF == *begin ) + { + ++begin; + } + } + } + else + { + *utf32++ = leadByte; + begin++; + } break; } diff --git a/dali-toolkit/internal/text/character-set-conversion.h b/dali-toolkit/internal/text/character-set-conversion.h index 730016b..e2306fe 100644 --- a/dali-toolkit/internal/text/character-set-conversion.h +++ b/dali-toolkit/internal/text/character-set-conversion.h @@ -65,6 +65,11 @@ uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCha * * The @p utf32 buffer needs to be big enough to store all the characters. * + * Any single 'CR' character or 'CR'+'LF' pair found in the text is replaced by a single 'LF'. + * + * @note GetNumberOfUtf8Characters() does not convert 'CR' or 'CR'+'LF' to 'LF', so the number of + * characters that method returns may be higher than the number of characters returned by this one. + * * @param[in] utf8 The pointer to the UTF8 array. * @param[in] length The length of the UTF8 array. * @param[out] utf32 The pointer to the UTF32 array. 
diff --git a/dali-toolkit/internal/text/markup-processor.cpp b/dali-toolkit/internal/text/markup-processor.cpp index 53ccf0b..f8ecde3 100644 --- a/dali-toolkit/internal/text/markup-processor.cpp +++ b/dali-toolkit/internal/text/markup-processor.cpp @@ -47,16 +47,14 @@ const std::string XHTML_SHADOW_TAG("shadow"); const std::string XHTML_GLOW_TAG("glow"); const std::string XHTML_OUTLINE_TAG("outline"); -const char LESS_THAN = '<'; -const char GREATER_THAN = '>'; -const char EQUAL = '='; -const char QUOTATION_MARK = '\''; -const char LINE_SEPARATOR_CR = 0x0D; // Carriage return character CR -const char LINE_SEPARATOR_LF = 0x0A; // New line character LF -const char SLASH = '/'; -const char BACK_SLASH = '\\'; +const char LESS_THAN = '<'; +const char GREATER_THAN = '>'; +const char EQUAL = '='; +const char QUOTATION_MARK = '\''; +const char SLASH = '/'; +const char BACK_SLASH = '\\'; -const char WHITE_SPACE = 0x20; // ASCII value of the white space. +const char WHITE_SPACE = 0x20; // ASCII value of the white space. const unsigned int MAX_NUM_OF_ATTRIBUTES = 5u; ///< The font tag has the 'family', 'size' 'weight', 'width' and 'slant' attrubutes. const unsigned int DEFAULT_VECTOR_SIZE = 16u; ///< Default size of run vectors. @@ -561,15 +559,6 @@ void ProcessMarkupString( const std::string& markupString, MarkupProcessData& ma ++markupStringBuffer; } } - else if( ( LINE_SEPARATOR_CR == character ) && ( markupStringBuffer + 1u < markupStringEndBuffer ) ) - { - // Replacing CR+LF end line by LF. 
- if( LINE_SEPARATOR_LF == *( markupStringBuffer + 1u ) ) - { - character = LINE_SEPARATOR_LF; - ++markupStringBuffer; - } - } const unsigned char numberOfBytes = GetUtf8Length( character ); diff --git a/docs/content/shared-javascript-and-cpp-documentation/text-field.md b/docs/content/shared-javascript-and-cpp-documentation/text-field.md index c06c434..76cea4c 100644 --- a/docs/content/shared-javascript-and-cpp-documentation/text-field.md +++ b/docs/content/shared-javascript-and-cpp-documentation/text-field.md @@ -13,6 +13,8 @@ Before any text has been entered, the TextField can display some placeholder tex An alternative placeholder can be displayed when the TextField has keyboard focus. For example a TextField used to enter a username could initially show "Unknown Name", and then show "Enter Name." when the cursor is shown. +Note: a single *CR* or a *CR+LF* pair of new line characters is replaced by a single *LF*. + ~~~{.cpp} // C++ diff --git a/docs/content/shared-javascript-and-cpp-documentation/text-label.md b/docs/content/shared-javascript-and-cpp-documentation/text-label.md index 13d2ab0..5247e92 100644 --- a/docs/content/shared-javascript-and-cpp-documentation/text-label.md +++ b/docs/content/shared-javascript-and-cpp-documentation/text-label.md @@ -12,6 +12,8 @@ Text labels are lightweight, non-editable and do not respond to user input. To display a TextLabel the TEXT property must be set using a UTF-8 string. +Note: a single *CR* or a *CR+LF* pair of new line characters is replaced by a single *LF*. + ~~~{.cpp} // C++ -- 2.7.4