From: Seoyeon Kim Date: Fri, 7 Feb 2020 05:03:46 +0000 (+0900) Subject: Update UTF8 text array for 5 and 6 bytes X-Git-Tag: dali_1.9.1~2^2 X-Git-Url: http://review.tizen.org/git/?p=platform%2Fcore%2Fuifw%2Fdali-toolkit.git;a=commitdiff_plain;h=442e6589f61a2ca0da4b25d36908f11840eaa80d Update UTF8 text array for 5 and 6 bytes - Update start and end offsets encoded in UTF8 - Add U5 and U6 cases to UTF8 array Change-Id: Ic51ee3cc0878c3d09df8d7caeaeff09d7e48cbf7 Signed-off-by: Seoyeon Kim --- diff --git a/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp b/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp index 3175baf..f3725d2 100755 --- a/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp +++ b/automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp @@ -134,6 +134,8 @@ int UtcDaliTextCharacterSetConversionGetUtf8Length(void) const static uint8_t U2 = 2u; const static uint8_t U3 = 3u; const static uint8_t U4 = 4u; + const static uint8_t U5 = 5u; + const static uint8_t U6 = 6u; const static uint8_t U0 = 0u; const static uint8_t UTF8_LENGTH[256] = { U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // @@ -167,8 +169,11 @@ int UtcDaliTextCharacterSetConversionGetUtf8Length(void) U4, U4, U4, U4, U4, U4, U4, U4, // lead byte = 1111 0xxx (U+10000 - U+1FFFFF) - U0, U0, U0, U0, // Non valid. - U0, U0, U0, U0, // Non valid. + U5, U5, U5, U5, // lead byte = 1111 10xx (U+200000 - U+3FFFFFF) + + U6, U6, // lead byte = 1111 110x (U+4000000 - U+7FFFFFFF) + + U0, U0, // Non valid. 
}; for( unsigned int index = 0; index < 256u; ++index ) @@ -211,8 +216,18 @@ int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Characters(void) "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84", 7u, }, + { + "5 bytes test", + "\xF8\xA0\x80\x80\x80", + 1u, + }, + { + "6 bytes test", + "\xFC\x84\x80\x80\x80\x80", + 1u, + }, }; - const unsigned int numberOfTests = 4u; + const unsigned int numberOfTests = 6u; for( unsigned int index = 0u; index < numberOfTests; ++index ) { @@ -235,6 +250,8 @@ int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes(void) unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis + unsigned int utf32_05[] = { 0x800000 }; + unsigned int utf32_06[] = { 0x4000000 }; const GetNumberOfUtf8BytesData data[] = { @@ -262,8 +279,20 @@ int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes(void) 7u, 19u, }, + { + "5 bytes test", + utf32_05, + 1u, + 5u, + }, + { + "6 bytes test", + utf32_06, + 1u, + 6u + }, }; - const unsigned int numberOfTests = 4u; + const unsigned int numberOfTests = 6u; for( unsigned int index = 0u; index < numberOfTests; ++index ) { @@ -282,14 +311,16 @@ int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void) ToolkitTestApplication application; tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes"); - char utf8_06[] = { -8, -7, -6, -5, -4, -3, -2, -1 }; // Invalid string + char utf8_06[] = { -2, -1 }; // Invalid string unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World unsigned int utf32_02[] = { 0xA, 0x20, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0xA, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World + CR and CR+LF unsigned int utf32_03[] = { 
0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم unsigned int utf32_04[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड unsigned int utf32_05[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis - unsigned int utf32_06[] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; // Invalid string + unsigned int utf32_06[] = { 0x800000 }; + unsigned int utf32_07[] = { 0x4000000 }; + unsigned int utf32_08[] = { 0x20, 0x20 }; // Invalid string const Utf8ToUtf32Data data[] = { @@ -319,12 +350,22 @@ int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void) utf32_05, }, { + "5 bytes test", + "\xF8\xA0\x80\x80\x80", + utf32_06, + }, + { + "6 bytes test", + "\xFC\x84\x80\x80\x80\x80", + utf32_07, + }, + { "Invalid text", utf8_06, - utf32_06, + utf32_08, }, }; - const unsigned int numberOfTests = 6u; + const unsigned int numberOfTests = 8u; for( unsigned int index = 0u; index < numberOfTests; ++index ) { @@ -347,6 +388,8 @@ int UtcDaliTextCharacterSetConversionUtf32ToUtf8(void) unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis + unsigned int utf32_05[] = { 0x800000 }; + unsigned int utf32_06[] = { 0x4000000 }; struct Utf32ToUtf8Data data[] = { @@ -374,9 +417,21 @@ int UtcDaliTextCharacterSetConversionUtf32ToUtf8(void) 7u, "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84", }, + { + "5 bytes test", + utf32_05, + 1u, + "\xF8\xA0\x80\x80\x80", + }, + { + "6 bytes test", + utf32_06, + 1u, + "\xFC\x84\x80\x80\x80\x80", + }, }; - const unsigned int numberOfTests = 4u; + const unsigned int numberOfTests = 6u; for( unsigned int index = 0u; index < 
numberOfTests; ++index ) { diff --git a/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.cpp b/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.cpp index 2446f29..0f74724 100755 --- a/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.cpp +++ b/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.cpp @@ -60,7 +60,7 @@ public: void ApplyOptions( const InputMethodOptions& options ); bool FilterEventKey( const Dali::KeyEvent& keyEvent ); void SetPreeditStyle( Dali::InputMethodContext::PreeditStyle type ); - void GetPreeditStyle( Vector< Dali::InputMethodContext::PreeditAttributeData >& attrs ) const; + void GetPreeditStyle( Dali::InputMethodContext::PreEditAttributeDataContainer& attrs ) const; public: // Signals ActivatedSignalType& ActivatedSignal() { return mActivatedSignal; } @@ -87,7 +87,7 @@ private: bool mRestoreAfterFocusLost:1; ///< Whether the keyboard needs to be restored (activated ) after focus regained. bool mIdleCallbackConnected:1; ///< Whether the idle callback is already connected. 
InputMethodOptions mOptions; - Vector< Dali::InputMethodContext::PreeditAttributeData > mPreeditAttrs; ///< Stores preedit attr data + Dali::InputMethodContext::PreEditAttributeDataContainer mPreeditAttrs; ///< Stores preedit attribute data ActivatedSignalType mActivatedSignal; KeyboardEventSignalType mEventSignal; @@ -223,7 +223,7 @@ void InputMethodContext::SetPreeditStyle( Dali::InputMethodContext::PreeditStyle mPreeditAttrs.PushBack( data ); } -void InputMethodContext::GetPreeditStyle( Vector< Dali::InputMethodContext::PreeditAttributeData >& attrs ) const +void InputMethodContext::GetPreeditStyle( Dali::InputMethodContext::PreEditAttributeDataContainer& attrs ) const { attrs = mPreeditAttrs; } @@ -328,7 +328,7 @@ void InputMethodContext::SetPreeditStyle( Dali::InputMethodContext::PreeditStyle Internal::Adaptor::InputMethodContext::GetImplementation(*this).SetPreeditStyle( type ); } -void InputMethodContext::GetPreeditStyle( Vector< Dali::InputMethodContext::PreeditAttributeData >& attrs ) const +void InputMethodContext::GetPreeditStyle( Dali::InputMethodContext::PreEditAttributeDataContainer& attrs ) const { Internal::Adaptor::InputMethodContext::GetImplementation(*this).GetPreeditStyle( attrs ); } diff --git a/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.h b/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.h index 395ad4d..d53e02a 100755 --- a/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.h +++ b/automated-tests/src/dali-toolkit/dali-toolkit-test-utils/toolkit-input-method-context.h @@ -117,9 +117,16 @@ public: */ struct PreeditAttributeData { - PreeditStyle preeditType; /// The preedit style type - unsigned int startIndex; /// The start index of preedit - unsigned int endIndex; /// The end index of preedit + PreeditAttributeData() + : preeditType( PreeditStyle::NONE ), + startIndex( 0 ), + endIndex( 0 ) + { + } + + PreeditStyle preeditType; 
/// The preedit style type + unsigned int startIndex; /// The start index of preedit + unsigned int endIndex; /// The end index of preedit }; /** @@ -203,6 +210,8 @@ public: typedef Signal< void () > VoidSignalType; typedef Signal< void (bool) > StatusSignalType; + using PreEditAttributeDataContainer = Vector< Dali::InputMethodContext::PreeditAttributeData >; + public: /** @@ -323,11 +332,11 @@ public: void SetPreeditStyle( PreeditStyle type ); /** - * @brief Gets the preedit attrs data. + * @brief Gets the preedit attributes data. * - * @param[out] attrs The preedit attrs data. + * @param[out] attrs The preedit attributes data. */ - void GetPreeditStyle( Vector<Dali::InputMethodContext::PreeditAttributeData>& attrs ) const; + void GetPreeditStyle( Dali::InputMethodContext::PreEditAttributeDataContainer& attrs ) const; public: diff --git a/dali-toolkit/internal/text/character-set-conversion.cpp b/dali-toolkit/internal/text/character-set-conversion.cpp index d79fbc3..7402292 100644 --- a/dali-toolkit/internal/text/character-set-conversion.cpp +++ b/dali-toolkit/internal/text/character-set-conversion.cpp @@ -33,6 +33,8 @@ namespace const static uint8_t U2 = 2u; const static uint8_t U3 = 3u; const static uint8_t U4 = 4u; + const static uint8_t U5 = 5u; + const static uint8_t U6 = 6u; const static uint8_t U0 = 0u; const static uint8_t UTF8_LENGTH[256] = { U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // @@ -66,8 +68,11 @@ namespace U4, U4, U4, U4, U4, U4, U4, U4, // lead byte = 1111 0xxx (U+10000 - U+1FFFFF) - U0, U0, U0, U0, // Non valid. - U0, U0, U0, U0, // Non valid. + U5, U5, U5, U5, // lead byte = 1111 10xx (U+200000 - U+3FFFFFF) + + U6, U6, // lead byte = 1111 110x (U+4000000 - U+7FFFFFFF) + + U0, U0, // Non valid.
}; const uint8_t CR = 0xd; @@ -118,6 +123,14 @@ uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCha { numberOfBytes += U4; } + else if( code < 0x4000000u ) + { + numberOfBytes += U5; + } + else if( code < 0x80000000u ) + { + numberOfBytes += U6; + } } return numberOfBytes; @@ -197,6 +210,40 @@ uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf3 break; } + case U5: + { + uint32_t& code = *utf32++; + code = leadByte & 0x03u; + begin++; + code <<= 6u; + code |= *begin++ & 0x3fu; + code <<= 6u; + code |= *begin++ & 0x3fu; + code <<= 6u; + code |= *begin++ & 0x3fu; + code <<= 6u; + code |= *begin++ & 0x3fu; + break; + } + + case U6: + { + uint32_t& code = *utf32++; + code = leadByte & 0x01u; + begin++; + code <<= 6u; + code |= *begin++ & 0x3fu; + code <<= 6u; + code |= *begin++ & 0x3fu; + code <<= 6u; + code |= *begin++ & 0x3fu; + code <<= 6u; + code |= *begin++ & 0x3fu; + code <<= 6u; + code |= *begin++ & 0x3fu; + break; + } + case U0: // Invalid case { begin++; @@ -231,13 +278,30 @@ uint32_t Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, } else if( code < 0x10000u ) { - *utf8++ = static_cast<uint8_t>( code >> 12u ) | 0xe0u; // lead byte for 2 byte sequence + *utf8++ = static_cast<uint8_t>( code >> 12u ) | 0xe0u; // lead byte for 3 byte sequence *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte } else if( code < 0x200000u ) { - *utf8++ = static_cast<uint8_t>( code >> 18u ) | 0xf0u; // lead byte for 2 byte sequence + *utf8++ = static_cast<uint8_t>( code >> 18u ) | 0xf0u; // lead byte for 4 byte sequence + *utf8++ = static_cast<uint8_t>( ( code >> 12u ) & 0x3f ) | 0x80u; // continuation byte + *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte + *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte + } + else if( code < 0x4000000u ) + { + *utf8++ = static_cast<uint8_t>( code >> 24u ) | 0xf8u; // lead byte for 5
byte sequence + *utf8++ = static_cast<uint8_t>( ( code >> 18u ) & 0x3f ) | 0x80u; // continuation byte + *utf8++ = static_cast<uint8_t>( ( code >> 12u ) & 0x3f ) | 0x80u; // continuation byte + *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte + *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte + } + else if( code < 0x80000000u ) + { + *utf8++ = static_cast<uint8_t>( code >> 30u ) | 0xfcu; // lead byte for 6 byte sequence + *utf8++ = static_cast<uint8_t>( ( code >> 24u ) & 0x3f ) | 0x80u; // continuation byte + *utf8++ = static_cast<uint8_t>( ( code >> 18u ) & 0x3f ) | 0x80u; // continuation byte *utf8++ = static_cast<uint8_t>( ( code >> 12u ) & 0x3f ) | 0x80u; // continuation byte *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte diff --git a/dali-toolkit/internal/text/text-controller-impl.cpp b/dali-toolkit/internal/text/text-controller-impl.cpp index c39d5e8..8f4de65 100755 --- a/dali-toolkit/internal/text/text-controller-impl.cpp +++ b/dali-toolkit/internal/text/text-controller-impl.cpp @@ -1086,16 +1086,16 @@ bool Controller::Impl::UpdateModel( OperationsMask operationsRequired ) mEventData->mPreEditFlag && ( 0u != mModel->mVisualModel->mCharactersToGlyph.Count() ) ) { - Vector< Dali::InputMethodContext::PreeditAttributeData > attrs; + Dali::InputMethodContext::PreEditAttributeDataContainer attrs; mEventData->mInputMethodContext.GetPreeditStyle( attrs ); Dali::InputMethodContext::PreeditStyle type = Dali::InputMethodContext::PreeditStyle::NONE; // Check the type of preedit and run it.
- for( Vector<Dali::InputMethodContext::PreeditAttributeData>::Iterator it = attrs.Begin(), endIt = attrs.End(); it != endIt; it++ ) + for( Dali::InputMethodContext::PreEditAttributeDataContainer::Iterator it = attrs.Begin(), endIt = attrs.End(); it != endIt; it++ ) { Dali::InputMethodContext::PreeditAttributeData attrData = *it; DALI_LOG_INFO( gLogFilter, Debug::General, "Controller::UpdateModel PreeditStyle type : %d start %d end %d \n", attrData.preeditType, attrData.startIndex, attrData.endIndex ); - type = attrData.preeditType; + type = attrData.preeditType; // Check the number of commit characters for the start position. unsigned int numberOfCommit = mEventData->mPrimaryCursorPosition - mEventData->mPreEditLength; @@ -1108,7 +1108,7 @@ bool Controller::Impl::UpdateModel( OperationsMask operationsRequired ) // Add the underline for the pre-edit text. GlyphRun underlineRun; underlineRun.glyphIndex = attrData.startIndex + numberOfCommit; - underlineRun.numberOfGlyphs = attrData.endIndex - attrData.startIndex; + underlineRun.numberOfGlyphs = numberOfIndices; mModel->mVisualModel->mUnderlineRuns.PushBack( underlineRun ); break; } @@ -1152,7 +1152,7 @@ bool Controller::Impl::UpdateModel( OperationsMask operationsRequired ) GlyphRun underlineRun; underlineRun.glyphIndex = attrData.startIndex + numberOfCommit; - underlineRun.numberOfGlyphs = attrData.endIndex - attrData.startIndex; + underlineRun.numberOfGlyphs = numberOfIndices; mModel->mVisualModel->mUnderlineRuns.PushBack( underlineRun ); break; } @@ -1167,7 +1167,7 @@ bool Controller::Impl::UpdateModel( OperationsMask operationsRequired ) GlyphRun underlineRun; underlineRun.glyphIndex = attrData.startIndex + numberOfCommit; - underlineRun.numberOfGlyphs = attrData.endIndex - attrData.startIndex; + underlineRun.numberOfGlyphs = numberOfIndices; mModel->mVisualModel->mUnderlineRuns.PushBack( underlineRun ); break; } @@ -1182,7 +1182,7 @@ bool Controller::Impl::UpdateModel( OperationsMask operationsRequired ) GlyphRun underlineRun; underlineRun.glyphIndex =
attrData.startIndex + numberOfCommit; - underlineRun.numberOfGlyphs = attrData.endIndex - attrData.startIndex; + underlineRun.numberOfGlyphs = numberOfIndices; mModel->mVisualModel->mUnderlineRuns.PushBack( underlineRun ); break; } @@ -1197,7 +1197,7 @@ bool Controller::Impl::UpdateModel( OperationsMask operationsRequired ) GlyphRun underlineRun; underlineRun.glyphIndex = attrData.startIndex + numberOfCommit; - underlineRun.numberOfGlyphs = attrData.endIndex - attrData.startIndex; + underlineRun.numberOfGlyphs = numberOfIndices; mModel->mVisualModel->mUnderlineRuns.PushBack( underlineRun ); break; }