X-Git-Url: http://review.tizen.org/git/?p=platform%2Fcore%2Fuifw%2Fdali-toolkit.git;a=blobdiff_plain;f=dali-toolkit%2Finternal%2Ftext%2Fcharacter-set-conversion.cpp;h=74022925f6a7a49b3e437fa17cae9ab727107f26;hp=b1b9993b3c5e06f74cdc5ce7024c863d15e908c5;hb=528aa3699cd51dab5115bca1aaebb65d4bc67c15;hpb=61cdd0ef6c35b82605b7313845e5dc84b707074f

diff --git a/dali-toolkit/internal/text/character-set-conversion.cpp b/dali-toolkit/internal/text/character-set-conversion.cpp
index b1b9993..7402292 100644
--- a/dali-toolkit/internal/text/character-set-conversion.cpp
+++ b/dali-toolkit/internal/text/character-set-conversion.cpp
@@ -24,12 +24,17 @@ namespace Dali
 namespace Toolkit
 {
 
+namespace Text
+{
+
 namespace
 {
   const static uint8_t U1 = 1u;
   const static uint8_t U2 = 2u;
   const static uint8_t U3 = 3u;
   const static uint8_t U4 = 4u;
+  const static uint8_t U5 = 5u;
+  const static uint8_t U6 = 6u;
   const static uint8_t U0 = 0u;
   const static uint8_t UTF8_LENGTH[256] = {
     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
@@ -63,11 +68,22 @@ namespace
 
     U4, U4, U4, U4, U4, U4, U4, U4, // lead byte = 1111 0xxx (U+10000 - U+1FFFFF)
 
-    U0, U0, U0, U0, // Non valid.
-    U0, U0, U0, U0, // Non valid.
+    U5, U5, U5, U5, // lead byte = 1111 10xx (U+200000 - U+3FFFFFF)
+
+    U6, U6, // lead byte = 1111 110x (U+4000000 - U+7FFFFFFF)
+
+    U0, U0, // Non valid.
   };
+
+  const uint8_t CR = 0xd;
+  const uint8_t LF = 0xa;
 } // namespace
 
+uint8_t GetUtf8Length( uint8_t utf8LeadByte )
+{
+  return UTF8_LENGTH[utf8LeadByte];
+}
+
 uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length )
 {
   uint32_t numberOfCharacters = 0u;
@@ -107,6 +123,14 @@ uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCha
     {
       numberOfBytes += U4;
     }
+    else if( code < 0x4000000u )
+    {
+      numberOfBytes += U5;
+    }
+    else if( code < 0x80000000u )
+    {
+      numberOfBytes += U6;
+    }
   }
 
   return numberOfBytes;
@@ -127,8 +151,26 @@ uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf3
     {
       case U1:
       {
-        *utf32++ = leadByte;
-        begin++;
+        if( CR == leadByte )
+        {
+          // Replace CR+LF or CR by LF
+          *utf32++ = LF;
+
+          // Look ahead if the next one is a LF.
+          ++begin;
+          if( begin < end )
+          {
+            if( LF == *begin )
+            {
+              ++begin;
+            }
+          }
+        }
+        else
+        {
+          *utf32++ = leadByte;
+          begin++;
+        }
         break;
       }
 
@@ -167,6 +209,47 @@ uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf3
         code |= *begin++ & 0x3fu;
         break;
       }
+
+      case U5:
+      {
+        uint32_t& code = *utf32++;
+        code = leadByte & 0x03u;
+        begin++;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        break;
+      }
+
+      case U6:
+      {
+        uint32_t& code = *utf32++;
+        code = leadByte & 0x01u;
+        begin++;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        code <<= 6u;
+        code |= *begin++ & 0x3fu;
+        break;
+      }
+
+      case U0: // Invalid case
+      {
+        begin++;
+        *utf32++ = 0x20; // Use white space
+        break;
+      }
     }
   }
 
@@ -195,13 +278,30 @@ uint32_t Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters,
     }
     else if( code < 0x10000u )
     {
-      *utf8++ = static_cast<uint8_t>( code >> 12u ) | 0xe0u; // lead byte for 2 byte sequence
+      *utf8++ = static_cast<uint8_t>( code >> 12u ) | 0xe0u; // lead byte for 3 byte sequence
       *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte
       *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte
     }
     else if( code < 0x200000u )
     {
-      *utf8++ = static_cast<uint8_t>( code >> 18u ) | 0xf0u; // lead byte for 2 byte sequence
+      *utf8++ = static_cast<uint8_t>( code >> 18u ) | 0xf0u; // lead byte for 4 byte sequence
+      *utf8++ = static_cast<uint8_t>( ( code >> 12u ) & 0x3f ) | 0x80u; // continuation byte
+      *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte
+      *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte
+    }
+    else if( code < 0x4000000u )
+    {
+      *utf8++ = static_cast<uint8_t>( code >> 24u ) | 0xf8u; // lead byte for 5 byte sequence
+      *utf8++ = static_cast<uint8_t>( ( code >> 18u ) & 0x3f ) | 0x80u; // continuation byte
+      *utf8++ = static_cast<uint8_t>( ( code >> 12u ) & 0x3f ) | 0x80u; // continuation byte
+      *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte
+      *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte
+    }
+    else if( code < 0x80000000u )
+    {
+      *utf8++ = static_cast<uint8_t>( code >> 30u ) | 0xfcu; // lead byte for 6 byte sequence
+      *utf8++ = static_cast<uint8_t>( ( code >> 24u ) & 0x3f ) | 0x80u; // continuation byte
+      *utf8++ = static_cast<uint8_t>( ( code >> 18u ) & 0x3f ) | 0x80u; // continuation byte
       *utf8++ = static_cast<uint8_t>( ( code >> 12u ) & 0x3f ) | 0x80u; // continuation byte
       *utf8++ = static_cast<uint8_t>( ( code >> 6u ) & 0x3f ) | 0x80u; // continuation byte
       *utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte
@@ -222,6 +322,8 @@ void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, std:
   Utf32ToUtf8( utf32, numberOfCharacters, reinterpret_cast<uint8_t*>(&utf8[0]) );
 }
 
+} // namespace Text
+
 } // namespace Toolkit
 
 } // namespace Dali