2 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali-toolkit/public-api/text/character-set-conversion.h>
// Number of bytes in a UTF-8 sequence, as announced by its lead byte.
static const uint8_t U1 = 1u;
static const uint8_t U2 = 2u;
static const uint8_t U3 = 3u;
static const uint8_t U4 = 4u;
static const uint8_t U0 = 0u; // Not a valid lead byte.

// Lookup table indexed by the value of a lead byte; one row per 16 byte values.
//
// Bytes 0x80 - 0xBF have the form 10xx xxxx, which in UTF-8 is a continuation
// byte rather than a lead byte. The table deliberately reports them as single
// byte sequences (they are handled as extended ascii characters) instead of
// rejecting them.
//
// The table must hold exactly 256 entries: 192 x U1, 32 x U2, 16 x U3, 8 x U4
// and 8 x U0. A missing entry shifts every following range.
static const uint8_t UTF8_LENGTH[256] = {
  // lead byte = 0xxx xxxx (U+0000 - U+007F)
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x00 - 0x0F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x10 - 0x1F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x20 - 0x2F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x30 - 0x3F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x40 - 0x4F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x50 - 0x5F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x60 - 0x6F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x70 - 0x7F

  // 10xx xxxx: continuation bytes, treated as single byte extended ascii characters
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x80 - 0x8F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x90 - 0x9F
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0xA0 - 0xAF
  U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0xB0 - 0xBF

  // lead byte = 110x xxxx (U+0080 - U+07FF)
  U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, // 0xC0 - 0xCF
  U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, // 0xD0 - 0xDF

  // lead byte = 1110 xxxx (U+0800 - U+FFFF)
  U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, // 0xE0 - 0xEF

  // lead byte = 1111 0xxx (U+10000 - U+1FFFFF), followed by the non valid lead bytes
  U4, U4, U4, U4, U4, U4, U4, U4,                                 // 0xF0 - 0xF7
  U0, U0, U0, U0, U0, U0, U0, U0                                  // 0xF8 - 0xFF, non valid
};
71 uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length )
73 uint32_t numberOfCharacters = 0u;
75 const uint8_t* begin = utf8;
76 const uint8_t* end = utf8 + length;
78 for( ; begin < end ; begin += UTF8_LENGTH[*begin], ++numberOfCharacters );
80 return numberOfCharacters;
/**
 * Calculates how many bytes are needed to encode UTF-32 characters as UTF-8.
 *
 * Codes below 0x80 take one byte, below 0x800 two bytes, below 0x10000 three
 * bytes and below 0x200000 four bytes. Codes equal to or above 0x200000 can
 * not be encoded and contribute no bytes.
 *
 * @param[in] utf32 The UTF-32 codes.
 * @param[in] numberOfCharacters The number of codes in @p utf32.
 * @return The number of bytes of the UTF-8 encoding.
 */
uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCharacters )
{
  uint32_t numberOfBytes = 0u;

  const uint32_t* begin = utf32;
  const uint32_t* const end = utf32 + numberOfCharacters;

  for( ; begin < end; ++begin )
  {
    const uint32_t code = *begin;

    if( code < 0x80u )
    {
      numberOfBytes += 1u;
    }
    else if( code < 0x800u )
    {
      numberOfBytes += 2u;
    }
    else if( code < 0x10000u )
    {
      numberOfBytes += 3u;
    }
    else if( code < 0x200000u )
    {
      numberOfBytes += 4u;
    }
    // else: the code can not be encoded, nothing is added.
  }

  return numberOfBytes;
}
115 uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf32 )
117 uint32_t numberOfCharacters = 0u;
119 const uint8_t* begin = utf8;
120 const uint8_t* end = utf8 + length;
122 for( ; begin < end ; ++numberOfCharacters )
124 const uint8_t leadByte = *begin;
125 switch( UTF8_LENGTH[leadByte] )
135 uint32_t& code = *utf32;
136 code = leadByte & 0x1fu;
139 code |= *begin++ & 0x3fu;
144 uint32_t& code = *utf32;
145 code = leadByte & 0x1fu;
148 code |= *begin++ & 0x3fu;
150 code |= *begin++ & 0x3fu;
155 uint32_t& code = *utf32;
156 code = leadByte & 0x1fu;
159 code |= *begin++ & 0x3fu;
161 code |= *begin++ & 0x3fu;
163 code |= *begin++ & 0x3fu;
169 return numberOfCharacters;
/**
 * Converts UTF-32 codes into UTF-8 bytes.
 *
 * Codes below 0x80 are written as one byte, below 0x800 as two bytes, below
 * 0x10000 as three bytes and below 0x200000 as four bytes. Codes equal to or
 * above 0x200000 can not be encoded and nothing is written for them.
 *
 * @param[in] utf32 The UTF-32 codes.
 * @param[in] numberOfCharacters The number of codes in @p utf32.
 * @param[out] utf8 Receives the encoded bytes. The caller provides room for
 *                  the whole encoding (up to four bytes per code).
 */
void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 )
{
  const uint32_t* begin = utf32;
  const uint32_t* const end = utf32 + numberOfCharacters;

  for( ; begin < end; ++begin )
  {
    const uint32_t code = *begin;

    if( code < 0x80u )
    {
      *utf8++ = static_cast<uint8_t>( code ); // single byte, 0xxx xxxx
    }
    else if( code < 0x800u )
    {
      *utf8++ = static_cast<uint8_t>( ( code >> 6u ) | 0xc0u );             // lead byte for 2 byte sequence
      *utf8++ = static_cast<uint8_t>( ( code & 0x3fu ) | 0x80u );           // continuation byte
    }
    else if( code < 0x10000u )
    {
      *utf8++ = static_cast<uint8_t>( ( code >> 12u ) | 0xe0u );            // lead byte for 3 byte sequence
      *utf8++ = static_cast<uint8_t>( ( ( code >> 6u ) & 0x3fu ) | 0x80u ); // continuation byte
      *utf8++ = static_cast<uint8_t>( ( code & 0x3fu ) | 0x80u );           // continuation byte
    }
    else if( code < 0x200000u )
    {
      *utf8++ = static_cast<uint8_t>( ( code >> 18u ) | 0xf0u );             // lead byte for 4 byte sequence
      *utf8++ = static_cast<uint8_t>( ( ( code >> 12u ) & 0x3fu ) | 0x80u ); // continuation byte
      *utf8++ = static_cast<uint8_t>( ( ( code >> 6u ) & 0x3fu ) | 0x80u );  // continuation byte
      *utf8++ = static_cast<uint8_t>( ( code & 0x3fu ) | 0x80u );            // continuation byte
    }
    // else: the code can not be encoded, nothing is written.
  }
}
206 } // namespace Toolkit