2 // Copyright (c) 2014 Samsung Electronics Co., Ltd.
4 // Licensed under the Flora License, Version 1.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://floralicense.org/license/
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an AS IS BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 #include <dali/internal/event/text/utf8-impl.h>
21 #include <dali/integration-api/debug.h>
29 size_t UTF8SequenceLength(const unsigned char leadByte)
33 if ((leadByte & 0x80) == 0 ) //ASCII character (lead bit zero)
37 else if (( leadByte & 0xe0 ) == 0xc0 ) //110x xxxx
41 else if (( leadByte & 0xf0 ) == 0xe0 ) //1110 xxxx
45 else if (( leadByte & 0xf8 ) == 0xf0 ) //1111 0xxx
51 DALI_LOG_WARNING("Unrecognized lead byte %c\n", leadByte);
/**
 * @brief Decode one UTF-8 sequence into a code value.
 *
 * The payload bits of the lead byte are taken first, then each continuation
 * byte contributes its low six bits, most significant group first.
 * No validation of the continuation bytes is performed.
 *
 * @param[in] utf8Data       Pointer to the lead byte; at least sequenceLength
 *                           bytes must be readable from it.
 * @param[in] sequenceLength Number of bytes in the sequence (1 to 4).
 * @return The decoded value, or 0 when sequenceLength is not in the range 1-4
 *         (in which case utf8Data is not read).
 */
uint32_t UTF8Read(const unsigned char* utf8Data, const size_t sequenceLength)
{
  // Mask selecting the payload bits of the lead byte, indexed by sequence length.
  // A single-byte sequence is taken verbatim, hence the 0xff entry.
  static const unsigned char LEAD_BYTE_MASK[] = { 0x00, 0xff, 0x1f, 0x0f, 0x07 };

  uint32_t code = 0;

  if (sequenceLength >= 1 && sequenceLength <= 4)
  {
    code = utf8Data[0] & LEAD_BYTE_MASK[sequenceLength];

    for (size_t index = 1; index < sequenceLength; ++index)
    {
      // Make room for the next six payload bits before merging them in;
      // without the shift the continuation bits would overwrite the lead bits.
      code <<= 6;
      code |= utf8Data[index] & 0x3f;
    }
  }

  return code;
}
/**
 * @brief Encode a code value as a UTF-8 byte sequence.
 *
 * @param[in]  code     Value to encode.
 * @param[out] utf8Data Destination buffer; must have room for the bytes
 *                      written (up to 4).
 * @return Number of bytes written (1 to 4), or 0 when code is 0x200000 or
 *         greater, in which case nothing is written.
 */
size_t UTF8Write(const uint32_t code, unsigned char* utf8Data)
{
  size_t sequenceLength = 0;

  if (code < 0x80u)
  {
    *utf8Data = static_cast<uint8_t>(code); // single byte, value stored as-is
    sequenceLength = 1;
  }
  else if (code < 0x800u)
  {
    *utf8Data++ = static_cast<uint8_t>( code >> 6)          | 0xc0; // lead byte for 2 byte sequence
    *utf8Data   = static_cast<uint8_t>( code        & 0x3f) | 0x80; // continuation byte
    sequenceLength = 2;
  }
  else if (code < 0x10000u)
  {
    *utf8Data++ = static_cast<uint8_t>( code >> 12)         | 0xe0; // lead byte for 3 byte sequence
    *utf8Data++ = static_cast<uint8_t>((code >> 6)  & 0x3f) | 0x80; // continuation byte
    *utf8Data   = static_cast<uint8_t>( code        & 0x3f) | 0x80; // continuation byte
    sequenceLength = 3;
  }
  else if (code < 0x200000u)
  {
    *utf8Data++ = static_cast<uint8_t>( code >> 18)         | 0xf0; // lead byte for 4 byte sequence
    *utf8Data++ = static_cast<uint8_t>((code >> 12) & 0x3f) | 0x80; // continuation byte
    *utf8Data++ = static_cast<uint8_t>((code >> 6)  & 0x3f) | 0x80; // continuation byte
    *utf8Data   = static_cast<uint8_t>( code        & 0x3f) | 0x80; // continuation byte
    sequenceLength = 4;
  }

  return sequenceLength;
}
126 size_t UTF8Tokenize(const unsigned char* utf8Data, const size_t utf8DataLength, TextArray& tokens)
128 size_t dataLength = utf8DataLength;
133 size_t sequenceLength = UTF8SequenceLength(*utf8Data);
140 if (sequenceLength > dataLength)
146 code = UTF8Read(utf8Data, sequenceLength);
147 tokens.push_back(code);
149 utf8Data += sequenceLength;
150 dataLength -= sequenceLength;
152 return tokens.size();
155 } // namespace Internal