2 * Copyright (c) 2014 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <dali/internal/event/text/utf8-impl.h>
22 #include <dali/integration-api/debug.h>
30 size_t UTF8SequenceLength(const unsigned char leadByte)
34 if ((leadByte & 0x80) == 0 ) //ASCII character (lead bit zero)
38 else if (( leadByte & 0xe0 ) == 0xc0 ) //110x xxxx
42 else if (( leadByte & 0xf0 ) == 0xe0 ) //1110 xxxx
46 else if (( leadByte & 0xf8 ) == 0xf0 ) //1111 0xxx
52 DALI_LOG_WARNING("Unrecognized lead byte %c\n", leadByte);
/**
 * @brief Decode a single UTF-8 sequence into its code point.
 *
 * The payload bits of the lead byte are taken first, then each continuation
 * byte contributes its low six bits. Continuation bytes are not validated
 * (their top two bits are simply masked off).
 *
 * @param[in] utf8Data       Pointer to the lead byte of the sequence; at least
 *                           sequenceLength bytes must be readable.
 * @param[in] sequenceLength Number of bytes in the sequence (1 to 4).
 * @return The decoded code point, or 0 if sequenceLength is not in the range 1 to 4.
 */
uint32_t UTF8Read(const unsigned char* utf8Data, const size_t sequenceLength)
{
  uint32_t code = 0;

  // Extract the payload bits carried by the lead byte.
  switch( sequenceLength )
  {
    case 1:
    {
      // Single byte sequence: the byte is the code point.
      return *utf8Data;
    }
    case 2:
    {
      code = *utf8Data & 0x1f; // 110x xxxx
      break;
    }
    case 3:
    {
      code = *utf8Data & 0x0f; // 1110 xxxx
      break;
    }
    case 4:
    {
      code = *utf8Data & 0x07; // 1111 0xxx
      break;
    }
    default:
    {
      // Unsupported length: nothing is read.
      return 0;
    }
  }

  // Each continuation byte (10xx xxxx) supplies six more bits; make room for
  // them by shifting before merging.
  for( size_t index = 1; index < sequenceLength; ++index )
  {
    code <<= 6;
    code |= utf8Data[index] & 0x3f;
  }

  return code;
}
/**
 * @brief Encode a code point as a UTF-8 sequence.
 *
 * No check is made for values that are not valid Unicode scalar values
 * (for example the surrogate range); any value below 0x200000 is encoded.
 *
 * @param[in]  code     The code point to encode.
 * @param[out] utf8Data Destination buffer; up to 4 bytes are written, so it
 *                      must have room for the longest sequence.
 * @return The number of bytes written (1 to 4), or 0 if code is 0x200000 or
 *         greater, in which case nothing is written.
 */
size_t UTF8Write(const uint32_t code, unsigned char* utf8Data)
{
  size_t sequenceLength = 0;

  if( code < 0x80u )
  {
    *utf8Data = static_cast<unsigned char>( code ); // single byte: 0xxx xxxx
    sequenceLength = 1;
  }
  else if( code < 0x800u )
  {
    *utf8Data++ = static_cast<unsigned char>( ( code >> 6 ) | 0xc0 );            // lead byte for 2 byte sequence
    *utf8Data   = static_cast<unsigned char>( ( code & 0x3f ) | 0x80 );          // continuation byte
    sequenceLength = 2;
  }
  else if( code < 0x10000u )
  {
    *utf8Data++ = static_cast<unsigned char>( ( code >> 12 ) | 0xe0 );           // lead byte for 3 byte sequence
    *utf8Data++ = static_cast<unsigned char>( ( ( code >> 6 ) & 0x3f ) | 0x80 ); // continuation byte
    *utf8Data   = static_cast<unsigned char>( ( code & 0x3f ) | 0x80 );          // continuation byte
    sequenceLength = 3;
  }
  else if( code < 0x200000u )
  {
    *utf8Data++ = static_cast<unsigned char>( ( code >> 18 ) | 0xf0 );           // lead byte for 4 byte sequence
    *utf8Data++ = static_cast<unsigned char>( ( ( code >> 12 ) & 0x3f ) | 0x80 ); // continuation byte
    *utf8Data++ = static_cast<unsigned char>( ( ( code >> 6 ) & 0x3f ) | 0x80 ); // continuation byte
    *utf8Data   = static_cast<unsigned char>( ( code & 0x3f ) | 0x80 );          // continuation byte
    sequenceLength = 4;
  }

  return sequenceLength;
}
127 size_t UTF8Tokenize(const unsigned char* utf8Data, const size_t utf8DataLength, TextArray& tokens)
129 size_t dataLength = utf8DataLength;
134 size_t sequenceLength = UTF8SequenceLength(*utf8Data);
141 if (sequenceLength > dataLength)
147 code = UTF8Read(utf8Data, sequenceLength);
148 tokens.push_back(code);
150 utf8Data += sequenceLength;
151 dataLength -= sequenceLength;
153 return tokens.size();
156 } // namespace Internal