From 4e7f6c3bf6391d39c35b814d1454a899482a27f5 Mon Sep 17 00:00:00 2001 From: Victor Cebollada Date: Mon, 19 Jan 2015 14:17:52 +0000 Subject: [PATCH] UTF8 to UTF32 conversion Change-Id: I19e32d4dd13c127bbf308605ea8c04e636c9c049 Signed-off-by: Victor Cebollada --- base/dali-toolkit/public-api/file.list | 3 +- .../public-api/text/character-set-conversion.cpp | 208 +++++++++++++++++++++ .../public-api/text/character-set-conversion.h | 81 ++++++++ optional/dali-toolkit/dali-toolkit.h | 1 + 4 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 base/dali-toolkit/public-api/text/character-set-conversion.cpp create mode 100644 base/dali-toolkit/public-api/text/character-set-conversion.h diff --git a/base/dali-toolkit/public-api/file.list b/base/dali-toolkit/public-api/file.list index f7d3578..c5a1b0c 100755 --- a/base/dali-toolkit/public-api/file.list +++ b/base/dali-toolkit/public-api/file.list @@ -40,6 +40,7 @@ public_api_base_src_files = \ $(public_api_base_src_dir)/controls/scrollable/scroll-view/scroll-view-wobble-effect.cpp \ $(public_api_base_src_dir)/controls/table-view/table-view.cpp \ $(public_api_base_src_dir)/controls/text-controls/text-label.cpp \ + $(public_api_base_src_dir)/text/character-set-conversion.cpp \ $(public_api_base_src_dir)/text/logical-model.cpp \ $(public_api_base_src_dir)/text/text-renderer.cpp \ $(public_api_base_src_dir)/text/text-view.cpp \ @@ -122,6 +123,7 @@ public_api_base_text_controls_header_files = \ $(public_api_base_src_dir)/controls/text-controls/text-label.h public_api_base_text_header_files = \ + $(public_api_base_src_dir)/text/character-set-conversion.h \ $(public_api_base_src_dir)/text/logical-model.h \ $(public_api_base_src_dir)/text/text-definitions.h \ $(public_api_base_src_dir)/text/text-renderer.h \ @@ -154,4 +156,3 @@ public_api_base_builder_header_files = \ $(public_api_base_src_dir)/builder/builder.h \ $(public_api_base_src_dir)/builder/json-parser.h \ $(public_api_base_src_dir)/builder/tree-node.h - diff 
--git a/base/dali-toolkit/public-api/text/character-set-conversion.cpp b/base/dali-toolkit/public-api/text/character-set-conversion.cpp
new file mode 100644
index 0000000..d2a8c55
--- /dev/null
+++ b/base/dali-toolkit/public-api/text/character-set-conversion.cpp
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// FILE HEADER
+#include <dali-toolkit/public-api/text/character-set-conversion.h>
+
+namespace Dali
+{
+
+namespace Toolkit
+{
+
+namespace
+{
+  // Lengths in bytes of a UTF8 sequence.
+  const static uint8_t U1 = 1u;
+  const static uint8_t U2 = 2u;
+  const static uint8_t U3 = 3u;
+  const static uint8_t U4 = 4u;
+
+  // Length given to the bytes which can not start a UTF8 sequence.
+  const static uint8_t U0 = 0u;
+
+  // Character written in place of a sequence which can not be decoded (U+FFFD).
+  const static uint32_t REPLACEMENT_CHARACTER = 0xfffdu;
+
+  // Bits of the lead byte which belong to the character code, indexed by the length
+  // of the sequence. A one byte sequence keeps the whole byte.
+  const static uint8_t LEAD_BYTE_MASK[5] = { 0x00u, 0xffu, 0x1fu, 0x0fu, 0x07u };
+
+  // Number of bytes of the sequence started by each lead byte, one row per high nibble.
+  // Bytes 0x80 - 0xBF can only continue a sequence; found where a lead byte is expected
+  // they are handled as one byte characters.
+  const static uint8_t UTF8_LENGTH[256] = {
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x00 - 0x0F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x10 - 0x1F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x20 - 0x2F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x30 - 0x3F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x40 - 0x4F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x50 - 0x5F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x60 - 0x6F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x70 - 0x7F lead byte = 0xxx xxxx (U+0000 - U+007F)
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x80 - 0x8F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0x90 - 0x9F
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0xA0 - 0xAF
+    U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // 0xB0 - 0xBF some extended ascii characters
+    U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, // 0xC0 - 0xCF
+    U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, // 0xD0 - 0xDF lead byte = 110x xxxx (U+0080 - U+07FF)
+    U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, // 0xE0 - 0xEF lead byte = 1110 xxxx (U+0800 - U+FFFF)
+    U4, U4, U4, U4, U4, U4, U4, U4,                                 // 0xF0 - 0xF7 lead byte = 1111 0xxx (U+10000 - U+1FFFFF)
+    U0, U0, U0, U0, U0, U0, U0, U0,                                 // 0xF8 - 0xFF Non valid.
+  };
+} // namespace
+
+// Malformed text never stalls the count: a non valid lead byte and a sequence cut short
+// by the end of the array are each counted as one character.
+uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length )
+{
+  uint32_t numberOfCharacters = 0u;
+
+  // Advance by offset so a pointer beyond the end of the array is never formed.
+  for( uint32_t index = 0u; index < length; ++numberOfCharacters )
+  {
+    const uint8_t sequenceLength = UTF8_LENGTH[utf8[index]];
+
+    // A step of U0 would never reach the end of the array.
+    const uint32_t step = ( U0 == sequenceLength ) ? U1 : sequenceLength;
+
+    // A sequence cut short by the end of the array is not stepped beyond it.
+    const uint32_t remaining = length - index;
+    index += ( step < remaining ) ? step : remaining;
+  }
+
+  return numberOfCharacters;
+}
+
+// Uses the same code ranges as Utf32ToUtf8(), so the result is the size in bytes
+// of the buffer that function fills.
+uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCharacters )
+{
+  uint32_t numberOfBytes = 0u;
+
+  const uint32_t* begin = utf32;
+  const uint32_t* end = utf32 + numberOfCharacters;
+
+  for( ; begin < end; ++begin )
+  {
+    const uint32_t code = *begin;
+
+    if( code < 0x80u )
+    {
+      ++numberOfBytes;
+    }
+    else if( code < 0x800u )
+    {
+      numberOfBytes += U2;
+    }
+    else if( code < 0x10000u )
+    {
+      numberOfBytes += U3;
+    }
+    else if( code < 0x200000u )
+    {
+      numberOfBytes += U4;
+    }
+    // Codes from 0x200000 on are not written by Utf32ToUtf8(), so they add no bytes.
+  }
+
+  return numberOfBytes;
+}
+
+// Writes one character per lead byte, the same ones GetNumberOfUtf8Characters() counts.
+// A non valid lead byte or a sequence cut short is written as REPLACEMENT_CHARACTER.
+uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf32 )
+{
+  uint32_t numberOfCharacters = 0u;
+
+  const uint8_t* begin = utf8;
+  const uint8_t* const end = utf8 + length;
+
+  for( ; begin < end; ++numberOfCharacters )
+  {
+    const uint8_t leadByte = *begin++;
+    const uint8_t sequenceLength = UTF8_LENGTH[leadByte];
+
+    if( U0 == sequenceLength )
+    {
+      // Non valid lead byte: only this byte is consumed.
+      *utf32++ = REPLACEMENT_CHARACTER;
+      continue;
+    }
+
+    const uint32_t numberOfContinuationBytes = sequenceLength - U1;
+    if( static_cast<uint32_t>( end - begin ) < numberOfContinuationBytes )
+    {
+      // Sequence cut short: consume what is left without reading beyond the array.
+      *utf32++ = REPLACEMENT_CHARACTER;
+      begin = end;
+      continue;
+    }
+
+    // Take the payload of the lead byte, then six bits from each continuation byte.
+    uint32_t code = leadByte & LEAD_BYTE_MASK[sequenceLength];
+    for( uint32_t index = 0u; index < numberOfContinuationBytes; ++index )
+    {
+      code = ( code << 6u ) | ( *begin++ & 0x3fu );
+    }
+
+    *utf32++ = code; // move on so the next character does not overwrite this one
+  }
+
+  return numberOfCharacters;
+}
+
+// Writes GetNumberOfUtf8Bytes( utf32, numberOfCharacters ) bytes into utf8.
+// No terminating zero is added.
+void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 )
+{
+  const uint32_t* begin = utf32;
+  const uint32_t* end = utf32 + numberOfCharacters;
+
+  for( ; begin < end; ++begin )
+  {
+    const uint32_t code = *begin;
+
+    if( code < 0x80u )
+    {
+      *utf8++ = static_cast<uint8_t>( code );
+    }
+    else if( code < 0x800u )
+    {
+      *utf8++ = static_cast<uint8_t>( ( code >> 6u ) | 0xc0u );            // lead byte for 2 byte sequence
+      *utf8++ = static_cast<uint8_t>( ( code & 0x3f ) | 0x80u );           // continuation byte
+    }
+    else if( code < 0x10000u )
+    {
+      *utf8++ = static_cast<uint8_t>( ( code >> 12u ) | 0xe0u );           // lead byte for 3 byte sequence
+      *utf8++ = static_cast<uint8_t>( ( ( code >> 6u ) & 0x3f ) | 0x80u ); // continuation byte
+      *utf8++ = static_cast<uint8_t>( ( code & 0x3f ) | 0x80u );           // continuation byte
+    }
+    else if( code < 0x200000u )
+    {
+      *utf8++ = static_cast<uint8_t>( ( code >> 18u ) | 0xf0u );            // lead byte for 4 byte sequence
+      *utf8++ = static_cast<uint8_t>( ( ( code >> 12u ) & 0x3f ) | 0x80u ); // continuation byte
+      *utf8++ = static_cast<uint8_t>( ( ( code >> 6u ) & 0x3f ) | 0x80u );  // continuation byte
+      *utf8++ = static_cast<uint8_t>( ( code & 0x3f ) | 0x80u );            // continuation byte
+    }
+    // Codes from 0x200000 on do not fit in four bytes and are skipped.
+  }
+}
+
+} // namespace Toolkit
+
+} // namespace Dali
diff --git a/base/dali-toolkit/public-api/text/character-set-conversion.h b/base/dali-toolkit/public-api/text/character-set-conversion.h
new file mode 100644
index 0000000..1e49357
--- /dev/null
+++ b/base/dali-toolkit/public-api/text/character-set-conversion.h
@@ -0,0 +1,81 @@
+#ifndef __DALI_TOOLKIT_CHARACTER_SET_CONVERSION_H__
+#define __DALI_TOOLKIT_CHARACTER_SET_CONVERSION_H__
+
+/*
+ * 
Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// INTERNAL INCLUDES
+#include
+
+// EXTERNAL INCLUDES
+#include
+
+namespace Dali
+{
+
+namespace Toolkit
+{
+
+/**
+ * @brief Retrieves the number of characters of the text array encoded in UTF8.
+ *
+ * @param[in] utf8 The pointer to the UTF8 array.
+ * @param[in] length The length in bytes of the UTF8 array.
+ *
+ * @return The number of characters.
+ */
+DALI_IMPORT_API uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length );
+
+/**
+ * @brief Retrieves the number of bytes needed to encode in UTF8 the given text array encoded in UTF32.
+ *
+ * @param[in] utf32 The pointer to the UTF32 array.
+ * @param[in] numberOfCharacters The number of characters of the UTF32 array.
+ *
+ * @return The number of bytes.
+ */
+DALI_IMPORT_API uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCharacters );
+
+/**
+ * @brief Converts a text array encoded in UTF8 into a text array encoded in UTF32.
+ *
+ * The @p utf32 buffer needs to be big enough to store all the characters, see GetNumberOfUtf8Characters().
+ *
+ * @param[in] utf8 The pointer to the UTF8 array.
+ * @param[in] length The length in bytes of the UTF8 array.
+ * @param[out] utf32 The pointer to the UTF32 array.
+ *
+ * @return The number of characters.
+ */
+DALI_IMPORT_API uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf32 );
+
+/**
+ * @brief Converts a text array encoded in UTF32 into a text array encoded in UTF8.
+ *
+ * The @p utf8 buffer needs to be big enough to store all the bytes, see GetNumberOfUtf8Bytes().
+ *
+ * @param[in] utf32 The pointer to the UTF32 array.
+ * @param[in] numberOfCharacters The number of characters of the UTF32 array.
+ * @param[out] utf8 The pointer to the UTF8 array, written one byte at a time.
+ */
+DALI_IMPORT_API void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 );
+
+} // namespace Toolkit
+
+} // namespace Dali
+
+#endif // __DALI_TOOLKIT_CHARACTER_SET_CONVERSION_H__
diff --git a/optional/dali-toolkit/dali-toolkit.h b/optional/dali-toolkit/dali-toolkit.h
index d7aefeb..d5c0dd4 100644
--- a/optional/dali-toolkit/dali-toolkit.h
+++ b/optional/dali-toolkit/dali-toolkit.h
@@ -60,6 +60,7 @@
 #include
 #include
 #include
+#include <dali-toolkit/public-api/text/character-set-conversion.h>
 #include
 #include
 #include
-- 
2.7.4