namespace Toolkit
{
+namespace Text
+{
+
namespace
{
const static uint8_t U1 = 1u;
U0, U0, U0, U0, // Non valid.
U0, U0, U0, U0, // Non valid.
};
+
+ const uint8_t CR = 0xd;
+ const uint8_t LF = 0xa;
} // namespace
// Looks up the UTF8_LENGTH table entry for the given byte and returns it.
//
// utf8LeadByte - byte used directly as the index into UTF8_LENGTH.
// Returns the table entry stored at that index.
//
// NOTE(review): presumably the entry is the number of bytes in the UTF-8
// sequence introduced by utf8LeadByte, with a zero (U0) entry marking an
// invalid lead byte - confirm against the UTF8_LENGTH definition.
+uint8_t GetUtf8Length( uint8_t utf8LeadByte )
+{
+ return UTF8_LENGTH[utf8LeadByte]; // no range check here; assumes the table covers all 256 uint8_t values - TODO confirm
+}
+
uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length )
{
uint32_t numberOfCharacters = 0u;
{
case U1:
{
- *utf32++ = leadByte;
- begin++;
+ if( CR == leadByte )
+ {
+ // Replace CR+LF or CR by LF
+ *utf32++ = LF;
+
+ // Look ahead if the next one is a LF.
+ ++begin;
+ if( begin < end )
+ {
+ if( LF == *begin )
+ {
+ ++begin;
+ }
+ }
+ }
+ else
+ {
+ *utf32++ = leadByte;
+ begin++;
+ }
break;
}
case U3:
{
uint32_t& code = *utf32++;
- code = leadByte & 0x1fu;
+ code = leadByte & 0x0fu;
begin++;
code <<= 6u;
code |= *begin++ & 0x3fu;
case U4:
{
uint32_t& code = *utf32++;
- code = leadByte & 0x1fu;
+ code = leadByte & 0x07u;
begin++;
code <<= 6u;
code |= *begin++ & 0x3fu;
code |= *begin++ & 0x3fu;
break;
}
+
+ case U0: // Invalid case
+ {
+ begin++;
+ *utf32++ = 0x20; // Use white space
+ break;
+ }
}
}
return numberOfCharacters;
}
-void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 )
+uint32_t Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 )
{
const uint32_t* begin = utf32;
const uint32_t* end = utf32 + numberOfCharacters;
+ uint8_t* utf8Begin = utf8;
+
for( ; begin < end; ++begin )
{
const uint32_t code = *begin;
*utf8++ = static_cast<uint8_t>( code & 0x3f ) | 0x80u; // continuation byte
}
}
+
+ return utf8 - utf8Begin;
}
// Converts a run of UTF-32 characters to UTF-8 and stores the result in a std::string.
//
// utf32              - pointer to the first UTF-32 character to convert.
// numberOfCharacters - number of characters to read starting at utf32.
// utf8               - output string; its previous contents are discarded and it is
//                      resized to the byte count reported by GetNumberOfUtf8Bytes()
//                      before the pointer-based Utf32ToUtf8() overload fills it.
+void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, std::string& utf8 )
+{
+ utf8.clear();
+
+ uint32_t numberOfBytes = GetNumberOfUtf8Bytes( &utf32[0], numberOfCharacters ); // &utf32[0] is the same address as utf32
+ utf8.resize( numberOfBytes ); // presumably exactly the space the conversion below needs - verify against GetNumberOfUtf8Bytes
+
+ // std::string exposes its storage as char*, while the pointer overload writes through uint8_t*, hence the reinterpret_cast.
+ Utf32ToUtf8( utf32, numberOfCharacters, reinterpret_cast<uint8_t*>(&utf8[0]) ); // the overload's return value is discarded here
+}
+
+} // namespace Text
+
} // namespace Toolkit
} // namespace Dali