automated-tests/src/dali-toolkit-internal/utc-Dali-Text-CharacterSetConversion.cpp

   1 /*
   2  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 #include <iostream>
  19
  20 #include <stdlib.h>
  21 #include <dali-toolkit/internal/text/character-set-conversion.h>
  22 #include <dali-toolkit-test-suite-utils.h>
  23 #include <dali-toolkit/dali-toolkit.h>
  24
  25
  26 using namespace Dali;
  27 using namespace Toolkit;
  28 using namespace Text;
  29
  30 // Tests the following functions for scripts with different number of bytes per character.
  31 // Latin 1 byte per character, Arabic 2 bytes per character, Devanagari 3 bytes per character and emojis 4 bytes per character.
  32 //
  33 // uint8_t GetUtf8Length( uint8_t utf8LeadByte );
  34 // uint32_t GetNumberOfUtf8Characters( const uint8_t* const utf8, uint32_t length );
  35 // uint32_t GetNumberOfUtf8Bytes( const uint32_t* const utf32, uint32_t numberOfCharacters );
  36 // uint32_t Utf8ToUtf32( const uint8_t* const utf8, uint32_t length, uint32_t* utf32 );
  37 // uint32_t Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, uint8_t* utf8 );
  38 //     void Utf32ToUtf8( const uint32_t* const utf32, uint32_t numberOfCharacters, std::string& utf8 );
  39 //
  40
  41 //////////////////////////////////////////////////////////
  42
  43 namespace
  44 {
  45
  46 struct GetNumberOfUtf8CharactersData
  47 {
  48   std::string  description;        ///< Description of the test.
  49   std::string  text;               ///< input text.
  50   unsigned int numberOfCharacters; ///< The expected number of characters.
  51 };
  52
  53 bool GetNumberOfUtf8CharactersTest( const GetNumberOfUtf8CharactersData& data )
  54 {
  55   return GetNumberOfUtf8Characters( reinterpret_cast<const uint8_t*>( data.text.c_str() ), data.text.size() ) == data.numberOfCharacters;
  56 }
  57
  58 //////////////////////////////////////////////////////////
  59
  60 struct GetNumberOfUtf8BytesData
  61 {
  62   std::string   description;        ///< Description of the test.
  63   unsigned int* utf32;              ///< input text in utf32.
  64   unsigned int  numberOfCharacters; ///< The number of characters.
  65   unsigned int  numberOfBytes;      ///< The expected number of bytes in utf8.
  66 };
  67
  68 bool GetNumberOfUtf8BytesTest( const GetNumberOfUtf8BytesData& data )
  69 {
  70   return GetNumberOfUtf8Bytes( data.utf32, data.numberOfCharacters ) == data.numberOfBytes;
  71 }
  72
  73 //////////////////////////////////////////////////////////
  74
  75 struct Utf8ToUtf32Data
  76 {
  77   std::string   description; ///< Description of the test.
  78   std::string   text;        ///< input text.
  79   unsigned int* utf32;       ///< The expected text (array of bytes with text encoded in utf32).
  80 };
  81
  82
  83 bool Utf8ToUtf32Test( const Utf8ToUtf32Data& data )
  84 {
  85   Vector<uint32_t> utf32;
  86   utf32.Resize( data.text.size() );
  87
  88   const uint32_t numberOfCharacters = Utf8ToUtf32( reinterpret_cast<const uint8_t* const>( data.text.c_str() ),
  89                                                    data.text.size(),
  90                                                    utf32.Begin() );
  91
  92   for( unsigned int index = 0u; index < numberOfCharacters; ++index )
  93   {
  94     if( data.utf32[index] != utf32[index] )
  95     {
  96       return false;
  97     }
  98   }
  99
 100   return true;
 101 }
 102
 103 } // namespace
 104
 105 //////////////////////////////////////////////////////////
 106
 107 struct Utf32ToUtf8Data
 108 {
 109   std::string   description;        ///< Description of the test.
 110   unsigned int* utf32;              ///< The input text (array of bytes with text encoded in utf32).
 111   unsigned int  numberOfCharacters; ///< The number of characters.
 112   std::string   text;               ///< The expected text.
 113 };
 114
 115 bool Utf32ToUtf8Test( const Utf32ToUtf8Data& data )
 116 {
 117   std::string text;
 118
 119   Utf32ToUtf8( data.utf32, data.numberOfCharacters, text );
 120
 121   return text == data.text;
 122 }
 123
 124 //////////////////////////////////////////////////////////
 125
 126 int UtcDaliTextCharacterSetConversionGetUtf8Length(void)
 127 {
 128   ToolkitTestApplication application;
 129   tet_infoline(" UtcDaliTextCharacterSetConversionGetUtf8Length");
 130
 131   // Copy of the table used to get the size in bytes of a character encoded with utf8.
 132   // If the table used by the GetUtf8Length() function is updated, this one needs to be updated as well.
 133   const static uint8_t U1 = 1u;
 134   const static uint8_t U2 = 2u;
 135   const static uint8_t U3 = 3u;
 136   const static uint8_t U4 = 4u;
 137   const static uint8_t U0 = 0u;
 138   const static uint8_t UTF8_LENGTH[256] = {
 139     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 140     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 141     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 142     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 143     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 144     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 145     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 146     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 147     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, // lead byte = 0xxx xxxx (U+0000 - U+007F + some extended ascii characters)
 148     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 149     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 150     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 151     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 152     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 153     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 154     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 155     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 156     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 157     U1, U1, U1, U1, U1, U1, U1, U1, U1, U1, //
 158     U1, U1,                                 //
 159
 160     U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, //
 161     U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, // lead byte = 110x xxxx (U+0080 - U+07FF)
 162     U2, U2, U2, U2, U2, U2, U2, U2, U2, U2, //
 163     U2, U2,                                 //
 164
 165     U3, U3, U3, U3, U3, U3, U3, U3, U3, U3, // lead byte = 1110 xxxx (U+0800 - U+FFFF)
 166     U3, U3, U3, U3, U3, U3,                 //
 167
 168     U4, U4, U4, U4, U4, U4, U4, U4,         // lead byte = 1111 0xxx (U+10000 - U+1FFFFF)
 169
 170     U0, U0, U0, U0,                         // Non valid.
 171     U0, U0, U0, U0,                         // Non valid.
 172   };
 173
 174   for( unsigned int index = 0; index < 256u; ++index )
 175   {
 176     if( GetUtf8Length( index ) != UTF8_LENGTH[static_cast<uint8_t>(index)] )
 177     {
 178       tet_result(TET_FAIL);
 179     }
 180   }
 181
 182   tet_result(TET_PASS);
 183   END_TEST;
 184 }
 185
 186
 187 int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Characters(void)
 188 {
 189   ToolkitTestApplication application;
 190   tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Characters");
 191
 192   const GetNumberOfUtf8CharactersData data[] =
 193   {
 194     {
 195       "Latin script",
 196       "Hello World",
 197       11u,
 198     },
 199     {
 200       "Arabic script",
 201       "مرحبا بالعالم",
 202       13u,
 203     },
 204     {
 205       "Devanagari script",
 206       "हैलो वर्ल्ड",
 207       11u,
 208     },
 209     {
 210       "Emojis",
 211       "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
 212       7u,
 213     },
 214   };
 215   const unsigned int numberOfTests = 4u;
 216
 217   for( unsigned int index = 0u; index < numberOfTests; ++index )
 218   {
 219     if( !GetNumberOfUtf8CharactersTest( data[index] ) )
 220     {
 221       tet_result(TET_FAIL);
 222     }
 223   }
 224
 225   tet_result(TET_PASS);
 226   END_TEST;
 227 }
 228
 229 int UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes(void)
 230 {
 231   ToolkitTestApplication application;
 232   tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes");
 233
 234   unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
 235   unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
 236   unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
 237   unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
 238
 239   const GetNumberOfUtf8BytesData data[] =
 240   {
 241     {
 242       "Latin script",
 243       utf32_01,
 244       11u,
 245       11u,
 246     },
 247     {
 248       "Arabic script",
 249       utf32_02,
 250       13u,
 251       25u,
 252     },
 253     {
 254       "Devanagari script",
 255       utf32_03,
 256       11u,
 257       31u,
 258     },
 259     {
 260       "Emojis",
 261       utf32_04,
 262       7u,
 263       19u,
 264     },
 265   };
 266   const unsigned int numberOfTests = 4u;
 267
 268   for( unsigned int index = 0u; index < numberOfTests; ++index )
 269   {
 270     if( !GetNumberOfUtf8BytesTest( data[index] ) )
 271     {
 272       tet_result(TET_FAIL);
 273     }
 274   }
 275
 276   tet_result(TET_PASS);
 277   END_TEST;
 278 }
 279
 280 int UtcDaliTextCharacterSetConversionUtf8ToUtf32(void)
 281 {
 282   ToolkitTestApplication application;
 283   tet_infoline(" UtcDaliTextCharacterSetConversionGetNumberOfUtf8Bytes");
 284
 285   char utf8_06[] = { -8, -7, -6, -5, -4, -3, -2, -1 }; // Invalid string
 286
 287   unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
 288   unsigned int utf32_02[] = { 0xA, 0x20, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0xA, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World + CR and CR+LF
 289   unsigned int utf32_03[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
 290   unsigned int utf32_04[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
 291   unsigned int utf32_05[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
 292   unsigned int utf32_06[] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; // Invalid string
 293
 294   const Utf8ToUtf32Data data[] =
 295   {
 296     {
 297       "Latin script",
 298       "Hello World",
 299       utf32_01,
 300     },
 301     {
 302       "Latin script with 'CR' and 'CR'+'LF'",
 303       "\xd Hello\xd\xa World",
 304       utf32_02,
 305     },
 306     {
 307       "Arabic script",
 308       "مرحبا بالعالم",
 309       utf32_03,
 310     },
 311     {
 312       "Devanagari script",
 313       "हैलो वर्ल्ड",
 314       utf32_04,
 315     },
 316     {
 317       "Emojis",
 318       "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
 319       utf32_05,
 320     },
 321     {
 322       "Invalid text",
 323       utf8_06,
 324       utf32_06,
 325     },
 326   };
 327   const unsigned int numberOfTests = 6u;
 328
 329   for( unsigned int index = 0u; index < numberOfTests; ++index )
 330   {
 331     if( !Utf8ToUtf32Test( data[index] ) )
 332     {
 333       tet_result(TET_FAIL);
 334     }
 335   }
 336
 337   tet_result(TET_PASS);
 338   END_TEST;
 339 }
 340
 341 int UtcDaliTextCharacterSetConversionUtf32ToUtf8(void)
 342 {
 343   ToolkitTestApplication application;
 344   tet_infoline(" UtcDaliTextCharacterSetConversionUtf32ToUtf8");
 345
 346   unsigned int utf32_01[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 }; // Hello World
 347   unsigned int utf32_02[] = { 0x645, 0x631, 0x62D, 0x628, 0x627, 0x20, 0x628, 0x627, 0x644, 0x639, 0x627, 0x644, 0x645 }; // مرحبا بالعالم
 348   unsigned int utf32_03[] = { 0x939, 0x948, 0x932, 0x94B, 0x20, 0x935, 0x930, 0x94D, 0x932, 0x94D, 0x921 }; // हैलो वर्ल्ड
 349   unsigned int utf32_04[] = { 0x1F601, 0x20, 0x1F602, 0x20, 0x1F603, 0x20, 0x1F604 }; // Emojis
 350
 351   struct Utf32ToUtf8Data data[] =
 352   {
 353     {
 354       "Latin script",
 355       utf32_01,
 356       11u,
 357       "Hello World",
 358     },
 359     {
 360       "Arabic script",
 361       utf32_02,
 362       13u,
 363       "مرحبا بالعالم",
 364     },
 365     {
 366       "Devanagari script",
 367       utf32_03,
 368       11u,
 369       "हैलो वर्ल्ड",
 370     },
 371     {
 372       "Emojis",
 373       utf32_04,
 374       7u,
 375       "\xF0\x9F\x98\x81 \xF0\x9F\x98\x82 \xF0\x9F\x98\x83 \xF0\x9F\x98\x84",
 376     },
 377   };
 378
 379   const unsigned int numberOfTests = 4u;
 380
 381   for( unsigned int index = 0u; index < numberOfTests; ++index )
 382   {
 383     if( !Utf32ToUtf8Test( data[index] ) )
 384     {
 385       tet_result(TET_FAIL);
 386     }
 387   }
 388
 389   tet_result(TET_PASS);
 390   END_TEST;
 391 }