2 * Copyright (c) 2024 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <dali-toolkit/internal/text/character-set-conversion.h>
22 #include <dali-toolkit/internal/text/segmentation.h>
23 #include <dali-toolkit-test-suite-utils.h>
24 #include <dali-toolkit/dali-toolkit.h>
28 using namespace Toolkit;
31 // Tests the following functions with different scripts.
32 // void SetLineBreakInfo( const Vector<Character>& text,
33 // Vector<LineBreakInfo>& lineBreakInfo );
34 // void SetWordBreakInfo( const Vector<Character>& text,
35 // CharacterIndex startIndex,
36 // Length numberOfCharacters,
37 // Vector<WordBreakInfo>& wordBreakInfo );
39 //////////////////////////////////////////////////////////
46 std::string description; ///< Description of the test.
47 std::string text; ///< Input text; the test helpers convert it from UTF-8 to UTF-32.
48 uint32_t index; ///< The character index from where to start to query the break info.
49 uint32_t numberOfCharacters; ///< The requested number of characters.
50 std::string breakInfo; ///< The expected break info: every character's break value written as a number, concatenated.
// Computes the line break info of data.text and compares it with data.breakInfo.
//
// The UTF-8 input is converted to UTF-32, SetLineBreakInfo() is run on it and each
// resulting LineBreakInfo value is streamed as a number into a string which is
// compared with the expected data.breakInfo. On a mismatch the input text, the
// requested range and both strings are written to std::cout.
//
// @param[in] data The test case: text, requested range and expected break info.
// @return Presumably true when the computed string equals data.breakInfo; the
//         callers report TET_FAIL when this returns false -- TODO confirm.
53 bool LineBreakInfoTest( const BreakInfoData& data )
55 TextAbstraction::Segmentation segmentation = TextAbstraction::Segmentation::Get();
57 // 1) Convert the UTF-8 text to UTF-32.
58 Vector<Character> utf32;
// Reserve one slot per UTF-8 byte; the vector is trimmed to the real character count below.
59 utf32.Resize( data.text.size() );
// An empty text skips the conversion and yields zero characters.
61 const uint32_t numberOfCharacters = ( data.text.size() == 0 ) ? 0 :
62 Utf8ToUtf32( reinterpret_cast<const uint8_t* const>( data.text.c_str() ),
66 utf32.Resize( numberOfCharacters );
68 // 2) Set the line break info for the whole text.
69 Vector<LineBreakInfo> lineBreakInfo;
70 lineBreakInfo.Resize( numberOfCharacters );
72 SetLineBreakInfo( segmentation,
78 // 3) Update the line break info if it's requested for part of the text.
// NOTE(review): because of the '&&', a test case with data.index == 0 never takes this
// partial-update path, even when fewer characters than the whole text are requested.
// Confirm this is intended.
79 if( ( 0u != data.index ) &&
80 ( numberOfCharacters != data.numberOfCharacters ) )
82 // Clear part of the line break info.
83 lineBreakInfo.Erase( lineBreakInfo.Begin() + data.index,
84 lineBreakInfo.Begin() + data.index + data.numberOfCharacters );
86 // Update the line break info.
87 SetLineBreakInfo( segmentation,
90 data.numberOfCharacters,
94 // 4) Compare the results.
95 std::ostringstream breakInfo;
97 for( unsigned int index = 0u; index < numberOfCharacters; ++index )
// The cast makes the stream write each value as a decimal number.
99 breakInfo << static_cast<unsigned int>( lineBreakInfo[index] );
102 if( data.breakInfo != breakInfo.str() )
// Dump the test case and both strings to help diagnose the mismatch.
104 std::cout << " text : [" << data.text << "]" << std::endl;
105 std::cout << " index : " << data.index << std::endl;
106 std::cout << " numberOfCharacters : " << data.numberOfCharacters << std::endl;
107 std::cout << " expected : [" << data.breakInfo << "]" << std::endl;
108 std::cout << " got : [" << breakInfo.str() << "]" << std::endl;
// Computes the word break info of data.text and compares it with data.breakInfo.
//
// The UTF-8 input is converted to UTF-32, SetWordBreakInfo() is run on it and each
// resulting WordBreakInfo value is streamed as a number into a string which is
// compared with the expected data.breakInfo. On a mismatch the input text, the
// requested range and both strings are written to std::cout.
//
// @param[in] data The test case: text, requested range and expected break info.
// @return Presumably true when the computed string equals data.breakInfo; the
//         callers report TET_FAIL when this returns false -- TODO confirm.
115 bool WordBreakInfoTest( const BreakInfoData& data )
117 TextAbstraction::Segmentation segmentation = TextAbstraction::Segmentation::Get();
119 // 1) Convert the UTF-8 text to UTF-32.
120 Vector<Character> utf32;
// Reserve one slot per UTF-8 byte; the vector is trimmed to the real character count below.
121 utf32.Resize( data.text.size() );
// An empty text skips the conversion and yields zero characters.
123 const uint32_t numberOfCharacters = ( data.text.size() == 0 ) ? 0 :
124 Utf8ToUtf32( reinterpret_cast<const uint8_t* const>( data.text.c_str() ),
128 utf32.Resize( numberOfCharacters );
130 // 2) Set the word break info for the whole text.
131 Vector<WordBreakInfo> wordBreakInfo;
132 wordBreakInfo.Resize( numberOfCharacters );
134 SetWordBreakInfo( segmentation,
140 // 3) Update the word break info if it's requested for part of the text.
// NOTE(review): because of the '&&', a test case with data.index == 0 never takes this
// partial-update path, even when fewer characters than the whole text are requested.
// Confirm this is intended.
141 if( ( 0u != data.index ) &&
142 ( numberOfCharacters != data.numberOfCharacters ) )
144 // Clear part of the word break info.
145 wordBreakInfo.Erase( wordBreakInfo.Begin() + data.index,
146 wordBreakInfo.Begin() + data.index + data.numberOfCharacters );
148 // Update the word break info.
149 SetWordBreakInfo( segmentation,
152 data.numberOfCharacters,
156 // 4) Compare the results.
157 std::ostringstream breakInfo;
159 for( unsigned int index = 0u; index < numberOfCharacters; ++index )
// The cast makes the stream write each value as a decimal number.
161 breakInfo << static_cast<unsigned int>( wordBreakInfo[index] );
164 if( data.breakInfo != breakInfo.str() )
// Dump the test case and both strings to help diagnose the mismatch.
166 std::cout << " text : [" << data.text << "]" << std::endl;
167 std::cout << " index : " << data.index << std::endl;
168 std::cout << " numberOfCharacters : " << data.numberOfCharacters << std::endl;
169 std::cout << " expected : [" << data.breakInfo << "]" << std::endl;
170 std::cout << " got : [" << breakInfo.str() << "]" << std::endl;
179 //////////////////////////////////////////////////////////
// Test case: checks SetLineBreakInfo() against the expected results stored in data[].
//
// Every entry of data[] is passed to LineBreakInfoTest() with its own
// ToolkitTestApplication instance; TET_FAIL is reported for an entry whose computed
// break info differs from the expected one.
//
// NOTE(review): "Segnemtation" in the function name and in the info line looks like a
// misspelling of "Segmentation". It is left untouched here because the name is
// presumably referenced by the test harness -- confirm before renaming.
181 int UtcDaliTextSegnemtationSetLineBreakInfo(void)
183 tet_infoline(" UtcDaliTextSegnemtationSetLineBreakInfo");
// Table of test cases. Adjacent string literals are concatenated by the compiler, so
// each multi-line text and each multi-line digit string below is a single string.
// The digit strings are the expected break info (see BreakInfoData::breakInfo).
185 struct BreakInfoData data[] =
196 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
197 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
198 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
199 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
200 "Quidam corpora at duo. An eos possim scripserit?",
203 "22222122222122222122212222212222212222222222122122221222221222222222122122220"
204 "2221221222212222222122222222221222222122222222122222222122212220"
205 "221222122222122222221222222222122212222221222222212220"
206 "22122222212222222122222222222122221222122222122222222222122222222222212220"
207 "222222122222221221222212212221222222122222222220",
210 "Latin script. Update initial paragraphs.",
211 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
212 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
213 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
214 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
215 "Quidam corpora at duo. An eos possim scripserit?",
218 "22222122222122222122212222212222212222222222122122221222221222222222122122220"
219 "2221221222212222222122222222221222222122222222122222222122212220"
220 "221222122222122222221222222222122212222221222222212220"
221 "22122222212222222122222222222122221222122222122222222222122222222222212220"
222 "222222122222221221222212212221222222122222222220",
225 "Latin script. Update mid paragraphs.",
226 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
227 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
228 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
229 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
230 "Quidam corpora at duo. An eos possim scripserit?",
233 "22222122222122222122212222212222212222222222122122221222221222222222122122220"
234 "2221221222212222222122222222221222222122222222122222222122212220"
235 "221222122222122222221222222222122212222221222222212220"
236 "22122222212222222122222222222122221222122222122222222222122222222222212220"
237 "222222122222221221222212212221222222122222222220",
240 "Latin script. Update final paragraphs.",
241 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
242 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
243 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
244 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
245 "Quidam corpora at duo. An eos possim scripserit?",
248 "22222122222122222122212222212222212222222222122122221222221222222222122122220"
249 "2221221222212222222122222222221222222122222222122222222122212220"
250 "221222122222122222221222222222122212222221222222212220"
251 "22122222212222222122222222222122221222122222122222222222122222222222212220"
252 "222222122222221221222212212221222222122222222220",
256 "韓国側は北朝鮮当局を通じて米ドルで賃金を支払う。\n"
257 "国際社会から様々な経済制裁を受ける北朝鮮にとっては出稼ぎ労働などと並んで重要な外貨稼ぎの手段となっている。\n"
258 "韓国統一省によると15年だけで1320億ウォン(約130億円)が同工業団地を通じ北朝鮮に支払われたという。",
261 "1111111111111111111111220"
262 "111111211111111111111111111111111111111111111111111220"
263 "11111111121111122211111212211211111111111111111111120",
267 "在被捕的64人中,警方落案起訴了35名男子和3名女子,他們年齡介乎15到70歲。\n"
269 "16名年齡介乎14到33歲的被捕人士獲准保釋候查,另有10人仍被拘留作進一步調查。",
272 "11112112111111112111111112111111121121220"
274 "21111112112111111111111211121111111111120",
// NOTE(review): hard-coded count; it must be kept in sync with the number of entries
// in data[], because the loop below indexes data[] up to this value.
277 const unsigned int numberOfTests = 7u;
279 for( unsigned int index = 0u; index < numberOfTests; ++index )
// A fresh test application is constructed for every test case.
281 ToolkitTestApplication application;
282 if( !LineBreakInfoTest( data[index] ) )
284 tet_result(TET_FAIL);
288 tet_result(TET_PASS);
// Test case: checks SetWordBreakInfo() against the expected results stored in data[].
//
// Every entry of data[] is passed to WordBreakInfoTest() with its own
// ToolkitTestApplication instance; TET_FAIL is reported for an entry whose computed
// break info differs from the expected one.
//
// NOTE(review): "Segnemtation" in the function name and in the info line looks like a
// misspelling of "Segmentation". It is left untouched here because the name is
// presumably referenced by the test harness -- confirm before renaming.
292 int UtcDaliTextSegnemtationSetWordBreakInfo(void)
294 tet_infoline(" UtcDaliTextSegnemtationSetWordBreakInfo");
// Table of test cases. Adjacent string literals are concatenated by the compiler, so
// each multi-line text and each multi-line digit string below is a single string.
// The digit strings are the expected break info (see BreakInfoData::breakInfo).
296 struct BreakInfoData data[] =
306 "Latin script, full text.",
307 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
308 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
309 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
310 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
311 "Quidam corpora at duo. An eos possim scripserit?",
314 "11110011110011110011001110001111001111111110010011000111100111111110010011000"
315 "1100100111001111110011111111000111110011111110011111110011001000"
316 "100110011110011111100111111100011001111100111111001000"
317 "10011111001111110011111111110011000110011110011111111110011111111111001000"
318 "111110011111100100110001001100111110011111111100",
321 "Latin script, update first paragraph.",
322 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
323 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
324 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
325 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
326 "Quidam corpora at duo. An eos possim scripserit?",
329 "11110011110011110011001110001111001111111110010011000111100111111110010011000"
330 "1100100111001111110011111111000111110011111110011111110011001000"
331 "100110011110011111100111111100011001111100111111001000"
332 "10011111001111110011111111110011000110011110011111111110011111111111001000"
333 "111110011111100100110001001100111110011111111100",
336 "Latin script, update middle paragraphs.",
337 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
338 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
339 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
340 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
341 "Quidam corpora at duo. An eos possim scripserit?",
344 "11110011110011110011001110001111001111111110010011000111100111111110010011000"
345 "1100100111001111110011111111000111110011111110011111110011001000"
346 "100110011110011111100111111100011001111100111111001000"
347 "10011111001111110011111111110011000110011110011111111110011111111111001000"
348 "111110011111100100110001001100111110011111111100",
351 "Latin script, update last paragraph.",
352 "Lorem ipsum dolor sit amet, aeque definiebas ea mei, posse iracundia ne cum.\n"
353 "Usu ne nisl maiorum iudicabit, veniam epicurei oporteat eos an.\n"
354 "Ne nec nulla regione albucius, mea doctus delenit ad!\n"
355 "Et everti blandit adversarium mei, eam porro neglegentur suscipiantur an.\n"
356 "Quidam corpora at duo. An eos possim scripserit?",
359 "11110011110011110011001110001111001111111110010011000111100111111110010011000"
360 "1100100111001111110011111111000111110011111110011111110011001000"
361 "100110011110011111100111111100011001111100111111001000"
362 "10011111001111110011111111110011000110011110011111111110011111111111001000"
363 "111110011111100100110001001100111110011111111100",
366 "Japanese script, full text.",
367 "韓国側は北朝鮮当局を通じて米ドルで賃金を支払う。\n"
368 "国際社会から様々な経済制裁を受ける北朝鮮にとっては出稼ぎ労働などと並んで重要な外貨稼ぎの手段となっている。\n"
369 "韓国統一省によると15年だけで1320億ウォン(約130億円)が同工業団地を通じ北朝鮮に支払われたという。",
372 "0000000000000010000000000"
373 "000000000000000000000000000000000000000000000000000000"
374 "00000000010000011100110001100000000000000000000000000",
377 "Japanese script, update first paragraph.",
378 "韓国側は北朝鮮当局を通じて米ドルで賃金を支払う。\n"
379 "国際社会から様々な経済制裁を受ける北朝鮮にとっては出稼ぎ労働などと並んで重要な外貨稼ぎの手段となっている。\n"
380 "韓国統一省によると15年だけで1320億ウォン(約130億円)が同工業団地を通じ北朝鮮に支払われたという。",
383 "0000000000000010000000000"
384 "000000000000000000000000000000000000000000000000000000"
385 "00000000010000011100110001100000000000000000000000000",
388 "Japanese script, update middle paragraph.",
389 "韓国側は北朝鮮当局を通じて米ドルで賃金を支払う。\n"
390 "国際社会から様々な経済制裁を受ける北朝鮮にとっては出稼ぎ労働などと並んで重要な外貨稼ぎの手段となっている。\n"
391 "韓国統一省によると15年だけで1320億ウォン(約130億円)が同工業団地を通じ北朝鮮に支払われたという。",
394 "0000000000000010000000000"
395 "000000000000000000000000000000000000000000000000000000"
396 "00000000010000011100110001100000000000000000000000000",
399 "Japanese script, update last paragraph.",
400 "韓国側は北朝鮮当局を通じて米ドルで賃金を支払う。\n"
401 "国際社会から様々な経済制裁を受ける北朝鮮にとっては出稼ぎ労働などと並んで重要な外貨稼ぎの手段となっている。\n"
402 "韓国統一省によると15年だけで1320億ウォン(約130億円)が同工業団地を通じ北朝鮮に支払われたという。",
405 "0000000000000010000000000"
406 "000000000000000000000000000000000000000000000000000000"
407 "00000000010000011100110001100000000000000000000000000",
410 "Chinese script, full text.",
411 "在被捕的64人中,警方落案起訴了35名男子和3名女子,他們年齡介乎15到70歲。\n"
413 "16名年齡介乎14到33歲的被捕人士獲准保釋候查,另有10人仍被拘留作進一步調查。",
416 "00001000000000001000000000000000010010000"
418 "10000001001000000000000000010000000000000",
421 "Chinese script, update first paragraph.",
422 "在被捕的64人中,警方落案起訴了35名男子和3名女子,他們年齡介乎15到70歲。\n"
424 "16名年齡介乎14到33歲的被捕人士獲准保釋候查,另有10人仍被拘留作進一步調查。",
427 "00001000000000001000000000000000010010000"
429 "10000001001000000000000000010000000000000",
432 "Chinese script, update middle paragraph.",
433 "在被捕的64人中,警方落案起訴了35名男子和3名女子,他們年齡介乎15到70歲。\n"
435 "16名年齡介乎14到33歲的被捕人士獲准保釋候查,另有10人仍被拘留作進一步調查。",
438 "00001000000000001000000000000000010010000"
440 "10000001001000000000000000010000000000000",
443 "Chinese script, update last paragraph.",
444 "在被捕的64人中,警方落案起訴了35名男子和3名女子,他們年齡介乎15到70歲。\n"
446 "16名年齡介乎14到33歲的被捕人士獲准保釋候查,另有10人仍被拘留作進一步調查。",
449 "00001000000000001000000000000000010010000"
451 "10000001001000000000000000010000000000000",
// NOTE(review): hard-coded count; it must be kept in sync with the number of entries
// in data[], because the loop below indexes data[] up to this value.
454 const unsigned int numberOfTests = 13u;
456 for( unsigned int index = 0u; index < numberOfTests; ++index )
// A fresh test application is constructed for every test case.
458 ToolkitTestApplication application;
459 if( !WordBreakInfoTest( data[index] ) )
461 tet_result(TET_FAIL);
465 tet_result(TET_PASS);