2 * Copyright © 2011 Google, Inc.
4 * This is part of HarfBuzz, a text shaping library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 * Google Author(s): Behdad Esfahbod
29 /* Unit tests for hb-buffer.h */
/* Sample text shared by the tests, in three encodings: 'a', 'b', U+20000,
 * 'd', 'e', 'f', 'g'.  Each array is sized to exactly its initializer; for
 * utf8 that means the ten bytes of the literal with no terminating NUL. */
32 static const char utf8[10] = "ab\360\240\200\200defg";
/* U+20000 appears here as the surrogate pair 0xD840 0xDC00 */
33 static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
34 static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
46 static const char *buffer_names[] = {
/* Fixture setup: creates a new buffer, stores it in fixture->buffer, and
 * fills it according to the buffer type passed (as an integer) in
 * user_data. */
60 fixture_init (fixture_t *fixture, gconstpointer user_data)
65 b = fixture->buffer = hb_buffer_create ();
67 switch (GPOINTER_TO_INT (user_data))
72 case BUFFER_ONE_BY_ONE:
/* add utf32[1] .. utf32[N-2] one code point at a time, passing the array
 * index as the third argument */
73 for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
74 hb_buffer_add (b, utf32[i], i);
/* the bulk adders below receive the whole array plus an offset of 1 and a
 * length of N-2, i.e. everything except the first and last item */
78 hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
82 hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
86 hb_buffer_add_utf8 (b, utf8, G_N_ELEMENTS (utf8), 1, G_N_ELEMENTS (utf8) - 2);
/* presumably the fallback for an unknown buffer type -- TODO confirm */
90 g_assert_not_reached ();
/* Fixture teardown: destroys the buffer held in fixture->buffer.  user_data
 * is not used by the visible code. */
95 fixture_finish (fixture_t *fixture, gconstpointer user_data)
97 hb_buffer_destroy (fixture->buffer);
/* Checks the buffer's properties (unicode funcs, direction, script, language
 * and flags): their default values, that values written through the setters
 * are returned by the getters, and which of them are expected to survive
 * hb_buffer_clear_contents() and a reset. */
102 test_buffer_properties (fixture_t *fixture, gconstpointer user_data)
104 hb_buffer_t *b = fixture->buffer;
105 hb_unicode_funcs_t *ufuncs;
107 /* test default properties */
109 g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
110 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
111 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
112 g_assert (hb_buffer_get_language (b) == NULL);
113 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
116 /* test property changes are retained */
117 ufuncs = hb_unicode_funcs_create (NULL);
118 hb_buffer_set_unicode_funcs (b, ufuncs);
/* our own reference is dropped right away; the getter is still expected to
 * return the same pointer afterwards */
119 hb_unicode_funcs_destroy (ufuncs);
120 g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
122 hb_buffer_set_direction (b, HB_DIRECTION_RTL);
123 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
125 hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
126 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
/* the language is set from "fa" and compared against a lookup of "Fa",
 * so the two spellings are expected to yield the same language value */
128 hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
129 g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
131 hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
132 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
136 /* test clear clears all properties but unicode_funcs */
138 hb_buffer_clear_contents (b);
140 g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
141 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
142 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
143 g_assert (hb_buffer_get_language (b) == NULL);
144 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
147 /* test reset clears all properties */
/* set everything to non-default values again first */
149 hb_buffer_set_direction (b, HB_DIRECTION_RTL);
150 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
152 hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
153 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
155 hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
156 g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
158 hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
159 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
/* here even the unicode funcs are expected to be back at the default */
163 g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
164 g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
165 g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
166 g_assert (hb_buffer_get_language (b) == NULL);
167 g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
/* Checks the items held by the fixture's buffer: length, initial per-glyph
 * fields, code points and clusters, the effect of hb_buffer_reverse() and
 * hb_buffer_reverse_clusters(), and of hb_buffer_set_length().  The buffer
 * type is read from user_data. */
171 test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
173 hb_buffer_t *b = fixture->buffer;
174 unsigned int i, len, len2;
175 buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
176 hb_glyph_info_t *glyphs;
178 if (buffer_type == BUFFER_EMPTY) {
179 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
183 len = hb_buffer_get_length (b);
184 hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
185 glyphs = hb_buffer_get_glyph_infos (b, &len2);
/* the length reported through the out-parameter must match, and the buffer
 * is expected to hold the five code points utf32[1] .. utf32[5] */
186 g_assert_cmpint (len, ==, len2);
187 g_assert_cmpint (len, ==, 5);
/* expected initial per-glyph values: mask 1, var1 and var2 zero */
189 for (i = 0; i < len; i++) {
190 g_assert_cmphex (glyphs[i].mask, ==, 1);
191 g_assert_cmphex (glyphs[i].var1.u32, ==, 0);
192 g_assert_cmphex (glyphs[i].var2.u32, ==, 0);
195 for (i = 0; i < len; i++) {
/* the expected cluster value depends on the encoding the text was added in */
196 unsigned int cluster;
199 if (buffer_type == BUFFER_UTF16)
201 else if (buffer_type == BUFFER_UTF8)
204 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
205 g_assert_cmphex (glyphs[i].cluster, ==, cluster);
208 /* reverse, test, and reverse back */
210 hb_buffer_reverse (b);
/* in reversed order, position i is expected to hold utf32[len-i] */
211 for (i = 0; i < len; i++)
212 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
214 hb_buffer_reverse (b);
215 for (i = 0; i < len; i++)
216 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
218 /* reverse_clusters works same as reverse for now since each codepoint is
219 * in its own cluster */
221 hb_buffer_reverse_clusters (b);
222 for (i = 0; i < len; i++)
223 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
225 hb_buffer_reverse_clusters (b);
226 for (i = 0; i < len; i++)
227 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
229 /* now form a cluster and test again */
230 glyphs[2].cluster = glyphs[1].cluster;
232 /* reverse, test, and reverse back */
234 hb_buffer_reverse (b);
235 for (i = 0; i < len; i++)
236 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
238 hb_buffer_reverse (b);
239 for (i = 0; i < len; i++)
240 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
242 /* reverse_clusters twice still should return the original string,
243 * but when applied once, the 1-2 cluster should be retained. */
245 hb_buffer_reverse_clusters (b);
246 for (i = 0; i < len; i++) {
/* j is the index position i would map to under a plain reversal */
247 unsigned int j = len-1-i;
252 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
255 hb_buffer_reverse_clusters (b);
256 for (i = 0; i < len; i++)
257 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
260 /* test setting length */
/* growing to 10 must keep the first five items; the five new items are
 * expected to have code point 0 */
263 g_assert (hb_buffer_set_length (b, 10));
264 glyphs = hb_buffer_get_glyph_infos (b, NULL);
265 g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
266 for (i = 0; i < 5; i++)
267 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
268 for (i = 5; i < 10; i++)
269 g_assert_cmphex (glyphs[i].codepoint, ==, 0);
/* shrinking to 3 must keep the first three items */
271 g_assert (hb_buffer_set_length (b, 3));
272 glyphs = hb_buffer_get_glyph_infos (b, NULL);
273 g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
274 for (i = 0; i < 3; i++)
275 g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
278 g_assert (hb_buffer_allocation_successful (b));
281 /* test reset clears content */
284 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
/* Checks that the glyph positions of the fixture's buffer are all zero and
 * that the position count matches the buffer length. */
288 test_buffer_positions (fixture_t *fixture, gconstpointer user_data)
290 hb_buffer_t *b = fixture->buffer;
291 unsigned int i, len, len2;
292 hb_glyph_position_t *positions;
294 /* Without shaping, positions should all be zero */
295 len = hb_buffer_get_length (b);
296 hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
297 positions = hb_buffer_get_glyph_positions (b, &len2);
/* the count reported through the out-parameter must equal the length */
298 g_assert_cmpint (len, ==, len2);
299 for (i = 0; i < len; i++) {
300 g_assert_cmpint (0, ==, positions[i].x_advance);
301 g_assert_cmpint (0, ==, positions[i].y_advance);
302 g_assert_cmpint (0, ==, positions[i].x_offset);
303 g_assert_cmpint (0, ==, positions[i].y_offset);
304 g_assert_cmpint (0, ==, positions[i].var.i32);
307 /* test reset clears content */
309 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
/* Exercises hb_buffer_pre_allocate() with sizes that are expected to succeed
 * and sizes that are expected to fail, checking the buffer length and
 * hb_buffer_allocation_successful() after each step. */
313 test_buffer_allocation (fixture_t *fixture, gconstpointer user_data)
315 hb_buffer_t *b = fixture->buffer;
317 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
/* pre-allocating must not change the length */
319 g_assert (hb_buffer_pre_allocate (b, 100));
320 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
321 g_assert (hb_buffer_allocation_successful (b));
323 /* let's try a huge allocation, make sure it fails */
/* (unsigned int) -1 is the largest unsigned int value */
324 g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
325 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
326 g_assert (!hb_buffer_allocation_successful (b));
328 /* small one again */
/* the call itself succeeds, yet the earlier failure is still expected to be
 * reported by hb_buffer_allocation_successful() */
329 g_assert (hb_buffer_pre_allocate (b, 50));
330 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
331 g_assert (!hb_buffer_allocation_successful (b));
334 g_assert (hb_buffer_allocation_successful (b));
336 /* all allocation and size */
/* NOTE(review): the divisor 20 presumably relates to the per-item size in
 * bytes -- confirm against the buffer implementation */
337 g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
338 g_assert (!hb_buffer_allocation_successful (b));
341 g_assert (hb_buffer_allocation_successful (b));
343 /* technically, this one can actually pass on 64bit machines, but
344 * I'm doubtful that any malloc allows 4GB allocations at a time.
345 * But let's only enable it on a 32-bit machine. */
346 if (sizeof (long) == 4) {
347 g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
348 g_assert (!hb_buffer_allocation_successful (b));
352 g_assert (hb_buffer_allocation_successful (b));
358 const uint32_t codepoints[8];
359 } utf8_conversion_test_t;
/* Test vectors for UTF-8 decoding: each entry pairs an input string with the
 * zero-terminated list of code points expected in the buffer.  An expected
 * value of -1 stands for a byte that does not decode to a valid character. */
361 /* note: we skip the first and last byte when adding to buffer */
362 static const utf8_conversion_test_t utf8_conversion_tests[] = {
364 {"a\303\207b", {0xC7}},
365 {"ab\303cd", {'b', -1, 'c'}},
366 {"ab\303\302\301cd", {'b', -1, -1, -1, 'c'}}
/* For every entry of utf8_conversion_tests, adds the entry's string minus
 * its first and last byte to a buffer and compares the resulting code points
 * with the entry's expected list. */
370 test_buffer_utf8_conversion (void)
373 hb_glyph_info_t *glyphs;
374 unsigned int bytes, chars, i, j, len;
376 b = hb_buffer_create ();
378 for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
380 const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
383 escaped = g_strescape (test->utf8, NULL);
/* NOTE(review): i is unsigned, so %u would match the argument type */
384 g_test_message ("UTF-8 test #%d: %s", i, escaped);
387 bytes = strlen (test->utf8);
/* the expected list is zero-terminated: count entries up to the first 0 */
388 for (chars = 0; test->codepoints[chars]; chars++)
/* offset 1 and length bytes - 2 leave out the first and last byte */
392 hb_buffer_add_utf8 (b, test->utf8, bytes, 1, bytes - 2);
394 glyphs = hb_buffer_get_glyph_infos (b, &len);
395 g_assert_cmpint (len, ==, chars);
396 for (j = 0; j < chars; j++)
397 g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
400 hb_buffer_destroy (b);
405 /* Following test table is adapted from glib/glib/tests/utf8-validate.c
406 * with relicensing permission from Matthias Clasen. */
413 } utf8_validity_test_t;
415 static const utf8_validity_test_t utf8_validity_tests[] = {
416 /* some tests to check max_len handling */
418 { "abcde", -1, 5, TRUE },
419 { "abcde", 3, 3, TRUE },
420 { "abcde", 5, 5, TRUE },
422 { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
423 { "\xc2\xa9\xc2\xa9\xc2\xa9", 1, 0, FALSE },
424 { "\xc2\xa9\xc2\xa9\xc2\xa9", 2, 2, TRUE },
425 { "\xc2\xa9\xc2\xa9\xc2\xa9", 3, 2, FALSE },
426 { "\xc2\xa9\xc2\xa9\xc2\xa9", 4, 4, TRUE },
427 { "\xc2\xa9\xc2\xa9\xc2\xa9", 5, 4, FALSE },
428 { "\xc2\xa9\xc2\xa9\xc2\xa9", 6, 6, TRUE },
430 { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
431 { "\xe2\x89\xa0\xe2\x89\xa0", 1, 0, FALSE },
432 { "\xe2\x89\xa0\xe2\x89\xa0", 2, 0, FALSE },
433 { "\xe2\x89\xa0\xe2\x89\xa0", 3, 3, TRUE },
434 { "\xe2\x89\xa0\xe2\x89\xa0", 4, 3, FALSE },
435 { "\xe2\x89\xa0\xe2\x89\xa0", 5, 3, FALSE },
436 { "\xe2\x89\xa0\xe2\x89\xa0", 6, 6, TRUE },
438 /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
440 { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
441 /* first sequence of each length */
442 { "\x00", -1, 0, TRUE },
443 { "\xc2\x80", -1, 2, TRUE },
444 { "\xe0\xa0\x80", -1, 3, TRUE },
445 { "\xf0\x90\x80\x80", -1, 4, TRUE },
446 { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
447 { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
448 /* last sequence of each length */
449 { "\x7f", -1, 1, TRUE },
450 { "\xdf\xbf", -1, 2, TRUE },
451 { "\xef\xbf\xbf", -1, 0, TRUE },
452 { "\xf7\xbf\xbf\xbf", -1, 0, TRUE },
453 { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
454 { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
455 /* other boundary conditions */
456 { "\xed\x9f\xbf", -1, 3, TRUE },
457 { "\xee\x80\x80", -1, 3, TRUE },
458 { "\xef\xbf\xbd", -1, 3, TRUE },
459 { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
460 /* malformed sequences */
461 /* continuation bytes */
462 { "\x80", -1, 0, FALSE },
463 { "\xbf", -1, 0, FALSE },
464 { "\x80\xbf", -1, 0, FALSE },
465 { "\x80\xbf\x80", -1, 0, FALSE },
466 { "\x80\xbf\x80\xbf", -1, 0, FALSE },
467 { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
468 { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
469 { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
471 /* all possible continuation bytes */
472 { "\x80", -1, 0, FALSE },
473 { "\x81", -1, 0, FALSE },
474 { "\x82", -1, 0, FALSE },
475 { "\x83", -1, 0, FALSE },
476 { "\x84", -1, 0, FALSE },
477 { "\x85", -1, 0, FALSE },
478 { "\x86", -1, 0, FALSE },
479 { "\x87", -1, 0, FALSE },
480 { "\x88", -1, 0, FALSE },
481 { "\x89", -1, 0, FALSE },
482 { "\x8a", -1, 0, FALSE },
483 { "\x8b", -1, 0, FALSE },
484 { "\x8c", -1, 0, FALSE },
485 { "\x8d", -1, 0, FALSE },
486 { "\x8e", -1, 0, FALSE },
487 { "\x8f", -1, 0, FALSE },
488 { "\x90", -1, 0, FALSE },
489 { "\x91", -1, 0, FALSE },
490 { "\x92", -1, 0, FALSE },
491 { "\x93", -1, 0, FALSE },
492 { "\x94", -1, 0, FALSE },
493 { "\x95", -1, 0, FALSE },
494 { "\x96", -1, 0, FALSE },
495 { "\x97", -1, 0, FALSE },
496 { "\x98", -1, 0, FALSE },
497 { "\x99", -1, 0, FALSE },
498 { "\x9a", -1, 0, FALSE },
499 { "\x9b", -1, 0, FALSE },
500 { "\x9c", -1, 0, FALSE },
501 { "\x9d", -1, 0, FALSE },
502 { "\x9e", -1, 0, FALSE },
503 { "\x9f", -1, 0, FALSE },
504 { "\xa0", -1, 0, FALSE },
505 { "\xa1", -1, 0, FALSE },
506 { "\xa2", -1, 0, FALSE },
507 { "\xa3", -1, 0, FALSE },
508 { "\xa4", -1, 0, FALSE },
509 { "\xa5", -1, 0, FALSE },
510 { "\xa6", -1, 0, FALSE },
511 { "\xa7", -1, 0, FALSE },
512 { "\xa8", -1, 0, FALSE },
513 { "\xa9", -1, 0, FALSE },
514 { "\xaa", -1, 0, FALSE },
515 { "\xab", -1, 0, FALSE },
516 { "\xac", -1, 0, FALSE },
517 { "\xad", -1, 0, FALSE },
518 { "\xae", -1, 0, FALSE },
519 { "\xaf", -1, 0, FALSE },
520 { "\xb0", -1, 0, FALSE },
521 { "\xb1", -1, 0, FALSE },
522 { "\xb2", -1, 0, FALSE },
523 { "\xb3", -1, 0, FALSE },
524 { "\xb4", -1, 0, FALSE },
525 { "\xb5", -1, 0, FALSE },
526 { "\xb6", -1, 0, FALSE },
527 { "\xb7", -1, 0, FALSE },
528 { "\xb8", -1, 0, FALSE },
529 { "\xb9", -1, 0, FALSE },
530 { "\xba", -1, 0, FALSE },
531 { "\xbb", -1, 0, FALSE },
532 { "\xbc", -1, 0, FALSE },
533 { "\xbd", -1, 0, FALSE },
534 { "\xbe", -1, 0, FALSE },
535 { "\xbf", -1, 0, FALSE },
536 /* lone start characters */
537 { "\xc0\x20", -1, 0, FALSE },
538 { "\xc1\x20", -1, 0, FALSE },
539 { "\xc2\x20", -1, 0, FALSE },
540 { "\xc3\x20", -1, 0, FALSE },
541 { "\xc4\x20", -1, 0, FALSE },
542 { "\xc5\x20", -1, 0, FALSE },
543 { "\xc6\x20", -1, 0, FALSE },
544 { "\xc7\x20", -1, 0, FALSE },
545 { "\xc8\x20", -1, 0, FALSE },
546 { "\xc9\x20", -1, 0, FALSE },
547 { "\xca\x20", -1, 0, FALSE },
548 { "\xcb\x20", -1, 0, FALSE },
549 { "\xcc\x20", -1, 0, FALSE },
550 { "\xcd\x20", -1, 0, FALSE },
551 { "\xce\x20", -1, 0, FALSE },
552 { "\xcf\x20", -1, 0, FALSE },
553 { "\xd0\x20", -1, 0, FALSE },
554 { "\xd1\x20", -1, 0, FALSE },
555 { "\xd2\x20", -1, 0, FALSE },
556 { "\xd3\x20", -1, 0, FALSE },
557 { "\xd4\x20", -1, 0, FALSE },
558 { "\xd5\x20", -1, 0, FALSE },
559 { "\xd6\x20", -1, 0, FALSE },
560 { "\xd7\x20", -1, 0, FALSE },
561 { "\xd8\x20", -1, 0, FALSE },
562 { "\xd9\x20", -1, 0, FALSE },
563 { "\xda\x20", -1, 0, FALSE },
564 { "\xdb\x20", -1, 0, FALSE },
565 { "\xdc\x20", -1, 0, FALSE },
566 { "\xdd\x20", -1, 0, FALSE },
567 { "\xde\x20", -1, 0, FALSE },
568 { "\xdf\x20", -1, 0, FALSE },
569 { "\xe0\x20", -1, 0, FALSE },
570 { "\xe1\x20", -1, 0, FALSE },
571 { "\xe2\x20", -1, 0, FALSE },
572 { "\xe3\x20", -1, 0, FALSE },
573 { "\xe4\x20", -1, 0, FALSE },
574 { "\xe5\x20", -1, 0, FALSE },
575 { "\xe6\x20", -1, 0, FALSE },
576 { "\xe7\x20", -1, 0, FALSE },
577 { "\xe8\x20", -1, 0, FALSE },
578 { "\xe9\x20", -1, 0, FALSE },
579 { "\xea\x20", -1, 0, FALSE },
580 { "\xeb\x20", -1, 0, FALSE },
581 { "\xec\x20", -1, 0, FALSE },
582 { "\xed\x20", -1, 0, FALSE },
583 { "\xee\x20", -1, 0, FALSE },
584 { "\xef\x20", -1, 0, FALSE },
585 { "\xf0\x20", -1, 0, FALSE },
586 { "\xf1\x20", -1, 0, FALSE },
587 { "\xf2\x20", -1, 0, FALSE },
588 { "\xf3\x20", -1, 0, FALSE },
589 { "\xf4\x20", -1, 0, FALSE },
590 { "\xf5\x20", -1, 0, FALSE },
591 { "\xf6\x20", -1, 0, FALSE },
592 { "\xf7\x20", -1, 0, FALSE },
593 { "\xf8\x20", -1, 0, FALSE },
594 { "\xf9\x20", -1, 0, FALSE },
595 { "\xfa\x20", -1, 0, FALSE },
596 { "\xfb\x20", -1, 0, FALSE },
597 { "\xfc\x20", -1, 0, FALSE },
598 { "\xfd\x20", -1, 0, FALSE },
599 /* missing continuation bytes */
600 { "\x20\xc0", -1, 1, FALSE },
601 { "\x20\xe0\x80", -1, 1, FALSE },
602 { "\x20\xf0\x80\x80", -1, 1, FALSE },
603 { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
604 { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
605 { "\x20\xdf", -1, 1, FALSE },
606 { "\x20\xef\xbf", -1, 1, FALSE },
607 { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
608 { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
609 { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
610 /* impossible bytes */
611 { "\x20\xfe\x20", -1, 1, FALSE },
612 { "\x20\xff\x20", -1, 1, FALSE },
614 /* XXX fix these, or document that we don't detect them? */
615 /* overlong sequences */
616 { "\x20\xc0\xaf\x20", -1, 1, FALSE },
617 { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
618 { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
619 { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
620 { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
621 { "\x20\xc1\xbf\x20", -1, 1, FALSE },
622 { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
623 { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
624 { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
625 { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
626 { "\x20\xc0\x80\x20", -1, 1, FALSE },
627 { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
628 { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
629 { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
630 { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
631 /* illegal code positions */
632 { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
633 { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
634 { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
635 { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
636 { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
637 { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
638 { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
639 { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
640 { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
641 { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
642 { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
643 { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
644 { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
645 { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
646 { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
647 { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
648 { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
/* For every entry of utf8_validity_tests, adds the first max_len bytes of
 * the entry's string (the whole string when max_len is -1) to a buffer and
 * looks for the first glyph whose code point is (hb_codepoint_t) -1.  Valid
 * entries must produce no such glyph; invalid entries must produce one. */
654 test_buffer_utf8_validity (void)
659 b = hb_buffer_create ();
661 for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
663 const utf8_validity_test_t *test = &utf8_validity_tests[i];
664 unsigned int text_bytes, segment_bytes, j, len;
665 hb_glyph_info_t *glyphs;
668 escaped = g_strescape (test->utf8, NULL);
669 g_test_message ("UTF-8 test #%d: %s", i, escaped);
672 text_bytes = strlen (test->utf8);
/* max_len == -1 means "use the whole string" */
673 if (test->max_len == -1)
674 segment_bytes = text_bytes;
676 segment_bytes = test->max_len;
679 hb_buffer_add_utf8 (b, test->utf8, text_bytes, 0, segment_bytes);
681 glyphs = hb_buffer_get_glyph_infos (b, &len);
/* scan for the first glyph carrying the (hb_codepoint_t) -1 marker */
682 for (j = 0; j < len; j++)
683 if (glyphs[j].codepoint == (hb_codepoint_t) -1)
/* j == len means no marker was found */
686 g_assert (test->valid ? j == len : j < len);
/* the cluster of the glyph at index j is compared with the expected offset */
688 g_assert (glyphs[j].cluster == test->offset);
691 hb_buffer_destroy (b);
696 const uint16_t utf16[8];
697 const uint32_t codepoints[8];
698 } utf16_conversion_test_t;
/* Test vectors for UTF-16 decoding: each entry pairs an input sequence with
 * the list of code points expected in the buffer; both lists end at the
 * first zero element.  An expected value of -1 stands for a surrogate that
 * does not form a valid pair within the added range. */
700 /* note: we skip the first and last item from utf16 when adding to buffer */
701 static const utf16_conversion_test_t utf16_conversion_tests[] = {
702 {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
703 {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
704 {{0x41, 0xD800, 0xDF02}, {-1}},
705 {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
706 {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
/* For every entry of utf16_conversion_tests, adds the entry's UTF-16 items
 * minus the first and last one to a buffer and compares the resulting code
 * points with the entry's expected list. */
711 test_buffer_utf16_conversion (void)
716 b = hb_buffer_create ();
718 for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
720 const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
721 unsigned int u_len, chars, j, len;
722 hb_glyph_info_t *glyphs;
724 g_test_message ("UTF-16 test #%d", i);
/* both arrays are zero-terminated: count items up to the first 0 */
726 for (u_len = 0; test->utf16[u_len]; u_len++)
728 for (chars = 0; test->codepoints[chars]; chars++)
/* offset 1 and length u_len - 2 leave out the first and last item */
732 hb_buffer_add_utf16 (b, test->utf16, u_len, 1, u_len - 2);
734 glyphs = hb_buffer_get_glyph_infos (b, &len);
735 g_assert_cmpint (len, ==, chars);
736 for (j = 0; j < chars; j++)
737 g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
740 hb_buffer_destroy (b);
/* Asserts that buffer b has length zero and that both the glyph-info and the
 * glyph-position getters return NULL for it. */
744 test_empty (hb_buffer_t *b)
746 g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
747 g_assert (!hb_buffer_get_glyph_infos (b, NULL));
748 g_assert (!hb_buffer_get_glyph_positions (b, NULL));
/* Checks the object returned by hb_buffer_get_empty(): it is non-NULL and
 * the same pointer on every call, it reports allocation as unsuccessful, and
 * adding text, reversing and changing its length are attempted on it. */
752 test_buffer_empty (void)
754 hb_buffer_t *b = hb_buffer_get_empty ();
756 g_assert (hb_buffer_get_empty ());
757 g_assert (hb_buffer_get_empty () == b);
759 g_assert (!hb_buffer_allocation_successful (b));
763 hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
767 hb_buffer_reverse (b);
768 hb_buffer_reverse_clusters (b);
/* setting a non-zero length is expected to fail, a zero length to succeed */
770 g_assert (!hb_buffer_set_length (b, 10));
774 g_assert (hb_buffer_set_length (b, 0));
778 g_assert (!hb_buffer_allocation_successful (b));
784 g_assert (!hb_buffer_allocation_successful (b));
/* Test program entry point: initializes the test harness, registers every
 * test and returns the result of running them. */
788 main (int argc, char **argv)
792 hb_test_init (&argc, &argv);
/* the properties, contents and positions tests are registered once per
 * buffer type; the type is handed to the fixture as user_data and the
 * matching entry of buffer_names is passed alongside it */
794 for (i = 0; i < BUFFER_NUM_TYPES; i++)
796 const void *buffer_type = GINT_TO_POINTER (i);
797 const char *buffer_name = buffer_names[i];
799 hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
800 hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
801 hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
/* the allocation test is registered for the empty buffer type only */
804 hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);
/* the remaining tests create their own buffers and take no fixture */
806 hb_test_add (test_buffer_utf8_conversion);
807 hb_test_add (test_buffer_utf8_validity);
808 hb_test_add (test_buffer_utf16_conversion);
809 hb_test_add (test_buffer_empty);
811 return hb_test_run();