g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
g_assert (hb_buffer_get_language (b) == NULL);
- g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
/* test property changes are retained */
hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
+ hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
+ g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
- /* test clear clears all properties but unicode_funcs */
+ /* test clear_contents clears all these properties: */
hb_buffer_clear_contents (b);
g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
g_assert (hb_buffer_get_language (b) == NULL);
- g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
+
+ /* but not these: */
+
+ g_assert (hb_buffer_get_flags (b) != HB_BUFFER_FLAGS_DEFAULT);
+ g_assert (hb_buffer_get_replacement_codepoint (b) != HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
/* test reset clears all properties */
hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
+ hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
+ g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
+
hb_buffer_reset (b);
g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
g_assert (hb_buffer_get_language (b) == NULL);
g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
+ g_assert (hb_buffer_get_replacement_codepoint (b) == HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
}
static void
g_assert_cmpint (len, ==, 5);
for (i = 0; i < len; i++) {
- g_assert_cmphex (glyphs[i].mask, ==, 1);
+ g_assert_cmphex (glyphs[i].mask, ==, 0);
g_assert_cmphex (glyphs[i].var1.u32, ==, 0);
g_assert_cmphex (glyphs[i].var2.u32, ==, 0);
}
/* note: we skip the first and last byte when adding to buffer */
static const utf8_conversion_test_t utf8_conversion_tests[] = {
- {"a\303\207", {-1}},
+ {"a\303\207", {(hb_codepoint_t) -1}}, /* only the byte \303 is added: expect the replacement value */
{"a\303\207b", {0xC7}},
- {"ab\303cd", {'b', -1, 'c'}},
- {"ab\303\302\301cd", {'b', -1, -1, -1, 'c'}}
+ {"ab\303cd", {'b', (hb_codepoint_t) -1, 'c'}}, /* \303 followed by 'c' instead of a continuation byte */
+ {"ab\303\302\301cd", {'b', (hb_codepoint_t) -1, (hb_codepoint_t) -1, (hb_codepoint_t) -1, 'c'}} /* one replacement expected per invalid byte */
};
static void
unsigned int bytes, chars, i, j, len;
b = hb_buffer_create ();
+ hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
{
for (chars = 0; test->codepoints[chars]; chars++)
;
- hb_buffer_reset (b);
+ hb_buffer_clear_contents (b);
hb_buffer_add_utf8 (b, test->utf8, bytes, 1, bytes - 2);
glyphs = hb_buffer_get_glyph_infos (b, &len);
{ "\xe2\x89\xa0\xe2\x89\xa0", 5, 3, FALSE },
{ "\xe2\x89\xa0\xe2\x89\xa0", 6, 6, TRUE },
- /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
+ /* examples from https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
/* greek 'kosme' */
{ "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
/* first sequence of each length */
{ "\x7f", -1, 1, TRUE },
{ "\xdf\xbf", -1, 2, TRUE },
{ "\xef\xbf\xbf", -1, 0, TRUE },
- { "\xf7\xbf\xbf\xbf", -1, 0, TRUE },
+ { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
+ { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
+ { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
{ "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
{ "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
/* other boundary conditions */
{ "\xed\x9f\xbf", -1, 3, TRUE },
+ { "\xed\xa0\x80", -1, 0, FALSE },
+ { "\xed\xbf\xbf", -1, 0, FALSE },
{ "\xee\x80\x80", -1, 3, TRUE },
{ "\xef\xbf\xbd", -1, 3, TRUE },
{ "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
/* impossible bytes */
{ "\x20\xfe\x20", -1, 1, FALSE },
{ "\x20\xff\x20", -1, 1, FALSE },
-#if 0
- /* XXX fix these, or document that we don't detect them? */
/* overlong sequences */
{ "\x20\xc0\xaf\x20", -1, 1, FALSE },
{ "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
{ "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
{ "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
{ "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
+#if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
{ "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
{ "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
#endif
unsigned int i;
b = hb_buffer_create ();
+ hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
{
else
segment_bytes = test->max_len;
- hb_buffer_reset (b);
+ hb_buffer_clear_contents (b);
hb_buffer_add_utf8 (b, test->utf8, text_bytes, 0, segment_bytes);
glyphs = hb_buffer_get_glyph_infos (b, &len);
static const utf16_conversion_test_t utf16_conversion_tests[] = {
{{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
{{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
- {{0x41, 0xD800, 0xDF02}, {-1}},
- {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
- {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
- {{0x41, 0x61}, {}}
+ {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -1}}, /* only 0xD800 is added, without its pair */
+ {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -1}}, /* 0xD800 is the last item added */
+ {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -1, 0x61}}, /* 0xD800 followed by a non-surrogate */
+ {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -1}}, /* 0xDF00 on its own */
+ {{0x41, 0x61}, {0}} /* nothing is left once the first and last items are skipped */
};
static void
unsigned int i;
b = hb_buffer_create ();
+ hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
{
for (chars = 0; test->codepoints[chars]; chars++)
;
- hb_buffer_reset (b);
+ hb_buffer_clear_contents (b);
hb_buffer_add_utf16 (b, test->utf16, u_len, 1, u_len - 2);
glyphs = hb_buffer_get_glyph_infos (b, &len);
hb_buffer_destroy (b);
}
+
+typedef struct { /* one UTF-32 conversion case: input items and the codepoints expected in the buffer */
+ const uint32_t utf32[8]; /* input items; the test counts them up to the first 0 */
+ const uint32_t codepoints[8]; /* expected buffer codepoints; also counted up to the first 0 */
+} utf32_conversion_test_t;
+
+/* note: we skip the first and last item from utf32 when adding to buffer */
+static const utf32_conversion_test_t utf32_conversion_tests[] = { /* (hb_codepoint_t) -3 marks items expected to be replaced */
+ {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, (hb_codepoint_t) -3, (hb_codepoint_t) -3}}, /* 0xD800 and 0xDF02 are each replaced, not combined */
+ {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}}, /* 0x10302 given directly passes through */
+ {{0x41, 0xD800, 0xDF02, 0x61}, {(hb_codepoint_t) -3, (hb_codepoint_t) -3}}, /* surrogate values only: both replaced */
+ {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -3}}, /* only 0xD800 is added */
+ {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -3}}, /* 0xD800 is the last item added */
+ {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -3, 0x61}}, /* 0xD800 is the first item added */
+ {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -3}}, /* 0xDF00 on its own */
+ {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}}, /* 0x10FFFF is kept as is */
+ {{0x41, 0x110000, 0x61}, {(hb_codepoint_t) -3}}, /* 0x110000 is replaced */
+ {{0x41, 0x61}, {0}} /* nothing is left once the first and last items are skipped */
+};
+
+/* Feeds every utf32_conversion_tests entry, minus its first and last item,
+ * through hb_buffer_add_utf32 () and compares the resulting glyph codepoints
+ * with the entry's expected list.  The buffer's replacement codepoint is set
+ * to (hb_codepoint_t) -3, the value the table uses for items it expects to be
+ * replaced. */
+static void
+test_buffer_utf32_conversion (void)
+{
+  hb_buffer_t *b;
+  unsigned int i;
+
+  b = hb_buffer_create ();
+  hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
+
+  for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
+  {
+    const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
+    unsigned int u_len, chars, j, len;
+    hb_glyph_info_t *glyphs;
+
+    /* i is unsigned, so it is printed with %u rather than %d. */
+    g_test_message ("UTF-32 test #%u", i);
+
+    /* Both arrays are zero-terminated: count items up to the terminator. */
+    for (u_len = 0; test->utf32[u_len]; u_len++)
+      ;
+    for (chars = 0; test->codepoints[chars]; chars++)
+      ;
+
+    /* The call below skips the first and last item; u_len - 2 is unsigned
+     * and would wrap around for an entry with fewer than two items. */
+    g_assert (u_len >= 2);
+
+    /* clear_contents is used so the replacement codepoint set above stays
+     * in effect for every entry. */
+    hb_buffer_clear_contents (b);
+    hb_buffer_add_utf32 (b, test->utf32, u_len, 1, u_len - 2);
+
+    glyphs = hb_buffer_get_glyph_infos (b, &len);
+    g_assert_cmpint (len, ==, chars);
+    for (j = 0; j < chars; j++)
+      g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
+  }
+
+  hb_buffer_destroy (b);
+}
+
+
static void
test_empty (hb_buffer_t *b)
{
hb_test_add (test_buffer_utf8_conversion);
hb_test_add (test_buffer_utf8_validity);
hb_test_add (test_buffer_utf16_conversion);
+ hb_test_add (test_buffer_utf32_conversion);
hb_test_add (test_buffer_empty);
return hb_test_run();