From 63c0ef4a0763e579c9c80887bbfbd2651de05067 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 21 Jul 2011 20:58:42 -0400 Subject: [PATCH] Fix decompose() implementations to work with non-starter non-composables Add tests. --- src/hb-glib.cc | 10 +++++----- src/hb-icu.cc | 20 +++++++++++++------- test/test-unicode.c | 11 ++++++++--- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/hb-glib.cc b/src/hb-glib.cc index fbf8cf5..76e1dfd 100644 --- a/src/hb-glib.cc +++ b/src/hb-glib.cc @@ -296,16 +296,16 @@ hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, *b = 0; ret = *a != ab; } else if (len == 2) { + *a = g_utf8_get_char (normalized); + *b = g_utf8_get_char (g_utf8_next_char (normalized)); /* Here's the ugly part: if ab decomposes to a single character and * that character decomposes again, we have to detect that and undo * the second part :-(. */ gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC); - if (g_utf8_get_char (recomposed) != ab) { - *a = g_utf8_get_char (recomposed); + hb_codepoint_t c = g_utf8_get_char (recomposed); + if (c != ab && c != *a) { + *a = c; *b = 0; - } else { - *a = g_utf8_get_char (normalized); - *b = g_utf8_get_char (g_utf8_next_char (normalized)); } g_free (recomposed); ret = TRUE; diff --git a/src/hb-icu.cc b/src/hb-icu.cc index 7b85cd5..7fe78d2 100644 --- a/src/hb-icu.cc +++ b/src/hb-icu.cc @@ -214,6 +214,10 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_bool_t ret, err; UErrorCode icu_err; + /* This function is a monster! Maybe it wasn't a good idea adding a + * pairwise decompose API... */ + /* Watchout for the dragons. Err, watchout for macros changing len. */ + len = 0; err = FALSE; U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err); @@ -232,21 +236,23 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, *b = 0; ret = *a != ab; } else if (len == 2) { + len =0; + U16_NEXT_UNSAFE (normalized, len, *a); + U16_NEXT_UNSAFE (normalized, len, *b); + /* Here's the ugly part: if ab decomposes to a single character and * that character decomposes again, we have to detect that and undo * the second part :-(. */ UChar recomposed[20]; icu_err = U_ZERO_ERROR; - len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err); + unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err); if (icu_err) return FALSE; - U16_GET_UNSAFE (recomposed, 0, *a); - if (*a != ab) { + hb_codepoint_t c; + U16_GET_UNSAFE (recomposed, 0, c); + if (c != *a && c != ab) { + *a = c; *b = 0; - } else { - len =0; - U16_NEXT_UNSAFE (normalized, len, *a); - U16_GET_UNSAFE (normalized, len, *b); } ret = TRUE; } else { diff --git a/test/test-unicode.c b/test/test-unicode.c index c614c7d..9f526d7 100644 --- a/test/test-unicode.c +++ b/test/test-unicode.c @@ -800,6 +800,10 @@ test_unicode_normalization (gconstpointer user_data) g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0); g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0); + /* Non-starter pairs should not compose */ + g_assert (!hb_unicode_compose (uf, 0x0308, 0x0301, &ab) && ab == 0); /* !0x0344 */ + g_assert (!hb_unicode_compose (uf, 0x0F71, 0x0F72, &ab) && ab == 0); /* !0x0F73 */ + /* Pairs */ g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5); g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4); @@ -822,12 +826,13 @@ test_unicode_normalization (gconstpointer user_data) g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0); /* Singletons */ - g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b)); - g_assert_cmphex (a, ==, 0x00C5); - g_assert_cmphex (b, ==, 0); g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0); g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0); + /* Non-starter pairs decompose, but not compose */ + g_assert (hb_unicode_decompose (uf, 0x0344, &a, &b) && a == 0x0308 && b == 0x0301); + g_assert (hb_unicode_decompose (uf, 0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72); + /* Pairs */ g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A); g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302); -- 2.7.4