[API] Add compose() and decompose() unicode funcs, rename other ones
author Behdad Esfahbod <behdad@behdad.org>
Fri, 8 Jul 2011 03:47:19 +0000 (23:47 -0400)
committer Behdad Esfahbod <behdad@behdad.org>
Fri, 8 Jul 2011 04:09:31 +0000 (00:09 -0400)
Add compose() and decompose() unicode funcs.  These implement
pair-wise canonical composition/decomposition.

The glib/icu implementations are lacking for now.  We are adding
API for this to glib, but I cannot find any useful API in ICU.
May end up implementing these in-house.

Changed all unicode_funcs callback names to remove the "_get" part.
Eg, hb_unicode_get_script_func_t is now hb_unicode_script_func_t,
and hb_unicode_get_script() is hb_unicode_script() now.

TODO
src/hb-glib.cc
src/hb-icu.cc
src/hb-ot-shape.cc
src/hb-shape.cc
src/hb-unicode-private.hh
src/hb-unicode.cc
src/hb-unicode.h
test/test-unicode.c

diff --git a/TODO b/TODO
index c5fe068..e15e295 100644 (file)
--- a/TODO
+++ b/TODO
@@ -68,7 +68,9 @@ Tests to write:
 
 - ot-layout enumeration API (needs font)
 
-- Finish test-shape.c
+- Finish test-shape.c, grep for TODO
+
+- Finish test-unicode.c, grep for TODO
 
 
 Optimizations:
index 109b9ba..de90972 100644 (file)
@@ -231,7 +231,11 @@ hb_unicode_funcs_t _hb_glib_unicode_funcs = {
     hb_glib_get_eastasian_width,
     hb_glib_get_general_category,
     hb_glib_get_mirroring,
-    hb_glib_get_script
+    hb_glib_get_script,
+    /* TODO
+    hb_glib_compose,
+    hb_glib_decompose,
+    */
   }
 };
 
index 1e0134c..4797cc5 100644 (file)
@@ -174,7 +174,11 @@ hb_unicode_funcs_t _hb_icu_unicode_funcs = {
     hb_icu_get_eastasian_width,
     hb_icu_get_general_category,
     hb_icu_get_mirroring,
-    hb_icu_get_script
+    hb_icu_get_script,
+    /* TODO
+    hb_icu_compose,
+    hb_icu_decompose,
+    */
   }
 };
 
index 248f2bd..8378e81 100644 (file)
@@ -197,8 +197,8 @@ hb_set_unicode_props (hb_ot_shape_context_t *c)
 
   unsigned int count = c->buffer->len;
   for (unsigned int i = 1; i < count; i++) {
-    info[i].general_category() = hb_unicode_get_general_category (unicode, info[i].codepoint);
-    info[i].combining_class() = hb_unicode_get_combining_class (unicode, info[i].codepoint);
+    info[i].general_category() = hb_unicode_general_category (unicode, info[i].codepoint);
+    info[i].combining_class() = hb_unicode_combining_class (unicode, info[i].codepoint);
   }
 }
 
@@ -252,7 +252,7 @@ hb_mirror_chars (hb_ot_shape_context_t *c)
 
   unsigned int count = c->buffer->len;
   for (unsigned int i = 0; i < count; i++) {
-    hb_codepoint_t codepoint = hb_unicode_get_mirroring (unicode, c->buffer->info[i].codepoint);
+    hb_codepoint_t codepoint = hb_unicode_mirroring (unicode, c->buffer->info[i].codepoint);
     if (likely (codepoint == c->buffer->info[i].codepoint))
       c->buffer->info[i].mask |= rtlm_mask; /* XXX this should be moved to before setting user-feature masks */
     else
index 5db6cfe..1ff830a 100644 (file)
@@ -63,7 +63,7 @@ hb_shape (hb_font_t          *font,
     hb_unicode_funcs_t *unicode = buffer->unicode;
     unsigned int count = buffer->len;
     for (unsigned int i = 0; i < count; i++) {
-      hb_script_t script = hb_unicode_get_script (unicode, buffer->info[i].codepoint);
+      hb_script_t script = hb_unicode_script (unicode, buffer->info[i].codepoint);
       if (likely (script != HB_SCRIPT_COMMON &&
                  script != HB_SCRIPT_INHERITED &&
                  script != HB_SCRIPT_UNKNOWN)) {
index 55b94a7..2b0ec99 100644 (file)
@@ -49,6 +49,8 @@ HB_BEGIN_DECLS
   HB_UNICODE_FUNC_IMPLEMENT (general_category) \
   HB_UNICODE_FUNC_IMPLEMENT (mirroring) \
   HB_UNICODE_FUNC_IMPLEMENT (script) \
+  HB_UNICODE_FUNC_IMPLEMENT (compose) \
+  HB_UNICODE_FUNC_IMPLEMENT (decompose) \
   /* ^--- Add new callbacks here */
 
 /* Simple callbacks are those taking a hb_codepoint_t and returning a hb_codepoint_t */
@@ -67,13 +69,13 @@ struct _hb_unicode_funcs_t {
 
   bool immutable;
 
-  /* Don't access these directly.  Call hb_unicode_get_*() instead. */
+  /* Don't access these directly.  Call hb_unicode_*() instead. */
 
   struct {
-#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_get_##name##_func_t name;
+#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name;
     HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
 #undef HB_UNICODE_FUNC_IMPLEMENT
-  } get;
+  } func;
 
   struct {
 #define HB_UNICODE_FUNC_IMPLEMENT(name) void *name;
index ba0004b..e2043c2 100644 (file)
@@ -40,45 +40,67 @@ HB_BEGIN_DECLS
  */
 
 static unsigned int
-hb_unicode_get_combining_class_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
-                                   hb_codepoint_t      unicode   HB_UNUSED,
-                                   void               *user_data HB_UNUSED)
+hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
+                               hb_codepoint_t      unicode   HB_UNUSED,
+                               void               *user_data HB_UNUSED)
 {
   return 0;
 }
 
 static unsigned int
-hb_unicode_get_eastasian_width_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
-                                   hb_codepoint_t      unicode   HB_UNUSED,
-                                   void               *user_data HB_UNUSED)
+hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
+                               hb_codepoint_t      unicode   HB_UNUSED,
+                               void               *user_data HB_UNUSED)
 {
   return 1;
 }
 
 static hb_unicode_general_category_t
-hb_unicode_get_general_category_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
-                                    hb_codepoint_t      unicode   HB_UNUSED,
-                                    void               *user_data HB_UNUSED)
+hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
+                                hb_codepoint_t      unicode   HB_UNUSED,
+                                void               *user_data HB_UNUSED)
 {
   return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
 }
 
 static hb_codepoint_t
-hb_unicode_get_mirroring_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
-                             hb_codepoint_t      unicode   HB_UNUSED,
-                             void               *user_data HB_UNUSED)
+hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
+                         hb_codepoint_t      unicode   HB_UNUSED,
+                         void               *user_data HB_UNUSED)
 {
   return unicode;
 }
 
 static hb_script_t
-hb_unicode_get_script_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
-                          hb_codepoint_t      unicode   HB_UNUSED,
-                          void               *user_data HB_UNUSED)
+hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
+                      hb_codepoint_t      unicode   HB_UNUSED,
+                      void               *user_data HB_UNUSED)
 {
   return HB_SCRIPT_UNKNOWN;
 }
 
+static hb_bool_t
+hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs,
+                       hb_codepoint_t      a         HB_UNUSED,
+                       hb_codepoint_t      b         HB_UNUSED,
+                       hb_codepoint_t     *ab        HB_UNUSED,
+                       void               *user_data HB_UNUSED)
+{
+  /* TODO handle Hangul jamo here? */
+  return FALSE;
+}
+
+static hb_bool_t
+hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs,
+                         hb_codepoint_t      ab        HB_UNUSED,
+                         hb_codepoint_t     *a         HB_UNUSED,
+                         hb_codepoint_t     *b         HB_UNUSED,
+                         void               *user_data HB_UNUSED)
+{
+  /* TODO handle Hangul jamo here? */
+  return FALSE;
+}
+
 
 hb_unicode_funcs_t _hb_unicode_funcs_nil = {
   HB_OBJECT_HEADER_STATIC,
@@ -86,7 +108,7 @@ hb_unicode_funcs_t _hb_unicode_funcs_nil = {
   NULL, /* parent */
   TRUE, /* immutable */
   {
-#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_get_##name##_nil,
+#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
     HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
 #undef HB_UNICODE_FUNC_IMPLEMENT
   }
@@ -113,7 +135,7 @@ hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
   hb_unicode_funcs_make_immutable (parent);
   ufuncs->parent = hb_unicode_funcs_reference (parent);
 
-  ufuncs->get = parent->get;
+  ufuncs->func = parent->func;
 
   /* We can safely copy user_data from parent since we hold a reference
    * onto it and it's immutable.  We should not copy the destroy notifiers
@@ -193,7 +215,7 @@ hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
                                                                                \
 void                                                                           \
 hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t            *ufuncs,     \
-                                   hb_unicode_get_##name##_func_t  func,       \
+                                   hb_unicode_##name##_func_t      func,       \
                                    void                           *user_data,  \
                                    hb_destroy_func_t               destroy)    \
 {                                                                              \
@@ -204,11 +226,11 @@ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t               *ufuncs,     \
     ufuncs->destroy.name (ufuncs->user_data.name);                             \
                                                                                \
   if (func) {                                                                  \
-    ufuncs->get.name = func;                                                   \
+    ufuncs->func.name = func;                                                  \
     ufuncs->user_data.name = user_data;                                                \
     ufuncs->destroy.name = destroy;                                            \
   } else {                                                                     \
-    ufuncs->get.name = ufuncs->parent->get.name;                               \
+    ufuncs->func.name = ufuncs->parent->func.name;                             \
     ufuncs->user_data.name = ufuncs->parent->user_data.name;                   \
     ufuncs->destroy.name = NULL;                                               \
   }                                                                            \
@@ -221,13 +243,30 @@ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t               *ufuncs,     \
 #define HB_UNICODE_FUNC_IMPLEMENT(return_type, name)                           \
                                                                                \
 return_type                                                                    \
-hb_unicode_get_##name (hb_unicode_funcs_t *ufuncs,                             \
-                      hb_codepoint_t      unicode)                             \
+hb_unicode_##name (hb_unicode_funcs_t *ufuncs,                                 \
+                  hb_codepoint_t      unicode)                                 \
 {                                                                              \
-  return ufuncs->get.name (ufuncs, unicode, ufuncs->user_data.name);           \
+  return ufuncs->func.name (ufuncs, unicode, ufuncs->user_data.name);          \
 }
     HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
 #undef HB_UNICODE_FUNC_IMPLEMENT
 
+hb_bool_t
+hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
+                   hb_codepoint_t      a,
+                   hb_codepoint_t      b,
+                   hb_codepoint_t     *ab)
+{
+  return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose);
+}
+
+hb_bool_t
+hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
+                     hb_codepoint_t      ab,
+                     hb_codepoint_t     *a,
+                     hb_codepoint_t     *b)
+{
+  return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose);
+}
 
 HB_END_DECLS
index e7a2005..9aa97a6 100644 (file)
@@ -90,73 +90,103 @@ hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
 
 /* typedefs */
 
-typedef unsigned int                  (*hb_unicode_get_combining_class_func_t)  (hb_unicode_funcs_t *ufuncs,
+typedef unsigned int                   (*hb_unicode_combining_class_func_t)    (hb_unicode_funcs_t *ufuncs,
                                                                                 hb_codepoint_t      unicode,
                                                                                 void               *user_data);
-typedef unsigned int                  (*hb_unicode_get_eastasian_width_func_t)  (hb_unicode_funcs_t *ufuncs,
+typedef unsigned int                   (*hb_unicode_eastasian_width_func_t)    (hb_unicode_funcs_t *ufuncs,
                                                                                 hb_codepoint_t      unicode,
                                                                                 void               *user_data);
-typedef hb_unicode_general_category_t (*hb_unicode_get_general_category_func_t) (hb_unicode_funcs_t *ufuncs,
+typedef hb_unicode_general_category_t  (*hb_unicode_general_category_func_t)   (hb_unicode_funcs_t *ufuncs,
                                                                                 hb_codepoint_t      unicode,
                                                                                 void               *user_data);
-typedef hb_codepoint_t                (*hb_unicode_get_mirroring_func_t)        (hb_unicode_funcs_t *ufuncs,
+typedef hb_codepoint_t                 (*hb_unicode_mirroring_func_t)          (hb_unicode_funcs_t *ufuncs,
                                                                                 hb_codepoint_t      unicode,
                                                                                 void               *user_data);
-typedef hb_script_t                   (*hb_unicode_get_script_func_t)           (hb_unicode_funcs_t *ufuncs,
+typedef hb_script_t                    (*hb_unicode_script_func_t)             (hb_unicode_funcs_t *ufuncs,
                                                                                 hb_codepoint_t      unicode,
                                                                                 void               *user_data);
 
+typedef hb_bool_t                      (*hb_unicode_compose_func_t)            (hb_unicode_funcs_t *ufuncs,
+                                                                                hb_codepoint_t      a,
+                                                                                hb_codepoint_t      b,
+                                                                                hb_codepoint_t     *ab,
+                                                                                void               *user_data);
+typedef hb_bool_t                      (*hb_unicode_decompose_func_t)          (hb_unicode_funcs_t *ufuncs,
+                                                                                hb_codepoint_t      ab,
+                                                                                hb_codepoint_t     *a,
+                                                                                hb_codepoint_t     *b,
+                                                                                void               *user_data);
+
 /* setters */
 
 void
 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
-                                          hb_unicode_get_combining_class_func_t combining_class_func,
+                                          hb_unicode_combining_class_func_t combining_class_func,
                                           void *user_data, hb_destroy_func_t destroy);
 
 void
 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
-                                          hb_unicode_get_eastasian_width_func_t eastasian_width_func,
+                                          hb_unicode_eastasian_width_func_t eastasian_width_func,
                                           void *user_data, hb_destroy_func_t destroy);
 
 void
 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
-                                           hb_unicode_get_general_category_func_t general_category_func,
+                                           hb_unicode_general_category_func_t general_category_func,
                                            void *user_data, hb_destroy_func_t destroy);
 
 void
 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
-                                    hb_unicode_get_mirroring_func_t mirroring_func,
+                                    hb_unicode_mirroring_func_t mirroring_func,
                                     void *user_data, hb_destroy_func_t destroy);
 
 void
 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
-                                 hb_unicode_get_script_func_t script_func,
+                                 hb_unicode_script_func_t script_func,
                                  void *user_data, hb_destroy_func_t destroy);
 
+void
+hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
+                                  hb_unicode_compose_func_t compose_func,
+                                  void *user_data, hb_destroy_func_t destroy);
+
+void
+hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
+                                    hb_unicode_decompose_func_t decompose_func,
+                                    void *user_data, hb_destroy_func_t destroy);
 
 
 /* accessors */
 
 unsigned int
-hb_unicode_get_combining_class (hb_unicode_funcs_t *ufuncs,
-                               hb_codepoint_t unicode);
+hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
+                           hb_codepoint_t unicode);
 
 unsigned int
-hb_unicode_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
-                               hb_codepoint_t unicode);
+hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
+                           hb_codepoint_t unicode);
 
 hb_unicode_general_category_t
-hb_unicode_get_general_category (hb_unicode_funcs_t *ufuncs,
-                                hb_codepoint_t unicode);
+hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
+                            hb_codepoint_t unicode);
 
 hb_codepoint_t
-hb_unicode_get_mirroring (hb_unicode_funcs_t *ufuncs,
-                         hb_codepoint_t unicode);
+hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
+                     hb_codepoint_t unicode);
 
 hb_script_t
-hb_unicode_get_script (hb_unicode_funcs_t *ufuncs,
-                      hb_codepoint_t unicode);
+hb_unicode_script (hb_unicode_funcs_t *ufuncs,
+                  hb_codepoint_t unicode);
 
+hb_bool_t
+hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
+                   hb_codepoint_t      a,
+                   hb_codepoint_t      b,
+                   hb_codepoint_t     *ab);
+hb_bool_t
+hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
+                     hb_codepoint_t      ab,
+                     hb_codepoint_t     *a,
+                     hb_codepoint_t     *b);
 
 HB_END_DECLS
 
index c84ba86..a691cb4 100644 (file)
@@ -93,7 +93,7 @@ a_is_for_arabic_get_script (hb_unicode_funcs_t *ufuncs,
   } else {
     hb_unicode_funcs_t *parent = hb_unicode_funcs_get_parent (ufuncs);
 
-    return hb_unicode_get_script (parent, codepoint);
+    return hb_unicode_script (parent, codepoint);
   }
 }
 
@@ -457,7 +457,7 @@ typedef struct {
   { \
     #name, \
     (func_setter_func_t) hb_unicode_funcs_set_##name##_func, \
-    (getter_func_t) hb_unicode_get_##name, \
+    (getter_func_t) hb_unicode_##name, \
     name##_tests, \
     G_N_ELEMENTS (name##_tests), \
     name##_tests_more, \
@@ -667,8 +667,8 @@ test_unicode_subclassing_nil (data_fixture_t *f, gconstpointer user_data)
   hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
                                     &f->data[1], free_up);
 
-  g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
-  g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN);
+  g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
+  g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN);
 
   g_assert (!f->data[0].freed && !f->data[1].freed);
   hb_unicode_funcs_destroy (aa);
@@ -686,8 +686,8 @@ test_unicode_subclassing_default (data_fixture_t *f, gconstpointer user_data)
   hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
                                     &f->data[1], free_up);
 
-  g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
-  g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
+  g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
+  g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
 
   g_assert (!f->data[0].freed && !f->data[1].freed);
   hb_unicode_funcs_destroy (aa);
@@ -714,9 +714,9 @@ test_unicode_subclassing_deep (data_fixture_t *f, gconstpointer user_data)
   hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
                                     &f->data[1], free_up);
 
-  g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
-  g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
-  g_assert_cmphex (hb_unicode_get_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN);
+  g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
+  g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
+  g_assert_cmphex (hb_unicode_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN);
 
   g_assert (!f->data[0].freed && !f->data[1].freed);
   hb_unicode_funcs_destroy (aa);
@@ -779,6 +779,9 @@ test_unicode_script_roundtrip (gconstpointer user_data)
 }
 
 
+/* TODO test compose() and decompose() */
+
+
 int
 main (int argc, char **argv)
 {