Implement more granular cluster-merging
authorBehdad Esfahbod <behdad@behdad.org>
Wed, 22 Jul 2015 15:51:12 +0000 (16:51 +0100)
committerBehdad Esfahbod <behdad@behdad.org>
Wed, 22 Jul 2015 15:51:12 +0000 (16:51 +0100)
TODO: Documentation.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=71445

NEWS
src/hb-buffer-private.hh
src/hb-buffer.cc
src/hb-buffer.h
src/hb-ot-shape-complex-hangul.cc
src/hb-ot-shape.cc
util/options.cc
util/options.hh

diff --git a/NEWS b/NEWS
index d985427..a5ac906 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,11 @@
+
+- Removed HB_NO_MERGE_CLUSTERS hack.
+- New API:
+  hb_buffer_cluster_level_t enum
+  hb_buffer_get_cluster_level()
+  hb_buffer_set_cluster_level()
+  hb-shape / hb-view --cluster-level
+
 Overview of changes leading to 0.9.41
 Thursday, June 18, 2015
 =====================================
index 32d5a5f..3a2db48 100644 (file)
@@ -50,6 +50,7 @@ struct hb_buffer_t {
   /* Information about how the text in the buffer should be treated */
   hb_unicode_funcs_t *unicode; /* Unicode functions */
   hb_buffer_flags_t flags; /* BOT / EOT / etc. */
+  hb_buffer_cluster_level_t cluster_level;
   hb_codepoint_t replacement; /* U+FFFD or something else. */
 
   /* Buffer contents */
index 0a11fec..03fe8e1 100644 (file)
@@ -507,9 +507,8 @@ void
 hb_buffer_t::merge_clusters_impl (unsigned int start,
                                  unsigned int end)
 {
-#ifdef HB_NO_MERGE_CLUSTERS
-  return;
-#endif
+  if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
+    return;
 
   unsigned int cluster = info[start].cluster;
 
@@ -536,9 +535,8 @@ void
 hb_buffer_t::merge_out_clusters (unsigned int start,
                                 unsigned int end)
 {
-#ifdef HB_NO_MERGE_CLUSTERS
-  return;
-#endif
+  if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
+    return;
 
   if (unlikely (end - start < 2))
     return;
@@ -738,6 +736,7 @@ hb_buffer_get_empty (void)
 
     const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil),
     HB_BUFFER_FLAG_DEFAULT,
+    HB_BUFFER_CLUSTER_LEVEL_DEFAULT,
     HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT,
 
     HB_BUFFER_CONTENT_TYPE_INVALID,
@@ -1083,6 +1082,41 @@ hb_buffer_get_flags (hb_buffer_t *buffer)
   return buffer->flags;
 }
 
+/**
+ * hb_buffer_set_cluster_level:
+ * @buffer: a buffer.
+ * @cluster_level: 
+ *
+ * 
+ *
+ * Since: 0.9.42
+ **/
+void
+hb_buffer_set_cluster_level (hb_buffer_t       *buffer,
+                    hb_buffer_cluster_level_t  cluster_level)
+{
+  if (unlikely (hb_object_is_inert (buffer)))
+    return;
+
+  buffer->cluster_level = cluster_level;
+}
+
+/**
+ * hb_buffer_get_cluster_level:
+ * @buffer: a buffer.
+ *
+ * 
+ *
+ * Return value: 
+ *
+ * Since: 0.9.42
+ **/
+hb_buffer_cluster_level_t
+hb_buffer_get_cluster_level (hb_buffer_t *buffer)
+{
+  return buffer->cluster_level;
+}
+
 
 /**
  * hb_buffer_set_replacement_codepoint:
index 520141b..4b285bb 100644 (file)
@@ -185,7 +185,19 @@ hb_buffer_set_flags (hb_buffer_t       *buffer,
 hb_buffer_flags_t
 hb_buffer_get_flags (hb_buffer_t *buffer);
 
+typedef enum {
+  HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES   = 0,
+  HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS  = 1,
+  HB_BUFFER_CLUSTER_LEVEL_CHARACTERS           = 2,
+  HB_BUFFER_CLUSTER_LEVEL_DEFAULT = HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES
+} hb_buffer_cluster_level_t;
+
+void
+hb_buffer_set_cluster_level (hb_buffer_t               *buffer,
+                            hb_buffer_cluster_level_t  cluster_level);
 
+hb_buffer_cluster_level_t
+hb_buffer_get_cluster_level (hb_buffer_t *buffer);
 
 #define HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT 0xFFFDu
 
index 6ac18b0..763dbf0 100644 (file)
@@ -209,13 +209,8 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
          hb_glyph_info_t tone = info[end];
          memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t));
          info[start] = tone;
+         buffer->merge_out_clusters (start, end + 1);
        }
-       /* Merge clusters across the (possibly reordered) syllable+tone.
-        * We want to merge even in the zero-width tone mark case here,
-        * so that clustering behavior isn't dependent on how the tone mark
-        * is handled by the font.
-        */
-       buffer->merge_out_clusters (start, end + 1);
       }
       else
       {
@@ -296,7 +291,8 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
        }
        else
          end = start + 2;
-       buffer->merge_out_clusters (start, end);
+       if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+         buffer->merge_out_clusters (start, end);
        continue;
       }
     }
@@ -368,7 +364,8 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
          info[i++].hangul_shaping_feature() = VJMO;
          if (i < end)
            info[i++].hangul_shaping_feature() = TJMO;
-         buffer->merge_out_clusters (start, end);
+         if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+           buffer->merge_out_clusters (start, end);
          continue;
        }
       }
index 5ead0b7..d290cad 100644 (file)
@@ -264,6 +264,9 @@ hb_insert_dotted_circle (hb_buffer_t *buffer, hb_font_t *font)
 static void
 hb_form_clusters (hb_buffer_t *buffer)
 {
+  if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
+    return;
+
   /* Loop duplicated in hb_ensure_native_direction(). */
   unsigned int base = 0;
   unsigned int count = buffer->len;
@@ -301,10 +304,14 @@ hb_ensure_native_direction (hb_buffer_t *buffer)
       if (likely (!HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&info[i]))))
       {
        buffer->reverse_range (base, i);
+       if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
+         buffer->merge_clusters (base, i);
        base = i;
       }
     }
     buffer->reverse_range (base, count);
+    if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
+      buffer->merge_clusters (base, count);
 
     buffer->reverse ();
 
index 0f92aec..0821a17 100644 (file)
@@ -291,6 +291,7 @@ shape_options_t::add_options (option_parser_t *parser)
     {"eot",            0, 0, G_OPTION_ARG_NONE,        &this->eot,                     "Treat text as end-of-paragraph",       NULL},
     {"preserve-default-ignorables",0, 0, G_OPTION_ARG_NONE,    &this->preserve_default_ignorables,     "Preserve Default-Ignorable characters",        NULL},
     {"utf8-clusters",  0, 0, G_OPTION_ARG_NONE,        &this->utf8_clusters,           "Use UTF8 byte indices, not char indices",      NULL},
+    {"cluster-level",  0, 0, G_OPTION_ARG_INT,         &this->cluster_level,           "Cluster merging level (default: 0)",   "0/1/2"},
     {"normalize-glyphs",0, 0, G_OPTION_ARG_NONE,       &this->normalize_glyphs,        "Rearrange glyph clusters in nominal order",    NULL},
     {"num-iterations", 0, 0, G_OPTION_ARG_INT,         &this->num_iterations,          "Run shaper N times (default: 1)",      "N"},
     {NULL}
index 8b9b10e..6eb6c04 100644 (file)
@@ -180,6 +180,7 @@ struct shape_options_t : option_group_t
     num_features = 0;
     shapers = NULL;
     utf8_clusters = false;
+    cluster_level = HB_BUFFER_CLUSTER_LEVEL_DEFAULT;
     normalize_glyphs = false;
     num_iterations = 1;
 
@@ -202,6 +203,7 @@ struct shape_options_t : option_group_t
                         (bot ? HB_BUFFER_FLAG_BOT : 0) |
                         (eot ? HB_BUFFER_FLAG_EOT : 0) |
                         (preserve_default_ignorables ? HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES : 0)));
+    hb_buffer_set_cluster_level (buffer, cluster_level);
     hb_buffer_guess_segment_properties (buffer);
   }
 
@@ -265,6 +267,7 @@ struct shape_options_t : option_group_t
   unsigned int num_features;
   char **shapers;
   hb_bool_t utf8_clusters;
+  hb_buffer_cluster_level_t cluster_level;
   hb_bool_t normalize_glyphs;
   unsigned int num_iterations;
 };