[arabic] Implement Unicode Arabic Mark Ordering Algorithm UTR#53
authorBehdad Esfahbod <behdad@behdad.org>
Wed, 4 Oct 2017 12:47:10 +0000 (14:47 +0200)
committerBehdad Esfahbod <behdad@behdad.org>
Wed, 4 Oct 2017 12:47:10 +0000 (14:47 +0200)
Fixes https://github.com/behdad/harfbuzz/issues/509

16 files changed:
src/hb-ot-shape-complex-arabic.cc
src/hb-ot-shape-complex-default.cc
src/hb-ot-shape-complex-hangul.cc
src/hb-ot-shape-complex-hebrew.cc
src/hb-ot-shape-complex-indic.cc
src/hb-ot-shape-complex-myanmar.cc
src/hb-ot-shape-complex-private.hh
src/hb-ot-shape-complex-thai.cc
src/hb-ot-shape-complex-tibetan.cc
src/hb-ot-shape-complex-use.cc
src/hb-ot-shape-normalize.cc
test/shaping/Makefile.am
test/shaping/fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf [new file with mode: 0644]
test/shaping/fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf [new file with mode: 0644]
test/shaping/record-test.sh
test/shaping/tests/arabic-mark-order.tests [new file with mode: 0644]

index ed7b3f2..28dd4e1 100644 (file)
@@ -613,6 +613,80 @@ postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan,
   HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
 }
 
+/* http://www.unicode.org/reports/tr53/tr53-1.pdf */
+
+static hb_codepoint_t
+modifier_combining_marks[] =
+{
+  0x0654u, /* ARABIC HAMZA ABOVE */
+  0x0655u, /* ARABIC HAMZA BELOW */
+  0x0658u, /* ARABIC MARK NOON GHUNNA */
+  0x06DCu, /* ARABIC SMALL HIGH SEEN */
+  0x06E3u, /* ARABIC SMALL LOW SEEN */
+  0x06E7u, /* ARABIC SMALL HIGH YEH */
+  0x06E8u, /* ARABIC SMALL HIGH NOON */
+  0x08F3u, /* ARABIC SMALL HIGH WAW */
+};
+
+static inline bool
+info_is_mcm (const hb_glyph_info_t &info)
+{
+  hb_codepoint_t u = info.codepoint;
+  for (unsigned int i = 0; i < ARRAY_LENGTH (modifier_combining_marks); i++)
+    if (u == modifier_combining_marks[i])
+      return true;
+  return false;
+}
+
+static void
+reorder_marks_arabic (const hb_ot_shape_plan_t *plan,
+                     hb_buffer_t              *buffer,
+                     unsigned int              start,
+                     unsigned int              end)
+{
+  hb_glyph_info_t *info = buffer->info;
+
+  unsigned int i = start;
+  for (unsigned int cc = 220; cc <= 230; cc += 10)
+  {
+    DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d\n", cc, i);
+    while (i < end && info_cc(info[i]) < cc)
+      i++;
+    DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d\n", cc, i);
+
+    if (i == end)
+      break;
+
+    if (info_cc(info[i]) > cc)
+      continue;
+
+    /* Technically we should also check "info_cc(info[j]) == cc"
+     * in the following loop.  But not doing it is safe; we might
+     * end up moving all the 220 MCMs and 230 MCMs together in one
+     * move and be done. */
+    unsigned int j = i;
+    while (j < end && info_is_mcm (info[j]))
+      j++;
+    DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d\n", cc, i, j);
+
+    if (i == j)
+      continue;
+
+    /* Shift it! */
+    DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d\n", cc, i, j);
+    hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS];
+    assert (j - i <= ARRAY_LENGTH (temp));
+    buffer->merge_out_clusters (start, j);
+    memmove (temp, &info[i], (j - i) * sizeof (hb_glyph_info_t));
+    memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t));
+    memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t));
+
+    start += j - i;
+
+    i = j;
+  }
+}
+
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
 {
   "arabic",
@@ -627,6 +701,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
   NULL, /* compose */
   setup_masks_arabic,
   NULL, /* disable_otl */
+  reorder_marks_arabic,
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
index 42830ab..857980c 100644 (file)
@@ -41,6 +41,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
index 0e74802..63850d3 100644 (file)
@@ -426,6 +426,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
   NULL, /* compose */
   setup_masks_hangul,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   false, /* fallback_position */
 };
index 96f2494..b8ddadc 100644 (file)
@@ -181,6 +181,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hebrew =
   compose_hebrew,
   NULL, /* setup_masks */
   disable_otl_hebrew,
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
index 00130e6..31dc1c0 100644 (file)
@@ -1840,6 +1840,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
   compose_indic,
   setup_masks_indic,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   false, /* fallback_position */
 };
index 676d494..4081ed0 100644 (file)
@@ -524,6 +524,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_old =
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
@@ -542,6 +543,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
   NULL, /* compose */
   setup_masks_myanmar,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
   false, /* fallback_position */
 };
index 8fadd7c..9792067 100644 (file)
@@ -39,6 +39,8 @@
 #define complex_var_u8_1()     var2.u8[3]
 
 
+#define HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS 32
+
 enum hb_ot_shape_zero_width_marks_type_t {
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
@@ -154,6 +156,16 @@ struct hb_ot_complex_shaper_t
    */
   bool (*disable_otl) (const hb_ot_shape_plan_t *plan);
 
+  /* reorder_marks()
+   * Called during shape().
+   * Shapers can use to modify ordering of combining marks.
+   * May be NULL.
+   */
+  void (*reorder_marks) (const hb_ot_shape_plan_t *plan,
+                        hb_buffer_t              *buffer,
+                        unsigned int              start,
+                        unsigned int              end);
+
   hb_ot_shape_zero_width_marks_type_t zero_width_marks;
 
   bool fallback_position;
index 924247f..651c47f 100644 (file)
@@ -378,6 +378,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   false,/* fallback_position */
 };
index aadf59f..a85ac0f 100644 (file)
@@ -58,6 +58,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_tibetan =
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
index a5ab0ab..ac3a248 100644 (file)
@@ -607,6 +607,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
   compose_use,
   setup_masks_use,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
   false, /* fallback_position */
 };
index a514488..fd9e7c2 100644 (file)
@@ -345,14 +345,18 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
       if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
         break;
 
-    /* We are going to do a O(n^2).  Only do this if the sequence is short. */
-    if (end - i > 10) {
+    /* We are going to do a O(n^2).  Only do this if the sequence is short,
+     * but not too short ;). */
+    if (end - i < 2 || end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) {
       i = end;
       continue;
     }
 
     buffer->sort (i, end, compare_combining_class);
 
+    if (plan->shaper->reorder_marks)
+      plan->shaper->reorder_marks (plan, buffer, i, end);
+
     i = end;
   }
 
index d29800c..ecb836d 100644 (file)
@@ -44,6 +44,7 @@ TESTS = \
        tests/arabic-fallback-shaping.tests \
        tests/arabic-feature-order.tests \
        tests/arabic-like-joining.tests \
+       tests/arabic-mark-order.tests \
        tests/automatic-fractions.tests \
        tests/cluster.tests \
        tests/color-fonts.tests \
diff --git a/test/shaping/fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf b/test/shaping/fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf
new file mode 100644 (file)
index 0000000..dc290ad
Binary files /dev/null and b/test/shaping/fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf differ
diff --git a/test/shaping/fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf b/test/shaping/fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf
new file mode 100644 (file)
index 0000000..baf544f
Binary files /dev/null and b/test/shaping/fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf differ
index 886d946..0259c76 100755 (executable)
@@ -43,7 +43,7 @@ if test $? != 0; then
 fi
 
 cp "$fontfile" "$dir/font.ttf"
-pyftsubset \
+fonttools subset \
        --glyph-names \
        --no-hinting \
        --layout-features='*' \
diff --git a/test/shaping/tests/arabic-mark-order.tests b/test/shaping/tests/arabic-mark-order.tests
new file mode 100644 (file)
index 0000000..b48c82f
--- /dev/null
@@ -0,0 +1,2 @@
+fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf::U+064A,U+064E,U+0670,U+0653,U+0640,U+0654,U+064E,U+0627:[afii57415.zz04=7+481|afii57454=4@25,975+0|uni0654=4@-50,50+0|afii57440=4+650|uni0670_uni0653=0@75,400+0|afii57454=0@750,1125+0|afii57450.calt=0+1331]
+fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf::U+0628,U+0618,U+0619,U+064E,U+064F,U+0654,U+0658,U+0653,U+0654,U+0651,U+0656,U+0651,U+065C,U+0655,U+0650:[uni0653.small=0@266,2508+0|uni0654=0@308,2151+0|uni0655=0@518,-1544+0|uni065C=0@501,-1453+0|uni0656=0@573,-659+0|uni0650=0@500,133+0|uni0619=0@300,1807+0|uni0618=0@357,1674+0|uni0651064E=0@387,1178+0|uni0651=0@402,764+0|uni0658=0@424,404+0|uni0654064F=0@540,-435+0|uni0628=0+1352]