[USE] Put a Ragel machine together
author Behdad Esfahbod <behdad@behdad.org>
Mon, 20 Jul 2015 12:30:51 +0000 (13:30 +0100)
committer Behdad Esfahbod <behdad@behdad.org>
Mon, 20 Jul 2015 12:50:38 +0000 (13:50 +0100)
Grammar from the spec!

src/hb-ot-shape-complex-use-machine.rl
src/hb-ot-shape-complex-use-private.hh

index e69de29..0ea71ac 100644 (file)
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2015  Mozilla Foundation.
+ * Copyright © 2015  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+
+#include "hb-private.hh"
+
+/* Ragel machine declaration: names the state machine, declares its input
+ * alphabet as unsigned char, and asks Ragel to emit the machine's static
+ * data at this point in the generated file. */
+%%{
+  machine use_syllable_machine;
+  alphtype unsigned char;
+  write data;
+}%%
+
+%%{
+
+# Input alphabet: one symbol per USE category.
+# Same order as enum use_category_t.  Not sure how to avoid duplication.
+# Commented-out entries have no symbol in this machine; they appear to be
+# superseded by the positional sub-categories (24..42) further down.
+# NOTE(review): CGJ, ZWNJ and ZWJ are defined but not referenced by any
+# cluster below -- confirm how the scanner is meant to treat them.
+
+O      = 0; # OTHER
+
+B      = 1; # BASE
+IV     = 2; # BASE_VOWEL
+IND    = 3; # BASE_IND
+N      = 4; # BASE_NUM
+GB     = 5; # BASE_OTHER
+CGJ    = 6; # CGJ
+#F     = 7; # CONS_FINAL
+FM     = 8; # CONS_FINAL_MOD
+#M     = 9; # CONS_MED
+#CM    = 10; # CONS_MOD
+SUB    = 11; # CONS_SUB
+H      = 12; # HALANT
+HN     = 13; # HALANT_NUM
+ZWNJ   = 14; # Zero width non-joiner
+ZWJ    = 15; # Zero width joiner
+WJ     = 16; # Word joiner
+Rsv    = 17; # Reserved characters
+R      = 18; # REPHA
+S      = 19; # SYM
+#SM    = 20; # SYM_MOD
+VS     = 21; # VARIATION_SELECTOR
+#V     = 22; # VOWEL
+#VM    = 23; # VOWEL_MOD
+
+FAbv   = 24; # CONS_FINAL_ABOVE
+FBlw   = 25; # CONS_FINAL_BELOW
+FPst   = 26; # CONS_FINAL_POST
+MAbv   = 27; # CONS_MED_ABOVE
+MBlw   = 28; # CONS_MED_BELOW
+MPst   = 29; # CONS_MED_POST
+MPre   = 30; # CONS_MED_PRE
+CMAbv  = 31; # CONS_MOD_ABOVE
+CMBlw  = 32; # CONS_MOD_BELOW
+VAbv   = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
+VBlw   = 34; # VOWEL_BELOW / VOWEL_BELOW_POST
+VPst   = 35; # VOWEL_POST      UIPC = Right
+VPre   = 36; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
+VMAbv  = 37; # VOWEL_MOD_ABOVE
+VMBlw  = 38; # VOWEL_MOD_BELOW
+VMPst  = 39; # VOWEL_MOD_POST
+VMPre  = 40; # VOWEL_MOD_PRE
+SMAbv  = 41; # SYM_MOD_ABOVE
+SMBlw  = 42; # SYM_MOD_BELOW
+
+
+# Building blocks shared by the cluster rules below.
+consonant_modifiers = CMAbv* CMBlw* ((H B | SUB) VS? CMAbv? CMBlw*)*;
+medial_consonants = MPre? MAbv? MBlw? MPst?;
+dependent_vowels = VPre* VAbv* VBlw* VPst*;
+vowel_modifiers = VMPre* VMAbv* VMBlw* VMPst*;
+final_consonants = FAbv* FBlw* FPst* FM?;
+
+# A base (optionally preceded by a repha) whose cluster ends in a halant.
+virama_terminated_cluster =
+       R? (B | GB | IV) VS?
+       consonant_modifiers
+       H
+;
+# A consonant base followed by its modifiers, medials, vowels and finals.
+consonant_cluster =
+       R? (B | GB) VS?
+       consonant_modifiers
+       medial_consonants
+       dependent_vowels
+       vowel_modifiers
+       final_consonants
+;
+# Same as consonant_cluster but on an independent vowel base and without
+# dependent vowels.
+vowel_cluster =
+       R? (IV) VS?
+       consonant_modifiers
+       medial_consonants
+       vowel_modifiers
+       final_consonants
+;
+
+# Numbers are joined with HN (HALANT_NUM), not the ordinary halant H, as in
+# the USE specification's cluster grammar.
+number_joiner_terminated_cluster = N VS? (HN N VS?)* HN;
+numeral_cluster = N VS? (HN N VS?)*;
+symbol_cluster = S VS? SMAbv* SMBlw*;
+independent_cluster = (IND | O | Rsv | WJ) VS?;
+
+# Scanner: each matched pattern reports its cluster type via found_syllable().
+main := |*
+       independent_cluster                     => { found_syllable (independent_cluster); };
+       virama_terminated_cluster               => { found_syllable (virama_terminated_cluster); };
+       consonant_cluster                       => { found_syllable (consonant_cluster); };
+       vowel_cluster                           => { found_syllable (vowel_cluster); };
+       number_joiner_terminated_cluster        => { found_syllable (number_joiner_terminated_cluster); };
+       numeral_cluster                         => { found_syllable (numeral_cluster); };
+       symbol_cluster                          => { found_syllable (symbol_cluster); };
+*|;
+
+
+}%%
+
+#define found_syllable(syllable_type) \
+  HB_STMT_START { \
+    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \
+    for (unsigned int i = last; i < p+1; i++) \
+      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
+    last = p+1; \
+    syllable_serial++; \
+    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+  } HB_STMT_END
+
+/* Runs the use_syllable_machine scanner over buffer->info, reading each
+ * glyph's use_category() as the input symbol.  Matched clusters are recorded
+ * by the found_syllable() calls in the scanner actions. */
+static void
+find_syllables (hb_buffer_t *buffer)
+{
+  /* Variables consumed by the Ragel-generated code; ts/te/act are marked
+   * HB_UNUSED, presumably because the generated code may not read them. */
+  unsigned int p, pe, eof, ts HB_UNUSED, te HB_UNUSED, act HB_UNUSED;
+  int cs;
+  hb_glyph_info_t *info = buffer->info;
+  %%{
+    write init;
+    getkey info[p].use_category();
+  }%%
+
+  /* Scan the whole buffer: start at index 0; both the end of data and the
+   * end of input are buffer->len. */
+  p = 0;
+  pe = eof = buffer->len;
+
+  /* State read and updated by found_syllable(). */
+  unsigned int last = 0;
+  unsigned int syllable_serial = 1;
+  %%{
+    write exec;
+  }%%
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
index e69de29..73ecc3b 100644 (file)
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2015  Mozilla Foundation.
+ * Copyright © 2015  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
+
+#include "hb-private.hh"
+
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+/* Integer type returned by hb_use_get_categories(). */
+#define USE_TABLE_ELEMENT_TYPE uint16_t
+
+/* Categories used in the Universal Shaping Engine spec:
+ * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
+ */
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication. */
+enum use_category_t {
+  USE_O                = 0,    /* OTHER */
+
+  USE_B                = 1,    /* BASE */
+  USE_IV       = 2,    /* BASE_VOWEL */
+  USE_IND      = 3,    /* BASE_IND */
+  USE_N                = 4,    /* BASE_NUM */
+  USE_GB       = 5,    /* BASE_OTHER */
+  USE_CGJ      = 6,    /* CGJ */
+//  USE_F              = 7,    /* CONS_FINAL */
+  USE_FM       = 8,    /* CONS_FINAL_MOD */
+//  USE_M              = 9,    /* CONS_MED */
+//  USE_CM     = 10,   /* CONS_MOD */
+  USE_SUB      = 11,   /* CONS_SUB */
+  USE_H                = 12,   /* HALANT */
+  USE_HN       = 13,   /* HALANT_NUM */
+  USE_ZWNJ     = 14,   /* Zero width non-joiner */
+  USE_ZWJ      = 15,   /* Zero width joiner */
+  USE_WJ       = 16,   /* Word joiner */
+  USE_Rsv      = 17,   /* Reserved characters */
+  USE_R                = 18,   /* REPHA */
+  USE_S                = 19,   /* SYM */
+//  USE_SM     = 20,   /* SYM_MOD */
+  USE_VS       = 21,   /* VARIATION_SELECTOR */
+//  USE_V              = 22,   /* VOWEL */
+//  USE_VM     = 23,   /* VOWEL_MOD */
+
+  USE_FAbv     = 24,   /* CONS_FINAL_ABOVE */
+  USE_FBlw     = 25,   /* CONS_FINAL_BELOW */
+  USE_FPst     = 26,   /* CONS_FINAL_POST */
+  USE_MAbv     = 27,   /* CONS_MED_ABOVE */
+  USE_MBlw     = 28,   /* CONS_MED_BELOW */
+  USE_MPst     = 29,   /* CONS_MED_POST */
+  USE_MPre     = 30,   /* CONS_MED_PRE */
+  USE_CMAbv    = 31,   /* CONS_MOD_ABOVE */
+  USE_CMBlw    = 32,   /* CONS_MOD_BELOW */
+  USE_VAbv     = 33,   /* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
+  USE_VBlw     = 34,   /* VOWEL_BELOW / VOWEL_BELOW_POST */
+  USE_VPst     = 35,   /* VOWEL_POST   UIPC = Right */
+  USE_VPre     = 36,   /* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
+  USE_VMAbv    = 37,   /* VOWEL_MOD_ABOVE */
+  USE_VMBlw    = 38,   /* VOWEL_MOD_BELOW */
+  USE_VMPst    = 39,   /* VOWEL_MOD_POST */
+  USE_VMPre    = 40,   /* VOWEL_MOD_PRE */
+  USE_SMAbv    = 41,   /* SYM_MOD_ABOVE */
+  USE_SMBlw    = 42    /* SYM_MOD_BELOW */
+};
+
+/* Categories used in IndicSyllabicCategory.txt from UCD.
+ * Enumerators take consecutive implicit values starting at 0, in the order
+ * listed.  No comma follows the last enumerator, matching use_category_t and
+ * keeping the header valid for pre-C++11 compilers. */
+enum indic_syllabic_category_t {
+  INDIC_SYLLABIC_CATEGORY_AVAGRAHA,
+  INDIC_SYLLABIC_CATEGORY_BINDU,
+  INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER,
+  INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER,
+  INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK,
+  INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER,
+  INDIC_SYLLABIC_CATEGORY_JOINER,
+  INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER,
+  INDIC_SYLLABIC_CATEGORY_NON_JOINER,
+  INDIC_SYLLABIC_CATEGORY_NUKTA,
+  INDIC_SYLLABIC_CATEGORY_NUMBER,
+  INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER,
+  INDIC_SYLLABIC_CATEGORY_OTHER,
+  INDIC_SYLLABIC_CATEGORY_PURE_KILLER,
+  INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER,
+  INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER,
+  INDIC_SYLLABIC_CATEGORY_TONE_LETTER,
+  INDIC_SYLLABIC_CATEGORY_TONE_MARK,
+  INDIC_SYLLABIC_CATEGORY_VIRAMA,
+  INDIC_SYLLABIC_CATEGORY_VISARGA,
+  INDIC_SYLLABIC_CATEGORY_VOWEL,
+  INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT,
+  INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT
+};
+
+/* Categories used in IndicPositionalCategory.txt from UCD.
+ * Enumerators take consecutive implicit values starting at 0, in the order
+ * listed.  No comma follows the last enumerator, matching use_category_t and
+ * keeping the header valid for pre-C++11 compilers.
+ * NOTE(review): the type is named indic_matra_category_t while its
+ * enumerators use the INDIC_POSITIONAL_CATEGORY_ prefix; the type name is
+ * left unchanged here so existing users keep compiling. */
+enum indic_matra_category_t {
+  INDIC_POSITIONAL_CATEGORY_BOTTOM,
+  INDIC_POSITIONAL_CATEGORY_BOTTOM_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_LEFT,
+  INDIC_POSITIONAL_CATEGORY_LEFT_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_NOT_APPLICABLE,
+  INDIC_POSITIONAL_CATEGORY_OVERSTRUCK,
+  INDIC_POSITIONAL_CATEGORY_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_TOP,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_VISUAL_ORDER_LEFT
+};
+
+/* Packs two category values into one integer: S in the low 8 bits and P
+ * shifted left by 8.  A compile-time assertion rejects any S or P that is not
+ * below 255.  Both arguments are parenthesised so that expression arguments
+ * expand safely.
+ * (S and P are presumably a syllabic and a positional category -- confirm
+ * against the table generator.)
+ *
+ * Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation
+ * because gcc fails to optimize the latter and fills the table in at runtime. */
+#define USE_COMBINE_CATEGORIES(S,P) \
+  (ASSERT_STATIC_EXPR_ZERO ((S) < 255 && (P) < 255) + \
+   (((P) << 8) | (S)))
+
+/* Returns the USE_TABLE_ELEMENT_TYPE value for codepoint u.
+ * NOTE(review): the value is presumably packed with USE_COMBINE_CATEGORIES();
+ * the definition is not in this header -- confirm. */
+HB_INTERNAL USE_TABLE_ELEMENT_TYPE
+hb_use_get_categories (hb_codepoint_t u);
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH */