Like Uniscribe does.
*/
#include "hb-ot-shape-complex-private.hh"
+#include "hb-ot-shape-private.hh"
for (unsigned int i = 0; i < count; i++)
{
- unsigned int this_type = get_joining_type (buffer->info[i].codepoint, (hb_unicode_general_category_t) buffer->info[i].general_category());
+ unsigned int this_type = get_joining_type (buffer->info[i].codepoint, _hb_glyph_info_get_general_category (&buffer->info[i]));
if (unlikely (this_type == JOINING_TYPE_T)) {
buffer->info[i].arabic_shaping_action() = NONE;
#include "hb-ot-shape-complex-indic-machine.hh"
static void
-remove_joiners (hb_buffer_t *buffer)
-{
- /* For now we remove joiners. However, Uniscbire seems to keep them
- * and output a zero-width space glyph for them. It is not clear to
- * me how that is supposed to interact with GSUB. */
-
- buffer->clear_output ();
- unsigned int count = buffer->len;
- for (buffer->idx = 0; buffer->idx < count;)
- if (unlikely (is_joiner (buffer->info[buffer->idx])))
- buffer->skip_glyph ();
- else
- buffer->next_glyph ();
-
- buffer->swap_buffers ();
-}
-
-static void
initial_reordering (const hb_ot_map_t *map,
hb_face_t *face,
hb_buffer_t *buffer,
mask_array[i] = map->get_1_mask (indic_basic_features[i].tag);
find_syllables (map, buffer, mask_array);
-
- remove_joiners (buffer);
}
static void
/* buffer var allocations, used during the entire shaping process */
-#define general_category() var1.u8[0] /* unicode general_category (hb_unicode_general_category_t) */
-#define combining_class() var1.u8[1] /* unicode combining_class (uint8_t) */
+#define unicode_props0() var1.u8[0]
+#define unicode_props1() var1.u8[1]
/* buffer var allocations, used by complex shapers */
#define complex_var_persistent_u8_0() var2.u8[0]
* matra for the Indic shaper.
*/
-static inline void
-set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
-{
- info->general_category() = hb_unicode_general_category (unicode, info->codepoint);
- info->combining_class() = _hb_unicode_modified_combining_class (unicode, info->codepoint);
-}
-
static void
output_glyph (hb_font_t *font, hb_buffer_t *buffer,
hb_codepoint_t glyph)
{
buffer->output_glyph (glyph);
- set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode);
+ _hb_glyph_info_set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode);
}
static bool
static int
compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
{
- unsigned int a = pa->combining_class();
- unsigned int b = pb->combining_class();
+ unsigned int a = _hb_glyph_info_get_modified_combining_class (pa);
+ unsigned int b = _hb_glyph_info_get_modified_combining_class (pb);
return a < b ? -1 : a == b ? 0 : +1;
}
count = buffer->len;
for (unsigned int i = 0; i < count; i++)
{
- if (buffer->info[i].combining_class() == 0)
+ if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0)
continue;
unsigned int end;
for (end = i + 1; end < count; end++)
- if (buffer->info[end].combining_class() == 0)
+ if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
break;
/* We are going to do a bubble-sort. Only do this if the
if (/* If mode is NOT COMPOSED_FULL (ie. it's COMPOSED_DIACRITICS), we don't try to
* compose a CCC=0 character with it's preceding starter. */
(mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL ||
- buffer->info[buffer->idx].combining_class() != 0) &&
+ _hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx]) != 0) &&
/* If there's anything between the starter and this char, they should have CCC
* smaller than this character's. */
(starter == buffer->out_len - 1 ||
- buffer->out_info[buffer->out_len - 1].combining_class() < buffer->info[buffer->idx].combining_class()) &&
+ _hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) < _hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx])) &&
/* And compose. */
hb_unicode_compose (buffer->unicode,
buffer->out_info[starter].codepoint,
/* Composes. Modify starter and carry on. */
buffer->out_info[starter].codepoint = composed;
/* XXX update cluster */
- set_unicode_props (&buffer->out_info[starter], buffer->unicode);
+ _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode);
buffer->skip_glyph ();
continue;
/* Blocked, or doesn't compose. */
buffer->next_glyph ();
- if (buffer->out_info[buffer->out_len - 1].combining_class() == 0)
+ if (_hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) == 0)
starter = buffer->out_len - 1;
}
buffer->swap_buffers ();
const hb_feature_t *features,
unsigned int num_features);
+
+inline void
+_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
+{
+  /* First props byte: the general category, with bit 7 (0x80) set in
+   * addition when the codepoint is one of the zero-width characters. */
+  unsigned int props = (unsigned int) hb_unicode_general_category (unicode, info->codepoint);
+  if (_hb_unicode_is_zero_width (info->codepoint))
+    props |= 0x80;
+  info->unicode_props0() = props;
+
+  /* Second props byte: whatever _hb_unicode_modified_combining_class
+   * reports for the codepoint. */
+  info->unicode_props1() = _hb_unicode_modified_combining_class (unicode, info->codepoint);
+}
+
+inline hb_unicode_general_category_t
+_hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
+{
+  /* Drop bit 7 of the first props byte; the remaining seven bits are
+   * cast back to the general-category enum. */
+  unsigned int category = info->unicode_props0() & 0x7F;
+  return (hb_unicode_general_category_t) category;
+}
+
+inline unsigned int
+_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
+{
+  /* The second props byte is handed back unchanged. */
+  unsigned int klass = info->unicode_props1();
+  return klass;
+}
+
+inline hb_bool_t
+_hb_glyph_info_is_zero_width (const hb_glyph_info_t *info)
+{
+  /* Bit 7 (0x80) of the first props byte is the flag tested here. */
+  return (info->unicode_props0() & 0x80) != 0;
+}
+
#endif /* HB_OT_SHAPE_PRIVATE_HH */
HB_TAG('r','l','i','g'),
};
+
hb_tag_t horizontal_features[] = {
HB_TAG('c','a','l','t'),
HB_TAG('c','l','i','g'),
/* Prepare */
-static inline void
-set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
-{
- info->general_category() = hb_unicode_general_category (unicode, info->codepoint);
- info->combining_class() = _hb_unicode_modified_combining_class (unicode, info->codepoint);
-}
-
static void
hb_set_unicode_props (hb_buffer_t *buffer)
{
unsigned int count = buffer->len;
for (unsigned int i = 0; i < count; i++)
- set_unicode_props (&buffer->info[i], buffer->unicode);
+ _hb_glyph_info_set_unicode_props (&buffer->info[i], buffer->unicode);
}
static void
{
unsigned int count = buffer->len;
for (unsigned int i = 1; i < count; i++)
- if (FLAG (buffer->info[i].general_category()) &
+ if (FLAG (_hb_glyph_info_get_general_category (&buffer->info[i])) &
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
hb_truetype_kern (c);
}
+/* For every buffer entry flagged as zero-width, substitute the glyph that
+ * hb_font_get_glyph returns for ' ' and zero both advances.  When that
+ * lookup fails the buffer is left untouched. */
+static void
+hb_hide_zerowidth (hb_ot_shape_context_t *c)
+{
+  /* TODO Save the space character in the font? */
+  hb_codepoint_t space;
+  if (!hb_font_get_glyph (c->font, ' ', 0, &space))
+    return; /* No point! */
+
+  hb_buffer_t *buffer = c->buffer;
+  unsigned int count = buffer->len;
+  for (unsigned int i = 0; i < count; i++)
+  {
+    /* Common case: nothing to hide for this entry. */
+    if (!unlikely (_hb_glyph_info_is_zero_width (&buffer->info[i])))
+      continue;
+
+    buffer->info[i].codepoint = space;
+    buffer->pos[i].x_advance = 0;
+    buffer->pos[i].y_advance = 0;
+  }
+}
+
+
/* Do it! */
/* Save the original direction, we use it later. */
c->target_direction = c->buffer->props.direction;
- HB_BUFFER_ALLOCATE_VAR (c->buffer, general_category);
- HB_BUFFER_ALLOCATE_VAR (c->buffer, combining_class);
+ HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props0);
+ HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props1);
- hb_set_unicode_props (c->buffer); /* BUFFER: Set general_category and combining_class */
+ hb_set_unicode_props (c->buffer);
hb_form_clusters (c->buffer);
hb_position_complex_fallback_visual (c);
}
- HB_BUFFER_DEALLOCATE_VAR (c->buffer, combining_class);
- HB_BUFFER_DEALLOCATE_VAR (c->buffer, general_category);
+ hb_hide_zerowidth (c);
+
+ HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props1);
+ HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props0);
c->buffer->props.direction = c->target_direction;
(unicode >= 0xE0100 && unicode <= 0xE01EF)); /* VARIATION SELECTOR-17..256 */
}
+/* Zero-Width invisible characters:
+ *
+ * 00AD SOFT HYPHEN
+ * 034F COMBINING GRAPHEME JOINER
+ *
+ * 200B ZERO WIDTH SPACE
+ * 200C ZERO WIDTH NON-JOINER
+ * 200D ZERO WIDTH JOINER
+ * 200E LEFT-TO-RIGHT MARK
+ * 200F RIGHT-TO-LEFT MARK
+ *
+ * 2028 LINE SEPARATOR
+ *
+ * 202A LEFT-TO-RIGHT EMBEDDING
+ * 202B RIGHT-TO-LEFT EMBEDDING
+ * 202C POP DIRECTIONAL FORMATTING
+ * 202D LEFT-TO-RIGHT OVERRIDE
+ * 202E RIGHT-TO-LEFT OVERRIDE
+ *
+ * 2060 WORD JOINER
+ * 2061 FUNCTION APPLICATION
+ * 2062 INVISIBLE TIMES
+ * 2063 INVISIBLE SEPARATOR
+ *
+ * FEFF ZERO WIDTH NO-BREAK SPACE
+ */
+static inline hb_bool_t
+_hb_unicode_is_zero_width (hb_codepoint_t ch)
+{
+  /* Most of the characters listed above fall in U+2000..U+207F, so test
+   * for that range first and then check the individual sub-ranges. */
+  if ((ch & ~0x007F) == 0x2000)
+    return (ch >= 0x200B && ch <= 0x200F) ||
+	   (ch >= 0x202A && ch <= 0x202E) ||
+	   (ch >= 0x2060 && ch <= 0x2063) ||
+	   (ch == 0x2028);
+
+  /* The three remaining characters live outside that range. */
+  return unlikely (ch == 0x00AD ||
+		   ch == 0x034F ||
+		   ch == 0xFEFF);
+}
#endif /* HB_UNICODE_PRIVATE_HH */