Imported Upstream version 8.2.2
[platform/upstream/harfbuzz.git] / src / hb-ot-shape.cc
index 4bd8aaf..90f596a 100644 (file)
@@ -37,7 +37,7 @@
 #include "hb-shaper-impl.hh"
 
 #include "hb-ot-shape.hh"
-#include "hb-ot-shape-complex.hh"
+#include "hb-ot-shaper.hh"
 #include "hb-ot-shape-fallback.hh"
 #include "hb-ot-shape-normalize.hh"
 
 
 #include "hb-aat-layout.hh"
 
+static inline bool
+_hb_codepoint_is_regional_indicator (hb_codepoint_t u)
+{ return hb_in_range<hb_codepoint_t> (u, 0x1F1E6u, 0x1F1FFu); }
 
 #ifndef HB_NO_AAT_SHAPE
 static inline bool
-_hb_apply_morx (hb_face_t *face, const hb_segment_properties_t *props)
+_hb_apply_morx (hb_face_t *face, const hb_segment_properties_t &props)
 {
   /* https://github.com/harfbuzz/harfbuzz/issues/2124 */
   return hb_aat_layout_has_substitution (face) &&
-        (HB_DIRECTION_IS_HORIZONTAL (props->direction) || !hb_ot_layout_has_substitution (face));
+        (HB_DIRECTION_IS_HORIZONTAL (props.direction) || !hb_ot_layout_has_substitution (face));
 }
 #endif
 
@@ -74,23 +77,24 @@ hb_ot_shape_collect_features (hb_ot_shape_planner_t          *planner,
                              unsigned int                    num_user_features);
 
 hb_ot_shape_planner_t::hb_ot_shape_planner_t (hb_face_t                     *face,
-                                             const hb_segment_properties_t *props) :
+                                             const hb_segment_properties_t &props) :
                                                face (face),
-                                               props (*props),
-                                               map (face, props),
-                                               aat_map (face, props)
+                                               props (props),
+                                               map (face, props)
 #ifndef HB_NO_AAT_SHAPE
                                                , apply_morx (_hb_apply_morx (face, props))
 #endif
 {
-  shaper = hb_ot_shape_complex_categorize (this);
+  shaper = hb_ot_shaper_categorize (this);
 
   script_zero_marks = shaper->zero_width_marks != HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE;
   script_fallback_mark_positioning = shaper->fallback_position;
 
+#ifndef HB_NO_AAT_SHAPE
   /* https://github.com/harfbuzz/harfbuzz/issues/1528 */
-  if (apply_morx && shaper != &_hb_ot_complex_shaper_default)
-    shaper = &_hb_ot_complex_shaper_dumber;
+  if (apply_morx && shaper != &_hb_ot_shaper_default)
+    shaper = &_hb_ot_shaper_dumber;
+#endif
 }
 
 void
@@ -100,10 +104,6 @@ hb_ot_shape_planner_t::compile (hb_ot_shape_plan_t           &plan,
   plan.props = props;
   plan.shaper = shaper;
   map.compile (plan.map, key);
-#ifndef HB_NO_AAT_SHAPE
-  if (apply_morx)
-    aat_map.compile (plan.aat_map);
-#endif
 
 #ifndef HB_NO_OT_SHAPE_FRACTIONS
   plan.frac_mask = plan.map.get_1_mask (HB_TAG ('f','r','a','c'));
@@ -217,12 +217,9 @@ hb_ot_shape_plan_t::init0 (hb_face_t                     *face,
                           const hb_shape_plan_key_t     *key)
 {
   map.init ();
-#ifndef HB_NO_AAT_SHAPE
-  aat_map.init ();
-#endif
 
   hb_ot_shape_planner_t planner (face,
-                                &key->props);
+                                key->props);
 
   hb_ot_shape_collect_features (&planner,
                                key->user_features,
@@ -236,9 +233,6 @@ hb_ot_shape_plan_t::init0 (hb_face_t                     *face,
     if (unlikely (!data))
     {
       map.fini ();
-#ifndef HB_NO_AAT_SHAPE
-      aat_map.fini ();
-#endif
       return false;
     }
   }
@@ -253,21 +247,13 @@ hb_ot_shape_plan_t::fini ()
     shaper->data_destroy (const_cast<void *> (data));
 
   map.fini ();
-#ifndef HB_NO_AAT_SHAPE
-  aat_map.fini ();
-#endif
 }
 
 void
 hb_ot_shape_plan_t::substitute (hb_font_t   *font,
                                hb_buffer_t *buffer) const
 {
-#ifndef HB_NO_AAT_SHAPE
-  if (unlikely (apply_morx))
-    hb_aat_layout_substitute (this, font, buffer);
-  else
-#endif
-    map.substitute (this, font, buffer);
+  map.substitute (this, font, buffer);
 }
 
 void
@@ -327,6 +313,8 @@ hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner,
 {
   hb_ot_map_builder_t *map = &planner->map;
 
+  map->is_simple = true;
+
   map->enable_feature (HB_TAG('r','v','r','n'));
   map->add_gsub_pause (nullptr);
 
@@ -368,7 +356,10 @@ hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner,
   map->enable_feature (HB_TAG ('H','A','R','F')); /* Considered discretionary. */
 
   if (planner->shaper->collect_features)
+  {
+    map->is_simple = false;
     planner->shaper->collect_features (planner);
+  }
 
   map->enable_feature (HB_TAG ('B','u','z','z')); /* Considered required. */
   map->enable_feature (HB_TAG ('B','U','Z','Z')); /* Considered discretionary. */
@@ -392,6 +383,8 @@ hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner,
     map->enable_feature (HB_TAG ('v','e','r','t'), F_GLOBAL_SEARCH);
   }
 
+  if (num_user_features)
+    map->is_simple = false;
   for (unsigned int i = 0; i < num_user_features; i++)
   {
     const hb_feature_t *feature = &user_features[i];
@@ -401,18 +394,6 @@ hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner,
                      feature->value);
   }
 
-#ifndef HB_NO_AAT_SHAPE
-  if (planner->apply_morx)
-  {
-    hb_aat_map_builder_t *aat_map = &planner->aat_map;
-    for (unsigned int i = 0; i < num_user_features; i++)
-    {
-      const hb_feature_t *feature = &user_features[i];
-      aat_map->add_feature (feature->tag, feature->value);
-    }
-  }
-#endif
-
   if (planner->shaper->override_features)
     planner->shaper->override_features (planner);
 }
@@ -495,18 +476,27 @@ hb_set_unicode_props (hb_buffer_t *buffer)
   {
     _hb_glyph_info_set_unicode_props (&info[i], buffer);
 
+    unsigned gen_cat = _hb_glyph_info_get_general_category (&info[i]);
+    if (FLAG_UNSAFE (gen_cat) &
+       (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) |
+        FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) |
+        FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) |
+        FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) |
+        FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR)))
+      continue;
+
     /* Marks are already set as continuation by the above line.
      * Handle Emoji_Modifier and ZWJ-continuation. */
-    if (unlikely (_hb_glyph_info_get_general_category (&info[i]) == HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL &&
+    if (unlikely (gen_cat == HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL &&
                  hb_in_range<hb_codepoint_t> (info[i].codepoint, 0x1F3FBu, 0x1F3FFu)))
     {
       _hb_glyph_info_set_continuation (&info[i]);
     }
     /* Regional_Indicators are hairy as hell...
      * https://github.com/harfbuzz/harfbuzz/issues/2265 */
-    else if (unlikely (i && hb_in_range<hb_codepoint_t> (info[i].codepoint, 0x1F1E6u, 0x1F1FFu)))
+    else if (unlikely (i && _hb_codepoint_is_regional_indicator (info[i].codepoint)))
     {
-      if (hb_in_range<hb_codepoint_t> (info[i - 1].codepoint, 0x1F1E6u, 0x1F1FFu) &&
+      if (_hb_codepoint_is_regional_indicator (info[i - 1].codepoint) &&
          !_hb_glyph_info_is_continuation (&info[i - 1]))
        _hb_glyph_info_set_continuation (&info[i]);
     }
@@ -524,18 +514,20 @@ hb_set_unicode_props (hb_buffer_t *buffer)
     }
 #endif
     /* Or part of the Other_Grapheme_Extend that is not marks.
-     * As of Unicode 11 that is just:
+     * As of Unicode 15 that is just:
      *
      * 200C          ; Other_Grapheme_Extend # Cf       ZERO WIDTH NON-JOINER
      * FF9E..FF9F    ; Other_Grapheme_Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
      * E0020..E007F  ; Other_Grapheme_Extend # Cf  [96] TAG SPACE..CANCEL TAG
      *
      * ZWNJ is special, we don't want to merge it as there's no need, and keeping
-     * it separate results in more granular clusters.  Ignore Katakana for now.
+     * it separate results in more granular clusters.
      * Tags are used for Emoji sub-region flag sequences:
      * https://github.com/harfbuzz/harfbuzz/issues/1556
+     * Katakana ones were requested:
+     * https://github.com/harfbuzz/harfbuzz/issues/3844
      */
-    else if (unlikely (hb_in_range<hb_codepoint_t> (info[i].codepoint, 0xE0020u, 0xE007Fu)))
+    else if (unlikely (hb_in_ranges<hb_codepoint_t> (info[i].codepoint, 0xFF9Eu, 0xFF9Fu, 0xE0020u, 0xE007Fu)))
       _hb_glyph_info_set_continuation (&info[i]);
   }
 }
@@ -598,24 +590,33 @@ hb_ensure_native_direction (hb_buffer_t *buffer)
    * direction, so that features like ligatures will work as intended.
    *
    * https://github.com/harfbuzz/harfbuzz/issues/501
+   *
+   * Similar thing about Regional_Indicators; They are bidi=L, but Script=Common.
+   * If they are present in a run of natively-RTL text, they get assigned a script
+   * with natively RTL direction, which would result in wrong shaping if we
+   * assign such native RTL direction to them then. Detect that as well.
+   *
+   * https://github.com/harfbuzz/harfbuzz/issues/3314
    */
   if (unlikely (horiz_dir == HB_DIRECTION_RTL && direction == HB_DIRECTION_LTR))
   {
-    bool found_number = false, found_letter = false;
+    bool found_number = false, found_letter = false, found_ri = false;
     const auto* info = buffer->info;
     const auto count = buffer->len;
     for (unsigned i = 0; i < count; i++)
     {
       auto gc = _hb_glyph_info_get_general_category (&info[i]);
       if (gc == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER)
-        found_number = true;
+       found_number = true;
       else if (HB_UNICODE_GENERAL_CATEGORY_IS_LETTER (gc))
       {
-        found_letter = true;
-        break;
+       found_letter = true;
+       break;
       }
+      else if (_hb_codepoint_is_regional_indicator (info[i].codepoint))
+       found_ri = true;
     }
-    if (found_number && !found_letter)
+    if ((found_number || found_ri) && !found_letter)
       horiz_dir = HB_DIRECTION_LTR;
   }
 
@@ -764,6 +765,14 @@ hb_ot_shape_setup_masks_fraction (const hb_ot_shape_context_t *c)
             _hb_glyph_info_get_general_category (&info[end]) ==
             HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER)
        end++;
+      if (start == i || end == i + 1)
+      {
+        if (start == i)
+         buffer->unsafe_to_concat (start, start + 1);
+       if (end == i + 1)
+         buffer->unsafe_to_concat (end - 1, end);
+       continue;
+      }
 
       buffer->unsafe_to_break (start, end);
 
@@ -850,7 +859,7 @@ hb_ot_hide_default_ignorables (hb_buffer_t *buffer,
     }
   }
   else
-    hb_ot_layout_delete_glyphs_inplace (buffer, _hb_glyph_info_is_default_ignorable);
+    buffer->delete_glyphs_inplace (_hb_glyph_info_is_default_ignorable);
 }
 
 
@@ -915,7 +924,7 @@ hb_ot_substitute_default (const hb_ot_shape_context_t *c)
 }
 
 static inline void
-hb_ot_substitute_complex (const hb_ot_shape_context_t *c)
+hb_ot_substitute_plan (const hb_ot_shape_context_t *c)
 {
   hb_buffer_t *buffer = c->buffer;
 
@@ -924,7 +933,13 @@ hb_ot_substitute_complex (const hb_ot_shape_context_t *c)
   if (c->plan->fallback_glyph_classes)
     hb_synthesize_glyph_classes (c->buffer);
 
-  c->plan->substitute (c->font, buffer);
+#ifndef HB_NO_AAT_SHAPE
+  if (unlikely (c->plan->apply_morx))
+    hb_aat_layout_substitute (c->plan, c->font, c->buffer,
+                             c->user_features, c->num_user_features);
+  else
+#endif
+    c->plan->substitute (c->font, buffer);
 }
 
 static inline void
@@ -934,18 +949,24 @@ hb_ot_substitute_pre (const hb_ot_shape_context_t *c)
 
   _hb_buffer_allocate_gsubgpos_vars (c->buffer);
 
-  hb_ot_substitute_complex (c);
+  hb_ot_substitute_plan (c);
+
+#ifndef HB_NO_AAT_SHAPE
+  if (c->plan->apply_morx && c->plan->apply_gpos)
+    hb_aat_layout_remove_deleted_glyphs (c->buffer);
+#endif
 }
 
 static inline void
 hb_ot_substitute_post (const hb_ot_shape_context_t *c)
 {
-  hb_ot_hide_default_ignorables (c->buffer, c->font);
 #ifndef HB_NO_AAT_SHAPE
-  if (c->plan->apply_morx)
+  if (c->plan->apply_morx && !c->plan->apply_gpos)
     hb_aat_layout_remove_deleted_glyphs (c->buffer);
 #endif
 
+  hb_ot_hide_default_ignorables (c->buffer, c->font);
+
   if (c->plan->shaper->postprocess_glyphs &&
     c->buffer->message(c->font, "start postprocess-glyphs")) {
     c->plan->shaper->postprocess_glyphs (c->plan, c->buffer, c->font);
@@ -1021,7 +1042,7 @@ hb_ot_position_default (const hb_ot_shape_context_t *c)
 }
 
 static inline void
-hb_ot_position_complex (const hb_ot_shape_context_t *c)
+hb_ot_position_plan (const hb_ot_shape_context_t *c)
 {
   unsigned int count = c->buffer->len;
   hb_glyph_info_t *info = c->buffer->info;
@@ -1033,7 +1054,7 @@ hb_ot_position_complex (const hb_ot_shape_context_t *c)
    * direction is backward we don't shift and it will end up
    * hanging over the next glyph after the final reordering.
    *
-   * Note: If fallback positinoing happens, we don't care about
+   * Note: If fallback positioning happens, we don't care about
    * this as it will be overridden.
    */
   bool adjust_offsets_when_zeroing = c->plan->adjust_mark_positioning_when_zeroing &&
@@ -1106,7 +1127,7 @@ hb_ot_position (const hb_ot_shape_context_t *c)
 
   hb_ot_position_default (c);
 
-  hb_ot_position_complex (c);
+  hb_ot_position_plan (c);
 
   if (HB_DIRECTION_IS_BACKWARD (c->buffer->props.direction))
     hb_buffer_reverse (c->buffer);
@@ -1123,6 +1144,18 @@ hb_propagate_flags (hb_buffer_t *buffer)
   if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS))
     return;
 
+  /* If we are producing SAFE_TO_INSERT_TATWEEL, then do two things:
+   *
+   * - If the places that the Arabic shaper marked as SAFE_TO_INSERT_TATWEEL,
+   *   are UNSAFE_TO_BREAK, then clear the SAFE_TO_INSERT_TATWEEL,
+   * - Any place that is SAFE_TO_INSERT_TATWEEL, is also now UNSAFE_TO_BREAK.
+   *
+   * We couldn't make this interaction earlier. It has to be done here.
+   */
+  bool flip_tatweel = buffer->flags & HB_BUFFER_FLAG_PRODUCE_SAFE_TO_INSERT_TATWEEL;
+
+  bool clear_concat = (buffer->flags & HB_BUFFER_FLAG_PRODUCE_UNSAFE_TO_CONCAT) == 0;
+
   hb_glyph_info_t *info = buffer->info;
 
   foreach_cluster (buffer, start, end)
@@ -1130,9 +1163,20 @@ hb_propagate_flags (hb_buffer_t *buffer)
     unsigned int mask = 0;
     for (unsigned int i = start; i < end; i++)
       mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED;
-    if (mask)
-      for (unsigned int i = start; i < end; i++)
-       info[i].mask |= mask;
+
+    if (flip_tatweel)
+    {
+      if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)
+       mask &= ~HB_GLYPH_FLAG_SAFE_TO_INSERT_TATWEEL;
+      if (mask & HB_GLYPH_FLAG_SAFE_TO_INSERT_TATWEEL)
+       mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT;
+    }
+
+    if (clear_concat)
+       mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_CONCAT;
+
+    for (unsigned int i = start; i < end; i++)
+      info[i].mask = mask;
   }
 }
 
@@ -1141,8 +1185,6 @@ hb_propagate_flags (hb_buffer_t *buffer)
 static void
 hb_ot_shape_internal (hb_ot_shape_context_t *c)
 {
-  c->buffer->enter ();
-
   /* Save the original direction, we use it later. */
   c->target_direction = c->buffer->props.direction;