Imported Upstream version 1.8.1
[platform/upstream/harfbuzz.git] / src / hb-ot-cmap-table.hh
index 0207989..c1903f6 100644 (file)
 #define HB_OT_CMAP_TABLE_HH
 
 #include "hb-open-type-private.hh"
+#include "hb-set-private.hh"
 #include "hb-subset-plan.hh"
 
-namespace OT {
-
-
 /*
- * cmap -- Character To Glyph Index Mapping Table
+ * cmap -- Character to Glyph Index Mapping
+ * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
  */
-
 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
 
 
+namespace OT {
+
+
 struct CmapSubtableFormat0
 {
   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
@@ -59,8 +60,8 @@ struct CmapSubtableFormat0
 
   protected:
   HBUINT16     format;         /* Format number is set to 0. */
-  HBUINT16     lengthZ;        /* Byte length of this subtable. */
-  HBUINT16     languageZ;      /* Ignore. */
+  HBUINT16     length;         /* Byte length of this subtable. */
+  HBUINT16     language;       /* Ignore. */
   HBUINT8      glyphIdArray[256];/* An array that maps character
                                 * code to glyph index values. */
   public:
@@ -69,6 +70,158 @@ struct CmapSubtableFormat0
 
 struct CmapSubtableFormat4
 {
+  struct segment_plan
+  {
+    HBUINT16 start_code;
+    HBUINT16 end_code;
+    bool use_delta;
+  };
+
+  bool serialize (hb_serialize_context_t *c,
+                  const hb_subset_plan_t *plan,
+                  const hb_vector_t<segment_plan> &segments)
+  {
+    TRACE_SERIALIZE (this);
+
+    if (unlikely (!c->extend_min (*this))) return_trace (false);
+
+    this->format.set (4);
+    this->length.set (get_sub_table_size (segments));
+
+    this->segCountX2.set (segments.len * 2);
+    this->entrySelector.set (MAX (1u, _hb_bit_storage (segments.len)) - 1);
+    this->searchRange.set (2 * (1u << this->entrySelector));
+    this->rangeShift.set (segments.len * 2 > this->searchRange
+                          ? 2 * segments.len - this->searchRange
+                          : 0);
+
+    HBUINT16 *end_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
+    c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding.
+    HBUINT16 *start_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
+    HBINT16 *id_delta = c->allocate_size<HBINT16> (HBUINT16::static_size * segments.len);
+    HBUINT16 *id_range_offset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
+
+    if (id_range_offset == nullptr)
+      return_trace (false);
+
+    for (unsigned int i = 0; i < segments.len; i++)
+    {
+      end_count[i].set (segments[i].end_code);
+      start_count[i].set (segments[i].start_code);
+      if (segments[i].use_delta)
+      {
+        hb_codepoint_t cp = segments[i].start_code;
+        hb_codepoint_t start_gid = 0;
+        if (unlikely (!plan->new_gid_for_codepoint (cp, &start_gid) && cp != 0xFFFF))
+          return_trace (false);
+        id_delta[i].set (start_gid - segments[i].start_code);
+      } else {
+        id_delta[i].set (0);
+        unsigned int num_codepoints = segments[i].end_code - segments[i].start_code + 1;
+        HBUINT16 *glyph_id_array = c->allocate_size<HBUINT16> (HBUINT16::static_size * num_codepoints);
+        if (glyph_id_array == nullptr)
+          return_trace (false);
+        // From the cmap spec:
+        //
+        // id_range_offset[i]/2
+        // + (cp - segments[i].start_code)
+        // + (id_range_offset + i)
+        // =
+        // glyph_id_array + (cp - segments[i].start_code)
+        //
+        // So, solve for id_range_offset[i]:
+        //
+        // id_range_offset[i]
+        // =
+        // 2 * (glyph_id_array - id_range_offset - i)
+        id_range_offset[i].set (2 * (
+            glyph_id_array - id_range_offset - i));
+        for (unsigned int j = 0; j < num_codepoints; j++)
+        {
+          hb_codepoint_t cp = segments[i].start_code + j;
+          hb_codepoint_t new_gid;
+          if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
+            return_trace (false);
+          glyph_id_array[j].set (new_gid);
+        }
+      }
+    }
+
+    return_trace (true);
+  }
+
+  static inline size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments)
+  {
+    size_t segment_size = 0;
+    for (unsigned int i = 0; i < segments.len; i++)
+    {
+      // Parallel array entries
+      segment_size +=
+            2  // end count
+          + 2  // start count
+          + 2  // delta
+          + 2; // range offset
+
+      if (!segments[i].use_delta)
+        // Add bytes for the glyph index array entries for this segment.
+        segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2;
+    }
+
+    return min_size
+        + 2 // Padding
+        + segment_size;
+  }
+
+  static inline bool create_sub_table_plan (const hb_subset_plan_t *plan,
+                                            hb_vector_t<segment_plan> *segments)
+  {
+    segment_plan *segment = nullptr;
+    hb_codepoint_t last_gid = 0;
+
+    hb_codepoint_t cp = HB_SET_VALUE_INVALID;
+    while (plan->unicodes->next (&cp)) {
+      hb_codepoint_t new_gid;
+      if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
+      {
+       DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
+       return false;
+      }
+
+      if (cp > 0xFFFF) {
+        // We are now outside of unicode BMP, stop adding to this cmap.
+        break;
+      }
+
+      if (!segment
+          || cp != segment->end_code + 1u)
+      {
+        segment = segments->push ();
+        segment->start_code.set (cp);
+        segment->end_code.set (cp);
+        segment->use_delta = true;
+      } else {
+        segment->end_code.set (cp);
+        if (last_gid + 1u != new_gid)
+          // gid's are not consecutive in this segment so delta
+          // cannot be used.
+          segment->use_delta = false;
+      }
+
+      last_gid = new_gid;
+    }
+
+    // There must be a final entry with end_code == 0xFFFF. Check if we need to add one.
+    if (segment == nullptr || segment->end_code != 0xFFFF)
+    {
+      segment = segments->push ();
+      segment->start_code.set (0xFFFF);
+      segment->end_code.set (0xFFFF);
+      segment->use_delta = true;
+    }
+
+    return true;
+  }
+
   struct accelerator_t
   {
     inline void init (const CmapSubtableFormat4 *subtable)
@@ -127,6 +280,17 @@ struct CmapSubtableFormat4
       return true;
     }
 
+    static inline void get_all_codepoints_func (const void *obj, hb_set_t *out)
+    {
+      const accelerator_t *thiz = (const accelerator_t *) obj;
+      for (unsigned int i = 0; i < thiz->segCount; i++)
+      {
+       if (thiz->startCount[i] != 0xFFFFu
+           || thiz->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
+         hb_set_add_range (out, thiz->startCount[i], thiz->endCount[i]);
+      }
+    }
+
     const HBUINT16 *endCount;
     const HBUINT16 *startCount;
     const HBUINT16 *idDelta;
@@ -164,15 +328,17 @@ struct CmapSubtableFormat4
     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
   }
 
+
+
   protected:
   HBUINT16     format;         /* Format number is set to 4. */
   HBUINT16     length;         /* This is the length in bytes of the
                                 * subtable. */
-  HBUINT16     languageZ;      /* Ignore. */
+  HBUINT16     language;       /* Ignore. */
   HBUINT16     segCountX2;     /* 2 x segCount. */
-  HBUINT16     searchRangeZ;   /* 2 * (2**floor(log2(segCount))) */
-  HBUINT16     entrySelectorZ; /* log2(searchRange/2) */
-  HBUINT16     rangeShiftZ;    /* 2 x segCount - searchRange */
+  HBUINT16     searchRange;    /* 2 * (2**floor(log2(segCount))) */
+  HBUINT16     entrySelector;  /* log2(searchRange/2) */
+  HBUINT16     rangeShift;     /* 2 x segCount - searchRange */
 
   HBUINT16     values[VAR];
 #if 0
@@ -193,6 +359,8 @@ struct CmapSubtableLongGroup
 {
   friend struct CmapSubtableFormat12;
   friend struct CmapSubtableFormat13;
+  template<typename U>
+  friend struct CmapSubtableLongSegmented;
   friend struct cmap;
 
   int cmp (hb_codepoint_t codepoint) const
@@ -238,8 +406,8 @@ struct CmapSubtableTrimmed
 
   protected:
   UINT         formatReserved; /* Subtable format and (maybe) padding. */
-  UINT         lengthZ;        /* Byte length of this subtable. */
-  UINT         languageZ;      /* Ignore. */
+  UINT         length;         /* Byte length of this subtable. */
+  UINT         language;       /* Ignore. */
   UINT         startCharCode;  /* First character code covered. */
   ArrayOf<GlyphID, UINT>
                glyphIdArray;   /* Array of glyph index values for character
@@ -265,6 +433,15 @@ struct CmapSubtableLongSegmented
     return true;
   }
 
+  inline void get_all_codepoints (hb_set_t *out) const
+  {
+    for (unsigned int i = 0; i < this->groups.len; i++) {
+      hb_set_add_range (out,
+                       this->groups[i].startCharCode,
+                       this->groups[i].endCharCode);
+    }
+  }
+
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -272,20 +449,20 @@ struct CmapSubtableLongSegmented
   }
 
   inline bool serialize (hb_serialize_context_t *c,
-                         hb_prealloced_array_t<CmapSubtableLongGroup> &group_data)
+                         const hb_vector_t<CmapSubtableLongGroup> &group_data)
   {
     TRACE_SERIALIZE (this);
     if (unlikely (!c->extend_min (*this))) return_trace (false);
-    Supplier<CmapSubtableLongGroup> supplier (group_data.array, group_data.len);
+    Supplier<CmapSubtableLongGroup> supplier (group_data.arrayZ, group_data.len);
     if (unlikely (!groups.serialize (c, supplier, group_data.len))) return_trace (false);
     return true;
   }
 
   protected:
   HBUINT16     format;         /* Subtable format; set to 12. */
-  HBUINT16     reservedZ;      /* Reserved; set to 0. */
-  HBUINT32             lengthZ;        /* Byte length of this subtable. */
-  HBUINT32             languageZ;      /* Ignore. */
+  HBUINT16     reserved;       /* Reserved; set to 0. */
+  HBUINT32     length;         /* Byte length of this subtable. */
+  HBUINT32     language;       /* Ignore. */
   SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
                groups;         /* Groupings. */
   public:
@@ -297,6 +474,69 @@ struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
                                                hb_codepoint_t u)
   { return group.glyphID + (u - group.startCharCode); }
+
+
+  bool serialize (hb_serialize_context_t *c,
+                  const hb_vector_t<CmapSubtableLongGroup> &groups)
+  {
+    if (unlikely (!c->extend_min (*this))) return false;
+
+    this->format.set (12);
+    this->reserved.set (0);
+    this->length.set (get_sub_table_size (groups));
+
+    return CmapSubtableLongSegmented<CmapSubtableFormat12>::serialize (c, groups);
+  }
+
+  static inline size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups)
+  {
+    return 16 + 12 * groups.len;
+  }
+
+  static inline bool create_sub_table_plan (const hb_subset_plan_t *plan,
+                                            hb_vector_t<CmapSubtableLongGroup> *groups)
+  {
+    CmapSubtableLongGroup *group = nullptr;
+
+    hb_codepoint_t cp = HB_SET_VALUE_INVALID;
+    while (plan->unicodes->next (&cp)) {
+      hb_codepoint_t new_gid;
+      if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
+      {
+       DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
+       return false;
+      }
+
+      if (!group || !_is_gid_consecutive (group, cp, new_gid))
+      {
+        group = groups->push ();
+        group->startCharCode.set (cp);
+        group->endCharCode.set (cp);
+        group->glyphID.set (new_gid);
+      } else
+      {
+        group->endCharCode.set (cp);
+      }
+    }
+
+    DEBUG_MSG(SUBSET, nullptr, "cmap");
+    for (unsigned int i = 0; i < groups->len; i++) {
+      CmapSubtableLongGroup& group = (*groups)[i];
+      DEBUG_MSG(SUBSET, nullptr, "  %d: U+%04X-U+%04X, gid %d-%d", i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode));
+    }
+
+    return true;
+  }
+
+ private:
+  static inline bool _is_gid_consecutive (CmapSubtableLongGroup *group,
+                                         hb_codepoint_t cp,
+                                         hb_codepoint_t new_gid)
+  {
+    return (cp - 1 == group->endCharCode) &&
+       new_gid == group->glyphID + (cp - group->startCharCode);
+  }
+
 };
 
 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
@@ -328,7 +568,7 @@ struct UnicodeValueRange
     return_trace (c->check_struct (this));
   }
 
-  UINT24       startUnicodeValue;      /* First value in this range. */
+  HBUINT24     startUnicodeValue;      /* First value in this range. */
   HBUINT8              additionalCount;        /* Number of additional values in this
                                         * range. */
   public:
@@ -350,7 +590,7 @@ struct UVSMapping
     return_trace (c->check_struct (this));
   }
 
-  UINT24       unicodeValue;   /* Base Unicode value of the UVS */
+  HBUINT24     unicodeValue;   /* Base Unicode value of the UVS */
   GlyphID      glyphID;        /* Glyph ID of the UVS */
   public:
   DEFINE_SIZE_STATIC (5);
@@ -392,7 +632,7 @@ struct VariationSelectorRecord
                  nonDefaultUVS.sanitize (c, base));
   }
 
-  UINT24       varSelector;    /* Variation selector. */
+  HBUINT24     varSelector;    /* Variation selector. */
   LOffsetTo<DefaultUVS>
                defaultUVS;     /* Offset to Default UVS Table. May be 0. */
   LOffsetTo<NonDefaultUVS>
@@ -419,7 +659,7 @@ struct CmapSubtableFormat14
 
   protected:
   HBUINT16     format;         /* Format number is set to 14. */
-  HBUINT32             lengthZ;        /* Byte length of this subtable. */
+  HBUINT32     length;         /* Byte length of this subtable. */
   SortedArrayOf<VariationSelectorRecord, HBUINT32>
                record;         /* Variation selector records; sorted
                                 * in increasing order of `varSelector'. */
@@ -509,6 +749,33 @@ struct cmap
 {
   static const hb_tag_t tableTag       = HB_OT_TAG_cmap;
 
+  struct subset_plan {
+    subset_plan(void)
+    {
+      format4_segments.init();
+      format12_groups.init();
+    }
+
+    ~subset_plan(void)
+    {
+      format4_segments.fini();
+      format12_groups.fini();
+    }
+
+    inline size_t final_size() const
+    {
+      return 4 // header
+          +  8 * 3 // 3 EncodingRecord
+          +  CmapSubtableFormat4::get_sub_table_size (this->format4_segments)
+          +  CmapSubtableFormat12::get_sub_table_size (this->format12_groups);
+    }
+
+    // Format 4
+    hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments;
+    // Format 12
+    hb_vector_t<CmapSubtableLongGroup> format12_groups;
+  };
+
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -517,41 +784,17 @@ struct cmap
                  encodingRecord.sanitize (c, this));
   }
 
-  inline bool populate_groups (hb_subset_plan_t *plan,
-                              hb_prealloced_array_t<CmapSubtableLongGroup> *groups) const
+  inline bool _create_plan (const hb_subset_plan_t *plan,
+                            subset_plan *cmap_plan) const
   {
-    CmapSubtableLongGroup *group = nullptr;
-    for (unsigned int i = 0; i < plan->codepoints.len; i++) {
-
-      hb_codepoint_t cp = plan->codepoints[i];
-      if (!group || cp - 1 != group->endCharCode)
-      {
-        group = groups->push ();
-        group->startCharCode.set (cp);
-        group->endCharCode.set (cp);
-        hb_codepoint_t new_gid;
-        if (unlikely (!hb_subset_plan_new_gid_for_codepoint (plan, cp, &new_gid)))
-        {
-          DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
-          return false;
-        }
-        group->glyphID.set (new_gid);
-      } else
-      {
-        group->endCharCode.set (cp);
-      }
-    }
-
-    DEBUG_MSG(SUBSET, nullptr, "cmap");
-    for (unsigned int i = 0; i < groups->len; i++) {
-      CmapSubtableLongGroup& group = (*groups)[i];
-      DEBUG_MSG(SUBSET, nullptr, "  %d: U+%04X-U+%04X, gid %d-%d", i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode));
-    }
+    if (unlikely( !CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments)))
+      return false;
 
-    return true;
+    return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups);
   }
 
-  inline bool _subset (hb_prealloced_array_t<CmapSubtableLongGroup> &groups,
+  inline bool _subset (const hb_subset_plan_t *plan,
+                       const subset_plan &cmap_subset_plan,
                       size_t dest_sz,
                       void *dest) const
   {
@@ -565,25 +808,46 @@ struct cmap
 
     cmap->version.set (0);
 
-    if (unlikely (!cmap->encodingRecord.serialize (&c, /* numTables */ 1))) return false;
+    if (unlikely (!cmap->encodingRecord.serialize (&c, /* numTables */ 3)))
+      return false;
+
+    // TODO(grieger): Convert the below to a for loop
 
-    EncodingRecord &rec = cmap->encodingRecord[0];
-    rec.platformID.set (3); // Windows
-    rec.encodingID.set (10); // Unicode UCS-4
+    // Format 4, Plat 0 Encoding Record
+    EncodingRecord &format4_plat0_rec = cmap->encodingRecord[0];
+    format4_plat0_rec.platformID.set (0); // Unicode
+    format4_plat0_rec.encodingID.set (3);
 
-    /* capture offset to subtable */
-    CmapSubtable &subtable = rec.subtable.serialize (&c, cmap);
+    // Format 4, Plat 3 Encoding Record
+    EncodingRecord &format4_plat3_rec = cmap->encodingRecord[1];
+    format4_plat3_rec.platformID.set (3); // Windows
+    format4_plat3_rec.encodingID.set (1); // Unicode BMP
 
-    subtable.u.format.set (12);
+    // Format 12 Encoding Record
+    EncodingRecord &format12_rec = cmap->encodingRecord[2];
+    format12_rec.platformID.set (3); // Windows
+    format12_rec.encodingID.set (10); // Unicode UCS-4
 
-    CmapSubtableFormat12 &format12 = subtable.u.format12;
-    if (unlikely (!c.extend_min (format12))) return false;
+    // Write out format 4 sub table
+    {
+      CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, cmap);
+      format4_plat3_rec.subtable.set (format4_plat0_rec.subtable);
+      subtable.u.format.set (4);
 
-    format12.format.set (12);
-    format12.reservedZ.set (0);
-    format12.lengthZ.set (16 + 12 * groups.len);
+      CmapSubtableFormat4 &format4 = subtable.u.format4;
+      if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments)))
+        return false;
+    }
 
-    if (unlikely (!format12.serialize (&c, groups))) return false;
+    // Write out format 12 sub table.
+    {
+      CmapSubtable &subtable = format12_rec.subtable.serialize (&c, cmap);
+      subtable.u.format.set (12);
+
+      CmapSubtableFormat12 &format12 = subtable.u.format12;
+      if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups)))
+        return false;
+    }
 
     c.end_serialize ();
 
@@ -592,24 +856,25 @@ struct cmap
 
   inline bool subset (hb_subset_plan_t *plan) const
   {
-    hb_auto_array_t<CmapSubtableLongGroup> groups;
+    subset_plan cmap_subset_plan;
 
-    if (unlikely (!populate_groups (plan, &groups))) return false;
+    if (unlikely (!_create_plan (plan, &cmap_subset_plan)))
+    {
+      DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan.");
+      return false;
+    }
 
     // We now know how big our blob needs to be
-    // TODO use APIs from the structs to get size?
-    size_t dest_sz = 4 // header
-                   + 8 // 1 EncodingRecord
-                   + 16 // Format 12 header
-                   + 12 * groups.len; // SequentialMapGroup records
+    size_t dest_sz = cmap_subset_plan.final_size();
     void *dest = malloc (dest_sz);
     if (unlikely (!dest)) {
       DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output", (unsigned long) dest_sz);
       return false;
     }
 
-    if (unlikely (!_subset (groups, dest_sz, dest)))
+    if (unlikely (!_subset (plan, cmap_subset_plan, dest_sz, dest)))
     {
+      DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap.");
       free (dest);
       return false;
     }
@@ -620,7 +885,7 @@ struct cmap
                                             HB_MEMORY_MODE_READONLY,
                                             dest,
                                             free);
-    bool result =  hb_subset_plan_add_table (plan, HB_OT_TAG_cmap, cmap_prime);
+    bool result =  plan->add_table (HB_OT_TAG_cmap, cmap_prime);
     hb_blob_destroy (cmap_prime);
     return result;
   }
@@ -630,7 +895,7 @@ struct cmap
     inline void init (hb_face_t *face)
     {
       this->blob = OT::Sanitizer<OT::cmap>().sanitize (face->reference_table (HB_OT_TAG_cmap));
-      const OT::cmap *cmap = OT::Sanitizer<OT::cmap>::lock_instance (this->blob);
+      const OT::cmap *cmap = this->blob->as<OT::cmap> ();
       const OT::CmapSubtable *subtable = nullptr;
       const OT::CmapSubtableFormat14 *subtable_uvs = nullptr;
 
@@ -651,7 +916,7 @@ struct cmap
        if (subtable) symbol = true;
       }
       /* Meh. */
-      if (!subtable) subtable = &OT::Null(OT::CmapSubtable);
+      if (!subtable) subtable = &Null(OT::CmapSubtable);
 
       /* UVS subtable. */
       if (!subtable_uvs)
@@ -661,26 +926,36 @@ struct cmap
          subtable_uvs = &st->u.format14;
       }
       /* Meh. */
-      if (!subtable_uvs) subtable_uvs = &OT::Null(OT::CmapSubtableFormat14);
+      if (!subtable_uvs) subtable_uvs = &Null(OT::CmapSubtableFormat14);
 
       this->uvs_table = subtable_uvs;
 
       this->get_glyph_data = subtable;
       if (unlikely (symbol))
+      {
        this->get_glyph_func = get_glyph_from_symbol<OT::CmapSubtable>;
-      else
+       this->get_all_codepoints_func = null_get_all_codepoints_func;
+      } else {
        switch (subtable->u.format) {
        /* Accelerate format 4 and format 12. */
-       default: this->get_glyph_func = get_glyph_from<OT::CmapSubtable>;               break;
-       case 12: this->get_glyph_func = get_glyph_from<OT::CmapSubtableFormat12>;       break;
+       default:
+         this->get_glyph_func = get_glyph_from<OT::CmapSubtable>;
+         this->get_all_codepoints_func = null_get_all_codepoints_func;
+         break;
+       case 12:
+         this->get_glyph_func = get_glyph_from<OT::CmapSubtableFormat12>;
+         this->get_all_codepoints_func = get_all_codepoints_from<OT::CmapSubtableFormat12>;
+         break;
        case  4:
          {
            this->format4_accel.init (&subtable->u.format4);
            this->get_glyph_data = &this->format4_accel;
            this->get_glyph_func = this->format4_accel.get_glyph_func;
+           this->get_all_codepoints_func = this->format4_accel.get_all_codepoints_func;
          }
          break;
        }
+      }
     }
 
     inline void fini (void)
@@ -710,10 +985,22 @@ struct cmap
       return get_nominal_glyph (unicode, glyph);
     }
 
+    inline void get_all_codepoints (hb_set_t *out) const
+    {
+      this->get_all_codepoints_func (get_glyph_data, out);
+    }
+
     protected:
     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
                                              hb_codepoint_t codepoint,
                                              hb_codepoint_t *glyph);
+    typedef void (*hb_cmap_get_all_codepoints_func_t) (const void *obj,
+                                                      hb_set_t *out);
+
+    static inline void null_get_all_codepoints_func (const void *obj, hb_set_t *out)
+    {
+      // NOOP
+    }
 
     template <typename Type>
     static inline bool get_glyph_from (const void *obj,
@@ -725,6 +1012,14 @@ struct cmap
     }
 
     template <typename Type>
+    static inline void get_all_codepoints_from (const void *obj,
+                                               hb_set_t *out)
+    {
+      const Type *typed_obj = (const Type *) obj;
+      typed_obj->get_all_codepoints (out);
+    }
+
+    template <typename Type>
     static inline bool get_glyph_from_symbol (const void *obj,
                                              hb_codepoint_t codepoint,
                                              hb_codepoint_t *glyph)
@@ -738,7 +1033,7 @@ struct cmap
        /* For symbol-encoded OpenType fonts, we duplicate the
         * U+F000..F0FF range at U+0000..U+00FF.  That's what
         * Windows seems to do, and that's hinted about at:
-        * http://www.microsoft.com/typography/otspec/recom.htm
+        * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
         * under "Non-Standard (Symbol) Fonts". */
        return typed_obj->get_glyph (0xF000u + codepoint, glyph);
       }
@@ -749,6 +1044,8 @@ struct cmap
     private:
     hb_cmap_get_glyph_func_t get_glyph_func;
     const void *get_glyph_data;
+    hb_cmap_get_all_codepoints_func_t get_all_codepoints_func;
+
     OT::CmapSubtableFormat4::accelerator_t format4_accel;
 
     const OT::CmapSubtableFormat14 *uvs_table;