From 094966959f96d9a41fb612fd0b870f5ae8f5a954 Mon Sep 17 00:00:00 2001 From: Michiharu Ariza Date: Mon, 17 Jun 2019 22:12:40 -0700 Subject: [PATCH] add hb_bimap_t, subclass hb_inc_bimap_t replacing CFF::remap_t --- src/Makefile.sources | 1 + src/hb-bimap.hh | 129 ++++++++++++++++++++++++++++++++++++++++++++ src/hb-ot-cff-common.hh | 64 +++------------------- src/hb-ot-cff1-table.hh | 16 +++--- src/hb-subset-cff-common.cc | 12 ++--- src/hb-subset-cff-common.hh | 40 ++++++-------- src/hb-subset-cff1.cc | 26 ++++----- src/hb-subset-cff2.cc | 31 +++++------ 8 files changed, 191 insertions(+), 128 deletions(-) create mode 100644 src/hb-bimap.hh diff --git a/src/Makefile.sources b/src/Makefile.sources index ab228ce..6f42ba3 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -44,6 +44,7 @@ HB_BASE_sources = \ hb-machinery.hh \ hb-map.cc \ hb-map.hh \ + hb-bimap.hh \ hb-meta.hh \ hb-mutex.hh \ hb-null.hh \ diff --git a/src/hb-bimap.hh b/src/hb-bimap.hh new file mode 100644 index 0000000..9dbe0df --- /dev/null +++ b/src/hb-bimap.hh @@ -0,0 +1,129 @@ +/* + * Copyright © 2019 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. 
THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_BIMAP_HH +#define HB_BIMAP_HH + +#include "hb.hh" + +/* Bi-directional map */ +struct hb_bimap_t +{ + hb_bimap_t () { init (); } + ~hb_bimap_t () { fini (); } + + void init () + { + forw_map.init (); + back_map.init (); + } + + void fini () + { + forw_map.fini (); + back_map.fini (); + } + + /* returns HB_MAP_VALUE_INVALID if unmapped */ + hb_codepoint_t forward (hb_codepoint_t lhs) const { return forw_map[lhs]; } + hb_codepoint_t backward (hb_codepoint_t rhs) const { return back_map[rhs]; } + + void set (hb_codepoint_t lhs, hb_codepoint_t rhs) + { + forw_map.set (lhs, rhs); + back_map.set (rhs, lhs); + } + + /* Create an identity map. */ + bool identity (unsigned int size) + { + clear (); + for (hb_codepoint_t i = 0; i < size; i++) set (i, i); + return forw_map.successful && back_map.successful; + } + + void clear () + { + forw_map.clear (); + back_map.clear (); + } + + void reset () + { + forw_map.reset (); + back_map.reset (); + } + + unsigned int get_population () const { return forw_map.get_population (); } + + protected: + hb_map_t forw_map; + hb_map_t back_map; +}; + +/* Incremental bimap: only lhs is given, rhs is incrementally assigned */ +struct hb_inc_bimap_t : hb_bimap_t +{ + /* Add a mapping from lhs to rhs with a unique value if lhs is unknown. + * Return the rhs value as the result. 
+ */ + hb_codepoint_t add (hb_codepoint_t lhs) + { + hb_codepoint_t rhs = forw_map[lhs]; + if (rhs == HB_MAP_VALUE_INVALID) + { + rhs = get_population (); + set (lhs, rhs); + } + return rhs; + } + + bool has (hb_codepoint_t lhs) const { return forw_map.has (lhs); } + + protected: + static int cmp_id (const void* a, const void* b) + { return (int)*(const hb_codepoint_t *)a - (int)*(const hb_codepoint_t *)b; } + + public: + /* Optional: after all mappings have been added in arbitrary order, + * reassign rhs values so that they follow the sorted order of lhs. */ + void sort () + { + hb_codepoint_t count = get_population (); + hb_vector_t work; + work.resize (count); + + for (hb_codepoint_t rhs = 0; rhs < count; rhs++) + work[rhs] = back_map[rhs]; + + work.qsort (cmp_id); + + for (hb_codepoint_t rhs = 0; rhs < count; rhs++) + set (work[rhs], rhs); + } +}; + +#endif /* HB_BIMAP_HH */ diff --git a/src/hb-ot-cff-common.hh b/src/hb-ot-cff-common.hh index 919cb81..2de5a29 100644 --- a/src/hb-ot-cff-common.hh +++ b/src/hb-ot-cff-common.hh @@ -27,6 +27,7 @@ #define HB_OT_CFF_COMMON_HH #include "hb-open-type.hh" +#include "hb-bimap.hh" #include "hb-ot-layout-common.hh" #include "hb-cff-interp-dict-common.hh" #include "hb-subset-plan.hh" @@ -410,57 +411,6 @@ struct table_info_t unsigned int offSize; }; -/* used to remap font index or SID from fullset to subset. 
- * set to CFF_UNDEF_CODE if excluded from subset */ -struct remap_t : hb_vector_t -{ - void init () { SUPER::init (); } - - void fini () { SUPER::fini (); } - - bool reset (unsigned int size) - { - if (unlikely (!SUPER::resize (size))) - return false; - for (unsigned int i = 0; i < length; i++) - (*this)[i] = CFF_UNDEF_CODE; - count = 0; - return true; - } - - bool identity (unsigned int size) - { - if (unlikely (!SUPER::resize (size))) - return false; - unsigned int i; - for (i = 0; i < length; i++) - (*this)[i] = i; - count = i; - return true; - } - - bool excludes (hb_codepoint_t id) const - { return (id < length) && ((*this)[id] == CFF_UNDEF_CODE); } - - bool includes (hb_codepoint_t id) const - { return !excludes (id); } - - unsigned int add (unsigned int i) - { - if ((*this)[i] == CFF_UNDEF_CODE) - (*this)[i] = count++; - return (*this)[i]; - } - - hb_codepoint_t get_count () const { return count; } - - protected: - hb_codepoint_t count; - - private: - typedef hb_vector_t SUPER; -}; - template struct FDArray : CFFIndexOf { @@ -504,7 +454,7 @@ struct FDArray : CFFIndexOf unsigned int offSize_, const hb_vector_t &fontDicts, unsigned int fdCount, - const remap_t &fdmap, + const hb_inc_bimap_t &fdmap, OP_SERIALIZER& opszr, const hb_vector_t &privateInfos) { @@ -519,7 +469,7 @@ struct FDArray : CFFIndexOf unsigned int offset = 1; unsigned int fid = 0; for (unsigned i = 0; i < fontDicts.length; i++) - if (fdmap.includes (i)) + if (fdmap.has (i)) { if (unlikely (fid >= fdCount)) return_trace (false); CFFIndexOf::set_offset_at (fid++, offset); @@ -529,10 +479,10 @@ struct FDArray : CFFIndexOf /* serialize font dicts */ for (unsigned int i = 0; i < fontDicts.length; i++) - if (fdmap.includes (i)) + if (fdmap.has (i)) { FontDict *dict = c->start_embed (); - if (unlikely (!dict->serialize (c, fontDicts[i], opszr, privateInfos[fdmap[i]]))) + if (unlikely (!dict->serialize (c, fontDicts[i], opszr, privateInfos[fdmap.forward (i)]))) return_trace (false); } return_trace 
(true); @@ -543,12 +493,12 @@ struct FDArray : CFFIndexOf static unsigned int calculate_serialized_size (unsigned int &offSize_ /* OUT */, const hb_vector_t &fontDicts, unsigned int fdCount, - const remap_t &fdmap, + const hb_inc_bimap_t &fdmap, OP_SERIALIZER& opszr) { unsigned int dictsSize = 0; for (unsigned int i = 0; i < fontDicts.len; i++) - if (fdmap.includes (i)) + if (fdmap.has (i)) dictsSize += FontDict::calculate_serialized_size (fontDicts[i], opszr); offSize_ = calcOffSize (dictsSize); diff --git a/src/hb-ot-cff1-table.hh b/src/hb-ot-cff1-table.hh index 31d9d87..0dbd0ba 100644 --- a/src/hb-ot-cff1-table.hh +++ b/src/hb-ot-cff1-table.hh @@ -594,10 +594,10 @@ struct Charset struct CFF1StringIndex : CFF1Index { bool serialize (hb_serialize_context_t *c, const CFF1StringIndex &strings, - unsigned int offSize_, const remap_t &sidmap) + unsigned int offSize_, const hb_inc_bimap_t &sidmap) { TRACE_SERIALIZE (this); - if (unlikely ((strings.count == 0) || (sidmap.get_count () == 0))) + if (unlikely ((strings.count == 0) || (sidmap.get_population () == 0))) { if (unlikely (!c->extend_min (this->count))) return_trace (false); @@ -607,11 +607,11 @@ struct CFF1StringIndex : CFF1Index byte_str_array_t bytesArray; bytesArray.init (); - if (!bytesArray.resize (sidmap.get_count ())) + if (!bytesArray.resize (sidmap.get_population ())) return_trace (false); for (unsigned int i = 0; i < strings.count; i++) { - hb_codepoint_t j = sidmap[i]; + hb_codepoint_t j = sidmap.forward (i); if (j != CFF_UNDEF_CODE) bytesArray[j] = strings[i]; } @@ -622,19 +622,19 @@ struct CFF1StringIndex : CFF1Index } /* in parallel to above */ - unsigned int calculate_serialized_size (unsigned int &offSize_ /*OUT*/, const remap_t &sidmap) const + unsigned int calculate_serialized_size (unsigned int &offSize_ /*OUT*/, const hb_inc_bimap_t &sidmap) const { offSize_ = 0; - if ((count == 0) || (sidmap.get_count () == 0)) + if ((count == 0) || (sidmap.get_population () == 0)) return count.static_size; 
unsigned int dataSize = 0; for (unsigned int i = 0; i < count; i++) - if (sidmap[i] != CFF_UNDEF_CODE) + if (sidmap.forward (i) != CFF_UNDEF_CODE) dataSize += length_at (i); offSize_ = calcOffSize(dataSize); - return CFF1Index::calculate_serialized_size (offSize_, sidmap.get_count (), dataSize); + return CFF1Index::calculate_serialized_size (offSize_, sidmap.get_population (), dataSize); } }; diff --git a/src/hb-subset-cff-common.cc b/src/hb-subset-cff-common.cc index 54f6dc2..1da5c4a 100644 --- a/src/hb-subset-cff-common.cc +++ b/src/hb-subset-cff-common.cc @@ -54,7 +54,7 @@ hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan, unsigned int &subset_fdselect_size /* OUT */, unsigned int &subset_fdselect_format /* OUT */, hb_vector_t &fdselect_ranges /* OUT */, - remap_t &fdmap /* OUT */) + hb_inc_bimap_t &fdmap /* OUT */) { subset_fd_count = 0; subset_fdselect_size = 0; @@ -102,23 +102,19 @@ hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan, else { /* create a fdmap */ - if (!fdmap.reset (fdCount)) - { - hb_set_destroy (set); - return false; - } + fdmap.reset (); hb_codepoint_t fd = CFF_UNDEF_CODE; while (set->next (&fd)) fdmap.add (fd); hb_set_destroy (set); - if (unlikely (fdmap.get_count () != subset_fd_count)) + if (unlikely (fdmap.get_population () != subset_fd_count)) return false; } /* update each font dict index stored as "code" in fdselect_ranges */ for (unsigned int i = 0; i < fdselect_ranges.length; i++) - fdselect_ranges[i].code = fdmap[fdselect_ranges[i].code]; + fdselect_ranges[i].code = fdmap.forward (fdselect_ranges[i].code); } /* determine which FDSelect format is most compact */ diff --git a/src/hb-subset-cff-common.hh b/src/hb-subset-cff-common.hh index 921b025..5c95841 100644 --- a/src/hb-subset-cff-common.hh +++ b/src/hb-subset-cff-common.hh @@ -541,39 +541,29 @@ struct subr_subset_param_t bool drop_hints; }; -struct subr_remap_t : remap_t +struct subr_remap_t : hb_inc_bimap_t { void create (hb_set_t *closure) { /* create a 
remapping of subroutine numbers from old to new. * no optimization based on usage counts. fonttools doesn't appear doing that either. */ - reset (closure->get_max () + 1); - for (hb_codepoint_t old_num = 0; old_num < length; old_num++) - { - if (hb_set_has (closure, old_num)) - add (old_num); - } + + hb_codepoint_t old_num = HB_SET_VALUE_INVALID; + while (hb_set_next (closure, &old_num)) + add (old_num); - if (get_count () < 1240) + if (get_population () < 1240) bias = 107; - else if (get_count () < 33900) + else if (get_population () < 33900) bias = 1131; else bias = 32768; } - hb_codepoint_t operator[] (unsigned int old_num) const - { - if (old_num >= length) - return CFF_UNDEF_CODE; - else - return remap_t::operator[] (old_num); - } - int biased_num (unsigned int old_num) const { - hb_codepoint_t new_num = (*this)[old_num]; + hb_codepoint_t new_num = forward (old_num); return (int)new_num - bias; } @@ -581,15 +571,15 @@ struct subr_remap_t : remap_t int bias; }; -struct subr_remap_ts +struct subr_remaps_t { - subr_remap_ts () + subr_remaps_t () { global_remap.init (); local_remaps.init (); } - ~subr_remap_ts () { fini (); } + ~subr_remaps_t () { fini (); } void init (unsigned int fdCount) { @@ -765,13 +755,13 @@ struct subr_subsetter_t bool encode_subrs (const parsed_cs_str_vec_t &subrs, const subr_remap_t& remap, unsigned int fd, str_buff_vec_t &buffArray) const { - unsigned int count = remap.get_count (); + unsigned int count = remap.get_population (); if (unlikely (!buffArray.resize (count))) return false; for (unsigned int old_num = 0; old_num < subrs.length; old_num++) { - hb_codepoint_t new_num = remap[old_num]; + hb_codepoint_t new_num = remap.forward (old_num); if (new_num != CFF_UNDEF_CODE) { if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num]))) @@ -1005,7 +995,7 @@ struct subr_subsetter_t parsed_cs_str_vec_t parsed_global_subrs; hb_vector_t parsed_local_subrs; - subr_remap_ts remaps; + subr_remaps_t remaps; private: typedef typename 
SUBRS::count_type subr_count_type; @@ -1021,7 +1011,7 @@ hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan, unsigned int &subset_fdselect_size /* OUT */, unsigned int &subset_fdselect_format /* OUT */, hb_vector_t &fdselect_ranges /* OUT */, - CFF::remap_t &fdmap /* OUT */); + hb_inc_bimap_t &fdmap /* OUT */); HB_INTERNAL bool hb_serialize_cff_fdselect (hb_serialize_context_t *c, diff --git a/src/hb-subset-cff1.cc b/src/hb-subset-cff1.cc index ab46808..8c0447d 100644 --- a/src/hb-subset-cff1.cc +++ b/src/hb-subset-cff1.cc @@ -31,6 +31,7 @@ #include "hb-open-type.hh" #include "hb-ot-cff1-table.hh" #include "hb-set.h" +#include "hb-bimap.hh" #include "hb-subset-cff1.hh" #include "hb-subset-plan.hh" #include "hb-subset-cff-common.hh" @@ -38,12 +39,12 @@ using namespace CFF; -struct remap_sid_t : remap_t +struct remap_sid_t : hb_inc_bimap_t { unsigned int add (unsigned int sid) { if ((sid != CFF_UNDEF_SID) && !is_std_std (sid)) - return offset_sid (remap_t::add (unoffset_sid (sid))); + return offset_sid (hb_inc_bimap_t::add (unoffset_sid (sid))); else return sid; } @@ -53,7 +54,7 @@ struct remap_sid_t : remap_t if (is_std_std (sid) || (sid == CFF_UNDEF_SID)) return sid; else - return offset_sid (remap_t::operator [] (unoffset_sid (sid))); + return offset_sid (forward (unoffset_sid (sid))); } static const unsigned int num_std_strings = 391; @@ -581,8 +582,7 @@ struct cff_subset_plan { bool collect_sids_in_dicts (const OT::cff1::accelerator_subset_t &acc) { - if (unlikely (!sidmap.reset (acc.stringIndex->count))) - return false; + sidmap.reset (); for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++) { @@ -596,7 +596,7 @@ struct cff_subset_plan { if (acc.fdArray != &Null(CFF1FDArray)) for (unsigned int i = 0; i < orig_fdcount; i++) - if (fdmap.includes (i)) + if (fdmap.has (i)) (void)sidmap.add (acc.fontDicts[i].fontName); return true; @@ -682,7 +682,7 @@ struct cff_subset_plan { /* SIDs for name strings in dicts are added before glyph names so they fit 
in 16-bit int range */ if (unlikely (!collect_sids_in_dicts (acc))) return false; - if (unlikely (sidmap.get_count () > 0x8000)) /* assumption: a dict won't reference that many strings */ + if (unlikely (sidmap.get_population () > 0x8000)) /* assumption: a dict won't reference that many strings */ return false; if (subset_charset) offsets.charsetInfo.size = plan_subset_charset (acc, plan); @@ -739,7 +739,7 @@ struct cff_subset_plan { { subset_localsubrs[fd].init (); offsets.localSubrsInfos[fd].init (); - if (fdmap.includes (fd)) + if (fdmap.has (fd)) { if (!subr_subsetter.encode_localsubrs (fd, subset_localsubrs[fd])) return false; @@ -790,7 +790,7 @@ struct cff_subset_plan { cff1_font_dict_op_serializer_t fontSzr; unsigned int dictsSize = 0; for (unsigned int i = 0; i < acc.fontDicts.length; i++) - if (fdmap.includes (i)) + if (fdmap.has (i)) dictsSize += FontDict::calculate_serialized_size (acc.fontDicts[i], fontSzr); offsets.FDArrayInfo.offSize = calcOffSize (dictsSize); @@ -813,7 +813,7 @@ struct cff_subset_plan { offsets.privateDictInfo.offset = final_size; for (unsigned int i = 0; i < orig_fdcount; i++) { - if (fdmap.includes (i)) + if (fdmap.has (i)) { bool has_localsubrs = offsets.localSubrsInfos[i].size > 0; cff_private_dict_op_serializer_t privSzr (desubroutinize, plan->drop_hints); @@ -857,7 +857,7 @@ struct cff_subset_plan { /* font dict index remap table from fullset FDArray to subset FDArray. 
* set to CFF_UNDEF_CODE if excluded from subset */ - remap_t fdmap; + hb_inc_bimap_t fdmap; str_buff_vec_t subset_charstrings; str_buff_vec_t subset_globalsubrs; @@ -1034,11 +1034,11 @@ static inline bool _write_cff1 (const cff_subset_plan &plan, assert (plan.offsets.privateDictInfo.offset == (unsigned) (c.head - c.start)); for (unsigned int i = 0; i < acc.privateDicts.length; i++) { - if (plan.fdmap.includes (i)) + if (plan.fdmap.has (i)) { PrivateDict *pd = c.start_embed (); if (unlikely (pd == nullptr)) return false; - unsigned int priv_size = plan.fontdicts_mod[plan.fdmap[i]].privateDictInfo.size; + unsigned int priv_size = plan.fontdicts_mod[plan.fdmap.forward (i)].privateDictInfo.size; bool result; cff_private_dict_op_serializer_t privSzr (plan.desubroutinize, plan.drop_hints); /* N.B. local subrs immediately follows its corresponding private dict. i.e., subr offset == private dict size */ diff --git a/src/hb-subset-cff2.cc b/src/hb-subset-cff2.cc index afb23ae..5e36c7f 100644 --- a/src/hb-subset-cff2.cc +++ b/src/hb-subset-cff2.cc @@ -330,18 +330,15 @@ struct cff2_subset_plan { { subset_localsubrs[fd].init (); offsets.localSubrsInfos[fd].init (); - if (fdmap.includes (fd)) - { - if (!subr_subsetter.encode_localsubrs (fd, subset_localsubrs[fd])) - return false; - - unsigned int dataSize = subset_localsubrs[fd].total_size (); - if (dataSize > 0) - { - offsets.localSubrsInfos[fd].offset = final_size; - offsets.localSubrsInfos[fd].offSize = calcOffSize (dataSize); - offsets.localSubrsInfos[fd].size = CFF2Subrs::calculate_serialized_size (offsets.localSubrsInfos[fd].offSize, subset_localsubrs[fd].length, dataSize); - } + if (!subr_subsetter.encode_localsubrs (fd, subset_localsubrs[fd])) + return false; + + unsigned int dataSize = subset_localsubrs[fd].total_size (); + if (dataSize > 0) + { + offsets.localSubrsInfos[fd].offset = final_size; + offsets.localSubrsInfos[fd].offSize = calcOffSize (dataSize); + offsets.localSubrsInfos[fd].size = 
CFF2Subrs::calculate_serialized_size (offsets.localSubrsInfos[fd].offSize, subset_localsubrs[fd].length, dataSize); } } } @@ -382,7 +379,7 @@ struct cff2_subset_plan { cff_font_dict_op_serializer_t fontSzr; unsigned int dictsSize = 0; for (unsigned int i = 0; i < acc.fontDicts.length; i++) - if (fdmap.includes (i)) + if (fdmap.has (i)) dictsSize += FontDict::calculate_serialized_size (acc.fontDicts[i], fontSzr); offsets.FDArrayInfo.offSize = calcOffSize (dictsSize); @@ -401,7 +398,7 @@ struct cff2_subset_plan { offsets.privateDictsOffset = final_size; for (unsigned int i = 0; i < orig_fdcount; i++) { - if (fdmap.includes (i)) + if (fdmap.has (i)) { bool has_localsubrs = offsets.localSubrsInfos[i].size > 0; cff_private_dict_op_serializer_t privSzr (desubroutinize, drop_hints); @@ -431,7 +428,7 @@ struct cff2_subset_plan { unsigned int subset_fdselect_format; hb_vector_t subset_fdselect_ranges; - remap_t fdmap; + hb_inc_bimap_t fdmap; str_buff_vec_t subset_charstrings; str_buff_vec_t subset_globalsubrs; @@ -541,11 +538,11 @@ static inline bool _write_cff2 (const cff2_subset_plan &plan, assert (plan.offsets.privateDictsOffset == (unsigned) (c.head - c.start)); for (unsigned int i = 0; i < acc.privateDicts.length; i++) { - if (plan.fdmap.includes (i)) + if (plan.fdmap.has (i)) { PrivateDict *pd = c.start_embed (); if (unlikely (pd == nullptr)) return false; - unsigned int priv_size = plan.privateDictInfos[plan.fdmap[i]].size; + unsigned int priv_size = plan.privateDictInfos[plan.fdmap.forward (i)].size; bool result; cff_private_dict_op_serializer_t privSzr (plan.desubroutinize, plan.drop_hints); /* N.B. local subrs immediately follows its corresponding private dict. i.e., subr offset == private dict size */ -- 2.7.4