2 * Copyright © 2012 Google, Inc.
4 * This is part of HarfBuzz, a text shaping library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 * Google Author(s): Behdad Esfahbod
27 #ifndef HB_SET_PRIVATE_HH
28 #define HB_SET_PRIVATE_HH
30 #include "hb-private.hh"
31 #include "hb-object-private.hh"
35 * The set digests here implement various "filters" that support
36 * "approximate member query". Conceptually these are like Bloom
37 * Filter and Quotient Filter, however, much smaller, faster, and
38 * designed to fit the requirements of our uses for glyph coverage
39 * queries. As a result, our filters have much higher.
42 template <typename mask_t, unsigned int shift>
43 struct hb_set_digest_lowest_bits_t
47 static const unsigned int mask_bytes = sizeof (mask_t);
48 static const unsigned int mask_bits = sizeof (mask_t) * 8;
49 static const unsigned int num_bits = 0
50 + (mask_bytes >= 1 ? 3 : 0)
51 + (mask_bytes >= 2 ? 1 : 0)
52 + (mask_bytes >= 4 ? 1 : 0)
53 + (mask_bytes >= 8 ? 1 : 0)
54 + (mask_bytes >= 16? 1 : 0)
57 ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
58 ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
60 inline void init (void) {
64 inline void add (hb_codepoint_t g) {
68 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
69 if ((b >> shift) - (a >> shift) >= mask_bits - 1)
72 mask_t ma = mask_for (a);
73 mask_t mb = mask_for (b);
74 mask |= mb + (mb - ma) - (mb < ma);
78 inline bool may_have (hb_codepoint_t g) const {
79 return !!(mask & mask_for (g));
84 static inline mask_t mask_for (hb_codepoint_t g) {
85 return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
90 template <typename head_t, typename tail_t>
91 struct hb_set_digest_combiner_t
95 inline void init (void) {
100 inline void add (hb_codepoint_t g) {
105 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
106 head.add_range (a, b);
107 tail.add_range (a, b);
110 inline bool may_have (hb_codepoint_t g) const {
111 return head.may_have (g) && tail.may_have (g);
123 * This is a combination of digests that performs "best".
124 * There is not much science to this: it's a result of intuition
127 typedef hb_set_digest_combiner_t
129 hb_set_digest_lowest_bits_t<unsigned long, 4>,
130 hb_set_digest_combiner_t
132 hb_set_digest_lowest_bits_t<unsigned long, 0>,
133 hb_set_digest_lowest_bits_t<unsigned long, 9>
144 /* TODO Make this faster and memmory efficient. */
148 hb_object_header_t header;
152 inline void init (void) {
156 inline void fini (void) {
158 inline void clear (void) {
159 if (unlikely (hb_object_is_inert (this)))
162 memset (elts, 0, sizeof elts);
164 inline bool is_empty (void) const {
165 for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
170 inline void add (hb_codepoint_t g)
172 if (unlikely (in_error)) return;
173 if (unlikely (g == INVALID)) return;
174 if (unlikely (g > MAX_G)) return;
177 inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
179 if (unlikely (in_error)) return;
181 for (unsigned int i = a; i < b + 1; i++)
184 inline void del (hb_codepoint_t g)
186 if (unlikely (in_error)) return;
187 if (unlikely (g > MAX_G)) return;
188 elt (g) &= ~mask (g);
190 inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
192 if (unlikely (in_error)) return;
194 for (unsigned int i = a; i < b + 1; i++)
197 inline bool has (hb_codepoint_t g) const
199 if (unlikely (g > MAX_G)) return false;
200 return !!(elt (g) & mask (g));
202 inline bool intersects (hb_codepoint_t first,
203 hb_codepoint_t last) const
205 if (unlikely (first > MAX_G)) return false;
206 if (unlikely (last > MAX_G)) last = MAX_G;
207 unsigned int end = last + 1;
208 for (hb_codepoint_t i = first; i < end; i++)
213 inline bool is_equal (const hb_set_t *other) const
215 for (unsigned int i = 0; i < ELTS; i++)
216 if (elts[i] != other->elts[i])
220 inline void set (const hb_set_t *other)
222 if (unlikely (in_error)) return;
223 for (unsigned int i = 0; i < ELTS; i++)
224 elts[i] = other->elts[i];
226 inline void union_ (const hb_set_t *other)
228 if (unlikely (in_error)) return;
229 for (unsigned int i = 0; i < ELTS; i++)
230 elts[i] |= other->elts[i];
232 inline void intersect (const hb_set_t *other)
234 if (unlikely (in_error)) return;
235 for (unsigned int i = 0; i < ELTS; i++)
236 elts[i] &= other->elts[i];
238 inline void subtract (const hb_set_t *other)
240 if (unlikely (in_error)) return;
241 for (unsigned int i = 0; i < ELTS; i++)
242 elts[i] &= ~other->elts[i];
244 inline void symmetric_difference (const hb_set_t *other)
246 if (unlikely (in_error)) return;
247 for (unsigned int i = 0; i < ELTS; i++)
248 elts[i] ^= other->elts[i];
250 inline void invert (void)
252 if (unlikely (in_error)) return;
253 for (unsigned int i = 0; i < ELTS; i++)
256 inline bool next (hb_codepoint_t *codepoint) const
258 if (unlikely (*codepoint == INVALID)) {
259 hb_codepoint_t i = get_min ();
264 *codepoint = INVALID;
268 for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
273 *codepoint = INVALID;
276 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
283 *last = *first = INVALID;
288 while (next (&i) && i == *last + 1)
294 inline unsigned int get_population (void) const
296 unsigned int count = 0;
297 for (unsigned int i = 0; i < ELTS; i++)
298 count += _hb_popcount32 (elts[i]);
301 inline hb_codepoint_t get_min (void) const
303 for (unsigned int i = 0; i < ELTS; i++)
305 for (unsigned int j = 0; j < BITS; j++)
306 if (elts[i] & (1 << j))
310 inline hb_codepoint_t get_max (void) const
312 for (unsigned int i = ELTS; i; i--)
314 for (unsigned int j = BITS; j; j--)
315 if (elts[i - 1] & (1 << (j - 1)))
316 return (i - 1) * BITS + (j - 1);
320 typedef uint32_t elt_t;
321 static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
322 static const unsigned int SHIFT = 5;
323 static const unsigned int BITS = (1 << SHIFT);
324 static const unsigned int MASK = BITS - 1;
325 static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
326 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
328 elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
329 elt_t elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
330 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
332 elt_t elts[ELTS]; /* XXX 8kb */
334 ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
335 ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
340 #endif /* HB_SET_PRIVATE_HH */