From 1cdd0fa60d9a6aaddc20e646d294d642e2db9be4 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 26 Jun 2019 14:49:15 -0700 Subject: [PATCH] [emoji] Port generator to packtab --- src/gen-emoji-table.py | 18 ++++-- src/hb-unicode-emoji-table.hh | 126 ++++++++++++++++-------------------------- src/hb-unicode.cc | 5 +- 3 files changed, 60 insertions(+), 89 deletions(-) diff --git a/src/gen-emoji-table.py b/src/gen-emoji-table.py index 9afe747..22da228 100755 --- a/src/gen-emoji-table.py +++ b/src/gen-emoji-table.py @@ -4,6 +4,7 @@ from __future__ import print_function, division, absolute_import import sys import os.path from collections import OrderedDict +import packTab if len (sys.argv) != 2: print("usage: ./gen-emoji-table.py emoji-data.txt", file=sys.stderr) @@ -54,12 +55,17 @@ print () for typ,s in ranges.items(): if typ != "Extended_Pictographic": continue - print() - print("static const struct hb_unicode_range_t _hb_unicode_emoji_%s_table[] =" % typ) - print("{") - for pair in sorted(s): - print(" {0x%04X, 0x%04X}," % pair) - print("};") + + arr = dict() + for start,end in s: + for i in range(start,end+1): # (start, end) pairs are inclusive; range() excludes its stop value + arr[i] = 1 + + sol = packTab.pack_table(arr, 0, compression=3) + code = packTab.Code('_hb_emoji') + sol.genCode(code, 'is_'+typ) + code.print_c(linkage='static inline') + print() print () print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */") diff --git a/src/hb-unicode-emoji-table.hh b/src/hb-unicode-emoji-table.hh index aa297fc..1ff79c9 100644 --- a/src/hb-unicode-emoji-table.hh +++ b/src/hb-unicode-emoji-table.hh @@ -23,88 +23,56 @@ #include "hb-unicode.hh" - -static const struct hb_unicode_range_t _hb_unicode_emoji_Extended_Pictographic_table[] = +static const uint8_t +_hb_emoji_u8[448] = { - {0x00A9, 0x00A9}, - {0x00AE, 0x00AE}, - {0x203C, 0x203C}, - {0x2049, 0x2049}, - {0x2122, 0x2122}, - {0x2139, 0x2139}, - {0x2194, 0x2199}, - {0x21A9, 0x21AA}, - {0x231A, 0x231B}, - {0x2328, 0x2328}, - {0x2388, 0x2388}, - {0x23CF, 0x23CF}, - {0x23E9, 0x23F3}, - 
{0x23F8, 0x23FA}, - {0x24C2, 0x24C2}, - {0x25AA, 0x25AB}, - {0x25B6, 0x25B6}, - {0x25C0, 0x25C0}, - {0x25FB, 0x25FE}, - {0x2600, 0x2605}, - {0x2607, 0x2612}, - {0x2614, 0x2685}, - {0x2690, 0x2705}, - {0x2708, 0x2712}, - {0x2714, 0x2714}, - {0x2716, 0x2716}, - {0x271D, 0x271D}, - {0x2721, 0x2721}, - {0x2728, 0x2728}, - {0x2733, 0x2734}, - {0x2744, 0x2744}, - {0x2747, 0x2747}, - {0x274C, 0x274C}, - {0x274E, 0x274E}, - {0x2753, 0x2755}, - {0x2757, 0x2757}, - {0x2763, 0x2767}, - {0x2795, 0x2797}, - {0x27A1, 0x27A1}, - {0x27B0, 0x27B0}, - {0x27BF, 0x27BF}, - {0x2934, 0x2935}, - {0x2B05, 0x2B07}, - {0x2B1B, 0x2B1C}, - {0x2B50, 0x2B50}, - {0x2B55, 0x2B55}, - {0x3030, 0x3030}, - {0x303D, 0x303D}, - {0x3297, 0x3297}, - {0x3299, 0x3299}, - {0x1F000, 0x1F0FF}, - {0x1F10D, 0x1F10F}, - {0x1F12F, 0x1F12F}, - {0x1F16C, 0x1F171}, - {0x1F17E, 0x1F17F}, - {0x1F18E, 0x1F18E}, - {0x1F191, 0x1F19A}, - {0x1F1AD, 0x1F1E5}, - {0x1F201, 0x1F20F}, - {0x1F21A, 0x1F21A}, - {0x1F22F, 0x1F22F}, - {0x1F232, 0x1F23A}, - {0x1F23C, 0x1F23F}, - {0x1F249, 0x1F3FA}, - {0x1F400, 0x1F53D}, - {0x1F546, 0x1F64F}, - {0x1F680, 0x1F6FF}, - {0x1F774, 0x1F77F}, - {0x1F7D5, 0x1F7FF}, - {0x1F80C, 0x1F80F}, - {0x1F848, 0x1F84F}, - {0x1F85A, 0x1F85F}, - {0x1F888, 0x1F88F}, - {0x1F8AE, 0x1F8FF}, - {0x1F90C, 0x1F93A}, - {0x1F93C, 0x1F945}, - {0x1F947, 0x1FFFD}, + 0, 0, 0, 0, 33, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84,118, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 7, 9, 10, 11, 0, + 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, + 7, 7, 7, 14, 15, 16, 17, 18, 19, 20, 7, 7, 7, 7, 7, 21, + 7, 7, 7, 7, 22, 23, 7, 7, 7, 24, 7, 14, 0, 25, 0, 26, + 27, 28, 29, 14, 30, 31, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 22, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,240, 1, 
0, 2, 0, 0, + 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,254, 7, 3, + 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, + 159,255,243,255,255,255,255,255,255,255,255,255,255,255,255,255, + 31, 0,255,255,255,255,255,255, 31,255, 3, 0, 0, 0, 8, 0, + 0, 0, 24, 0,120, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 16, 0, 96, 0, 0, 8, 0, 0, 0, 0, + 255,255,255,255,255,255,255,127, 0, 96, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0,240, 1, 64, 0, 0,254, 3, 0,224,255,255, + 255,255,255,255, 31, 0, 0, 0,254,127, 0, 0, 0, 0,252,115, + 0,254,255,255,255,255,255,255,255,255,255,255,255,255,255, 3, + 255,255,255,255,255,255,255, 31,192,255,255,255,255,255,255,255, + 255,127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,240,127, + 0, 0,224,255,255,255,255,127, 0,112, 0, 0, 0, 0, 0, 0, + 0,127, 0,124, 0, 0, 0, 0, 0,127, 0, 0, 0,192,255,255, + 0,240,255,255,255,255,255,243,159,255,255,255,255,255,255,255, }; +static inline unsigned +_hb_emoji_b4 (const uint8_t* a, unsigned i) +{ + return (a[i>>1]>>((i&1u)<<2))&15u; +} +static inline unsigned +_hb_emoji_b1 (const uint8_t* a, unsigned i) +{ + return (a[i>>3]>>((i&7u)<<0))&1u; +} +static inline uint_fast8_t +_hb_emoji_is_Extended_Pictographic (unsigned u) +{ + return u<131069u?_hb_emoji_b1(192+_hb_emoji_u8,((_hb_emoji_u8[64+(((_hb_emoji_b4(_hb_emoji_u8,u>>6>>4))<<4)+((u>>6)&15u))])<<6)+((u)&63u)):0; +} + + #endif /* HB_UNICODE_EMOJI_TABLE_HH */ /* == End of generated table == */ diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index eb9451c..56f1c62 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -577,9 +577,6 @@ _hb_modified_combining_class[256] = bool _hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp) { - return hb_bsearch (&cp, _hb_unicode_emoji_Extended_Pictographic_table, - ARRAY_LENGTH (_hb_unicode_emoji_Extended_Pictographic_table), - sizeof (hb_unicode_range_t), - hb_unicode_range_t::cmp); + return _hb_emoji_is_Extended_Pictographic (cp); } #endif -- 2.7.4