[ucd] Save a few more bytes
author Behdad Esfahbod <behdad@behdad.org>
Mon, 24 Jun 2019 00:14:27 +0000 (17:14 -0700)
committer Behdad Esfahbod <behdad@behdad.org>
Mon, 24 Jun 2019 18:58:36 +0000 (11:58 -0700)
Part of https://github.com/harfbuzz/harfbuzz/issues/1652

src/gen-ucd-table.py
src/hb-ucd-table.hh
src/hb-ucd.cc

index dd3a546..7410dd3 100755 (executable)
@@ -34,15 +34,16 @@ ce = {i for i,u in enumerate(ucd) if u['Comp_Ex'] == 'Y'}
 
 assert not any(v for v in dm.values() if len(v) not in (1,2))
 dm1 = sorted(set(v for v in dm.values() if len(v) == 1))
-dm1_u16_array = ['0x%04Xu' % v for v in dm1 if v[0] <= 0xFFFF]
-dm1_u32_array = ['0x%04Xu' % v for v in dm1 if v[0] >  0xFFFF]
+assert all((v[0] >> 16) in (0,2) for v in dm1)
+dm1_p0_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 0]
+dm1_p2_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 2]
 dm1_order = {v:i+1 for i,v in enumerate(dm1)}
 dm2 = sorted((v, i) for i,v in dm.items() if len(v) == 2)
 dm2 = [("HB_CODEPOINT_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" %
         (v+(i if i not in ce and not ccc[i] else 0,)), v)
        for v,i in dm2]
 dm2_array = [s for s,v in dm2]
-l = 1 + len(dm1_u16_array) + len(dm1_u32_array)
+l = 1 + len(dm1_p0_array) + len(dm1_p2_array)
 dm2_order = {v[1]:i+l for i,v in enumerate(dm2)}
 dm_order = {None: 0}
 dm_order.update(dm1_order)
@@ -93,8 +94,8 @@ print()
 
 code = packTab.Code('_hb_ucd')
 sc_array, _ = code.addArray('hb_script_t', 'sc_map', sc_array)
-dm1_16_array, _ = code.addArray('uint16_t', 'dm1_u16_map', dm1_u16_array)
-dm1_32_array, _ = code.addArray('uint32_t', 'dm1_u32_map', dm1_u32_array)
+dm1_p0_array, _ = code.addArray('uint16_t', 'dm1_p0_map', dm1_p0_array)
+dm1_p2_array, _ = code.addArray('uint16_t', 'dm1_p2_map', dm1_p2_array)
 dm2_array, _ = code.addArray('uint64_t', 'dm2_map', dm2_array)
 code.print_c(linkage='static inline')
 
index 0145080..5cd503b 100644 (file)
@@ -86,7 +86,7 @@ _hb_ucd_sc_map[138] =
                    HB_SCRIPT_TANGUT,                   HB_SCRIPT_NEWA,
 };
 static const uint16_t
-_hb_ucd_dm1_u16_map[825] =
+_hb_ucd_dm1_p0_map[825] =
 {
    0x003Bu, 0x004Bu, 0x0060u, 0x00B4u, 0x00B7u, 0x00C5u, 0x02B9u, 0x0300u,
    0x0301u, 0x0313u, 0x0385u, 0x0386u, 0x0388u, 0x0389u, 0x038Au, 0x038Cu,
@@ -193,23 +193,23 @@ _hb_ucd_dm1_u16_map[825] =
    0x9EFEu, 0x9F05u, 0x9F0Fu, 0x9F16u, 0x9F3Bu, 0x9F43u, 0x9F8Du, 0x9F8Eu,
    0x9F9Cu,
 };
-static const uint32_t
-_hb_ucd_dm1_u32_map[110] =
+static const uint16_t
+_hb_ucd_dm1_p2_map[110] =
 {
-  0x20122u,0x2051Cu,0x20525u,0x2054Bu,0x2063Au,0x20804u,0x208DEu,0x20A2Cu,
-  0x20B63u,0x214E4u,0x216A8u,0x216EAu,0x219C8u,0x21B18u,0x21D0Bu,0x21DE4u,
-  0x21DE6u,0x22183u,0x2219Fu,0x22331u,0x226D4u,0x22844u,0x2284Au,0x22B0Cu,
-  0x22BF1u,0x2300Au,0x232B8u,0x2335Fu,0x23393u,0x2339Cu,0x233C3u,0x233D5u,
-  0x2346Du,0x236A3u,0x238A7u,0x23A8Du,0x23AFAu,0x23CBCu,0x23D1Eu,0x23ED1u,
-  0x23F5Eu,0x23F8Eu,0x24263u,0x242EEu,0x243ABu,0x24608u,0x24735u,0x24814u,
-  0x24C36u,0x24C92u,0x24FA1u,0x24FB8u,0x25044u,0x250F2u,0x250F3u,0x25119u,
-  0x25133u,0x25249u,0x2541Du,0x25626u,0x2569Au,0x256C5u,0x2597Cu,0x25AA7u,
-  0x25BABu,0x25C80u,0x25CD0u,0x25F86u,0x261DAu,0x26228u,0x26247u,0x262D9u,
-  0x2633Eu,0x264DAu,0x26523u,0x265A8u,0x267A7u,0x267B5u,0x26B3Cu,0x26C36u,
-  0x26CD5u,0x26D6Bu,0x26F2Cu,0x26FB1u,0x270D2u,0x273CAu,0x27667u,0x278AEu,
-  0x27966u,0x27CA8u,0x27ED3u,0x27F2Fu,0x285D2u,0x285EDu,0x2872Eu,0x28BFAu,
-  0x28D77u,0x29145u,0x291DFu,0x2921Au,0x2940Au,0x29496u,0x295B6u,0x29B30u,
-  0x2A0CEu,0x2A105u,0x2A20Eu,0x2A291u,0x2A392u,0x2A600u,
+   0x0122u, 0x051Cu, 0x0525u, 0x054Bu, 0x063Au, 0x0804u, 0x08DEu, 0x0A2Cu,
+   0x0B63u, 0x14E4u, 0x16A8u, 0x16EAu, 0x19C8u, 0x1B18u, 0x1D0Bu, 0x1DE4u,
+   0x1DE6u, 0x2183u, 0x219Fu, 0x2331u, 0x26D4u, 0x2844u, 0x284Au, 0x2B0Cu,
+   0x2BF1u, 0x300Au, 0x32B8u, 0x335Fu, 0x3393u, 0x339Cu, 0x33C3u, 0x33D5u,
+   0x346Du, 0x36A3u, 0x38A7u, 0x3A8Du, 0x3AFAu, 0x3CBCu, 0x3D1Eu, 0x3ED1u,
+   0x3F5Eu, 0x3F8Eu, 0x4263u, 0x42EEu, 0x43ABu, 0x4608u, 0x4735u, 0x4814u,
+   0x4C36u, 0x4C92u, 0x4FA1u, 0x4FB8u, 0x5044u, 0x50F2u, 0x50F3u, 0x5119u,
+   0x5133u, 0x5249u, 0x541Du, 0x5626u, 0x569Au, 0x56C5u, 0x597Cu, 0x5AA7u,
+   0x5BABu, 0x5C80u, 0x5CD0u, 0x5F86u, 0x61DAu, 0x6228u, 0x6247u, 0x62D9u,
+   0x633Eu, 0x64DAu, 0x6523u, 0x65A8u, 0x67A7u, 0x67B5u, 0x6B3Cu, 0x6C36u,
+   0x6CD5u, 0x6D6Bu, 0x6F2Cu, 0x6FB1u, 0x70D2u, 0x73CAu, 0x7667u, 0x78AEu,
+   0x7966u, 0x7CA8u, 0x7ED3u, 0x7F2Fu, 0x85D2u, 0x85EDu, 0x872Eu, 0x8BFAu,
+   0x8D77u, 0x9145u, 0x91DFu, 0x921Au, 0x940Au, 0x9496u, 0x95B6u, 0x9B30u,
+   0xA0CEu, 0xA105u, 0xA20Eu, 0xA291u, 0xA392u, 0xA600u,
 };
 static const uint64_t
 _hb_ucd_dm2_map[1025] =
index 4d35017..4bdd4f4 100644 (file)
@@ -148,19 +148,19 @@ hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   if (likely (!i)) return false;
   i--;
 
-  if (i < ARRAY_LENGTH (_hb_ucd_dm1_u16_map) + ARRAY_LENGTH (_hb_ucd_dm1_u32_map))
+  if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
   {
-    if (i < ARRAY_LENGTH (_hb_ucd_dm1_u16_map))
-      *a = _hb_ucd_dm1_u16_map[i];
+    if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
+      *a = _hb_ucd_dm1_p0_map[i];
     else
     {
-      i -= ARRAY_LENGTH (_hb_ucd_dm1_u16_map);
-      *a = _hb_ucd_dm1_u32_map[i];
+      i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
+      *a = 0x20000 | _hb_ucd_dm1_p2_map[i];
     }
     *b = 0;
     return true;
   }
-  i -= ARRAY_LENGTH (_hb_ucd_dm1_u16_map) + ARRAY_LENGTH (_hb_ucd_dm1_u32_map);
+  i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
 
   uint64_t v = _hb_ucd_dm2_map[i];
   *a = HB_CODEPOINT_DECODE3_1 (v);