1 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
2 * GObject introspection: Typelib hashing
4 * Copyright (C) 2010 Red Hat, Inc.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
23 #include <glib-object.h>
26 #include "cmph/cmph.h"
27 #include "gitypelib-internal.h"
29 #define ALIGN_VALUE(this, boundary) \
30 (( ((unsigned long)(this)) + (((unsigned long)(boundary)) -1)) & (~(((unsigned long)(boundary))-1)))
33 * String hashing in the typelib. We have a set of static (fixed) strings,
34 * and given one, we need to find its index number. This problem is perfect
35 * hashing: http://en.wikipedia.org/wiki/Perfect_hashing
37 * I chose CMPH (http://cmph.sourceforge.net/) as it seemed high
38 * quality, well documented, and easy to embed.
40 * CMPH provides a number of algorithms; I chose BDZ, because while CHD
41 * appears to be the "best", the simplicitly of BDZ appealed, and really,
42 * we're only talking about thousands of strings here, not millions, so
43 * a few microseconds is no big deal.
45 * In memory, the format is:
47 * MPH (mph_size bytes)
48 * (padding for alignment to uint32 if necessary)
49 * INDEX (array of guint16)
51 * Because BDZ is not order preserving, we need a lookaside table which
52 * maps the hash value into the directory index.
55 struct _GITypelibHashBuilder {
60 guint32 dirmap_offset;
64 GITypelibHashBuilder *
65 _gi_typelib_hash_builder_new (void)
67 GITypelibHashBuilder *builder = g_slice_new0 (GITypelibHashBuilder);
69 builder->strings = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
74 _gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder,
78 g_return_if_fail (builder->c == NULL);
79 g_hash_table_insert (builder->strings, g_strdup (str), GUINT_TO_POINTER ((guint) value));
83 _gi_typelib_hash_builder_prepare (GITypelibHashBuilder *builder)
86 GHashTableIter hashiter;
88 cmph_io_adapter_t *io;
89 cmph_config_t *config;
94 if (builder->prepared)
95 return builder->buildable;
96 g_assert (builder->c == NULL);
98 num_elts = g_hash_table_size (builder->strings);
99 g_assert (num_elts <= 65536);
101 strs = (char**) g_new (char *, num_elts + 1);
104 g_hash_table_iter_init (&hashiter, builder->strings);
105 while (g_hash_table_iter_next (&hashiter, &key, &value))
107 const char *str = key;
109 strs[i++] = g_strdup (str);
113 io = cmph_io_vector_adapter (strs, num_elts);
114 config = cmph_config_new (io);
115 cmph_config_set_algo (config, CMPH_BDZ);
117 builder->c = cmph_new (config);
118 builder->prepared = TRUE;
121 builder->buildable = FALSE;
124 builder->buildable = TRUE;
125 g_assert (cmph_size (builder->c) == num_elts);
127 /* Pack a size counter at front */
128 offset = sizeof(guint32) + cmph_packed_size (builder->c);
129 builder->dirmap_offset = ALIGN_VALUE (offset, 4);
130 builder->packed_size = builder->dirmap_offset + (num_elts * sizeof(guint16));
132 return builder->buildable;
136 _gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder)
138 g_return_val_if_fail (builder != NULL, 0);
139 g_return_val_if_fail (builder->prepared, 0);
140 g_return_val_if_fail (builder->buildable, 0 );
142 return builder->packed_size;
146 _gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem, guint32 len)
149 GHashTableIter hashiter;
154 g_return_if_fail (builder != NULL);
155 g_return_if_fail (builder->prepared);
156 g_return_if_fail (builder->buildable);
158 g_assert (len >= builder->packed_size);
159 g_assert ((((unsigned long)mem) & 0x3) == 0);
161 *((guint32*) mem) = builder->dirmap_offset;
162 packed_mem = (guint8*)(mem + sizeof(guint32));
163 cmph_pack (builder->c, packed_mem);
165 table = (guint16*) (mem + builder->dirmap_offset);
167 num_elts = g_hash_table_size (builder->strings);
168 g_hash_table_iter_init (&hashiter, builder->strings);
169 while (g_hash_table_iter_next (&hashiter, &key, &value))
171 const char *str = key;
172 guint16 strval = (guint16)GPOINTER_TO_UINT(value);
175 hashv = cmph_search_packed (packed_mem, str, strlen (str));
176 g_assert (hashv >= 0 && hashv < num_elts);
177 table[hashv] = strval;
182 _gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder)
186 cmph_destroy (builder->c);
189 g_hash_table_destroy (builder->strings);
190 g_slice_free (GITypelibHashBuilder, builder);
194 _gi_typelib_hash_search (guint8* memory, const char *str)
198 guint32 dirmap_offset;
201 g_assert ((((unsigned long)memory) & 0x3) == 0);
202 mph = ((guint32*)memory)+1;
204 offset = cmph_search_packed (mph, str, strlen (str));
206 dirmap_offset = *((guint32*)memory);
207 table = (guint16*) (memory + dirmap_offset);
209 return table[offset];