4 #include <rpm/rpmstring.h>
5 #include <rpm/rpmstrpool.h>
8 #define STRDATA_CHUNKS 1024
9 #define STRDATA_CHUNK 65536
10 #define STROFFS_CHUNK 2048
11 /* XXX this is ridiculously small... */
12 #define STRHASH_INITSIZE 1024
14 static int pool_debug = 0;
16 typedef struct poolHash_s * poolHash;
17 typedef struct poolHashBucket_s poolHashBucket;
19 struct poolHashBucket_s {
25 poolHashBucket * buckets;
30 char ** offs; /* pointers into data area */
31 rpmsid offs_size; /* largest offset index */;
32 rpmsid offs_alloced; /* offsets allocation size */
34 char ** chunks; /* memory chunks for storing the strings */
35 size_t chunks_size; /* current chunk */
36 size_t chunks_allocated; /* allocated size of the chunks array */
37 size_t chunk_allocated; /* size of the current chunk */
39 poolHash hash; /* string -> sid hash table */
40 int frozen; /* are new id additions allowed? */
41 int nrefs; /* refcount */
44 /* calculate hash and string length on at once */
45 static inline unsigned int rstrlenhash(const char * str, size_t * len)
47 /* Jenkins One-at-a-time hash */
48 unsigned int hash = 0xe4721b68;
67 static inline unsigned int rstrnlenhash(const char * str, size_t n, size_t * len)
69 /* Jenkins One-at-a-time hash */
70 unsigned int hash = 0xe4721b68;
73 while (*s != '\0' && n-- > 0) {
89 static inline unsigned int rstrnhash(const char * string, size_t n)
91 return rstrnlenhash(string, n, NULL);
94 unsigned int rstrhash(const char * string)
96 return rstrlenhash(string, NULL);
99 static inline unsigned int hashbucket(unsigned int hash, unsigned int number)
101 return hash + number*number;
104 static poolHash poolHashCreate(int numBuckets)
108 ht = xmalloc(sizeof(*ht));
109 ht->numBuckets = numBuckets;
110 ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
115 static void poolHashResize(rpmstrPool pool, int numBuckets)
117 poolHash ht = pool->hash;
118 poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
120 for (int i=0; i<ht->numBuckets; i++) {
121 if (!ht->buckets[i].keyid) continue;
122 unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid));
123 for (unsigned int j=0;;j++) {
124 unsigned int hash = hashbucket(keyHash, j) % numBuckets;
125 if (!buckets[hash].keyid) {
126 buckets[hash].keyid = ht->buckets[i].keyid;
131 free((void *)(ht->buckets));
132 ht->buckets = buckets;
133 ht->numBuckets = numBuckets;
136 static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid)
138 poolHash ht = pool->hash;
140 /* keep load factor inbetween 0.25 and 0.5 */
141 if (2*(ht->keyCount) > ht->numBuckets) {
142 poolHashResize(pool, ht->numBuckets * 2);
145 for (unsigned int i=0;;i++) {
146 unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
147 if (!ht->buckets[hash].keyid) {
148 ht->buckets[hash].keyid = keyid;
151 } else if (!strcmp(rpmstrPoolStr(pool, ht->buckets[hash].keyid), key)) {
157 static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid)
159 poolHashAddHEntry(pool, key, rstrhash(key), keyid);
162 static void poolHashEmpty( poolHash ht)
166 if (ht->keyCount == 0) return;
168 for (i = 0; i < ht->numBuckets; i++) {
169 ht->buckets[i].keyid = 0;
174 static poolHash poolHashFree(poolHash ht)
179 ht->buckets = _free(ht->buckets);
185 static void poolHashPrintStats(rpmstrPool pool)
187 poolHash ht = pool->hash;
190 int maxcollisions = 0;
192 for (i=0; i<ht->numBuckets; i++) {
193 unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid));
194 for (unsigned int j=0;;j++) {
195 unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
198 maxcollisions = maxcollisions > j ? maxcollisions : j;
203 fprintf(stderr, "Hashsize: %i\n", ht->numBuckets);
204 fprintf(stderr, "Keys: %i\n", ht->keyCount);
205 fprintf(stderr, "Collisions: %i\n", collisions);
206 fprintf(stderr, "Max collisions: %i\n", maxcollisions);
209 static void rpmstrPoolRehash(rpmstrPool pool)
213 if (pool->offs_size < STRHASH_INITSIZE)
214 sizehint = STRHASH_INITSIZE;
216 sizehint = pool->offs_size * 2;
219 pool->hash = poolHashFree(pool->hash);
221 pool->hash = poolHashCreate(sizehint);
222 for (int i = 1; i < pool->offs_size; i++)
223 poolHashAddEntry(pool, rpmstrPoolStr(pool, i), i);
226 rpmstrPool rpmstrPoolCreate(void)
228 rpmstrPool pool = xcalloc(1, sizeof(*pool));
230 pool->offs_alloced = STROFFS_CHUNK;
231 pool->offs = xcalloc(pool->offs_alloced, sizeof(*pool->offs));
233 pool->chunks_allocated = STRDATA_CHUNKS;
234 pool->chunks = xcalloc(pool->chunks_allocated, sizeof(*pool->chunks));
235 pool->chunks_size = 1;
236 pool->chunk_allocated = STRDATA_CHUNK;
237 pool->offs[1] = xcalloc(1, pool->chunk_allocated);
238 pool->chunks[pool->chunks_size] = pool->offs[1];
240 rpmstrPoolRehash(pool);
245 rpmstrPool rpmstrPoolFree(rpmstrPool pool)
248 if (pool->nrefs > 1) {
252 poolHashPrintStats(pool);
253 poolHashFree(pool->hash);
255 for (int i=1;i<=pool->chunks_size;i++) {
256 pool->chunks[i] = _free(pool->chunks[i]);
265 rpmstrPool rpmstrPoolLink(rpmstrPool pool)
272 void rpmstrPoolFreeze(rpmstrPool pool, int keephash)
274 if (pool && !pool->frozen) {
276 pool->hash = poolHashFree(pool->hash);
278 pool->offs_alloced = pool->offs_size + 2; /* space for end marker */
279 pool->offs = xrealloc(pool->offs,
280 pool->offs_alloced * sizeof(*pool->offs));
285 void rpmstrPoolUnfreeze(rpmstrPool pool)
288 if (pool->hash == NULL) {
289 rpmstrPoolRehash(pool);
295 static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash)
298 size_t ssize = slen + 1;
301 pool->offs_size += 1;
302 /* need one extra for end of string */
303 /* and one extra to mark the end of the chunk */
304 if (pool->offs_alloced <= pool->offs_size + 2) {
305 pool->offs_alloced += STROFFS_CHUNK;
306 pool->offs = xrealloc(pool->offs,
307 pool->offs_alloced * sizeof(*pool->offs));
310 chunk_used = pool->offs[pool->offs_size] - pool->chunks[pool->chunks_size];
311 if (ssize + 1 > pool->chunk_allocated - chunk_used) {
312 /* check size of ->chunks */
313 pool->chunks_size += 1;
314 if (pool->chunks_size >= pool->chunks_allocated) {
315 pool->chunks_allocated += pool->chunks_allocated;
316 pool->chunks = xrealloc(pool->chunks,
317 pool->chunks_allocated * sizeof(*pool->chunks));
320 /* Check if string is bigger than chunks */
321 if (ssize > pool->chunk_allocated) {
322 pool->chunk_allocated = 2 * ssize;
325 /* Dummy entry for end of last string*/
326 pool->offs_size += 1;
328 pool->offs[pool->offs_size] = xcalloc(1, pool->chunk_allocated);
329 pool->chunks[pool->chunks_size] = pool->offs[pool->offs_size];
332 t = memcpy(pool->offs[pool->offs_size], s, slen);
334 pool->offs[pool->offs_size+1] = t + ssize;
336 poolHashAddHEntry(pool, t, hash, pool->offs_size);
338 return pool->offs_size;
341 static rpmsid rpmstrPoolGet(rpmstrPool pool, const char * key, size_t keylen,
342 unsigned int keyHash)
344 poolHash ht = pool->hash;
348 for (unsigned int i=0;; i++) {
349 unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
351 if (!ht->buckets[hash].keyid) {
355 s = rpmstrPoolStr(pool, ht->buckets[hash].keyid);
356 /* pool string could be longer than keylen, require exact matche */
357 if (strncmp(s, key, keylen) == 0 && s[keylen] == '\0')
358 return ht->buckets[hash].keyid;
362 static inline rpmsid strn2id(rpmstrPool pool, const char *s, size_t slen,
363 unsigned int hash, int create)
367 if (pool && pool->hash) {
368 sid = rpmstrPoolGet(pool, s, slen, hash);
369 if (sid == 0 && create && !pool->frozen)
370 sid = rpmstrPoolPut(pool, s, slen, hash);
375 rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create)
380 unsigned int hash = rstrnhash(s, slen);
381 sid = strn2id(pool, s, slen, hash, create);
386 rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create)
392 unsigned int hash = rstrlenhash(s, &slen);
393 sid = strn2id(pool, s, slen, hash, create);
398 const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid)
400 const char *s = NULL;
401 if (pool && sid > 0 && sid <= pool->offs_size)
406 size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid)
409 if (pool && sid <= pool->offs_size) {
410 slen = pool->offs[sid+1] - pool->offs[sid] - 1;
415 int rpmstrPoolStreq(rpmstrPool poolA, rpmsid sidA,
416 rpmstrPool poolB, rpmsid sidB)
419 return (sidA == sidB);
421 return rstreq(rpmstrPoolStr(poolA, sidA), rpmstrPoolStr(poolB, sidB));
424 rpmsid rpmstrPoolNumStr(rpmstrPool pool)
426 return (pool != NULL) ? pool->offs_size : 0;