From d9d9fecaeff9820dc39a1b44a68afd11a1bcc4f1 Mon Sep 17 00:00:00 2001 From: Panu Matilainen Date: Mon, 17 Sep 2012 14:20:30 +0300 Subject: [PATCH] Pull a private hash-implementation copy to string pool - The string pool is more specialized a data structure to be efficiently handled with the generic hash table implementation in rpmhash.[CH] and really requires quite a different approach. - For starters, import a private copy generated roughly with: gcc -E -DHASHTYPE=poolHash \ -DHTKEYTYPE="const char *" -DHTDATATYPE=rpmsid rpmhash.C ...and clean it up a bit: eliminate unused functions (except for stats which we'll want to keep for debug purposes), make remaining functions static and overall tidy up from the mess 'gcc -E' created. Lots of redundant fluff here still, to be cleaned up gradually... - This doesn't change anything at all, but opens up the playground for tuning the pool hash implementation in ways the generic version could not (at least sanely) be. --- rpmio/rpmstrpool.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 213 insertions(+), 9 deletions(-) diff --git a/rpmio/rpmstrpool.c b/rpmio/rpmstrpool.c index 940dbaa..f14a124 100644 --- a/rpmio/rpmstrpool.c +++ b/rpmio/rpmstrpool.c @@ -1,22 +1,224 @@ #include "system.h" +#include #include #include #include "debug.h" -#define HASHTYPE poolHash -#define HTKEYTYPE const char * -#define HTDATATYPE rpmsid -#include "lib/rpmhash.H" -#include "lib/rpmhash.C" -#undef HASHTYPE -#undef HTKEYTYPE -#undef HTDATATYPE - #define STRDATA_CHUNK 65536 #define STROFFS_CHUNK 2048 /* XXX this is ridiculously small... */ #define STRHASH_INITSIZE 5 +static int pool_debug = 0; + +typedef struct poolHash_s * poolHash; +typedef unsigned int (*poolHashHashFunctionType) (const char * string); +typedef int (*poolHashHashEqualityType) (const char * key1, const char * key2); +typedef const char * (*poolHashFreeKey) (const char *); +typedef rpmsid (*poolHashFreeData) (rpmsid); +typedef struct poolHashBucket_s * poolHashBucket; + +struct poolHashBucket_s { + poolHashBucket next; + const char * key; + int dataCount; + rpmsid data[1]; +}; + +struct poolHash_s { + int numBuckets; + poolHashBucket * buckets; + poolHashHashFunctionType fn; + poolHashHashEqualityType eq; + poolHashFreeKey freeKey; + int bucketCount; + int keyCount; + int dataCount; + poolHashFreeData freeData; + +}; + +static +poolHashBucket poolHashfindEntry(poolHash ht, const char * key, unsigned int keyHash) +{ + unsigned int hash = keyHash % ht->numBuckets; + poolHashBucket b = ht->buckets[hash]; + + while (b && ht->eq(b->key, key)) + b = b->next; + + return b; +} + +static poolHash poolHashCreate(int numBuckets, + poolHashHashFunctionType fn, poolHashHashEqualityType eq, + poolHashFreeKey freeKey, poolHashFreeData freeData) +{ + poolHash ht; + + ht = xmalloc(sizeof(*ht)); + ht->numBuckets = numBuckets; + ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets)); + ht->freeKey = freeKey; + + ht->freeData = freeData; + ht->dataCount = 0; + + ht->fn = fn; + ht->eq = eq; + ht->bucketCount = ht->keyCount = 0; + return ht; +} + +static void poolHashResize(poolHash ht, int numBuckets) +{ + poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets)); + + for (int i=0; inumBuckets; i++) { + poolHashBucket b = ht->buckets[i]; + poolHashBucket nextB; + while (b != NULL) { + unsigned int hash = ht->fn(b->key) % numBuckets; + nextB = b->next; + b->next = buckets[hash]; + buckets[hash] = b; + b = nextB; + } + } + free(ht->buckets); + ht->buckets = buckets; + ht->numBuckets = numBuckets; +} + +static unsigned int poolHashKeyHash(poolHash ht, const char * key) +{ + return ht->fn(key); +} + +static void poolHashAddHEntry(poolHash ht, const char * key, unsigned int keyHash, rpmsid data) +{ + unsigned int hash = keyHash % ht->numBuckets; + poolHashBucket b = ht->buckets[hash]; + poolHashBucket * b_addr = ht->buckets + hash; + + if (b == NULL) { + ht->bucketCount += 1; + } + + while (b && ht->eq(b->key, key)) { + b_addr = &(b->next); + b = b->next; + } + + if (b == NULL) { + ht->keyCount += 1; + b = xmalloc(sizeof(*b)); + b->key = key; + b->dataCount = 1; + b->data[0] = data; + b->next = ht->buckets[hash]; + ht->buckets[hash] = b; + } else { + b = *b_addr = xrealloc(b, + sizeof(*b) + sizeof(b->data[0]) * (b->dataCount)); + b->data[b->dataCount++] = data; + } + ht->dataCount += 1; + + if (ht->keyCount > ht->numBuckets) { + poolHashResize(ht, ht->numBuckets * 2); + } +} + +static void poolHashAddEntry(poolHash ht, const char * key, rpmsid data) +{ + poolHashAddHEntry(ht, key, ht->fn(key), data); +} + +static void poolHashEmpty( poolHash ht) +{ + poolHashBucket b, n; + int i; + + if (ht->bucketCount == 0) return; + + for (i = 0; i < ht->numBuckets; i++) { + b = ht->buckets[i]; + if (b == NULL) + continue; + ht->buckets[i] = NULL; + + do { + n = b->next; + if (ht->freeKey) + b->key = ht->freeKey(b->key); + + if (ht->freeData) { + int j; + for (j=0; j < b->dataCount; j++ ) { + b->data[j] = ht->freeData(b->data[j]); + } + } + b = _free(b); + } while ((b = n) != NULL); + } + ht->bucketCount = 0; + ht->keyCount = 0; + ht->dataCount = 0; +} + +static poolHash poolHashFree(poolHash ht) +{ + if (ht==NULL) + return ht; + poolHashEmpty(ht); + ht->buckets = _free(ht->buckets); + ht = _free(ht); + + return NULL; +} + +static int poolHashGetHEntry(poolHash ht, const char * key, unsigned int keyHash, + rpmsid** data, int * dataCount, const char ** tableKey) +{ + poolHashBucket b; + int rc = ((b = poolHashfindEntry(ht, key, keyHash)) != NULL); + + if (data) + *data = rc ? b->data : NULL; + if (dataCount) + *dataCount = rc ? b->dataCount : 0; + if (tableKey && rc) + *tableKey = b->key; + + return rc; +} + +static void poolHashPrintStats(poolHash ht) +{ + int i; + poolHashBucket bucket; + + int hashcnt=0, bucketcnt=0, datacnt=0; + int maxbuckets=0; + + for (i=0; inumBuckets; i++) { + int buckets = 0; + for (bucket=ht->buckets[i]; bucket; bucket=bucket->next){ + buckets++; + datacnt += bucket->dataCount; + } + if (maxbuckets < buckets) maxbuckets = buckets; + if (buckets) hashcnt++; + bucketcnt += buckets; + } + fprintf(stderr, "Hashsize: %i\n", ht->numBuckets); + fprintf(stderr, "Hashbuckets: %i\n", hashcnt); + fprintf(stderr, "Keys: %i\n", bucketcnt); + fprintf(stderr, "Values: %i\n", datacnt); + fprintf(stderr, "Max Keys/Bucket: %i\n", maxbuckets); +} + struct rpmstrPool_s { size_t * offs; /* offsets into data area */ rpmsid offs_size; /* largest offset index */; @@ -60,6 +262,8 @@ rpmstrPool rpmstrPoolFree(rpmstrPool pool) if (pool->nrefs > 1) { pool->nrefs--; } else { + if (pool_debug) + poolHashPrintStats(pool->hash); poolHashFree(pool->hash); free(pool->offs); free(pool->data); -- 2.7.4