From 9e47043b2d23d6e6657a2a18dd325d2ef014dba3 Mon Sep 17 00:00:00 2001 From: Panu Matilainen Date: Fri, 7 Sep 2012 10:33:22 +0300 Subject: [PATCH] First cut of a libsolv-style string <-> id pool API - The pool stores "arbitrary" number of strings in a space-efficient manner, with near constant (hashed) string -> id lookup/store and constant time id -> string and id -> string length lookups. - Credits for the idea go to the Suse developers working on libsolv, the basic concept is directly lifted from there but details differ due to using rpm's own hash table implementation etc. Another minor difference is using size_t for offsets to permit over 4GB total data size on 64bit systems, the total number of id's in the pool is limited to uint32 max however (like in libsolv). - Any (re)implementation bugs by yours truly, this is almost certainly going to need further tuning and tweaking, API and otherwise. --- Makefile.am | 1 + preinstall.am | 4 ++ rpmio/Makefile.am | 2 +- rpmio/rpmstrpool.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++ rpmio/rpmstrpool.h | 44 +++++++++++++++ 5 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 rpmio/rpmstrpool.c create mode 100644 rpmio/rpmstrpool.h diff --git a/Makefile.am b/Makefile.am index 179c3e9..9e170df 100644 --- a/Makefile.am +++ b/Makefile.am @@ -58,6 +58,7 @@ pkginclude_HEADERS += rpmio/rpmlog.h pkginclude_HEADERS += rpmio/rpmpgp.h pkginclude_HEADERS += rpmio/rpmsq.h pkginclude_HEADERS += rpmio/rpmstring.h +pkginclude_HEADERS += rpmio/rpmstrpool.h pkginclude_HEADERS += rpmio/rpmsw.h pkginclude_HEADERS += rpmio/rpmfileutil.h pkginclude_HEADERS += rpmio/rpmutil.h diff --git a/preinstall.am b/preinstall.am index 4da80c1..d21d043 100644 --- a/preinstall.am +++ b/preinstall.am @@ -30,6 +30,10 @@ include/rpm/rpmstring.h: rpmio/rpmstring.h include/rpm/$(dirstamp) $(INSTALL_DATA) $(top_srcdir)/rpmio/rpmstring.h include/rpm/rpmstring.h BUILT_SOURCES += include/rpm/rpmstring.h CLEANFILES += include/rpm/rpmstring.h +include/rpm/rpmstrpool.h: rpmio/rpmstrpool.h include/rpm/$(dirstamp) + $(INSTALL_DATA) $(top_srcdir)/rpmio/rpmstrpool.h include/rpm/rpmstrpool.h +BUILT_SOURCES += include/rpm/rpmstrpool.h +CLEANFILES += include/rpm/rpmstrpool.h include/rpm/rpmsw.h: rpmio/rpmsw.h include/rpm/$(dirstamp) $(INSTALL_DATA) $(top_srcdir)/rpmio/rpmsw.h include/rpm/rpmsw.h BUILT_SOURCES += include/rpm/rpmsw.h diff --git a/rpmio/Makefile.am b/rpmio/Makefile.am index 842e482..ebe0fc0 100644 --- a/rpmio/Makefile.am +++ b/rpmio/Makefile.am @@ -15,7 +15,7 @@ librpmio_la_SOURCES = \ rpmpgp.c rpmsq.c rpmsw.c url.c \ rpmio_internal.h rpmhook.h \ rpmstring.c rpmfileutil.c rpmglob.c \ - rpmkeyring.c + rpmkeyring.c rpmstrpool.c librpmio_la_SOURCES += digest_nss.c diff --git a/rpmio/rpmstrpool.c b/rpmio/rpmstrpool.c new file mode 100644 index 0000000..4562f3c --- /dev/null +++ b/rpmio/rpmstrpool.c @@ -0,0 +1,153 @@ +#include "system.h" +#include +#include +#include "debug.h" + +#define HASHTYPE strHash +#define HTKEYTYPE const char * +#define HTDATATYPE rpmsid +#include "lib/rpmhash.H" +#include "lib/rpmhash.C" +#undef HASHTYPE +#undef HTKEYTYPE +#undef HTDATATYPE + +#define STRDATA_CHUNK 65536 +#define STROFFS_CHUNK 2048 + +struct rpmstrPool_s { + size_t * offs; /* offsets into data area */ + rpmsid offs_size; /* largest offset index */; + rpmsid offs_alloced; /* offsets allocation size */ + char * data; /* string data area */ + size_t data_size; /* string data area size */ + size_t data_alloced; /* string data area allocation size */ + strHash hash; /* string -> sid hash table */ + int nrefs; /* refcount */ +}; + +rpmstrPool rpmstrPoolCreate(void) +{ + rpmstrPool pool = xcalloc(1, sizeof(*pool)); + pool->hash = strHashCreate(5, rstrhash, strcmp, NULL, NULL); + pool->nrefs = 1; + return pool; +} + +rpmstrPool rpmstrPoolFree(rpmstrPool pool) +{ + if (pool) { + if (pool->nrefs > 1) { + pool->nrefs--; + } else { + strHashFree(pool->hash); + free(pool->offs); + free(pool->data); + free(pool); + } + } + return NULL; +} + +rpmstrPool rpmstrPoolLink(rpmstrPool pool) +{ + if (pool) + pool->nrefs++; + return pool; +} + +void rpmstrPoolFreeze(rpmstrPool pool) +{ + if (pool) { + pool->hash = strHashFree(pool->hash); + pool->data_alloced = pool->data_size; + pool->data = xrealloc(pool->data, pool->data_alloced); + pool->offs_alloced = pool->offs_size + 1; + pool->offs = xrealloc(pool->offs, + pool->offs_alloced * sizeof(*pool->offs)); + } +} + +void rpmstrPoolUnfreeze(rpmstrPool pool) +{ + if (pool) { + pool->hash = strHashCreate((pool->offs_size / 2) - 1, + rstrhash, strcmp, NULL, NULL); + for (int i = 1; i < pool->offs_size; i++) { + strHashAddEntry(pool->hash, rpmstrPoolStr(pool, i), i); + } + } +} + +static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash) +{ + const char *t = NULL; + size_t ssize = slen + 1; + + if (ssize > pool->data_alloced - pool->data_size) { + size_t need = pool->data_size + ssize; + size_t alloced = pool->data_alloced; + + while (alloced < need) + alloced += STRDATA_CHUNK; + + pool->data = xrealloc(pool->data, alloced); + pool->data_alloced = alloced; + } + + pool->offs_size += 1; + if (pool->offs_alloced < pool->offs_size) { + pool->offs_alloced += STROFFS_CHUNK; + pool->offs = xrealloc(pool->offs, + pool->offs_alloced * sizeof(*pool->offs)); + } + + t = memcpy(pool->data + pool->data_size, s, ssize); + pool->offs[pool->offs_size] = pool->data_size; + pool->data_size += ssize; + + strHashAddHEntry(pool->hash, t, hash, pool->offs_size); + + return pool->offs_size; +} + +rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create) +{ + rpmsid sid = 0; + + if (pool && s) { + unsigned int hash = strHashKeyHash(pool->hash, s); + rpmsid *sids; + if (strHashGetHEntry(pool->hash, s, hash, &sids, NULL, NULL)) { + sid = sids[0]; + } else if (create && pool->hash) { + sid = rpmstrPoolPut(pool, s, slen, hash); + } + } + + return sid; +} + +rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create) +{ + return rpmstrPoolIdn(pool, s, strlen(s), create); +} + +const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid) +{ + const char *s = NULL; + if (pool && sid <= pool->offs_size) + s = pool->data + pool->offs[sid]; + return s; +} + +size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid) +{ + size_t slen = 0; + if (pool && sid <= pool->offs_size) { + size_t end = (sid < pool->offs_size) ? pool->offs[sid + 1] : + pool->offs_size; + slen = end - pool->offs[sid] - 1; + } + return slen; +} diff --git a/rpmio/rpmstrpool.h b/rpmio/rpmstrpool.h new file mode 100644 index 0000000..bc1ed88 --- /dev/null +++ b/rpmio/rpmstrpool.h @@ -0,0 +1,44 @@ +#ifndef _RPMSTRPOOL_H +#define _RPMSTRPOOL_H + +typedef uint32_t rpmsid; +typedef struct rpmstrPool_s * rpmstrPool; + +#ifdef __cplusplus +extern "C" { +#endif + +/* XXX TODO: properly document... */ + +/* create a new string pool */ +rpmstrPool rpmstrPoolCreate(void); + +/* destroy a string pool (refcounted) */ +rpmstrPool rpmstrPoolFree(rpmstrPool sidpool); + +/* reference a string pool */ +rpmstrPool rpmstrPoolLink(rpmstrPool sidpool); + +/* freeze pool to free memory */ +void rpmstrPoolFreeze(rpmstrPool sidpool); + +/* unfreeze pool (ie recreate hash table) */ +void rpmstrPoolUnfreeze(rpmstrPool sidpool); + +/* get the id of a string, optionally storing if not already present */ +rpmsid rpmstrPoolId(rpmstrPool sidpool, const char *s, int create); + +/* get the id of a string + length, optionally storing if not already present */ +rpmsid rpmstrPoolIdn(rpmstrPool sidpool, const char *s, size_t slen, int create); + +/* get a string by its id */ +const char * rpmstrPoolStr(rpmstrPool sidpool, rpmsid sid); + +/* get a strings length by its id (in constant time) */ +size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid); + +#ifdef __cplusplus +} +#endif + +#endif /* _RPMSIDPOOL_H */ -- 2.7.4