#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#include "pool.h"
+#include "util.h"
#include "repo_write.h"
+/* #define IGNORE_NEED_FREQUENCY */
+
/*------------------------------------------------------------------*/
/* Id map optimizations */
Id map;
} NeedId;
+
+#define RELOFF(id) (pool->ss.nstrings + GETRELID(id))
+
/*
* increment need Id
* idarray: array of Ids, ID_NULL terminated
while (ISRELDEP(id))
{
Reldep *rd = GETRELDEP(pool, id);
- needid[GETRELID(pool, id)].need++;
+ needid[RELOFF(id)].need++;
if (ISRELDEP(rd->evr))
{
Id ida[2];
return a->map - b->map;
}
+static Pool *cmp_pool;
+static int
+needid_cmp_need_s(const void *ap, const void *bp)
+{
+ const NeedId *a = ap;
+ const NeedId *b = bp;
+ if (a == b)
+ return 0;
+#ifdef IGNORE_NEED_FREQUENCY
+ if (a->need == 0)
+ return 1;
+ else if (b->need == 0)
+ return -1;
+#else
+ int r;
+ r = b->need - a->need;
+ if (r)
+ return r;
+#endif
+ char *as = cmp_pool->ss.stringspace + cmp_pool->ss.strings[a->map];
+ char *bs = cmp_pool->ss.stringspace + cmp_pool->ss.strings[b->map];
+ size_t alen = strlen (as);
+ size_t blen = strlen (bs);
+ return memcmp (as, bs, alen < blen ? alen + 1 : blen + 1);
+}
+
/*------------------------------------------------------------------*/
/* output routines */
return;
if (!*ids)
{
- write_u8(fp, ID_NULL);
+ write_u8(fp, 0);
return;
}
for (;;)
{
id = *ids++;
- id = needid[ISRELDEP(id) ? GETRELID(pool, id) : id].need;
+ if (needid)
+ id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
if (id >= 64)
id = (id & 63) | ((id & ~63) << 1);
if (!*ids)
}
}
+static int
+cmp_ids (const void *pa, const void *pb)
+{
+ Id a = *(Id *)pa;
+ Id b = *(Id *)pb;
+ return a - b;
+}
+
+static void
+write_idarray_sort(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
+{
+ int len, i;
+ if (!ids)
+ return;
+ if (!*ids)
+ {
+ write_u8 (fp, 0);
+ return;
+ }
+ /* If we ever share idarrays we can't do this in-place. */
+ for (len = 0; ids[len]; len++)
+ {
+ Id id = ids[len];
+ if (needid)
+ ids[len] = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
+ }
+
+ /* That bloody solvable:prereqmarker needs to stay in position :-( */
+ Id prereq = SOLVABLE_PREREQMARKER;
+ if (needid)
+ prereq = needid[prereq].need;
+ for (i = 0; i < len; i++)
+ if (ids[i] == prereq)
+ break;
+ if (i > 1)
+ qsort (ids, i, sizeof (Id), cmp_ids);
+ if ((len - i) > 2)
+ qsort (ids + i + 1, len - i - 1, sizeof (Id), cmp_ids);
+
+ Id old = 0;
+ for (i = 0; i < len; i++)
+ /* Ugly PREREQ handling. A "difference" of 1 is the prereq marker,
+ hence all real differences are offsetted by 1. Otherwise we would
+ have to handle negative differences, which would cost code space for
+ the encoding of the sign. We loose the exact mapping of prereq here,
+ but we know the result, so we can recover from that in the reader. */
+ if (ids[i] == prereq)
+ old = ids[i] = 1;
+ else
+ {
+ ids[i] -= old;
+ old = ids[i] + old;
+ /* difference is zero --> we have multiple equal elements in the list */
+ assert (ids[i] > 0);
+ ids[i]++;
+ }
+
+ /* The differencing above produces many runs of ones and twos. I tried
+ fairly elaborate schemes to RLE those, but they give only very mediocre
+ improvements in compression, as coding the escapes costs quite some
+ space. Even if they are coded only as bits in IDs. The best improvement
+ was about 2.7% for the whole .solv file. It's probably better to
+ invest some complexity into sharing idarrays, than RLEing. */
+ for (;;)
+ {
+ Id id = *ids++;
+ if (id >= 64)
+ id = (id & 63) | ((id & ~63) << 1);
+ if (!*ids)
+ {
+ write_id(fp, id);
+ return;
+ }
+ write_id(fp, id | 64);
+ }
+}
/*
* Repo
repo_write(Repo *repo, FILE *fp)
{
Pool *pool = repo->pool;
- int i, numsolvdata;
+ int i, n;
Solvable *s;
NeedId *needid;
- int nstrings, nrels;
+ int nstrings, nrels, nkeys, nschemata;
unsigned int sizeid;
+ unsigned int solv_flags;
Reldep *ran;
Id *idarraydata;
int idsizes[RPM_RPMDBID + 1];
- int bits, bitmaps;
+ int id2key[RPM_RPMDBID + 1];
int nsolvables;
+ Id *schemadata, *schemadatap, *schema, *sp;
+ Id schemaid;
+ int schemadatalen;
+ Id *solvschema; /* schema of our solvables */
+ Id lastschema[256];
+ Id lastschemakey[256];
+
nsolvables = 0;
idarraydata = repo->idarraydata;
- needid = (NeedId *)calloc(pool->nstrings + pool->nrels, sizeof(*needid));
+ needid = (NeedId *)xcalloc(pool->ss.nstrings + pool->nrels, sizeof(*needid));
memset(idsizes, 0, sizeof(idsizes));
}
needid[0].need = 0;
- needid[pool->nstrings].need = 0;
- for (i = 0; i < pool->nstrings + pool->nrels; i++)
+ needid[pool->ss.nstrings].need = 0;
+ for (i = 0; i < pool->ss.nstrings + pool->nrels; i++)
needid[i].map = i;
- qsort(needid + 1, pool->nstrings - 1, sizeof(*needid), needid_cmp_need);
- qsort(needid + pool->nstrings, pool->nrels, sizeof(*needid), needid_cmp_need);
+ cmp_pool = pool;
+ qsort(needid + 1, pool->ss.nstrings - 1, sizeof(*needid), needid_cmp_need_s);
+ qsort(needid + pool->ss.nstrings, pool->nrels, sizeof(*needid), needid_cmp_need);
sizeid = 0;
- for (i = 1; i < pool->nstrings; i++)
+ for (i = 1; i < pool->ss.nstrings; i++)
{
if (!needid[i].need)
break;
needid[i].need = 0;
- sizeid += strlen(pool->stringspace + pool->strings[needid[i].map]) + 1;
+ sizeid += strlen(pool->ss.stringspace + pool->ss.strings[needid[i].map]) + 1;
}
nstrings = i;
for (i = 0; i < pool->nrels; i++)
{
- if (!needid[pool->nstrings + i].need)
+ if (!needid[pool->ss.nstrings + i].need)
break;
else
- needid[pool->nstrings + i].need = 0;
+ needid[pool->ss.nstrings + i].need = 0;
}
nrels = i;
for (i = 0; i < nrels; i++)
{
- needid[needid[pool->nstrings + i].map].need = nstrings + i;
+ needid[needid[pool->ss.nstrings + i].map].need = nstrings + i;
}
+ /* find the keys we need */
+ nkeys = 1;
+ memset(id2key, 0, sizeof(id2key));
+ for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
+ if (idsizes[i])
+ id2key[i] = nkeys++;
+
+ /* find the schemata we need */
+ solvschema = xcalloc(repo->nsolvables, sizeof(Id));
+
+ memset(lastschema, 0, sizeof(lastschema));
+ memset(lastschemakey, 0, sizeof(lastschemakey));
+ schemadata = xmalloc(256 * sizeof(Id));
+ schemadatalen = 256;
+ schemadatap = schemadata;
+ *schemadatap++ = 0;
+ schemadatalen--;
+ nschemata = 0;
+ for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
+ {
+ unsigned int h;
+ Id *sp;
+
+ if (s->repo != repo)
+ continue;
+ if (schemadatalen < 32)
+ {
+ int l = schemadatap - schemadata;
+ fprintf(stderr, "growing schemadata\n");
+ schemadata = xrealloc(schemadata, (schemadatap - schemadata + 256) * sizeof(Id));
+ schemadatalen = 256;
+ schemadatap = schemadata + l;
+ }
+ schema = schemadatap;
+ *schema++ = SOLVABLE_NAME;
+ *schema++ = SOLVABLE_ARCH;
+ *schema++ = SOLVABLE_EVR;
+ if (s->vendor)
+ *schema++ = SOLVABLE_VENDOR;
+ if (s->provides)
+ *schema++ = SOLVABLE_PROVIDES;
+ if (s->obsoletes)
+ *schema++ = SOLVABLE_OBSOLETES;
+ if (s->conflicts)
+ *schema++ = SOLVABLE_CONFLICTS;
+ if (s->requires)
+ *schema++ = SOLVABLE_REQUIRES;
+ if (s->recommends)
+ *schema++ = SOLVABLE_RECOMMENDS;
+ if (s->suggests)
+ *schema++ = SOLVABLE_SUGGESTS;
+ if (s->supplements)
+ *schema++ = SOLVABLE_SUPPLEMENTS;
+ if (s->enhances)
+ *schema++ = SOLVABLE_ENHANCES;
+ if (s->freshens)
+ *schema++ = SOLVABLE_FRESHENS;
+ if (repo->rpmdbid)
+ *schema++ = RPM_RPMDBID;
+ *schema++ = 0;
+ for (sp = schemadatap, h = 0; *sp; )
+ h = h * 7 + *sp++;
+ h &= 255;
+ if (lastschema[h] && !memcmp(schemadata + lastschema[h], schemadatap, (schema - schemadatap) * sizeof(Id)))
+ {
+ solvschema[n++] = lastschemakey[h];
+ continue;
+ }
+ schemaid = 0;
+ for (sp = schemadata + 1; sp < schemadatap; )
+ {
+ if (!memcmp(sp, schemadatap, (schema - schemadatap) * sizeof(Id)))
+ break;
+ while (*sp++)
+ ;
+ schemaid++;
+ }
+ if (sp >= schemadatap)
+ {
+ if (schemaid != nschemata)
+ abort();
+ lastschema[h] = schemadatap - schemadata;
+ lastschemakey[h] = schemaid;
+ schemadatalen -= schema - schemadatap;
+ schemadatap = schema;
+ nschemata++;
+ }
+ solvschema[n++] = schemaid;
+ }
+ /* convert all schemas to keys */
+ for (sp = schemadata; sp < schemadatap; sp++)
+ *sp = id2key[*sp];
+
/* write file header */
write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
- write_u32(fp, SOLV_VERSION);
+ write_u32(fp, SOLV_VERSION_2);
/* write counts */
write_u32(fp, nstrings);
write_u32(fp, nrels);
write_u32(fp, nsolvables);
- write_u32(fp, sizeid);
+ write_u32(fp, nkeys);
+ write_u32(fp, nschemata);
+ write_u32(fp, 0); /* no info block */
+ solv_flags = 0;
+ solv_flags |= SOLV_FLAG_PREFIX_POOL;
+#if 0
+ solv_flags |= SOLV_FLAG_VERTICAL;
+#endif
+ write_u32(fp, solv_flags);
+
+ /* Build the prefix-encoding of the string pool. We need to know
+ the size of that before writing it to the file, so we have to
+ build a separate buffer for that. As it's temporarily possible
+ that this actually is an expansion we can't easily reuse the
+ stringspace for this. The max expansion per string is 1 byte,
+ so it will fit into sizeid+nstrings bytes. */
+ char *prefix = xmalloc (sizeid + nstrings);
+ char *pp = prefix;
+ char *old_str = "";
+ for (i = 1; i < nstrings; i++)
+ {
+ char *str = pool->ss.stringspace + pool->ss.strings[needid[i].map];
+ int same;
+ size_t len;
+ for (same = 0; same < 255; same++)
+ if (old_str[same] != str[same])
+ break;
+ *pp++ = same;
+ len = strlen (str + same) + 1;
+ memcpy (pp, str + same, len);
+ pp += len;
+ old_str = str;
+ }
/*
* write strings
*/
- for (i = 1; i < nstrings; i++)
+ write_u32(fp, sizeid);
+ write_u32(fp, pp - prefix);
+ if (fwrite(prefix, pp - prefix, 1, fp) != 1)
{
- char *str = pool->stringspace + pool->strings[needid[i].map];
- if (fwrite(str, strlen(str) + 1, 1, fp) != 1)
- {
- perror("write error");
- exit(1);
- }
+ perror("write error");
+ exit(1);
}
+ xfree (prefix);
/*
* write RelDeps
*/
for (i = 0; i < nrels; i++)
{
- ran = pool->rels + (needid[pool->nstrings + i].map - pool->nstrings);
- write_id(fp, needid[ISRELDEP(ran->name) ? GETRELID(pool, ran->name) : ran->name].need);
- write_id(fp, needid[ISRELDEP(ran->evr) ? GETRELID(pool, ran->evr) : ran->evr].need);
+ ran = pool->rels + (needid[pool->ss.nstrings + i].map - pool->ss.nstrings);
+ write_id(fp, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
+ write_id(fp, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
write_u8( fp, ran->flags);
}
- write_u32(fp, 0); /* no repo data */
-
/*
- * write Solvables
+ * write keys
*/
-
- numsolvdata = 0;
for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
{
- if (idsizes[i])
- numsolvdata++;
- }
- write_u32(fp, (unsigned int)numsolvdata);
-
- bitmaps = 0;
- for (i = SOLVABLE_NAME; i <= SOLVABLE_FRESHENS; i++)
- {
if (!idsizes[i])
continue;
- if (i >= SOLVABLE_PROVIDES && i <= SOLVABLE_FRESHENS)
- {
- write_u8(fp, TYPE_IDARRAY|TYPE_BITMAP);
- bitmaps++;
- }
- else
- write_u8(fp, TYPE_ID);
write_id(fp, needid[i].need);
if (i >= SOLVABLE_PROVIDES && i <= SOLVABLE_FRESHENS)
- write_u32(fp, idsizes[i]);
+ write_id(fp, TYPE_REL_IDARRAY);
+ else if (i == RPM_RPMDBID)
+ write_id(fp, TYPE_U32);
else
- write_u32(fp, 0);
+ write_id(fp, TYPE_ID);
+ write_id(fp, idsizes[i]);
}
- if (repo->rpmdbid)
+ /*
+ * write schemata
+ */
+ write_id(fp, schemadatap - schemadata - 1);
+ for (sp = schemadata + 1; sp < schemadatap; )
{
- write_u8(fp, TYPE_U32);
- write_id(fp, needid[RPM_RPMDBID].need);
- write_u32(fp, 0);
+ write_idarray(fp, pool, 0, sp);
+ while (*sp++)
+ ;
}
+
+#if 0
+ if (1)
+ {
+ Id id, key;
- for (i = repo->start, s = pool->solvables + i; i < repo->end; i++, s++)
+ for (i = 0; i < nsolvables; i++)
+ write_id(fp, solvschema[i]);
+ unsigned char *used = xmalloc(nschemata);
+ for (id = SOLVABLE_NAME; id <= RPM_RPMDBID; id++)
+ {
+ key = id2key[id];
+ memset(used, 0, nschemata);
+ for (sp = schemadata + 1, i = 0; sp < schemadatap; sp++)
+ {
+ if (*sp == 0)
+ i++;
+ else if (*sp == key)
+ used[i] = 1;
+ }
+ for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
+ {
+ if (s->repo != repo)
+ continue;
+ if (!used[solvschema[n++]])
+ continue;
+ switch(id)
+ {
+ case SOLVABLE_NAME:
+ write_id(fp, needid[s->name].need);
+ break;
+ case SOLVABLE_ARCH:
+ write_id(fp, needid[s->arch].need);
+ break;
+ case SOLVABLE_EVR:
+ write_id(fp, needid[s->evr].need);
+ break;
+ case SOLVABLE_VENDOR:
+ write_id(fp, needid[s->vendor].need);
+ break;
+ case RPM_RPMDBID:
+ write_u32(fp, repo->rpmdbid[i - repo->start]);
+ break;
+ case SOLVABLE_PROVIDES:
+ write_idarray(fp, pool, needid, idarraydata + s->provides);
+ break;
+ case SOLVABLE_OBSOLETES:
+ write_idarray(fp, pool, needid, idarraydata + s->obsoletes);
+ break;
+ case SOLVABLE_CONFLICTS:
+ write_idarray(fp, pool, needid, idarraydata + s->conflicts);
+ break;
+ case SOLVABLE_REQUIRES:
+ write_idarray(fp, pool, needid, idarraydata + s->requires);
+ break;
+ case SOLVABLE_RECOMMENDS:
+ write_idarray(fp, pool, needid, idarraydata + s->recommends);
+ break;
+ case SOLVABLE_SUPPLEMENTS:
+ write_idarray(fp, pool, needid, idarraydata + s->supplements);
+ break;
+ case SOLVABLE_SUGGESTS:
+ write_idarray(fp, pool, needid, idarraydata + s->suggests);
+ break;
+ case SOLVABLE_ENHANCES:
+ write_idarray(fp, pool, needid, idarraydata + s->enhances);
+ break;
+ case SOLVABLE_FRESHENS:
+ write_idarray(fp, pool, needid, idarraydata + s->freshens);
+ break;
+ }
+ }
+ }
+ xfree(used);
+ xfree(needid);
+ xfree(solvschema);
+ xfree(schemadata);
+ return;
+ }
+
+#endif
+
+ /*
+ * write Solvables
+ */
+ for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
{
if (s->repo != repo)
continue;
- bits = 0;
- if (idsizes[SOLVABLE_FRESHENS])
- bits = (bits << 1) | (s->freshens ? 1 : 0);
- if (idsizes[SOLVABLE_ENHANCES])
- bits = (bits << 1) | (s->enhances ? 1 : 0);
- if (idsizes[SOLVABLE_SUPPLEMENTS])
- bits = (bits << 1) | (s->supplements ? 1 : 0);
- if (idsizes[SOLVABLE_SUGGESTS])
- bits = (bits << 1) | (s->suggests ? 1 : 0);
- if (idsizes[SOLVABLE_RECOMMENDS])
- bits = (bits << 1) | (s->recommends ? 1 : 0);
- if (idsizes[SOLVABLE_REQUIRES])
- bits = (bits << 1) | (s->requires ? 1 : 0);
- if (idsizes[SOLVABLE_CONFLICTS])
- bits = (bits << 1) | (s->conflicts ? 1 : 0);
- if (idsizes[SOLVABLE_OBSOLETES])
- bits = (bits << 1) | (s->obsoletes ? 1 : 0);
- if (idsizes[SOLVABLE_PROVIDES])
- bits = (bits << 1) | (s->provides ? 1 : 0);
-
- if (bitmaps > 24)
- write_u8(fp, bits >> 24);
- if (bitmaps > 16)
- write_u8(fp, bits >> 16);
- if (bitmaps > 8)
- write_u8(fp, bits >> 8);
- if (bitmaps)
- write_u8(fp, bits);
-
+ /* keep in sync with schema generation! */
+ write_id(fp, solvschema[n++]);
write_id(fp, needid[s->name].need);
write_id(fp, needid[s->arch].need);
write_id(fp, needid[s->evr].need);
- if (idsizes[SOLVABLE_VENDOR])
+ if (s->vendor)
write_id(fp, needid[s->vendor].need);
-
if (s->provides)
- write_idarray(fp, pool, needid, idarraydata + s->provides);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->provides);
if (s->obsoletes)
- write_idarray(fp, pool, needid, idarraydata + s->obsoletes);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->obsoletes);
if (s->conflicts)
- write_idarray(fp, pool, needid, idarraydata + s->conflicts);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->conflicts);
if (s->requires)
- write_idarray(fp, pool, needid, idarraydata + s->requires);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->requires);
if (s->recommends)
- write_idarray(fp, pool, needid, idarraydata + s->recommends);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->recommends);
if (s->suggests)
- write_idarray(fp, pool, needid, idarraydata + s->suggests);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->suggests);
if (s->supplements)
- write_idarray(fp, pool, needid, idarraydata + s->supplements);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->supplements);
if (s->enhances)
- write_idarray(fp, pool, needid, idarraydata + s->enhances);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->enhances);
if (s->freshens)
- write_idarray(fp, pool, needid, idarraydata + s->freshens);
+ write_idarray_sort(fp, pool, needid, idarraydata + s->freshens);
if (repo->rpmdbid)
- write_u32(fp, repo->rpmdbid[i]);
+ write_u32(fp, repo->rpmdbid[i - repo->start]);
}
- free(needid);
+ xfree(needid);
+ xfree(solvschema);
+ xfree(schemadata);
}
// EOF