2 * Copyright (c) 2007, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
11 * Write Repo data out to binary file
13 * See doc/README.format for a description
14 * of the binary file format
18 #include <sys/types.h>
28 #include "repo_write.h"
30 /* #define IGNORE_NEED_FREQUENCY */
32 /*------------------------------------------------------------------*/
33 /* Id map optimizations */
35 typedef struct needid {
41 #define RELOFF(id) (pool->ss.nstrings + GETRELID(id))
45 * idarray: array of Ids, ID_NULL terminated
46 * needid: array of Id->NeedId
53 incneedid(Pool *pool, Id *idarray, NeedId *needid)
61 while ((id = *idarray++) != 0)
66 Reldep *rd = GETRELDEP(pool, id);
67 needid[RELOFF(id)].need++;
68 if (ISRELDEP(rd->evr))
73 incneedid(pool, ida, needid);
76 needid[rd->evr].need++;
90 needid_cmp_need(const void *ap, const void *bp)
95 r = b->need - a->need;
98 return a->map - b->map;
101 static Pool *cmp_pool;
103 needid_cmp_need_s(const void *ap, const void *bp)
105 const NeedId *a = ap;
106 const NeedId *b = bp;
109 #ifdef IGNORE_NEED_FREQUENCY
112 else if (b->need == 0)
116 r = b->need - a->need;
120 char *as = cmp_pool->ss.stringspace + cmp_pool->ss.strings[a->map];
121 char *bs = cmp_pool->ss.stringspace + cmp_pool->ss.strings[b->map];
122 size_t alen = strlen (as);
123 size_t blen = strlen (bs);
124 return memcmp (as, bs, alen < blen ? alen + 1 : blen + 1);
128 /*------------------------------------------------------------------*/
129 /* output routines */
136 write_u32(FILE *fp, unsigned int x)
138 if (putc(x >> 24, fp) == EOF ||
139 putc(x >> 16, fp) == EOF ||
140 putc(x >> 8, fp) == EOF ||
143 perror("write error");
154 write_u8(FILE *fp, unsigned int x)
156 if (putc(x, fp) == EOF)
158 perror("write error");
169 write_id(FILE *fp, Id x)
174 putc((x >> 28) | 128, fp);
176 putc((x >> 21) | 128, fp);
177 putc((x >> 14) | 128, fp);
180 putc((x >> 7) | 128, fp);
181 if (putc(x & 127, fp) == EOF)
183 perror("write error");
194 write_idarray(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
201 /* XXX I think this is broken. A lone '0' will be interpreted as
202 zero plus end-of-array, which stores another zero. */
210 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
212 id = (id & 63) | ((id & ~63) << 1);
218 write_id(fp, id | 64);
223 cmp_ids (const void *pa, const void *pb)
231 write_idarray_sort(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
238 /* XXX I think this is broken. A lone '0' will be interpreted as
239 zero plus end-of-array, which stores another zero. */
243 /* If we ever share idarrays we can't do this in-place. */
244 for (len = 0; ids[len]; len++)
248 ids[len] = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
251 /* That bloody solvable:prereqmarker needs to stay in position :-( */
252 Id prereq = SOLVABLE_PREREQMARKER;
254 prereq = needid[prereq].need;
255 for (i = 0; i < len; i++)
256 if (ids[i] == prereq)
259 qsort (ids, i, sizeof (Id), cmp_ids);
261 qsort (ids + i + 1, len - i - 1, sizeof (Id), cmp_ids);
264 for (i = 0; i < len; i++)
265 /* Ugly PREREQ handling. A "difference" of 0 is the prereq marker,
266 hence all real differences are offsetted by 1. Otherwise we would
267 have to handle negative differences, which would cost code space for
268 the encoding of the sign. We loose the exact mapping of prereq here,
269 but we know the result, so we can recover from that in the reader. */
270 if (ids[i] == prereq)
276 /* XXX If difference is zero we have multiple equal elements,
277 we might want to skip writing them out. */
281 /* The differencing above produces many runs of ones and twos. I tried
282 fairly elaborate schemes to RLE those, but they give only very mediocre
283 improvements in compression, as coding the escapes costs quite some
284 space. Even if they are coded only as bits in IDs. The best improvement
285 was about 2.7% for the whole .solv file. It's probably better to
286 invest some complexity into sharing idarrays, than RLEing. */
287 for (i = 0; i < len - 1; i++)
291 id = (id & 63) | ((id & ~63) << 1);
292 write_id(fp, id | 64);
296 old = (old & 63) | ((old & ~63) << 1);
305 repo_write(Repo *repo, FILE *fp)
307 Pool *pool = repo->pool;
311 int nstrings, nrels, nkeys, nschemata;
313 unsigned int solv_flags;
317 int idsizes[RPM_RPMDBID + 1];
318 int id2key[RPM_RPMDBID + 1];
321 Id *schemadata, *schemadatap, *schema, *sp;
324 Id *solvschema; /* schema of our solvables */
326 Id lastschemakey[256];
329 idarraydata = repo->idarraydata;
331 needid = (NeedId *)xcalloc(pool->ss.nstrings + pool->nrels, sizeof(*needid));
333 memset(idsizes, 0, sizeof(idsizes));
335 for (i = repo->start, s = pool->solvables + i; i < repo->end; i++, s++)
340 needid[s->name].need++;
341 needid[s->arch].need++;
342 needid[s->evr].need++;
345 needid[s->vendor].need++;
346 idsizes[SOLVABLE_VENDOR] = 1;
349 idsizes[SOLVABLE_PROVIDES] += incneedid(pool, idarraydata + s->provides, needid);
351 idsizes[SOLVABLE_REQUIRES] += incneedid(pool, idarraydata + s->requires, needid);
353 idsizes[SOLVABLE_CONFLICTS] += incneedid(pool, idarraydata + s->conflicts, needid);
355 idsizes[SOLVABLE_OBSOLETES] += incneedid(pool, idarraydata + s->obsoletes, needid);
357 idsizes[SOLVABLE_RECOMMENDS] += incneedid(pool, idarraydata + s->recommends, needid);
359 idsizes[SOLVABLE_SUGGESTS] += incneedid(pool, idarraydata + s->suggests, needid);
361 idsizes[SOLVABLE_SUPPLEMENTS] += incneedid(pool, idarraydata + s->supplements, needid);
363 idsizes[SOLVABLE_ENHANCES] += incneedid(pool, idarraydata + s->enhances, needid);
365 idsizes[SOLVABLE_FRESHENS] += incneedid(pool, idarraydata + s->freshens, needid);
367 if (nsolvables != repo->nsolvables)
370 idsizes[SOLVABLE_NAME] = 1;
371 idsizes[SOLVABLE_ARCH] = 1;
372 idsizes[SOLVABLE_EVR] = 1;
374 idsizes[RPM_RPMDBID] = 1;
376 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
383 needid[pool->ss.nstrings].need = 0;
384 for (i = 0; i < pool->ss.nstrings + pool->nrels; i++)
388 qsort(needid + 1, pool->ss.nstrings - 1, sizeof(*needid), needid_cmp_need_s);
389 qsort(needid + pool->ss.nstrings, pool->nrels, sizeof(*needid), needid_cmp_need);
392 for (i = 1; i < pool->ss.nstrings; i++)
397 sizeid += strlen(pool->ss.stringspace + pool->ss.strings[needid[i].map]) + 1;
401 for (i = 0; i < nstrings; i++)
402 needid[needid[i].map].need = i;
404 for (i = 0; i < pool->nrels; i++)
406 if (!needid[pool->ss.nstrings + i].need)
409 needid[pool->ss.nstrings + i].need = 0;
413 for (i = 0; i < nrels; i++)
415 needid[needid[pool->ss.nstrings + i].map].need = nstrings + i;
418 /* find the keys we need */
420 memset(id2key, 0, sizeof(id2key));
421 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
425 /* find the schemata we need */
426 solvschema = xcalloc(repo->nsolvables, sizeof(Id));
428 memset(lastschema, 0, sizeof(lastschema));
429 memset(lastschemakey, 0, sizeof(lastschemakey));
430 schemadata = xmalloc(256 * sizeof(Id));
432 schemadatap = schemadata;
436 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
443 if (schemadatalen < 32)
445 int l = schemadatap - schemadata;
446 fprintf(stderr, "growing schemadata\n");
447 schemadata = xrealloc(schemadata, (schemadatap - schemadata + 256) * sizeof(Id));
449 schemadatap = schemadata + l;
451 schema = schemadatap;
452 *schema++ = SOLVABLE_NAME;
453 *schema++ = SOLVABLE_ARCH;
454 *schema++ = SOLVABLE_EVR;
456 *schema++ = SOLVABLE_VENDOR;
458 *schema++ = SOLVABLE_PROVIDES;
460 *schema++ = SOLVABLE_OBSOLETES;
462 *schema++ = SOLVABLE_CONFLICTS;
464 *schema++ = SOLVABLE_REQUIRES;
466 *schema++ = SOLVABLE_RECOMMENDS;
468 *schema++ = SOLVABLE_SUGGESTS;
470 *schema++ = SOLVABLE_SUPPLEMENTS;
472 *schema++ = SOLVABLE_ENHANCES;
474 *schema++ = SOLVABLE_FRESHENS;
476 *schema++ = RPM_RPMDBID;
478 for (sp = schemadatap, h = 0; *sp; )
481 if (lastschema[h] && !memcmp(schemadata + lastschema[h], schemadatap, (schema - schemadatap) * sizeof(Id)))
483 solvschema[n++] = lastschemakey[h];
487 for (sp = schemadata + 1; sp < schemadatap; )
489 if (!memcmp(sp, schemadatap, (schema - schemadatap) * sizeof(Id)))
495 if (sp >= schemadatap)
497 if (schemaid != nschemata)
499 lastschema[h] = schemadatap - schemadata;
500 lastschemakey[h] = schemaid;
501 schemadatalen -= schema - schemadatap;
502 schemadatap = schema;
505 solvschema[n++] = schemaid;
507 /* convert all schemas to keys */
508 for (sp = schemadata; sp < schemadatap; sp++)
511 /* write file header */
512 write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
513 write_u32(fp, SOLV_VERSION_2);
516 write_u32(fp, nstrings);
517 write_u32(fp, nrels);
518 write_u32(fp, nsolvables);
519 write_u32(fp, nkeys);
520 write_u32(fp, nschemata);
521 write_u32(fp, 0); /* no info block */
523 solv_flags |= SOLV_FLAG_PREFIX_POOL;
525 solv_flags |= SOLV_FLAG_VERTICAL;
527 write_u32(fp, solv_flags);
529 /* Build the prefix-encoding of the string pool. We need to know
530 the size of that before writing it to the file, so we have to
531 build a separate buffer for that. As it's temporarily possible
532 that this actually is an expansion we can't easily reuse the
533 stringspace for this. The max expansion per string is 1 byte,
534 so it will fit into sizeid+nstrings bytes. */
535 char *prefix = xmalloc (sizeid + nstrings);
538 for (i = 1; i < nstrings; i++)
540 char *str = pool->ss.stringspace + pool->ss.strings[needid[i].map];
543 for (same = 0; same < 255; same++)
544 if (old_str[same] != str[same])
547 len = strlen (str + same) + 1;
548 memcpy (pp, str + same, len);
556 write_u32(fp, sizeid);
557 write_u32(fp, pp - prefix);
558 if (fwrite(prefix, pp - prefix, 1, fp) != 1)
560 perror("write error");
568 for (i = 0; i < nrels; i++)
570 ran = pool->rels + (needid[pool->ss.nstrings + i].map - pool->ss.nstrings);
571 write_id(fp, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
572 write_id(fp, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
573 write_u8( fp, ran->flags);
579 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
583 write_id(fp, needid[i].need);
584 if (i >= SOLVABLE_PROVIDES && i <= SOLVABLE_FRESHENS)
585 write_id(fp, TYPE_REL_IDARRAY);
586 else if (i == RPM_RPMDBID)
587 write_id(fp, TYPE_U32);
589 write_id(fp, TYPE_ID);
590 write_id(fp, idsizes[i]);
596 write_id(fp, schemadatap - schemadata - 1);
597 for (sp = schemadata + 1; sp < schemadatap; )
599 write_idarray(fp, pool, 0, sp);
609 for (i = 0; i < nsolvables; i++)
610 write_id(fp, solvschema[i]);
611 unsigned char *used = xmalloc(nschemata);
612 for (id = SOLVABLE_NAME; id <= RPM_RPMDBID; id++)
615 memset(used, 0, nschemata);
616 for (sp = schemadata + 1, i = 0; sp < schemadatap; sp++)
623 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
627 if (!used[solvschema[n++]])
632 write_id(fp, needid[s->name].need);
635 write_id(fp, needid[s->arch].need);
638 write_id(fp, needid[s->evr].need);
640 case SOLVABLE_VENDOR:
641 write_id(fp, needid[s->vendor].need);
644 write_u32(fp, repo->rpmdbid[i - repo->start]);
646 case SOLVABLE_PROVIDES:
647 write_idarray(fp, pool, needid, idarraydata + s->provides);
649 case SOLVABLE_OBSOLETES:
650 write_idarray(fp, pool, needid, idarraydata + s->obsoletes);
652 case SOLVABLE_CONFLICTS:
653 write_idarray(fp, pool, needid, idarraydata + s->conflicts);
655 case SOLVABLE_REQUIRES:
656 write_idarray(fp, pool, needid, idarraydata + s->requires);
658 case SOLVABLE_RECOMMENDS:
659 write_idarray(fp, pool, needid, idarraydata + s->recommends);
661 case SOLVABLE_SUPPLEMENTS:
662 write_idarray(fp, pool, needid, idarraydata + s->supplements);
664 case SOLVABLE_SUGGESTS:
665 write_idarray(fp, pool, needid, idarraydata + s->suggests);
667 case SOLVABLE_ENHANCES:
668 write_idarray(fp, pool, needid, idarraydata + s->enhances);
670 case SOLVABLE_FRESHENS:
671 write_idarray(fp, pool, needid, idarraydata + s->freshens);
688 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
692 /* keep in sync with schema generation! */
693 write_id(fp, solvschema[n++]);
694 write_id(fp, needid[s->name].need);
695 write_id(fp, needid[s->arch].need);
696 write_id(fp, needid[s->evr].need);
698 write_id(fp, needid[s->vendor].need);
700 write_idarray_sort(fp, pool, needid, idarraydata + s->provides);
702 write_idarray_sort(fp, pool, needid, idarraydata + s->obsoletes);
704 write_idarray_sort(fp, pool, needid, idarraydata + s->conflicts);
706 write_idarray_sort(fp, pool, needid, idarraydata + s->requires);
708 write_idarray_sort(fp, pool, needid, idarraydata + s->recommends);
710 write_idarray_sort(fp, pool, needid, idarraydata + s->suggests);
712 write_idarray_sort(fp, pool, needid, idarraydata + s->supplements);
714 write_idarray_sort(fp, pool, needid, idarraydata + s->enhances);
716 write_idarray_sort(fp, pool, needid, idarraydata + s->freshens);
718 write_u32(fp, repo->rpmdbid[i - repo->start]);