2 * Copyright (c) 2007, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
11 * Write Repo data out to binary file
13 * See doc/README.format for a description
14 * of the binary file format
18 #include <sys/types.h>
28 #include "repo_write.h"
30 /* #define IGNORE_NEED_FREQUENCY */
32 /*------------------------------------------------------------------*/
33 /* Id map optimizations */
35 typedef struct needid {
41 #define RELOFF(id) (pool->ss.nstrings + GETRELID(id))
45 * idarray: array of Ids, ID_NULL terminated
46 * needid: array of Id->NeedId
53 incneedid(Pool *pool, Id *idarray, NeedId *needid)
61 while ((id = *idarray++) != 0)
66 Reldep *rd = GETRELDEP(pool, id);
67 needid[RELOFF(id)].need++;
68 if (ISRELDEP(rd->evr))
73 incneedid(pool, ida, needid);
76 needid[rd->evr].need++;
90 needid_cmp_need(const void *ap, const void *bp)
95 r = b->need - a->need;
98 return a->map - b->map;
101 static Pool *cmp_pool;
103 needid_cmp_need_s(const void *ap, const void *bp)
105 const NeedId *a = ap;
106 const NeedId *b = bp;
109 #ifdef IGNORE_NEED_FREQUENCY
112 else if (b->need == 0)
116 r = b->need - a->need;
120 char *as = cmp_pool->ss.stringspace + cmp_pool->ss.strings[a->map];
121 char *bs = cmp_pool->ss.stringspace + cmp_pool->ss.strings[b->map];
122 size_t alen = strlen (as);
123 size_t blen = strlen (bs);
124 return memcmp (as, bs, alen < blen ? alen + 1 : blen + 1);
128 /*------------------------------------------------------------------*/
129 /* output routines */
136 write_u32(FILE *fp, unsigned int x)
138 if (putc(x >> 24, fp) == EOF ||
139 putc(x >> 16, fp) == EOF ||
140 putc(x >> 8, fp) == EOF ||
143 perror("write error");
154 write_u8(FILE *fp, unsigned int x)
156 if (putc(x, fp) == EOF)
158 perror("write error");
169 write_id(FILE *fp, Id x)
174 putc((x >> 28) | 128, fp);
176 putc((x >> 21) | 128, fp);
177 putc((x >> 14) | 128, fp);
180 putc((x >> 7) | 128, fp);
181 if (putc(x & 127, fp) == EOF)
183 perror("write error");
189 write_str(FILE *fp, const char *str)
191 if (fputs (str, fp) == EOF
192 || putc (0, fp) == EOF)
194 perror("write error");
204 write_idarray(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
218 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
220 id = (id & 63) | ((id & ~63) << 1);
226 write_id(fp, id | 64);
231 cmp_ids (const void *pa, const void *pb)
239 write_idarray_sort(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
249 /* If we ever share idarrays we can't do this in-place. */
250 for (len = 0; ids[len]; len++)
254 ids[len] = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
257 /* That bloody solvable:prereqmarker needs to stay in position :-( */
258 Id prereq = SOLVABLE_PREREQMARKER;
260 prereq = needid[prereq].need;
261 for (i = 0; i < len; i++)
262 if (ids[i] == prereq)
265 qsort (ids, i, sizeof (Id), cmp_ids);
267 qsort (ids + i + 1, len - i - 1, sizeof (Id), cmp_ids);
270 for (i = 0; i < len; i++)
271 /* Ugly PREREQ handling. A "difference" of 0 is the prereq marker,
272 hence all real differences are offsetted by 1. Otherwise we would
273 have to handle negative differences, which would cost code space for
274 the encoding of the sign. We loose the exact mapping of prereq here,
275 but we know the result, so we can recover from that in the reader. */
276 if (ids[i] == prereq)
282 /* XXX If difference is zero we have multiple equal elements,
283 we might want to skip writing them out. */
287 /* The differencing above produces many runs of ones and twos. I tried
288 fairly elaborate schemes to RLE those, but they give only very mediocre
289 improvements in compression, as coding the escapes costs quite some
290 space. Even if they are coded only as bits in IDs. The best improvement
291 was about 2.7% for the whole .solv file. It's probably better to
292 invest some complexity into sharing idarrays, than RLEing. */
293 for (i = 0; i < len - 1; i++)
297 id = (id & 63) | ((id & ~63) << 1);
298 write_id(fp, id | 64);
302 old = (old & 63) | ((old & ~63) << 1);
311 repo_write(Repo *repo, FILE *fp)
313 Pool *pool = repo->pool;
317 int nstrings, nrels, nkeys, nschemata;
319 unsigned int solv_flags;
323 int idsizes[RPM_RPMDBID + 1];
324 int id2key[RPM_RPMDBID + 1];
327 Id *schemadata, *schemadatap, *schema, *sp;
330 Id *solvschema; /* schema of our solvables */
332 Id lastschemakey[256];
334 /* For the info block. */
335 Id repodata_id, hello_id;
337 repodata_id = str2id (pool, "repodata", 1);
338 hello_id = str2id (pool, "hello", 1);
341 idarraydata = repo->idarraydata;
343 needid = (NeedId *)xcalloc(pool->ss.nstrings + pool->nrels, sizeof(*needid));
345 needid[repodata_id].need++;
346 needid[hello_id].need++;
347 for (i = 0; i < repo->nrepodata; i++)
350 for (j = 0; j < repo->repodata[i].nkeys; j++)
351 needid[repo->repodata[i].keys[j].name].need++;
354 memset(idsizes, 0, sizeof(idsizes));
356 for (i = repo->start, s = pool->solvables + i; i < repo->end; i++, s++)
361 needid[s->name].need++;
362 needid[s->arch].need++;
363 needid[s->evr].need++;
366 needid[s->vendor].need++;
367 idsizes[SOLVABLE_VENDOR] = 1;
370 idsizes[SOLVABLE_PROVIDES] += incneedid(pool, idarraydata + s->provides, needid);
372 idsizes[SOLVABLE_REQUIRES] += incneedid(pool, idarraydata + s->requires, needid);
374 idsizes[SOLVABLE_CONFLICTS] += incneedid(pool, idarraydata + s->conflicts, needid);
376 idsizes[SOLVABLE_OBSOLETES] += incneedid(pool, idarraydata + s->obsoletes, needid);
378 idsizes[SOLVABLE_RECOMMENDS] += incneedid(pool, idarraydata + s->recommends, needid);
380 idsizes[SOLVABLE_SUGGESTS] += incneedid(pool, idarraydata + s->suggests, needid);
382 idsizes[SOLVABLE_SUPPLEMENTS] += incneedid(pool, idarraydata + s->supplements, needid);
384 idsizes[SOLVABLE_ENHANCES] += incneedid(pool, idarraydata + s->enhances, needid);
386 idsizes[SOLVABLE_FRESHENS] += incneedid(pool, idarraydata + s->freshens, needid);
388 if (nsolvables != repo->nsolvables)
391 idsizes[SOLVABLE_NAME] = 1;
392 idsizes[SOLVABLE_ARCH] = 1;
393 idsizes[SOLVABLE_EVR] = 1;
395 idsizes[RPM_RPMDBID] = 1;
397 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
404 needid[pool->ss.nstrings].need = 0;
405 for (i = 0; i < pool->ss.nstrings + pool->nrels; i++)
409 qsort(needid + 1, pool->ss.nstrings - 1, sizeof(*needid), needid_cmp_need_s);
410 qsort(needid + pool->ss.nstrings, pool->nrels, sizeof(*needid), needid_cmp_need);
413 for (i = 1; i < pool->ss.nstrings; i++)
418 sizeid += strlen(pool->ss.stringspace + pool->ss.strings[needid[i].map]) + 1;
422 for (i = 0; i < nstrings; i++)
423 needid[needid[i].map].need = i;
425 for (i = 0; i < pool->nrels; i++)
427 if (!needid[pool->ss.nstrings + i].need)
430 needid[pool->ss.nstrings + i].need = 0;
434 for (i = 0; i < nrels; i++)
436 needid[needid[pool->ss.nstrings + i].map].need = nstrings + i;
439 /* find the keys we need */
441 memset(id2key, 0, sizeof(id2key));
442 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
446 /* find the schemata we need */
447 solvschema = xcalloc(repo->nsolvables, sizeof(Id));
449 memset(lastschema, 0, sizeof(lastschema));
450 memset(lastschemakey, 0, sizeof(lastschemakey));
451 schemadata = xmalloc(256 * sizeof(Id));
453 schemadatap = schemadata;
457 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
464 if (schemadatalen < 32)
466 int l = schemadatap - schemadata;
467 fprintf(stderr, "growing schemadata\n");
468 schemadata = xrealloc(schemadata, (schemadatap - schemadata + 256) * sizeof(Id));
470 schemadatap = schemadata + l;
472 schema = schemadatap;
473 *schema++ = SOLVABLE_NAME;
474 *schema++ = SOLVABLE_ARCH;
475 *schema++ = SOLVABLE_EVR;
477 *schema++ = SOLVABLE_VENDOR;
479 *schema++ = SOLVABLE_PROVIDES;
481 *schema++ = SOLVABLE_OBSOLETES;
483 *schema++ = SOLVABLE_CONFLICTS;
485 *schema++ = SOLVABLE_REQUIRES;
487 *schema++ = SOLVABLE_RECOMMENDS;
489 *schema++ = SOLVABLE_SUGGESTS;
491 *schema++ = SOLVABLE_SUPPLEMENTS;
493 *schema++ = SOLVABLE_ENHANCES;
495 *schema++ = SOLVABLE_FRESHENS;
497 *schema++ = RPM_RPMDBID;
499 for (sp = schemadatap, h = 0; *sp; )
502 if (lastschema[h] && !memcmp(schemadata + lastschema[h], schemadatap, (schema - schemadatap) * sizeof(Id)))
504 solvschema[n++] = lastschemakey[h];
508 for (sp = schemadata + 1; sp < schemadatap; )
510 if (!memcmp(sp, schemadatap, (schema - schemadatap) * sizeof(Id)))
516 if (sp >= schemadatap)
518 if (schemaid != nschemata)
520 lastschema[h] = schemadatap - schemadata;
521 lastschemakey[h] = schemaid;
522 schemadatalen -= schema - schemadatap;
523 schemadatap = schema;
526 solvschema[n++] = schemaid;
528 /* convert all schemas to keys */
529 for (sp = schemadata; sp < schemadatap; sp++)
532 /* write file header */
533 write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
534 write_u32(fp, SOLV_VERSION_3);
537 write_u32(fp, nstrings);
538 write_u32(fp, nrels);
539 write_u32(fp, nsolvables);
540 write_u32(fp, nkeys);
541 write_u32(fp, nschemata);
542 write_u32(fp, 2); /* Info block. */
544 solv_flags |= SOLV_FLAG_PREFIX_POOL;
546 solv_flags |= SOLV_FLAG_VERTICAL;
548 write_u32(fp, solv_flags);
550 /* Build the prefix-encoding of the string pool. We need to know
551 the size of that before writing it to the file, so we have to
552 build a separate buffer for that. As it's temporarily possible
553 that this actually is an expansion we can't easily reuse the
554 stringspace for this. The max expansion per string is 1 byte,
555 so it will fit into sizeid+nstrings bytes. */
556 char *prefix = xmalloc (sizeid + nstrings);
559 for (i = 1; i < nstrings; i++)
561 char *str = pool->ss.stringspace + pool->ss.strings[needid[i].map];
564 for (same = 0; same < 255; same++)
565 if (old_str[same] != str[same])
568 len = strlen (str + same) + 1;
569 memcpy (pp, str + same, len);
577 write_u32(fp, sizeid);
578 write_u32(fp, pp - prefix);
579 if (fwrite(prefix, pp - prefix, 1, fp) != 1)
581 perror("write error");
589 for (i = 0; i < nrels; i++)
591 ran = pool->rels + (needid[pool->ss.nstrings + i].map - pool->ss.nstrings);
592 write_id(fp, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
593 write_id(fp, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
594 write_u8( fp, ran->flags);
600 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
604 write_id(fp, needid[i].need);
605 if (i >= SOLVABLE_PROVIDES && i <= SOLVABLE_FRESHENS)
606 write_id(fp, TYPE_REL_IDARRAY);
607 else if (i == RPM_RPMDBID)
608 write_id(fp, TYPE_U32);
610 write_id(fp, TYPE_ID);
611 write_id(fp, idsizes[i]);
617 write_id(fp, schemadatap - schemadata - 1);
618 for (sp = schemadata + 1; sp < schemadatap; )
620 write_idarray(fp, pool, 0, sp);
628 write_id (fp, needid[hello_id].need);
629 write_id (fp, TYPE_COUNT_NAMED);
631 write_id (fp, 0); //name
632 write_id (fp, TYPE_STR);
633 write_str (fp, "doll");
635 write_id (fp, needid[repodata_id].need);
636 write_id (fp, TYPE_COUNT_NAMED);
637 write_id (fp, repo->nrepodata);
638 for (i = 0; i < repo->nrepodata; i++)
641 write_id (fp, 0); /* no name, isn't important here */
642 write_id (fp, TYPE_COUNT_NAMED);
643 /* Don't emit the embedded attributes. */
644 if (repo->repodata[i].name == 0)
646 write_id (fp, 0); /* count */
649 write_id (fp, 2); /* 2 items, the filename and the keys */
651 write_id (fp, 0); /* no name */
652 write_id (fp, TYPE_STR);
653 write_str (fp, repo->repodata[i].name);
656 write_id (fp, 0); /* no name */
657 write_id (fp, TYPE_COUNTED);
658 write_id (fp, repo->repodata[i].nkeys * 2);
659 write_id (fp, TYPE_ID);
660 for (j = 0; j < repo->repodata[i].nkeys; j++)
662 write_id (fp, needid[repo->repodata[i].keys[j].name].need);
663 write_id (fp, repo->repodata[i].keys[j].type);
672 for (i = 0; i < nsolvables; i++)
673 write_id(fp, solvschema[i]);
674 unsigned char *used = xmalloc(nschemata);
675 for (id = SOLVABLE_NAME; id <= RPM_RPMDBID; id++)
678 memset(used, 0, nschemata);
679 for (sp = schemadata + 1, i = 0; sp < schemadatap; sp++)
686 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
690 if (!used[solvschema[n++]])
695 write_id(fp, needid[s->name].need);
698 write_id(fp, needid[s->arch].need);
701 write_id(fp, needid[s->evr].need);
703 case SOLVABLE_VENDOR:
704 write_id(fp, needid[s->vendor].need);
707 write_u32(fp, repo->rpmdbid[i - repo->start]);
709 case SOLVABLE_PROVIDES:
710 write_idarray(fp, pool, needid, idarraydata + s->provides);
712 case SOLVABLE_OBSOLETES:
713 write_idarray(fp, pool, needid, idarraydata + s->obsoletes);
715 case SOLVABLE_CONFLICTS:
716 write_idarray(fp, pool, needid, idarraydata + s->conflicts);
718 case SOLVABLE_REQUIRES:
719 write_idarray(fp, pool, needid, idarraydata + s->requires);
721 case SOLVABLE_RECOMMENDS:
722 write_idarray(fp, pool, needid, idarraydata + s->recommends);
724 case SOLVABLE_SUPPLEMENTS:
725 write_idarray(fp, pool, needid, idarraydata + s->supplements);
727 case SOLVABLE_SUGGESTS:
728 write_idarray(fp, pool, needid, idarraydata + s->suggests);
730 case SOLVABLE_ENHANCES:
731 write_idarray(fp, pool, needid, idarraydata + s->enhances);
733 case SOLVABLE_FRESHENS:
734 write_idarray(fp, pool, needid, idarraydata + s->freshens);
751 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
755 /* keep in sync with schema generation! */
756 write_id(fp, solvschema[n++]);
757 write_id(fp, needid[s->name].need);
758 write_id(fp, needid[s->arch].need);
759 write_id(fp, needid[s->evr].need);
761 write_id(fp, needid[s->vendor].need);
763 write_idarray_sort(fp, pool, needid, idarraydata + s->provides);
765 write_idarray_sort(fp, pool, needid, idarraydata + s->obsoletes);
767 write_idarray_sort(fp, pool, needid, idarraydata + s->conflicts);
769 write_idarray_sort(fp, pool, needid, idarraydata + s->requires);
771 write_idarray_sort(fp, pool, needid, idarraydata + s->recommends);
773 write_idarray_sort(fp, pool, needid, idarraydata + s->suggests);
775 write_idarray_sort(fp, pool, needid, idarraydata + s->supplements);
777 write_idarray_sort(fp, pool, needid, idarraydata + s->enhances);
779 write_idarray_sort(fp, pool, needid, idarraydata + s->freshens);
781 write_u32(fp, repo->rpmdbid[i - repo->start]);