2 * Copyright (c) 2007, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
11 * Write Repo data out to binary file
13 * See doc/README.format for a description
14 * of the binary file format
18 #include <sys/types.h>
28 #include "repo_write.h"
30 /* #define IGNORE_NEED_FREQUENCY */
32 /*------------------------------------------------------------------*/
33 /* Id map optimizations */
35 typedef struct needid {
41 #define RELOFF(id) (pool->ss.nstrings + GETRELID(id))
45 * idarray: array of Ids, ID_NULL terminated
46 * needid: array of Id->NeedId
53 incneedid(Pool *pool, Id *idarray, NeedId *needid)
61 while ((id = *idarray++) != 0)
66 Reldep *rd = GETRELDEP(pool, id);
67 needid[RELOFF(id)].need++;
68 if (ISRELDEP(rd->evr))
73 incneedid(pool, ida, needid);
76 needid[rd->evr].need++;
90 needid_cmp_need(const void *ap, const void *bp)
95 r = b->need - a->need;
98 return a->map - b->map;
101 static Pool *cmp_pool;
103 needid_cmp_need_s(const void *ap, const void *bp)
105 const NeedId *a = ap;
106 const NeedId *b = bp;
109 #ifdef IGNORE_NEED_FREQUENCY
112 else if (b->need == 0)
116 r = b->need - a->need;
120 char *as = cmp_pool->ss.stringspace + cmp_pool->ss.strings[a->map];
121 char *bs = cmp_pool->ss.stringspace + cmp_pool->ss.strings[b->map];
122 size_t alen = strlen (as);
123 size_t blen = strlen (bs);
124 return memcmp (as, bs, alen < blen ? alen + 1 : blen + 1);
128 /*------------------------------------------------------------------*/
129 /* output routines */
136 write_u32(FILE *fp, unsigned int x)
138 if (putc(x >> 24, fp) == EOF ||
139 putc(x >> 16, fp) == EOF ||
140 putc(x >> 8, fp) == EOF ||
143 perror("write error");
154 write_u8(FILE *fp, unsigned int x)
156 if (putc(x, fp) == EOF)
158 perror("write error");
169 write_id(FILE *fp, Id x)
174 putc((x >> 28) | 128, fp);
176 putc((x >> 21) | 128, fp);
177 putc((x >> 14) | 128, fp);
180 putc((x >> 7) | 128, fp);
181 if (putc(x & 127, fp) == EOF)
183 perror("write error");
194 write_idarray(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
208 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
210 id = (id & 63) | ((id & ~63) << 1);
216 write_id(fp, id | 64);
221 cmp_ids (const void *pa, const void *pb)
229 write_idarray_sort(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
239 /* If we ever share idarrays we can't do this in-place. */
240 for (len = 0; ids[len]; len++)
244 ids[len] = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
247 /* That bloody solvable:prereqmarker needs to stay in position :-( */
248 Id prereq = SOLVABLE_PREREQMARKER;
250 prereq = needid[prereq].need;
251 for (i = 0; i < len; i++)
252 if (ids[i] == prereq)
255 qsort (ids, i, sizeof (Id), cmp_ids);
257 qsort (ids + i + 1, len - i - 1, sizeof (Id), cmp_ids);
260 for (i = 0; i < len; i++)
261 /* Ugly PREREQ handling. A "difference" of 1 is the prereq marker,
262 hence all real differences are offsetted by 1. Otherwise we would
263 have to handle negative differences, which would cost code space for
264 the encoding of the sign. We loose the exact mapping of prereq here,
265 but we know the result, so we can recover from that in the reader. */
266 if (ids[i] == prereq)
272 /* difference is zero --> we have multiple equal elements in the list */
277 /* The differencing above produces many runs of ones and twos. I tried
278 fairly elaborate schemes to RLE those, but they give only very mediocre
279 improvements in compression, as coding the escapes costs quite some
280 space. Even if they are coded only as bits in IDs. The best improvement
281 was about 2.7% for the whole .solv file. It's probably better to
282 invest some complexity into sharing idarrays, than RLEing. */
287 id = (id & 63) | ((id & ~63) << 1);
293 write_id(fp, id | 64);
302 repo_write(Repo *repo, FILE *fp)
304 Pool *pool = repo->pool;
308 int nstrings, nrels, nkeys, nschemata;
310 unsigned int solv_flags;
314 int idsizes[RPM_RPMDBID + 1];
315 int id2key[RPM_RPMDBID + 1];
318 Id *schemadata, *schemadatap, *schema, *sp;
321 Id *solvschema; /* schema of our solvables */
323 Id lastschemakey[256];
326 idarraydata = repo->idarraydata;
328 needid = (NeedId *)xcalloc(pool->ss.nstrings + pool->nrels, sizeof(*needid));
330 memset(idsizes, 0, sizeof(idsizes));
332 for (i = repo->start, s = pool->solvables + i; i < repo->end; i++, s++)
337 needid[s->name].need++;
338 needid[s->arch].need++;
339 needid[s->evr].need++;
342 needid[s->vendor].need++;
343 idsizes[SOLVABLE_VENDOR] = 1;
346 idsizes[SOLVABLE_PROVIDES] += incneedid(pool, idarraydata + s->provides, needid);
348 idsizes[SOLVABLE_REQUIRES] += incneedid(pool, idarraydata + s->requires, needid);
350 idsizes[SOLVABLE_CONFLICTS] += incneedid(pool, idarraydata + s->conflicts, needid);
352 idsizes[SOLVABLE_OBSOLETES] += incneedid(pool, idarraydata + s->obsoletes, needid);
354 idsizes[SOLVABLE_RECOMMENDS] += incneedid(pool, idarraydata + s->recommends, needid);
356 idsizes[SOLVABLE_SUGGESTS] += incneedid(pool, idarraydata + s->suggests, needid);
358 idsizes[SOLVABLE_SUPPLEMENTS] += incneedid(pool, idarraydata + s->supplements, needid);
360 idsizes[SOLVABLE_ENHANCES] += incneedid(pool, idarraydata + s->enhances, needid);
362 idsizes[SOLVABLE_FRESHENS] += incneedid(pool, idarraydata + s->freshens, needid);
364 if (nsolvables != repo->nsolvables)
367 idsizes[SOLVABLE_NAME] = 1;
368 idsizes[SOLVABLE_ARCH] = 1;
369 idsizes[SOLVABLE_EVR] = 1;
371 idsizes[RPM_RPMDBID] = 1;
373 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
380 needid[pool->ss.nstrings].need = 0;
381 for (i = 0; i < pool->ss.nstrings + pool->nrels; i++)
385 qsort(needid + 1, pool->ss.nstrings - 1, sizeof(*needid), needid_cmp_need_s);
386 qsort(needid + pool->ss.nstrings, pool->nrels, sizeof(*needid), needid_cmp_need);
389 for (i = 1; i < pool->ss.nstrings; i++)
394 sizeid += strlen(pool->ss.stringspace + pool->ss.strings[needid[i].map]) + 1;
398 for (i = 0; i < nstrings; i++)
399 needid[needid[i].map].need = i;
401 for (i = 0; i < pool->nrels; i++)
403 if (!needid[pool->ss.nstrings + i].need)
406 needid[pool->ss.nstrings + i].need = 0;
410 for (i = 0; i < nrels; i++)
412 needid[needid[pool->ss.nstrings + i].map].need = nstrings + i;
415 /* find the keys we need */
417 memset(id2key, 0, sizeof(id2key));
418 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
422 /* find the schemata we need */
423 solvschema = xcalloc(repo->nsolvables, sizeof(Id));
425 memset(lastschema, 0, sizeof(lastschema));
426 memset(lastschemakey, 0, sizeof(lastschemakey));
427 schemadata = xmalloc(256 * sizeof(Id));
429 schemadatap = schemadata;
433 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
440 if (schemadatalen < 32)
442 int l = schemadatap - schemadata;
443 fprintf(stderr, "growing schemadata\n");
444 schemadata = xrealloc(schemadata, (schemadatap - schemadata + 256) * sizeof(Id));
446 schemadatap = schemadata + l;
448 schema = schemadatap;
449 *schema++ = SOLVABLE_NAME;
450 *schema++ = SOLVABLE_ARCH;
451 *schema++ = SOLVABLE_EVR;
453 *schema++ = SOLVABLE_VENDOR;
455 *schema++ = SOLVABLE_PROVIDES;
457 *schema++ = SOLVABLE_OBSOLETES;
459 *schema++ = SOLVABLE_CONFLICTS;
461 *schema++ = SOLVABLE_REQUIRES;
463 *schema++ = SOLVABLE_RECOMMENDS;
465 *schema++ = SOLVABLE_SUGGESTS;
467 *schema++ = SOLVABLE_SUPPLEMENTS;
469 *schema++ = SOLVABLE_ENHANCES;
471 *schema++ = SOLVABLE_FRESHENS;
473 *schema++ = RPM_RPMDBID;
475 for (sp = schemadatap, h = 0; *sp; )
478 if (lastschema[h] && !memcmp(schemadata + lastschema[h], schemadatap, (schema - schemadatap) * sizeof(Id)))
480 solvschema[n++] = lastschemakey[h];
484 for (sp = schemadata + 1; sp < schemadatap; )
486 if (!memcmp(sp, schemadatap, (schema - schemadatap) * sizeof(Id)))
492 if (sp >= schemadatap)
494 if (schemaid != nschemata)
496 lastschema[h] = schemadatap - schemadata;
497 lastschemakey[h] = schemaid;
498 schemadatalen -= schema - schemadatap;
499 schemadatap = schema;
502 solvschema[n++] = schemaid;
504 /* convert all schemas to keys */
505 for (sp = schemadata; sp < schemadatap; sp++)
508 /* write file header */
509 write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
510 write_u32(fp, SOLV_VERSION_2);
513 write_u32(fp, nstrings);
514 write_u32(fp, nrels);
515 write_u32(fp, nsolvables);
516 write_u32(fp, nkeys);
517 write_u32(fp, nschemata);
518 write_u32(fp, 0); /* no info block */
520 solv_flags |= SOLV_FLAG_PREFIX_POOL;
522 solv_flags |= SOLV_FLAG_VERTICAL;
524 write_u32(fp, solv_flags);
526 /* Build the prefix-encoding of the string pool. We need to know
527 the size of that before writing it to the file, so we have to
528 build a separate buffer for that. As it's temporarily possible
529 that this actually is an expansion we can't easily reuse the
530 stringspace for this. The max expansion per string is 1 byte,
531 so it will fit into sizeid+nstrings bytes. */
532 char *prefix = xmalloc (sizeid + nstrings);
535 for (i = 1; i < nstrings; i++)
537 char *str = pool->ss.stringspace + pool->ss.strings[needid[i].map];
540 for (same = 0; same < 255; same++)
541 if (old_str[same] != str[same])
544 len = strlen (str + same) + 1;
545 memcpy (pp, str + same, len);
553 write_u32(fp, sizeid);
554 write_u32(fp, pp - prefix);
555 if (fwrite(prefix, pp - prefix, 1, fp) != 1)
557 perror("write error");
565 for (i = 0; i < nrels; i++)
567 ran = pool->rels + (needid[pool->ss.nstrings + i].map - pool->ss.nstrings);
568 write_id(fp, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
569 write_id(fp, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
570 write_u8( fp, ran->flags);
576 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
580 write_id(fp, needid[i].need);
581 if (i >= SOLVABLE_PROVIDES && i <= SOLVABLE_FRESHENS)
582 write_id(fp, TYPE_REL_IDARRAY);
583 else if (i == RPM_RPMDBID)
584 write_id(fp, TYPE_U32);
586 write_id(fp, TYPE_ID);
587 write_id(fp, idsizes[i]);
593 write_id(fp, schemadatap - schemadata - 1);
594 for (sp = schemadata + 1; sp < schemadatap; )
596 write_idarray(fp, pool, 0, sp);
606 for (i = 0; i < nsolvables; i++)
607 write_id(fp, solvschema[i]);
608 unsigned char *used = xmalloc(nschemata);
609 for (id = SOLVABLE_NAME; id <= RPM_RPMDBID; id++)
612 memset(used, 0, nschemata);
613 for (sp = schemadata + 1, i = 0; sp < schemadatap; sp++)
620 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
624 if (!used[solvschema[n++]])
629 write_id(fp, needid[s->name].need);
632 write_id(fp, needid[s->arch].need);
635 write_id(fp, needid[s->evr].need);
637 case SOLVABLE_VENDOR:
638 write_id(fp, needid[s->vendor].need);
641 write_u32(fp, repo->rpmdbid[i - repo->start]);
643 case SOLVABLE_PROVIDES:
644 write_idarray(fp, pool, needid, idarraydata + s->provides);
646 case SOLVABLE_OBSOLETES:
647 write_idarray(fp, pool, needid, idarraydata + s->obsoletes);
649 case SOLVABLE_CONFLICTS:
650 write_idarray(fp, pool, needid, idarraydata + s->conflicts);
652 case SOLVABLE_REQUIRES:
653 write_idarray(fp, pool, needid, idarraydata + s->requires);
655 case SOLVABLE_RECOMMENDS:
656 write_idarray(fp, pool, needid, idarraydata + s->recommends);
658 case SOLVABLE_SUPPLEMENTS:
659 write_idarray(fp, pool, needid, idarraydata + s->supplements);
661 case SOLVABLE_SUGGESTS:
662 write_idarray(fp, pool, needid, idarraydata + s->suggests);
664 case SOLVABLE_ENHANCES:
665 write_idarray(fp, pool, needid, idarraydata + s->enhances);
667 case SOLVABLE_FRESHENS:
668 write_idarray(fp, pool, needid, idarraydata + s->freshens);
685 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
689 /* keep in sync with schema generation! */
690 write_id(fp, solvschema[n++]);
691 write_id(fp, needid[s->name].need);
692 write_id(fp, needid[s->arch].need);
693 write_id(fp, needid[s->evr].need);
695 write_id(fp, needid[s->vendor].need);
697 write_idarray_sort(fp, pool, needid, idarraydata + s->provides);
699 write_idarray_sort(fp, pool, needid, idarraydata + s->obsoletes);
701 write_idarray_sort(fp, pool, needid, idarraydata + s->conflicts);
703 write_idarray_sort(fp, pool, needid, idarraydata + s->requires);
705 write_idarray_sort(fp, pool, needid, idarraydata + s->recommends);
707 write_idarray_sort(fp, pool, needid, idarraydata + s->suggests);
709 write_idarray_sort(fp, pool, needid, idarraydata + s->supplements);
711 write_idarray_sort(fp, pool, needid, idarraydata + s->enhances);
713 write_idarray_sort(fp, pool, needid, idarraydata + s->freshens);
715 write_u32(fp, repo->rpmdbid[i - repo->start]);