src/repo_write.c

   1 /*
   2  * Copyright (c) 2007-2011, Novell Inc.
   3  *
   4  * This program is licensed under the BSD license, read LICENSE.BSD
   5  * for further information
   6  */
   7
   8 /*
   9  * repo_write.c
  10  *
  11  * Write Repo data out to a file in solv format
  12  *
  13  * See doc/README.format for a description
  14  * of the binary file format
  15  *
  16  */
  17
  18 #include <sys/types.h>
  19 #include <limits.h>
  20 #include <fcntl.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <assert.h>
  25 #include <errno.h>
  26
  27 #include "pool.h"
  28 #include "util.h"
  29 #include "repo_write.h"
  30 #include "repopage.h"
  31
  32 /*------------------------------------------------------------------*/
  33 /* Id map optimizations */
  34
/*
 * Per-id bookkeeping entry.  The array of these is indexed by string id,
 * the entries for relation ids follow at offset needid[0].map (see RELOFF).
 */
typedef struct needid {
  Id need;      /* incremented for every use of the id; read back as the id to write out */
  Id map;       /* id belonging to the entry; in element 0: offset of the relation entries */
} NeedId;
  39
  40
/* index of the NeedId entry of relation id "id"; expects an array called
 * "needid" in the calling scope whose element 0 holds the offset of the
 * relation entries in its map field */
#define RELOFF(id) (needid[0].map + GETRELID(id))
  42
  43 /*
  44  * increment need Id
  45  * idarray: array of Ids, ID_NULL terminated
  46  * needid: array of Id->NeedId
  47  *
  48  * return size of array (including trailing zero)
  49  *
  50  */
  51
  52 static void
  53 incneedid(Pool *pool, Id id, NeedId *needid)
  54 {
  55   while (ISRELDEP(id))
  56     {
  57       Reldep *rd = GETRELDEP(pool, id);
  58       needid[RELOFF(id)].need++;
  59       if (ISRELDEP(rd->evr))
  60         incneedid(pool, rd->evr, needid);
  61       else
  62         needid[rd->evr].need++;
  63       id = rd->name;
  64     }
  65   needid[id].need++;
  66 }
  67
  68 static int
  69 incneedidarray(Pool *pool, Id *idarray, NeedId *needid)
  70 {
  71   Id id;
  72   int n = 0;
  73
  74   if (!idarray)
  75     return 0;
  76   while ((id = *idarray++) != 0)
  77     {
  78       n++;
  79       while (ISRELDEP(id))
  80         {
  81           Reldep *rd = GETRELDEP(pool, id);
  82           needid[RELOFF(id)].need++;
  83           if (ISRELDEP(rd->evr))
  84             incneedid(pool, rd->evr, needid);
  85           else
  86             needid[rd->evr].need++;
  87           id = rd->name;
  88         }
  89       needid[id].need++;
  90     }
  91   return n + 1;
  92 }
  93
  94
  95 /*
  96  *
  97  */
  98
  99 static int
 100 needid_cmp_need(const void *ap, const void *bp, void *dp)
 101 {
 102   const NeedId *a = ap;
 103   const NeedId *b = bp;
 104   int r;
 105   r = b->need - a->need;
 106   if (r)
 107     return r;
 108   return a->map - b->map;
 109 }
 110
 111 static int
 112 needid_cmp_need_s(const void *ap, const void *bp, void *dp)
 113 {
 114   const NeedId *a = ap;
 115   const NeedId *b = bp;
 116   Stringpool *spool = dp;
 117   const char *as;
 118   const char *bs;
 119
 120   int r;
 121   r = b->need - a->need;
 122   if (r)
 123     return r;
 124   as = spool->stringspace + spool->strings[a->map];
 125   bs = spool->stringspace + spool->strings[b->map];
 126   return strcmp(as, bs);
 127 }
 128
 129
 130 /*------------------------------------------------------------------*/
 131 /* output helper routines, used for writing the header */
 132 /* (the data itself is accumulated in memory and written with
 133  * write_blob) */
 134
 135 /*
 136  * unsigned 32-bit
 137  */
 138
 139 static void
 140 write_u32(Repodata *data, unsigned int x)
 141 {
 142   FILE *fp = data->fp;
 143   if (data->error)
 144     return;
 145   if (putc(x >> 24, fp) == EOF ||
 146       putc(x >> 16, fp) == EOF ||
 147       putc(x >> 8, fp) == EOF ||
 148       putc(x, fp) == EOF)
 149     {
 150       data->error = pool_error(data->repo->pool, -1, "write error u32: %s", strerror(errno));
 151     }
 152 }
 153
 154
 155 /*
 156  * unsigned 8-bit
 157  */
 158
 159 static void
 160 write_u8(Repodata *data, unsigned int x)
 161 {
 162   if (data->error)
 163     return;
 164   if (putc(x, data->fp) == EOF)
 165     {
 166       data->error = pool_error(data->repo->pool, -1, "write error u8: %s", strerror(errno));
 167     }
 168 }
 169
 170 /*
 171  * data blob
 172  */
 173
 174 static void
 175 write_blob(Repodata *data, void *blob, int len)
 176 {
 177   if (data->error)
 178     return;
 179   if (len && fwrite(blob, len, 1, data->fp) != 1)
 180     {
 181       data->error = pool_error(data->repo->pool, -1, "write error blob: %s", strerror(errno));
 182     }
 183 }
 184
 185 /*
 186  * Id
 187  */
 188
 189 static void
 190 write_id(Repodata *data, Id x)
 191 {
 192   FILE *fp = data->fp;
 193   if (data->error)
 194     return;
 195   if (x >= (1 << 14))
 196     {
 197       if (x >= (1 << 28))
 198         putc((x >> 28) | 128, fp);
 199       if (x >= (1 << 21))
 200         putc((x >> 21) | 128, fp);
 201       putc((x >> 14) | 128, fp);
 202     }
 203   if (x >= (1 << 7))
 204     putc((x >> 7) | 128, fp);
 205   if (putc(x & 127, fp) == EOF)
 206     {
 207       data->error = pool_error(data->repo->pool, -1, "write error id: %s", strerror(errno));
 208     }
 209 }
 210
 211 static inline void
 212 write_id_eof(Repodata *data, Id x, int eof)
 213 {
 214   if (x >= 64)
 215     x = (x & 63) | ((x & ~63) << 1);
 216   write_id(data, x | (eof ? 0 : 64));
 217 }
 218
 219
 220
 221 static inline void
 222 write_str(Repodata *data, const char *str)
 223 {
 224   if (data->error)
 225     return;
 226   if (fputs(str, data->fp) == EOF || putc(0, data->fp) == EOF)
 227     {
 228       data->error = pool_error(data->repo->pool, -1, "write error str: %s", strerror(errno));
 229     }
 230 }
 231
 232 /*
 233  * Array of Ids
 234  */
 235
 236 static void
 237 write_idarray(Repodata *data, Pool *pool, NeedId *needid, Id *ids)
 238 {
 239   Id id;
 240   if (!ids)
 241     return;
 242   if (!*ids)
 243     {
 244       write_u8(data, 0);
 245       return;
 246     }
 247   for (;;)
 248     {
 249       id = *ids++;
 250       if (needid)
 251         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 252       if (id >= 64)
 253         id = (id & 63) | ((id & ~63) << 1);
 254       if (!*ids)
 255         {
 256           write_id(data, id);
 257           return;
 258         }
 259       write_id(data, id | 64);
 260     }
 261 }
 262
 263 static int
 264 cmp_ids(const void *pa, const void *pb, void *dp)
 265 {
 266   Id a = *(Id *)pa;
 267   Id b = *(Id *)pb;
 268   return a - b;
 269 }
 270
#if 0
/*
 * Disabled code, not compiled.
 *
 * Same sorting and difference encoding as data_addidarray_sort(), but
 * the result is written directly with write_id()/write_u8() instead of
 * being appended to an extdata buffer.
 */
static void
write_idarray_sort(Repodata *data, Pool *pool, NeedId *needid, Id *ids, Id marker)
{
  int len, i;
  Id lids[64], *sids;

  if (!ids)
    return;
  if (!*ids)
    {
      write_u8(data, 0);
      return;
    }
  /* map the first 64 ids into the on-stack array */
  for (len = 0; len < 64 && ids[len]; len++)
    {
      Id id = ids[len];
      if (needid)
        id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
      lids[len] = id;
    }
  if (ids[len])
    {
      /* more than 64 ids: count them and continue in an allocated array */
      for (i = len + 1; ids[i]; i++)
        ;
      sids = solv_malloc2(i, sizeof(Id));
      memcpy(sids, lids, 64 * sizeof(Id));
      for (; ids[len]; len++)
        {
          Id id = ids[len];
          if (needid)
            id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
          sids[len] = id;
        }
    }
  else
    sids = lids;

  /* That bloody solvable:prereqmarker needs to stay in position :-(  */
  if (needid)
    marker = needid[marker].need;
  for (i = 0; i < len; i++)
    if (sids[i] == marker)
      break;
  /* sort the part before and the part after the marker separately */
  if (i > 1)
    solv_sort(sids, i, sizeof(Id), cmp_ids, 0);
  if ((len - i) > 2)
    solv_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);

  Id id, old = 0;

  /* The differencing above produces many runs of ones and twos.  I tried
     fairly elaborate schemes to RLE those, but they give only very mediocre
     improvements in compression, as coding the escapes costs quite some
     space.  Even if they are coded only as bits in IDs.  The best improvement
     was about 2.7% for the whole .solv file.  It's probably better to
     invest some complexity into sharing idarrays, than RLEing.  */
  for (i = 0; i < len - 1; i++)
    {
      id = sids[i];
    /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
       hence all real differences are offset by 1.  Otherwise we would
       have to handle negative differences, which would cost code space for
       the encoding of the sign.  We lose the exact mapping of prereq here,
       but we know the result, so we can recover from that in the reader.  */
      if (id == marker)
        id = old = 0;
      else
        {
          id = id - old + 1;
          old = sids[i];
        }
      /* XXX If difference is zero we have multiple equal elements,
         we might want to skip writing them out.  */
      if (id >= 64)
        id = (id & 63) | ((id & ~63) << 1);
      write_id(data, id | 64);
    }
  /* last element: written without the flag bit */
  id = sids[i];
  if (id == marker)
    id = 0;
  else
    id = id - old + 1;
  if (id >= 64)
    id = (id & 63) | ((id & ~63) << 1);
  write_id(data, id);
  if (sids != lids)
    solv_free(sids);
}
#endif
 361
 362
/* growable byte buffer, filled by the data_add*() helpers */
struct extdata {
  unsigned char *buf;   /* the bytes, enlarged with solv_extend() */
  int len;              /* number of bytes in use */
};
 367
/* state shared by the callbacks of pass 1 (collect needed) and pass 2 (add data) */
struct cbdata {
  Repo *repo;           /* its pool is used for error reporting */
  Repodata *target;     /* holds the keys and schemata of the output, receives errors */

  Stringpool *ownspool; /* if set, string ids are re-entered here (putinownpool) */
  Dirpool *owndirpool;  /* if set, dirs are re-entered here (putinowndirpool) */

  Id *keymap;           /* maps a source key to the target key, 0: key is not written */
  int nkeymap;
  Id *keymapstart;      /* per repodata id: offset of its keys in keymap */

  NeedId *needid;       /* usage counters / output ids, see NeedId */

  Id *schema;           /* schema construction space */
  Id *sp;               /* pointer in above */
  Id *oldschema, *oldsp;        /* saved schema/sp while inside a fixarray */

  Id *solvschemata;
  Id *subschemata;      /* schema ids of array elements, appended in pass 1, read in pass 2 */
  int nsubschemata;     /* number of entries in subschemata */
  int current_sub;      /* index into subschemata */

  struct extdata *extdata;      /* output buffers: [0] is incore, [key] the vertical data of a key */

  Id *dirused;          /* per dir: usage mark set by setdirused(); pass 2 reads the dir id to write from it */

  Id vstart;            /* start of the current vertical data in its buffer, -1: not set */

  Id maxdata;           /* biggest incore buffer growth between two top-level flexarray callbacks */
  Id lastlen;           /* incore buffer length at the last of those callbacks */

  int doingsolvables;   /* working on solvables data */
};
 401
/* allocation block masks (block size - 1) for the growing arrays:
 * NEEDED_BLOCK rounds the needid offset in putinownpool(),
 * SCHEMATA_BLOCK and EXTDATA_BLOCK are handed to solv_extend() */
#define NEEDED_BLOCK 1023
#define SCHEMATA_BLOCK 31
#define SCHEMATADATA_BLOCK 255
#define EXTDATA_BLOCK 4095
 406
 407 static inline void
 408 data_addid(struct extdata *xd, Id sx)
 409 {
 410   unsigned int x = (unsigned int)sx;
 411   unsigned char *dp;
 412
 413   xd->buf = solv_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
 414   dp = xd->buf + xd->len;
 415
 416   if (x >= (1 << 14))
 417     {
 418       if (x >= (1 << 28))
 419         *dp++ = (x >> 28) | 128;
 420       if (x >= (1 << 21))
 421         *dp++ = (x >> 21) | 128;
 422       *dp++ = (x >> 14) | 128;
 423     }
 424   if (x >= (1 << 7))
 425     *dp++ = (x >> 7) | 128;
 426   *dp++ = x & 127;
 427   xd->len = dp - xd->buf;
 428 }
 429
 430 static inline void
 431 data_addideof(struct extdata *xd, Id sx, int eof)
 432 {
 433   unsigned int x = (unsigned int)sx;
 434   unsigned char *dp;
 435
 436   xd->buf = solv_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
 437   dp = xd->buf + xd->len;
 438
 439   if (x >= (1 << 13))
 440     {
 441       if (x >= (1 << 27))
 442         *dp++ = (x >> 27) | 128;
 443       if (x >= (1 << 20))
 444         *dp++ = (x >> 20) | 128;
 445       *dp++ = (x >> 13) | 128;
 446     }
 447   if (x >= (1 << 6))
 448     *dp++ = (x >> 6) | 128;
 449   *dp++ = eof ? (x & 63) : (x & 63) | 64;
 450   xd->len = dp - xd->buf;
 451 }
 452
 453 static void
 454 data_addid64(struct extdata *xd, unsigned int x, unsigned int hx)
 455 {
 456   if (hx)
 457     {
 458       if (hx > 7)
 459         {
 460           data_addid(xd, (Id)(hx >> 3));
 461           xd->buf[xd->len - 1] |= 128;
 462           hx &= 7;
 463         }
 464       data_addid(xd, (Id)(x | 0x80000000));
 465       xd->buf[xd->len - 5] = (x >> 28) | (hx << 4) | 128;
 466     }
 467   else
 468     data_addid(xd, (Id)x);
 469 }
 470
 471 static void
 472 data_addidarray_sort(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker)
 473 {
 474   int len, i;
 475   Id lids[64], *sids;
 476   Id id, old;
 477
 478   if (!ids)
 479     return;
 480   if (!*ids)
 481     {
 482       data_addid(xd, 0);
 483       return;
 484     }
 485   for (len = 0; len < 64 && ids[len]; len++)
 486     {
 487       Id id = ids[len];
 488       if (needid)
 489         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 490       lids[len] = id;
 491     }
 492   if (ids[len])
 493     {
 494       for (i = len + 1; ids[i]; i++)
 495         ;
 496       sids = solv_malloc2(i, sizeof(Id));
 497       memcpy(sids, lids, 64 * sizeof(Id));
 498       for (; ids[len]; len++)
 499         {
 500           Id id = ids[len];
 501           if (needid)
 502             id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 503           sids[len] = id;
 504         }
 505     }
 506   else
 507     sids = lids;
 508
 509   /* That bloody solvable:prereqmarker needs to stay in position :-(  */
 510   if (needid)
 511     marker = needid[marker].need;
 512   for (i = 0; i < len; i++)
 513     if (sids[i] == marker)
 514       break;
 515   if (i > 1)
 516     solv_sort(sids, i, sizeof(Id), cmp_ids, 0);
 517   if ((len - i) > 2)
 518     solv_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);
 519
 520   old = 0;
 521
 522   /* The differencing above produces many runs of ones and twos.  I tried
 523      fairly elaborate schemes to RLE those, but they give only very mediocre
 524      improvements in compression, as coding the escapes costs quite some
 525      space.  Even if they are coded only as bits in IDs.  The best improvement
 526      was about 2.7% for the whole .solv file.  It's probably better to
 527      invest some complexity into sharing idarrays, than RLEing.  */
 528   for (i = 0; i < len - 1; i++)
 529     {
 530       id = sids[i];
 531     /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
 532        hence all real differences are offsetted by 1.  Otherwise we would
 533        have to handle negative differences, which would cost code space for
 534        the encoding of the sign.  We loose the exact mapping of prereq here,
 535        but we know the result, so we can recover from that in the reader.  */
 536       if (id == marker)
 537         id = old = 0;
 538       else
 539         {
 540           id = id - old + 1;
 541           old = sids[i];
 542         }
 543       /* XXX If difference is zero we have multiple equal elements,
 544          we might want to skip writing them out.  */
 545       data_addideof(xd, id, 0);
 546     }
 547   id = sids[i];
 548   if (id == marker)
 549     id = 0;
 550   else
 551     id = id - old + 1;
 552   data_addideof(xd, id, 1);
 553   if (sids != lids)
 554     solv_free(sids);
 555 }
 556
 557 static inline void
 558 data_addblob(struct extdata *xd, unsigned char *blob, int len)
 559 {
 560   xd->buf = solv_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
 561   memcpy(xd->buf + xd->len, blob, len);
 562   xd->len += len;
 563 }
 564
 565 static inline void
 566 data_addu32(struct extdata *xd, unsigned int num)
 567 {
 568   unsigned char d[4];
 569   d[0] = num >> 24;
 570   d[1] = num >> 16;
 571   d[2] = num >> 8;
 572   d[3] = num;
 573   data_addblob(xd, d, 4);
 574 }
 575
 576 static Id
 577 putinownpool(struct cbdata *cbdata, Stringpool *ss, Id id)
 578 {
 579   const char *str = stringpool_id2str(ss, id);
 580   id = stringpool_str2id(cbdata->ownspool, str, 1);
 581   if (id >= cbdata->needid[0].map)
 582     {
 583       int oldoff = cbdata->needid[0].map;
 584       int newoff = (id + 1 + NEEDED_BLOCK) & ~NEEDED_BLOCK;
 585       int nrels = cbdata->repo->pool->nrels;
 586       cbdata->needid = solv_realloc2(cbdata->needid, newoff + nrels, sizeof(NeedId));
 587       if (nrels)
 588         memmove(cbdata->needid + newoff, cbdata->needid + oldoff, nrels * sizeof(NeedId));
 589       memset(cbdata->needid + oldoff, 0, (newoff - oldoff) * sizeof(NeedId));
 590       cbdata->needid[0].map = newoff;
 591     }
 592   return id;
 593 }
 594
 595 static Id
 596 putinowndirpool(struct cbdata *cbdata, Repodata *data, Dirpool *dp, Id dir)
 597 {
 598   Id compid, parent;
 599
 600   parent = dirpool_parent(dp, dir);
 601   if (parent)
 602     parent = putinowndirpool(cbdata, data, dp, parent);
 603   compid = dp->dirs[dir];
 604   if (cbdata->ownspool && compid > 1)
 605     compid = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, compid);
 606   return dirpool_add_dir(cbdata->owndirpool, parent, compid, 1);
 607 }
 608
 609 /*
 610  * collect usage information about the dirs
 611  * 1: dir used, no child of dir used
 612  * 2: dir used as parent of another used dir
 613  */
 614 static inline void
 615 setdirused(struct cbdata *cbdata, Dirpool *dp, Id dir)
 616 {
 617   if (cbdata->dirused[dir])
 618     return;
 619   cbdata->dirused[dir] = 1;
 620   while ((dir = dirpool_parent(dp, dir)) != 0)
 621     {
 622       if (cbdata->dirused[dir] == 2)
 623         return;
 624       if (cbdata->dirused[dir])
 625         {
 626           cbdata->dirused[dir] = 2;
 627           return;
 628         }
 629       cbdata->dirused[dir] = 2;
 630     }
 631   cbdata->dirused[0] = 2;
 632 }
 633
/*
 * pass 1 callback:
 * collect key/id/dirid usage information, create needed schemas
 *
 * Returns SEARCH_NEXT_KEY for keys that are not written, 0 otherwise.
 * An unsupported nested fixarray is reported in cbdata->target->error.
 */
static int
repo_write_collect_needed(struct cbdata *cbdata, Repo *repo, Repodata *data, Repokey *key, KeyValue *kv)
{
  Id id;
  int rm;

  if (key->name == REPOSITORY_SOLVABLES)
    return SEARCH_NEXT_KEY;     /* we do not want this one */

  /* hack: ignore some keys, see BUGS */
  if (data->repodataid != data->repo->nrepodata - 1)
    if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
      return SEARCH_NEXT_KEY;

  /* translate the key of this repodata into the key of the target */
  rm = cbdata->keymap[cbdata->keymapstart[data->repodataid] + (key - data->keys)];
  if (!rm)
    return SEARCH_NEXT_KEY;     /* we do not want this one */

  /* record key in schema */
  /* (not if it repeats the last recorded key, and for a fixarray only when kv->eof is 0) */
  if ((key->type != REPOKEY_TYPE_FIXARRAY || kv->eof == 0)
      && (cbdata->sp == cbdata->schema || cbdata->sp[-1] != rm))
    *cbdata->sp++ = rm;

  switch(key->type)
    {
      case REPOKEY_TYPE_ID:
      case REPOKEY_TYPE_IDARRAY:
        /* count the id, after moving it to our own pool if we have one */
        id = kv->id;
        if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
          id = putinownpool(cbdata, data->localpool ? &data->spool : &repo->pool->ss, id);
        incneedid(repo->pool, id, cbdata->needid);
        break;
      case REPOKEY_TYPE_DIR:
      case REPOKEY_TYPE_DIRNUMNUMARRAY:
      case REPOKEY_TYPE_DIRSTRARRAY:
        /* either copy the dir to our own dirpool or mark it as used */
        id = kv->id;
        if (cbdata->owndirpool)
          putinowndirpool(cbdata, data, &data->dirpool, id);
        else
          setdirused(cbdata, &data->dirpool, id);
        break;
      case REPOKEY_TYPE_FIXARRAY:
        if (kv->eof == 0)
          {
            /* eof 0: save the current schema and continue in a fresh one */
            if (cbdata->oldschema)
              {
                cbdata->target->error = pool_error(cbdata->repo->pool, -1, "nested fixarray structs not yet implemented");
                return SEARCH_NEXT_KEY;
              }
            cbdata->oldschema = cbdata->schema;
            cbdata->oldsp = cbdata->sp;
            cbdata->schema = solv_calloc(cbdata->target->nkeys, sizeof(Id));
            cbdata->sp = cbdata->schema;
          }
        else if (kv->eof == 1)
          {
            /* eof 1: register the collected schema and start over */
            cbdata->current_sub++;
            *cbdata->sp = 0;
            cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
            cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, cbdata->schema, 1);
#if 0
            fprintf(stderr, "Have schema %d\n", cbdata->subschemata[cbdata->nsubschemata-1]);
#endif
            cbdata->sp = cbdata->schema;
          }
        else
          {
            /* any other eof value: back to the saved schema */
            solv_free(cbdata->schema);
            cbdata->schema = cbdata->oldschema;
            cbdata->sp = cbdata->oldsp;
            cbdata->oldsp = cbdata->oldschema = 0;
          }
        break;
      case REPOKEY_TYPE_FLEXARRAY:
        if (kv->entry == 0)
          {
            if (kv->eof != 2)
              *cbdata->sp++ = 0;        /* mark start */
          }
        else
          {
            /* just finished a schema, rewind */
            /* (back to the entry after the last zero, which is the start mark) */
            Id *sp = cbdata->sp - 1;
            *sp = 0;
            while (sp[-1])
              sp--;
            cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
            cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, sp, 1);
            /* eof 2 also drops the start mark, otherwise it is kept */
            cbdata->sp = kv->eof == 2 ? sp - 1: sp;
          }
        break;
      default:
        break;
    }
  return 0;
}
 734
 735 static int
 736 repo_write_cb_needed(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
 737 {
 738   struct cbdata *cbdata = vcbdata;
 739   Repo *repo = data->repo;
 740
 741 #if 0
 742   if (s)
 743     fprintf(stderr, "solvable %d (%s): key (%d)%s %d\n", s ? s - repo->pool->solvables : 0, s ? pool_id2str(repo->pool, s->name) : "", key->name, pool_id2str(repo->pool, key->name), key->type);
 744 #endif
 745   return repo_write_collect_needed(cbdata, repo, data, key, kv);
 746 }
 747
 748
/*
 * pass 2 callback:
 * encode all of the data into the correct buffers
 *
 * Data of keys stored as KEY_STORAGE_VERTICAL_OFFSET goes to the buffer
 * of the key, everything else to the incore buffer cbdata->extdata[0].
 * Returns SEARCH_NEXT_KEY for keys that are not written, 0 otherwise.
 * An unknown key type is reported in cbdata->target->error.
 */

static int
repo_write_adddata(struct cbdata *cbdata, Repodata *data, Repokey *key, KeyValue *kv)
{
  int rm;
  Id id;
  unsigned int u32;
  unsigned char v[4];
  struct extdata *xd;
  NeedId *needid;

  if (key->name == REPOSITORY_SOLVABLES)
    return SEARCH_NEXT_KEY;

  /* hack: ignore some keys, see BUGS */
  if (data->repodataid != data->repo->nrepodata - 1)
    if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
      return SEARCH_NEXT_KEY;

  /* translate the key of this repodata into the key of the target */
  rm = cbdata->keymap[cbdata->keymapstart[data->repodataid] + (key - data->keys)];
  if (!rm)
    return SEARCH_NEXT_KEY;     /* we do not want this one */

  if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET)
    {
      xd = cbdata->extdata + rm;        /* vertical buffer */
      /* remember where the data of this key starts */
      if (cbdata->vstart == -1)
        cbdata->vstart = xd->len;
    }
  else
    xd = cbdata->extdata + 0;           /* incore buffer */
  switch(key->type)
    {
      case REPOKEY_TYPE_VOID:
      case REPOKEY_TYPE_CONSTANT:
      case REPOKEY_TYPE_CONSTANTID:
        /* nothing is added for these types */
        break;
      case REPOKEY_TYPE_ID:
        id = kv->id;
        if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
          id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
        /* fetch needid only now, putinownpool() may have reallocated it */
        needid = cbdata->needid;
        id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
        data_addid(xd, id);
        break;
      case REPOKEY_TYPE_IDARRAY:
        id = kv->id;
        if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
          id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
        /* fetch needid only now, putinownpool() may have reallocated it */
        needid = cbdata->needid;
        id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
        data_addideof(xd, id, kv->eof);
        break;
      case REPOKEY_TYPE_STR:
        /* string including its terminating zero byte */
        data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
        break;
      case REPOKEY_TYPE_MD5:
        data_addblob(xd, (unsigned char *)kv->str, SIZEOF_MD5);
        break;
      case REPOKEY_TYPE_SHA1:
        data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA1);
        break;
      case REPOKEY_TYPE_SHA256:
        data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA256);
        break;
      case REPOKEY_TYPE_U32:
        /* four bytes, most significant first */
        u32 = kv->num;
        v[0] = u32 >> 24;
        v[1] = u32 >> 16;
        v[2] = u32 >> 8;
        v[3] = u32;
        data_addblob(xd, v, 4);
        break;
      case REPOKEY_TYPE_NUM:
        data_addid64(xd, kv->num, kv->num2);
        break;
      case REPOKEY_TYPE_DIR:
        id = kv->id;
        if (cbdata->owndirpool)
          id = putinowndirpool(cbdata, data, &data->dirpool, id);
        /* the dir id to write is looked up in dirused */
        id = cbdata->dirused[id];
        data_addid(xd, id);
        break;
      case REPOKEY_TYPE_BINARY:
        /* length, followed by that many bytes */
        data_addid(xd, kv->num);
        if (kv->num)
          data_addblob(xd, (unsigned char *)kv->str, kv->num);
        break;
      case REPOKEY_TYPE_DIRNUMNUMARRAY:
        id = kv->id;
        if (cbdata->owndirpool)
          id = putinowndirpool(cbdata, data, &data->dirpool, id);
        id = cbdata->dirused[id];
        data_addid(xd, id);
        data_addid(xd, kv->num);
        data_addideof(xd, kv->num2, kv->eof);
        break;
      case REPOKEY_TYPE_DIRSTRARRAY:
        id = kv->id;
        if (cbdata->owndirpool)
          id = putinowndirpool(cbdata, data, &data->dirpool, id);
        id = cbdata->dirused[id];
        data_addideof(xd, id, kv->eof);
        data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
        break;
      case REPOKEY_TYPE_FIXARRAY:
        if (kv->eof == 0)
          {
            /* kv->num followed by the current sub-schema id, nothing if kv->num is zero */
            if (kv->num)
              {
                data_addid(xd, kv->num);
                data_addid(xd, cbdata->subschemata[cbdata->current_sub]);
#if 0
                fprintf(stderr, "writing %d %d\n", kv->num, cbdata->subschemata[cbdata->current_sub]);
#endif
              }
          }
        else if (kv->eof == 1)
          {
            cbdata->current_sub++;
          }
        break;
      case REPOKEY_TYPE_FLEXARRAY:
        /* kv->num once at entry 0, then one sub-schema id per callback unless eof is 2 */
        if (!kv->entry)
          data_addid(xd, kv->num);
        if (kv->eof != 2)
          data_addid(xd, cbdata->subschemata[cbdata->current_sub++]);
        /* track the biggest incore growth between two such top-level callbacks */
        if (xd == cbdata->extdata + 0 && !kv->parent && !cbdata->doingsolvables)
          {
            if (xd->len - cbdata->lastlen > cbdata->maxdata)
              cbdata->maxdata = xd->len - cbdata->lastlen;
            cbdata->lastlen = xd->len;
          }
        break;
      default:
        cbdata->target->error = pool_error(cbdata->repo->pool, -1, "unknown type for %d: %d\n", key->name, key->type);
        break;
    }
  if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET && kv->eof)
    {
      /* we can re-use old data in the blob here! */
      data_addid(cbdata->extdata + 0, cbdata->vstart);                  /* add offset into incore data */
      data_addid(cbdata->extdata + 0, xd->len - cbdata->vstart);        /* add length into incore data */
      cbdata->vstart = -1;
    }
  return 0;
}
 900
 901 static int
 902 repo_write_cb_adddata(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
 903 {
 904   struct cbdata *cbdata = vcbdata;
 905   return repo_write_adddata(cbdata, data, key, kv);
 906 }
 907
 908 /* traverse through directory with first child "dir" */
 909 static int
 910 traverse_dirs(Dirpool *dp, Id *dirmap, Id n, Id dir, Id *used)
 911 {
 912   Id sib, child;
 913   Id parent, lastn;
 914
 915   parent = n;
 916   /* special case for '/', which has to come first */
 917   if (parent == 1)
 918     dirmap[n++] = 1;
 919   for (sib = dir; sib; sib = dirpool_sibling(dp, sib))
 920     {
 921       if (used && !used[sib])
 922         continue;
 923       if (sib == 1 && parent == 1)
 924         continue;       /* already did that one above */
 925       dirmap[n++] = sib;
 926     }
 927
 928   /* now go through all the siblings we just added and
 929    * do recursive calls on them */
 930   lastn = n;
 931   for (; parent < lastn; parent++)
 932     {
 933       sib = dirmap[parent];
 934       if (used && used[sib] != 2)       /* 2: used as parent */
 935         continue;
 936       child = dirpool_child(dp, sib);
 937       if (child)
 938         {
 939           dirmap[n++] = -parent;        /* start new block */
 940           n = traverse_dirs(dp, dirmap, n, child, used);
 941         }
 942     }
 943   return n;
 944 }
 945
 946 static void
 947 write_compressed_page(Repodata *data, unsigned char *page, int len)
 948 {
 949   int clen;
 950   unsigned char cpage[REPOPAGE_BLOBSIZE];
 951
 952   clen = repopagestore_compress_page(page, len, cpage, len - 1);
 953   if (!clen)
 954     {
 955       write_u32(data, len * 2);
 956       write_blob(data, page, len);
 957     }
 958   else
 959     {
 960       write_u32(data, clen * 2 + 1);
 961       write_blob(data, cpage, clen);
 962     }
 963 }
 964
/* keys for which repo_write_stdkeyfilter() selects
 * KEY_STORAGE_VERTICAL_OFFSET, zero terminated */
static Id verticals[] = {
  SOLVABLE_AUTHORS,
  SOLVABLE_DESCRIPTION,
  SOLVABLE_MESSAGEDEL,
  SOLVABLE_MESSAGEINS,
  SOLVABLE_EULA,
  SOLVABLE_DISKUSAGE,
  SOLVABLE_FILELIST,
  SOLVABLE_CHECKSUM,
  DELTA_CHECKSUM,
  DELTA_SEQ_NUM,
  SOLVABLE_PKGID,
  SOLVABLE_HDRID,
  SOLVABLE_LEADSIGID,
  SOLVABLE_CHANGELOG_AUTHOR,
  SOLVABLE_CHANGELOG_TEXT,
  0
};
 983
/* key name prefixes for which repo_write_stdkeyfilter() selects
 * KEY_STORAGE_VERTICAL_OFFSET, zero terminated */
static char *languagetags[] = {
  "solvable:summary:",
  "solvable:description:",
  "solvable:messageins:",
  "solvable:messagedel:",
  "solvable:eula:",
  0
};
 992
 993 int
 994 repo_write_stdkeyfilter(Repo *repo, Repokey *key, void *kfdata)
 995 {
 996   const char *keyname;
 997   int i;
 998
 999   for (i = 0; verticals[i]; i++)
1000     if (key->name == verticals[i])
1001       return KEY_STORAGE_VERTICAL_OFFSET;
1002   keyname = pool_id2str(repo->pool, key->name);
1003   for (i = 0; languagetags[i] != 0; i++)
1004     if (!strncmp(keyname, languagetags[i], strlen(languagetags[i])))
1005       return KEY_STORAGE_VERTICAL_OFFSET;
1006   return KEY_STORAGE_INCORE;
1007 }
1008
1009 /*
1010  * Repo
1011  */
1012
1013 /*
1014  * the code works the following way:
1015  *
1016  * 1) find which keys should be written
1017  * 2) collect usage information for keys/ids/dirids, create schema
1018  *    data
1019  * 3) use usage information to create mapping tables, so that often
1020  *    used ids get a lower number
1021  * 4) encode data into buffers using the mapping tables
1022  * 5) write everything to disk
1023  */
1024 int
1025 repo_write_filtered(Repo *repo, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Queue *keyq)
1026 {
1027   Pool *pool = repo->pool;
1028   int i, j, n;
1029   Solvable *s;
1030   NeedId *needid;
1031   int nstrings, nrels;
1032   unsigned int sizeid;
1033   unsigned int solv_flags;
1034   Reldep *ran;
1035   Id *idarraydata;
1036
1037   Id id, *sp;
1038
1039   Id *dirmap;
1040   int ndirmap;
1041   Id *keyused;
1042   unsigned char *repodataused;
1043   int anyrepodataused = 0;
1044   int anysolvableused = 0;
1045
1046   struct cbdata cbdata;
1047   int clonepool;
1048   Repokey *key;
1049   int poolusage, dirpoolusage, idused, dirused;
1050   int reloff;
1051
1052   Repodata *data, *dirpooldata;
1053
1054   Repodata target;
1055
1056   Stringpool *spool;
1057   Dirpool *dirpool;
1058
1059   Id mainschema;
1060
1061   struct extdata *xd;
1062
1063   Id type_constantid = REPOKEY_TYPE_CONSTANTID;
1064
1065
1066   memset(&cbdata, 0, sizeof(cbdata));
1067   cbdata.repo = repo;
1068   cbdata.target = &target;
1069
1070   repodata_initdata(&target, repo, 1);
1071
1072   /* go through all repodata and find the keys we need */
1073   /* also unify keys */
1074   /*          keymapstart - maps repo number to keymap offset */
1075   /*          keymap      - maps repo key to my key, 0 -> not used */
1076
1077   /* start with all KEY_STORAGE_SOLVABLE ids */
1078
1079   n = ID_NUM_INTERNAL;
1080   FOR_REPODATAS(repo, i, data)
1081     n += data->nkeys;
1082   cbdata.keymap = solv_calloc(n, sizeof(Id));
1083   cbdata.keymapstart = solv_calloc(repo->nrepodata, sizeof(Id));
1084   repodataused = solv_calloc(repo->nrepodata, 1);
1085
1086   clonepool = 0;
1087   poolusage = 0;
1088
1089   /* add keys for STORAGE_SOLVABLE */
1090   for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
1091     {
1092       Repokey keyd;
1093       keyd.name = i;
1094       if (i < SOLVABLE_PROVIDES)
1095         keyd.type = REPOKEY_TYPE_ID;
1096       else if (i < RPM_RPMDBID)
1097         keyd.type = REPOKEY_TYPE_REL_IDARRAY;
1098       else
1099         keyd.type = REPOKEY_TYPE_NUM;
1100       keyd.size = 0;
1101       keyd.storage = KEY_STORAGE_SOLVABLE;
1102       if (keyfilter)
1103         {
1104           keyd.storage = keyfilter(repo, &keyd, kfdata);
1105           if (keyd.storage == KEY_STORAGE_DROPPED)
1106             continue;
1107           keyd.storage = KEY_STORAGE_SOLVABLE;
1108         }
1109       poolusage = 1;
1110       clonepool = 1;
1111       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1112     }
1113
1114   if (repo->nsolvables)
1115     {
1116       Repokey keyd;
1117       keyd.name = REPOSITORY_SOLVABLES;
1118       keyd.type = REPOKEY_TYPE_FLEXARRAY;
1119       keyd.size = 0;
1120       keyd.storage = KEY_STORAGE_INCORE;
1121       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1122     }
1123
1124   dirpoolusage = 0;
1125
1126   spool = 0;
1127   dirpool = 0;
1128   dirpooldata = 0;
1129   n = ID_NUM_INTERNAL;
1130   FOR_REPODATAS(repo, i, data)
1131     {
1132       cbdata.keymapstart[i] = n;
1133       cbdata.keymap[n++] = 0;   /* key 0 */
1134       idused = 0;
1135       dirused = 0;
1136       if (keyfilter)
1137         {
1138           Repokey keyd;
1139           /* check if we want this repodata */
1140           memset(&keyd, 0, sizeof(keyd));
1141           keyd.name = 1;
1142           keyd.type = 1;
1143           keyd.size = i;
1144           if (keyfilter(repo, &keyd, kfdata) == -1)
1145             continue;
1146         }
1147       for (j = 1; j < data->nkeys; j++, n++)
1148         {
1149           key = data->keys + j;
1150           if (key->name == REPOSITORY_SOLVABLES && key->type == REPOKEY_TYPE_FLEXARRAY)
1151             {
1152               cbdata.keymap[n] = cbdata.keymap[key->name];
1153               continue;
1154             }
1155           if (key->type == REPOKEY_TYPE_DELETED)
1156             {
1157               cbdata.keymap[n] = 0;
1158               continue;
1159             }
1160           if (key->type == REPOKEY_TYPE_CONSTANTID && data->localpool)
1161             {
1162               Repokey keyd = *key;
1163               keyd.size = repodata_globalize_id(data, key->size, 1);
1164               id = repodata_key2id(&target, &keyd, 0);
1165             }
1166           else
1167             id = repodata_key2id(&target, key, 0);
1168           if (!id)
1169             {
1170               Repokey keyd = *key;
1171               keyd.storage = KEY_STORAGE_INCORE;
1172               if (keyd.type == REPOKEY_TYPE_CONSTANTID)
1173                 keyd.size = repodata_globalize_id(data, key->size, 1);
1174               else if (keyd.type != REPOKEY_TYPE_CONSTANT)
1175                 keyd.size = 0;
1176               if (keyfilter)
1177                 {
1178                   keyd.storage = keyfilter(repo, &keyd, kfdata);
1179                   if (keyd.storage == KEY_STORAGE_DROPPED)
1180                     {
1181                       cbdata.keymap[n] = 0;
1182                       continue;
1183                     }
1184                 }
1185               id = repodata_key2id(&target, &keyd, 1);
1186             }
1187           cbdata.keymap[n] = id;
1188           /* load repodata if not already loaded */
1189           if (data->state == REPODATA_STUB)
1190             {
1191               if (data->loadcallback)
1192                 data->loadcallback(data);
1193               else
1194                 data->state = REPODATA_ERROR;
1195               if (data->state != REPODATA_ERROR)
1196                 {
1197                   /* redo this repodata! */
1198                   j = 0;
1199                   n = cbdata.keymapstart[i];
1200                   continue;
1201                 }
1202             }
1203           if (data->state == REPODATA_ERROR)
1204             {
1205               /* too bad! */
1206               cbdata.keymap[n] = 0;
1207               continue;
1208             }
1209
1210           repodataused[i] = 1;
1211           anyrepodataused = 1;
1212           if (key->type == REPOKEY_TYPE_CONSTANTID || key->type == REPOKEY_TYPE_ID ||
1213               key->type == REPOKEY_TYPE_IDARRAY || key->type == REPOKEY_TYPE_REL_IDARRAY)
1214             idused = 1;
1215           else if (key->type == REPOKEY_TYPE_DIR || key->type == REPOKEY_TYPE_DIRNUMNUMARRAY || key->type == REPOKEY_TYPE_DIRSTRARRAY)
1216             {
1217               idused = 1;       /* dirs also use ids */
1218               dirused = 1;
1219             }
1220         }
1221       if (idused)
1222         {
1223           if (data->localpool)
1224             {
1225               if (poolusage)
1226                 poolusage = 3;  /* need own pool */
1227               else
1228                 {
1229                   poolusage = 2;
1230                   spool = &data->spool;
1231                 }
1232             }
1233           else
1234             {
1235               if (poolusage == 0)
1236                 poolusage = 1;
1237               else if (poolusage != 1)
1238                 poolusage = 3;  /* need own pool */
1239             }
1240         }
1241       if (dirused)
1242         {
1243           if (dirpoolusage)
1244             dirpoolusage = 3;   /* need own dirpool */
1245           else
1246             {
1247               dirpoolusage = 2;
1248               dirpool = &data->dirpool;
1249               dirpooldata = data;
1250             }
1251         }
1252     }
1253   cbdata.nkeymap = n;
1254
1255   /* 0: no pool needed at all */
1256   /* 1: use global pool */
1257   /* 2: use repodata local pool */
1258   /* 3: need own pool */
1259   if (poolusage == 3)
1260     {
1261       spool = &target.spool;
1262       /* hack: reuse global pool data so we don't have to map pool ids */
1263       if (clonepool)
1264         {
1265           stringpool_free(spool);
1266           stringpool_clone(spool, &pool->ss);
1267         }
1268       cbdata.ownspool = spool;
1269     }
1270   else if (poolusage == 0 || poolusage == 1)
1271     {
1272       poolusage = 1;
1273       spool = &pool->ss;
1274     }
1275
1276   if (dirpoolusage == 3)
1277     {
1278       dirpool = &target.dirpool;
1279       dirpooldata = 0;
1280       cbdata.owndirpool = dirpool;
1281     }
1282   else if (dirpool)
1283     cbdata.dirused = solv_calloc(dirpool->ndirs, sizeof(Id));
1284
1285
1286 /********************************************************************/
1287 #if 0
1288 fprintf(stderr, "poolusage: %d\n", poolusage);
1289 fprintf(stderr, "dirpoolusage: %d\n", dirpoolusage);
1290 fprintf(stderr, "nkeys: %d\n", target.nkeys);
1291 for (i = 1; i < target.nkeys; i++)
1292   fprintf(stderr, "  %2d: %s[%d] %d %d %d\n", i, pool_id2str(pool, target.keys[i].name), target.keys[i].name, target.keys[i].type, target.keys[i].size, target.keys[i].storage);
1293 #endif
1294
1295   /* copy keys if requested */
1296   if (keyq)
1297     {
1298       queue_empty(keyq);
1299       for (i = 1; i < target.nkeys; i++)
1300         queue_push2(keyq, target.keys[i].name, target.keys[i].type);
1301     }
1302
1303   if (poolusage > 1)
1304     {
1305       /* put all the keys we need in our string pool */
1306       /* put mapped ids right into target.keys */
1307       for (i = 1, key = target.keys + i; i < target.nkeys; i++, key++)
1308         {
1309           key->name = stringpool_str2id(spool, pool_id2str(pool, key->name), 1);
1310           if (key->type == REPOKEY_TYPE_CONSTANTID)
1311             {
1312               key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1313               type_constantid = key->type;
1314               key->size = stringpool_str2id(spool, pool_id2str(pool, key->size), 1);
1315             }
1316           else
1317             key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1318         }
1319       if (poolusage == 2)
1320         stringpool_freehash(spool);     /* free some mem */
1321     }
1322
1323
1324 /********************************************************************/
1325
1326   /* set needed count of all strings and rels,
1327    * find which keys are used in the solvables
1328    * put all strings in own spool
1329    */
1330
1331   reloff = spool->nstrings;
1332   if (poolusage == 3)
1333     reloff = (reloff + NEEDED_BLOCK) & ~NEEDED_BLOCK;
1334
1335   needid = calloc(reloff + pool->nrels, sizeof(*needid));
1336   needid[0].map = reloff;
1337
1338   cbdata.needid = needid;
1339   cbdata.schema = solv_calloc(target.nkeys, sizeof(Id));
1340   cbdata.sp = cbdata.schema;
1341   cbdata.solvschemata = solv_calloc(repo->nsolvables, sizeof(Id));
1342
1343   /* create main schema */
1344   cbdata.sp = cbdata.schema;
1345   /* collect all other data from all repodatas */
1346   /* XXX: merge arrays of equal keys? */
1347   FOR_REPODATAS(repo, j, data)
1348     {
1349       if (!repodataused[j])
1350         continue;
1351       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1352     }
1353   sp = cbdata.sp;
1354   /* add solvables if needed (may revert later) */
1355   if (repo->nsolvables)
1356     {
1357       *sp++ = cbdata.keymap[REPOSITORY_SOLVABLES];
1358       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size++;
1359     }
1360   *sp = 0;
1361   mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1362
1363   idarraydata = repo->idarraydata;
1364
1365   anysolvableused = 0;
1366   cbdata.doingsolvables = 1;
1367   for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1368     {
1369       if (s->repo != repo)
1370         continue;
1371
1372       /* set schema info, keep in sync with further down */
1373       sp = cbdata.schema;
1374       if (cbdata.keymap[SOLVABLE_NAME])
1375         {
1376           *sp++ = cbdata.keymap[SOLVABLE_NAME];
1377           needid[s->name].need++;
1378         }
1379       if (cbdata.keymap[SOLVABLE_ARCH])
1380         {
1381           *sp++ = cbdata.keymap[SOLVABLE_ARCH];
1382           needid[s->arch].need++;
1383         }
1384       if (cbdata.keymap[SOLVABLE_EVR])
1385         {
1386           *sp++ = cbdata.keymap[SOLVABLE_EVR];
1387           needid[s->evr].need++;
1388         }
1389       if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1390         {
1391           *sp++ = cbdata.keymap[SOLVABLE_VENDOR];
1392           needid[s->vendor].need++;
1393         }
1394       if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1395         {
1396           *sp++ = cbdata.keymap[SOLVABLE_PROVIDES];
1397           target.keys[cbdata.keymap[SOLVABLE_PROVIDES]].size += incneedidarray(pool, idarraydata + s->provides, needid);
1398         }
1399       if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1400         {
1401           *sp++ = cbdata.keymap[SOLVABLE_OBSOLETES];
1402           target.keys[cbdata.keymap[SOLVABLE_OBSOLETES]].size += incneedidarray(pool, idarraydata + s->obsoletes, needid);
1403         }
1404       if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1405         {
1406           *sp++ = cbdata.keymap[SOLVABLE_CONFLICTS];
1407           target.keys[cbdata.keymap[SOLVABLE_CONFLICTS]].size += incneedidarray(pool, idarraydata + s->conflicts, needid);
1408         }
1409       if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1410         {
1411           *sp++ = cbdata.keymap[SOLVABLE_REQUIRES];
1412           target.keys[cbdata.keymap[SOLVABLE_REQUIRES]].size += incneedidarray(pool, idarraydata + s->requires, needid);
1413         }
1414       if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1415         {
1416           *sp++ = cbdata.keymap[SOLVABLE_RECOMMENDS];
1417           target.keys[cbdata.keymap[SOLVABLE_RECOMMENDS]].size += incneedidarray(pool, idarraydata + s->recommends, needid);
1418         }
1419       if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1420         {
1421           *sp++ = cbdata.keymap[SOLVABLE_SUGGESTS];
1422           target.keys[cbdata.keymap[SOLVABLE_SUGGESTS]].size += incneedidarray(pool, idarraydata + s->suggests, needid);
1423         }
1424       if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1425         {
1426           *sp++ = cbdata.keymap[SOLVABLE_SUPPLEMENTS];
1427           target.keys[cbdata.keymap[SOLVABLE_SUPPLEMENTS]].size += incneedidarray(pool, idarraydata + s->supplements, needid);
1428         }
1429       if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1430         {
1431           *sp++ = cbdata.keymap[SOLVABLE_ENHANCES];
1432           target.keys[cbdata.keymap[SOLVABLE_ENHANCES]].size += incneedidarray(pool, idarraydata + s->enhances, needid);
1433         }
1434       if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1435         {
1436           *sp++ = cbdata.keymap[RPM_RPMDBID];
1437           target.keys[cbdata.keymap[RPM_RPMDBID]].size++;
1438         }
1439       cbdata.sp = sp;
1440
1441       if (anyrepodataused)
1442         {
1443           FOR_REPODATAS(repo, j, data)
1444             {
1445               if (!repodataused[j])
1446                 continue;
1447               if (i < data->start || i >= data->end)
1448                 continue;
1449               repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1450               needid = cbdata.needid;
1451             }
1452         }
1453       *cbdata.sp = 0;
1454       cbdata.solvschemata[n] = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1455       if (cbdata.solvschemata[n])
1456         anysolvableused = 1;
1457       n++;
1458     }
1459   cbdata.doingsolvables = 0;
1460   assert(n == repo->nsolvables);
1461
1462   if (repo->nsolvables && !anysolvableused)
1463     {
1464       /* strip off solvable from the main schema */
1465       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size = 0;
1466       sp = cbdata.schema;
1467       for (i = 0; target.schemadata[target.schemata[mainschema] + i]; i++)
1468         {
1469           *sp = target.schemadata[target.schemata[mainschema] + i];
1470           if (*sp != cbdata.keymap[REPOSITORY_SOLVABLES])
1471             sp++;
1472         }
1473       assert(target.schemadatalen == target.schemata[mainschema] + i + 1);
1474       *sp = 0;
1475       target.schemadatalen = target.schemata[mainschema];
1476       target.nschemata--;
1477       repodata_free_schemahash(&target);
1478       mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1479     }
1480
1481 /********************************************************************/
1482
1483   /* remove unused keys */
1484   keyused = solv_calloc(target.nkeys, sizeof(Id));
1485   for (i = 1; i < target.schemadatalen; i++)
1486     keyused[target.schemadata[i]] = 1;
1487   keyused[0] = 0;
1488   for (n = i = 1; i < target.nkeys; i++)
1489     {
1490       if (!keyused[i])
1491         continue;
1492       keyused[i] = n;
1493       if (i != n)
1494         {
1495           target.keys[n] = target.keys[i];
1496           if (keyq)
1497             {
1498               keyq->elements[2 * n - 2] = keyq->elements[2 * i - 2];
1499               keyq->elements[2 * n - 1] = keyq->elements[2 * i - 1];
1500             }
1501         }
1502       n++;
1503     }
1504   target.nkeys = n;
1505   if (keyq)
1506     queue_truncate(keyq, 2 * n - 2);
1507
1508   /* update schema data to the new key ids */
1509   for (i = 1; i < target.schemadatalen; i++)
1510     target.schemadata[i] = keyused[target.schemadata[i]];
1511   /* update keymap to the new key ids */
1512   for (i = 0; i < cbdata.nkeymap; i++)
1513     cbdata.keymap[i] = keyused[cbdata.keymap[i]];
1514   keyused = solv_free(keyused);
1515
1516   /* increment needid of the used keys, they are already mapped to
1517    * the correct string pool  */
1518   for (i = 1; i < target.nkeys; i++)
1519     {
1520       if (target.keys[i].type == type_constantid)
1521         needid[target.keys[i].size].need++;
1522       needid[target.keys[i].name].need++;
1523       needid[target.keys[i].type].need++;
1524     }
1525
1526 /********************************************************************/
1527
1528   if (dirpool && cbdata.dirused && !cbdata.dirused[0])
1529     {
1530       /* no dirs used at all */
1531       cbdata.dirused = solv_free(cbdata.dirused);
1532       dirpool = 0;
1533     }
1534
1535   /* increment need id for used dir components */
1536   if (dirpool)
1537     {
1538       /* if we have own dirpool, all entries in it are used.
1539          also, all comp ids are already mapped by putinowndirpool(),
1540          so we can simply increment needid.
1541          (owndirpool != 0, dirused == 0, dirpooldata == 0) */
1542       /* else we re-use a dirpool of repodata "dirpooldata".
1543          dirused tells us which of the ids are used.
1544          we need to map comp ids if we generate a new pool.
1545          (owndirpool == 0, dirused != 0, dirpooldata != 0) */
1546       for (i = 1; i < dirpool->ndirs; i++)
1547         {
1548 #if 0
1549 fprintf(stderr, "dir %d used %d\n", i, cbdata.dirused ? cbdata.dirused[i] : 1);
1550 #endif
1551           if (cbdata.dirused && !cbdata.dirused[i])
1552             continue;
1553           id = dirpool->dirs[i];
1554           if (id <= 0)
1555             continue;
1556           if (dirpooldata && cbdata.ownspool && id > 1)
1557             {
1558               id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1559               needid = cbdata.needid;
1560             }
1561           needid[id].need++;
1562         }
1563     }
1564
1565
1566 /********************************************************************/
1567
1568   /*
1569    * create mapping table, new keys are sorted by needid[].need
1570    *
1571    * needid[key].need : old key -> new key
1572    * needid[key].map  : new key -> old key
1573    */
1574
1575   /* zero out id 0 and rel 0 just in case */
1576   reloff = needid[0].map;
1577   needid[0].need = 0;
1578   needid[reloff].need = 0;
1579
1580   for (i = 1; i < reloff + pool->nrels; i++)
1581     needid[i].map = i;
1582
1583 #if 0
1584   solv_sort(needid + 1, spool->nstrings - 1, sizeof(*needid), needid_cmp_need_s, spool);
1585 #else
1586   /* make first entry '' */
1587   needid[1].need = 1;
1588   solv_sort(needid + 2, spool->nstrings - 2, sizeof(*needid), needid_cmp_need_s, spool);
1589 #endif
1590   solv_sort(needid + reloff, pool->nrels, sizeof(*needid), needid_cmp_need, 0);
1591   /* now needid is in new order, needid[newid].map -> oldid */
1592
1593   /* calculate string space size, also zero out needid[].need */
1594   sizeid = 0;
1595   for (i = 1; i < reloff; i++)
1596     {
1597       if (!needid[i].need)
1598         break;  /* as we have sorted, every entry after this also has need == 0 */
1599       needid[i].need = 0;
1600       sizeid += strlen(spool->stringspace + spool->strings[needid[i].map]) + 1;
1601     }
1602   nstrings = i; /* our new string id end */
1603
1604   /* make needid[oldid].need point to newid */
1605   for (i = 1; i < nstrings; i++)
1606     needid[needid[i].map].need = i;
1607
1608   /* same as above for relations */
1609   for (i = 0; i < pool->nrels; i++)
1610     {
1611       if (!needid[reloff + i].need)
1612         break;
1613       needid[reloff + i].need = 0;
1614     }
1615   nrels = i;    /* our new rel id end */
1616
1617   for (i = 0; i < nrels; i++)
1618     needid[needid[reloff + i].map].need = nstrings + i;
1619
1620   /* now we have: needid[oldid].need -> newid
1621                   needid[newid].map  -> oldid
1622      both for strings and relations  */
1623
1624
1625 /********************************************************************/
1626
1627   ndirmap = 0;
1628   dirmap = 0;
1629   if (dirpool)
1630     {
1631       /* create our new target directory structure by traversing through all
1632        * used dirs. This will concatenate blocks with the same parent
1633        * directory into single blocks.
1634        * Instead of components, traverse_dirs stores the old dirids,
1635        * we will change this in the second step below */
1636       /* (dirpooldata and dirused are 0 if we have our own dirpool) */
1637       if (cbdata.dirused && !cbdata.dirused[1])
1638         cbdata.dirused[1] = 1;  /* always want / entry */
1639       dirmap = solv_calloc(dirpool->ndirs, sizeof(Id));
1640       dirmap[0] = 0;
1641       ndirmap = traverse_dirs(dirpool, dirmap, 1, dirpool_child(dirpool, 0), cbdata.dirused);
1642
1643       /* (re)create dirused, so that it maps from "old dirid" to "new dirid" */
1644       /* change dirmap so that it maps from "new dirid" to "new compid" */
1645       if (!cbdata.dirused)
1646         cbdata.dirused = solv_malloc2(dirpool->ndirs, sizeof(Id));
1647       memset(cbdata.dirused, 0, dirpool->ndirs * sizeof(Id));
1648       for (i = 1; i < ndirmap; i++)
1649         {
1650           if (dirmap[i] <= 0)
1651             continue;
1652           cbdata.dirused[dirmap[i]] = i;
1653           id = dirpool->dirs[dirmap[i]];
1654           if (dirpooldata && cbdata.ownspool && id > 1)
1655             id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1656           dirmap[i] = needid[id].need;
1657         }
1658       /* now the new target directory structure is complete (dirmap), and we have
1659        * dirused[olddirid] -> newdirid */
1660     }
1661
1662 /********************************************************************/
1663
1664   /* collect all data
1665    * we use extdata[0] for incore data and extdata[keyid] for vertical data
1666    */
1667
1668   cbdata.extdata = solv_calloc(target.nkeys, sizeof(struct extdata));
1669
1670   xd = cbdata.extdata;
1671   cbdata.current_sub = 0;
1672   /* add main schema */
1673   cbdata.lastlen = 0;
1674   data_addid(xd, mainschema);
1675
1676 #if 1
1677   FOR_REPODATAS(repo, j, data)
1678     {
1679       if (!repodataused[j])
1680         continue;
1681       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1682     }
1683 #endif
1684
1685   if (xd->len - cbdata.lastlen > cbdata.maxdata)
1686     cbdata.maxdata = xd->len - cbdata.lastlen;
1687   cbdata.lastlen = xd->len;
1688
1689   if (anysolvableused)
1690     {
1691       data_addid(xd, repo->nsolvables); /* FLEXARRAY nentries */
1692       cbdata.doingsolvables = 1;
1693       for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1694         {
1695           if (s->repo != repo)
1696             continue;
1697           data_addid(xd, cbdata.solvschemata[n]);
1698           if (cbdata.keymap[SOLVABLE_NAME])
1699             data_addid(xd, needid[s->name].need);
1700           if (cbdata.keymap[SOLVABLE_ARCH])
1701             data_addid(xd, needid[s->arch].need);
1702           if (cbdata.keymap[SOLVABLE_EVR])
1703             data_addid(xd, needid[s->evr].need);
1704           if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1705             data_addid(xd, needid[s->vendor].need);
1706           if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1707             data_addidarray_sort(xd, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER);
1708           if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1709             data_addidarray_sort(xd, pool, needid, idarraydata + s->obsoletes, 0);
1710           if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1711             data_addidarray_sort(xd, pool, needid, idarraydata + s->conflicts, 0);
1712           if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1713             data_addidarray_sort(xd, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER);
1714           if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1715             data_addidarray_sort(xd, pool, needid, idarraydata + s->recommends, 0);
1716           if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1717             data_addidarray_sort(xd, pool, needid, idarraydata + s->suggests, 0);
1718           if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1719             data_addidarray_sort(xd, pool, needid, idarraydata + s->supplements, 0);
1720           if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1721             data_addidarray_sort(xd, pool, needid, idarraydata + s->enhances, 0);
1722           if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1723             data_addid(xd, repo->rpmdbid[i - repo->start]);
1724           if (anyrepodataused)
1725             {
1726               cbdata.vstart = -1;
1727               FOR_REPODATAS(repo, j, data)
1728                 {
1729                   if (!repodataused[j])
1730                     continue;
1731                   if (i < data->start || i >= data->end)
1732                     continue;
1733                   repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1734                 }
1735             }
1736           if (xd->len - cbdata.lastlen > cbdata.maxdata)
1737             cbdata.maxdata = xd->len - cbdata.lastlen;
1738           cbdata.lastlen = xd->len;
1739           n++;
1740         }
1741       cbdata.doingsolvables = 0;
1742     }
1743
1744   assert(cbdata.current_sub == cbdata.nsubschemata);
1745   if (cbdata.subschemata)
1746     {
1747       cbdata.subschemata = solv_free(cbdata.subschemata);
1748       cbdata.nsubschemata = 0;
1749     }
1750
1751 /********************************************************************/
1752
1753   target.fp = fp;
1754
1755   /* write header */
1756
1757   /* write file header */
1758   write_u32(&target, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
1759   write_u32(&target, SOLV_VERSION_8);
1760
1761
1762   /* write counts */
1763   write_u32(&target, nstrings);
1764   write_u32(&target, nrels);
1765   write_u32(&target, ndirmap);
1766   write_u32(&target, anysolvableused ? repo->nsolvables : 0);
1767   write_u32(&target, target.nkeys);
1768   write_u32(&target, target.nschemata);
1769   solv_flags = 0;
1770   solv_flags |= SOLV_FLAG_PREFIX_POOL;
1771   solv_flags |= SOLV_FLAG_SIZE_BYTES;
1772   write_u32(&target, solv_flags);
1773
1774   if (nstrings)
1775     {
1776       /*
1777        * calculate prefix encoding of the strings
1778        */
1779       unsigned char *prefixcomp = solv_malloc(nstrings);
1780       unsigned int compsum = 0;
1781       char *old_str = "";
1782
1783       prefixcomp[0] = 0;
1784       for (i = 1; i < nstrings; i++)
1785         {
1786           char *str = spool->stringspace + spool->strings[needid[i].map];
1787           int same;
1788           for (same = 0; same < 255; same++)
1789             if (!old_str[same] || old_str[same] != str[same])
1790               break;
1791           prefixcomp[i] = same;
1792           compsum += same;
1793           old_str = str;
1794         }
1795
1796       /*
1797        * write strings
1798        */
1799       write_u32(&target, sizeid);
1800       /* we save compsum bytes but need 1 extra byte for every string */
1801       write_u32(&target, sizeid + nstrings - 1 - compsum);
1802       for (i = 1; i < nstrings; i++)
1803         {
1804           char *str = spool->stringspace + spool->strings[needid[i].map];
1805           write_u8(&target, prefixcomp[i]);
1806           write_str(&target, str + prefixcomp[i]);
1807         }
1808       solv_free(prefixcomp);
1809     }
1810   else
1811     {
1812       write_u32(&target, 0);
1813       write_u32(&target, 0);
1814     }
1815
1816   /*
1817    * write RelDeps
1818    */
1819   for (i = 0; i < nrels; i++)
1820     {
1821       ran = pool->rels + (needid[reloff + i].map - reloff);
1822       write_id(&target, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
1823       write_id(&target, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
1824       write_u8(&target, ran->flags);
1825     }
1826
1827   /*
1828    * write dirs (skip both root and / entry)
1829    */
1830   for (i = 2; i < ndirmap; i++)
1831     {
1832       if (dirmap[i] > 0)
1833         write_id(&target, dirmap[i]);
1834       else
1835         write_id(&target, nstrings - dirmap[i]);
1836     }
1837   solv_free(dirmap);
1838
1839   /*
1840    * write keys
1841    */
1842   for (i = 1; i < target.nkeys; i++)
1843     {
1844       write_id(&target, needid[target.keys[i].name].need);
1845       write_id(&target, needid[target.keys[i].type].need);
1846       if (target.keys[i].storage != KEY_STORAGE_VERTICAL_OFFSET)
1847         {
1848           if (target.keys[i].type == type_constantid)
1849             write_id(&target, needid[target.keys[i].size].need);
1850           else
1851             write_id(&target, target.keys[i].size);
1852         }
1853       else
1854         write_id(&target, cbdata.extdata[i].len);
1855       write_id(&target, target.keys[i].storage);
1856     }
1857
1858   /*
1859    * write schemata
1860    */
1861   write_id(&target, target.schemadatalen);      /* XXX -1? */
1862   for (i = 1; i < target.nschemata; i++)
1863     write_idarray(&target, pool, 0, repodata_id2schema(&target, i));
1864
1865 /********************************************************************/
1866
1867   write_id(&target, cbdata.maxdata);
1868   write_id(&target, cbdata.extdata[0].len);
1869   if (cbdata.extdata[0].len)
1870     write_blob(&target, cbdata.extdata[0].buf, cbdata.extdata[0].len);
1871   solv_free(cbdata.extdata[0].buf);
1872
1873   /* do we have vertical data? */
1874   for (i = 1; i < target.nkeys; i++)
1875     if (cbdata.extdata[i].len)
1876       break;
1877   if (i < target.nkeys)
1878     {
1879       /* yes, write it in pages */
1880       unsigned char *dp, vpage[REPOPAGE_BLOBSIZE];
1881       int l, ll, lpage = 0;
1882
1883       write_u32(&target, REPOPAGE_BLOBSIZE);
1884       for (i = 1; i < target.nkeys; i++)
1885         {
1886           if (!cbdata.extdata[i].len)
1887             continue;
1888           l = cbdata.extdata[i].len;
1889           dp = cbdata.extdata[i].buf;
1890           while (l)
1891             {
1892               ll = REPOPAGE_BLOBSIZE - lpage;
1893               if (l < ll)
1894                 ll = l;
1895               memcpy(vpage + lpage, dp, ll);
1896               dp += ll;
1897               lpage += ll;
1898               l -= ll;
1899               if (lpage == REPOPAGE_BLOBSIZE)
1900                 {
1901                   write_compressed_page(&target, vpage, lpage);
1902                   lpage = 0;
1903                 }
1904             }
1905         }
1906       if (lpage)
1907         write_compressed_page(&target, vpage, lpage);
1908     }
1909
1910   for (i = 1; i < target.nkeys; i++)
1911     solv_free(cbdata.extdata[i].buf);
1912   solv_free(cbdata.extdata);
1913
1914   target.fp = 0;
1915   repodata_freedata(&target);
1916
1917   solv_free(needid);
1918   solv_free(cbdata.solvschemata);
1919   solv_free(cbdata.schema);
1920
1921   solv_free(cbdata.keymap);
1922   solv_free(cbdata.keymapstart);
1923   solv_free(cbdata.dirused);
1924   solv_free(repodataused);
1925   return target.error;
1926 }
1927
1928 struct repodata_write_data {
1929   int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata);
1930   void *kfdata;
1931   int repodataid;
1932 };
1933
1934 static int
1935 repodata_write_keyfilter(Repo *repo, Repokey *key, void *kfdata)
1936 {
1937   struct repodata_write_data *wd = kfdata;
1938
1939   /* XXX: special repodata selection hack */
1940   if (key->name == 1 && key->size != wd->repodataid)
1941     return -1;
1942   if (key->storage == KEY_STORAGE_SOLVABLE)
1943     return KEY_STORAGE_DROPPED; /* not part of this repodata */
1944   if (wd->keyfilter)
1945     return (*wd->keyfilter)(repo, key, wd->kfdata);
1946   return key->storage;
1947 }
1948
1949 int
1950 repodata_write_filtered(Repodata *data, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Queue *keyq)
1951 {
1952   struct repodata_write_data wd;
1953
1954   wd.keyfilter = keyfilter;
1955   wd.kfdata = kfdata;
1956   wd.repodataid = data->repodataid;
1957   return repo_write_filtered(data->repo, fp, repodata_write_keyfilter, &wd, keyq);
1958 }
1959
1960 int
1961 repodata_write(Repodata *data, FILE *fp)
1962 {
1963   return repodata_write_filtered(data, fp, repo_write_stdkeyfilter, 0, 0);
1964 }
1965
1966 int
1967 repo_write(Repo *repo, FILE *fp)
1968 {
1969   return repo_write_filtered(repo, fp, repo_write_stdkeyfilter, 0, 0);
1970 }