ext/repo_write.c

   1 /*
   2  * Copyright (c) 2007-2011, Novell Inc.
   3  *
   4  * This program is licensed under the BSD license, read LICENSE.BSD
   5  * for further information
   6  */
   7
   8 /*
   9  * repo_write.c
  10  *
  11  * Write Repo data out to a file in solv format
  12  *
  13  * See doc/README.format for a description
  14  * of the binary file format
  15  *
  16  */
  17
  18 #include <sys/types.h>
  19 #include <limits.h>
  20 #include <fcntl.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <assert.h>
  25
  26 #include "pool.h"
  27 #include "util.h"
  28 #include "repo_write.h"
  29 #include "repopage.h"
  30
  31 /*------------------------------------------------------------------*/
  32 /* Id map optimizations */
  33
  34 typedef struct needid {       /* one slot per id; relation slots follow the string slots */
  35   Id need;                     /* usage counter (incneedid); the writers emit this field as the mapped id */
  36   Id map;                      /* id this slot stands for; needid[0].map is the offset of the relation slots */
  37 } NeedId;
  38
  39
  40 #define RELOFF(id) (needid[0].map + GETRELID(id))     /* slot of a relation id; expects a variable named needid in scope */
  41
  42 /*
  43  * increment need Id
  44  * idarray: array of Ids, ID_NULL terminated
  45  * needid: array of Id->NeedId
  46  *
  47  * return size of array (including trailing zero)
  48  *
  49  */
  50
  51 static void
  52 incneedid(Pool *pool, Id id, NeedId *needid)
  53 {
  54   while (ISRELDEP(id))
  55     {
  56       Reldep *rd = GETRELDEP(pool, id);
  57       needid[RELOFF(id)].need++;
  58       if (ISRELDEP(rd->evr))
  59         incneedid(pool, rd->evr, needid);
  60       else
  61         needid[rd->evr].need++;
  62       id = rd->name;
  63     }
  64   needid[id].need++;
  65 }
  66
  67 static int
  68 incneedidarray(Pool *pool, Id *idarray, NeedId *needid)
  69 {
  70   Id id;
  71   int n = 0;
  72
  73   if (!idarray)
  74     return 0;
  75   while ((id = *idarray++) != 0)
  76     {
  77       n++;
  78       while (ISRELDEP(id))
  79         {
  80           Reldep *rd = GETRELDEP(pool, id);
  81           needid[RELOFF(id)].need++;
  82           if (ISRELDEP(rd->evr))
  83             incneedid(pool, rd->evr, needid);
  84           else
  85             needid[rd->evr].need++;
  86           id = rd->name;
  87         }
  88       needid[id].need++;
  89     }
  90   return n + 1;
  91 }
  92
  93
  94 /*
  95  *
  96  */
  97
  98 static int
  99 needid_cmp_need(const void *ap, const void *bp, void *dp)
 100 {
 101   const NeedId *a = ap;
 102   const NeedId *b = bp;
 103   int r;
 104   r = b->need - a->need;
 105   if (r)
 106     return r;
 107   return a->map - b->map;
 108 }
 109
 110 static int
 111 needid_cmp_need_s(const void *ap, const void *bp, void *dp)
 112 {
 113   const NeedId *a = ap;
 114   const NeedId *b = bp;
 115   Stringpool *spool = dp;
 116   const char *as;
 117   const char *bs;
 118
 119   int r;
 120   r = b->need - a->need;
 121   if (r)
 122     return r;
 123   as = spool->stringspace + spool->strings[a->map];
 124   bs = spool->stringspace + spool->strings[b->map];
 125   return strcmp(as, bs);
 126 }
 127
 128
 129 /*------------------------------------------------------------------*/
 130 /* output helper routines, used for writing the header */
 131 /* (the data itself is accumulated in memory and written with
 132  * write_blob) */
 133
 134 /*
 135  * unsigned 32-bit
 136  */
 137
 138 static void
 139 write_u32(FILE *fp, unsigned int x)
 140 {
 141   if (putc(x >> 24, fp) == EOF ||
 142       putc(x >> 16, fp) == EOF ||
 143       putc(x >> 8, fp) == EOF ||
 144       putc(x, fp) == EOF)
 145     {
 146       perror("write error u32");
 147       exit(1);
 148     }
 149 }
 150
 151
 152 /*
 153  * unsigned 8-bit
 154  */
 155
 156 static void
 157 write_u8(FILE *fp, unsigned int x)
 158 {
 159   if (putc(x, fp) == EOF)
 160     {
 161       perror("write error u8");
 162       exit(1);
 163     }
 164 }
 165
 166 /*
 167  * data blob
 168  */
 169
 170 static void
 171 write_blob(FILE *fp, void *data, int len)
 172 {
 173   if (len && fwrite(data, len, 1, fp) != 1)
 174     {
 175       perror("write error blob");
 176       exit(1);
 177     }
 178 }
 179
 180 /*
 181  * Id
 182  */
 183
 184 static void
 185 write_id(FILE *fp, Id x)
 186 {
 187   if (x >= (1 << 14))
 188     {
 189       if (x >= (1 << 28))
 190         putc((x >> 28) | 128, fp);
 191       if (x >= (1 << 21))
 192         putc((x >> 21) | 128, fp);
 193       putc((x >> 14) | 128, fp);
 194     }
 195   if (x >= (1 << 7))
 196     putc((x >> 7) | 128, fp);
 197   if (putc(x & 127, fp) == EOF)
 198     {
 199       perror("write error id");
 200       exit(1);
 201     }
 202 }
 203
 204 static inline void
 205 write_id_eof(FILE *fp, Id x, int eof)
 206 {
 207   if (x >= 64)
 208     x = (x & 63) | ((x & ~63) << 1);
 209   write_id(fp, x | (eof ? 0 : 64));
 210 }
 211
 212
 213
 214 static inline void
 215 write_str(FILE *fp, const char *str)
 216 {
 217   if (fputs(str, fp) == EOF || putc(0, fp) == EOF)
 218     {
 219       perror("write error str");
 220       exit(1);
 221     }
 222 }
 223
 224 /*
 225  * Array of Ids
 226  */
 227
 228 static void
 229 write_idarray(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
 230 {
 231   Id id;
 232   if (!ids)
 233     return;
 234   if (!*ids)
 235     {
 236       write_u8(fp, 0);
 237       return;
 238     }
 239   for (;;)
 240     {
 241       id = *ids++;
 242       if (needid)
 243         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 244       if (id >= 64)
 245         id = (id & 63) | ((id & ~63) << 1);
 246       if (!*ids)
 247         {
 248           write_id(fp, id);
 249           return;
 250         }
 251       write_id(fp, id | 64);
 252     }
 253 }
 254
 255 static int
 256 cmp_ids(const void *pa, const void *pb, void *dp)
 257 {
 258   Id a = *(Id *)pa;
 259   Id b = *(Id *)pb;
 260   return a - b;
 261 }
 262
 263 #if 0  /* disabled: FILE-writing variant of data_addidarray_sort(), not compiled */
 264 static void
 265 write_idarray_sort(FILE *fp, Pool *pool, NeedId *needid, Id *ids, Id marker)
 266 {
 267   int len, i;
 268   Id lids[64], *sids;
 269
 270   if (!ids)
 271     return;
 272   if (!*ids)
 273     {
 274       write_u8(fp, 0);
 275       return;
 276     }
 277   for (len = 0; len < 64 && ids[len]; len++)
 278     {
 279       Id id = ids[len];
 280       if (needid)
 281         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 282       lids[len] = id;
 283     }
 284   if (ids[len])
 285     {
 286       for (i = len + 1; ids[i]; i++)
 287         ;
 288       sids = solv_malloc2(i, sizeof(Id));
 289       memcpy(sids, lids, 64 * sizeof(Id));
 290       for (; ids[len]; len++)
 291         {
 292           Id id = ids[len];
 293           if (needid)
 294             id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 295           sids[len] = id;
 296         }
 297     }
 298   else
 299     sids = lids;
 300
 301   /* That bloody solvable:prereqmarker needs to stay in position :-(  */
 302   if (needid)
 303     marker = needid[marker].need;
 304   for (i = 0; i < len; i++)
 305     if (sids[i] == marker)
 306       break;
 307   if (i > 1)
 308     solv_sort(sids, i, sizeof(Id), cmp_ids, 0);
 309   if ((len - i) > 2)
 310     solv_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);
 311
 312   Id id, old = 0;
 313
 314   /* The differencing below produces many runs of ones and twos.  I tried
 315      fairly elaborate schemes to RLE those, but they give only very mediocre
 316      improvements in compression, as coding the escapes costs quite some
 317      space.  Even if they are coded only as bits in IDs.  The best improvement
 318      was about 2.7% for the whole .solv file.  It's probably better to
 319      invest some complexity into sharing idarrays, than RLEing.  */
 320   for (i = 0; i < len - 1; i++)
 321     {
 322       id = sids[i];
 323     /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
 324        hence all real differences are offset by 1.  Otherwise we would
 325        have to handle negative differences, which would cost code space for
 326        the encoding of the sign.  We lose the exact mapping of prereq here,
 327        but we know the result, so we can recover from that in the reader.  */
 328       if (id == marker)
 329         id = old = 0;
 330       else
 331         {
 332           id = id - old + 1;
 333           old = sids[i];
 334         }
 335       /* XXX If difference is zero we have multiple equal elements,
 336          we might want to skip writing them out.  */
 337       if (id >= 64)
 338         id = (id & 63) | ((id & ~63) << 1);
 339       write_id(fp, id | 64);
 340     }
 341   id = sids[i];
 342   if (id == marker)
 343     id = 0;
 344   else
 345     id = id - old + 1;
 346   if (id >= 64)
 347     id = (id & 63) | ((id & ~63) << 1);
 348   write_id(fp, id);
 349   if (sids != lids)
 350     solv_free(sids);
 351 }
 352 #endif
 353
 354
 355 struct extdata {              /* growable output byte buffer filled by the data_add*() helpers */
 356   unsigned char *buf;         /* buffer start; reassigned from solv_extend() on every append */
 357   int len;                    /* number of bytes currently used in buf */
 358 };
 359
 360 struct cbdata {
 361   Repo *repo;
 362   Repodata *target;
 363
 364   Stringpool *ownspool;
 365   Dirpool *owndirpool;
 366
 367   Id *keymap;
 368   int nkeymap;
 369   Id *keymapstart;
 370
 371   NeedId *needid;
 372
 373   Id *schema;           /* schema construction space */
 374   Id *sp;               /* pointer in above */
 375   Id *oldschema, *oldsp;
 376
 377   Id *solvschemata;
 378   Id *subschemata;
 379   int nsubschemata;
 380   int current_sub;
 381
 382   struct extdata *extdata;
 383
 384   Id *dirused;
 385
 386   Id vstart;
 387
 388   Id maxdata;
 389   Id lastlen;
 390
 391   int doingsolvables;   /* working on solvables data */
 392 };
 393
 394 #define NEEDED_BLOCK 1023             /* mask putinownpool() uses to round the needid size up */
 395 #define SCHEMATA_BLOCK 31             /* block argument of solv_extend() for cbdata->subschemata */
 396 #define SCHEMATADATA_BLOCK 255        /* NOTE(review): no use visible in this part of the file */
 397 #define EXTDATA_BLOCK 4095            /* block argument of solv_extend() for extdata buffers */
 398
 399 static inline void
 400 data_addid(struct extdata *xd, Id x)
 401 {
 402   unsigned char *dp;
 403   xd->buf = solv_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
 404   dp = xd->buf + xd->len;
 405
 406   if (x >= (1 << 14))
 407     {
 408       if (x >= (1 << 28))
 409         *dp++ = (x >> 28) | 128;
 410       if (x >= (1 << 21))
 411         *dp++ = (x >> 21) | 128;
 412       *dp++ = (x >> 14) | 128;
 413     }
 414   if (x >= (1 << 7))
 415     *dp++ = (x >> 7) | 128;
 416   *dp++ = x & 127;
 417   xd->len = dp - xd->buf;
 418 }
 419
 420 static inline void
 421 data_addideof(struct extdata *xd, Id x, int eof)
 422 {
 423   if (x >= 64)
 424     x = (x & 63) | ((x & ~63) << 1);
 425   data_addid(xd, (eof ? x: x | 64));
 426 }
 427
 428 static void
 429 data_addidarray_sort(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker)
 430 {
 431   int len, i;
 432   Id lids[64], *sids;
 433   Id id, old;
 434
 435   if (!ids)
 436     return;
 437   if (!*ids)
 438     {
 439       data_addid(xd, 0);
 440       return;
 441     }
 442   for (len = 0; len < 64 && ids[len]; len++)
 443     {
 444       Id id = ids[len];
 445       if (needid)
 446         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 447       lids[len] = id;
 448     }
 449   if (ids[len])
 450     {
 451       for (i = len + 1; ids[i]; i++)
 452         ;
 453       sids = solv_malloc2(i, sizeof(Id));
 454       memcpy(sids, lids, 64 * sizeof(Id));
 455       for (; ids[len]; len++)
 456         {
 457           Id id = ids[len];
 458           if (needid)
 459             id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 460           sids[len] = id;
 461         }
 462     }
 463   else
 464     sids = lids;
 465
 466   /* That bloody solvable:prereqmarker needs to stay in position :-(  */
 467   if (needid)
 468     marker = needid[marker].need;
 469   for (i = 0; i < len; i++)
 470     if (sids[i] == marker)
 471       break;
 472   if (i > 1)
 473     solv_sort(sids, i, sizeof(Id), cmp_ids, 0);
 474   if ((len - i) > 2)
 475     solv_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);
 476
 477   old = 0;
 478
 479   /* The differencing above produces many runs of ones and twos.  I tried
 480      fairly elaborate schemes to RLE those, but they give only very mediocre
 481      improvements in compression, as coding the escapes costs quite some
 482      space.  Even if they are coded only as bits in IDs.  The best improvement
 483      was about 2.7% for the whole .solv file.  It's probably better to
 484      invest some complexity into sharing idarrays, than RLEing.  */
 485   for (i = 0; i < len - 1; i++)
 486     {
 487       id = sids[i];
 488     /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
 489        hence all real differences are offsetted by 1.  Otherwise we would
 490        have to handle negative differences, which would cost code space for
 491        the encoding of the sign.  We loose the exact mapping of prereq here,
 492        but we know the result, so we can recover from that in the reader.  */
 493       if (id == marker)
 494         id = old = 0;
 495       else
 496         {
 497           id = id - old + 1;
 498           old = sids[i];
 499         }
 500       /* XXX If difference is zero we have multiple equal elements,
 501          we might want to skip writing them out.  */
 502       if (id >= 64)
 503         id = (id & 63) | ((id & ~63) << 1);
 504       data_addid(xd, id | 64);
 505     }
 506   id = sids[i];
 507   if (id == marker)
 508     id = 0;
 509   else
 510     id = id - old + 1;
 511   if (id >= 64)
 512     id = (id & 63) | ((id & ~63) << 1);
 513   data_addid(xd, id);
 514   if (sids != lids)
 515     solv_free(sids);
 516 }
 517
 518 static inline void
 519 data_addblob(struct extdata *xd, unsigned char *blob, int len)
 520 {
 521   xd->buf = solv_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
 522   memcpy(xd->buf + xd->len, blob, len);
 523   xd->len += len;
 524 }
 525
 526 static inline void
 527 data_addu32(struct extdata *xd, unsigned int num)
 528 {
 529   unsigned char d[4];
 530   d[0] = num >> 24;
 531   d[1] = num >> 16;
 532   d[2] = num >> 8;
 533   d[3] = num;
 534   data_addblob(xd, d, 4);
 535 }
 536
 537 static Id
 538 putinownpool(struct cbdata *cbdata, Stringpool *ss, Id id)
 539 {
 540   const char *str = stringpool_id2str(ss, id);
 541   id = stringpool_str2id(cbdata->ownspool, str, 1);
 542   if (id >= cbdata->needid[0].map)
 543     {
 544       int oldoff = cbdata->needid[0].map;
 545       int newoff = (id + 1 + NEEDED_BLOCK) & ~NEEDED_BLOCK;
 546       int nrels = cbdata->repo->pool->nrels;
 547       cbdata->needid = solv_realloc2(cbdata->needid, newoff + nrels, sizeof(NeedId));
 548       if (nrels)
 549         memmove(cbdata->needid + newoff, cbdata->needid + oldoff, nrels * sizeof(NeedId));
 550       memset(cbdata->needid + oldoff, 0, (newoff - oldoff) * sizeof(NeedId));
 551       cbdata->needid[0].map = newoff;
 552     }
 553   return id;
 554 }
 555
 556 static Id
 557 putinowndirpool(struct cbdata *cbdata, Repodata *data, Dirpool *dp, Id dir)
 558 {
 559   Id compid, parent;
 560
 561   parent = dirpool_parent(dp, dir);
 562   if (parent)
 563     parent = putinowndirpool(cbdata, data, dp, parent);
 564   compid = dp->dirs[dir];
 565   if (cbdata->ownspool && compid > 1)
 566     compid = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, compid);
 567   return dirpool_add_dir(cbdata->owndirpool, parent, compid, 1);
 568 }
 569
 570 /*
 571  * collect usage information about the dirs
 572  * 1: dir used, no child of dir used
 573  * 2: dir used as parent of another used dir
 574  */
 575 static inline void
 576 setdirused(struct cbdata *cbdata, Dirpool *dp, Id dir)
 577 {
 578   if (cbdata->dirused[dir])
 579     return;
 580   cbdata->dirused[dir] = 1;
 581   while ((dir = dirpool_parent(dp, dir)) != 0)
 582     {
 583       if (cbdata->dirused[dir] == 2)
 584         return;
 585       if (cbdata->dirused[dir])
 586         {
 587           cbdata->dirused[dir] = 2;
 588           return;
 589         }
 590       cbdata->dirused[dir] = 2;
 591     }
 592   cbdata->dirused[0] = 2;
 593 }
 594
 595 /*
 596  * pass 1 callback:
 597  * collect key/id/dirid usage information, create needed schemas
 598  */
 599 static int
 600 repo_write_collect_needed(struct cbdata *cbdata, Repo *repo, Repodata *data, Repokey *key, KeyValue *kv)
 601 {
 602   Id id;
 603   int rm;
 604
 605   if (key->name == REPOSITORY_SOLVABLES)
 606     return SEARCH_NEXT_KEY;     /* we do not want this one */
 607   if (data != data->repo->repodata + data->repo->nrepodata - 1)
 608     if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
 609       return SEARCH_NEXT_KEY;
 610
 611   rm = cbdata->keymap[cbdata->keymapstart[data - data->repo->repodata] + (key - data->keys)];
 612   if (!rm)
 613     return SEARCH_NEXT_KEY;     /* we do not want this one */
 614
 615   /* record key in schema */
 616   if ((key->type != REPOKEY_TYPE_FIXARRAY || kv->eof == 0)
 617       && (cbdata->sp == cbdata->schema || cbdata->sp[-1] != rm))
 618     *cbdata->sp++ = rm;
 619
 620   switch(key->type)
 621     {
 622       case REPOKEY_TYPE_ID:
 623       case REPOKEY_TYPE_IDARRAY:
 624         id = kv->id;
 625         if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
 626           id = putinownpool(cbdata, data->localpool ? &data->spool : &repo->pool->ss, id);
 627         incneedid(repo->pool, id, cbdata->needid);
 628         break;
 629       case REPOKEY_TYPE_DIR:
 630       case REPOKEY_TYPE_DIRNUMNUMARRAY:
 631       case REPOKEY_TYPE_DIRSTRARRAY:
 632         id = kv->id;
 633         if (cbdata->owndirpool)
 634           putinowndirpool(cbdata, data, &data->dirpool, id);
 635         else
 636           setdirused(cbdata, &data->dirpool, id);
 637         break;
 638       case REPOKEY_TYPE_FIXARRAY:
 639         if (kv->eof == 0)
 640           {
 641             if (cbdata->oldschema)
 642               {
 643                 fprintf(stderr, "nested structs not yet implemented\n");
 644                 exit(1);
 645               }
 646             cbdata->oldschema = cbdata->schema;
 647             cbdata->oldsp = cbdata->sp;
 648             cbdata->schema = solv_calloc(cbdata->target->nkeys, sizeof(Id));
 649             cbdata->sp = cbdata->schema;
 650           }
 651         else if (kv->eof == 1)
 652           {
 653             cbdata->current_sub++;
 654             *cbdata->sp = 0;
 655             cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
 656             cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, cbdata->schema, 1);
 657 #if 0
 658             fprintf(stderr, "Have schema %d\n", cbdata->subschemata[cbdata->nsubschemata-1]);
 659 #endif
 660             cbdata->sp = cbdata->schema;
 661           }
 662         else
 663           {
 664             solv_free(cbdata->schema);
 665             cbdata->schema = cbdata->oldschema;
 666             cbdata->sp = cbdata->oldsp;
 667             cbdata->oldsp = cbdata->oldschema = 0;
 668           }
 669         break;
 670       case REPOKEY_TYPE_FLEXARRAY:
 671         if (kv->entry == 0)
 672           {
 673             if (kv->eof != 2)
 674               *cbdata->sp++ = 0;        /* mark start */
 675           }
 676         else
 677           {
 678             /* just finished a schema, rewind */
 679             Id *sp = cbdata->sp - 1;
 680             *sp = 0;
 681             while (sp[-1])
 682               sp--;
 683             cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
 684             cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, sp, 1);
 685             cbdata->sp = kv->eof == 2 ? sp - 1: sp;
 686           }
 687         break;
 688       default:
 689         break;
 690     }
 691   return 0;
 692 }
 693
 694 static int
 695 repo_write_cb_needed(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
 696 {
 697   struct cbdata *cbdata = vcbdata;
 698   Repo *repo = data->repo;
 699
 700 #if 0
 701   if (s)
 702     fprintf(stderr, "solvable %d (%s): key (%d)%s %d\n", s ? s - repo->pool->solvables : 0, s ? pool_id2str(repo->pool, s->name) : "", key->name, pool_id2str(repo->pool, key->name), key->type);
 703 #endif
 704   return repo_write_collect_needed(cbdata, repo, data, key, kv);
 705 }
 706
 707
 708 /*
 709  * pass 2 callback:
 710  * encode all of the data into the correct buffers
 711  */
 712
 713 static int
 714 repo_write_adddata(struct cbdata *cbdata, Repodata *data, Repokey *key, KeyValue *kv)
 715 {
 716   int rm;
 717   Id id;
 718   unsigned int u32;
 719   unsigned char v[4];
 720   struct extdata *xd;
 721   NeedId *needid;
 722
 723   if (key->name == REPOSITORY_SOLVABLES)
 724     return SEARCH_NEXT_KEY;
 725   if (data != data->repo->repodata + data->repo->nrepodata - 1)
 726     if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
 727       return SEARCH_NEXT_KEY;
 728
 729   rm = cbdata->keymap[cbdata->keymapstart[data - data->repo->repodata] + (key - data->keys)];
 730   if (!rm)
 731     return SEARCH_NEXT_KEY;     /* we do not want this one */
 732
 733   if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET)
 734     {
 735       xd = cbdata->extdata + rm;        /* vertical buffer */
 736       if (cbdata->vstart == -1)
 737         cbdata->vstart = xd->len;
 738     }
 739   else
 740     xd = cbdata->extdata + 0;           /* incore buffer */
 741   switch(key->type)
 742     {
 743       case REPOKEY_TYPE_VOID:
 744       case REPOKEY_TYPE_CONSTANT:
 745       case REPOKEY_TYPE_CONSTANTID:
 746         break;
 747       case REPOKEY_TYPE_ID:
 748         id = kv->id;
 749         if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
 750           id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
 751         needid = cbdata->needid;
 752         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 753         data_addid(xd, id);
 754         break;
 755       case REPOKEY_TYPE_IDARRAY:
 756         id = kv->id;
 757         if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
 758           id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
 759         needid = cbdata->needid;
 760         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 761         data_addideof(xd, id, kv->eof);
 762         break;
 763       case REPOKEY_TYPE_STR:
 764         data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
 765         break;
 766       case REPOKEY_TYPE_MD5:
 767         data_addblob(xd, (unsigned char *)kv->str, SIZEOF_MD5);
 768         break;
 769       case REPOKEY_TYPE_SHA1:
 770         data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA1);
 771         break;
 772       case REPOKEY_TYPE_SHA256:
 773         data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA256);
 774         break;
 775       case REPOKEY_TYPE_U32:
 776         u32 = kv->num;
 777         v[0] = u32 >> 24;
 778         v[1] = u32 >> 16;
 779         v[2] = u32 >> 8;
 780         v[3] = u32;
 781         data_addblob(xd, v, 4);
 782         break;
 783       case REPOKEY_TYPE_NUM:
 784         data_addid(xd, kv->num);
 785         break;
 786       case REPOKEY_TYPE_DIR:
 787         id = kv->id;
 788         if (cbdata->owndirpool)
 789           id = putinowndirpool(cbdata, data, &data->dirpool, id);
 790         id = cbdata->dirused[id];
 791         data_addid(xd, id);
 792         break;
 793       case REPOKEY_TYPE_BINARY:
 794         data_addid(xd, kv->num);
 795         if (kv->num)
 796           data_addblob(xd, (unsigned char *)kv->str, kv->num);
 797         break;
 798       case REPOKEY_TYPE_DIRNUMNUMARRAY:
 799         id = kv->id;
 800         if (cbdata->owndirpool)
 801           id = putinowndirpool(cbdata, data, &data->dirpool, id);
 802         id = cbdata->dirused[id];
 803         data_addid(xd, id);
 804         data_addid(xd, kv->num);
 805         data_addideof(xd, kv->num2, kv->eof);
 806         break;
 807       case REPOKEY_TYPE_DIRSTRARRAY:
 808         id = kv->id;
 809         if (cbdata->owndirpool)
 810           id = putinowndirpool(cbdata, data, &data->dirpool, id);
 811         id = cbdata->dirused[id];
 812         data_addideof(xd, id, kv->eof);
 813         data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
 814         break;
 815       case REPOKEY_TYPE_FIXARRAY:
 816         if (kv->eof == 0)
 817           {
 818             if (kv->num)
 819               {
 820                 data_addid(xd, kv->num);
 821                 data_addid(xd, cbdata->subschemata[cbdata->current_sub]);
 822 #if 0
 823                 fprintf(stderr, "writing %d %d\n", kv->num, cbdata->subschemata[cbdata->current_sub]);
 824 #endif
 825               }
 826           }
 827         else if (kv->eof == 1)
 828           {
 829             cbdata->current_sub++;
 830           }
 831         break;
 832       case REPOKEY_TYPE_FLEXARRAY:
 833         if (!kv->entry)
 834           data_addid(xd, kv->num);
 835         if (kv->eof != 2)
 836           data_addid(xd, cbdata->subschemata[cbdata->current_sub++]);
 837         if (xd == cbdata->extdata + 0 && !kv->parent && !cbdata->doingsolvables)
 838           {
 839             if (xd->len - cbdata->lastlen > cbdata->maxdata)
 840               cbdata->maxdata = xd->len - cbdata->lastlen;
 841             cbdata->lastlen = xd->len;
 842           }
 843         break;
 844       default:
 845         fprintf(stderr, "unknown type for %d: %d\n", key->name, key->type);
 846         exit(1);
 847     }
 848   if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET && kv->eof)
 849     {
 850       /* we can re-use old data in the blob here! */
 851       data_addid(cbdata->extdata + 0, cbdata->vstart);                  /* add offset into incore data */
 852       data_addid(cbdata->extdata + 0, xd->len - cbdata->vstart);        /* add length into incore data */
 853       cbdata->vstart = -1;
 854     }
 855   return 0;
 856 }
 857
 858 static int
 859 repo_write_cb_adddata(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
 860 {
 861   struct cbdata *cbdata = vcbdata;
 862   return repo_write_adddata(cbdata, data, key, kv);
 863 }
 864
 865 /* traverse through directory with first child "dir" */
 866 static int
 867 traverse_dirs(Dirpool *dp, Id *dirmap, Id n, Id dir, Id *used)
 868 {
 869   Id sib, child;
 870   Id parent, lastn;
 871
 872   parent = n;
 873   /* special case for '/', which has to come first */
 874   if (parent == 1)
 875     dirmap[n++] = 1;
 876   for (sib = dir; sib; sib = dirpool_sibling(dp, sib))
 877     {
 878       if (used && !used[sib])
 879         continue;
 880       if (sib == 1 && parent == 1)
 881         continue;       /* already did that one above */
 882       dirmap[n++] = sib;
 883     }
 884
 885   /* now go through all the siblings we just added and
 886    * do recursive calls on them */
 887   lastn = n;
 888   for (; parent < lastn; parent++)
 889     {
 890       sib = dirmap[parent];
 891       if (used && used[sib] != 2)       /* 2: used as parent */
 892         continue;
 893       child = dirpool_child(dp, sib);
 894       if (child)
 895         {
 896           dirmap[n++] = -parent;        /* start new block */
 897           n = traverse_dirs(dp, dirmap, n, child, used);
 898         }
 899     }
 900   return n;
 901 }
 902
 903 static void
 904 write_compressed_page(FILE *fp, unsigned char *page, int len)
 905 {
 906   int clen;
 907   unsigned char cpage[REPOPAGE_BLOBSIZE];
 908
 909   clen = repopagestore_compress_page(page, len, cpage, len - 1);
 910   if (!clen)
 911     {
 912       write_u32(fp, len * 2);
 913       write_blob(fp, page, len);
 914     }
 915   else
 916     {
 917       write_u32(fp, clen * 2 + 1);
 918       write_blob(fp, cpage, clen);
 919     }
 920 }
 921
 922 static Id verticals[] = {
 923   SOLVABLE_AUTHORS,
 924   SOLVABLE_DESCRIPTION,
 925   SOLVABLE_MESSAGEDEL,
 926   SOLVABLE_MESSAGEINS,
 927   SOLVABLE_EULA,
 928   SOLVABLE_DISKUSAGE,
 929   SOLVABLE_FILELIST,
 930   0
 931 };
 932
 933 static char *languagetags[] = {
 934   "solvable:summary:",
 935   "solvable:description:",
 936   "solvable:messageins:",
 937   "solvable:messagedel:",
 938   "solvable:eula:",
 939   0
 940 };
 941
 942 int
 943 repo_write_stdkeyfilter(Repo *repo, Repokey *key, void *kfdata)
 944 {
 945   const char *keyname;
 946   int i;
 947
 948   for (i = 0; verticals[i]; i++)
 949     if (key->name == verticals[i])
 950       return KEY_STORAGE_VERTICAL_OFFSET;
 951   keyname = pool_id2str(repo->pool, key->name);
 952   for (i = 0; languagetags[i] != 0; i++)
 953     if (!strncmp(keyname, languagetags[i], strlen(languagetags[i])))
 954       return KEY_STORAGE_VERTICAL_OFFSET;
 955   return KEY_STORAGE_INCORE;
 956 }
 957
 958 /*
 959  * Repo
 960  */
 961
 962 /*
 963  * the code works the following way:
 964  *
 965  * 1) find which keys should be written
 966  * 2) collect usage information for keys/ids/dirids, create schema
 967  *    data
 968  * 3) use usage information to create mapping tables, so that often
 969  *    used ids get a lower number
 970  * 4) encode data into buffers using the mapping tables
 971  * 5) write everything to disk
 972  */
 973 int
 974 repo_write(Repo *repo, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Id **keyarrayp)
 975 {
 976   Pool *pool = repo->pool;
 977   int i, j, n;
 978   Solvable *s;
 979   NeedId *needid;
 980   int nstrings, nrels;
 981   unsigned int sizeid;
 982   unsigned int solv_flags;
 983   Reldep *ran;
 984   Id *idarraydata;
 985
 986   Id id, *sp;
 987
 988   Id *dirmap;
 989   int ndirmap;
 990   Id *keyused;
 991   unsigned char *repodataused;
 992   int anyrepodataused = 0;
 993   int anysolvableused = 0;
 994
 995   struct cbdata cbdata;
 996   int clonepool;
 997   Repokey *key;
 998   int poolusage, dirpoolusage, idused, dirused;
 999   int reloff;
1000
1001   Repodata *data, *dirpooldata;
1002
1003   Repodata target;
1004
1005   Stringpool *spool;
1006   Dirpool *dirpool;
1007
1008   Id mainschema;
1009
1010   struct extdata *xd;
1011
1012   Id type_constantid = REPOKEY_TYPE_CONSTANTID;
1013
1014   unsigned char *prefixcomp;
1015   unsigned int compsum;
1016   char *old_str;
1017
1018
1019   memset(&cbdata, 0, sizeof(cbdata));
1020   cbdata.repo = repo;
1021   cbdata.target = &target;
1022
1023   repodata_initdata(&target, repo, 1);
1024
1025   /* go through all repodata and find the keys we need */
1026   /* also unify keys */
1027   /*          keymapstart - maps repo number to keymap offset */
1028   /*          keymap      - maps repo key to my key, 0 -> not used */
1029
1030   /* start with all KEY_STORAGE_SOLVABLE ids */
1031
1032   n = ID_NUM_INTERNAL;
1033   FOR_REPODATAS(repo, i, data)
1034     n += data->nkeys;
1035   cbdata.keymap = solv_calloc(n, sizeof(Id));
1036   cbdata.keymapstart = solv_calloc(repo->nrepodata, sizeof(Id));
1037   repodataused = solv_calloc(repo->nrepodata, 1);
1038
1039   clonepool = 0;
1040   poolusage = 0;
1041
1042   /* add keys for STORAGE_SOLVABLE */
1043   for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
1044     {
1045       Repokey keyd;
1046       keyd.name = i;
1047       if (i < SOLVABLE_PROVIDES)
1048         keyd.type = REPOKEY_TYPE_ID;
1049       else if (i < RPM_RPMDBID)
1050         keyd.type = REPOKEY_TYPE_REL_IDARRAY;
1051       else
1052         keyd.type = REPOKEY_TYPE_U32;
1053       keyd.size = 0;
1054       keyd.storage = KEY_STORAGE_SOLVABLE;
1055       if (keyfilter)
1056         {
1057           keyd.storage = keyfilter(repo, &keyd, kfdata);
1058           if (keyd.storage == KEY_STORAGE_DROPPED)
1059             continue;
1060           keyd.storage = KEY_STORAGE_SOLVABLE;
1061         }
1062       poolusage = 1;
1063       clonepool = 1;
1064       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1065     }
1066
1067   if (repo->nsolvables)
1068     {
1069       Repokey keyd;
1070       keyd.name = REPOSITORY_SOLVABLES;
1071       keyd.type = REPOKEY_TYPE_FLEXARRAY;
1072       keyd.size = 0;
1073       keyd.storage = KEY_STORAGE_INCORE;
1074       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1075     }
1076
1077   dirpoolusage = 0;
1078
1079   spool = 0;
1080   dirpool = 0;
1081   dirpooldata = 0;
1082   n = ID_NUM_INTERNAL;
1083   FOR_REPODATAS(repo, i, data)
1084     {
1085       cbdata.keymapstart[i] = n;
1086       cbdata.keymap[n++] = 0;   /* key 0 */
1087       idused = 0;
1088       dirused = 0;
1089       if (keyfilter)
1090         {
1091           Repokey keyd;
1092           /* check if we want this repodata */
1093           memset(&keyd, 0, sizeof(keyd));
1094           keyd.name = 1;
1095           keyd.type = 1;
1096           keyd.size = i;
1097           if (keyfilter(repo, &keyd, kfdata) == -1)
1098             continue;
1099         }
1100       for (j = 1; j < data->nkeys; j++, n++)
1101         {
1102           key = data->keys + j;
1103           if (key->name == REPOSITORY_SOLVABLES && key->type == REPOKEY_TYPE_FLEXARRAY)
1104             {
1105               cbdata.keymap[n] = cbdata.keymap[key->name];
1106               continue;
1107             }
1108           if (key->type == REPOKEY_TYPE_DELETED)
1109             {
1110               cbdata.keymap[n] = 0;
1111               continue;
1112             }
1113           if (key->type == REPOKEY_TYPE_CONSTANTID && data->localpool)
1114             {
1115               Repokey keyd = *key;
1116               keyd.size = repodata_globalize_id(data, key->size, 1);
1117               id = repodata_key2id(&target, &keyd, 0);
1118             }
1119           else
1120             id = repodata_key2id(&target, key, 0);
1121           if (!id)
1122             {
1123               Repokey keyd = *key;
1124               keyd.storage = KEY_STORAGE_INCORE;
1125               if (keyd.type == REPOKEY_TYPE_CONSTANTID)
1126                 keyd.size = repodata_globalize_id(data, key->size, 1);
1127               else if (keyd.type != REPOKEY_TYPE_CONSTANT)
1128                 keyd.size = 0;
1129               if (keyfilter)
1130                 {
1131                   keyd.storage = keyfilter(repo, &keyd, kfdata);
1132                   if (keyd.storage == KEY_STORAGE_DROPPED)
1133                     {
1134                       cbdata.keymap[n] = 0;
1135                       continue;
1136                     }
1137                 }
1138               id = repodata_key2id(&target, &keyd, 1);
1139             }
1140           cbdata.keymap[n] = id;
1141           /* load repodata if not already loaded */
1142           if (data->state == REPODATA_STUB)
1143             {
1144               if (data->loadcallback)
1145                 data->loadcallback(data);
1146               else
1147                 data->state = REPODATA_ERROR;
1148               if (data->state != REPODATA_ERROR)
1149                 {
1150                   /* redo this repodata! */
1151                   j = 0;
1152                   n = cbdata.keymapstart[i];
1153                   continue;
1154                 }
1155             }
1156           if (data->state == REPODATA_ERROR)
1157             {
1158               /* too bad! */
1159               cbdata.keymap[n] = 0;
1160               continue;
1161             }
1162
1163           repodataused[i] = 1;
1164           anyrepodataused = 1;
1165           if (key->type == REPOKEY_TYPE_CONSTANTID || key->type == REPOKEY_TYPE_ID ||
1166               key->type == REPOKEY_TYPE_IDARRAY || key->type == REPOKEY_TYPE_REL_IDARRAY)
1167             idused = 1;
1168           else if (key->type == REPOKEY_TYPE_DIR || key->type == REPOKEY_TYPE_DIRNUMNUMARRAY || key->type == REPOKEY_TYPE_DIRSTRARRAY)
1169             {
1170               idused = 1;       /* dirs also use ids */
1171               dirused = 1;
1172             }
1173         }
1174       if (idused)
1175         {
1176           if (data->localpool)
1177             {
1178               if (poolusage)
1179                 poolusage = 3;  /* need own pool */
1180               else
1181                 {
1182                   poolusage = 2;
1183                   spool = &data->spool;
1184                 }
1185             }
1186           else
1187             {
1188               if (poolusage == 0)
1189                 poolusage = 1;
1190               else if (poolusage != 1)
1191                 poolusage = 3;  /* need own pool */
1192             }
1193         }
1194       if (dirused)
1195         {
1196           if (dirpoolusage)
1197             dirpoolusage = 3;   /* need own dirpool */
1198           else
1199             {
1200               dirpoolusage = 2;
1201               dirpool = &data->dirpool;
1202               dirpooldata = data;
1203             }
1204         }
1205     }
1206   cbdata.nkeymap = n;
1207
1208   /* 0: no pool needed at all */
1209   /* 1: use global pool */
1210   /* 2: use repodata local pool */
1211   /* 3: need own pool */
1212   if (poolusage == 3)
1213     {
1214       spool = &target.spool;
1215       /* hack: reuse global pool data so we don't have to map pool ids */
1216       if (clonepool)
1217         {
1218           stringpool_free(spool);
1219           stringpool_clone(spool, &pool->ss);
1220         }
1221       cbdata.ownspool = spool;
1222     }
1223   else if (poolusage == 0 || poolusage == 1)
1224     {
1225       poolusage = 1;
1226       spool = &pool->ss;
1227     }
1228
1229   if (dirpoolusage == 3)
1230     {
1231       dirpool = &target.dirpool;
1232       dirpooldata = 0;
1233       cbdata.owndirpool = dirpool;
1234     }
1235   else if (dirpool)
1236     cbdata.dirused = solv_calloc(dirpool->ndirs, sizeof(Id));
1237
1238
1239 /********************************************************************/
1240 #if 0
1241 fprintf(stderr, "poolusage: %d\n", poolusage);
1242 fprintf(stderr, "dirpoolusage: %d\n", dirpoolusage);
1243 fprintf(stderr, "nkeys: %d\n", target.nkeys);
1244 for (i = 1; i < target.nkeys; i++)
1245   fprintf(stderr, "  %2d: %s[%d] %d %d %d\n", i, pool_id2str(pool, target.keys[i].name), target.keys[i].name, target.keys[i].type, target.keys[i].size, target.keys[i].storage);
1246 #endif
1247
1248   /* copy keys if requested */
1249   if (keyarrayp)
1250     {
1251       *keyarrayp = solv_calloc(2 * target.nkeys + 1, sizeof(Id));
1252       for (i = 1; i < target.nkeys; i++)
1253         {
1254           (*keyarrayp)[2 * i - 2] = target.keys[i].name;
1255           (*keyarrayp)[2 * i - 1] = target.keys[i].type;
1256         }
1257     }
1258
1259   if (poolusage > 1)
1260     {
1261       /* put all the keys we need in our string pool */
1262       /* put mapped ids right into target.keys */
1263       for (i = 1, key = target.keys + i; i < target.nkeys; i++, key++)
1264         {
1265           key->name = stringpool_str2id(spool, pool_id2str(pool, key->name), 1);
1266           if (key->type == REPOKEY_TYPE_CONSTANTID)
1267             {
1268               key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1269               type_constantid = key->type;
1270               key->size = stringpool_str2id(spool, pool_id2str(pool, key->size), 1);
1271             }
1272           else
1273             key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1274         }
1275       if (poolusage == 2)
1276         stringpool_freehash(spool);     /* free some mem */
1277     }
1278
1279
1280 /********************************************************************/
1281
1282   /* set needed count of all strings and rels,
1283    * find which keys are used in the solvables
1284    * put all strings in own spool
1285    */
1286
1287   reloff = spool->nstrings;
1288   if (poolusage == 3)
1289     reloff = (reloff + NEEDED_BLOCK) & ~NEEDED_BLOCK;
1290
1291   needid = calloc(reloff + pool->nrels, sizeof(*needid));
1292   needid[0].map = reloff;
1293
1294   cbdata.needid = needid;
1295   cbdata.schema = solv_calloc(target.nkeys, sizeof(Id));
1296   cbdata.sp = cbdata.schema;
1297   cbdata.solvschemata = solv_calloc(repo->nsolvables, sizeof(Id));
1298
1299   /* create main schema */
1300   cbdata.sp = cbdata.schema;
1301   /* collect all other data from all repodatas */
1302   /* XXX: merge arrays of equal keys? */
1303   FOR_REPODATAS(repo, j, data)
1304     {
1305       if (!repodataused[j])
1306         continue;
1307       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1308     }
1309   sp = cbdata.sp;
1310   /* add solvables if needed (may revert later) */
1311   if (repo->nsolvables)
1312     {
1313       *sp++ = cbdata.keymap[REPOSITORY_SOLVABLES];
1314       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size++;
1315     }
1316   *sp = 0;
1317   mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1318
1319   idarraydata = repo->idarraydata;
1320
1321   anysolvableused = 0;
1322   cbdata.doingsolvables = 1;
1323   for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1324     {
1325       if (s->repo != repo)
1326         continue;
1327
1328       /* set schema info, keep in sync with further down */
1329       sp = cbdata.schema;
1330       if (cbdata.keymap[SOLVABLE_NAME])
1331         {
1332           *sp++ = cbdata.keymap[SOLVABLE_NAME];
1333           needid[s->name].need++;
1334         }
1335       if (cbdata.keymap[SOLVABLE_ARCH])
1336         {
1337           *sp++ = cbdata.keymap[SOLVABLE_ARCH];
1338           needid[s->arch].need++;
1339         }
1340       if (cbdata.keymap[SOLVABLE_EVR])
1341         {
1342           *sp++ = cbdata.keymap[SOLVABLE_EVR];
1343           needid[s->evr].need++;
1344         }
1345       if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1346         {
1347           *sp++ = cbdata.keymap[SOLVABLE_VENDOR];
1348           needid[s->vendor].need++;
1349         }
1350       if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1351         {
1352           *sp++ = cbdata.keymap[SOLVABLE_PROVIDES];
1353           target.keys[cbdata.keymap[SOLVABLE_PROVIDES]].size += incneedidarray(pool, idarraydata + s->provides, needid);
1354         }
1355       if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1356         {
1357           *sp++ = cbdata.keymap[SOLVABLE_OBSOLETES];
1358           target.keys[cbdata.keymap[SOLVABLE_OBSOLETES]].size += incneedidarray(pool, idarraydata + s->obsoletes, needid);
1359         }
1360       if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1361         {
1362           *sp++ = cbdata.keymap[SOLVABLE_CONFLICTS];
1363           target.keys[cbdata.keymap[SOLVABLE_CONFLICTS]].size += incneedidarray(pool, idarraydata + s->conflicts, needid);
1364         }
1365       if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1366         {
1367           *sp++ = cbdata.keymap[SOLVABLE_REQUIRES];
1368           target.keys[cbdata.keymap[SOLVABLE_REQUIRES]].size += incneedidarray(pool, idarraydata + s->requires, needid);
1369         }
1370       if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1371         {
1372           *sp++ = cbdata.keymap[SOLVABLE_RECOMMENDS];
1373           target.keys[cbdata.keymap[SOLVABLE_RECOMMENDS]].size += incneedidarray(pool, idarraydata + s->recommends, needid);
1374         }
1375       if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1376         {
1377           *sp++ = cbdata.keymap[SOLVABLE_SUGGESTS];
1378           target.keys[cbdata.keymap[SOLVABLE_SUGGESTS]].size += incneedidarray(pool, idarraydata + s->suggests, needid);
1379         }
1380       if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1381         {
1382           *sp++ = cbdata.keymap[SOLVABLE_SUPPLEMENTS];
1383           target.keys[cbdata.keymap[SOLVABLE_SUPPLEMENTS]].size += incneedidarray(pool, idarraydata + s->supplements, needid);
1384         }
1385       if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1386         {
1387           *sp++ = cbdata.keymap[SOLVABLE_ENHANCES];
1388           target.keys[cbdata.keymap[SOLVABLE_ENHANCES]].size += incneedidarray(pool, idarraydata + s->enhances, needid);
1389         }
1390       if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1391         {
1392           *sp++ = cbdata.keymap[RPM_RPMDBID];
1393           target.keys[cbdata.keymap[RPM_RPMDBID]].size++;
1394         }
1395       cbdata.sp = sp;
1396
1397       if (anyrepodataused)
1398         {
1399           FOR_REPODATAS(repo, j, data)
1400             {
1401               if (!repodataused[j])
1402                 continue;
1403               if (i < data->start || i >= data->end)
1404                 continue;
1405               repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1406               needid = cbdata.needid;
1407             }
1408         }
1409       *cbdata.sp = 0;
1410       cbdata.solvschemata[n] = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1411       if (cbdata.solvschemata[n])
1412         anysolvableused = 1;
1413       n++;
1414     }
1415   cbdata.doingsolvables = 0;
1416   assert(n == repo->nsolvables);
1417
1418   if (repo->nsolvables && !anysolvableused)
1419     {
1420       /* strip off solvable from the main schema */
1421       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size = 0;
1422       sp = cbdata.schema;
1423       for (i = 0; target.schemadata[target.schemata[mainschema] + i]; i++)
1424         {
1425           *sp = target.schemadata[target.schemata[mainschema] + i];
1426           if (*sp != cbdata.keymap[REPOSITORY_SOLVABLES])
1427             sp++;
1428         }
1429       assert(target.schemadatalen == target.schemata[mainschema] + i + 1);
1430       *sp = 0;
1431       target.schemadatalen = target.schemata[mainschema];
1432       target.nschemata--;
1433       repodata_free_schemahash(&target);
1434       mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1435     }
1436
1437 /********************************************************************/
1438
1439   /* remove unused keys */
1440   keyused = solv_calloc(target.nkeys, sizeof(Id));
1441   for (i = 1; i < target.schemadatalen; i++)
1442     keyused[target.schemadata[i]] = 1;
1443   keyused[0] = 0;
1444   for (n = i = 1; i < target.nkeys; i++)
1445     {
1446       if (!keyused[i])
1447         continue;
1448       keyused[i] = n;
1449       if (i != n)
1450         {
1451           target.keys[n] = target.keys[i];
1452           if (keyarrayp)
1453             {
1454               (*keyarrayp)[2 * n - 2] = (*keyarrayp)[2 * i - 2];
1455               (*keyarrayp)[2 * n - 1] = (*keyarrayp)[2 * i - 1];
1456             }
1457         }
1458       n++;
1459     }
1460   target.nkeys = n;
1461   if (keyarrayp)
1462     {
1463       /* terminate array */
1464       (*keyarrayp)[2 * n - 2] = 0;
1465       (*keyarrayp)[2 * n - 1] = 0;
1466     }
1467
1468   /* update schema data to the new key ids */
1469   for (i = 1; i < target.schemadatalen; i++)
1470     target.schemadata[i] = keyused[target.schemadata[i]];
1471   /* update keymap to the new key ids */
1472   for (i = 0; i < cbdata.nkeymap; i++)
1473     cbdata.keymap[i] = keyused[cbdata.keymap[i]];
1474   keyused = solv_free(keyused);
1475
1476   /* increment needid of the used keys, they are already mapped to
1477    * the correct string pool  */
1478   for (i = 1; i < target.nkeys; i++)
1479     {
1480       if (target.keys[i].type == type_constantid)
1481         needid[target.keys[i].size].need++;
1482       needid[target.keys[i].name].need++;
1483       needid[target.keys[i].type].need++;
1484     }
1485
1486 /********************************************************************/
1487
1488   if (dirpool && cbdata.dirused && !cbdata.dirused[0])
1489     {
1490       /* no dirs used at all */
1491       cbdata.dirused = solv_free(cbdata.dirused);
1492       dirpool = 0;
1493     }
1494
1495   /* increment need id for used dir components */
1496   if (dirpool)
1497     {
1498       /* if we have own dirpool, all entries in it are used.
1499          also, all comp ids are already mapped by putinowndirpool(),
1500          so we can simply increment needid.
1501          (owndirpool != 0, dirused == 0, dirpooldata == 0) */
1502       /* else we re-use a dirpool of repodata "dirpooldata".
1503          dirused tells us which of the ids are used.
1504          we need to map comp ids if we generate a new pool.
1505          (owndirpool == 0, dirused != 0, dirpooldata != 0) */
1506       for (i = 1; i < dirpool->ndirs; i++)
1507         {
1508 #if 0
1509 fprintf(stderr, "dir %d used %d\n", i, cbdata.dirused ? cbdata.dirused[i] : 1);
1510 #endif
1511           if (cbdata.dirused && !cbdata.dirused[i])
1512             continue;
1513           id = dirpool->dirs[i];
1514           if (id <= 0)
1515             continue;
1516           if (dirpooldata && cbdata.ownspool && id > 1)
1517             {
1518               id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1519               needid = cbdata.needid;
1520             }
1521           needid[id].need++;
1522         }
1523     }
1524
1525
1526 /********************************************************************/
1527
1528   /*
1529    * create mapping table, new keys are sorted by needid[].need
1530    *
1531    * needid[key].need : old key -> new key
1532    * needid[key].map  : new key -> old key
1533    */
1534
1535   /* zero out id 0 and rel 0 just in case */
1536   reloff = needid[0].map;
1537   needid[0].need = 0;
1538   needid[reloff].need = 0;
1539
1540   for (i = 1; i < reloff + pool->nrels; i++)
1541     needid[i].map = i;
1542
1543 #if 0
1544   solv_sort(needid + 1, spool->nstrings - 1, sizeof(*needid), needid_cmp_need_s, spool);
1545 #else
1546   /* make first entry '' */
1547   needid[1].need = 1;
1548   solv_sort(needid + 2, spool->nstrings - 2, sizeof(*needid), needid_cmp_need_s, spool);
1549 #endif
1550   solv_sort(needid + reloff, pool->nrels, sizeof(*needid), needid_cmp_need, 0);
1551   /* now needid is in new order, needid[newid].map -> oldid */
1552
1553   /* calculate string space size, also zero out needid[].need */
1554   sizeid = 0;
1555   for (i = 1; i < reloff; i++)
1556     {
1557       if (!needid[i].need)
1558         break;  /* as we have sorted, every entry after this also has need == 0 */
1559       needid[i].need = 0;
1560       sizeid += strlen(spool->stringspace + spool->strings[needid[i].map]) + 1;
1561     }
1562   nstrings = i; /* our new string id end */
1563
1564   /* make needid[oldid].need point to newid */
1565   for (i = 1; i < nstrings; i++)
1566     needid[needid[i].map].need = i;
1567
1568   /* same as above for relations */
1569   for (i = 0; i < pool->nrels; i++)
1570     {
1571       if (!needid[reloff + i].need)
1572         break;
1573       needid[reloff + i].need = 0;
1574     }
1575   nrels = i;    /* our new rel id end */
1576
1577   for (i = 0; i < nrels; i++)
1578     needid[needid[reloff + i].map].need = nstrings + i;
1579
1580   /* now we have: needid[oldid].need -> newid
1581                   needid[newid].map  -> oldid
1582      both for strings and relations  */
1583
1584
1585 /********************************************************************/
1586
1587   ndirmap = 0;
1588   dirmap = 0;
1589   if (dirpool)
1590     {
1591       /* create our new target directory structure by traversing through all
1592        * used dirs. This will concatenate blocks with the same parent
1593        * directory into single blocks.
1594        * Instead of components, traverse_dirs stores the old dirids,
1595        * we will change this in the second step below */
1596       /* (dirpooldata and dirused are 0 if we have our own dirpool) */
1597       if (cbdata.dirused && !cbdata.dirused[1])
1598         cbdata.dirused[1] = 1;  /* always want / entry */
1599       dirmap = solv_calloc(dirpool->ndirs, sizeof(Id));
1600       dirmap[0] = 0;
1601       ndirmap = traverse_dirs(dirpool, dirmap, 1, dirpool_child(dirpool, 0), cbdata.dirused);
1602
1603       /* (re)create dirused, so that it maps from "old dirid" to "new dirid" */
1604       /* change dirmap so that it maps from "new dirid" to "new compid" */
1605       if (!cbdata.dirused)
1606         cbdata.dirused = solv_malloc2(dirpool->ndirs, sizeof(Id));
1607       memset(cbdata.dirused, 0, dirpool->ndirs * sizeof(Id));
1608       for (i = 1; i < ndirmap; i++)
1609         {
1610           if (dirmap[i] <= 0)
1611             continue;
1612           cbdata.dirused[dirmap[i]] = i;
1613           id = dirpool->dirs[dirmap[i]];
1614           if (dirpooldata && cbdata.ownspool && id > 1)
1615             id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1616           dirmap[i] = needid[id].need;
1617         }
1618       /* now the new target directory structure is complete (dirmap), and we have
1619        * dirused[olddirid] -> newdirid */
1620     }
1621
1622 /********************************************************************/
1623
1624   /* collect all data
1625    * we use extdata[0] for incore data and extdata[keyid] for vertical data
1626    */
1627
1628   cbdata.extdata = solv_calloc(target.nkeys, sizeof(struct extdata));
1629
1630   xd = cbdata.extdata;
1631   cbdata.current_sub = 0;
1632   /* add main schema */
1633   cbdata.lastlen = 0;
1634   data_addid(xd, mainschema);
1635
1636 #if 1
1637   FOR_REPODATAS(repo, j, data)
1638     {
1639       if (!repodataused[j])
1640         continue;
1641       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1642     }
1643 #endif
1644
1645   if (xd->len - cbdata.lastlen > cbdata.maxdata)
1646     cbdata.maxdata = xd->len - cbdata.lastlen;
1647   cbdata.lastlen = xd->len;
1648
1649   if (anysolvableused)
1650     {
1651       data_addid(xd, repo->nsolvables); /* FLEXARRAY nentries */
1652       cbdata.doingsolvables = 1;
1653       for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1654         {
1655           if (s->repo != repo)
1656             continue;
1657           data_addid(xd, cbdata.solvschemata[n]);
1658           if (cbdata.keymap[SOLVABLE_NAME])
1659             data_addid(xd, needid[s->name].need);
1660           if (cbdata.keymap[SOLVABLE_ARCH])
1661             data_addid(xd, needid[s->arch].need);
1662           if (cbdata.keymap[SOLVABLE_EVR])
1663             data_addid(xd, needid[s->evr].need);
1664           if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1665             data_addid(xd, needid[s->vendor].need);
1666           if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1667             data_addidarray_sort(xd, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER);
1668           if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1669             data_addidarray_sort(xd, pool, needid, idarraydata + s->obsoletes, 0);
1670           if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1671             data_addidarray_sort(xd, pool, needid, idarraydata + s->conflicts, 0);
1672           if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1673             data_addidarray_sort(xd, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER);
1674           if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1675             data_addidarray_sort(xd, pool, needid, idarraydata + s->recommends, 0);
1676           if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1677             data_addidarray_sort(xd, pool, needid, idarraydata + s->suggests, 0);
1678           if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1679             data_addidarray_sort(xd, pool, needid, idarraydata + s->supplements, 0);
1680           if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1681             data_addidarray_sort(xd, pool, needid, idarraydata + s->enhances, 0);
1682           if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1683             data_addu32(xd, repo->rpmdbid[i - repo->start]);
1684           if (anyrepodataused)
1685             {
1686               cbdata.vstart = -1;
1687               FOR_REPODATAS(repo, j, data)
1688                 {
1689                   if (!repodataused[j])
1690                     continue;
1691                   if (i < data->start || i >= data->end)
1692                     continue;
1693                   repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1694                 }
1695             }
1696           if (xd->len - cbdata.lastlen > cbdata.maxdata)
1697             cbdata.maxdata = xd->len - cbdata.lastlen;
1698           cbdata.lastlen = xd->len;
1699           n++;
1700         }
1701       cbdata.doingsolvables = 0;
1702     }
1703
1704   assert(cbdata.current_sub == cbdata.nsubschemata);
1705   if (cbdata.subschemata)
1706     {
1707       cbdata.subschemata = solv_free(cbdata.subschemata);
1708       cbdata.nsubschemata = 0;
1709     }
1710
1711 /********************************************************************/
1712
1713   /* write header */
1714
1715   /* write file header */
1716   write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
1717   write_u32(fp, SOLV_VERSION_8);
1718
1719
1720   /* write counts */
1721   write_u32(fp, nstrings);
1722   write_u32(fp, nrels);
1723   write_u32(fp, ndirmap);
1724   write_u32(fp, anysolvableused ? repo->nsolvables : 0);
1725   write_u32(fp, target.nkeys);
1726   write_u32(fp, target.nschemata);
1727   solv_flags = 0;
1728   solv_flags |= SOLV_FLAG_PREFIX_POOL;
1729   write_u32(fp, solv_flags);
1730
1731   /*
1732    * calculate prefix encoding of the strings
1733    */
1734   prefixcomp = solv_malloc(nstrings);
1735   compsum = 0;
1736   old_str = "";
1737
1738   prefixcomp[0] = 0;
1739   for (i = 1; i < nstrings; i++)
1740     {
1741       char *str = spool->stringspace + spool->strings[needid[i].map];
1742       int same;
1743       for (same = 0; same < 255; same++)
1744         if (!old_str[same] || old_str[same] != str[same])
1745           break;
1746       prefixcomp[i] = same;
1747       compsum += same;
1748       old_str = str;
1749     }
1750
1751   /*
1752    * write strings
1753    */
1754   write_u32(fp, sizeid);
1755   /* we save compsum bytes but need 1 extra byte for every string */
1756   write_u32(fp, sizeid + (nstrings ? nstrings - 1 : 0) - compsum);
1757   if (sizeid + (nstrings ? nstrings - 1 : 0) != compsum)
1758     {
1759       for (i = 1; i < nstrings; i++)
1760         {
1761           char *str = spool->stringspace + spool->strings[needid[i].map];
1762           write_u8(fp, prefixcomp[i]);
1763           write_str(fp, str + prefixcomp[i]);
1764         }
1765     }
1766   solv_free(prefixcomp);
1767
1768 #if 0
1769   /* Build the prefix-encoding of the string pool.  We need to know
1770      the size of that before writing it to the file, so we have to
1771      build a separate buffer for that.  As it's temporarily possible
1772      that this actually is an expansion we can't easily reuse the
1773      stringspace for this.  The max expansion per string is 1 byte,
1774      so it will fit into sizeid+nstrings bytes.  */
1775   char *prefix = solv_malloc(sizeid + nstrings);
1776   char *pp = prefix;
1777   char *old_str = "";
1778   for (i = 1; i < nstrings; i++)
1779     {
1780       char *str = spool->stringspace + spool->strings[needid[i].map];
1781       int same;
1782       size_t len;
1783       for (same = 0; same < 255; same++)
1784         if (!old_str[same] || !str[same] || old_str[same] != str[same])
1785           break;
1786       *pp++ = same;
1787       len = strlen(str + same) + 1;
1788       memcpy(pp, str + same, len);
1789       pp += len;
1790       old_str = str;
1791     }
1792
1793   /*
1794    * write strings
1795    */
1796   write_u32(fp, sizeid);
1797   write_u32(fp, pp - prefix);
1798   if (pp != prefix)
1799     {
1800       if (fwrite(prefix, pp - prefix, 1, fp) != 1)
1801         {
1802           perror("write error prefix");
1803           exit(1);
1804         }
1805     }
1806   solv_free(prefix);
1807 #endif
1808
1809   /*
1810    * write RelDeps
1811    */
1812   for (i = 0; i < nrels; i++)
1813     {
1814       ran = pool->rels + (needid[reloff + i].map - reloff);
1815       write_id(fp, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
1816       write_id(fp, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
1817       write_u8(fp, ran->flags);
1818     }
1819
1820   /*
1821    * write dirs (skip both root and / entry)
1822    */
1823   for (i = 2; i < ndirmap; i++)
1824     {
1825       if (dirmap[i] > 0)
1826         write_id(fp, dirmap[i]);
1827       else
1828         write_id(fp, nstrings - dirmap[i]);
1829     }
1830   solv_free(dirmap);
1831
1832   /*
1833    * write keys
1834    */
1835   for (i = 1; i < target.nkeys; i++)
1836     {
1837       write_id(fp, needid[target.keys[i].name].need);
1838       write_id(fp, needid[target.keys[i].type].need);
1839       if (target.keys[i].storage != KEY_STORAGE_VERTICAL_OFFSET)
1840         {
1841           if (target.keys[i].type == type_constantid)
1842             write_id(fp, needid[target.keys[i].size].need);
1843           else
1844             write_id(fp, target.keys[i].size);
1845         }
1846       else
1847         write_id(fp, cbdata.extdata[i].len);
1848       write_id(fp, target.keys[i].storage);
1849     }
1850
1851   /*
1852    * write schemata
1853    */
1854   write_id(fp, target.schemadatalen);   /* XXX -1? */
1855   for (i = 1; i < target.nschemata; i++)
1856     write_idarray(fp, pool, 0, repodata_id2schema(&target, i));
1857
1858 /********************************************************************/
1859
1860   write_id(fp, cbdata.maxdata);
1861   write_id(fp, cbdata.extdata[0].len);
1862   if (cbdata.extdata[0].len)
1863     write_blob(fp, cbdata.extdata[0].buf, cbdata.extdata[0].len);
1864   solv_free(cbdata.extdata[0].buf);
1865
1866   /* do we have vertical data? */
1867   for (i = 1; i < target.nkeys; i++)
1868     if (cbdata.extdata[i].len)
1869       break;
1870   if (i < target.nkeys)
1871     {
1872       /* yes, write it in pages */
1873       unsigned char *dp, vpage[REPOPAGE_BLOBSIZE];
1874       int l, ll, lpage = 0;
1875
1876       write_u32(fp, REPOPAGE_BLOBSIZE);
1877       for (i = 1; i < target.nkeys; i++)
1878         {
1879           if (!cbdata.extdata[i].len)
1880             continue;
1881           l = cbdata.extdata[i].len;
1882           dp = cbdata.extdata[i].buf;
1883           while (l)
1884             {
1885               ll = REPOPAGE_BLOBSIZE - lpage;
1886               if (l < ll)
1887                 ll = l;
1888               memcpy(vpage + lpage, dp, ll);
1889               dp += ll;
1890               lpage += ll;
1891               l -= ll;
1892               if (lpage == REPOPAGE_BLOBSIZE)
1893                 {
1894                   write_compressed_page(fp, vpage, lpage);
1895                   lpage = 0;
1896                 }
1897             }
1898         }
1899       if (lpage)
1900         write_compressed_page(fp, vpage, lpage);
1901     }
1902
1903   for (i = 1; i < target.nkeys; i++)
1904     solv_free(cbdata.extdata[i].buf);
1905   solv_free(cbdata.extdata);
1906
1907   repodata_freedata(&target);
1908
1909   solv_free(needid);
1910   solv_free(cbdata.solvschemata);
1911   solv_free(cbdata.schema);
1912
1913   solv_free(cbdata.keymap);
1914   solv_free(cbdata.keymapstart);
1915   solv_free(cbdata.dirused);
1916   solv_free(repodataused);
1917   return 0;
1918 }
1919
1920 struct repodata_write_data {
1921   int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata);
1922   void *kfdata;
1923   int repodataid;
1924 };
1925
1926 static int
1927 repodata_write_keyfilter(Repo *repo, Repokey *key, void *kfdata)
1928 {
1929   struct repodata_write_data *wd = kfdata;
1930
1931   /* XXX: special repodata selection hack */
1932   if (key->name == 1 && key->size != wd->repodataid)
1933     return -1;
1934   if (key->storage == KEY_STORAGE_SOLVABLE)
1935     return KEY_STORAGE_DROPPED; /* not part of this repodata */
1936   if (wd->keyfilter)
1937     return (*wd->keyfilter)(repo, key, wd->kfdata);
1938   return key->storage;
1939 }
1940
1941 int
1942 repodata_write(Repodata *data, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata)
1943 {
1944   struct repodata_write_data wd;
1945
1946   wd.keyfilter = keyfilter;
1947   wd.kfdata = kfdata;
1948   wd.repodataid = data - data->repo->repodata;
1949   return repo_write(data->repo, fp, repodata_write_keyfilter, &wd, 0);
1950 }