ext/repo_write.c

   1 /*
   2  * Copyright (c) 2007-2011, Novell Inc.
   3  *
   4  * This program is licensed under the BSD license, read LICENSE.BSD
   5  * for further information
   6  */
   7
   8 /*
   9  * repo_write.c
  10  *
  11  * Write Repo data out to a file in solv format
  12  *
  13  * See doc/README.format for a description
  14  * of the binary file format
  15  *
  16  */
  17
  18 #include <sys/types.h>
  19 #include <limits.h>
  20 #include <fcntl.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <assert.h>
  25
  26 #include "pool.h"
  27 #include "util.h"
  28 #include "repo_write.h"
  29 #include "repopage.h"
  30
  31 /*------------------------------------------------------------------*/
  32 /* Id map optimizations */
  33
/*
 * Per-Id bookkeeping entry.  The NeedId array is indexed by Id; the
 * entries for reldeps are stored behind the plain ids, starting at the
 * offset kept in needid[0].map (see RELOFF below).
 */
typedef struct needid {
  Id need;      /* bumped once per use by incneedid(); the write helpers
                   later read this field as the id to emit */
  Id map;       /* used as index into the string table by
                   needid_cmp_need_s(); in element 0 it holds the offset
                   of the reldep entries instead */
} NeedId;


/* index of the NeedId entry of reldep "id"; relies on a variable called
   "needid" being in scope at the point of use */
#define RELOFF(id) (needid[0].map + GETRELID(id))
  41
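/*
 * increment need Id
 *
 * incneedid:      count one use of a single Id; for a reldep the
 *                 name and evr parts are counted as well
 * incneedidarray: do the same for every entry of an idarray
 *
 * idarray: array of Ids, ID_NULL terminated
 * needid: array of Id->NeedId
 *
 * incneedidarray returns the size of the array (including the
 * trailing zero), or 0 if there is no array
 */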
  50
  51 static void
  52 incneedid(Pool *pool, Id id, NeedId *needid)
  53 {
  54   while (ISRELDEP(id))
  55     {
  56       Reldep *rd = GETRELDEP(pool, id);
  57       needid[RELOFF(id)].need++;
  58       if (ISRELDEP(rd->evr))
  59         incneedid(pool, rd->evr, needid);
  60       else
  61         needid[rd->evr].need++;
  62       id = rd->name;
  63     }
  64   needid[id].need++;
  65 }
  66
  67 static int
  68 incneedidarray(Pool *pool, Id *idarray, NeedId *needid)
  69 {
  70   Id id;
  71   int n = 0;
  72
  73   if (!idarray)
  74     return 0;
  75   while ((id = *idarray++) != 0)
  76     {
  77       n++;
  78       while (ISRELDEP(id))
  79         {
  80           Reldep *rd = GETRELDEP(pool, id);
  81           needid[RELOFF(id)].need++;
  82           if (ISRELDEP(rd->evr))
  83             incneedid(pool, rd->evr, needid);
  84           else
  85             needid[rd->evr].need++;
  86           id = rd->name;
  87         }
  88       needid[id].need++;
  89     }
  90   return n + 1;
  91 }
  92
  93
/*
 * sort comparators for NeedId arrays: entries with a higher need
 * count come first
 */
  97
  98 static int
  99 needid_cmp_need(const void *ap, const void *bp, void *dp)
 100 {
 101   const NeedId *a = ap;
 102   const NeedId *b = bp;
 103   int r;
 104   r = b->need - a->need;
 105   if (r)
 106     return r;
 107   return a->map - b->map;
 108 }
 109
 110 static int
 111 needid_cmp_need_s(const void *ap, const void *bp, void *dp)
 112 {
 113   const NeedId *a = ap;
 114   const NeedId *b = bp;
 115   Stringpool *spool = dp;
 116
 117   int r;
 118   r = b->need - a->need;
 119   if (r)
 120     return r;
 121   const char *as = spool->stringspace + spool->strings[a->map];
 122   const char *bs = spool->stringspace + spool->strings[b->map];
 123   return strcmp(as, bs);
 124 }
 125
 126
 127 /*------------------------------------------------------------------*/
 128 /* output helper routines, used for writing the header */
 129 /* (the data itself is accumulated in memory and written with
 130  * write_blob) */
 131
 132 /*
 133  * unsigned 32-bit
 134  */
 135
 136 static void
 137 write_u32(FILE *fp, unsigned int x)
 138 {
 139   if (putc(x >> 24, fp) == EOF ||
 140       putc(x >> 16, fp) == EOF ||
 141       putc(x >> 8, fp) == EOF ||
 142       putc(x, fp) == EOF)
 143     {
 144       perror("write error u32");
 145       exit(1);
 146     }
 147 }
 148
 149
 150 /*
 151  * unsigned 8-bit
 152  */
 153
 154 static void
 155 write_u8(FILE *fp, unsigned int x)
 156 {
 157   if (putc(x, fp) == EOF)
 158     {
 159       perror("write error u8");
 160       exit(1);
 161     }
 162 }
 163
 164 /*
 165  * data blob
 166  */
 167
 168 static void
 169 write_blob(FILE *fp, void *data, int len)
 170 {
 171   if (len && fwrite(data, len, 1, fp) != 1)
 172     {
 173       perror("write error blob");
 174       exit(1);
 175     }
 176 }
 177
 178 /*
 179  * Id
 180  */
 181
 182 static void
 183 write_id(FILE *fp, Id x)
 184 {
 185   if (x >= (1 << 14))
 186     {
 187       if (x >= (1 << 28))
 188         putc((x >> 28) | 128, fp);
 189       if (x >= (1 << 21))
 190         putc((x >> 21) | 128, fp);
 191       putc((x >> 14) | 128, fp);
 192     }
 193   if (x >= (1 << 7))
 194     putc((x >> 7) | 128, fp);
 195   if (putc(x & 127, fp) == EOF)
 196     {
 197       perror("write error id");
 198       exit(1);
 199     }
 200 }
 201
 202 static inline void
 203 write_id_eof(FILE *fp, Id x, int eof)
 204 {
 205   if (x >= 64)
 206     x = (x & 63) | ((x & ~63) << 1);
 207   write_id(fp, x | (eof ? 0 : 64));
 208 }
 209
 210
 211
 212 static inline void
 213 write_str(FILE *fp, const char *str)
 214 {
 215   if (fputs(str, fp) == EOF || putc(0, fp) == EOF)
 216     {
 217       perror("write error str");
 218       exit(1);
 219     }
 220 }
 221
 222 /*
 223  * Array of Ids
 224  */
 225
 226 static void
 227 write_idarray(FILE *fp, Pool *pool, NeedId *needid, Id *ids)
 228 {
 229   Id id;
 230   if (!ids)
 231     return;
 232   if (!*ids)
 233     {
 234       write_u8(fp, 0);
 235       return;
 236     }
 237   for (;;)
 238     {
 239       id = *ids++;
 240       if (needid)
 241         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 242       if (id >= 64)
 243         id = (id & 63) | ((id & ~63) << 1);
 244       if (!*ids)
 245         {
 246           write_id(fp, id);
 247           return;
 248         }
 249       write_id(fp, id | 64);
 250     }
 251 }
 252
 253 static int
 254 cmp_ids(const void *pa, const void *pb, void *dp)
 255 {
 256   Id a = *(Id *)pa;
 257   Id b = *(Id *)pb;
 258   return a - b;
 259 }
 260
/*
 * Disabled (compiled out).  Same algorithm as data_addidarray_sort(),
 * but the encoded ids go directly to a FILE with write_id()/write_u8()
 * instead of being appended to an extdata buffer.
 */
#if 0
static void
write_idarray_sort(FILE *fp, Pool *pool, NeedId *needid, Id *ids, Id marker)
{
  int len, i;
  Id lids[64], *sids;

  if (!ids)
    return;
  if (!*ids)
    {
      write_u8(fp, 0);
      return;
    }
  /* map up to 64 ids into the on-stack buffer */
  for (len = 0; len < 64 && ids[len]; len++)
    {
      Id id = ids[len];
      if (needid)
        id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
      lids[len] = id;
    }
  if (ids[len])
    {
      /* more than 64 entries: count them and switch to a heap buffer */
      for (i = len + 1; ids[i]; i++)
        ;
      sids = sat_malloc2(i, sizeof(Id));
      memcpy(sids, lids, 64 * sizeof(Id));
      for (; ids[len]; len++)
        {
          Id id = ids[len];
          if (needid)
            id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
          sids[len] = id;
        }
    }
  else
    sids = lids;

  /* That bloody solvable:prereqmarker needs to stay in position :-(  */
  if (needid)
    marker = needid[marker].need;
  for (i = 0; i < len; i++)
    if (sids[i] == marker)
      break;
  /* sort the parts before and after the marker separately */
  if (i > 1)
    sat_sort(sids, i, sizeof(Id), cmp_ids, 0);
  if ((len - i) > 2)
    sat_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);

  Id id, old = 0;

  /* The differencing below produces many runs of ones and twos.  I tried
     fairly elaborate schemes to RLE those, but they give only very mediocre
     improvements in compression, as coding the escapes costs quite some
     space.  Even if they are coded only as bits in IDs.  The best improvement
     was about 2.7% for the whole .solv file.  It's probably better to
     invest some complexity into sharing idarrays, than RLEing.  */
  for (i = 0; i < len - 1; i++)
    {
      id = sids[i];
    /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
       hence all real differences are offset by 1.  Otherwise we would
       have to handle negative differences, which would cost code space for
       the encoding of the sign.  We lose the exact mapping of prereq here,
       but we know the result, so we can recover from that in the reader.  */
      if (id == marker)
        id = old = 0;
      else
        {
          id = id - old + 1;
          old = sids[i];
        }
      /* XXX If difference is zero we have multiple equal elements,
         we might want to skip writing them out.  */
      if (id >= 64)
        id = (id & 63) | ((id & ~63) << 1);
      write_id(fp, id | 64);
    }
  /* last entry: same coding, but without the continuation bit */
  id = sids[i];
  if (id == marker)
    id = 0;
  else
    id = id - old + 1;
  if (id >= 64)
    id = (id & 63) | ((id & ~63) << 1);
  write_id(fp, id);
  if (sids != lids)
    sat_free(sids);
}
#endif
 351
 352
/* growable output buffer, filled by the data_add*() helpers */
struct extdata {
  unsigned char *buf;   /* buffer, grown with sat_extend() */
  int len;              /* number of bytes stored in buf so far */
};
 357
/* state handed to the pass 1 and pass 2 callbacks */
struct cbdata {
  Repo *repo;           /* the repo that gets written */
  Repodata *target;     /* repodata used to collect the output keys and
                           schemata */

  Stringpool *ownspool; /* if set, string ids are re-entered into this
                           pool (see putinownpool) */
  Dirpool *owndirpool;  /* if set, dir ids are re-entered into this
                           pool (see putinowndirpool) */

  Id *keymap;           /* maps repo key to output key, 0 -> not used */
  int nkeymap;          /* NOTE(review): not referenced in the code
                           visible here */
  Id *keymapstart;      /* maps repodata number to keymap offset */

  NeedId *needid;       /* usage counts/id mapping; may be reallocated by
                           putinownpool */

  Id *schema;           /* schema construction space */
  Id *sp;               /* pointer in above */
  Id *oldschema, *oldsp;        /* saved schema/sp while a FIXARRAY is
                                   being processed in pass 1 */

  Id *solvschemata;     /* NOTE(review): not referenced in the code
                           visible here */
  Id *subschemata;      /* schema ids of the sub-structures, appended in
                           pass 1 */
  int nsubschemata;     /* number of entries in subschemata */
  int current_sub;      /* index into subschemata, advanced while the
                           array data is processed */

  struct extdata *extdata;      /* output buffers: [0] is the incore
                                   buffer, [key] the buffer of a
                                   vertical key */

  Id *dirused;          /* pass 1: dir usage marks (see setdirused);
                           pass 2: read as the dir id to write */

  Id vstart;            /* start of the current entry in a vertical
                           buffer, -1 if none is open */

  Id maxdata;           /* biggest amount of incore data added between two
                           top-level FLEXARRAY callbacks (not counted
                           while doingsolvables is set) */
  Id lastlen;           /* incore buffer length at the last such
                           callback */

  int doingsolvables;   /* working on solvables data */
};
 391
/* block masks (2^n - 1) used when growing the dynamic arrays */
#define NEEDED_BLOCK 1023       /* needid array, see putinownpool() */
#define SCHEMATA_BLOCK 31       /* cbdata subschemata array */
#define SCHEMATADATA_BLOCK 255  /* NOTE(review): not used in the code visible here */
#define EXTDATA_BLOCK 4095      /* struct extdata buffers */
 396
 397 static inline void
 398 data_addid(struct extdata *xd, Id x)
 399 {
 400   unsigned char *dp;
 401   xd->buf = sat_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
 402   dp = xd->buf + xd->len;
 403
 404   if (x >= (1 << 14))
 405     {
 406       if (x >= (1 << 28))
 407         *dp++ = (x >> 28) | 128;
 408       if (x >= (1 << 21))
 409         *dp++ = (x >> 21) | 128;
 410       *dp++ = (x >> 14) | 128;
 411     }
 412   if (x >= (1 << 7))
 413     *dp++ = (x >> 7) | 128;
 414   *dp++ = x & 127;
 415   xd->len = dp - xd->buf;
 416 }
 417
 418 static inline void
 419 data_addideof(struct extdata *xd, Id x, int eof)
 420 {
 421   if (x >= 64)
 422     x = (x & 63) | ((x & ~63) << 1);
 423   data_addid(xd, (eof ? x: x | 64));
 424 }
 425
 426 static void
 427 data_addidarray_sort(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker)
 428 {
 429   int len, i;
 430   Id lids[64], *sids;
 431
 432   if (!ids)
 433     return;
 434   if (!*ids)
 435     {
 436       data_addid(xd, 0);
 437       return;
 438     }
 439   for (len = 0; len < 64 && ids[len]; len++)
 440     {
 441       Id id = ids[len];
 442       if (needid)
 443         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 444       lids[len] = id;
 445     }
 446   if (ids[len])
 447     {
 448       for (i = len + 1; ids[i]; i++)
 449         ;
 450       sids = sat_malloc2(i, sizeof(Id));
 451       memcpy(sids, lids, 64 * sizeof(Id));
 452       for (; ids[len]; len++)
 453         {
 454           Id id = ids[len];
 455           if (needid)
 456             id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 457           sids[len] = id;
 458         }
 459     }
 460   else
 461     sids = lids;
 462
 463   /* That bloody solvable:prereqmarker needs to stay in position :-(  */
 464   if (needid)
 465     marker = needid[marker].need;
 466   for (i = 0; i < len; i++)
 467     if (sids[i] == marker)
 468       break;
 469   if (i > 1)
 470     sat_sort(sids, i, sizeof(Id), cmp_ids, 0);
 471   if ((len - i) > 2)
 472     sat_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);
 473
 474   Id id, old = 0;
 475
 476   /* The differencing above produces many runs of ones and twos.  I tried
 477      fairly elaborate schemes to RLE those, but they give only very mediocre
 478      improvements in compression, as coding the escapes costs quite some
 479      space.  Even if they are coded only as bits in IDs.  The best improvement
 480      was about 2.7% for the whole .solv file.  It's probably better to
 481      invest some complexity into sharing idarrays, than RLEing.  */
 482   for (i = 0; i < len - 1; i++)
 483     {
 484       id = sids[i];
 485     /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
 486        hence all real differences are offsetted by 1.  Otherwise we would
 487        have to handle negative differences, which would cost code space for
 488        the encoding of the sign.  We loose the exact mapping of prereq here,
 489        but we know the result, so we can recover from that in the reader.  */
 490       if (id == marker)
 491         id = old = 0;
 492       else
 493         {
 494           id = id - old + 1;
 495           old = sids[i];
 496         }
 497       /* XXX If difference is zero we have multiple equal elements,
 498          we might want to skip writing them out.  */
 499       if (id >= 64)
 500         id = (id & 63) | ((id & ~63) << 1);
 501       data_addid(xd, id | 64);
 502     }
 503   id = sids[i];
 504   if (id == marker)
 505     id = 0;
 506   else
 507     id = id - old + 1;
 508   if (id >= 64)
 509     id = (id & 63) | ((id & ~63) << 1);
 510   data_addid(xd, id);
 511   if (sids != lids)
 512     sat_free(sids);
 513 }
 514
 515 static inline void
 516 data_addblob(struct extdata *xd, unsigned char *blob, int len)
 517 {
 518   xd->buf = sat_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
 519   memcpy(xd->buf + xd->len, blob, len);
 520   xd->len += len;
 521 }
 522
 523 static inline void
 524 data_addu32(struct extdata *xd, unsigned int num)
 525 {
 526   unsigned char d[4];
 527   d[0] = num >> 24;
 528   d[1] = num >> 16;
 529   d[2] = num >> 8;
 530   d[3] = num;
 531   data_addblob(xd, d, 4);
 532 }
 533
 534 static Id
 535 putinownpool(struct cbdata *cbdata, Stringpool *ss, Id id)
 536 {
 537   const char *str = stringpool_id2str(ss, id);
 538   id = stringpool_str2id(cbdata->ownspool, str, 1);
 539   if (id >= cbdata->needid[0].map)
 540     {
 541       int oldoff = cbdata->needid[0].map;
 542       int newoff = (id + 1 + NEEDED_BLOCK) & ~NEEDED_BLOCK;
 543       int nrels = cbdata->repo->pool->nrels;
 544       cbdata->needid = sat_realloc2(cbdata->needid, newoff + nrels, sizeof(NeedId));
 545       if (nrels)
 546         memmove(cbdata->needid + newoff, cbdata->needid + oldoff, nrels * sizeof(NeedId));
 547       memset(cbdata->needid + oldoff, 0, (newoff - oldoff) * sizeof(NeedId));
 548       cbdata->needid[0].map = newoff;
 549     }
 550   return id;
 551 }
 552
 553 static Id
 554 putinowndirpool(struct cbdata *cbdata, Repodata *data, Dirpool *dp, Id dir)
 555 {
 556   Id compid, parent;
 557
 558   parent = dirpool_parent(dp, dir);
 559   if (parent)
 560     parent = putinowndirpool(cbdata, data, dp, parent);
 561   compid = dp->dirs[dir];
 562   if (cbdata->ownspool && compid > 1)
 563     compid = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, compid);
 564   return dirpool_add_dir(cbdata->owndirpool, parent, compid, 1);
 565 }
 566
 567 /*
 568  * collect usage information about the dirs
 569  * 1: dir used, no child of dir used
 570  * 2: dir used as parent of another used dir
 571  */
 572 static inline void
 573 setdirused(struct cbdata *cbdata, Dirpool *dp, Id dir)
 574 {
 575   if (cbdata->dirused[dir])
 576     return;
 577   cbdata->dirused[dir] = 1;
 578   while ((dir = dirpool_parent(dp, dir)) != 0)
 579     {
 580       if (cbdata->dirused[dir] == 2)
 581         return;
 582       if (cbdata->dirused[dir])
 583         {
 584           cbdata->dirused[dir] = 2;
 585           return;
 586         }
 587       cbdata->dirused[dir] = 2;
 588     }
 589   cbdata->dirused[0] = 2;
 590 }
 591
 592 /*
 593  * pass 1 callback:
 594  * collect key/id/dirid usage information, create needed schemas
 595  */
 596 static int
 597 repo_write_collect_needed(struct cbdata *cbdata, Repo *repo, Repodata *data, Repokey *key, KeyValue *kv)
 598 {
 599   Id id;
 600   int rm;
 601
 602   if (key->name == REPOSITORY_SOLVABLES)
 603     return SEARCH_NEXT_KEY;     /* we do not want this one */
 604   if (data != data->repo->repodata + data->repo->nrepodata - 1)
 605     if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
 606       return SEARCH_NEXT_KEY;
 607
 608   rm = cbdata->keymap[cbdata->keymapstart[data - data->repo->repodata] + (key - data->keys)];
 609   if (!rm)
 610     return SEARCH_NEXT_KEY;     /* we do not want this one */
 611
 612   /* record key in schema */
 613   if ((key->type != REPOKEY_TYPE_FIXARRAY || kv->eof == 0)
 614       && (cbdata->sp == cbdata->schema || cbdata->sp[-1] != rm))
 615     *cbdata->sp++ = rm;
 616
 617   switch(key->type)
 618     {
 619       case REPOKEY_TYPE_ID:
 620       case REPOKEY_TYPE_IDARRAY:
 621         id = kv->id;
 622         if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
 623           id = putinownpool(cbdata, data->localpool ? &data->spool : &repo->pool->ss, id);
 624         incneedid(repo->pool, id, cbdata->needid);
 625         break;
 626       case REPOKEY_TYPE_DIR:
 627       case REPOKEY_TYPE_DIRNUMNUMARRAY:
 628       case REPOKEY_TYPE_DIRSTRARRAY:
 629         id = kv->id;
 630         if (cbdata->owndirpool)
 631           putinowndirpool(cbdata, data, &data->dirpool, id);
 632         else
 633           setdirused(cbdata, &data->dirpool, id);
 634         break;
 635       case REPOKEY_TYPE_FIXARRAY:
 636         if (kv->eof == 0)
 637           {
 638             if (cbdata->oldschema)
 639               {
 640                 fprintf(stderr, "nested structs not yet implemented\n");
 641                 exit(1);
 642               }
 643             cbdata->oldschema = cbdata->schema;
 644             cbdata->oldsp = cbdata->sp;
 645             cbdata->schema = sat_calloc(cbdata->target->nkeys, sizeof(Id));
 646             cbdata->sp = cbdata->schema;
 647           }
 648         else if (kv->eof == 1)
 649           {
 650             cbdata->current_sub++;
 651             *cbdata->sp = 0;
 652             cbdata->subschemata = sat_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
 653             cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, cbdata->schema, 1);
 654 #if 0
 655             fprintf(stderr, "Have schema %d\n", cbdata->subschemata[cbdata->nsubschemata-1]);
 656 #endif
 657             cbdata->sp = cbdata->schema;
 658           }
 659         else
 660           {
 661             sat_free(cbdata->schema);
 662             cbdata->schema = cbdata->oldschema;
 663             cbdata->sp = cbdata->oldsp;
 664             cbdata->oldsp = cbdata->oldschema = 0;
 665           }
 666         break;
 667       case REPOKEY_TYPE_FLEXARRAY:
 668         if (kv->entry == 0)
 669           {
 670             if (kv->eof != 2)
 671               *cbdata->sp++ = 0;        /* mark start */
 672           }
 673         else
 674           {
 675             /* just finished a schema, rewind */
 676             Id *sp = cbdata->sp - 1;
 677             *sp = 0;
 678             while (sp[-1])
 679               sp--;
 680             cbdata->subschemata = sat_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
 681             cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, sp, 1);
 682             cbdata->sp = kv->eof == 2 ? sp - 1: sp;
 683           }
 684         break;
 685       default:
 686         break;
 687     }
 688   return 0;
 689 }
 690
 691 static int
 692 repo_write_cb_needed(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
 693 {
 694   struct cbdata *cbdata = vcbdata;
 695   Repo *repo = data->repo;
 696
 697 #if 0
 698   if (s)
 699     fprintf(stderr, "solvable %d (%s): key (%d)%s %d\n", s ? s - repo->pool->solvables : 0, s ? id2str(repo->pool, s->name) : "", key->name, id2str(repo->pool, key->name), key->type);
 700 #endif
 701   return repo_write_collect_needed(cbdata, repo, data, key, kv);
 702 }
 703
 704
 705 /*
 706  * pass 2 callback:
 707  * encode all of the data into the correct buffers
 708  */
 709
 710 static int
 711 repo_write_adddata(struct cbdata *cbdata, Repodata *data, Repokey *key, KeyValue *kv)
 712 {
 713   int rm;
 714   Id id;
 715   unsigned int u32;
 716   unsigned char v[4];
 717   struct extdata *xd;
 718   NeedId *needid;
 719
 720   if (key->name == REPOSITORY_SOLVABLES)
 721     return SEARCH_NEXT_KEY;
 722   if (data != data->repo->repodata + data->repo->nrepodata - 1)
 723     if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
 724       return SEARCH_NEXT_KEY;
 725
 726   rm = cbdata->keymap[cbdata->keymapstart[data - data->repo->repodata] + (key - data->keys)];
 727   if (!rm)
 728     return SEARCH_NEXT_KEY;     /* we do not want this one */
 729
 730   if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET)
 731     {
 732       xd = cbdata->extdata + rm;        /* vertical buffer */
 733       if (cbdata->vstart == -1)
 734         cbdata->vstart = xd->len;
 735     }
 736   else
 737     xd = cbdata->extdata + 0;           /* incore buffer */
 738   switch(key->type)
 739     {
 740       case REPOKEY_TYPE_VOID:
 741       case REPOKEY_TYPE_CONSTANT:
 742       case REPOKEY_TYPE_CONSTANTID:
 743         break;
 744       case REPOKEY_TYPE_ID:
 745         id = kv->id;
 746         if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
 747           id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
 748         needid = cbdata->needid;
 749         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 750         data_addid(xd, id);
 751         break;
 752       case REPOKEY_TYPE_IDARRAY:
 753         id = kv->id;
 754         if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
 755           id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
 756         needid = cbdata->needid;
 757         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
 758         data_addideof(xd, id, kv->eof);
 759         break;
 760       case REPOKEY_TYPE_STR:
 761         data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
 762         break;
 763       case REPOKEY_TYPE_MD5:
 764         data_addblob(xd, (unsigned char *)kv->str, SIZEOF_MD5);
 765         break;
 766       case REPOKEY_TYPE_SHA1:
 767         data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA1);
 768         break;
 769       case REPOKEY_TYPE_SHA256:
 770         data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA256);
 771         break;
 772       case REPOKEY_TYPE_U32:
 773         u32 = kv->num;
 774         v[0] = u32 >> 24;
 775         v[1] = u32 >> 16;
 776         v[2] = u32 >> 8;
 777         v[3] = u32;
 778         data_addblob(xd, v, 4);
 779         break;
 780       case REPOKEY_TYPE_NUM:
 781         data_addid(xd, kv->num);
 782         break;
 783       case REPOKEY_TYPE_DIR:
 784         id = kv->id;
 785         if (cbdata->owndirpool)
 786           id = putinowndirpool(cbdata, data, &data->dirpool, id);
 787         id = cbdata->dirused[id];
 788         data_addid(xd, id);
 789         break;
 790       case REPOKEY_TYPE_BINARY:
 791         data_addid(xd, kv->num);
 792         if (kv->num)
 793           data_addblob(xd, (unsigned char *)kv->str, kv->num);
 794         break;
 795       case REPOKEY_TYPE_DIRNUMNUMARRAY:
 796         id = kv->id;
 797         if (cbdata->owndirpool)
 798           id = putinowndirpool(cbdata, data, &data->dirpool, id);
 799         id = cbdata->dirused[id];
 800         data_addid(xd, id);
 801         data_addid(xd, kv->num);
 802         data_addideof(xd, kv->num2, kv->eof);
 803         break;
 804       case REPOKEY_TYPE_DIRSTRARRAY:
 805         id = kv->id;
 806         if (cbdata->owndirpool)
 807           id = putinowndirpool(cbdata, data, &data->dirpool, id);
 808         id = cbdata->dirused[id];
 809         data_addideof(xd, id, kv->eof);
 810         data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
 811         break;
 812       case REPOKEY_TYPE_FIXARRAY:
 813         if (kv->eof == 0)
 814           {
 815             if (kv->num)
 816               {
 817                 data_addid(xd, kv->num);
 818                 data_addid(xd, cbdata->subschemata[cbdata->current_sub]);
 819 #if 0
 820                 fprintf(stderr, "writing %d %d\n", kv->num, cbdata->subschemata[cbdata->current_sub]);
 821 #endif
 822               }
 823           }
 824         else if (kv->eof == 1)
 825           {
 826             cbdata->current_sub++;
 827           }
 828         break;
 829       case REPOKEY_TYPE_FLEXARRAY:
 830         if (!kv->entry)
 831           data_addid(xd, kv->num);
 832         if (kv->eof != 2)
 833           data_addid(xd, cbdata->subschemata[cbdata->current_sub++]);
 834         if (xd == cbdata->extdata + 0 && !kv->parent && !cbdata->doingsolvables)
 835           {
 836             if (xd->len - cbdata->lastlen > cbdata->maxdata)
 837               cbdata->maxdata = xd->len - cbdata->lastlen;
 838             cbdata->lastlen = xd->len;
 839           }
 840         break;
 841       default:
 842         fprintf(stderr, "unknown type for %d: %d\n", key->name, key->type);
 843         exit(1);
 844     }
 845   if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET && kv->eof)
 846     {
 847       /* we can re-use old data in the blob here! */
 848       data_addid(cbdata->extdata + 0, cbdata->vstart);                  /* add offset into incore data */
 849       data_addid(cbdata->extdata + 0, xd->len - cbdata->vstart);        /* add length into incore data */
 850       cbdata->vstart = -1;
 851     }
 852   return 0;
 853 }
 854
 855 static int
 856 repo_write_cb_adddata(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
 857 {
 858   struct cbdata *cbdata = vcbdata;
 859   return repo_write_adddata(cbdata, data, key, kv);
 860 }
 861
 862 /* traverse through directory with first child "dir" */
 863 static int
 864 traverse_dirs(Dirpool *dp, Id *dirmap, Id n, Id dir, Id *used)
 865 {
 866   Id sib, child;
 867   Id parent, lastn;
 868
 869   parent = n;
 870   /* special case for '/', which has to come first */
 871   if (parent == 1)
 872     dirmap[n++] = 1;
 873   for (sib = dir; sib; sib = dirpool_sibling(dp, sib))
 874     {
 875       if (used && !used[sib])
 876         continue;
 877       if (sib == 1 && parent == 1)
 878         continue;       /* already did that one above */
 879       dirmap[n++] = sib;
 880     }
 881
 882   /* now go through all the siblings we just added and
 883    * do recursive calls on them */
 884   lastn = n;
 885   for (; parent < lastn; parent++)
 886     {
 887       sib = dirmap[parent];
 888       if (used && used[sib] != 2)       /* 2: used as parent */
 889         continue;
 890       child = dirpool_child(dp, sib);
 891       if (child)
 892         {
 893           dirmap[n++] = -parent;        /* start new block */
 894           n = traverse_dirs(dp, dirmap, n, child, used);
 895         }
 896     }
 897   return n;
 898 }
 899
 900 static void
 901 write_compressed_page(FILE *fp, unsigned char *page, int len)
 902 {
 903   int clen;
 904   unsigned char cpage[BLOB_PAGESIZE];
 905
 906   clen = repopagestore_compress_page(page, len, cpage, len - 1);
 907   if (!clen)
 908     {
 909       write_u32(fp, len * 2);
 910       write_blob(fp, page, len);
 911     }
 912   else
 913     {
 914       write_u32(fp, clen * 2 + 1);
 915       write_blob(fp, cpage, clen);
 916     }
 917 }
 918
/* key names for which repo_write_stdkeyfilter() selects vertical
   storage; zero terminated */
static Id verticals[] = {
  SOLVABLE_AUTHORS,
  SOLVABLE_DESCRIPTION,
  SOLVABLE_MESSAGEDEL,
  SOLVABLE_MESSAGEINS,
  SOLVABLE_EULA,
  SOLVABLE_DISKUSAGE,
  SOLVABLE_FILELIST,
  0
};
 929
/* key name prefixes for which repo_write_stdkeyfilter() selects
   vertical storage; null terminated */
static char *languagetags[] = {
  "solvable:summary:",
  "solvable:description:",
  "solvable:messageins:",
  "solvable:messagedel:",
  "solvable:eula:",
  0
};
 938
 939 int
 940 repo_write_stdkeyfilter(Repo *repo, Repokey *key, void *kfdata)
 941 {
 942   const char *keyname;
 943   int i;
 944
 945   for (i = 0; verticals[i]; i++)
 946     if (key->name == verticals[i])
 947       return KEY_STORAGE_VERTICAL_OFFSET;
 948   keyname = id2str(repo->pool, key->name);
 949   for (i = 0; languagetags[i] != 0; i++)
 950     if (!strncmp(keyname, languagetags[i], strlen(languagetags[i])))
 951       return KEY_STORAGE_VERTICAL_OFFSET;
 952   return KEY_STORAGE_INCORE;
 953 }
 954
 955 /*
 956  * Repo
 957  */
 958
 959 /*
 960  * the code works the following way:
 961  *
 962  * 1) find which keys should be written
 963  * 2) collect usage information for keys/ids/dirids, create schema
 964  *    data
 965  * 3) use usage information to create mapping tables, so that often
 966  *    used ids get a lower number
 967  * 4) encode data into buffers using the mapping tables
 968  * 5) write everything to disk
 969  */
 970 void
 971 repo_write(Repo *repo, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Id **keyarrayp)
 972 {
 973   Pool *pool = repo->pool;
 974   int i, j, n;
 975   Solvable *s;
 976   NeedId *needid;
 977   int nstrings, nrels;
 978   unsigned int sizeid;
 979   unsigned int solv_flags;
 980   Reldep *ran;
 981   Id *idarraydata;
 982
 983   Id id, *sp;
 984
 985   Id *dirmap;
 986   int ndirmap;
 987   Id *keyused;
 988   unsigned char *repodataused;
 989   int anyrepodataused = 0;
 990   int anysolvableused = 0;
 991
 992   struct cbdata cbdata;
 993   int clonepool;
 994   Repokey *key;
 995   int poolusage, dirpoolusage, idused, dirused;
 996   int reloff;
 997
 998   Repodata *data, *dirpooldata;
 999
1000   Repodata target;
1001
1002   Stringpool *spool;
1003   Dirpool *dirpool;
1004
1005   Id mainschema;
1006
1007   struct extdata *xd;
1008
1009   Id type_constantid = REPOKEY_TYPE_CONSTANTID;
1010
1011
1012   memset(&cbdata, 0, sizeof(cbdata));
1013   cbdata.repo = repo;
1014   cbdata.target = &target;
1015
1016   repodata_initdata(&target, repo, 1);
1017
1018   /* go through all repodata and find the keys we need */
1019   /* also unify keys */
1020   /*          keymapstart - maps repo number to keymap offset */
1021   /*          keymap      - maps repo key to my key, 0 -> not used */
1022
1023   /* start with all KEY_STORAGE_SOLVABLE ids */
1024
1025   n = ID_NUM_INTERNAL;
1026   for (i = 0; i < repo->nrepodata; i++)
1027     n += repo->repodata[i].nkeys;
1028   cbdata.keymap = sat_calloc(n, sizeof(Id));
1029   cbdata.keymapstart = sat_calloc(repo->nrepodata, sizeof(Id));
1030   repodataused = sat_calloc(repo->nrepodata, 1);
1031
1032   clonepool = 0;
1033   poolusage = 0;
1034
1035   /* add keys for STORAGE_SOLVABLE */
1036   for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
1037     {
1038       Repokey keyd;
1039       keyd.name = i;
1040       if (i < SOLVABLE_PROVIDES)
1041         keyd.type = REPOKEY_TYPE_ID;
1042       else if (i < RPM_RPMDBID)
1043         keyd.type = REPOKEY_TYPE_REL_IDARRAY;
1044       else
1045         keyd.type = REPOKEY_TYPE_U32;
1046       keyd.size = 0;
1047       keyd.storage = KEY_STORAGE_SOLVABLE;
1048       if (keyfilter)
1049         {
1050           keyd.storage = keyfilter(repo, &keyd, kfdata);
1051           if (keyd.storage == KEY_STORAGE_DROPPED)
1052             continue;
1053           keyd.storage = KEY_STORAGE_SOLVABLE;
1054         }
1055       poolusage = 1;
1056       clonepool = 1;
1057       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1058     }
1059
1060   if (repo->nsolvables)
1061     {
1062       Repokey keyd;
1063       keyd.name = REPOSITORY_SOLVABLES;
1064       keyd.type = REPOKEY_TYPE_FLEXARRAY;
1065       keyd.size = 0;
1066       keyd.storage = KEY_STORAGE_INCORE;
1067       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1068     }
1069
1070   dirpoolusage = 0;
1071
1072   spool = 0;
1073   dirpool = 0;
1074   dirpooldata = 0;
1075   n = ID_NUM_INTERNAL;
1076   for (i = 0; i < repo->nrepodata; i++)
1077     {
1078       data = repo->repodata + i;
1079       cbdata.keymapstart[i] = n;
1080       cbdata.keymap[n++] = 0;   /* key 0 */
1081       idused = 0;
1082       dirused = 0;
1083       if (keyfilter)
1084         {
1085           Repokey keyd;
1086           /* check if we want this repodata */
1087           memset(&keyd, 0, sizeof(keyd));
1088           keyd.name = 1;
1089           keyd.type = 1;
1090           keyd.size = i;
1091           if (keyfilter(repo, &keyd, kfdata) == -1)
1092             continue;
1093         }
1094       for (j = 1; j < data->nkeys; j++, n++)
1095         {
1096           key = data->keys + j;
1097           if (key->name == REPOSITORY_SOLVABLES && key->type == REPOKEY_TYPE_FLEXARRAY)
1098             {
1099               cbdata.keymap[n] = cbdata.keymap[key->name];
1100               continue;
1101             }
1102           if (key->type == REPOKEY_TYPE_DELETED)
1103             {
1104               cbdata.keymap[n] = 0;
1105               continue;
1106             }
1107           if (key->type == REPOKEY_TYPE_CONSTANTID && data->localpool)
1108             {
1109               Repokey keyd = *key;
1110               keyd.size = repodata_globalize_id(data, key->size, 1);
1111               id = repodata_key2id(&target, &keyd, 0);
1112             }
1113           else
1114             id = repodata_key2id(&target, key, 0);
1115           if (!id)
1116             {
1117               Repokey keyd = *key;
1118               keyd.storage = KEY_STORAGE_INCORE;
1119               if (keyd.type == REPOKEY_TYPE_CONSTANTID)
1120                 keyd.size = repodata_globalize_id(data, key->size, 1);
1121               else if (keyd.type != REPOKEY_TYPE_CONSTANT)
1122                 keyd.size = 0;
1123               if (keyfilter)
1124                 {
1125                   keyd.storage = keyfilter(repo, &keyd, kfdata);
1126                   if (keyd.storage == KEY_STORAGE_DROPPED)
1127                     {
1128                       cbdata.keymap[n] = 0;
1129                       continue;
1130                     }
1131                 }
1132               id = repodata_key2id(&target, &keyd, 1);
1133             }
1134           cbdata.keymap[n] = id;
1135           /* load repodata if not already loaded */
1136           if (data->state == REPODATA_STUB)
1137             {
1138               if (data->loadcallback)
1139                 data->loadcallback(data);
1140               else
1141                 data->state = REPODATA_ERROR;
1142               if (data->state != REPODATA_ERROR)
1143                 {
1144                   /* redo this repodata! */
1145                   j = 0;
1146                   n = cbdata.keymapstart[i];
1147                   continue;
1148                 }
1149             }
1150           if (data->state == REPODATA_ERROR)
1151             {
1152               /* too bad! */
1153               cbdata.keymap[n] = 0;
1154               continue;
1155             }
1156
1157           repodataused[i] = 1;
1158           anyrepodataused = 1;
1159           if (key->type == REPOKEY_TYPE_CONSTANTID || key->type == REPOKEY_TYPE_ID ||
1160               key->type == REPOKEY_TYPE_IDARRAY || key->type == REPOKEY_TYPE_REL_IDARRAY)
1161             idused = 1;
1162           else if (key->type == REPOKEY_TYPE_DIR || key->type == REPOKEY_TYPE_DIRNUMNUMARRAY || key->type == REPOKEY_TYPE_DIRSTRARRAY)
1163             {
1164               idused = 1;       /* dirs also use ids */
1165               dirused = 1;
1166             }
1167         }
1168       if (idused)
1169         {
1170           if (data->localpool)
1171             {
1172               if (poolusage)
1173                 poolusage = 3;  /* need own pool */
1174               else
1175                 {
1176                   poolusage = 2;
1177                   spool = &data->spool;
1178                 }
1179             }
1180           else
1181             {
1182               if (poolusage == 0)
1183                 poolusage = 1;
1184               else if (poolusage != 1)
1185                 poolusage = 3;  /* need own pool */
1186             }
1187         }
1188       if (dirused)
1189         {
1190           if (dirpoolusage)
1191             dirpoolusage = 3;   /* need own dirpool */
1192           else
1193             {
1194               dirpoolusage = 2;
1195               dirpool = &data->dirpool;
1196               dirpooldata = data;
1197             }
1198         }
1199     }
1200   cbdata.nkeymap = n;
1201
1202   /* 0: no pool needed at all */
1203   /* 1: use global pool */
1204   /* 2: use repodata local pool */
1205   /* 3: need own pool */
1206   if (poolusage == 3)
1207     {
1208       spool = &target.spool;
1209       /* hack: reuse global pool data so we don't have to map pool ids */
1210       if (clonepool)
1211         {
1212           stringpool_free(spool);
1213           stringpool_clone(spool, &pool->ss);
1214         }
1215       cbdata.ownspool = spool;
1216     }
1217   else if (poolusage == 0 || poolusage == 1)
1218     {
1219       poolusage = 1;
1220       spool = &pool->ss;
1221     }
1222
1223   if (dirpoolusage == 3)
1224     {
1225       dirpool = &target.dirpool;
1226       dirpooldata = 0;
1227       cbdata.owndirpool = dirpool;
1228     }
1229   else if (dirpool)
1230     cbdata.dirused = sat_calloc(dirpool->ndirs, sizeof(Id));
1231
1232
1233 /********************************************************************/
1234 #if 0
1235 fprintf(stderr, "poolusage: %d\n", poolusage);
1236 fprintf(stderr, "dirpoolusage: %d\n", dirpoolusage);
1237 fprintf(stderr, "nkeys: %d\n", target.nkeys);
1238 for (i = 1; i < target.nkeys; i++)
1239   fprintf(stderr, "  %2d: %s[%d] %d %d %d\n", i, id2str(pool, target.keys[i].name), target.keys[i].name, target.keys[i].type, target.keys[i].size, target.keys[i].storage);
1240 #endif
1241
1242   /* copy keys if requested */
1243   if (keyarrayp)
1244     {
1245       *keyarrayp = sat_calloc(2 * target.nkeys + 1, sizeof(Id));
1246       for (i = 1; i < target.nkeys; i++)
1247         {
1248           (*keyarrayp)[2 * i - 2] = target.keys[i].name;
1249           (*keyarrayp)[2 * i - 1] = target.keys[i].type;
1250         }
1251     }
1252
1253   if (poolusage > 1)
1254     {
1255       /* put all the keys we need in our string pool */
1256       /* put mapped ids right into target.keys */
1257       for (i = 1, key = target.keys + i; i < target.nkeys; i++, key++)
1258         {
1259           key->name = stringpool_str2id(spool, id2str(pool, key->name), 1);
1260           if (key->type == REPOKEY_TYPE_CONSTANTID)
1261             {
1262               key->type = stringpool_str2id(spool, id2str(pool, key->type), 1);
1263               type_constantid = key->type;
1264               key->size = stringpool_str2id(spool, id2str(pool, key->size), 1);
1265             }
1266           else
1267             key->type = stringpool_str2id(spool, id2str(pool, key->type), 1);
1268         }
1269       if (poolusage == 2)
1270         stringpool_freehash(spool);     /* free some mem */
1271     }
1272
1273
1274 /********************************************************************/
1275
1276   /* set needed count of all strings and rels,
1277    * find which keys are used in the solvables
1278    * put all strings in own spool
1279    */
1280
1281   reloff = spool->nstrings;
1282   if (poolusage == 3)
1283     reloff = (reloff + NEEDED_BLOCK) & ~NEEDED_BLOCK;
1284
1285   needid = calloc(reloff + pool->nrels, sizeof(*needid));
1286   needid[0].map = reloff;
1287
1288   cbdata.needid = needid;
1289   cbdata.schema = sat_calloc(target.nkeys, sizeof(Id));
1290   cbdata.sp = cbdata.schema;
1291   cbdata.solvschemata = sat_calloc(repo->nsolvables, sizeof(Id));
1292
1293   /* create main schema */
1294   cbdata.sp = cbdata.schema;
1295   /* collect all other data from all repodatas */
1296   /* XXX: merge arrays of equal keys? */
1297   for (j = 0, data = repo->repodata; j < repo->nrepodata; j++, data++)
1298     {
1299       if (!repodataused[j])
1300         continue;
1301       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1302     }
1303   sp = cbdata.sp;
1304   /* add solvables if needed (may revert later) */
1305   if (repo->nsolvables)
1306     {
1307       *sp++ = cbdata.keymap[REPOSITORY_SOLVABLES];
1308       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size++;
1309     }
1310   *sp = 0;
1311   mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1312
1313   idarraydata = repo->idarraydata;
1314
1315   anysolvableused = 0;
1316   cbdata.doingsolvables = 1;
1317   for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1318     {
1319       if (s->repo != repo)
1320         continue;
1321
1322       /* set schema info, keep in sync with further down */
1323       sp = cbdata.schema;
1324       if (cbdata.keymap[SOLVABLE_NAME])
1325         {
1326           *sp++ = cbdata.keymap[SOLVABLE_NAME];
1327           needid[s->name].need++;
1328         }
1329       if (cbdata.keymap[SOLVABLE_ARCH])
1330         {
1331           *sp++ = cbdata.keymap[SOLVABLE_ARCH];
1332           needid[s->arch].need++;
1333         }
1334       if (cbdata.keymap[SOLVABLE_EVR])
1335         {
1336           *sp++ = cbdata.keymap[SOLVABLE_EVR];
1337           needid[s->evr].need++;
1338         }
1339       if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1340         {
1341           *sp++ = cbdata.keymap[SOLVABLE_VENDOR];
1342           needid[s->vendor].need++;
1343         }
1344       if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1345         {
1346           *sp++ = cbdata.keymap[SOLVABLE_PROVIDES];
1347           target.keys[cbdata.keymap[SOLVABLE_PROVIDES]].size += incneedidarray(pool, idarraydata + s->provides, needid);
1348         }
1349       if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1350         {
1351           *sp++ = cbdata.keymap[SOLVABLE_OBSOLETES];
1352           target.keys[cbdata.keymap[SOLVABLE_OBSOLETES]].size += incneedidarray(pool, idarraydata + s->obsoletes, needid);
1353         }
1354       if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1355         {
1356           *sp++ = cbdata.keymap[SOLVABLE_CONFLICTS];
1357           target.keys[cbdata.keymap[SOLVABLE_CONFLICTS]].size += incneedidarray(pool, idarraydata + s->conflicts, needid);
1358         }
1359       if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1360         {
1361           *sp++ = cbdata.keymap[SOLVABLE_REQUIRES];
1362           target.keys[cbdata.keymap[SOLVABLE_REQUIRES]].size += incneedidarray(pool, idarraydata + s->requires, needid);
1363         }
1364       if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1365         {
1366           *sp++ = cbdata.keymap[SOLVABLE_RECOMMENDS];
1367           target.keys[cbdata.keymap[SOLVABLE_RECOMMENDS]].size += incneedidarray(pool, idarraydata + s->recommends, needid);
1368         }
1369       if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1370         {
1371           *sp++ = cbdata.keymap[SOLVABLE_SUGGESTS];
1372           target.keys[cbdata.keymap[SOLVABLE_SUGGESTS]].size += incneedidarray(pool, idarraydata + s->suggests, needid);
1373         }
1374       if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1375         {
1376           *sp++ = cbdata.keymap[SOLVABLE_SUPPLEMENTS];
1377           target.keys[cbdata.keymap[SOLVABLE_SUPPLEMENTS]].size += incneedidarray(pool, idarraydata + s->supplements, needid);
1378         }
1379       if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1380         {
1381           *sp++ = cbdata.keymap[SOLVABLE_ENHANCES];
1382           target.keys[cbdata.keymap[SOLVABLE_ENHANCES]].size += incneedidarray(pool, idarraydata + s->enhances, needid);
1383         }
1384       if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1385         {
1386           *sp++ = cbdata.keymap[RPM_RPMDBID];
1387           target.keys[cbdata.keymap[RPM_RPMDBID]].size++;
1388         }
1389       cbdata.sp = sp;
1390
1391       if (anyrepodataused)
1392         {
1393           for (j = 0, data = repo->repodata; j < repo->nrepodata; j++, data++)
1394             {
1395               if (!repodataused[j])
1396                 continue;
1397               if (i < data->start || i >= data->end)
1398                 continue;
1399               repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1400               needid = cbdata.needid;
1401             }
1402         }
1403       *cbdata.sp = 0;
1404       cbdata.solvschemata[n] = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1405       if (cbdata.solvschemata[n])
1406         anysolvableused = 1;
1407       n++;
1408     }
1409   cbdata.doingsolvables = 0;
1410   assert(n == repo->nsolvables);
1411
1412   if (repo->nsolvables && !anysolvableused)
1413     {
1414       /* strip off solvable from the main schema */
1415       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size = 0;
1416       sp = cbdata.schema;
1417       for (i = 0; target.schemadata[target.schemata[mainschema] + i]; i++)
1418         {
1419           *sp = target.schemadata[target.schemata[mainschema] + i];
1420           if (*sp != cbdata.keymap[REPOSITORY_SOLVABLES])
1421             sp++;
1422         }
1423       assert(target.schemadatalen == target.schemata[mainschema] + i + 1);
1424       *sp = 0;
1425       target.schemadatalen = target.schemata[mainschema];
1426       target.nschemata--;
1427       repodata_free_schemahash(&target);
1428       mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1429     }
1430
1431 /********************************************************************/
1432
1433   /* remove unused keys */
1434   keyused = sat_calloc(target.nkeys, sizeof(Id));
1435   for (i = 1; i < target.schemadatalen; i++)
1436     keyused[target.schemadata[i]] = 1;
1437   keyused[0] = 0;
1438   for (n = i = 1; i < target.nkeys; i++)
1439     {
1440       if (!keyused[i])
1441         continue;
1442       keyused[i] = n;
1443       if (i != n)
1444         {
1445           target.keys[n] = target.keys[i];
1446           if (keyarrayp)
1447             {
1448               *keyarrayp[2 * n - 2] = *keyarrayp[2 * i - 2];
1449               *keyarrayp[2 * n - 1] = *keyarrayp[2 * i - 1];
1450             }
1451         }
1452       n++;
1453     }
1454   target.nkeys = n;
1455   if (keyarrayp)
1456     {
1457       /* terminate array */
1458       *keyarrayp[2 * n - 2] = 0;
1459       *keyarrayp[2 * n - 1] = 0;
1460     }
1461
1462   /* update schema data to the new key ids */
1463   for (i = 1; i < target.schemadatalen; i++)
1464     target.schemadata[i] = keyused[target.schemadata[i]];
1465   /* update keymap to the new key ids */
1466   for (i = 0; i < cbdata.nkeymap; i++)
1467     cbdata.keymap[i] = keyused[cbdata.keymap[i]];
1468   keyused = sat_free(keyused);
1469
1470   /* increment needid of the used keys, they are already mapped to
1471    * the correct string pool  */
1472   for (i = 1; i < target.nkeys; i++)
1473     {
1474       if (target.keys[i].type == type_constantid)
1475         needid[target.keys[i].size].need++;
1476       needid[target.keys[i].name].need++;
1477       needid[target.keys[i].type].need++;
1478     }
1479
1480 /********************************************************************/
1481
1482   if (dirpool && cbdata.dirused && !cbdata.dirused[0])
1483     {
1484       /* no dirs used at all */
1485       cbdata.dirused = sat_free(cbdata.dirused);
1486       dirpool = 0;
1487     }
1488
1489   /* increment need id for used dir components */
1490   if (dirpool)
1491     {
1492       /* if we have own dirpool, all entries in it are used.
1493          also, all comp ids are already mapped by putinowndirpool(),
1494          so we can simply increment needid.
1495          (owndirpool != 0, dirused == 0, dirpooldata == 0) */
1496       /* else we re-use a dirpool of repodata "dirpooldata".
1497          dirused tells us which of the ids are used.
1498          we need to map comp ids if we generate a new pool.
1499          (owndirpool == 0, dirused != 0, dirpooldata != 0) */
1500       for (i = 1; i < dirpool->ndirs; i++)
1501         {
1502 #if 0
1503 fprintf(stderr, "dir %d used %d\n", i, cbdata.dirused ? cbdata.dirused[i] : 1);
1504 #endif
1505           if (cbdata.dirused && !cbdata.dirused[i])
1506             continue;
1507           id = dirpool->dirs[i];
1508           if (id <= 0)
1509             continue;
1510           if (dirpooldata && cbdata.ownspool && id > 1)
1511             {
1512               id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1513               needid = cbdata.needid;
1514             }
1515           needid[id].need++;
1516         }
1517     }
1518
1519
1520 /********************************************************************/
1521
1522   /*
1523    * create mapping table, new keys are sorted by needid[].need
1524    *
1525    * needid[key].need : old key -> new key
1526    * needid[key].map  : new key -> old key
1527    */
1528
1529   /* zero out id 0 and rel 0 just in case */
1530   reloff = needid[0].map;
1531   needid[0].need = 0;
1532   needid[reloff].need = 0;
1533
1534   for (i = 1; i < reloff + pool->nrels; i++)
1535     needid[i].map = i;
1536
1537 #if 0
1538   sat_sort(needid + 1, spool->nstrings - 1, sizeof(*needid), needid_cmp_need_s, spool);
1539 #else
1540   /* make first entry '' */
1541   needid[1].need = 1;
1542   sat_sort(needid + 2, spool->nstrings - 2, sizeof(*needid), needid_cmp_need_s, spool);
1543 #endif
1544   sat_sort(needid + reloff, pool->nrels, sizeof(*needid), needid_cmp_need, 0);
1545   /* now needid is in new order, needid[newid].map -> oldid */
1546
1547   /* calculate string space size, also zero out needid[].need */
1548   sizeid = 0;
1549   for (i = 1; i < reloff; i++)
1550     {
1551       if (!needid[i].need)
1552         break;  /* as we have sorted, every entry after this also has need == 0 */
1553       needid[i].need = 0;
1554       sizeid += strlen(spool->stringspace + spool->strings[needid[i].map]) + 1;
1555     }
1556   nstrings = i; /* our new string id end */
1557
1558   /* make needid[oldid].need point to newid */
1559   for (i = 1; i < nstrings; i++)
1560     needid[needid[i].map].need = i;
1561
1562   /* same as above for relations */
1563   for (i = 0; i < pool->nrels; i++)
1564     {
1565       if (!needid[reloff + i].need)
1566         break;
1567       needid[reloff + i].need = 0;
1568     }
1569   nrels = i;    /* our new rel id end */
1570
1571   for (i = 0; i < nrels; i++)
1572     needid[needid[reloff + i].map].need = nstrings + i;
1573
1574   /* now we have: needid[oldid].need -> newid
1575                   needid[newid].map  -> oldid
1576      both for strings and relations  */
1577
1578
1579 /********************************************************************/
1580
1581   ndirmap = 0;
1582   dirmap = 0;
1583   if (dirpool)
1584     {
1585       /* create our new target directory structure by traversing through all
1586        * used dirs. This will concatenate blocks with the same parent
1587        * directory into single blocks.
1588        * Instead of components, traverse_dirs stores the old dirids,
1589        * we will change this in the second step below */
1590       /* (dirpooldata and dirused are 0 if we have our own dirpool) */
1591       if (cbdata.dirused && !cbdata.dirused[1])
1592         cbdata.dirused[1] = 1;  /* always want / entry */
1593       dirmap = sat_calloc(dirpool->ndirs, sizeof(Id));
1594       dirmap[0] = 0;
1595       ndirmap = traverse_dirs(dirpool, dirmap, 1, dirpool_child(dirpool, 0), cbdata.dirused);
1596
1597       /* (re)create dirused, so that it maps from "old dirid" to "new dirid" */
1598       /* change dirmap so that it maps from "new dirid" to "new compid" */
1599       if (!cbdata.dirused)
1600         cbdata.dirused = sat_malloc2(dirpool->ndirs, sizeof(Id));
1601       memset(cbdata.dirused, 0, dirpool->ndirs * sizeof(Id));
1602       for (i = 1; i < ndirmap; i++)
1603         {
1604           if (dirmap[i] <= 0)
1605             continue;
1606           cbdata.dirused[dirmap[i]] = i;
1607           id = dirpool->dirs[dirmap[i]];
1608           if (dirpooldata && cbdata.ownspool && id > 1)
1609             id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1610           dirmap[i] = needid[id].need;
1611         }
1612       /* now the new target directory structure is complete (dirmap), and we have
1613        * dirused[olddirid] -> newdirid */
1614     }
1615
1616 /********************************************************************/
1617
1618   /* collect all data
1619    * we use extdata[0] for incore data and extdata[keyid] for vertical data
1620    */
1621
1622   cbdata.extdata = sat_calloc(target.nkeys, sizeof(struct extdata));
1623
1624   xd = cbdata.extdata;
1625   cbdata.current_sub = 0;
1626   /* add main schema */
1627   cbdata.lastlen = 0;
1628   data_addid(xd, mainschema);
1629
1630 #if 1
1631   for (j = 0, data = repo->repodata; j < repo->nrepodata; j++, data++)
1632     {
1633       if (!repodataused[j])
1634         continue;
1635       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1636     }
1637 #endif
1638
1639   if (xd->len - cbdata.lastlen > cbdata.maxdata)
1640     cbdata.maxdata = xd->len - cbdata.lastlen;
1641   cbdata.lastlen = xd->len;
1642
1643   if (anysolvableused)
1644     {
1645       data_addid(xd, repo->nsolvables); /* FLEXARRAY nentries */
1646       cbdata.doingsolvables = 1;
1647       for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1648         {
1649           if (s->repo != repo)
1650             continue;
1651           data_addid(xd, cbdata.solvschemata[n]);
1652           if (cbdata.keymap[SOLVABLE_NAME])
1653             data_addid(xd, needid[s->name].need);
1654           if (cbdata.keymap[SOLVABLE_ARCH])
1655             data_addid(xd, needid[s->arch].need);
1656           if (cbdata.keymap[SOLVABLE_EVR])
1657             data_addid(xd, needid[s->evr].need);
1658           if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1659             data_addid(xd, needid[s->vendor].need);
1660           if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1661             data_addidarray_sort(xd, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER);
1662           if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1663             data_addidarray_sort(xd, pool, needid, idarraydata + s->obsoletes, 0);
1664           if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1665             data_addidarray_sort(xd, pool, needid, idarraydata + s->conflicts, 0);
1666           if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1667             data_addidarray_sort(xd, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER);
1668           if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1669             data_addidarray_sort(xd, pool, needid, idarraydata + s->recommends, 0);
1670           if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1671             data_addidarray_sort(xd, pool, needid, idarraydata + s->suggests, 0);
1672           if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1673             data_addidarray_sort(xd, pool, needid, idarraydata + s->supplements, 0);
1674           if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1675             data_addidarray_sort(xd, pool, needid, idarraydata + s->enhances, 0);
1676           if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1677             data_addu32(xd, repo->rpmdbid[i - repo->start]);
1678           if (anyrepodataused)
1679             {
1680               cbdata.vstart = -1;
1681               for (j = 0, data = repo->repodata; j < repo->nrepodata; j++, data++)
1682                 {
1683                   if (!repodataused[j])
1684                     continue;
1685                   if (i < data->start || i >= data->end)
1686                     continue;
1687                   repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1688                 }
1689             }
1690           if (xd->len - cbdata.lastlen > cbdata.maxdata)
1691             cbdata.maxdata = xd->len - cbdata.lastlen;
1692           cbdata.lastlen = xd->len;
1693           n++;
1694         }
1695       cbdata.doingsolvables = 0;
1696     }
1697
1698   assert(cbdata.current_sub == cbdata.nsubschemata);
1699   if (cbdata.subschemata)
1700     {
1701       cbdata.subschemata = sat_free(cbdata.subschemata);
1702       cbdata.nsubschemata = 0;
1703     }
1704
1705 /********************************************************************/
1706
1707   /* write header */
1708
1709   /* write file header */
1710   write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
1711   write_u32(fp, SOLV_VERSION_8);
1712
1713
1714   /* write counts */
1715   write_u32(fp, nstrings);
1716   write_u32(fp, nrels);
1717   write_u32(fp, ndirmap);
1718   write_u32(fp, anysolvableused ? repo->nsolvables : 0);
1719   write_u32(fp, target.nkeys);
1720   write_u32(fp, target.nschemata);
1721   solv_flags = 0;
1722   solv_flags |= SOLV_FLAG_PREFIX_POOL;
1723   write_u32(fp, solv_flags);
1724
1725   /*
1726    * calculate prefix encoding of the strings
1727    */
1728   unsigned char *prefixcomp = sat_malloc(nstrings);
1729   unsigned int compsum = 0;
1730   char *old_str = "";
1731
1732   prefixcomp[0] = 0;
1733   for (i = 1; i < nstrings; i++)
1734     {
1735       char *str = spool->stringspace + spool->strings[needid[i].map];
1736       int same;
1737       for (same = 0; same < 255; same++)
1738         if (!old_str[same] || old_str[same] != str[same])
1739           break;
1740       prefixcomp[i] = same;
1741       compsum += same;
1742       old_str = str;
1743     }
1744
1745   /*
1746    * write strings
1747    */
1748   write_u32(fp, sizeid);
1749   /* we save compsum bytes but need 1 extra byte for every string */
1750   write_u32(fp, sizeid + (nstrings ? nstrings - 1 : 0) - compsum);
1751   if (sizeid + (nstrings ? nstrings - 1 : 0) != compsum)
1752     {
1753       for (i = 1; i < nstrings; i++)
1754         {
1755           char *str = spool->stringspace + spool->strings[needid[i].map];
1756           write_u8(fp, prefixcomp[i]);
1757           write_str(fp, str + prefixcomp[i]);
1758         }
1759     }
1760   sat_free(prefixcomp);
1761
1762 #if 0
1763   /* Build the prefix-encoding of the string pool.  We need to know
1764      the size of that before writing it to the file, so we have to
1765      build a separate buffer for that.  As it's temporarily possible
1766      that this actually is an expansion we can't easily reuse the
1767      stringspace for this.  The max expansion per string is 1 byte,
1768      so it will fit into sizeid+nstrings bytes.  */
1769   char *prefix = sat_malloc(sizeid + nstrings);
1770   char *pp = prefix;
1771   char *old_str = "";
1772   for (i = 1; i < nstrings; i++)
1773     {
1774       char *str = spool->stringspace + spool->strings[needid[i].map];
1775       int same;
1776       size_t len;
1777       for (same = 0; same < 255; same++)
1778         if (!old_str[same] || !str[same] || old_str[same] != str[same])
1779           break;
1780       *pp++ = same;
1781       len = strlen(str + same) + 1;
1782       memcpy(pp, str + same, len);
1783       pp += len;
1784       old_str = str;
1785     }
1786
1787   /*
1788    * write strings
1789    */
1790   write_u32(fp, sizeid);
1791   write_u32(fp, pp - prefix);
1792   if (pp != prefix)
1793     {
1794       if (fwrite(prefix, pp - prefix, 1, fp) != 1)
1795         {
1796           perror("write error prefix");
1797           exit(1);
1798         }
1799     }
1800   sat_free(prefix);
1801 #endif
1802
1803   /*
1804    * write RelDeps
1805    */
1806   for (i = 0; i < nrels; i++)
1807     {
1808       ran = pool->rels + (needid[reloff + i].map - reloff);
1809       write_id(fp, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
1810       write_id(fp, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
1811       write_u8(fp, ran->flags);
1812     }
1813
1814   /*
1815    * write dirs (skip both root and / entry)
1816    */
1817   for (i = 2; i < ndirmap; i++)
1818     {
1819       if (dirmap[i] > 0)
1820         write_id(fp, dirmap[i]);
1821       else
1822         write_id(fp, nstrings - dirmap[i]);
1823     }
1824   sat_free(dirmap);
1825
1826   /*
1827    * write keys
1828    */
1829   for (i = 1; i < target.nkeys; i++)
1830     {
1831       write_id(fp, needid[target.keys[i].name].need);
1832       write_id(fp, needid[target.keys[i].type].need);
1833       if (target.keys[i].storage != KEY_STORAGE_VERTICAL_OFFSET)
1834         {
1835           if (target.keys[i].type == type_constantid)
1836             write_id(fp, needid[target.keys[i].size].need);
1837           else
1838             write_id(fp, target.keys[i].size);
1839         }
1840       else
1841         write_id(fp, cbdata.extdata[i].len);
1842       write_id(fp, target.keys[i].storage);
1843     }
1844
1845   /*
1846    * write schemata
1847    */
1848   write_id(fp, target.schemadatalen);   /* XXX -1? */
1849   for (i = 1; i < target.nschemata; i++)
1850     write_idarray(fp, pool, 0, repodata_id2schema(&target, i));
1851
1852 /********************************************************************/
1853
1854   write_id(fp, cbdata.maxdata);
1855   write_id(fp, cbdata.extdata[0].len);
1856   if (cbdata.extdata[0].len)
1857     write_blob(fp, cbdata.extdata[0].buf, cbdata.extdata[0].len);
1858   sat_free(cbdata.extdata[0].buf);
1859
1860   /* do we have vertical data? */
1861   for (i = 1; i < target.nkeys; i++)
1862     if (cbdata.extdata[i].len)
1863       break;
1864   if (i < target.nkeys)
1865     {
1866       /* yes, write it in pages */
1867       unsigned char *dp, vpage[BLOB_PAGESIZE];
1868       int l, ll, lpage = 0;
1869
1870       write_u32(fp, BLOB_PAGESIZE);
1871       for (i = 1; i < target.nkeys; i++)
1872         {
1873           if (!cbdata.extdata[i].len)
1874             continue;
1875           l = cbdata.extdata[i].len;
1876           dp = cbdata.extdata[i].buf;
1877           while (l)
1878             {
1879               ll = BLOB_PAGESIZE - lpage;
1880               if (l < ll)
1881                 ll = l;
1882               memcpy(vpage + lpage, dp, ll);
1883               dp += ll;
1884               lpage += ll;
1885               l -= ll;
1886               if (lpage == BLOB_PAGESIZE)
1887                 {
1888                   write_compressed_page(fp, vpage, lpage);
1889                   lpage = 0;
1890                 }
1891             }
1892         }
1893       if (lpage)
1894         write_compressed_page(fp, vpage, lpage);
1895     }
1896
1897   for (i = 1; i < target.nkeys; i++)
1898     sat_free(cbdata.extdata[i].buf);
1899   sat_free(cbdata.extdata);
1900
1901   repodata_freedata(&target);
1902
1903   sat_free(needid);
1904   sat_free(cbdata.solvschemata);
1905   sat_free(cbdata.schema);
1906
1907   sat_free(cbdata.keymap);
1908   sat_free(cbdata.keymapstart);
1909   sat_free(cbdata.dirused);
1910   sat_free(repodataused);
1911 }
1912
1913 struct repodata_write_data {
1914   int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata);
1915   void *kfdata;
1916   int repodataid;
1917 };
1918
1919 static int
1920 repodata_write_keyfilter(Repo *repo, Repokey *key, void *kfdata)
1921 {
1922   struct repodata_write_data *wd = kfdata;
1923
1924   /* XXX: special repodata selection hack */
1925   if (key->name == 1 && key->size != wd->repodataid)
1926     return -1;
1927   if (key->storage == KEY_STORAGE_SOLVABLE)
1928     return KEY_STORAGE_DROPPED; /* not part of this repodata */
1929   if (wd->keyfilter)
1930     return (*wd->keyfilter)(repo, key, wd->kfdata);
1931   return key->storage;
1932 }
1933
1934 void
1935 repodata_write(Repodata *data, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata)
1936 {
1937   struct repodata_write_data wd;
1938
1939   wd.keyfilter = keyfilter;
1940   wd.kfdata = kfdata;
1941   wd.repodataid = data - data->repo->repodata;
1942   repo_write(data->repo, fp, repodata_write_keyfilter, &wd, 0);
1943 }