src/repodata.c

   1 /*
   2  * Copyright (c) 2007, Novell Inc.
   3  *
   4  * This program is licensed under the BSD license, read LICENSE.BSD
   5  * for further information
   6  */
   7
   8 /*
   9  * repodata.c
  10  *
  11  * Manage data coming from one repository
  12  *
  13  */
  14
  15 #define _GNU_SOURCE
  16 #include <string.h>
  17 #include <fnmatch.h>
  18
  19 #include <stdio.h>
  20 #include <stdlib.h>
  21 #include <unistd.h>
  22 #include <assert.h>
  23
  24 #include "repo.h"
  25 #include "pool.h"
  26 #include "poolid_private.h"
  27 #include "util.h"
  28
  29 #include "repopack.h"
  30
  31 extern unsigned int compress_buf (const unsigned char *in, unsigned int in_len,
  32                                   unsigned char *out, unsigned int out_len);
  33 extern unsigned int unchecked_decompress_buf (const unsigned char *in,
  34                                               unsigned int in_len,
  35                                               unsigned char *out,
  36                                               unsigned int out_len);
  37
  38 #define REPODATA_BLOCK 255
  39
  40
  41 void
  42 repodata_init(Repodata *data, Repo *repo, int localpool)
  43 {
  44   memset(data, 0, sizeof (*data));
  45   data->repo = repo;
  46   data->localpool = localpool;
  47   if (localpool)
  48     stringpool_init_empty(&data->spool);
  49   data->keys = sat_calloc(1, sizeof(Repokey));
  50   data->nkeys = 1;
  51   data->schemata = sat_calloc(1, sizeof(Id));
  52   data->schemadata = sat_calloc(1, sizeof(Id));
  53   data->nschemata = 1;
  54   data->schemadatalen = 1;
  55   data->start = repo->start;
  56   data->end = repo->end;
  57   data->nextra = repo->nextra;
  58   data->extrastart = 0;
  59   data->incoreoffset = sat_extend_resize(0, data->end - data->start, sizeof(Id), REPODATA_BLOCK);
  60   data->extraoffset = sat_extend_resize(0, repo->nextra, sizeof(Id), REPODATA_BLOCK);
  61   data->pagefd = -1;
  62 }
  63
  64 void
  65 repodata_free(Repodata *data)
  66 {
  67   sat_free(data->keys);
  68   sat_free(data->schemata);
  69   sat_free(data->schemadata);
  70
  71   sat_free(data->spool.strings);
  72   sat_free(data->spool.stringspace);
  73   sat_free(data->spool.stringhashtbl);
  74
  75   sat_free(data->dirpool.dirs);
  76   sat_free(data->dirpool.dirtraverse);
  77
  78   sat_free(data->incoredata);
  79   sat_free(data->incoreoffset);
  80   sat_free(data->extraoffset);
  81   sat_free(data->verticaloffset);
  82
  83   sat_free(data->blob_store);
  84   sat_free(data->pages);
  85   sat_free(data->mapped);
  86
  87   sat_free(data->vincore);
  88
  89   sat_free(data->attrs);
  90   sat_free(data->extraattrs);
  91   sat_free(data->attrdata);
  92   sat_free(data->attriddata);
  93
  94   sat_free(data->location);
  95   sat_free(data->addedfileprovides);
  96
  97   if (data->pagefd != -1)
  98     close(data->pagefd);
  99 }
 100
 101 unsigned char *
 102 data_skip_recursive(Repodata *data, unsigned char *dp, Repokey *key)
 103 {
 104   KeyValue kv;
 105   if (key->type != REPOKEY_TYPE_COUNTED)
 106     return data_skip(dp, key->type);
 107   dp = data_fetch(dp, &kv, key);
 108   int num = kv.num;
 109   int schema = kv.id;
 110   while (num--)
 111     {
 112       Id *keyp = data->schemadata + data->schemata[schema];
 113       for (; *keyp; keyp++)
 114         dp = data_skip_recursive(data, dp, data->keys + *keyp);
 115     }
 116   return dp;
 117 }
 118
 119 static unsigned char *
 120 forward_to_key(Repodata *data, Id keyid, Id schemaid, unsigned char *dp)
 121 {
 122   Id k, *keyp;
 123
 124   keyp = data->schemadata + data->schemata[schemaid];
 125   while ((k = *keyp++) != 0)
 126     {
 127       if (k == keyid)
 128         return dp;
 129       if (data->keys[k].storage == KEY_STORAGE_VERTICAL_OFFSET)
 130         {
 131           dp = data_skip(dp, REPOKEY_TYPE_ID);  /* skip that offset */
 132           dp = data_skip(dp, REPOKEY_TYPE_ID);  /* skip that length */
 133           continue;
 134         }
 135       if (data->keys[k].storage != KEY_STORAGE_INCORE)
 136         continue;
 137       dp = data_skip_recursive(data, dp, data->keys + k);
 138     }
 139   return 0;
 140 }
 141
 142 #define BLOB_PAGEBITS 15
 143 #define BLOB_PAGESIZE (1 << BLOB_PAGEBITS)
 144
 145 static unsigned char *
 146 load_page_range(Repodata *data, unsigned int pstart, unsigned int pend)
 147 {
 148 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
 149    and are consecutive.  Return a pointer to the mapping of PSTART.  */
 150   unsigned char buf[BLOB_PAGESIZE];
 151   unsigned int i;
 152
 153   /* Quick check in case all pages are there already and consecutive.  */
 154   for (i = pstart; i <= pend; i++)
 155     if (data->pages[i].mapped_at == -1
 156         || (i > pstart
 157             && data->pages[i].mapped_at
 158                != data->pages[i-1].mapped_at + BLOB_PAGESIZE))
 159       break;
 160   if (i > pend)
 161     return data->blob_store + data->pages[pstart].mapped_at;
 162
 163   if (data->pagefd == -1)
 164     return 0;
 165
 166   /* Ensure that we can map the numbers of pages we need at all.  */
 167   if (pend - pstart + 1 > data->ncanmap)
 168     {
 169       unsigned int oldcan = data->ncanmap;
 170       data->ncanmap = pend - pstart + 1;
 171       if (data->ncanmap < 4)
 172         data->ncanmap = 4;
 173       data->mapped = sat_realloc2(data->mapped, data->ncanmap, sizeof(data->mapped[0]));
 174       memset (data->mapped + oldcan, 0, (data->ncanmap - oldcan) * sizeof (data->mapped[0]));
 175       data->blob_store = sat_realloc2(data->blob_store, data->ncanmap, BLOB_PAGESIZE);
 176 #ifdef DEBUG_PAGING
 177       fprintf (stderr, "PAGE: can map %d pages\n", data->ncanmap);
 178 #endif
 179     }
 180
 181   /* Now search for "cheap" space in our store.  Space is cheap if it's either
 182      free (very cheap) or contains pages we search for anyway.  */
 183
 184   /* Setup cost array.  */
 185   unsigned int cost[data->ncanmap];
 186   for (i = 0; i < data->ncanmap; i++)
 187     {
 188       unsigned int pnum = data->mapped[i];
 189       if (pnum == 0)
 190         cost[i] = 0;
 191       else
 192         {
 193           pnum--;
 194           Attrblobpage *p = data->pages + pnum;
 195           assert (p->mapped_at != -1);
 196           if (pnum >= pstart && pnum <= pend)
 197             cost[i] = 1;
 198           else
 199             cost[i] = 3;
 200         }
 201     }
 202
 203   /* And search for cheapest space.  */
 204   unsigned int best_cost = -1;
 205   unsigned int best = 0;
 206   unsigned int same_cost = 0;
 207   for (i = 0; i + pend - pstart < data->ncanmap; i++)
 208     {
 209       unsigned int c = cost[i];
 210       unsigned int j;
 211       for (j = 0; j < pend - pstart + 1; j++)
 212         c += cost[i+j];
 213       if (c < best_cost)
 214         best_cost = c, best = i;
 215       else if (c == best_cost)
 216         same_cost++;
 217       /* A null cost won't become better.  */
 218       if (c == 0)
 219         break;
 220     }
 221   /* If all places have the same cost we would thrash on slot 0.  Avoid
 222      this by doing a round-robin strategy in this case.  */
 223   if (same_cost == data->ncanmap - pend + pstart - 1)
 224     best = data->rr_counter++ % (data->ncanmap - pend + pstart);
 225
 226   /* So we want to map our pages from [best] to [best+pend-pstart].
 227      Use a very simple strategy, which doesn't make the best use of
 228      our resources, but works.  Throw away all pages in that range
 229      (even ours) then copy around ours (in case they were outside the
 230      range) or read them in.  */
 231   for (i = best; i < best + pend - pstart + 1; i++)
 232     {
 233       unsigned int pnum = data->mapped[i];
 234       if (pnum--
 235           /* If this page is exactly at the right place already,
 236              no need to evict it.  */
 237           && pnum != pstart + i - best)
 238         {
 239           /* Evict this page.  */
 240 #ifdef DEBUG_PAGING
 241           fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
 242 #endif
 243           cost[i] = 0;
 244           data->mapped[i] = 0;
 245           data->pages[pnum].mapped_at = -1;
 246         }
 247     }
 248
 249   /* Everything is free now.  Read in the pages we want.  */
 250   for (i = pstart; i <= pend; i++)
 251     {
 252       Attrblobpage *p = data->pages + i;
 253       unsigned int pnum = i - pstart + best;
 254       void *dest = data->blob_store + pnum * BLOB_PAGESIZE;
 255       if (p->mapped_at != -1)
 256         {
 257           if (p->mapped_at != pnum * BLOB_PAGESIZE)
 258             {
 259 #ifdef DEBUG_PAGING
 260               fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
 261 #endif
 262               /* Still mapped somewhere else, so just copy it from there.  */
 263               memcpy (dest, data->blob_store + p->mapped_at, BLOB_PAGESIZE);
 264               data->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
 265             }
 266         }
 267       else
 268         {
 269           unsigned int in_len = p->file_size;
 270           unsigned int compressed = in_len & 1;
 271           in_len >>= 1;
 272 #ifdef DEBUG_PAGING
 273           fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
 274 #endif
 275           if (pread(data->pagefd, compressed ? buf : dest, in_len, p->file_offset) != in_len)
 276             {
 277               perror ("mapping pread");
 278               return 0;
 279             }
 280           if (compressed)
 281             {
 282               unsigned int out_len;
 283               out_len = unchecked_decompress_buf(buf, in_len,
 284                                                   dest, BLOB_PAGESIZE);
 285               if (out_len != BLOB_PAGESIZE && i < data->num_pages - 1)
 286                 {
 287                   fprintf(stderr, "can't decompress\n");
 288                   return 0;
 289                 }
 290 #ifdef DEBUG_PAGING
 291               fprintf (stderr, " (expand %d to %d)", in_len, out_len);
 292 #endif
 293             }
 294 #ifdef DEBUG_PAGING
 295           fprintf (stderr, "\n");
 296 #endif
 297         }
 298       p->mapped_at = pnum * BLOB_PAGESIZE;
 299       data->mapped[pnum] = i + 1;
 300     }
 301   return data->blob_store + best * BLOB_PAGESIZE;
 302 }
 303
 304 static unsigned char *
 305 make_vertical_available(Repodata *data, Repokey *key, Id off, Id len)
 306 {
 307   unsigned char *dp;
 308   if (!len)
 309     return 0;
 310   if (off >= data->lastverticaloffset)
 311     {
 312       off -= data->lastverticaloffset;
 313       if (off + len > data->vincorelen)
 314         return 0;
 315       return data->vincore + off;
 316     }
 317   if (off + len > key->size)
 318     return 0;
 319   /* we now have the offset, go into vertical */
 320   off += data->verticaloffset[key - data->keys];
 321   /* fprintf(stderr, "key %d page %d\n", key->name, off / BLOB_PAGESIZE); */
 322   dp = load_page_range(data, off / BLOB_PAGESIZE, (off + len - 1) / BLOB_PAGESIZE);
 323   if (dp)
 324     dp += off % BLOB_PAGESIZE;
 325   return dp;
 326 }
 327
 328 static inline unsigned char *
 329 get_data(Repodata *data, Repokey *key, unsigned char **dpp)
 330 {
 331   unsigned char *dp = *dpp;
 332
 333   if (!dp)
 334     return 0;
 335   if (key->storage == KEY_STORAGE_INCORE)
 336     {
 337       /* hmm, this is a bit expensive */
 338       *dpp = data_skip_recursive(data, dp, key);
 339       return dp;
 340     }
 341   else if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 342     {
 343       Id off, len;
 344       dp = data_read_id(dp, &off);
 345       dp = data_read_id(dp, &len);
 346       *dpp = dp;
 347       return make_vertical_available(data, key, off, len);
 348     }
 349   return 0;
 350 }
 351
 352 static inline int
 353 maybe_load_repodata(Repodata *data, Id *keyid)
 354 {
 355   if (data->state == REPODATA_STUB)
 356     {
 357       if (data->loadcallback)
 358         {
 359           if (keyid)
 360             {
 361               /* key order may change when loading */
 362               int i;
 363               Id name = data->keys[*keyid].name;
 364               Id type = data->keys[*keyid].type;
 365               data->loadcallback(data);
 366               if (data->state == REPODATA_AVAILABLE)
 367                 {
 368                   for (i = 1; i < data->nkeys; i++)
 369                     if (data->keys[i].name == name && data->keys[i].type == type)
 370                       break;
 371                   if (i < data->nkeys)
 372                     *keyid = i;
 373                   else
 374                     return 0;
 375                 }
 376             }
 377           else
 378             data->loadcallback(data);
 379         }
 380       else
 381         data->state = REPODATA_ERROR;
 382     }
 383   if (data->state == REPODATA_AVAILABLE)
 384     return 1;
 385   data->state = REPODATA_ERROR;
 386   return 0;
 387 }
 388
 389 static inline unsigned char*
 390 entry2data(Repodata *data, Id entry)
 391 {
 392   if (entry < 0)
 393     return data->incoredata + data->extraoffset[-1 - entry];
 394   else
 395     return data->incoredata + data->incoreoffset[entry];
 396 }
 397
 398 Id
 399 repodata_lookup_id(Repodata *data, Id entry, Id keyid)
 400 {
 401   Id schema;
 402   Repokey *key;
 403   Id id, *keyp;
 404   unsigned char *dp;
 405
 406   if (!maybe_load_repodata(data, &keyid))
 407     return 0;
 408   dp = entry2data(data, entry);
 409   if (!dp)
 410     return 0;
 411   dp = data_read_id(dp, &schema);
 412   /* make sure the schema of this solvable contains the key */
 413   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 414     if (!*keyp)
 415       return 0;
 416   dp = forward_to_key(data, keyid, schema, dp);
 417   key = data->keys + keyid;
 418   dp = get_data(data, key, &dp);
 419   if (!dp)
 420     return 0;
 421   if (key->type == REPOKEY_TYPE_CONSTANTID)
 422     return key->size;
 423   if (key->type != REPOKEY_TYPE_ID)
 424     return 0;
 425   dp = data_read_id(dp, &id);
 426   return id;
 427 }
 428
 429 const char *
 430 repodata_lookup_str(Repodata *data, Id entry, Id keyid)
 431 {
 432   Id schema;
 433   Repokey *key;
 434   Id id, *keyp;
 435   unsigned char *dp;
 436
 437   if (!maybe_load_repodata(data, &keyid))
 438     return 0;
 439
 440   dp = entry2data(data, entry);
 441   if (!dp)
 442     return 0;
 443   dp = data_read_id(dp, &schema);
 444   /* make sure the schema of this solvable contains the key */
 445   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 446     if (!*keyp)
 447       return 0;
 448   dp = forward_to_key(data, keyid, schema, dp);
 449   key = data->keys + keyid;
 450   dp = get_data(data, key, &dp);
 451   if (!dp)
 452     return 0;
 453   if (key->type == REPOKEY_TYPE_STR)
 454     return (const char *)dp;
 455   if (key->type == REPOKEY_TYPE_CONSTANTID)
 456     return id2str(data->repo->pool, key->size);
 457   if (key->type == REPOKEY_TYPE_ID)
 458     dp = data_read_id(dp, &id);
 459   else
 460     return 0;
 461   if (data->localpool)
 462     return data->spool.stringspace + data->spool.strings[id];
 463   return id2str(data->repo->pool, id);
 464 }
 465
 466 int
 467 repodata_lookup_num(Repodata *data, Id entry, Id keyid, unsigned int *value)
 468 {
 469   Id schema;
 470   Repokey *key;
 471   Id *keyp;
 472   KeyValue kv;
 473   unsigned char *dp;
 474
 475   *value = 0;
 476
 477   if (!maybe_load_repodata(data, &keyid))
 478     return 0;
 479
 480   dp = entry2data(data, entry);
 481   if (!dp)
 482     return 0;
 483   dp = data_read_id(dp, &schema);
 484   /* make sure the schema of this solvable contains the key */
 485   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 486     if (!*keyp)
 487       return 0;
 488   dp = forward_to_key(data, keyid, schema, dp);
 489   key = data->keys + keyid;
 490   dp = get_data(data, key, &dp);
 491   if (!dp)
 492     return 0;
 493   if (key->type == REPOKEY_TYPE_NUM
 494       || key->type == REPOKEY_TYPE_U32
 495       || key->type == REPOKEY_TYPE_CONSTANT)
 496     {
 497       dp = data_fetch(dp, &kv, key);
 498       *value = kv.num;
 499       return 1;
 500     }
 501   return 0;
 502 }
 503
 504 int
 505 repodata_lookup_void(Repodata *data, Id entry, Id keyid)
 506 {
 507   Id schema;
 508   Id *keyp;
 509   unsigned char *dp;
 510   if (!maybe_load_repodata(data, &keyid))
 511     return 0;
 512   dp = entry2data(data, entry);
 513   if (!dp)
 514     return 0;
 515   dp = data_read_id(dp, &schema);
 516   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 517     if (!*keyp)
 518       return 0;
 519   return 1;
 520 }
 521
 522 const unsigned char *
 523 repodata_lookup_bin_checksum(Repodata *data, Id entry, Id keyid, Id *typep)
 524 {
 525   Id schema;
 526   Id *keyp;
 527   Repokey *key;
 528   unsigned char *dp;
 529
 530   if (!maybe_load_repodata(data, &keyid))
 531     return 0;
 532   dp = entry2data(data, entry);
 533   if (!dp)
 534     return 0;
 535   dp = data_read_id(dp, &schema);
 536   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 537     if (!*keyp)
 538       return 0;
 539   dp = forward_to_key(data, keyid, schema, dp);
 540   key = data->keys + keyid;
 541   *typep = key->type;
 542   return get_data(data, key, &dp);
 543 }
 544
 545 void
 546 repodata_search(Repodata *data, Id entry, Id keyname, int (*callback)(void *cbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv), void *cbdata)
 547 {
 548   Id schema;
 549   Repokey *key;
 550   Id k, keyid, *kp, *keyp;
 551   unsigned char *dp, *ddp;
 552   int onekey = 0;
 553   int stop;
 554   KeyValue kv;
 555
 556   if (entry < 0
 557       || !maybe_load_repodata(data, 0))
 558     return;
 559
 560   dp = entry2data(data, entry);
 561   if (!dp)
 562     return;
 563   dp = data_read_id(dp, &schema);
 564   keyp = data->schemadata + data->schemata[schema];
 565   if (keyname)
 566     {
 567       /* search in a specific key */
 568       for (kp = keyp; (k = *kp++) != 0; )
 569         if (data->keys[k].name == keyname)
 570           break;
 571       if (k == 0)
 572         return;
 573       dp = forward_to_key(data, k, schema, dp);
 574       if (!dp)
 575         return;
 576       keyp = kp - 1;
 577       onekey = 1;
 578     }
 579   while ((keyid = *keyp++) != 0)
 580     {
 581       stop = 0;
 582       key = data->keys + keyid;
 583       ddp = get_data(data, key, &dp);
 584       do
 585         {
 586           ddp = data_fetch(ddp, &kv, key);
 587           if (!ddp)
 588             break;
 589           if (key->type == REPOKEY_TYPE_COUNTED)
 590             {
 591               int num = kv.num;
 592               int subschema = kv.id;
 593               Repokey *countkey = key;
 594               kv.eof = 0;
 595               callback(cbdata, data->repo->pool->solvables + data->start + entry, data, countkey, &kv);
 596               while (num--)
 597                 {
 598                   Id *kp = data->schemadata + data->schemata[subschema];
 599                   for (; *kp; kp++)
 600                     {
 601                       key = data->keys + *kp;
 602                       ddp = data_fetch(ddp, &kv, key);
 603                       if (!ddp)
 604                         exit(1);
 605                       callback(cbdata, data->repo->pool->solvables + data->start + entry, data, key, &kv);
 606                     }
 607                   kv.eof = 1;
 608                   callback(cbdata, data->repo->pool->solvables + data->start + entry, data, countkey, &kv);
 609                 }
 610               kv.eof = 2;
 611               stop = callback(cbdata, data->repo->pool->solvables + data->start + entry, data, countkey, &kv);
 612             }
 613           else
 614             stop = callback(cbdata, data->repo->pool->solvables + data->start + entry, data, key, &kv);
 615         }
 616       while (!kv.eof && !stop);
 617       if (onekey || stop > SEARCH_NEXT_KEY)
 618         return;
 619     }
 620 }
 621
 622 static void
 623 dataiterator_newdata(Dataiterator *di)
 624 {
 625   Id keyname = di->keyname;
 626   Repodata *data = di->data;
 627   di->nextkeydp = 0;
 628
 629   if (data->state == REPODATA_STUB)
 630     {
 631       if (keyname)
 632         {
 633           int j;
 634           for (j = 1; j < data->nkeys; j++)
 635             if (keyname == data->keys[j].name)
 636               break;
 637           if (j == data->nkeys)
 638             return;
 639         }
 640       /* load it */
 641       if (data->loadcallback)
 642         data->loadcallback(data);
 643       else
 644         data->state = REPODATA_ERROR;
 645     }
 646   if (data->state == REPODATA_ERROR)
 647     return;
 648
 649   Id schema;
 650   unsigned char *dp = data->incoredata;
 651   if (!dp)
 652     return;
 653   if (di->solvid >= 0)
 654     dp += data->incoreoffset[di->solvid - data->start];
 655   else
 656     dp += data->extraoffset[-1 - di->solvid - data->extrastart];
 657   dp = data_read_id(dp, &schema);
 658   Id *keyp = data->schemadata + data->schemata[schema];
 659   if (keyname)
 660     {
 661       Id k, *kp;
 662       /* search in a specific key */
 663       for (kp = keyp; (k = *kp++) != 0; )
 664         if (data->keys[k].name == keyname)
 665           break;
 666       if (k == 0)
 667         return;
 668       dp = forward_to_key(data, k, schema, dp);
 669       if (!dp)
 670         return;
 671       keyp = kp - 1;
 672     }
 673   Id keyid = *keyp++;
 674   if (!keyid)
 675     return;
 676
 677   di->data = data;
 678   di->key = di->data->keys + keyid;
 679   di->keyp = keyp;
 680   di->dp = 0;
 681
 682   di->nextkeydp = dp;
 683   di->dp = get_data(di->data, di->key, &di->nextkeydp);
 684   di->kv.eof = 0;
 685 }
 686
 687 void
 688 dataiterator_init(Dataiterator *di, Repo *repo, Id p, Id keyname,
 689                   const char *match, int flags)
 690 {
 691   di->flags = flags;
 692   if (p)
 693     {
 694       di->solvid = p;
 695       di->flags |= __SEARCH_ONESOLVABLE;
 696       di->data = repo->repodata - 1;
 697       if (flags & SEARCH_NO_STORAGE_SOLVABLE)
 698         di->state = 0;
 699       else
 700         di->state = 1;
 701     }
 702   else
 703     {
 704       di->solvid = repo->start - 1;
 705       if (di->solvid < 0)
 706         {
 707           fprintf(stderr, "A repo contains the NULL solvable!\n");
 708           exit(1);
 709         }
 710       di->data = repo->repodata + repo->nrepodata - 1;
 711       di->state = 0;
 712     }
 713
 714   di->match = match;
 715   if ((di->flags & SEARCH_STRINGMASK) == SEARCH_REGEX)
 716     {
 717       if (di->match)
 718         {
 719           /* We feed multiple lines eventually (e.g. authors or descriptions),
 720              so set REG_NEWLINE. */
 721           di->regex_err =
 722             regcomp(&di->regex, di->match,
 723               REG_EXTENDED | REG_NOSUB | REG_NEWLINE
 724               | ((di->flags & SEARCH_NOCASE) ? REG_ICASE : 0));
 725 #if 0
 726           if (di->regex_err != 0)
 727             {
 728               fprintf(stderr, "Given regex failed to compile: %s\n", di->match);
 729               fprintf(stderr, "regcomp error code: %d\n", di->regex_err);
 730               exit(1);
 731             }
 732 #else
 733         }
 734       else
 735         {
 736           di->flags |= (di->flags & SEARCH_STRINGMASK) | SEARCH_STRING;
 737           di->regex_err = 0;
 738 #endif
 739         }
 740     }
 741
 742   di->keyname = keyname;
 743   static Id zeroid = 0;
 744   di->keyp = &zeroid;
 745   di->kv.eof = 1;
 746   di->repo = repo;
 747   di->idp = 0;
 748   di->subkeyp = 0;
 749 }
 750
 751 /* FIXME factor and merge with repo_matchvalue */
 752 static int
 753 dataiterator_match_int_real(Dataiterator *di, int flags, const void *vmatch)
 754 {
 755   KeyValue *kv = &di->kv;
 756   const char *match = vmatch;
 757   if ((flags & SEARCH_STRINGMASK) != 0)
 758     {
 759       switch (di->key->type)
 760         {
 761         case REPOKEY_TYPE_ID:
 762         case REPOKEY_TYPE_IDARRAY:
 763           if (di->data && di->data->localpool)
 764             kv->str = stringpool_id2str(&di->data->spool, kv->id);
 765           else
 766             kv->str = id2str(di->repo->pool, kv->id);
 767           break;
 768         case REPOKEY_TYPE_STR:
 769           break;
 770         default:
 771           return 0;
 772         }
 773       /* Maybe skip the kind specifier.  Do this only for SOLVABLE attributes,
 774          for the others we can't know if a colon separates a kind or not.  */
 775       if ((flags & SEARCH_SKIP_KIND)
 776           && di->key->storage == KEY_STORAGE_SOLVABLE)
 777         {
 778           const char *s = strchr(kv->str, ':');
 779           if (s)
 780             kv->str = s + 1;
 781         }
 782       switch ((flags & SEARCH_STRINGMASK))
 783         {
 784           case SEARCH_SUBSTRING:
 785             if (flags & SEARCH_NOCASE)
 786               {
 787                 if (!strcasestr(kv->str, match))
 788                   return 0;
 789               }
 790             else
 791               {
 792                 if (!strstr(kv->str, match))
 793                   return 0;
 794               }
 795             break;
 796           case SEARCH_STRING:
 797             if (flags & SEARCH_NOCASE)
 798               {
 799                 if (strcasecmp(match, kv->str))
 800                   return 0;
 801               }
 802             else
 803               {
 804                 if (strcmp(match, kv->str))
 805                   return 0;
 806               }
 807             break;
 808           case SEARCH_GLOB:
 809             if (fnmatch(match, kv->str, (flags & SEARCH_NOCASE) ? FNM_CASEFOLD : 0))
 810               return 0;
 811             break;
 812           case SEARCH_REGEX:
 813             if (regexec((const regex_t *)vmatch, kv->str, 0, NULL, 0))
 814               return 0;
 815             break;
 816           default:
 817             return 0;
 818         }
 819     }
 820   return 1;
 821 }
 822
 823 static int
 824 dataiterator_match_int(Dataiterator *di)
 825 {
 826   if ((di->flags & SEARCH_STRINGMASK) == SEARCH_REGEX)
 827     return dataiterator_match_int_real(di, di->flags, &di->regex);
 828   else
 829     return dataiterator_match_int_real(di, di->flags, di->match);
 830 }
 831
 832 int
 833 dataiterator_match(Dataiterator *di, int flags, const void *vmatch)
 834 {
 835   return dataiterator_match_int_real(di, flags, vmatch);
 836 }
 837
 838 static Repokey solvablekeys[RPM_RPMDBID - SOLVABLE_NAME + 1] = {
 839   { SOLVABLE_NAME,        REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
 840   { SOLVABLE_ARCH,        REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
 841   { SOLVABLE_EVR,         REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
 842   { SOLVABLE_VENDOR,      REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
 843   { SOLVABLE_PROVIDES,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 844   { SOLVABLE_OBSOLETES,   REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 845   { SOLVABLE_CONFLICTS,   REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 846   { SOLVABLE_REQUIRES,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 847   { SOLVABLE_RECOMMENDS,  REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 848   { SOLVABLE_SUGGESTS,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 849   { SOLVABLE_SUPPLEMENTS, REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 850   { SOLVABLE_ENHANCES,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 851   { SOLVABLE_FRESHENS,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
 852   { RPM_RPMDBID,          REPOKEY_TYPE_U32, 0, KEY_STORAGE_SOLVABLE },
 853 };
 854
 855 int
 856 dataiterator_step(Dataiterator *di)
 857 {
 858 restart:
 859   while (1)
 860     {
 861       if (di->state)
 862         {
 863           if (di->idp)
 864             {
 865               Id *idp = di->idp;
 866               if (*idp)
 867                 {
 868                   di->kv.id = *idp;
 869                   di->idp++;
 870                   di->kv.eof = idp[1] ? 0 : 1;
 871                   goto weg2;
 872                 }
 873               else
 874                 di->idp = 0;
 875             }
 876           Solvable *s = di->repo->pool->solvables + di->solvid;
 877           int state = di->state;
 878           di->key = solvablekeys + state - 1;
 879           if (di->keyname)
 880             di->state = RPM_RPMDBID;
 881           else
 882             di->state++;
 883           if (state == 1)
 884             {
 885               di->data = 0;
 886               if (di->keyname)
 887                 state = di->keyname - 1;
 888             }
 889           switch (state + 1)
 890             {
 891               case SOLVABLE_NAME:
 892                 if (!s->name)
 893                   continue;
 894                 di->kv.id = s->name;
 895                 di->kv.eof = 1;
 896                 break;
 897               case SOLVABLE_ARCH:
 898                 if (!s->arch)
 899                   continue;
 900                 di->kv.id = s->arch;
 901                 di->kv.eof = 1;
 902                 break;
 903               case SOLVABLE_EVR:
 904                 if (!s->evr)
 905                   continue;
 906                 di->kv.id = s->evr;
 907                 di->kv.eof = 1;
 908                 break;
 909               case SOLVABLE_VENDOR:
 910                 if (!s->vendor)
 911                   continue;
 912                 di->kv.id = s->vendor;
 913                 di->kv.eof = 1;
 914                 break;
 915               case SOLVABLE_PROVIDES:
 916                 di->idp = s->provides
 917                     ? di->repo->idarraydata + s->provides : 0;
 918                 continue;
 919               case SOLVABLE_OBSOLETES:
 920                 di->idp = s->obsoletes
 921                     ? di->repo->idarraydata + s->obsoletes : 0;
 922                 continue;
 923               case SOLVABLE_CONFLICTS:
 924                 di->idp = s->conflicts
 925                     ? di->repo->idarraydata + s->conflicts : 0;
 926                 continue;
 927               case SOLVABLE_REQUIRES:
 928                 di->idp = s->requires
 929                     ? di->repo->idarraydata + s->requires : 0;
 930                 continue;
 931               case SOLVABLE_RECOMMENDS:
 932                 di->idp = s->recommends
 933                     ? di->repo->idarraydata + s->recommends : 0;
 934                 continue;
 935               case SOLVABLE_SUPPLEMENTS:
 936                 di->idp = s->supplements
 937                     ? di->repo->idarraydata + s->supplements : 0;
 938                 continue;
 939               case SOLVABLE_SUGGESTS:
 940                 di->idp = s->suggests
 941                     ? di->repo->idarraydata + s->suggests : 0;
 942                 continue;
 943               case SOLVABLE_ENHANCES:
 944                 di->idp = s->enhances
 945                     ? di->repo->idarraydata + s->enhances : 0;
 946                 continue;
 947               case SOLVABLE_FRESHENS:
 948                 di->idp = s->freshens
 949                     ? di->repo->idarraydata + s->freshens : 0;
 950                 continue;
 951               case RPM_RPMDBID:
 952                 if (!di->repo->rpmdbid)
 953                   continue;
 954                 di->kv.num = di->repo->rpmdbid[di->solvid - di->repo->start];
 955                 di->kv.eof = 1;
 956                 break;
 957               default:
 958                 di->data = di->repo->repodata - 1;
 959                 di->kv.eof = 1;
 960                 di->state = 0;
 961                 continue;
 962             }
 963         }
 964       else if (di->subkeyp)
 965         {
 966           Id keyid;
 967           if (!di->subnum)
 968             {
 969               /* Send end-of-substruct.  We are here only when we saw a
 970                  _COUNTED key one level up.  Since then we didn't increment
 971                  ->keyp, so it still can be found at keyp[-1].  */
 972               di->kv.eof = 2;
 973               di->key = di->data->keys + di->keyp[-1];
 974               di->subkeyp = 0;
 975             }
 976           else if (!(keyid = *di->subkeyp++))
 977             {
 978               /* Send end-of-element.  See above for keyp[-1].  */
 979               di->kv.eof = 1;
 980               di->key = di->data->keys + di->keyp[-1];
 981               di->subkeyp = di->data->schemadata + di->data->schemata[di->subschema];
 982               di->subnum--;
 983             }
 984           else
 985             {
 986               di->key = di->data->keys + keyid;
 987               di->dp = data_fetch(di->dp, &di->kv, di->key);
 988               if (!di->dp)
 989                 exit(1);
 990             }
 991         }
 992       else
 993         {
 994           if (di->kv.eof)
 995             di->dp = 0;
 996           else
 997             di->dp = data_fetch(di->dp, &di->kv, di->key);
 998
 999           while (!di->dp)
1000             {
1001               Id keyid;
1002               if (di->keyname || !(keyid = *di->keyp++))
1003                 {
1004                   while (1)
1005                     {
1006                       Repo *repo = di->repo;
1007                       Repodata *data = ++di->data;
1008                       if (data >= repo->repodata + repo->nrepodata)
1009                         {
1010                           if (di->flags & __SEARCH_ONESOLVABLE)
1011                             return 0;
1012                           if (di->solvid >= 0)
1013                             {
1014                               while (++di->solvid < repo->end)
1015                                 if (repo->pool->solvables[di->solvid].repo == repo)
1016                                   break;
1017                               if (di->solvid >= repo->end)
1018                                 {
1019                                   if (!(di->flags & SEARCH_EXTRA))
1020                                     goto skiprepo;
1021                                   di->solvid = -1;
1022                                   if (di->solvid < -repo->nextra)
1023                                     goto skiprepo;
1024                                 }
1025                             }
1026                           else
1027                             {
1028                               --di->solvid;
1029                               if (di->solvid < -repo->nextra)
1030                                 {
1031 skiprepo:;
1032                                   Pool *pool = di->repo->pool;
1033                                   if (!(di->flags & SEARCH_ALL_REPOS)
1034                                       || di->repo == pool->repos[pool->nrepos - 1])
1035                                     return 0;
1036                                   int i;
1037                                   for (i = 0; i < pool->nrepos; i++)
1038                                     if (di->repo == pool->repos[i])
1039                                       break;
1040                                   di->repo = pool->repos[i + 1];
1041                                   dataiterator_init(di, di->repo, 0, di->keyname, di->match, di->flags);
1042                                   continue;
1043                                 }
1044                             }
1045                           di->data = repo->repodata - 1;
1046                           if (di->solvid < 0
1047                               || (di->flags & SEARCH_NO_STORAGE_SOLVABLE))
1048                             continue;
1049                           static Id zeroid = 0;
1050                           di->keyp = &zeroid;
1051                           di->state = 1;
1052                           goto restart;
1053                         }
1054                       if ((di->solvid < 0 && (-1 - di->solvid) >= data->extrastart && (-1 - di->solvid) < (data->extrastart + data->nextra))
1055                           || (di->solvid >= 0 && di->solvid >= data->start && di->solvid < data->end))
1056                         {
1057                           dataiterator_newdata(di);
1058                           if (di->nextkeydp)
1059                             break;
1060                         }
1061                     }
1062                 }
1063               else
1064                 {
1065                   di->key = di->data->keys + keyid;
1066                   di->dp = get_data(di->data, di->key, &di->nextkeydp);
1067                 }
1068               di->dp = data_fetch(di->dp, &di->kv, di->key);
1069             }
1070           if (di->key->type == REPOKEY_TYPE_COUNTED)
1071             {
1072               di->subnum = di->kv.num;
1073               di->subschema = di->kv.id;
1074               di->kv.eof = 0;
1075               di->subkeyp = di->data->schemadata + di->data->schemata[di->subschema];
1076             }
1077         }
1078 weg2:
1079       if (!di->match
1080           || dataiterator_match_int(di))
1081         break;
1082     }
1083   return 1;
1084 }
1085
1086 void
1087 dataiterator_skip_attribute(Dataiterator *di)
1088 {
1089   if (di->state)
1090     di->idp = 0;
1091   /* This will make the next _step call to retrieve the next field.  */
1092   di->kv.eof = 1;
1093 }
1094
1095 void
1096 dataiterator_skip_solvable(Dataiterator *di)
1097 {
1098   /* We're done with this field.  */
1099   di->kv.eof = 1;
1100   /* And with solvable data.  */
1101   di->state = 0;
1102   /* And with all keys for this repodata and thing. */
1103   static Id zeroid = 0;
1104   di->keyp = &zeroid;
1105   /* And with all repodatas for this thing.  */
1106   di->data = di->repo->repodata + di->repo->nrepodata - 1;
1107   /* Hence the next call to _step will retrieve the next thing.  */
1108 }
1109
1110 void
1111 dataiterator_skip_repo(Dataiterator *di)
1112 {
1113   dataiterator_skip_solvable(di);
1114   /* We're done with all solvables and all extra things for this repo.  */
1115   di->solvid = -1 - di->repo->nextra;
1116 }
1117
1118 void
1119 dataiterator_jump_to_solvable(Dataiterator *di, Solvable *s)
1120 {
1121   di->repo = s->repo;
1122   /* Simulate us being done with the solvable before the requested one.  */
1123   dataiterator_skip_solvable(di);
1124   di->solvid = s - s->repo->pool->solvables;
1125   di->solvid--;
1126 }
1127
1128 void
1129 dataiterator_jump_to_repo(Dataiterator *di, Repo *repo)
1130 {
1131   di->repo = repo;
1132   dataiterator_skip_solvable(di);
1133   di->solvid = repo->start - 1;
1134 }
1135
1136 /* extend repodata so that it includes solvables p */
1137 void
1138 repodata_extend(Repodata *data, Id p)
1139 {
1140   if (data->start == data->end)
1141     data->start = data->end = p;
1142   if (p >= data->end)
1143     {
1144       int old = data->end - data->start;
1145       int new = p - data->end + 1;
1146       if (data->attrs)
1147         {
1148           data->attrs = sat_extend(data->attrs, old, new, sizeof(Id), REPODATA_BLOCK);
1149           memset(data->attrs + old, 0, new * sizeof(Id));
1150         }
1151       data->incoreoffset = sat_extend(data->incoreoffset, old, new, sizeof(Id), REPODATA_BLOCK);
1152       memset(data->incoreoffset + old, 0, new * sizeof(Id));
1153       data->end = p + 1;
1154     }
1155   if (p < data->start)
1156     {
1157       int old = data->end - data->start;
1158       int new = data->start - p;
1159       if (data->attrs)
1160         {
1161           data->attrs = sat_extend_resize(data->attrs, old + new, sizeof(Id), REPODATA_BLOCK);
1162           memmove(data->attrs + new, data->attrs, old * sizeof(Id));
1163           memset(data->attrs, 0, new * sizeof(Id));
1164         }
1165       data->incoreoffset = sat_extend_resize(data->incoreoffset, old + new, sizeof(Id), REPODATA_BLOCK);
1166       memmove(data->incoreoffset + new, data->incoreoffset, old * sizeof(Id));
1167       memset(data->incoreoffset, 0, new * sizeof(Id));
1168       data->start = p;
1169     }
1170 }
1171
1172 void
1173 repodata_extend_extra(Repodata *data, int nextra)
1174 {
1175   if (nextra <= data->nextra)
1176     return;
1177   if (data->extraattrs)
1178     {
1179       data->extraattrs = sat_extend(data->extraattrs, data->nextra, nextra - data->nextra, sizeof(Id), REPODATA_BLOCK);
1180       memset(data->extraattrs + data->nextra, 0, (nextra - data->nextra) * sizeof (Id));
1181     }
1182   data->extraoffset = sat_extend(data->extraoffset, data->nextra, nextra - data->nextra, sizeof(Id), REPODATA_BLOCK);
1183   memset(data->extraoffset + data->nextra, 0, (nextra - data->nextra) * sizeof(Id));
1184   data->nextra = nextra;
1185 }
1186
1187 void
1188 repodata_extend_block(Repodata *data, Id start, Id num)
1189 {
1190   if (!num)
1191     return;
1192   if (!data->incoreoffset)
1193     {
1194       data->incoreoffset = sat_calloc_block(num, sizeof(Id), REPODATA_BLOCK);
1195       data->start = start;
1196       data->end = start + num;
1197       return;
1198     }
1199   repodata_extend(data, start);
1200   if (num > 1)
1201     repodata_extend(data, start + num - 1);
1202 }
1203
1204 /**********************************************************************/
1205
1206 #define REPODATA_ATTRS_BLOCK 63
1207 #define REPODATA_ATTRDATA_BLOCK 1023
1208 #define REPODATA_ATTRIDDATA_BLOCK 63
1209
1210 static inline Id
1211 get_new_struct(Repodata *data)
1212 {
1213   /* Make sure to never give out struct id 0. */
1214   if (!data->structs)
1215     {
1216       data->structs = sat_extend(0, 0, 2, sizeof(Id *), REPODATA_BLOCK);
1217       data->structs[0] = 0;
1218       data->structs[1] = 0;
1219       data->nstructs = 2;
1220       return 1;
1221     }
1222   data->structs = sat_extend(data->structs, data->nstructs, 1, sizeof(Id *), REPODATA_BLOCK);
1223   data->structs[data->nstructs] = 0;
1224   return data->nstructs++;
1225 }
1226
1227 static Id
1228 repodata_get_handle_int(Repodata *data, Id entry)
1229 {
1230   Id *ap;
1231   if (!data->attrs && entry >= 0)
1232     {
1233       data->attrs = sat_calloc_block(data->end - data->start, sizeof(Id),
1234                                      REPODATA_BLOCK);
1235     }
1236   else if (!data->extraattrs && entry < 0)
1237     data->extraattrs = sat_calloc_block(data->nextra, sizeof(Id), REPODATA_BLOCK);
1238   if (entry < 0)
1239     ap = &data->extraattrs[-1 - entry];
1240   else
1241     ap = &data->attrs[entry];
1242   if (!*ap)
1243     *ap = get_new_struct(data);
1244   return *ap;
1245 }
1246
1247 Id
1248 repodata_get_handle(Repodata *data, Id entry)
1249 {
1250   return repodata_get_handle_int(data, entry);
1251 }
1252
1253 static void
1254 repodata_insert_keyid(Repodata *data, Id handle, Id keyid, Id val, int overwrite)
1255 {
1256   Id *pp;
1257   Id *ap;
1258   int i;
1259   ap = data->structs[handle];
1260   i = 0;
1261   if (ap)
1262     {
1263       for (pp = ap; *pp; pp += 2)
1264         /* Determine equality based on the name only, allows us to change
1265            type (when overwrite is set), and makes TYPE_CONSTANT work.  */
1266         if (data->keys[*pp].name == data->keys[keyid].name)
1267           break;
1268       if (*pp)
1269         {
1270           if (overwrite)
1271             {
1272               pp[0] = keyid;
1273               pp[1] = val;
1274             }
1275           return;
1276         }
1277       i = pp - ap;
1278     }
1279   ap = sat_extend(ap, i, 3, sizeof(Id), REPODATA_ATTRS_BLOCK);
1280   data->structs[handle] = ap;
1281   pp = ap + i;
1282   *pp++ = keyid;
1283   *pp++ = val;
1284   *pp = 0;
1285 }
1286
1287 void
1288 repodata_set(Repodata *data, Id handle, Repokey *key, Id val)
1289 {
1290   Id keyid;
1291
1292   /* find key in keys */
1293   for (keyid = 1; keyid < data->nkeys; keyid++)
1294     if (data->keys[keyid].name == key->name && data->keys[keyid].type == key->type)
1295       {
1296         if ((key->type == REPOKEY_TYPE_CONSTANT || key->type == REPOKEY_TYPE_CONSTANTID) && key->size != data->keys[keyid].size)
1297           continue;
1298         break;
1299       }
1300   if (keyid == data->nkeys)
1301     {
1302       /* allocate new key */
1303       data->keys = sat_realloc2(data->keys, data->nkeys + 1, sizeof(Repokey));
1304       data->keys[data->nkeys++] = *key;
1305       if (data->verticaloffset)
1306         {
1307           data->verticaloffset = sat_realloc2(data->verticaloffset, data->nkeys, sizeof(Id));
1308           data->verticaloffset[data->nkeys - 1] = 0;
1309         }
1310     }
1311   repodata_insert_keyid(data, handle, keyid, val, 1);
1312 }
1313
1314 void
1315 repodata_set_id(Repodata *data, Id handle, Id keyname, Id id)
1316 {
1317   Repokey key;
1318   key.name = keyname;
1319   key.type = REPOKEY_TYPE_ID;
1320   key.size = 0;
1321   key.storage = KEY_STORAGE_INCORE;
1322   repodata_set(data, handle, &key, id);
1323 }
1324
1325 void
1326 repodata_set_num(Repodata *data, Id handle, Id keyname, unsigned int num)
1327 {
1328   Repokey key;
1329   key.name = keyname;
1330   key.type = REPOKEY_TYPE_NUM;
1331   key.size = 0;
1332   key.storage = KEY_STORAGE_INCORE;
1333   repodata_set(data, handle, &key, (Id)num);
1334 }
1335
1336 void
1337 repodata_set_poolstr(Repodata *data, Id handle, Id keyname, const char *str)
1338 {
1339   Repokey key;
1340   Id id;
1341   if (data->localpool)
1342     id = stringpool_str2id(&data->spool, str, 1);
1343   else
1344     id = str2id(data->repo->pool, str, 1);
1345   key.name = keyname;
1346   key.type = REPOKEY_TYPE_ID;
1347   key.size = 0;
1348   key.storage = KEY_STORAGE_INCORE;
1349   repodata_set(data, handle, &key, id);
1350 }
1351
1352 void
1353 repodata_set_constant(Repodata *data, Id handle, Id keyname, unsigned int constant)
1354 {
1355   Repokey key;
1356   key.name = keyname;
1357   key.type = REPOKEY_TYPE_CONSTANT;
1358   key.size = constant;
1359   key.storage = KEY_STORAGE_INCORE;
1360   repodata_set(data, handle, &key, 0);
1361 }
1362
1363 void
1364 repodata_set_constantid(Repodata *data, Id handle, Id keyname, Id id)
1365 {
1366   Repokey key;
1367   key.name = keyname;
1368   key.type = REPOKEY_TYPE_CONSTANTID;
1369   key.size = id;
1370   key.storage = KEY_STORAGE_INCORE;
1371   repodata_set(data, handle, &key, 0);
1372 }
1373
1374 void
1375 repodata_set_void(Repodata *data, Id handle, Id keyname)
1376 {
1377   Repokey key;
1378   key.name = keyname;
1379   key.type = REPOKEY_TYPE_VOID;
1380   key.size = 0;
1381   key.storage = KEY_STORAGE_INCORE;
1382   repodata_set(data, handle, &key, 0);
1383 }
1384
1385 void
1386 repodata_set_str(Repodata *data, Id handle, Id keyname, const char *str)
1387 {
1388   Repokey key;
1389   int l;
1390
1391   l = strlen(str) + 1;
1392   key.name = keyname;
1393   key.type = REPOKEY_TYPE_STR;
1394   key.size = 0;
1395   key.storage = KEY_STORAGE_INCORE;
1396   data->attrdata = sat_extend(data->attrdata, data->attrdatalen, l, 1, REPODATA_ATTRDATA_BLOCK);
1397   memcpy(data->attrdata + data->attrdatalen, str, l);
1398   repodata_set(data, handle, &key, data->attrdatalen);
1399   data->attrdatalen += l;
1400 }
1401
1402 static void
1403 repodata_add_array(Repodata *data, Id handle, Id keyname, Id keytype, int entrysize)
1404 {
1405   int oldsize;
1406   Id *ida, *pp;
1407
1408   if (handle == data->lasthandle && data->keys[data->lastkey].name == keyname && data->keys[data->lastkey].type == keytype && data->attriddatalen == data->lastdatalen)
1409     {
1410       /* great! just append the new data */
1411       data->attriddata = sat_extend(data->attriddata, data->attriddatalen, entrysize, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
1412       data->attriddatalen--;    /* overwrite terminating 0  */
1413       data->lastdatalen += entrysize;
1414       return;
1415     }
1416   pp = data->structs[handle];
1417   if (pp)
1418     for (; *pp; pp += 2)
1419       if (data->keys[*pp].name == keyname && data->keys[*pp].type == keytype)
1420         break;
1421   if (!pp || !*pp)
1422     {
1423       /* not found. allocate new key */
1424       Repokey key;
1425       key.name = keyname;
1426       key.type = keytype;
1427       key.size = 0;
1428       key.storage = KEY_STORAGE_INCORE;
1429       data->attriddata = sat_extend(data->attriddata, data->attriddatalen, entrysize + 1, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
1430       repodata_set(data, handle, &key, data->attriddatalen);
1431       data->lasthandle = 0;     /* next time... */
1432       return;
1433     }
1434   oldsize = 0;
1435   for (ida = data->attriddata + pp[1]; *ida; ida += entrysize)
1436     oldsize += entrysize;
1437   if (ida + 1 == data->attriddata + data->attriddatalen)
1438     {
1439       /* this was the last entry, just append it */
1440       data->attriddata = sat_extend(data->attriddata, data->attriddatalen, entrysize, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
1441       data->attriddatalen--;    /* overwrite terminating 0  */
1442     }
1443   else
1444     {
1445       /* too bad. move to back. */
1446       data->attriddata = sat_extend(data->attriddata, data->attriddatalen,  oldsize + entrysize + 1, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
1447       memcpy(data->attriddata + data->attriddatalen, data->attriddata + pp[1], oldsize * sizeof(Id));
1448       pp[1] = data->attriddatalen;
1449       data->attriddatalen += oldsize;
1450     }
1451   data->lasthandle = handle;
1452   data->lastkey = *pp;
1453   data->lastdatalen = data->attriddatalen + entrysize + 1;
1454 }
1455
1456 static inline int
1457 checksumtype2len(Id type)
1458 {
1459   switch (type)
1460     {
1461     case REPOKEY_TYPE_MD5:
1462       return SIZEOF_MD5;
1463     case REPOKEY_TYPE_SHA1:
1464       return SIZEOF_SHA1;
1465     case REPOKEY_TYPE_SHA256:
1466       return SIZEOF_SHA256;
1467     default:
1468       return 0;
1469     }
1470 }
1471
1472 void
1473 repodata_set_bin_checksum(Repodata *data, Id handle, Id keyname, Id type,
1474                       const unsigned char *str)
1475 {
1476   Repokey key;
1477   int l = checksumtype2len(type);
1478
1479   if (!l)
1480     return;
1481   key.name = keyname;
1482   key.type = type;
1483   key.size = 0;
1484   key.storage = KEY_STORAGE_INCORE;
1485   data->attrdata = sat_extend(data->attrdata, data->attrdatalen, l, 1, REPODATA_ATTRDATA_BLOCK);
1486   memcpy(data->attrdata + data->attrdatalen, str, l);
1487   repodata_set(data, handle, &key, data->attrdatalen);
1488   data->attrdatalen += l;
1489 }
1490
/*
 * Convert 2 * buflen hex digits from str into buflen bytes in buf.
 *
 * Upper and lower case digits are accepted.  Returns buflen on
 * success and 0 as soon as a non-hex character (including the
 * terminating NUL) is encountered.
 */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  int i, half;

  for (i = 0; i < buflen; i++)
    {
      /* Two nibbles per byte, high nibble first.  */
      for (half = 0; half < 2; half++)
        {
          int c = *str++;
          int v;

          if (c >= '0' && c <= '9')
            v = c - '0';
          else if (c >= 'a' && c <= 'f')
            v = c - 'a' + 10;
          else if (c >= 'A' && c <= 'F')
            v = c - 'A' + 10;
          else
            return 0;
          if (half == 0)
            buf[i] = v;
          else
            buf[i] = (buf[i] << 4) | v;
        }
    }
  return buflen;
}
1515
1516 void
1517 repodata_set_checksum(Repodata *data, Id handle, Id keyname, Id type,
1518                       const char *str)
1519 {
1520   unsigned char buf[64];
1521   int l = checksumtype2len(type);
1522
1523   if (!l)
1524     return;
1525   if (hexstr2bytes(buf, str, l) != l)
1526     {
1527       fprintf(stderr, "Invalid hex character in '%s'\n", str);
1528       return;
1529     }
1530   repodata_set_bin_checksum(data, handle, keyname, type, buf);
1531 }
1532
1533 const char *
1534 repodata_chk2str(Repodata *data, Id type, const unsigned char *buf)
1535 {
1536   int i, l;
1537   char *str, *s;
1538
1539   l = checksumtype2len(type);
1540   if (!l)
1541     return "";
1542   s = str = pool_alloctmpspace(data->repo->pool, 2 * l + 1);
1543   for (i = 0; i < l; i++)
1544     {
1545       unsigned char v = buf[i];
1546       unsigned char w = v >> 4;
1547       *s++ = w >= 10 ? w + ('a' - 10) : w + '0';
1548       w = v & 15;
1549       *s++ = w >= 10 ? w + ('a' - 10) : w + '0';
1550     }
1551   *s = 0;
1552   return str;
1553 }
1554
1555 Id
1556 repodata_globalize_id(Repodata *data, Id id)
1557 {
1558   if (!data || !data->localpool)
1559     return id;
1560   return str2id(data->repo->pool, stringpool_id2str(&data->spool, id), 1);
1561 }
1562
1563 void
1564 repodata_add_dirnumnum(Repodata *data, Id handle, Id keyname, Id dir, Id num, Id num2)
1565 {
1566   assert(dir);
1567 #if 0
1568 fprintf(stderr, "repodata_add_dirnumnum %d %d %d %d (%d)\n", handle, dir, num, num2, data->attriddatalen);
1569 #endif
1570   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_DIRNUMNUMARRAY, 3);
1571   data->attriddata[data->attriddatalen++] = dir;
1572   data->attriddata[data->attriddatalen++] = num;
1573   data->attriddata[data->attriddatalen++] = num2;
1574   data->attriddata[data->attriddatalen++] = 0;
1575 }
1576
1577 void
1578 repodata_add_dirstr(Repodata *data, Id handle, Id keyname, Id dir, const char *str)
1579 {
1580   Id stroff;
1581   int l;
1582
1583   assert(dir);
1584   l = strlen(str) + 1;
1585   data->attrdata = sat_extend(data->attrdata, data->attrdatalen, l, 1, REPODATA_ATTRDATA_BLOCK);
1586   memcpy(data->attrdata + data->attrdatalen, str, l);
1587   stroff = data->attrdatalen;
1588   data->attrdatalen += l;
1589
1590 #if 0
1591 fprintf(stderr, "repodata_add_dirstr %d %d %s (%d)\n", handle, dir, str,  data->attriddatalen);
1592 #endif
1593   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_DIRSTRARRAY, 2);
1594   data->attriddata[data->attriddatalen++] = dir;
1595   data->attriddata[data->attriddatalen++] = stroff;
1596   data->attriddata[data->attriddatalen++] = 0;
1597 }
1598
1599 void
1600 repodata_add_idarray(Repodata *data, Id handle, Id keyname, Id id)
1601 {
1602 #if 0
1603 fprintf(stderr, "repodata_add_idarray %d %d (%d)\n", handle, id, data->attriddatalen);
1604 #endif
1605   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_IDARRAY, 1);
1606   data->attriddata[data->attriddatalen++] = id;
1607   data->attriddata[data->attriddatalen++] = 0;
1608 }
1609
1610 void
1611 repodata_add_poolstr_array(Repodata *data, Id handle, Id keyname,
1612                            const char *str)
1613 {
1614   Id id;
1615   if (data->localpool)
1616     id = stringpool_str2id(&data->spool, str, 1);
1617   else
1618     id = str2id(data->repo->pool, str, 1);
1619   repodata_add_idarray(data, handle, keyname, id);
1620 }
1621
1622 Id
1623 repodata_create_struct(Repodata *data, Id handle, Id keyname)
1624 {
1625   Id newhandle = get_new_struct(data);
1626   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_COUNTED, 1);
1627   data->attriddata[data->attriddatalen++] = newhandle;
1628   data->attriddata[data->attriddatalen++] = 0;
1629   return newhandle;
1630 }
1631
1632 void
1633 repodata_merge_attrs(Repodata *data, Id dest, Id src)
1634 {
1635   Id *keyp;
1636   if (dest == src
1637       || !(keyp = data->structs[src < 0
1638                                 ? data->extraattrs[-1 - src]
1639                                 : data->attrs[src]]))
1640     return;
1641   dest = repodata_get_handle_int(data, dest);
1642   for (; *keyp; keyp += 2)
1643     repodata_insert_keyid(data, dest, keyp[0], keyp[1], 0);
1644 }
1645
1646 /*********************************/
1647
1648 /* unify with repo_write! */
1649
1650 #define EXTDATA_BLOCK 1023
1651 #define SCHEMATA_BLOCK 31
1652 #define SCHEMATADATA_BLOCK 255
1653
1654 struct extdata {
1655   unsigned char *buf;
1656   int len;
1657 };
1658
1659 static void
1660 data_addid(struct extdata *xd, Id x)
1661 {
1662   unsigned char *dp;
1663   xd->buf = sat_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
1664   dp = xd->buf + xd->len;
1665
1666   if (x >= (1 << 14))
1667     {
1668       if (x >= (1 << 28))
1669         *dp++ = (x >> 28) | 128;
1670       if (x >= (1 << 21))
1671         *dp++ = (x >> 21) | 128;
1672       *dp++ = (x >> 14) | 128;
1673     }
1674   if (x >= (1 << 7))
1675     *dp++ = (x >> 7) | 128;
1676   *dp++ = x & 127;
1677   xd->len = dp - xd->buf;
1678 }
1679
1680 static void
1681 data_addideof(struct extdata *xd, Id x, int eof)
1682 {
1683   if (x >= 64)
1684     x = (x & 63) | ((x & ~63) << 1);
1685   data_addid(xd, (eof ? x: x | 64));
1686 }
1687
1688 static void
1689 data_addblob(struct extdata *xd, unsigned char *blob, int len)
1690 {
1691   xd->buf = sat_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
1692   memcpy(xd->buf + xd->len, blob, len);
1693   xd->len += len;
1694 }
1695
1696 /*********************************/
1697
1698 static void
1699 addschema_prepare(Repodata *data, Id *schematacache)
1700 {
1701   int h, len, i;
1702   Id *sp;
1703
1704   memset(schematacache, 0, 256 * sizeof(Id));
1705   for (i = 0; i < data->nschemata; i++)
1706     {
1707       for (sp = data->schemadata + data->schemata[i], h = 0; *sp; len++)
1708         h = h * 7 + *sp++;
1709       h &= 255;
1710       schematacache[h] = i + 1;
1711     }
1712   data->schemadata = sat_extend_resize(data->schemadata, data->schemadatalen, sizeof(Id), SCHEMATADATA_BLOCK);
1713   data->schemata = sat_extend_resize(data->schemata, data->nschemata, sizeof(Id), SCHEMATA_BLOCK);
1714 }
1715
1716 static Id
1717 addschema(Repodata *data, Id *schema, Id *schematacache)
1718 {
1719   int h, len;
1720   Id *sp, cid;
1721
1722   for (sp = schema, len = 0, h = 0; *sp; len++)
1723     h = h * 7 + *sp++;
1724   h &= 255;
1725   len++;
1726
1727   cid = schematacache[h];
1728   if (cid)
1729     {
1730       cid--;
1731       if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
1732         return cid;
1733       /* cache conflict */
1734       for (cid = 0; cid < data->nschemata; cid++)
1735         if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
1736           return cid;
1737     }
1738   /* a new one. make room. */
1739   data->schemadata = sat_extend(data->schemadata, data->schemadatalen, len, sizeof(Id), SCHEMATADATA_BLOCK);
1740   data->schemata = sat_extend(data->schemata, data->nschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
1741   /* add schema */
1742   memcpy(data->schemadata + data->schemadatalen, schema, len * sizeof(Id));
1743   data->schemata[data->nschemata] = data->schemadatalen;
1744   data->schemadatalen += len;
1745   schematacache[h] = data->nschemata + 1;
1746 #if 0
1747 fprintf(stderr, "addschema: new schema\n");
1748 #endif
1749   return data->nschemata++;
1750 }
1751
1752 static void
1753 repodata_serialize_key(Repodata *data, struct extdata *newincore,
1754                        struct extdata *newvincore,
1755                        Id *schema, Id *schematacache,
1756                        Repokey *key, Id val)
1757 {
1758   /* Otherwise we have a new value.  Parse it into the internal
1759      form.  */
1760   Id *ida;
1761   struct extdata *xd;
1762   unsigned int oldvincorelen = 0;
1763   Id schemaid, *sp;
1764
1765   xd = newincore;
1766   if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
1767     {
1768       xd = newvincore;
1769       oldvincorelen = xd->len;
1770     }
1771   switch (key->type)
1772     {
1773       case REPOKEY_TYPE_VOID:
1774       case REPOKEY_TYPE_CONSTANT:
1775       case REPOKEY_TYPE_CONSTANTID:
1776         break;
1777       case REPOKEY_TYPE_STR:
1778         data_addblob(xd, data->attrdata + val, strlen((char *)(data->attrdata + val)) + 1);
1779         break;
1780       case REPOKEY_TYPE_MD5:
1781         data_addblob(xd, data->attrdata + val, SIZEOF_MD5);
1782         break;
1783       case REPOKEY_TYPE_SHA1:
1784         data_addblob(xd, data->attrdata + val, SIZEOF_SHA1);
1785         break;
1786       case REPOKEY_TYPE_ID:
1787       case REPOKEY_TYPE_NUM:
1788       case REPOKEY_TYPE_DIR:
1789         data_addid(xd, val);
1790         break;
1791       case REPOKEY_TYPE_IDARRAY:
1792         for (ida = data->attriddata + val; *ida; ida++)
1793           data_addideof(xd, ida[0], ida[1] ? 0 : 1);
1794         break;
1795       case REPOKEY_TYPE_DIRNUMNUMARRAY:
1796         for (ida = data->attriddata + val; *ida; ida += 3)
1797           {
1798             data_addid(xd, ida[0]);
1799             data_addid(xd, ida[1]);
1800             data_addideof(xd, ida[2], ida[3] ? 0 : 1);
1801           }
1802         break;
1803       case REPOKEY_TYPE_DIRSTRARRAY:
1804         for (ida = data->attriddata + val; *ida; ida += 2)
1805           {
1806             data_addideof(xd, ida[0], ida[2] ? 0 : 1);
1807             data_addblob(xd, data->attrdata + ida[1], strlen((char *)(data->attrdata + ida[1])) + 1);
1808           }
1809         break;
1810       case REPOKEY_TYPE_COUNTED:
1811         {
1812           int num = 0;
1813           schemaid = 0;
1814           for (ida = data->attriddata + val; *ida; ida++)
1815             {
1816 #if 0
1817               fprintf(stderr, "serialize struct %d\n", *ida);
1818 #endif
1819               sp = schema;
1820               Id *kp = data->structs[*ida];
1821               if (!kp)
1822                 continue;
1823               num++;
1824               for (;*kp; kp += 2)
1825                 {
1826 #if 0
1827                   fprintf(stderr, "  %s:%d\n", id2str(data->repo->pool, data->keys[*kp].name), kp[1]);
1828 #endif
1829                   *sp++ = *kp;
1830                 }
1831               *sp = 0;
1832               if (!schemaid)
1833                 schemaid = addschema(data, schema, schematacache);
1834               else if (schemaid != addschema(data, schema, schematacache))
1835                 {
1836                   fprintf(stderr, "  not yet implemented: substructs with different schemas\n");
1837                   exit(1);
1838                 }
1839 #if 0
1840               fprintf(stderr, "  schema %d\n", schemaid);
1841 #endif
1842             }
1843           if (!num)
1844             break;
1845           data_addid(xd, num);
1846           data_addid(xd, schemaid);
1847           for (ida = data->attriddata + val; *ida; ida++)
1848             {
1849               Id *kp = data->structs[*ida];
1850               if (!kp)
1851                 continue;
1852               for (;*kp; kp += 2)
1853                 {
1854                   repodata_serialize_key(data, newincore, newvincore,
1855                                          schema, schematacache,
1856                                          data->keys + *kp, kp[1]);
1857                 }
1858             }
1859           break;
1860         }
1861       default:
1862         fprintf(stderr, "don't know how to handle type %d\n", key->type);
1863         exit(1);
1864     }
1865   if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
1866     {
1867       /* put offset/len in incore */
1868       data_addid(newincore, data->lastverticaloffset + oldvincorelen);
1869       oldvincorelen = xd->len - oldvincorelen;
1870       data_addid(newincore, oldvincorelen);
1871     }
1872 }
1873
1874 void
1875 repodata_internalize(Repodata *data)
1876 {
1877   Repokey *key;
1878   Id entry, nentry;
1879   Id schematacache[256];
1880   Id schemaid, *schema, *sp, oldschema, *keyp, *seen;
1881   unsigned char *dp, *ndp;
1882   int newschema, oldcount;
1883   struct extdata newincore;
1884   struct extdata newvincore;
1885
1886   if (!data->attrs && !data->extraattrs)
1887     return;
1888
1889   newvincore.buf = data->vincore;
1890   newvincore.len = data->vincorelen;
1891
1892   schema = sat_malloc2(data->nkeys, sizeof(Id));
1893   seen = sat_malloc2(data->nkeys, sizeof(Id));
1894
1895   /* Merge the data already existing (in data->schemata, ->incoredata and
1896      friends) with the new attributes in data->attrs[].  */
1897   nentry = data->end - data->start;
1898   addschema_prepare(data, schematacache);
1899   memset(&newincore, 0, sizeof(newincore));
1900   data_addid(&newincore, 0);
1901   if (!data->attrs)
1902     nentry = 0;
1903   for (entry = data->extraattrs ? -data->nextra : 0; entry < nentry; entry++)
1904     {
1905       Id handle;
1906       memset(seen, 0, data->nkeys * sizeof(Id));
1907       sp = schema;
1908       dp = entry2data(data, entry);
1909       if (data->incoredata)
1910         dp = data_read_id(dp, &oldschema);
1911       else
1912         oldschema = 0;
1913 #if 0
1914 fprintf(stderr, "oldschema %d\n", oldschema);
1915 fprintf(stderr, "schemata %d\n", data->schemata[oldschema]);
1916 fprintf(stderr, "schemadata %p\n", data->schemadata);
1917 #endif
1918       /* seen: -1: old data  0: skipped  >0: id + 1 */
1919       newschema = 0;
1920       oldcount = 0;
1921       for (keyp = data->schemadata + data->schemata[oldschema]; *keyp; keyp++)
1922         {
1923           if (seen[*keyp])
1924             {
1925               fprintf(stderr, "Inconsistent old data (key occured twice).\n");
1926               exit(1);
1927             }
1928           seen[*keyp] = -1;
1929           *sp++ = *keyp;
1930           oldcount++;
1931         }
1932       handle = entry < 0 ? data->extraattrs[-1 - entry] : data->attrs[entry];
1933       keyp = data->structs[handle];
1934       if (keyp)
1935         for (; *keyp; keyp += 2)
1936           {
1937             if (!seen[*keyp])
1938               {
1939                 newschema = 1;
1940                 *sp++ = *keyp;
1941               }
1942             seen[*keyp] = keyp[1] + 1;
1943           }
1944       *sp++ = 0;
1945       if (newschema)
1946         /* Ideally we'd like to sort the new schema here, to ensure
1947            schema equality independend of the ordering.  We can't do that
1948            yet.  For once see below (old ids need to come before new ids).
1949            An additional difficulty is that we also need to move
1950            the values with the keys.  */
1951         schemaid = addschema(data, schema, schematacache);
1952       else
1953         schemaid = oldschema;
1954
1955
1956       /* Now create data blob.  We walk through the (possibly new) schema
1957          and either copy over old data, or insert the new.  */
1958       /* XXX Here we rely on the fact that the (new) schema has the form
1959          o1 o2 o3 o4 ... | n1 n2 n3 ...
1960          (oX being the old keyids (possibly overwritten), and nX being
1961           the new keyids).  This rules out sorting the keyids in order
1962          to ensure a small schema count.  */
1963       if (entry < 0)
1964         data->extraoffset[-1 - entry] = newincore.len;
1965       else
1966         data->incoreoffset[entry] = newincore.len;
1967       data_addid(&newincore, schemaid);
1968       for (keyp = data->schemadata + data->schemata[schemaid]; *keyp; keyp++)
1969         {
1970           key = data->keys + *keyp;
1971 #if 0
1972           fprintf(stderr, "internalize %d:%s:%s\n", entry, id2str(data->repo->pool, key->name), id2str(data->repo->pool, key->type));
1973 #endif
1974           ndp = dp;
1975           if (oldcount)
1976             {
1977               /* Skip the data associated with this old key.  */
1978               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
1979                 {
1980                   ndp = data_skip(dp, REPOKEY_TYPE_ID);
1981                   ndp = data_skip(ndp, REPOKEY_TYPE_ID);
1982                 }
1983               else if (key->storage == KEY_STORAGE_INCORE)
1984                 ndp = data_skip_recursive(data, dp, key);
1985               oldcount--;
1986             }
1987           if (seen[*keyp] == -1)
1988             {
1989               /* If this key was an old one _and_ was not overwritten with
1990                  a different value copy over the old value (we skipped it
1991                  above).  */
1992               if (dp != ndp)
1993                 data_addblob(&newincore, dp, ndp - dp);
1994               seen[*keyp] = 0;
1995             }
1996           else if (seen[*keyp])
1997             {
1998               /* Otherwise we have a new value.  Parse it into the internal
1999                  form.  */
2000               repodata_serialize_key(data, &newincore, &newvincore,
2001                                      schema, schematacache,
2002                                      key, seen[*keyp] - 1);
2003             }
2004           dp = ndp;
2005         }
2006       if (data->structs[handle])
2007         data->structs[handle] = sat_free(data->structs[handle]);
2008     }
2009   for (entry = 0; entry < data->nstructs; entry++)
2010     if (data->structs[entry])
2011       sat_free(data->structs[entry]);
2012   data->structs = sat_free(data->structs);
2013   data->lasthandle = 0;
2014   data->lastkey = 0;
2015   data->lastdatalen = 0;
2016   sat_free(schema);
2017   sat_free(seen);
2018
2019   sat_free(data->incoredata);
2020   data->incoredata = newincore.buf;
2021   data->incoredatalen = newincore.len;
2022   data->incoredatafree = 0;
2023
2024   sat_free(data->vincore);
2025   data->vincore = newvincore.buf;
2026   data->vincorelen = newvincore.len;
2027
2028   data->attrs = sat_free(data->attrs);
2029   data->extraattrs = sat_free(data->extraattrs);
2030   data->attrdata = sat_free(data->attrdata);
2031   data->attriddata = sat_free(data->attriddata);
2032   data->attrdatalen = 0;
2033   data->attriddatalen = 0;
2034 }
2035
2036 Id
2037 repodata_str2dir(Repodata *data, const char *dir, int create)
2038 {
2039   Id id, parent;
2040   const char *dire;
2041
2042   parent = 0;
2043   while (*dir == '/' && dir[1] == '/')
2044     dir++;
2045   if (*dir == '/' && !dir[1])
2046     return 1;
2047   while (*dir)
2048     {
2049       dire = strchrnul(dir, '/');
2050       if (data->localpool)
2051         id = stringpool_strn2id(&data->spool, dir, dire - dir, create);
2052       else
2053         id = strn2id(data->repo->pool, dir, dire - dir, create);
2054       if (!id)
2055         return 0;
2056       parent = dirpool_add_dir(&data->dirpool, parent, id, create);
2057       if (!parent)
2058         return 0;
2059       if (!*dire)
2060         break;
2061       dir = dire + 1;
2062       while (*dir == '/')
2063         dir++;
2064     }
2065   return parent;
2066 }
2067
2068 const char *
2069 repodata_dir2str(Repodata *data, Id did, const char *suf)
2070 {
2071   Pool *pool = data->repo->pool;
2072   int l = 0;
2073   Id parent, comp;
2074   const char *comps;
2075   char *p;
2076
2077   if (!did)
2078     return suf ? suf : "";
2079   parent = did;
2080   while (parent)
2081     {
2082       comp = dirpool_compid(&data->dirpool, parent);
2083       comps = stringpool_id2str(data->localpool ? &data->spool : &pool->ss, comp);
2084       l += strlen(comps);
2085       parent = dirpool_parent(&data->dirpool, parent);
2086       if (parent)
2087         l++;
2088     }
2089   if (suf)
2090     l += strlen(suf) + 1;
2091   p = pool_alloctmpspace(pool, l + 1) + l;
2092   *p = 0;
2093   if (suf)
2094     {
2095       p -= strlen(suf);
2096       strcpy(p, suf);
2097       *--p = '/';
2098     }
2099   parent = did;
2100   while (parent)
2101     {
2102       comp = dirpool_compid(&data->dirpool, parent);
2103       comps = stringpool_id2str(data->localpool ? &data->spool : &pool->ss, comp);
2104       l = strlen(comps);
2105       p -= l;
2106       strncpy(p, comps, l);
2107       parent = dirpool_parent(&data->dirpool, parent);
2108       if (parent)
2109         *--p = '/';
2110     }
2111   return p;
2112 }
2113
/*
 * Compress the LEN bytes at PAGE into CPAGE, passing MAX on as the size
 * limit of the output.  This simply forwards to compress_buf() and hands
 * back its return value unchanged.
 */
unsigned int
repodata_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max)
{
  unsigned int clen = compress_buf(page, len, cpage, max);

  return clen;
}
2119
2120 #define SOLV_ERROR_EOF              3
2121
/*
 * Read a 32 bit unsigned value, stored most significant byte first, from
 * FP.  If the stream ends before all four bytes were read, 0 is returned.
 */
static inline unsigned int
read_u32(FILE *fp)
{
  unsigned int value = 0;
  int remaining;

  for (remaining = 4; remaining > 0; remaining--)
    {
      int byte = getc(fp);

      if (byte == EOF)
        return 0;
      value = (value << 8) | byte;
    }
  return value;
}
2137
2138 #define SOLV_ERROR_EOF          3
2139 #define SOLV_ERROR_CORRUPT      6
2140
2141 /* Try to either setup on-demand paging (using FP as backing
2142    file), or in case that doesn't work (FP not seekable) slurps in
2143    all pages and deactivates paging.  */
2144 void
2145 repodata_read_or_setup_pages(Repodata *data, unsigned int pagesz, unsigned int blobsz)
2146 {
2147   FILE *fp = data->fp;
2148   unsigned int npages;
2149   unsigned int i;
2150   unsigned int can_seek;
2151   long cur_file_ofs;
2152   unsigned char buf[BLOB_PAGESIZE];
2153
2154   if (pagesz != BLOB_PAGESIZE)
2155     {
2156       /* We could handle this by slurping in everything.  */
2157       data->error = SOLV_ERROR_CORRUPT;
2158       return;
2159     }
2160   can_seek = 1;
2161   if ((cur_file_ofs = ftell(fp)) < 0)
2162     can_seek = 0;
2163   clearerr(fp);
2164   if (can_seek)
2165     data->pagefd = dup(fileno(fp));
2166   if (data->pagefd == -1)
2167     can_seek = 0;
2168
2169 #ifdef DEBUG_PAGING
2170   fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
2171 #endif
2172   npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
2173
2174   data->num_pages = npages;
2175   data->pages = sat_malloc2(npages, sizeof(data->pages[0]));
2176
2177   /* If we can't seek on our input we have to slurp in everything.  */
2178   if (!can_seek)
2179     data->blob_store = sat_malloc(npages * BLOB_PAGESIZE);
2180   for (i = 0; i < npages; i++)
2181     {
2182       unsigned int in_len = read_u32(fp);
2183       unsigned int compressed = in_len & 1;
2184       Attrblobpage *p = data->pages + i;
2185       in_len >>= 1;
2186 #ifdef DEBUG_PAGING
2187       fprintf (stderr, "page %d: len %d (%scompressed)\n",
2188                i, in_len, compressed ? "" : "not ");
2189 #endif
2190       if (can_seek)
2191         {
2192           cur_file_ofs += 4;
2193           p->mapped_at = -1;
2194           p->file_offset = cur_file_ofs;
2195           p->file_size = in_len * 2 + compressed;
2196           if (fseek(fp, in_len, SEEK_CUR) < 0)
2197             {
2198               perror ("fseek");
2199               fprintf (stderr, "can't seek after we thought we can\n");
2200               /* We can't fall back to non-seeking behaviour as we already
2201                  read over some data pages without storing them away.  */
2202               data->error = SOLV_ERROR_EOF;
2203               close(data->pagefd);
2204               data->pagefd = -1;
2205               return;
2206             }
2207           cur_file_ofs += in_len;
2208         }
2209       else
2210         {
2211           unsigned int out_len;
2212           void *dest = data->blob_store + i * BLOB_PAGESIZE;
2213           p->mapped_at = i * BLOB_PAGESIZE;
2214           p->file_offset = 0;
2215           p->file_size = 0;
2216           /* We can't seek, so suck everything in.  */
2217           if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
2218             {
2219               perror("fread");
2220               data->error = SOLV_ERROR_EOF;
2221               return;
2222             }
2223           if (compressed)
2224             {
2225               out_len = unchecked_decompress_buf(buf, in_len, dest, BLOB_PAGESIZE);
2226               if (out_len != BLOB_PAGESIZE && i < npages - 1)
2227                 {
2228                   data->error = SOLV_ERROR_CORRUPT;
2229                   return;
2230                 }
2231             }
2232         }
2233     }
2234 }
2235
2236 void
2237 repodata_disable_paging(Repodata *data)
2238 {
2239   if (maybe_load_repodata(data, 0)
2240       && data->num_pages)
2241     load_page_range (data, 0, data->num_pages - 1);
2242 }
2243 /*
2244 vim:cinoptions={.5s,g0,p5,t0,(0,^-0.5s,n-0.5s:tw=78:cindent:sw=4:
2245 */