src/repodata.c

   1 /*
   2  * Copyright (c) 2007, Novell Inc.
   3  *
   4  * This program is licensed under the BSD license, read LICENSE.BSD
   5  * for further information
   6  */
   7
   8 /*
   9  * repodata.c
  10  *
  11  * Manage data coming from one repository
  12  *
  13  */
  14
  15 #define _GNU_SOURCE
  16 #include <string.h>
  17 #include <fnmatch.h>
  18
  19 #include <stdio.h>
  20 #include <stdlib.h>
  21 #include <unistd.h>
  22 #include <assert.h>
  23
  24 #include "repo.h"
  25 #include "pool.h"
  26 #include "poolid_private.h"
  27 #include "util.h"
  28
  29 #include "repopack.h"
  30
/*
 * Page (de)compression helpers, defined outside this file.
 * unchecked_decompress_buf() is used by load_page_range() to expand a
 * compressed page from IN (IN_LEN bytes) into OUT; its return value is
 * treated there as the number of bytes produced.
 * NOTE(review): the "unchecked" name suggests OUT_LEN is not enforced by
 * the callee -- confirm against its definition.
 */
extern unsigned int compress_buf (const unsigned char *in, unsigned int in_len,
                                  unsigned char *out, unsigned int out_len);
extern unsigned int unchecked_decompress_buf (const unsigned char *in,
                                              unsigned int in_len,
                                              unsigned char *out,
                                              unsigned int out_len);

/* block size argument handed to sat_extend_resize() for the offset arrays */
#define REPODATA_BLOCK 255
  39
  40
  41 void
  42 repodata_init(Repodata *data, Repo *repo, int localpool)
  43 {
  44   memset(data, 0, sizeof (*data));
  45   data->repo = repo;
  46   data->localpool = localpool;
  47   if (localpool)
  48     stringpool_init_empty(&data->spool);
  49   data->keys = sat_calloc(1, sizeof(Repokey));
  50   data->nkeys = 1;
  51   data->schemata = sat_calloc(1, sizeof(Id));
  52   data->schemadata = sat_calloc(1, sizeof(Id));
  53   data->nschemata = 1;
  54   data->schemadatalen = 1;
  55   data->start = repo->start;
  56   data->end = repo->end;
  57   data->nextra = repo->nextra;
  58   data->extrastart = 0;
  59   data->incoreoffset = sat_extend_resize(0, data->end - data->start, sizeof(Id), REPODATA_BLOCK);
  60   data->extraoffset = sat_extend_resize(0, repo->nextra, sizeof(Id), REPODATA_BLOCK);
  61   data->pagefd = -1;
  62 }
  63
  64 void
  65 repodata_free(Repodata *data)
  66 {
  67   sat_free(data->keys);
  68   sat_free(data->schemata);
  69   sat_free(data->schemadata);
  70
  71   sat_free(data->spool.strings);
  72   sat_free(data->spool.stringspace);
  73   sat_free(data->spool.stringhashtbl);
  74
  75   sat_free(data->dirpool.dirs);
  76   sat_free(data->dirpool.dirtraverse);
  77
  78   sat_free(data->incoredata);
  79   sat_free(data->incoreoffset);
  80   sat_free(data->extraoffset);
  81   sat_free(data->verticaloffset);
  82
  83   sat_free(data->blob_store);
  84   sat_free(data->pages);
  85   sat_free(data->mapped);
  86
  87   sat_free(data->vincore);
  88
  89   sat_free(data->attrs);
  90   sat_free(data->extraattrs);
  91   sat_free(data->attrdata);
  92   sat_free(data->attriddata);
  93
  94   sat_free(data->location);
  95   sat_free(data->addedfileprovides);
  96
  97   if (data->pagefd != -1)
  98     close(data->pagefd);
  99 }
 100
 101 unsigned char *
 102 data_skip_recursive(Repodata *data, unsigned char *dp, Repokey *key)
 103 {
 104   KeyValue kv;
 105   if (key->type != REPOKEY_TYPE_COUNTED)
 106     return data_skip(dp, key->type);
 107   dp = data_fetch(dp, &kv, key);
 108   int num = kv.num;
 109   int schema = kv.id;
 110   while (num--)
 111     {
 112       Id *keyp = data->schemadata + data->schemata[schema];
 113       for (; *keyp; keyp++)
 114         dp = data_skip_recursive(data, dp, data->keys + *keyp);
 115     }
 116   return dp;
 117 }
 118
 119 static unsigned char *
 120 forward_to_key(Repodata *data, Id keyid, Id schemaid, unsigned char *dp)
 121 {
 122   Id k, *keyp;
 123
 124   keyp = data->schemadata + data->schemata[schemaid];
 125   while ((k = *keyp++) != 0)
 126     {
 127       if (k == keyid)
 128         return dp;
 129       if (data->keys[k].storage == KEY_STORAGE_VERTICAL_OFFSET)
 130         {
 131           dp = data_skip(dp, REPOKEY_TYPE_ID);  /* skip that offset */
 132           dp = data_skip(dp, REPOKEY_TYPE_ID);  /* skip that length */
 133           continue;
 134         }
 135       if (data->keys[k].storage != KEY_STORAGE_INCORE)
 136         continue;
 137       dp = data_skip_recursive(data, dp, data->keys + k);
 138     }
 139   return 0;
 140 }
 141
 142 #define BLOB_PAGEBITS 15
 143 #define BLOB_PAGESIZE (1 << BLOB_PAGEBITS)
 144
 145 static unsigned char *
 146 load_page_range(Repodata *data, unsigned int pstart, unsigned int pend)
 147 {
 148 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
 149    and are consecutive.  Return a pointer to the mapping of PSTART.  */
 150   unsigned char buf[BLOB_PAGESIZE];
 151   unsigned int i;
 152
 153   /* Quick check in case all pages are there already and consecutive.  */
 154   for (i = pstart; i <= pend; i++)
 155     if (data->pages[i].mapped_at == -1
 156         || (i > pstart
 157             && data->pages[i].mapped_at
 158                != data->pages[i-1].mapped_at + BLOB_PAGESIZE))
 159       break;
 160   if (i > pend)
 161     return data->blob_store + data->pages[pstart].mapped_at;
 162
 163   if (data->pagefd == -1)
 164     return 0;
 165
 166   /* Ensure that we can map the numbers of pages we need at all.  */
 167   if (pend - pstart + 1 > data->ncanmap)
 168     {
 169       unsigned int oldcan = data->ncanmap;
 170       data->ncanmap = pend - pstart + 1;
 171       if (data->ncanmap < 4)
 172         data->ncanmap = 4;
 173       data->mapped = sat_realloc2(data->mapped, data->ncanmap, sizeof(data->mapped[0]));
 174       memset (data->mapped + oldcan, 0, (data->ncanmap - oldcan) * sizeof (data->mapped[0]));
 175       data->blob_store = sat_realloc2(data->blob_store, data->ncanmap, BLOB_PAGESIZE);
 176 #ifdef DEBUG_PAGING
 177       fprintf (stderr, "PAGE: can map %d pages\n", data->ncanmap);
 178 #endif
 179     }
 180
 181   /* Now search for "cheap" space in our store.  Space is cheap if it's either
 182      free (very cheap) or contains pages we search for anyway.  */
 183
 184   /* Setup cost array.  */
 185   unsigned int cost[data->ncanmap];
 186   for (i = 0; i < data->ncanmap; i++)
 187     {
 188       unsigned int pnum = data->mapped[i];
 189       if (pnum == 0)
 190         cost[i] = 0;
 191       else
 192         {
 193           pnum--;
 194           Attrblobpage *p = data->pages + pnum;
 195           assert (p->mapped_at != -1);
 196           if (pnum >= pstart && pnum <= pend)
 197             cost[i] = 1;
 198           else
 199             cost[i] = 3;
 200         }
 201     }
 202
 203   /* And search for cheapest space.  */
 204   unsigned int best_cost = -1;
 205   unsigned int best = 0;
 206   unsigned int same_cost = 0;
 207   for (i = 0; i + pend - pstart < data->ncanmap; i++)
 208     {
 209       unsigned int c = cost[i];
 210       unsigned int j;
 211       for (j = 0; j < pend - pstart + 1; j++)
 212         c += cost[i+j];
 213       if (c < best_cost)
 214         best_cost = c, best = i;
 215       else if (c == best_cost)
 216         same_cost++;
 217       /* A null cost won't become better.  */
 218       if (c == 0)
 219         break;
 220     }
 221   /* If all places have the same cost we would thrash on slot 0.  Avoid
 222      this by doing a round-robin strategy in this case.  */
 223   if (same_cost == data->ncanmap - pend + pstart - 1)
 224     best = data->rr_counter++ % (data->ncanmap - pend + pstart);
 225
 226   /* So we want to map our pages from [best] to [best+pend-pstart].
 227      Use a very simple strategy, which doesn't make the best use of
 228      our resources, but works.  Throw away all pages in that range
 229      (even ours) then copy around ours (in case they were outside the
 230      range) or read them in.  */
 231   for (i = best; i < best + pend - pstart + 1; i++)
 232     {
 233       unsigned int pnum = data->mapped[i];
 234       if (pnum--
 235           /* If this page is exactly at the right place already,
 236              no need to evict it.  */
 237           && pnum != pstart + i - best)
 238         {
 239           /* Evict this page.  */
 240 #ifdef DEBUG_PAGING
 241           fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
 242 #endif
 243           cost[i] = 0;
 244           data->mapped[i] = 0;
 245           data->pages[pnum].mapped_at = -1;
 246         }
 247     }
 248
 249   /* Everything is free now.  Read in the pages we want.  */
 250   for (i = pstart; i <= pend; i++)
 251     {
 252       Attrblobpage *p = data->pages + i;
 253       unsigned int pnum = i - pstart + best;
 254       void *dest = data->blob_store + pnum * BLOB_PAGESIZE;
 255       if (p->mapped_at != -1)
 256         {
 257           if (p->mapped_at != pnum * BLOB_PAGESIZE)
 258             {
 259 #ifdef DEBUG_PAGING
 260               fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
 261 #endif
 262               /* Still mapped somewhere else, so just copy it from there.  */
 263               memcpy (dest, data->blob_store + p->mapped_at, BLOB_PAGESIZE);
 264               data->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
 265             }
 266         }
 267       else
 268         {
 269           unsigned int in_len = p->file_size;
 270           unsigned int compressed = in_len & 1;
 271           in_len >>= 1;
 272 #ifdef DEBUG_PAGING
 273           fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
 274 #endif
 275           if (pread(data->pagefd, compressed ? buf : dest, in_len, p->file_offset) != in_len)
 276             {
 277               perror ("mapping pread");
 278               return 0;
 279             }
 280           if (compressed)
 281             {
 282               unsigned int out_len;
 283               out_len = unchecked_decompress_buf(buf, in_len,
 284                                                   dest, BLOB_PAGESIZE);
 285               if (out_len != BLOB_PAGESIZE && i < data->num_pages - 1)
 286                 {
 287                   fprintf(stderr, "can't decompress\n");
 288                   return 0;
 289                 }
 290 #ifdef DEBUG_PAGING
 291               fprintf (stderr, " (expand %d to %d)", in_len, out_len);
 292 #endif
 293             }
 294 #ifdef DEBUG_PAGING
 295           fprintf (stderr, "\n");
 296 #endif
 297         }
 298       p->mapped_at = pnum * BLOB_PAGESIZE;
 299       data->mapped[pnum] = i + 1;
 300     }
 301   return data->blob_store + best * BLOB_PAGESIZE;
 302 }
 303
 304 static unsigned char *
 305 make_vertical_available(Repodata *data, Repokey *key, Id off, Id len)
 306 {
 307   unsigned char *dp;
 308   if (!len)
 309     return 0;
 310   if (off >= data->lastverticaloffset)
 311     {
 312       off -= data->lastverticaloffset;
 313       if (off + len > data->vincorelen)
 314         return 0;
 315       return data->vincore + off;
 316     }
 317   if (off + len > key->size)
 318     return 0;
 319   /* we now have the offset, go into vertical */
 320   off += data->verticaloffset[key - data->keys];
 321   /* fprintf(stderr, "key %d page %d\n", key->name, off / BLOB_PAGESIZE); */
 322   dp = load_page_range(data, off / BLOB_PAGESIZE, (off + len - 1) / BLOB_PAGESIZE);
 323   if (dp)
 324     dp += off % BLOB_PAGESIZE;
 325   return dp;
 326 }
 327
 328 static inline unsigned char *
 329 get_data(Repodata *data, Repokey *key, unsigned char **dpp)
 330 {
 331   unsigned char *dp = *dpp;
 332
 333   if (!dp)
 334     return 0;
 335   if (key->storage == KEY_STORAGE_INCORE)
 336     {
 337       /* hmm, this is a bit expensive */
 338       *dpp = data_skip_recursive(data, dp, key);
 339       return dp;
 340     }
 341   else if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 342     {
 343       Id off, len;
 344       dp = data_read_id(dp, &off);
 345       dp = data_read_id(dp, &len);
 346       *dpp = dp;
 347       return make_vertical_available(data, key, off, len);
 348     }
 349   return 0;
 350 }
 351
 352 static inline int
 353 maybe_load_repodata(Repodata *data, Id *keyid)
 354 {
 355   if (data->state == REPODATA_STUB)
 356     {
 357       if (data->loadcallback)
 358         {
 359           if (keyid)
 360             {
 361               /* key order may change when loading */
 362               int i;
 363               Id name = data->keys[*keyid].name;
 364               Id type = data->keys[*keyid].type;
 365               data->loadcallback(data);
 366               if (data->state == REPODATA_AVAILABLE)
 367                 {
 368                   for (i = 1; i < data->nkeys; i++)
 369                     if (data->keys[i].name == name && data->keys[i].type == type)
 370                       break;
 371                   if (i < data->nkeys)
 372                     *keyid = i;
 373                   else
 374                     return 0;
 375                 }
 376             }
 377           else
 378             data->loadcallback(data);
 379         }
 380       else
 381         data->state = REPODATA_ERROR;
 382     }
 383   if (data->state == REPODATA_AVAILABLE)
 384     return 1;
 385   data->state = REPODATA_ERROR;
 386   return 0;
 387 }
 388
 389 static inline unsigned char*
 390 entry2data(Repodata *data, Id entry)
 391 {
 392   if (entry < 0)
 393     return data->incoredata + data->extraoffset[-1 - entry];
 394   else
 395     return data->incoredata + data->incoreoffset[entry];
 396 }
 397
 398 Id
 399 repodata_lookup_id(Repodata *data, Id entry, Id keyid)
 400 {
 401   Id schema;
 402   Repokey *key;
 403   Id id, *keyp;
 404   unsigned char *dp;
 405
 406   if (!maybe_load_repodata(data, &keyid))
 407     return 0;
 408   dp = entry2data(data, entry);
 409   if (!dp)
 410     return 0;
 411   dp = data_read_id(dp, &schema);
 412   /* make sure the schema of this solvable contains the key */
 413   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 414     if (!*keyp)
 415       return 0;
 416   dp = forward_to_key(data, keyid, schema, dp);
 417   key = data->keys + keyid;
 418   dp = get_data(data, key, &dp);
 419   if (!dp)
 420     return 0;
 421   if (key->type == REPOKEY_TYPE_CONSTANTID)
 422     return key->size;
 423   if (key->type != REPOKEY_TYPE_ID)
 424     return 0;
 425   dp = data_read_id(dp, &id);
 426   return id;
 427 }
 428
 429 const char *
 430 repodata_lookup_str(Repodata *data, Id entry, Id keyid)
 431 {
 432   Id schema;
 433   Repokey *key;
 434   Id id, *keyp;
 435   unsigned char *dp;
 436
 437   if (!maybe_load_repodata(data, &keyid))
 438     return 0;
 439
 440   dp = entry2data(data, entry);
 441   if (!dp)
 442     return 0;
 443   dp = data_read_id(dp, &schema);
 444   /* make sure the schema of this solvable contains the key */
 445   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 446     if (!*keyp)
 447       return 0;
 448   dp = forward_to_key(data, keyid, schema, dp);
 449   key = data->keys + keyid;
 450   dp = get_data(data, key, &dp);
 451   if (!dp)
 452     return 0;
 453   if (key->type == REPOKEY_TYPE_STR)
 454     return (const char *)dp;
 455   if (key->type == REPOKEY_TYPE_CONSTANTID)
 456     return id2str(data->repo->pool, key->size);
 457   if (key->type == REPOKEY_TYPE_ID)
 458     dp = data_read_id(dp, &id);
 459   else
 460     return 0;
 461   if (data->localpool)
 462     return data->spool.stringspace + data->spool.strings[id];
 463   return id2str(data->repo->pool, id);
 464 }
 465
 466 int
 467 repodata_lookup_num(Repodata *data, Id entry, Id keyid, unsigned int *value)
 468 {
 469   Id schema;
 470   Repokey *key;
 471   Id *keyp;
 472   KeyValue kv;
 473   unsigned char *dp;
 474
 475   *value = 0;
 476
 477   if (!maybe_load_repodata(data, &keyid))
 478     return 0;
 479
 480   dp = entry2data(data, entry);
 481   if (!dp)
 482     return 0;
 483   dp = data_read_id(dp, &schema);
 484   /* make sure the schema of this solvable contains the key */
 485   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 486     if (!*keyp)
 487       return 0;
 488   dp = forward_to_key(data, keyid, schema, dp);
 489   key = data->keys + keyid;
 490   dp = get_data(data, key, &dp);
 491   if (!dp)
 492     return 0;
 493   if (key->type == REPOKEY_TYPE_NUM
 494       || key->type == REPOKEY_TYPE_U32
 495       || key->type == REPOKEY_TYPE_CONSTANT)
 496     {
 497       dp = data_fetch(dp, &kv, key);
 498       *value = kv.num;
 499       return 1;
 500     }
 501   return 0;
 502 }
 503
 504 int
 505 repodata_lookup_void(Repodata *data, Id entry, Id keyid)
 506 {
 507   Id schema;
 508   Id *keyp;
 509   unsigned char *dp;
 510   if (!maybe_load_repodata(data, &keyid))
 511     return 0;
 512   dp = entry2data(data, entry);
 513   if (!dp)
 514     return 0;
 515   dp = data_read_id(dp, &schema);
 516   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 517     if (!*keyp)
 518       return 0;
 519   return 1;
 520 }
 521
 522 const unsigned char *
 523 repodata_lookup_bin_checksum(Repodata *data, Id entry, Id keyid, Id *typep)
 524 {
 525   Id schema;
 526   Id *keyp;
 527   Repokey *key;
 528   unsigned char *dp;
 529
 530   if (!maybe_load_repodata(data, &keyid))
 531     return 0;
 532   dp = entry2data(data, entry);
 533   if (!dp)
 534     return 0;
 535   dp = data_read_id(dp, &schema);
 536   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 537     if (!*keyp)
 538       return 0;
 539   dp = forward_to_key(data, keyid, schema, dp);
 540   key = data->keys + keyid;
 541   *typep = key->type;
 542   return get_data(data, key, &dp);
 543 }
 544
 545 void
 546 repodata_search(Repodata *data, Id entry, Id keyname, int (*callback)(void *cbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv), void *cbdata)
 547 {
 548   Id schema;
 549   Repokey *key;
 550   Id k, keyid, *kp, *keyp;
 551   unsigned char *dp, *ddp;
 552   int onekey = 0;
 553   int stop;
 554   KeyValue kv;
 555
 556   if (entry < 0
 557       || !maybe_load_repodata(data, 0))
 558     return;
 559
 560   dp = entry2data(data, entry);
 561   if (!dp)
 562     return;
 563   dp = data_read_id(dp, &schema);
 564   keyp = data->schemadata + data->schemata[schema];
 565   if (keyname)
 566     {
 567       /* search in a specific key */
 568       for (kp = keyp; (k = *kp++) != 0; )
 569         if (data->keys[k].name == keyname)
 570           break;
 571       if (k == 0)
 572         return;
 573       dp = forward_to_key(data, k, schema, dp);
 574       if (!dp)
 575         return;
 576       keyp = kp - 1;
 577       onekey = 1;
 578     }
 579   while ((keyid = *keyp++) != 0)
 580     {
 581       stop = 0;
 582       key = data->keys + keyid;
 583       ddp = get_data(data, key, &dp);
 584       do
 585         {
 586           ddp = data_fetch(ddp, &kv, key);
 587           if (!ddp)
 588             break;
 589           if (key->type == REPOKEY_TYPE_COUNTED)
 590             {
 591               int num = kv.num;
 592               int subschema = kv.id;
 593               Repokey *countkey = key;
 594               kv.eof = 0;
 595               callback(cbdata, data->repo->pool->solvables + data->start + entry, data, countkey, &kv);
 596               while (num--)
 597                 {
 598                   Id *kp = data->schemadata + data->schemata[subschema];
 599                   for (; *kp; kp++)
 600                     {
 601                       key = data->keys + *kp;
 602                       ddp = data_fetch(ddp, &kv, key);
 603                       if (!ddp)
 604                         exit(1);
 605                       callback(cbdata, data->repo->pool->solvables + data->start + entry, data, key, &kv);
 606                     }
 607                   kv.eof = 1;
 608                   callback(cbdata, data->repo->pool->solvables + data->start + entry, data, countkey, &kv);
 609                 }
 610               kv.eof = 2;
 611               stop = callback(cbdata, data->repo->pool->solvables + data->start + entry, data, countkey, &kv);
 612             }
 613           else
 614             stop = callback(cbdata, data->repo->pool->solvables + data->start + entry, data, key, &kv);
 615         }
 616       while (!kv.eof && !stop);
 617       if (onekey || stop > SEARCH_NEXT_KEY)
 618         return;
 619     }
 620 }
 621
 622 static void
 623 dataiterator_newdata(Dataiterator *di)
 624 {
 625   Id keyname = di->keyname;
 626   Repodata *data = di->data;
 627   di->nextkeydp = 0;
 628
 629   if (data->state == REPODATA_STUB)
 630     {
 631       if (keyname)
 632         {
 633           int j;
 634           for (j = 1; j < data->nkeys; j++)
 635             if (keyname == data->keys[j].name)
 636               break;
 637           if (j == data->nkeys)
 638             return;
 639         }
 640       /* load it */
 641       if (data->loadcallback)
 642         data->loadcallback(data);
 643       else
 644         data->state = REPODATA_ERROR;
 645     }
 646   if (data->state == REPODATA_ERROR)
 647     return;
 648
 649   Id schema;
 650   unsigned char *dp = data->incoredata;
 651   if (!dp)
 652     return;
 653   if (di->solvid >= 0)
 654     dp += data->incoreoffset[di->solvid - data->start];
 655   else
 656     dp += data->extraoffset[-1 - di->solvid - data->extrastart];
 657   dp = data_read_id(dp, &schema);
 658   Id *keyp = data->schemadata + data->schemata[schema];
 659   if (keyname)
 660     {
 661       Id k, *kp;
 662       /* search in a specific key */
 663       for (kp = keyp; (k = *kp++) != 0; )
 664         if (data->keys[k].name == keyname)
 665           break;
 666       if (k == 0)
 667         return;
 668       dp = forward_to_key(data, k, schema, dp);
 669       if (!dp)
 670         return;
 671       keyp = kp - 1;
 672     }
 673   Id keyid = *keyp++;
 674   if (!keyid)
 675     return;
 676
 677   di->data = data;
 678   di->key = di->data->keys + keyid;
 679   di->keyp = keyp;
 680   di->dp = 0;
 681
 682   di->nextkeydp = dp;
 683   di->dp = get_data(di->data, di->key, &di->nextkeydp);
 684   di->kv.eof = 0;
 685 }
 686
 687 void
 688 dataiterator_init(Dataiterator *di, Repo *repo, Id p, Id keyname,
 689                   const char *match, int flags)
 690 {
 691   di->flags = flags;
 692   if (p)
 693     {
 694       di->solvid = p;
 695       di->flags |= __SEARCH_ONESOLVABLE;
 696       di->data = repo->repodata - 1;
 697       if (flags & SEARCH_NO_STORAGE_SOLVABLE)
 698         di->state = 0;
 699       else
 700         di->state = 1;
 701     }
 702   else
 703     {
 704       di->solvid = repo->start - 1;
 705       if (di->solvid < 0)
 706         {
 707           fprintf(stderr, "A repo contains the NULL solvable!\n");
 708           exit(1);
 709         }
 710       di->data = repo->repodata + repo->nrepodata - 1;
 711       di->state = 0;
 712     }
 713
 714   di->match = match;
 715   if ((di->flags & SEARCH_STRINGMASK) == SEARCH_REGEX)
 716     {
 717       if (di->match)
 718         {
 719           /* We feed multiple lines eventually (e.g. authors or descriptions),
 720              so set REG_NEWLINE. */
 721           di->regex_err =
 722             regcomp(&di->regex, di->match,
 723               REG_EXTENDED | REG_NOSUB | REG_NEWLINE
 724               | ((di->flags & SEARCH_NOCASE) ? REG_ICASE : 0));
 725 #if 0
 726           if (di->regex_err != 0)
 727             {
 728               fprintf(stderr, "Given regex failed to compile: %s\n", di->match);
 729               fprintf(stderr, "regcomp error code: %d\n", di->regex_err);
 730               exit(1);
 731             }
 732 #else
 733         }
 734       else
 735         {
 736           di->flags |= (di->flags & SEARCH_STRINGMASK) | SEARCH_STRING;
 737           di->regex_err = 0;
 738 #endif
 739         }
 740     }
 741
 742   di->keyname = keyname;
 743   static Id zeroid = 0;
 744   di->keyp = &zeroid;
 745   di->kv.eof = 1;
 746   di->repo = repo;
 747   di->idp = 0;
 748   di->subkeyp = 0;
 749 }
 750
 751 /* FIXME factor and merge with repo_matchvalue */
 752 static int
 753 dataiterator_match_int_real(Dataiterator *di, int flags, const void *vmatch)
 754 {
 755   KeyValue *kv = &di->kv;
 756   const char *match = vmatch;
 757   if ((flags & SEARCH_STRINGMASK) != 0)
 758     {
 759       switch (di->key->type)
 760         {
 761         case REPOKEY_TYPE_ID:
 762         case REPOKEY_TYPE_IDARRAY:
 763           if (di->data && di->data->localpool)
 764             kv->str = stringpool_id2str(&di->data->spool, kv->id);
 765           else
 766             kv->str = id2str(di->repo->pool, kv->id);
 767           break;
 768         case REPOKEY_TYPE_STR:
 769           break;
 770         default:
 771           return 0;
 772         }
 773       /* Maybe skip the kind specifier.  Do this only for SOLVABLE attributes,
 774          for the others we can't know if a colon separates a kind or not.  */
 775       if ((flags & SEARCH_SKIP_KIND)
 776           && di->key->storage == KEY_STORAGE_SOLVABLE)
 777         {
 778           const char *s = strchr(kv->str, ':');
 779           if (s)
 780             kv->str = s + 1;
 781         }
 782       switch ((flags & SEARCH_STRINGMASK))
 783         {
 784           case SEARCH_SUBSTRING:
 785             if (flags & SEARCH_NOCASE)
 786               {
 787                 if (!strcasestr(kv->str, match))
 788                   return 0;
 789               }
 790             else
 791               {
 792                 if (!strstr(kv->str, match))
 793                   return 0;
 794               }
 795             break;
 796           case SEARCH_STRING:
 797             if (flags & SEARCH_NOCASE)
 798               {
 799                 if (strcasecmp(match, kv->str))
 800                   return 0;
 801               }
 802             else
 803               {
 804                 if (strcmp(match, kv->str))
 805                   return 0;
 806               }
 807             break;
 808           case SEARCH_GLOB:
 809             if (fnmatch(match, kv->str, (flags & SEARCH_NOCASE) ? FNM_CASEFOLD : 0))
 810               return 0;
 811             break;
 812           case SEARCH_REGEX:
 813             if (regexec((const regex_t *)vmatch, kv->str, 0, NULL, 0))
 814               return 0;
 815             break;
 816           default:
 817             return 0;
 818         }
 819     }
 820   return 1;
 821 }
 822
 823 static int
 824 dataiterator_match_int(Dataiterator *di)
 825 {
 826   if ((di->flags & SEARCH_STRINGMASK) == SEARCH_REGEX)
 827     return dataiterator_match_int_real(di, di->flags, &di->regex);
 828   else
 829     return dataiterator_match_int_real(di, di->flags, di->match);
 830 }
 831
 832 int
 833 dataiterator_match(Dataiterator *di, int flags, const void *vmatch)
 834 {
 835   return dataiterator_match_int_real(di, flags, vmatch);
 836 }
 837
/*
 * Pseudo keys for the attributes that dataiterator_step() reads
 * straight from the Solvable structure (and the repo's rpmdbid array)
 * rather than from a repodata; all of them use KEY_STORAGE_SOLVABLE.
 *
 * dataiterator_step() picks the entry as solvablekeys + state - 1, so
 * the table has one slot for every id from SOLVABLE_NAME up to
 * RPM_RPMDBID.
 * NOTE(review): the entries presumably have to stay in ascending id
 * order for that indexing to work -- confirm against the id
 * definitions.
 */
static Repokey solvablekeys[RPM_RPMDBID - SOLVABLE_NAME + 1] = {
  { SOLVABLE_NAME,        REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_ARCH,        REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_EVR,         REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_VENDOR,      REPOKEY_TYPE_ID, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_PROVIDES,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_OBSOLETES,   REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_CONFLICTS,   REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_REQUIRES,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_RECOMMENDS,  REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_SUGGESTS,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_SUPPLEMENTS, REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_ENHANCES,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { SOLVABLE_FRESHENS,    REPOKEY_TYPE_IDARRAY, 0, KEY_STORAGE_SOLVABLE },
  { RPM_RPMDBID,          REPOKEY_TYPE_U32, 0, KEY_STORAGE_SOLVABLE },
};
 854
 855 int
 856 dataiterator_step(Dataiterator *di)
 857 {
 858 restart:
 859   while (1)
 860     {
 861       if (di->state)
 862         {
 863           if (di->idp)
 864             {
 865               Id *idp = di->idp;
 866               if (*idp)
 867                 {
 868                   di->kv.id = *idp;
 869                   di->idp++;
 870                   di->kv.eof = idp[1] ? 0 : 1;
 871                   goto weg2;
 872                 }
 873               else
 874                 di->idp = 0;
 875             }
 876           Solvable *s = di->repo->pool->solvables + di->solvid;
 877           int state = di->state;
 878           di->key = solvablekeys + state - 1;
 879           if (di->keyname)
 880             di->state = RPM_RPMDBID;
 881           else
 882             di->state++;
 883           if (state == 1)
 884             {
 885               di->data = 0;
 886               if (di->keyname)
 887                 state = di->keyname - 1;
 888             }
 889           switch (state + 1)
 890             {
 891               case SOLVABLE_NAME:
 892                 if (!s->name)
 893                   continue;
 894                 di->kv.id = s->name;
 895                 di->kv.eof = 1;
 896                 break;
 897               case SOLVABLE_ARCH:
 898                 if (!s->arch)
 899                   continue;
 900                 di->kv.id = s->arch;
 901                 di->kv.eof = 1;
 902                 break;
 903               case SOLVABLE_EVR:
 904                 if (!s->evr)
 905                   continue;
 906                 di->kv.id = s->evr;
 907                 di->kv.eof = 1;
 908                 break;
 909               case SOLVABLE_VENDOR:
 910                 if (!s->vendor)
 911                   continue;
 912                 di->kv.id = s->vendor;
 913                 di->kv.eof = 1;
 914                 break;
 915               case SOLVABLE_PROVIDES:
 916                 di->idp = s->provides
 917                     ? di->repo->idarraydata + s->provides : 0;
 918                 continue;
 919               case SOLVABLE_OBSOLETES:
 920                 di->idp = s->obsoletes
 921                     ? di->repo->idarraydata + s->obsoletes : 0;
 922                 continue;
 923               case SOLVABLE_CONFLICTS:
 924                 di->idp = s->conflicts
 925                     ? di->repo->idarraydata + s->conflicts : 0;
 926                 continue;
 927               case SOLVABLE_REQUIRES:
 928                 di->idp = s->requires
 929                     ? di->repo->idarraydata + s->requires : 0;
 930                 continue;
 931               case SOLVABLE_RECOMMENDS:
 932                 di->idp = s->recommends
 933                     ? di->repo->idarraydata + s->recommends : 0;
 934                 continue;
 935               case SOLVABLE_SUPPLEMENTS:
 936                 di->idp = s->supplements
 937                     ? di->repo->idarraydata + s->supplements : 0;
 938                 continue;
 939               case SOLVABLE_SUGGESTS:
 940                 di->idp = s->suggests
 941                     ? di->repo->idarraydata + s->suggests : 0;
 942                 continue;
 943               case SOLVABLE_ENHANCES:
 944                 di->idp = s->enhances
 945                     ? di->repo->idarraydata + s->enhances : 0;
 946                 continue;
 947               case SOLVABLE_FRESHENS:
 948                 di->idp = s->freshens
 949                     ? di->repo->idarraydata + s->freshens : 0;
 950                 continue;
 951               case RPM_RPMDBID:
 952                 if (!di->repo->rpmdbid)
 953                   continue;
 954                 di->kv.num = di->repo->rpmdbid[di->solvid - di->repo->start];
 955                 di->kv.eof = 1;
 956                 break;
 957               default:
 958                 di->data = di->repo->repodata - 1;
 959                 di->kv.eof = 1;
 960                 di->state = 0;
 961                 continue;
 962             }
 963         }
 964       else if (di->subkeyp)
 965         {
 966           Id keyid;
 967           if (!di->subnum)
 968             {
 969               /* Send end-of-substruct.  We are here only when we saw a
 970                  _COUNTED key one level up.  Since then we didn't increment
 971                  ->keyp, so it still can be found at keyp[-1].  */
 972               di->kv.eof = 2;
 973               di->key = di->data->keys + di->keyp[-1];
 974               di->subkeyp = 0;
 975             }
 976           else if (!(keyid = *di->subkeyp++))
 977             {
 978               /* Send end-of-element.  See above for keyp[-1].  */
 979               di->kv.eof = 1;
 980               di->key = di->data->keys + di->keyp[-1];
 981               di->subkeyp = di->data->schemadata + di->data->schemata[di->subschema];
 982               di->subnum--;
 983             }
 984           else
 985             {
 986               di->key = di->data->keys + keyid;
 987               di->dp = data_fetch(di->dp, &di->kv, di->key);
 988               if (!di->dp)
 989                 exit(1);
 990             }
 991         }
 992       else
 993         {
 994           if (di->kv.eof)
 995             di->dp = 0;
 996           else
 997             di->dp = data_fetch(di->dp, &di->kv, di->key);
 998
 999           while (!di->dp)
1000             {
1001               Id keyid;
1002               if (di->keyname || !(keyid = *di->keyp++))
1003                 {
1004                   while (1)
1005                     {
1006                       Repo *repo = di->repo;
1007                       Repodata *data = ++di->data;
1008                       if (data >= repo->repodata + repo->nrepodata)
1009                         {
1010                           if (di->flags & __SEARCH_ONESOLVABLE)
1011                             return 0;
1012                           if (di->solvid >= 0)
1013                             {
1014                               while (++di->solvid < repo->end)
1015                                 if (repo->pool->solvables[di->solvid].repo == repo)
1016                                   break;
1017                               if (di->solvid >= repo->end)
1018                                 {
1019                                   if (!(di->flags & SEARCH_EXTRA))
1020                                     goto skiprepo;
1021                                   di->solvid = -1;
1022                                   if (di->solvid < -repo->nextra)
1023                                     goto skiprepo;
1024                                 }
1025                             }
1026                           else
1027                             {
1028                               --di->solvid;
1029                               if (di->solvid < -repo->nextra)
1030                                 {
1031 skiprepo:;
1032                                   Pool *pool = di->repo->pool;
1033                                   if (!(di->flags & SEARCH_ALL_REPOS)
1034                                       || di->repo == pool->repos[pool->nrepos - 1])
1035                                     return 0;
1036                                   int i;
1037                                   for (i = 0; i < pool->nrepos; i++)
1038                                     if (di->repo == pool->repos[i])
1039                                       break;
1040                                   di->repo = pool->repos[i + 1];
1041                                   dataiterator_init(di, di->repo, 0, di->keyname, di->match, di->flags);
1042                                   continue;
1043                                 }
1044                             }
1045                           di->data = repo->repodata - 1;
1046                           if (di->solvid < 0
1047                               || (di->flags & SEARCH_NO_STORAGE_SOLVABLE))
1048                             continue;
1049                           static Id zeroid = 0;
1050                           di->keyp = &zeroid;
1051                           di->state = 1;
1052                           goto restart;
1053                         }
1054                       if ((di->solvid < 0 && (-1 - di->solvid) >= data->extrastart && (-1 - di->solvid) < (data->extrastart + data->nextra))
1055                           || (di->solvid >= 0 && di->solvid >= data->start && di->solvid < data->end))
1056                         {
1057                           dataiterator_newdata(di);
1058                           if (di->nextkeydp)
1059                             break;
1060                         }
1061                     }
1062                 }
1063               else
1064                 {
1065                   di->key = di->data->keys + keyid;
1066                   di->dp = get_data(di->data, di->key, &di->nextkeydp);
1067                 }
1068               di->dp = data_fetch(di->dp, &di->kv, di->key);
1069             }
1070           if (di->key->type == REPOKEY_TYPE_COUNTED)
1071             {
1072               di->subnum = di->kv.num;
1073               di->subschema = di->kv.id;
1074               di->kv.eof = 0;
1075               di->subkeyp = di->data->schemadata + di->data->schemata[di->subschema];
1076             }
1077         }
1078 weg2:
1079       if (!di->match
1080           || dataiterator_match_int(di))
1081         break;
1082     }
1083   return 1;
1084 }
1085
1086 void
1087 dataiterator_skip_attribute(Dataiterator *di)
1088 {
1089   if (di->state)
1090     di->idp = 0;
1091   /* This will make the next _step call to retrieve the next field.  */
1092   di->kv.eof = 1;
1093 }
1094
1095 void
1096 dataiterator_skip_solvable(Dataiterator *di)
1097 {
1098   /* We're done with this field.  */
1099   di->kv.eof = 1;
1100   /* And with solvable data.  */
1101   di->state = 0;
1102   /* And with all keys for this repodata and thing. */
1103   static Id zeroid = 0;
1104   di->keyp = &zeroid;
1105   /* And with all repodatas for this thing.  */
1106   di->data = di->repo->repodata + di->repo->nrepodata - 1;
1107   /* Hence the next call to _step will retrieve the next thing.  */
1108 }
1109
1110 void
1111 dataiterator_skip_repo(Dataiterator *di)
1112 {
1113   dataiterator_skip_solvable(di);
1114   /* We're done with all solvables and all extra things for this repo.  */
1115   di->solvid = -1 - di->repo->nextra;
1116 }
1117
1118 void
1119 dataiterator_jump_to_solvable(Dataiterator *di, Solvable *s)
1120 {
1121   di->repo = s->repo;
1122   /* Simulate us being done with the solvable before the requested one.  */
1123   dataiterator_skip_solvable(di);
1124   di->solvid = s - s->repo->pool->solvables;
1125   di->solvid--;
1126 }
1127
1128 void
1129 dataiterator_jump_to_repo(Dataiterator *di, Repo *repo)
1130 {
1131   di->repo = repo;
1132   dataiterator_skip_solvable(di);
1133   di->solvid = repo->start - 1;
1134 }
1135
1136 /* extend repodata so that it includes solvables p */
1137 void
1138 repodata_extend(Repodata *data, Id p)
1139 {
1140   if (data->start == data->end)
1141     data->start = data->end = p;
1142   if (p >= data->end)
1143     {
1144       int old = data->end - data->start;
1145       int new = p - data->end + 1;
1146       if (data->attrs)
1147         {
1148           data->attrs = sat_extend(data->attrs, old, new, sizeof(Id), REPODATA_BLOCK);
1149           memset(data->attrs + old, 0, new * sizeof(Id));
1150         }
1151       data->incoreoffset = sat_extend(data->incoreoffset, old, new, sizeof(Id), REPODATA_BLOCK);
1152       memset(data->incoreoffset + old, 0, new * sizeof(Id));
1153       data->end = p + 1;
1154     }
1155   if (p < data->start)
1156     {
1157       int old = data->end - data->start;
1158       int new = data->start - p;
1159       if (data->attrs)
1160         {
1161           data->attrs = sat_extend_resize(data->attrs, old + new, sizeof(Id), REPODATA_BLOCK);
1162           memmove(data->attrs + new, data->attrs, old * sizeof(Id));
1163           memset(data->attrs, 0, new * sizeof(Id));
1164         }
1165       data->incoreoffset = sat_extend_resize(data->incoreoffset, old + new, sizeof(Id), REPODATA_BLOCK);
1166       memmove(data->incoreoffset + new, data->incoreoffset, old * sizeof(Id));
1167       memset(data->incoreoffset, 0, new * sizeof(Id));
1168       data->start = p;
1169     }
1170 }
1171
1172 void
1173 repodata_extend_extra(Repodata *data, int nextra)
1174 {
1175   if (nextra <= data->nextra)
1176     return;
1177   if (data->extraattrs)
1178     {
1179       data->extraattrs = sat_extend(data->extraattrs, data->nextra, nextra - data->nextra, sizeof(Id), REPODATA_BLOCK);
1180       memset(data->extraattrs + data->nextra, 0, (nextra - data->nextra) * sizeof (Id));
1181     }
1182   data->extraoffset = sat_extend(data->extraoffset, data->nextra, nextra - data->nextra, sizeof(Id), REPODATA_BLOCK);
1183   memset(data->extraoffset + data->nextra, 0, (nextra - data->nextra) * sizeof(Id));
1184   data->nextra = nextra;
1185 }
1186
1187 void
1188 repodata_extend_block(Repodata *data, Id start, Id num)
1189 {
1190   if (!num)
1191     return;
1192   if (!data->incoreoffset)
1193     {
1194       data->incoreoffset = sat_calloc_block(num, sizeof(Id), REPODATA_BLOCK);
1195       data->start = start;
1196       data->end = start + num;
1197       return;
1198     }
1199   repodata_extend(data, start);
1200   if (num > 1)
1201     repodata_extend(data, start + num - 1);
1202 }
1203
1204 /**********************************************************************/
1205
1206 #define REPODATA_ATTRS_BLOCK 63
1207 #define REPODATA_ATTRDATA_BLOCK 1023
1208 #define REPODATA_ATTRIDDATA_BLOCK 63
1209
1210 static inline Id
1211 get_new_struct(Repodata *data)
1212 {
1213   /* Make sure to never give out struct id 0. */
1214   if (!data->structs)
1215     {
1216       data->structs = sat_extend(0, 0, 2, sizeof(Id *), REPODATA_BLOCK);
1217       data->structs[0] = 0;
1218       data->structs[1] = 0;
1219       data->nstructs = 2;
1220       return 1;
1221     }
1222   data->structs = sat_extend(data->structs, data->nstructs, 1, sizeof(Id *), REPODATA_BLOCK);
1223   data->structs[data->nstructs] = 0;
1224   return data->nstructs++;
1225 }
1226
1227 static Id
1228 repodata_get_handle_int(Repodata *data, Id entry)
1229 {
1230   Id *ap;
1231   if (!data->attrs && entry >= 0)
1232     {
1233       data->attrs = sat_calloc_block(data->end - data->start, sizeof(Id),
1234                                      REPODATA_BLOCK);
1235     }
1236   else if (!data->extraattrs && entry < 0)
1237     data->extraattrs = sat_calloc_block(data->nextra, sizeof(Id), REPODATA_BLOCK);
1238   if (entry < 0)
1239     ap = &data->extraattrs[-1 - entry];
1240   else
1241     ap = &data->attrs[entry];
1242   if (!*ap)
1243     *ap = get_new_struct(data);
1244   return *ap;
1245 }
1246
1247 Id
1248 repodata_get_handle(Repodata *data, Id entry)
1249 {
1250   return repodata_get_handle_int(data, entry);
1251 }
1252
1253 static void
1254 repodata_insert_keyid(Repodata *data, Id handle, Id keyid, Id val, int overwrite)
1255 {
1256   Id *pp;
1257   Id *ap;
1258   int i;
1259   ap = data->structs[handle];
1260   i = 0;
1261   if (ap)
1262     {
1263       for (pp = ap; *pp; pp += 2)
1264         /* Determine equality based on the name only, allows us to change
1265            type (when overwrite is set), and makes TYPE_CONSTANT work.  */
1266         if (data->keys[*pp].name == data->keys[keyid].name)
1267           break;
1268       if (*pp)
1269         {
1270           if (overwrite)
1271             {
1272               pp[0] = keyid;
1273               pp[1] = val;
1274             }
1275           return;
1276         }
1277       i = pp - ap;
1278     }
1279   ap = sat_extend(ap, i, 3, sizeof(Id), REPODATA_ATTRS_BLOCK);
1280   data->structs[handle] = ap;
1281   pp = ap + i;
1282   *pp++ = keyid;
1283   *pp++ = val;
1284   *pp = 0;
1285 }
1286
1287 void
1288 repodata_set(Repodata *data, Id handle, Repokey *key, Id val)
1289 {
1290   Id keyid;
1291
1292   /* find key in keys */
1293   for (keyid = 1; keyid < data->nkeys; keyid++)
1294     if (data->keys[keyid].name == key->name && data->keys[keyid].type == key->type)
1295       {
1296         if ((key->type == REPOKEY_TYPE_CONSTANT || key->type == REPOKEY_TYPE_CONSTANTID) && key->size != data->keys[keyid].size)
1297           continue;
1298         break;
1299       }
1300   if (keyid == data->nkeys)
1301     {
1302       /* allocate new key */
1303       data->keys = sat_realloc2(data->keys, data->nkeys + 1, sizeof(Repokey));
1304       data->keys[data->nkeys++] = *key;
1305       if (data->verticaloffset)
1306         {
1307           data->verticaloffset = sat_realloc2(data->verticaloffset, data->nkeys, sizeof(Id));
1308           data->verticaloffset[data->nkeys - 1] = 0;
1309         }
1310     }
1311   repodata_insert_keyid(data, handle, keyid, val, 1);
1312 }
1313
1314 void
1315 repodata_set_id(Repodata *data, Id handle, Id keyname, Id id)
1316 {
1317   Repokey key;
1318   key.name = keyname;
1319   key.type = REPOKEY_TYPE_ID;
1320   key.size = 0;
1321   key.storage = KEY_STORAGE_INCORE;
1322   repodata_set(data, handle, &key, id);
1323 }
1324
1325 void
1326 repodata_set_num(Repodata *data, Id handle, Id keyname, unsigned int num)
1327 {
1328   Repokey key;
1329   key.name = keyname;
1330   key.type = REPOKEY_TYPE_NUM;
1331   key.size = 0;
1332   key.storage = KEY_STORAGE_INCORE;
1333   repodata_set(data, handle, &key, (Id)num);
1334 }
1335
1336 void
1337 repodata_set_poolstr(Repodata *data, Id handle, Id keyname, const char *str)
1338 {
1339   Repokey key;
1340   Id id;
1341   if (data->localpool)
1342     id = stringpool_str2id(&data->spool, str, 1);
1343   else
1344     id = str2id(data->repo->pool, str, 1);
1345   key.name = keyname;
1346   key.type = REPOKEY_TYPE_ID;
1347   key.size = 0;
1348   key.storage = KEY_STORAGE_INCORE;
1349   repodata_set(data, handle, &key, id);
1350 }
1351
1352 void
1353 repodata_set_constant(Repodata *data, Id handle, Id keyname, unsigned int constant)
1354 {
1355   Repokey key;
1356   key.name = keyname;
1357   key.type = REPOKEY_TYPE_CONSTANT;
1358   key.size = constant;
1359   key.storage = KEY_STORAGE_INCORE;
1360   repodata_set(data, handle, &key, 0);
1361 }
1362
1363 void
1364 repodata_set_constantid(Repodata *data, Id handle, Id keyname, Id id)
1365 {
1366   Repokey key;
1367   key.name = keyname;
1368   key.type = REPOKEY_TYPE_CONSTANTID;
1369   key.size = id;
1370   key.storage = KEY_STORAGE_INCORE;
1371   repodata_set(data, handle, &key, 0);
1372 }
1373
1374 void
1375 repodata_set_void(Repodata *data, Id handle, Id keyname)
1376 {
1377   Repokey key;
1378   key.name = keyname;
1379   key.type = REPOKEY_TYPE_VOID;
1380   key.size = 0;
1381   key.storage = KEY_STORAGE_INCORE;
1382   repodata_set(data, handle, &key, 0);
1383 }
1384
1385 void
1386 repodata_set_str(Repodata *data, Id handle, Id keyname, const char *str)
1387 {
1388   Repokey key;
1389   int l;
1390
1391   l = strlen(str) + 1;
1392   key.name = keyname;
1393   key.type = REPOKEY_TYPE_STR;
1394   key.size = 0;
1395   key.storage = KEY_STORAGE_INCORE;
1396   data->attrdata = sat_extend(data->attrdata, data->attrdatalen, l, 1, REPODATA_ATTRDATA_BLOCK);
1397   memcpy(data->attrdata + data->attrdatalen, str, l);
1398   repodata_set(data, handle, &key, data->attrdatalen);
1399   data->attrdatalen += l;
1400 }
1401
1402 static void
1403 repodata_add_array(Repodata *data, Id handle, Id keyname, Id keytype, int entrysize)
1404 {
1405   int oldsize;
1406   Id *ida, *pp;
1407
1408   pp = data->structs[handle];
1409   if (pp)
1410     for (; *pp; pp += 2)
1411       if (data->keys[*pp].name == keyname && data->keys[*pp].type == keytype)
1412         break;
1413   if (!pp || !*pp)
1414     {
1415       /* not found. allocate new key */
1416       Repokey key;
1417       key.name = keyname;
1418       key.type = keytype;
1419       key.size = 0;
1420       key.storage = KEY_STORAGE_INCORE;
1421       data->attriddata = sat_extend(data->attriddata, data->attriddatalen, entrysize + 1, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
1422       repodata_set(data, handle, &key, data->attriddatalen);
1423       return;
1424     }
1425   oldsize = 0;
1426   for (ida = data->attriddata + pp[1]; *ida; ida += entrysize)
1427     oldsize += entrysize;
1428   if (ida + 1 == data->attriddata + data->attriddatalen)
1429     {
1430       /* this was the last entry, just append it */
1431       data->attriddata = sat_extend(data->attriddata, data->attriddatalen, entrysize, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
1432       data->attriddatalen--;    /* overwrite terminating 0  */
1433     }
1434   else
1435     {
1436       /* too bad. move to back. */
1437       data->attriddata = sat_extend(data->attriddata, data->attriddatalen,  oldsize + entrysize + 1, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
1438       memcpy(data->attriddata + data->attriddatalen, data->attriddata + pp[1], oldsize * sizeof(Id));
1439       pp[1] = data->attriddatalen;
1440       data->attriddatalen += oldsize;
1441     }
1442 }
1443
1444 static inline int
1445 checksumtype2len(Id type)
1446 {
1447   switch (type)
1448     {
1449     case REPOKEY_TYPE_MD5:
1450       return SIZEOF_MD5;
1451     case REPOKEY_TYPE_SHA1:
1452       return SIZEOF_SHA1;
1453     case REPOKEY_TYPE_SHA256:
1454       return SIZEOF_SHA256;
1455     default:
1456       return 0;
1457     }
1458 }
1459
1460 void
1461 repodata_set_bin_checksum(Repodata *data, Id handle, Id keyname, Id type,
1462                       const unsigned char *str)
1463 {
1464   Repokey key;
1465   int l = checksumtype2len(type);
1466
1467   if (!l)
1468     return;
1469   key.name = keyname;
1470   key.type = type;
1471   key.size = 0;
1472   key.storage = KEY_STORAGE_INCORE;
1473   data->attrdata = sat_extend(data->attrdata, data->attrdatalen, l, 1, REPODATA_ATTRDATA_BLOCK);
1474   memcpy(data->attrdata + data->attrdatalen, str, l);
1475   repodata_set(data, handle, &key, data->attrdatalen);
1476   data->attrdatalen += l;
1477 }
1478
/* Value (0..15) of the hex digit C, or -1 if C is not a hex digit.  */
static int
hexdigit2val(int c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

/*
 * Decode the first 2 * BUFLEN hex digits of STR into BUFLEN bytes at BUF.
 *
 * Returns BUFLEN on success and 0 as soon as a character that is not a
 * hex digit (including the terminating NUL) is met; scanning stops at
 * that character.  On failure BUF may have been partially written.
 */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  int n;

  for (n = 0; n < buflen; n++, str += 2)
    {
      int hi, lo;

      hi = hexdigit2val(str[0]);
      if (hi < 0)
        return 0;
      buf[n] = hi;
      /* Only look at the second character once the first one is valid,
         so that we never read beyond the end of the string.  */
      lo = hexdigit2val(str[1]);
      if (lo < 0)
        return 0;
      buf[n] = (hi << 4) | lo;
    }
  return buflen;
}
1503
1504 void
1505 repodata_set_checksum(Repodata *data, Id handle, Id keyname, Id type,
1506                       const char *str)
1507 {
1508   unsigned char buf[64];
1509   int l = checksumtype2len(type);
1510
1511   if (!l)
1512     return;
1513   if (hexstr2bytes(buf, str, l) != l)
1514     {
1515       fprintf(stderr, "Invalid hex character in '%s'\n", str);
1516       return;
1517     }
1518   repodata_set_bin_checksum(data, handle, keyname, type, buf);
1519 }
1520
1521 const char *
1522 repodata_chk2str(Repodata *data, Id type, const unsigned char *buf)
1523 {
1524   int i, l;
1525   char *str, *s;
1526
1527   l = checksumtype2len(type);
1528   if (!l)
1529     return "";
1530   s = str = pool_alloctmpspace(data->repo->pool, 2 * l + 1);
1531   for (i = 0; i < l; i++)
1532     {
1533       unsigned char v = buf[i];
1534       unsigned char w = v >> 4;
1535       *s++ = w >= 10 ? w + ('a' - 10) : w + '0';
1536       w = v & 15;
1537       *s++ = w >= 10 ? w + ('a' - 10) : w + '0';
1538     }
1539   *s = 0;
1540   return str;
1541 }
1542
1543 Id
1544 repodata_globalize_id(Repodata *data, Id id)
1545 {
1546   if (!data || !data->localpool)
1547     return id;
1548   return str2id(data->repo->pool, stringpool_id2str(&data->spool, id), 1);
1549 }
1550
1551 void
1552 repodata_add_dirnumnum(Repodata *data, Id handle, Id keyname, Id dir, Id num, Id num2)
1553 {
1554   assert(dir);
1555 #if 0
1556 fprintf(stderr, "repodata_add_dirnumnum %d %d %d %d (%d)\n", handle, dir, num, num2, data->attriddatalen);
1557 #endif
1558   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_DIRNUMNUMARRAY, 3);
1559   data->attriddata[data->attriddatalen++] = dir;
1560   data->attriddata[data->attriddatalen++] = num;
1561   data->attriddata[data->attriddatalen++] = num2;
1562   data->attriddata[data->attriddatalen++] = 0;
1563 }
1564
1565 void
1566 repodata_add_dirstr(Repodata *data, Id handle, Id keyname, Id dir, const char *str)
1567 {
1568   Id stroff;
1569   int l;
1570
1571   assert(dir);
1572   l = strlen(str) + 1;
1573   data->attrdata = sat_extend(data->attrdata, data->attrdatalen, l, 1, REPODATA_ATTRDATA_BLOCK);
1574   memcpy(data->attrdata + data->attrdatalen, str, l);
1575   stroff = data->attrdatalen;
1576   data->attrdatalen += l;
1577
1578 #if 0
1579 fprintf(stderr, "repodata_add_dirstr %d %d %s (%d)\n", handle, dir, str,  data->attriddatalen);
1580 #endif
1581   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_DIRSTRARRAY, 2);
1582   data->attriddata[data->attriddatalen++] = dir;
1583   data->attriddata[data->attriddatalen++] = stroff;
1584   data->attriddata[data->attriddatalen++] = 0;
1585 }
1586
1587 void
1588 repodata_add_idarray(Repodata *data, Id handle, Id keyname, Id id)
1589 {
1590 #if 0
1591 fprintf(stderr, "repodata_add_idarray %d %d (%d)\n", handle, id, data->attriddatalen);
1592 #endif
1593   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_IDARRAY, 1);
1594   data->attriddata[data->attriddatalen++] = id;
1595   data->attriddata[data->attriddatalen++] = 0;
1596 }
1597
1598 void
1599 repodata_add_poolstr_array(Repodata *data, Id handle, Id keyname,
1600                            const char *str)
1601 {
1602   Id id;
1603   if (data->localpool)
1604     id = stringpool_str2id(&data->spool, str, 1);
1605   else
1606     id = str2id(data->repo->pool, str, 1);
1607   repodata_add_idarray(data, handle, keyname, id);
1608 }
1609
1610 Id
1611 repodata_create_struct(Repodata *data, Id handle, Id keyname)
1612 {
1613   Id newhandle = get_new_struct(data);
1614   repodata_add_array(data, handle, keyname, REPOKEY_TYPE_COUNTED, 1);
1615   data->attriddata[data->attriddatalen++] = newhandle;
1616   data->attriddata[data->attriddatalen++] = 0;
1617   return newhandle;
1618 }
1619
1620 void
1621 repodata_merge_attrs(Repodata *data, Id dest, Id src)
1622 {
1623   Id *keyp;
1624   if (dest == src
1625       || !(keyp = data->structs[src < 0
1626                                 ? data->extraattrs[-1 - src]
1627                                 : data->attrs[src]]))
1628     return;
1629   dest = repodata_get_handle_int(data, dest);
1630   for (; *keyp; keyp += 2)
1631     repodata_insert_keyid(data, dest, keyp[0], keyp[1], 0);
1632 }
1633
1634 /*********************************/
1635
1636 /* unify with repo_write! */
1637
1638 #define EXTDATA_BLOCK 1023
1639 #define SCHEMATA_BLOCK 31
1640 #define SCHEMATADATA_BLOCK 255
1641
1642 struct extdata {
1643   unsigned char *buf;
1644   int len;
1645 };
1646
1647 static void
1648 data_addid(struct extdata *xd, Id x)
1649 {
1650   unsigned char *dp;
1651   xd->buf = sat_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
1652   dp = xd->buf + xd->len;
1653
1654   if (x >= (1 << 14))
1655     {
1656       if (x >= (1 << 28))
1657         *dp++ = (x >> 28) | 128;
1658       if (x >= (1 << 21))
1659         *dp++ = (x >> 21) | 128;
1660       *dp++ = (x >> 14) | 128;
1661     }
1662   if (x >= (1 << 7))
1663     *dp++ = (x >> 7) | 128;
1664   *dp++ = x & 127;
1665   xd->len = dp - xd->buf;
1666 }
1667
1668 static void
1669 data_addideof(struct extdata *xd, Id x, int eof)
1670 {
1671   if (x >= 64)
1672     x = (x & 63) | ((x & ~63) << 1);
1673   data_addid(xd, (eof ? x: x | 64));
1674 }
1675
1676 static void
1677 data_addblob(struct extdata *xd, unsigned char *blob, int len)
1678 {
1679   xd->buf = sat_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
1680   memcpy(xd->buf + xd->len, blob, len);
1681   xd->len += len;
1682 }
1683
1684 /*********************************/
1685
1686 static void
1687 addschema_prepare(Repodata *data, Id *schematacache)
1688 {
1689   int h, len, i;
1690   Id *sp;
1691
1692   memset(schematacache, 0, 256 * sizeof(Id));
1693   for (i = 0; i < data->nschemata; i++)
1694     {
1695       for (sp = data->schemadata + data->schemata[i], h = 0; *sp; len++)
1696         h = h * 7 + *sp++;
1697       h &= 255;
1698       schematacache[h] = i + 1;
1699     }
1700   data->schemadata = sat_extend_resize(data->schemadata, data->schemadatalen, sizeof(Id), SCHEMATADATA_BLOCK);
1701   data->schemata = sat_extend_resize(data->schemata, data->nschemata, sizeof(Id), SCHEMATA_BLOCK);
1702 }
1703
1704 static Id
1705 addschema(Repodata *data, Id *schema, Id *schematacache)
1706 {
1707   int h, len;
1708   Id *sp, cid;
1709
1710   for (sp = schema, len = 0, h = 0; *sp; len++)
1711     h = h * 7 + *sp++;
1712   h &= 255;
1713   len++;
1714
1715   cid = schematacache[h];
1716   if (cid)
1717     {
1718       cid--;
1719       if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
1720         return cid;
1721       /* cache conflict */
1722       for (cid = 0; cid < data->nschemata; cid++)
1723         if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
1724           return cid;
1725     }
1726   /* a new one. make room. */
1727   data->schemadata = sat_extend(data->schemadata, data->schemadatalen, len, sizeof(Id), SCHEMATADATA_BLOCK);
1728   data->schemata = sat_extend(data->schemata, data->nschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
1729   /* add schema */
1730   memcpy(data->schemadata + data->schemadatalen, schema, len * sizeof(Id));
1731   data->schemata[data->nschemata] = data->schemadatalen;
1732   data->schemadatalen += len;
1733   schematacache[h] = data->nschemata + 1;
1734 #if 0
1735 fprintf(stderr, "addschema: new schema\n");
1736 #endif
1737   return data->nschemata++;
1738 }
1739
1740 static void
1741 repodata_serialize_key(Repodata *data, struct extdata *newincore,
1742                        struct extdata *newvincore,
1743                        Id *schema, Id *schematacache,
1744                        Repokey *key, Id val)
1745 {
1746   /* Otherwise we have a new value.  Parse it into the internal
1747      form.  */
1748   Id *ida;
1749   struct extdata *xd;
1750   unsigned int oldvincorelen = 0;
1751   Id schemaid, *sp;
1752
1753   xd = newincore;
1754   if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
1755     {
1756       xd = newvincore;
1757       oldvincorelen = xd->len;
1758     }
1759   switch (key->type)
1760     {
1761       case REPOKEY_TYPE_VOID:
1762       case REPOKEY_TYPE_CONSTANT:
1763       case REPOKEY_TYPE_CONSTANTID:
1764         break;
1765       case REPOKEY_TYPE_STR:
1766         data_addblob(xd, data->attrdata + val, strlen((char *)(data->attrdata + val)) + 1);
1767         break;
1768       case REPOKEY_TYPE_MD5:
1769         data_addblob(xd, data->attrdata + val, SIZEOF_MD5);
1770         break;
1771       case REPOKEY_TYPE_SHA1:
1772         data_addblob(xd, data->attrdata + val, SIZEOF_SHA1);
1773         break;
1774       case REPOKEY_TYPE_ID:
1775       case REPOKEY_TYPE_NUM:
1776       case REPOKEY_TYPE_DIR:
1777         data_addid(xd, val);
1778         break;
1779       case REPOKEY_TYPE_IDARRAY:
1780         for (ida = data->attriddata + val; *ida; ida++)
1781           data_addideof(xd, ida[0], ida[1] ? 0 : 1);
1782         break;
1783       case REPOKEY_TYPE_DIRNUMNUMARRAY:
1784         for (ida = data->attriddata + val; *ida; ida += 3)
1785           {
1786             data_addid(xd, ida[0]);
1787             data_addid(xd, ida[1]);
1788             data_addideof(xd, ida[2], ida[3] ? 0 : 1);
1789           }
1790         break;
1791       case REPOKEY_TYPE_DIRSTRARRAY:
1792         for (ida = data->attriddata + val; *ida; ida += 2)
1793           {
1794             data_addideof(xd, ida[0], ida[2] ? 0 : 1);
1795             data_addblob(xd, data->attrdata + ida[1], strlen((char *)(data->attrdata + ida[1])) + 1);
1796           }
1797         break;
1798       case REPOKEY_TYPE_COUNTED:
1799         {
1800           int num = 0;
1801           schemaid = 0;
1802           for (ida = data->attriddata + val; *ida; ida++)
1803             {
1804 #if 0
1805               fprintf(stderr, "serialize struct %d\n", *ida);
1806 #endif
1807               sp = schema;
1808               Id *kp = data->structs[*ida];
1809               if (!kp)
1810                 continue;
1811               num++;
1812               for (;*kp; kp += 2)
1813                 {
1814 #if 0
1815                   fprintf(stderr, "  %s:%d\n", id2str(data->repo->pool, data->keys[*kp].name), kp[1]);
1816 #endif
1817                   *sp++ = *kp;
1818                 }
1819               *sp = 0;
1820               if (!schemaid)
1821                 schemaid = addschema(data, schema, schematacache);
1822               else if (schemaid != addschema(data, schema, schematacache))
1823                 {
1824                   fprintf(stderr, "  not yet implemented: substructs with different schemas\n");
1825                   exit(1);
1826                 }
1827 #if 0
1828               fprintf(stderr, "  schema %d\n", schemaid);
1829 #endif
1830             }
1831           if (!num)
1832             break;
1833           data_addid(xd, num);
1834           data_addid(xd, schemaid);
1835           for (ida = data->attriddata + val; *ida; ida++)
1836             {
1837               Id *kp = data->structs[*ida];
1838               if (!kp)
1839                 continue;
1840               for (;*kp; kp += 2)
1841                 {
1842                   repodata_serialize_key(data, newincore, newvincore,
1843                                          schema, schematacache,
1844                                          data->keys + *kp, kp[1]);
1845                 }
1846             }
1847           break;
1848         }
1849       default:
1850         fprintf(stderr, "don't know how to handle type %d\n", key->type);
1851         exit(1);
1852     }
1853   if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
1854     {
1855       /* put offset/len in incore */
1856       data_addid(newincore, data->lastverticaloffset + oldvincorelen);
1857       oldvincorelen = xd->len - oldvincorelen;
1858       data_addid(newincore, oldvincorelen);
1859     }
1860 }
1861
1862 void
1863 repodata_internalize(Repodata *data)
1864 {
1865   Repokey *key;
1866   Id entry, nentry;
1867   Id schematacache[256];
1868   Id schemaid, *schema, *sp, oldschema, *keyp, *seen;
1869   unsigned char *dp, *ndp;
1870   int newschema, oldcount;
1871   struct extdata newincore;
1872   struct extdata newvincore;
1873
1874   if (!data->attrs && !data->extraattrs)
1875     return;
1876
1877   newvincore.buf = data->vincore;
1878   newvincore.len = data->vincorelen;
1879
1880   schema = sat_malloc2(data->nkeys, sizeof(Id));
1881   seen = sat_malloc2(data->nkeys, sizeof(Id));
1882
1883   /* Merge the data already existing (in data->schemata, ->incoredata and
1884      friends) with the new attributes in data->attrs[].  */
1885   nentry = data->end - data->start;
1886   addschema_prepare(data, schematacache);
1887   memset(&newincore, 0, sizeof(newincore));
1888   data_addid(&newincore, 0);
1889   if (!data->attrs)
1890     nentry = 0;
1891   for (entry = data->extraattrs ? -data->nextra : 0; entry < nentry; entry++)
1892     {
1893       Id handle;
1894       memset(seen, 0, data->nkeys * sizeof(Id));
1895       sp = schema;
1896       dp = entry2data(data, entry);
1897       if (data->incoredata)
1898         dp = data_read_id(dp, &oldschema);
1899       else
1900         oldschema = 0;
1901 #if 0
1902 fprintf(stderr, "oldschema %d\n", oldschema);
1903 fprintf(stderr, "schemata %d\n", data->schemata[oldschema]);
1904 fprintf(stderr, "schemadata %p\n", data->schemadata);
1905 #endif
1906       /* seen: -1: old data  0: skipped  >0: id + 1 */
1907       newschema = 0;
1908       oldcount = 0;
1909       for (keyp = data->schemadata + data->schemata[oldschema]; *keyp; keyp++)
1910         {
1911           if (seen[*keyp])
1912             {
1913               fprintf(stderr, "Inconsistent old data (key occured twice).\n");
1914               exit(1);
1915             }
1916           seen[*keyp] = -1;
1917           *sp++ = *keyp;
1918           oldcount++;
1919         }
1920       handle = entry < 0 ? data->extraattrs[-1 - entry] : data->attrs[entry];
1921       keyp = data->structs[handle];
1922       if (keyp)
1923         for (; *keyp; keyp += 2)
1924           {
1925             if (!seen[*keyp])
1926               {
1927                 newschema = 1;
1928                 *sp++ = *keyp;
1929               }
1930             seen[*keyp] = keyp[1] + 1;
1931           }
1932       *sp++ = 0;
1933       if (newschema)
1934         /* Ideally we'd like to sort the new schema here, to ensure
1935            schema equality independend of the ordering.  We can't do that
1936            yet.  For once see below (old ids need to come before new ids).
1937            An additional difficulty is that we also need to move
1938            the values with the keys.  */
1939         schemaid = addschema(data, schema, schematacache);
1940       else
1941         schemaid = oldschema;
1942
1943
1944       /* Now create data blob.  We walk through the (possibly new) schema
1945          and either copy over old data, or insert the new.  */
1946       /* XXX Here we rely on the fact that the (new) schema has the form
1947          o1 o2 o3 o4 ... | n1 n2 n3 ...
1948          (oX being the old keyids (possibly overwritten), and nX being
1949           the new keyids).  This rules out sorting the keyids in order
1950          to ensure a small schema count.  */
1951       if (entry < 0)
1952         data->extraoffset[-1 - entry] = newincore.len;
1953       else
1954         data->incoreoffset[entry] = newincore.len;
1955       data_addid(&newincore, schemaid);
1956       for (keyp = data->schemadata + data->schemata[schemaid]; *keyp; keyp++)
1957         {
1958           key = data->keys + *keyp;
1959 #if 0
1960           fprintf(stderr, "internalize %d:%s:%s\n", entry, id2str(data->repo->pool, key->name), id2str(data->repo->pool, key->type));
1961 #endif
1962           ndp = dp;
1963           if (oldcount)
1964             {
1965               /* Skip the data associated with this old key.  */
1966               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
1967                 {
1968                   ndp = data_skip(dp, REPOKEY_TYPE_ID);
1969                   ndp = data_skip(ndp, REPOKEY_TYPE_ID);
1970                 }
1971               else if (key->storage == KEY_STORAGE_INCORE)
1972                 ndp = data_skip_recursive(data, dp, key);
1973               oldcount--;
1974             }
1975           if (seen[*keyp] == -1)
1976             {
1977               /* If this key was an old one _and_ was not overwritten with
1978                  a different value copy over the old value (we skipped it
1979                  above).  */
1980               if (dp != ndp)
1981                 data_addblob(&newincore, dp, ndp - dp);
1982               seen[*keyp] = 0;
1983             }
1984           else if (seen[*keyp])
1985             {
1986               /* Otherwise we have a new value.  Parse it into the internal
1987                  form.  */
1988               repodata_serialize_key(data, &newincore, &newvincore,
1989                                      schema, schematacache,
1990                                      key, seen[*keyp] - 1);
1991             }
1992           dp = ndp;
1993         }
1994       if (data->structs[handle])
1995         data->structs[handle] = sat_free(data->structs[handle]);
1996     }
1997   for (entry = 0; entry < data->nstructs; entry++)
1998     if (data->structs[entry])
1999       sat_free(data->structs[entry]);
2000   data->structs = sat_free(data->structs);
2001   sat_free(schema);
2002   sat_free(seen);
2003
2004   sat_free(data->incoredata);
2005   data->incoredata = newincore.buf;
2006   data->incoredatalen = newincore.len;
2007   data->incoredatafree = 0;
2008
2009   sat_free(data->vincore);
2010   data->vincore = newvincore.buf;
2011   data->vincorelen = newvincore.len;
2012
2013   data->attrs = sat_free(data->attrs);
2014   data->extraattrs = sat_free(data->extraattrs);
2015   data->attrdata = sat_free(data->attrdata);
2016   data->attriddata = sat_free(data->attriddata);
2017   data->attrdatalen = 0;
2018   data->attriddatalen = 0;
2019 }
2020
2021 Id
2022 repodata_str2dir(Repodata *data, const char *dir, int create)
2023 {
2024   Id id, parent;
2025   const char *dire;
2026
2027   parent = 0;
2028   while (*dir == '/' && dir[1] == '/')
2029     dir++;
2030   if (*dir == '/' && !dir[1])
2031     return 1;
2032   while (*dir)
2033     {
2034       dire = strchrnul(dir, '/');
2035       if (data->localpool)
2036         id = stringpool_strn2id(&data->spool, dir, dire - dir, create);
2037       else
2038         id = strn2id(data->repo->pool, dir, dire - dir, create);
2039       if (!id)
2040         return 0;
2041       parent = dirpool_add_dir(&data->dirpool, parent, id, create);
2042       if (!parent)
2043         return 0;
2044       if (!*dire)
2045         break;
2046       dir = dire + 1;
2047       while (*dir == '/')
2048         dir++;
2049     }
2050   return parent;
2051 }
2052
2053 const char *
2054 repodata_dir2str(Repodata *data, Id did, const char *suf)
2055 {
2056   Pool *pool = data->repo->pool;
2057   int l = 0;
2058   Id parent, comp;
2059   const char *comps;
2060   char *p;
2061
2062   if (!did)
2063     return suf ? suf : "";
2064   parent = did;
2065   while (parent)
2066     {
2067       comp = dirpool_compid(&data->dirpool, parent);
2068       comps = stringpool_id2str(data->localpool ? &data->spool : &pool->ss, comp);
2069       l += strlen(comps);
2070       parent = dirpool_parent(&data->dirpool, parent);
2071       if (parent)
2072         l++;
2073     }
2074   if (suf)
2075     l += strlen(suf) + 1;
2076   p = pool_alloctmpspace(pool, l + 1) + l;
2077   *p = 0;
2078   if (suf)
2079     {
2080       p -= strlen(suf);
2081       strcpy(p, suf);
2082       *--p = '/';
2083     }
2084   parent = did;
2085   while (parent)
2086     {
2087       comp = dirpool_compid(&data->dirpool, parent);
2088       comps = stringpool_id2str(data->localpool ? &data->spool : &pool->ss, comp);
2089       l = strlen(comps);
2090       p -= l;
2091       strncpy(p, comps, l);
2092       parent = dirpool_parent(&data->dirpool, parent);
2093       if (parent)
2094         *--p = '/';
2095     }
2096   return p;
2097 }
2098
/*
 * Compress LEN bytes starting at PAGE into CPAGE, passing MAX as the
 * output length limit.  This simply forwards to compress_buf() and
 * hands back its return value unchanged.
 */
unsigned int
repodata_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max)
{
  unsigned int clen;

  clen = compress_buf(page, len, cpage, max);
  return clen;
}
2104
#define SOLV_ERROR_EOF              3

/*
 * Read four bytes from FP and combine them, most significant byte
 * first, into an unsigned 32 bit value.
 *
 * Returns 0 as soon as getc() reports EOF; the caller cannot tell this
 * apart from a stored value of 0 by the return value alone.
 */
static inline unsigned int
read_u32(FILE *fp)
{
  unsigned int value = 0;
  int remaining = 4;

  while (remaining-- > 0)
    {
      int byte = getc(fp);

      if (byte == EOF)
        return 0;
      value = (value << 8) | byte;
    }
  return value;
}
2122
2123 #define SOLV_ERROR_EOF          3
2124 #define SOLV_ERROR_CORRUPT      6
2125
2126 /* Try to either setup on-demand paging (using FP as backing
2127    file), or in case that doesn't work (FP not seekable) slurps in
2128    all pages and deactivates paging.  */
2129 void
2130 repodata_read_or_setup_pages(Repodata *data, unsigned int pagesz, unsigned int blobsz)
2131 {
2132   FILE *fp = data->fp;
2133   unsigned int npages;
2134   unsigned int i;
2135   unsigned int can_seek;
2136   long cur_file_ofs;
2137   unsigned char buf[BLOB_PAGESIZE];
2138
2139   if (pagesz != BLOB_PAGESIZE)
2140     {
2141       /* We could handle this by slurping in everything.  */
2142       data->error = SOLV_ERROR_CORRUPT;
2143       return;
2144     }
2145   can_seek = 1;
2146   if ((cur_file_ofs = ftell(fp)) < 0)
2147     can_seek = 0;
2148   clearerr(fp);
2149   if (can_seek)
2150     data->pagefd = dup(fileno(fp));
2151   if (data->pagefd == -1)
2152     can_seek = 0;
2153
2154 #ifdef DEBUG_PAGING
2155   fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
2156 #endif
2157   npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
2158
2159   data->num_pages = npages;
2160   data->pages = sat_malloc2(npages, sizeof(data->pages[0]));
2161
2162   /* If we can't seek on our input we have to slurp in everything.  */
2163   if (!can_seek)
2164     data->blob_store = sat_malloc(npages * BLOB_PAGESIZE);
2165   for (i = 0; i < npages; i++)
2166     {
2167       unsigned int in_len = read_u32(fp);
2168       unsigned int compressed = in_len & 1;
2169       Attrblobpage *p = data->pages + i;
2170       in_len >>= 1;
2171 #ifdef DEBUG_PAGING
2172       fprintf (stderr, "page %d: len %d (%scompressed)\n",
2173                i, in_len, compressed ? "" : "not ");
2174 #endif
2175       if (can_seek)
2176         {
2177           cur_file_ofs += 4;
2178           p->mapped_at = -1;
2179           p->file_offset = cur_file_ofs;
2180           p->file_size = in_len * 2 + compressed;
2181           if (fseek(fp, in_len, SEEK_CUR) < 0)
2182             {
2183               perror ("fseek");
2184               fprintf (stderr, "can't seek after we thought we can\n");
2185               /* We can't fall back to non-seeking behaviour as we already
2186                  read over some data pages without storing them away.  */
2187               data->error = SOLV_ERROR_EOF;
2188               close(data->pagefd);
2189               data->pagefd = -1;
2190               return;
2191             }
2192           cur_file_ofs += in_len;
2193         }
2194       else
2195         {
2196           unsigned int out_len;
2197           void *dest = data->blob_store + i * BLOB_PAGESIZE;
2198           p->mapped_at = i * BLOB_PAGESIZE;
2199           p->file_offset = 0;
2200           p->file_size = 0;
2201           /* We can't seek, so suck everything in.  */
2202           if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
2203             {
2204               perror("fread");
2205               data->error = SOLV_ERROR_EOF;
2206               return;
2207             }
2208           if (compressed)
2209             {
2210               out_len = unchecked_decompress_buf(buf, in_len, dest, BLOB_PAGESIZE);
2211               if (out_len != BLOB_PAGESIZE && i < npages - 1)
2212                 {
2213                   data->error = SOLV_ERROR_CORRUPT;
2214                   return;
2215                 }
2216             }
2217         }
2218     }
2219 }
2220
2221 void
2222 repodata_disable_paging(Repodata *data)
2223 {
2224   if (maybe_load_repodata(data, 0)
2225       && data->num_pages)
2226     load_page_range (data, 0, data->num_pages - 1);
2227 }
2228 /*
2229 vim:cinoptions={.5s,g0,p5,t0,(0,^-0.5s,n-0.5s:tw=78:cindent:sw=4:
2230 */