src/repodata.c

   1 /*
   2  * Copyright (c) 2007, Novell Inc.
   3  *
   4  * This program is licensed under the BSD license, read LICENSE.BSD
   5  * for further information
   6  */
   7
   8 /*
   9  * repodata.c
  10  *
  11  * Manage data coming from one repository
  12  *
  13  */
  14
  15 #define _GNU_SOURCE
  16 #include <string.h>
  17
  18 #include <stdio.h>
  19 #include <stdlib.h>
  20 #include <unistd.h>
  21 #include <assert.h>
  22
  23 #include "repo.h"
  24 #include "pool.h"
  25 #include "poolid_private.h"
  26 #include "util.h"
  27
  28 #include "fastlz.c"
  29
  30 unsigned char *
  31 data_read_id(unsigned char *dp, Id *idp)
  32 {
  33   Id x = 0;
  34   unsigned char c;
  35   for (;;)
  36     {
  37       c = *dp++;
  38       if (!(c & 0x80))
  39         {
  40           *idp = (x << 7) ^ c;
  41           return dp;
  42         }
  43       x = (x << 7) ^ c ^ 128;
  44     }
  45 }
  46
  47 static unsigned char *
  48 data_read_ideof(unsigned char *dp, Id *idp, int *eof)
  49 {
  50   Id x = 0;
  51   unsigned char c;
  52   for (;;)
  53     {
  54       c = *dp++;
  55       if (!(c & 0x80))
  56         {
  57           if (c & 0x40)
  58             {
  59               c ^= 0x40;
  60               *eof = 0;
  61             }
  62           else
  63             *eof = 1;
  64           *idp = (x << 6) ^ c;
  65           return dp;
  66         }
  67       x = (x << 7) ^ c ^ 128;
  68     }
  69 }
  70
  71 static unsigned char *
  72 data_skip(unsigned char *dp, int type)
  73 {
  74   unsigned char x;
  75   switch (type)
  76     {
  77     case TYPE_VOID:
  78     case TYPE_CONSTANT:
  79       return dp;
  80     case TYPE_ID:
  81     case TYPE_NUM:
  82     case TYPE_DIR:
  83       while ((*dp & 0x80) != 0)
  84         dp++;
  85       return dp + 1;
  86     case TYPE_IDARRAY:
  87       while ((*dp & 0xc0) != 0)
  88         dp++;
  89       return dp + 1;
  90     case TYPE_STR:
  91       while ((*dp) != 0)
  92         dp++;
  93       return dp + 1;
  94     case TYPE_DIRSTRARRAY:
  95       for (;;)
  96         {
  97           while ((*dp & 0x80) != 0)
  98             dp++;
  99           x = *dp++;
 100           while ((*dp) != 0)
 101             dp++;
 102           dp++;
 103           if (!(x & 0x40))
 104             return dp;
 105         }
 106     case TYPE_DIRNUMNUMARRAY:
 107       for (;;)
 108         {
 109           while ((*dp & 0x80) != 0)
 110             dp++;
 111           dp++;
 112           while ((*dp & 0x80) != 0)
 113             dp++;
 114           dp++;
 115           while ((*dp & 0x80) != 0)
 116             dp++;
 117           if (!(*dp & 0x40))
 118             return dp + 1;
 119           dp++;
 120         }
 121     default:
 122       fprintf(stderr, "unknown type in data_skip\n");
 123       exit(1);
 124     }
 125 }
 126
 127 static unsigned char *
 128 data_fetch(unsigned char *dp, KeyValue *kv, Repokey *key)
 129 {
 130   kv->eof = 1;
 131   if (!dp)
 132     return 0;
 133   switch (key->type)
 134     {
 135     case TYPE_VOID:
 136       return dp;
 137     case TYPE_CONSTANT:
 138       kv->num = key->size;
 139       return dp;
 140     case TYPE_STR:
 141       kv->str = (const char *)dp;
 142       return dp + strlen(kv->str) + 1;
 143     case TYPE_ID:
 144       return data_read_id(dp, &kv->id);
 145     case TYPE_NUM:
 146       return data_read_id(dp, &kv->num);
 147     case TYPE_IDARRAY:
 148       return data_read_ideof(dp, &kv->id, &kv->eof);
 149     case TYPE_DIR:
 150       return data_read_id(dp, &kv->id);
 151     case TYPE_DIRSTRARRAY:
 152       dp = data_read_ideof(dp, &kv->id, &kv->eof);
 153       kv->str = (const char *)dp;
 154       return dp + strlen(kv->str) + 1;
 155     case TYPE_DIRNUMNUMARRAY:
 156       dp = data_read_id(dp, &kv->id);
 157       dp = data_read_id(dp, &kv->num);
 158       return data_read_ideof(dp, &kv->num2, &kv->eof);
 159     default:
 160       return 0;
 161     }
 162 }
 163
 164 static unsigned char *
 165 forward_to_key(Repodata *data, Id key, Id schemaid, unsigned char *dp)
 166 {
 167   Id k, *keyp;
 168
 169   keyp = data->schemadata + data->schemata[schemaid];
 170   while ((k = *keyp++) != 0)
 171     {
 172       if (k == key)
 173         return dp;
 174       if (data->keys[k].storage == KEY_STORAGE_VERTICAL_OFFSET)
 175         {
 176           dp = data_skip(dp, TYPE_ID);  /* skip that offset */
 177           dp = data_skip(dp, TYPE_ID);  /* skip that length */
 178           continue;
 179         }
 180       if (data->keys[k].storage != KEY_STORAGE_INCORE)
 181         continue;
 182       dp = data_skip(dp, data->keys[k].type);
 183     }
 184   return 0;
 185 }
 186
/* The paged blob store is handled in units of BLOB_PAGESIZE bytes,
   which is 2^BLOB_PAGEBITS.  */
#define BLOB_PAGEBITS 15
#define BLOB_PAGESIZE (1 << BLOB_PAGEBITS)
 189
 190 static unsigned char *
 191 load_page_range(Repodata *data, unsigned int pstart, unsigned int pend)
 192 {
 193 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
 194    and are consecutive.  Return a pointer to the mapping of PSTART.  */
 195   unsigned char buf[BLOB_PAGESIZE];
 196   unsigned int i;
 197
 198   /* Quick check in case all pages are there already and consecutive.  */
 199   for (i = pstart; i <= pend; i++)
 200     if (data->pages[i].mapped_at == -1
 201         || (i > pstart
 202             && data->pages[i].mapped_at
 203                != data->pages[i-1].mapped_at + BLOB_PAGESIZE))
 204       break;
 205   if (i > pend)
 206     return data->blob_store + data->pages[pstart].mapped_at;
 207
 208   /* Ensure that we can map the numbers of pages we need at all.  */
 209   if (pend - pstart + 1 > data->ncanmap)
 210     {
 211       unsigned int oldcan = data->ncanmap;
 212       data->ncanmap = pend - pstart + 1;
 213       if (data->ncanmap < 4)
 214         data->ncanmap = 4;
 215       data->mapped = sat_realloc2(data->mapped, data->ncanmap, sizeof(data->mapped[0]));
 216       memset (data->mapped + oldcan, 0, (data->ncanmap - oldcan) * sizeof (data->mapped[0]));
 217       data->blob_store = sat_realloc2(data->blob_store, data->ncanmap, BLOB_PAGESIZE);
 218 #ifdef DEBUG_PAGING
 219       fprintf (stderr, "PAGE: can map %d pages\n", data->ncanmap);
 220 #endif
 221     }
 222
 223   /* Now search for "cheap" space in our store.  Space is cheap if it's either
 224      free (very cheap) or contains pages we search for anyway.  */
 225
 226   /* Setup cost array.  */
 227   unsigned int cost[data->ncanmap];
 228   for (i = 0; i < data->ncanmap; i++)
 229     {
 230       unsigned int pnum = data->mapped[i];
 231       if (pnum == 0)
 232         cost[i] = 0;
 233       else
 234         {
 235           pnum--;
 236           Attrblobpage *p = data->pages + pnum;
 237           assert (p->mapped_at != -1);
 238           if (pnum >= pstart && pnum <= pend)
 239             cost[i] = 1;
 240           else
 241             cost[i] = 3;
 242         }
 243     }
 244
 245   /* And search for cheapest space.  */
 246   unsigned int best_cost = -1;
 247   unsigned int best = 0;
 248   unsigned int same_cost = 0;
 249   for (i = 0; i + pend - pstart < data->ncanmap; i++)
 250     {
 251       unsigned int c = cost[i];
 252       unsigned int j;
 253       for (j = 0; j < pend - pstart + 1; j++)
 254         c += cost[i+j];
 255       if (c < best_cost)
 256         best_cost = c, best = i;
 257       else if (c == best_cost)
 258         same_cost++;
 259       /* A null cost won't become better.  */
 260       if (c == 0)
 261         break;
 262     }
 263   /* If all places have the same cost we would thrash on slot 0.  Avoid
 264      this by doing a round-robin strategy in this case.  */
 265   if (same_cost == data->ncanmap - pend + pstart - 1)
 266     best = data->rr_counter++ % (data->ncanmap - pend + pstart);
 267
 268   /* So we want to map our pages from [best] to [best+pend-pstart].
 269      Use a very simple strategy, which doesn't make the best use of
 270      our resources, but works.  Throw away all pages in that range
 271      (even ours) then copy around ours (in case they were outside the
 272      range) or read them in.  */
 273   for (i = best; i < best + pend - pstart + 1; i++)
 274     {
 275       unsigned int pnum = data->mapped[i];
 276       if (pnum--
 277           /* If this page is exactly at the right place already,
 278              no need to evict it.  */
 279           && pnum != pstart + i - best)
 280         {
 281           /* Evict this page.  */
 282 #ifdef DEBUG_PAGING
 283           fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
 284 #endif
 285           cost[i] = 0;
 286           data->mapped[i] = 0;
 287           data->pages[pnum].mapped_at = -1;
 288         }
 289     }
 290
 291   /* Everything is free now.  Read in the pages we want.  */
 292   for (i = pstart; i <= pend; i++)
 293     {
 294       Attrblobpage *p = data->pages + i;
 295       unsigned int pnum = i - pstart + best;
 296       void *dest = data->blob_store + pnum * BLOB_PAGESIZE;
 297       if (p->mapped_at != -1)
 298         {
 299           if (p->mapped_at != pnum * BLOB_PAGESIZE)
 300             {
 301 #ifdef DEBUG_PAGING
 302               fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
 303 #endif
 304               /* Still mapped somewhere else, so just copy it from there.  */
 305               memcpy (dest, data->blob_store + p->mapped_at, BLOB_PAGESIZE);
 306               data->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
 307             }
 308         }
 309       else
 310         {
 311           unsigned int in_len = p->file_size;
 312           unsigned int compressed = in_len & 1;
 313           in_len >>= 1;
 314 #ifdef DEBUG_PAGING
 315           fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
 316 #endif
 317           /* Not mapped, so read in this page.  */
 318           if (fseek(data->fp, p->file_offset, SEEK_SET) < 0)
 319             {
 320               perror ("mapping fseek");
 321               exit (1);
 322             }
 323           if (fread(compressed ? buf : dest, in_len, 1, data->fp) != 1)
 324             {
 325               perror ("mapping fread");
 326               exit (1);
 327             }
 328           if (compressed)
 329             {
 330               unsigned int out_len;
 331               out_len = unchecked_decompress_buf(buf, in_len,
 332                                                   dest, BLOB_PAGESIZE);
 333               if (out_len != BLOB_PAGESIZE
 334                   && i < data->num_pages - 1)
 335                 {
 336                   fprintf (stderr, "can't decompress\n");
 337                   exit (1);
 338                 }
 339 #ifdef DEBUG_PAGING
 340               fprintf (stderr, " (expand %d to %d)", in_len, out_len);
 341 #endif
 342             }
 343 #ifdef DEBUG_PAGING
 344           fprintf (stderr, "\n");
 345 #endif
 346         }
 347       p->mapped_at = pnum * BLOB_PAGESIZE;
 348       data->mapped[pnum] = i + 1;
 349     }
 350   return data->blob_store + best * BLOB_PAGESIZE;
 351 }
 352
 353 static unsigned char *
 354 make_vertical_available(Repodata *data, Repokey *key, Id off, Id len)
 355 {
 356   unsigned char *dp;
 357   if (key->type == TYPE_VOID)
 358     return 0;
 359   if (off >= data->lastverticaloffset)
 360     {
 361       off -= data->lastverticaloffset;
 362       if (off + len > data->vincorelen)
 363         return 0;
 364       return data->vincore + off;
 365     }
 366   if (!data->fp)
 367     return 0;
 368   if (off + len > key->size)
 369     return 0;
 370   /* we now have the offset, go into vertical */
 371   off += data->verticaloffset[key - data->keys];
 372   dp = load_page_range(data, off / BLOB_PAGESIZE, (off + len - 1) / BLOB_PAGESIZE);
 373   if (dp)
 374     dp += off % BLOB_PAGESIZE;
 375   return dp;
 376 }
 377
 378 static inline unsigned char *
 379 get_data(Repodata *data, Repokey *key, unsigned char **dpp)
 380 {
 381   unsigned char *dp = *dpp;
 382
 383   if (!dp)
 384     return 0;
 385   if (key->storage == KEY_STORAGE_INCORE)
 386     {
 387       /* hmm, this is a bit expensive */
 388       *dpp = data_skip(dp, key->type);
 389       return dp;
 390     }
 391   else if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 392     {
 393       Id off, len;
 394       dp = data_read_id(dp, &off);
 395       dp = data_read_id(dp, &len);
 396       *dpp = dp;
 397       return make_vertical_available(data, key, off, len);
 398     }
 399   return 0;
 400 }
 401
 402
 403 const char *
 404 repodata_lookup_str(Repodata *data, Id entry, Id keyid)
 405 {
 406   Id schema;
 407   Repokey *key;
 408   Id id, *keyp;
 409   unsigned char *dp;
 410
 411   dp = data->incoredata + data->incoreoffset[entry];
 412   dp = data_read_id(dp, &schema);
 413   /* make sure the schema of this solvable contains the key */
 414   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 415     if (!*keyp)
 416       return 0;
 417   dp = forward_to_key(data, keyid, schema, dp);
 418   key = data->keys + keyid;
 419   dp = get_data(data, key, &dp);
 420   if (!dp)
 421     return 0;
 422   if (key->type == TYPE_STR)
 423     return (const char *)dp;
 424   if (key->type != TYPE_ID)
 425     return 0;
 426   /* id type, must either use global or local string strore*/
 427   dp = data_read_id(dp, &id);
 428   if (data->localpool)
 429     return data->spool.stringspace + data->spool.strings[id];
 430   return id2str(data->repo->pool, id);
 431 }
 432
 433 int
 434 repodata_lookup_num(Repodata *data, Id entry, Id keyid)
 435 {
 436   Id schema;
 437   Repokey *key;
 438   Id *keyp;
 439   KeyValue kv;
 440   unsigned char *dp;
 441
 442   dp = data->incoredata + data->incoreoffset[entry];
 443   dp = data_read_id(dp, &schema);
 444   /* make sure the schema of this solvable contains the key */
 445   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 446     if (!*keyp)
 447       return 0;
 448   dp = forward_to_key(data, keyid, schema, dp);
 449   key = data->keys + keyid;
 450   dp = get_data(data, key, &dp);
 451   if (!dp)
 452     return 0;
 453   if (key->type == TYPE_NUM || key->type == TYPE_U32)
 454   {
 455     dp = data_fetch(dp, &kv, key);
 456     return kv.num;
 457   }
 458   return 0;
 459 }
 460
 461 void
 462 repodata_search(Repodata *data, Id entry, Id keyname, int (*callback)(void *cbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv), void *cbdata)
 463 {
 464   Id schema;
 465   Repokey *key;
 466   Id k, keyid, *kp, *keyp;
 467   unsigned char *dp, *ddp;
 468   int onekey = 0;
 469   int stop;
 470   KeyValue kv;
 471
 472   dp = data->incoredata + data->incoreoffset[entry];
 473   dp = data_read_id(dp, &schema);
 474   keyp = data->schemadata + data->schemata[schema];
 475   if (keyname)
 476     {
 477       /* search in a specific key */
 478       for (kp = keyp; (k = *kp++) != 0; )
 479         if (data->keys[k].name == keyname)
 480           break;
 481       if (k == 0)
 482         return;
 483       dp = forward_to_key(data, k, schema, dp);
 484       if (!dp)
 485         return;
 486       keyp = kp - 1;
 487       onekey = 1;
 488     }
 489   while ((keyid = *keyp++) != 0)
 490     {
 491       stop = 0;
 492       key = data->keys + keyid;
 493       ddp = get_data(data, key, &dp);
 494       do
 495         {
 496           ddp = data_fetch(ddp, &kv, key);
 497           if (!ddp)
 498             break;
 499           stop = callback(cbdata, data->repo->pool->solvables + data->start + entry, data, key, &kv);
 500         }
 501       while (!kv.eof && !stop);
 502       if (onekey || stop > SEARCH_NEXT_KEY)
 503         return;
 504     }
 505 }
 506
 507
 508 /* extend repodata so that it includes solvables p */
 509 void
 510 repodata_extend(Repodata *data, Id p)
 511 {
 512   if (data->start == data->end)
 513     data->start = data->end = p;
 514   if (p >= data->end)
 515     {
 516       int old = data->end - data->start;
 517       int new = p - data->end + 1;
 518       if (data->attrs)
 519         {
 520           data->attrs = sat_realloc2(data->attrs, old + new, sizeof(Id *));
 521           memset(data->attrs + old, 0, new * sizeof(Id *));
 522         }
 523       data->incoreoffset = sat_realloc2(data->incoreoffset, old + new, sizeof(Id));
 524       memset(data->incoreoffset + old, 0, new * sizeof(Id));
 525       data->end = p + 1;
 526     }
 527   if (p < data->start)
 528     {
 529       int old = data->end - data->start;
 530       int new = data->start - p;
 531       if (data->attrs)
 532         {
 533           data->attrs = sat_realloc2(data->attrs, old + new, sizeof(Id *));
 534           memmove(data->attrs + new, data->attrs, old * sizeof(Id *));
 535           memset(data->attrs, 0, new * sizeof(Id *));
 536         }
 537       data->incoreoffset = sat_realloc2(data->incoreoffset, old + new, sizeof(Id));
 538       memmove(data->incoreoffset + new, data->incoreoffset, old * sizeof(Id));
 539       memset(data->incoreoffset, 0, new * sizeof(Id));
 540       data->start = p;
 541     }
 542 }
 543
 544 static void
 545 repodata_insert_keyid(Repodata *data, Id entry, Id keyid, Id val, int overwrite)
 546 {
 547   Id *pp;
 548   int i;
 549   if (!data->attrs)
 550     data->attrs = sat_calloc(data->end - data->start + 1, sizeof(Id *));
 551   i = 0;
 552   if (data->attrs[entry])
 553     {
 554       for (pp = data->attrs[entry]; *pp; pp += 2)
 555         if (*pp == keyid)
 556           break;
 557       if (*pp)
 558         {
 559           if (overwrite)
 560             pp[1] = val;
 561           return;
 562         }
 563       i = pp - data->attrs[entry];
 564     }
 565   data->attrs[entry] = sat_realloc2(data->attrs[entry], i + 3, sizeof(Id));
 566   pp = data->attrs[entry] + i;
 567   *pp++ = keyid;
 568   *pp++ = val;
 569   *pp = 0;
 570 }
 571
 572 void
 573 repodata_set(Repodata *data, Id entry, Repokey *key, Id val)
 574 {
 575   Id keyid;
 576
 577   /* find key in keys */
 578   for (keyid = 1; keyid < data->nkeys; keyid++)
 579     if (data->keys[keyid].name == key->name && data->keys[keyid].type == key->type)
 580       {
 581         if (key->type == TYPE_CONSTANT && key->size != data->keys[keyid].size)
 582           continue;
 583         break;
 584       }
 585   if (keyid == data->nkeys)
 586     {
 587       /* allocate new key */
 588       data->keys = sat_realloc2(data->keys, data->nkeys + 1, sizeof(Repokey));
 589       data->keys[data->nkeys++] = *key;
 590       if (data->verticaloffset)
 591         {
 592           data->verticaloffset = sat_realloc2(data->verticaloffset, data->nkeys, sizeof(Id));
 593           data->verticaloffset[data->nkeys - 1] = 0;
 594         }
 595     }
 596   repodata_insert_keyid(data, entry, keyid, val, 1);
 597 }
 598
 599 void
 600 repodata_set_id(Repodata *data, Id entry, Id keyname, Id id)
 601 {
 602   Repokey key;
 603   key.name = keyname;
 604   key.type = TYPE_ID;
 605   key.size = 0;
 606   key.storage = KEY_STORAGE_INCORE;
 607   repodata_set(data, entry, &key, id);
 608 }
 609
 610 void
 611 repodata_set_num(Repodata *data, Id entry, Id keyname, Id num)
 612 {
 613   Repokey key;
 614   key.name = keyname;
 615   key.type = TYPE_NUM;
 616   key.size = 0;
 617   key.storage = KEY_STORAGE_INCORE;
 618   repodata_set(data, entry, &key, num);
 619 }
 620
 621 void
 622 repodata_set_poolstr(Repodata *data, Id entry, Id keyname, const char *str)
 623 {
 624   Repokey key;
 625   Id id;
 626   if (data->localpool)
 627     id = stringpool_str2id(&data->spool, str, 1);
 628   else
 629     id = str2id(data->repo->pool, str, 1);
 630   key.name = keyname;
 631   key.type = TYPE_ID;
 632   key.size = 0;
 633   key.storage = KEY_STORAGE_INCORE;
 634   repodata_set(data, entry, &key, id);
 635 }
 636
 637 void
 638 repodata_set_constant(Repodata *data, Id entry, Id keyname, Id constant)
 639 {
 640   Repokey key;
 641   key.name = keyname;
 642   key.type = TYPE_CONSTANT;
 643   key.size = constant;
 644   key.storage = KEY_STORAGE_INCORE;
 645   repodata_set(data, entry, &key, 0);
 646 }
 647
 648 void
 649 repodata_set_void(Repodata *data, Id entry, Id keyname)
 650 {
 651   Repokey key;
 652   key.name = keyname;
 653   key.type = TYPE_VOID;
 654   key.size = 0;
 655   key.storage = KEY_STORAGE_INCORE;
 656   repodata_set(data, entry, &key, 0);
 657 }
 658
 659 void
 660 repodata_set_str(Repodata *data, Id entry, Id keyname, const char *str)
 661 {
 662   Repokey key;
 663   int l;
 664
 665   l = strlen(str) + 1;
 666   key.name = keyname;
 667   key.type = TYPE_STR;
 668   key.size = 0;
 669   key.storage = KEY_STORAGE_INCORE;
 670   data->attrdata = sat_realloc(data->attrdata, data->attrdatalen + l);
 671   memcpy(data->attrdata + data->attrdatalen, str, l);
 672   repodata_set(data, entry, &key, data->attrdatalen);
 673   data->attrdatalen += l;
 674 }
 675
 676 void
 677 repodata_add_dirnumnum(Repodata *data, Id entry, Id keyname, Id dir, Id num, Id num2)
 678 {
 679   Id *ida, *pp;
 680   Repokey key;
 681
 682 #if 0
 683 fprintf(stderr, "repodata_add_dirnumnum %d %d %d %d (%d)\n", entry, dir, num, num2, data->attriddatalen);
 684 #endif
 685   if (data->attrs && data->attrs[entry])
 686     {
 687       for (pp = data->attrs[entry]; *pp; pp += 2)
 688         if (data->keys[*pp].name == keyname && data->keys[*pp].type == TYPE_DIRNUMNUMARRAY)
 689           break;
 690       if (*pp)
 691         {
 692           int oldsize = 0;
 693           for (ida = data->attriddata + pp[1]; *ida; ida += 3)
 694             oldsize += 3;
 695           if (ida + 1 == data->attriddata + data->attriddatalen)
 696             {
 697               /* this was the last entry, just append it */
 698               data->attriddata = sat_realloc2(data->attriddata, data->attriddatalen + 3, sizeof(Id));
 699               data->attriddatalen--;    /* overwrite terminating 0  */
 700             }
 701           else
 702             {
 703               /* too bad. move to back. */
 704               data->attriddata = sat_realloc2(data->attriddata, data->attriddatalen + oldsize + 4, sizeof(Id));
 705               memcpy(data->attriddata + data->attriddatalen, data->attriddata + pp[1], oldsize * sizeof(Id));
 706               pp[1] = data->attriddatalen;
 707               data->attriddatalen += oldsize;
 708             }
 709           data->attriddata[data->attriddatalen++] = dir;
 710           data->attriddata[data->attriddatalen++] = num;
 711           data->attriddata[data->attriddatalen++] = num2;
 712           data->attriddata[data->attriddatalen++] = 0;
 713           return;
 714         }
 715     }
 716   key.name = keyname;
 717   key.type = TYPE_DIRNUMNUMARRAY;
 718   key.size = 0;
 719   key.storage = KEY_STORAGE_INCORE;
 720   data->attriddata = sat_realloc2(data->attriddata, data->attriddatalen + 4, sizeof(Id));
 721   repodata_set(data, entry, &key, data->attriddatalen);
 722   data->attriddata[data->attriddatalen++] = dir;
 723   data->attriddata[data->attriddatalen++] = num;
 724   data->attriddata[data->attriddatalen++] = num2;
 725   data->attriddata[data->attriddatalen++] = 0;
 726 }
 727
 728 void
 729 repodata_merge_attrs (Repodata *data, Id dest, Id src)
 730 {
 731   Id *keyp;
 732   for (keyp = data->attrs[src]; *keyp; keyp += 2)
 733     repodata_insert_keyid(data, dest, keyp[0], keyp[1], 0);
 734 }
 735
 736 /*********************************/
 737
 738 /* unify with repo_write! */
 739
/* Block arguments handed to sat_extend()/sat_extend_resize() for the
   extdata byte buffers, the schemata array and the schemadata array.
   Presumably the allocation granularity -- confirm against util.h.  */
#define EXTDATA_BLOCK 1023
#define SCHEMATA_BLOCK 31
#define SCHEMATADATA_BLOCK 255
 743
/* Growable byte buffer the data_add*() helpers append encoded data to.  */
struct extdata {
  unsigned char *buf;	/* start of the buffer, grown with sat_extend() */
  int len;		/* number of bytes used so far */
};
 748
 749 static void
 750 data_addid(struct extdata *xd, Id x)
 751 {
 752   unsigned char *dp;
 753   xd->buf = sat_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
 754   dp = xd->buf + xd->len;
 755
 756   if (x >= (1 << 14))
 757     {
 758       if (x >= (1 << 28))
 759         *dp++ = (x >> 28) | 128;
 760       if (x >= (1 << 21))
 761         *dp++ = (x >> 21) | 128;
 762       *dp++ = (x >> 14) | 128;
 763     }
 764   if (x >= (1 << 7))
 765     *dp++ = (x >> 7) | 128;
 766   *dp++ = x & 127;
 767   xd->len = dp - xd->buf;
 768 }
 769
 770 static void
 771 data_addideof(struct extdata *xd, Id x, int eof)
 772 {
 773   if (x >= 64)
 774     x = (x & 63) | ((x & ~63) << 1);
 775   data_addid(xd, (eof ? x: x | 64));
 776 }
 777
 778 static void
 779 data_addblob(struct extdata *xd, unsigned char *blob, int len)
 780 {
 781   xd->buf = sat_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
 782   memcpy(xd->buf + xd->len, blob, len);
 783   xd->len += len;
 784 }
 785
 786 /*********************************/
 787
 788 static void
 789 addschema_prepare(Repodata *data, Id *schematacache)
 790 {
 791   int h, len, i;
 792   Id *sp;
 793
 794   memset(schematacache, 0, 256 * sizeof(Id));
 795   for (i = 0; i < data->nschemata; i++)
 796     {
 797       for (sp = data->schemadata + data->schemata[i], h = 0; *sp; len++)
 798         h = h * 7 + *sp++;
 799       h &= 255;
 800       schematacache[h] = i + 1;
 801     }
 802   data->schemadata = sat_extend_resize(data->schemadata, data->schemadatalen, sizeof(Id), SCHEMATADATA_BLOCK);
 803   data->schemata = sat_extend_resize(data->schemata, data->nschemata, sizeof(Id), SCHEMATA_BLOCK);
 804 }
 805
 806 static Id
 807 addschema(Repodata *data, Id *schema, Id *schematacache)
 808 {
 809   int h, len;
 810   Id *sp, cid;
 811
 812   for (sp = schema, len = 0, h = 0; *sp; len++)
 813     h = h * 7 + *sp++;
 814   h &= 255;
 815   len++;
 816
 817   cid = schematacache[h];
 818   if (cid)
 819     {
 820       cid--;
 821       if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
 822         return cid;
 823       /* cache conflict */
 824       for (cid = 0; cid < data->nschemata; cid++)
 825         if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
 826           return cid;
 827     }
 828   /* a new one. make room. */
 829   data->schemadata = sat_extend(data->schemadata, data->schemadatalen, len, sizeof(Id), SCHEMATADATA_BLOCK);
 830   data->schemata = sat_extend(data->schemata, data->nschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
 831   /* add schema */
 832   memcpy(data->schemadata + data->schemadatalen, schema, len * sizeof(Id));
 833   data->schemata[data->nschemata] = data->schemadatalen;
 834   data->schemadatalen += len;
 835   schematacache[h] = data->nschemata + 1;
 836 #if 0
 837 fprintf(stderr, "addschema: new schema\n");
 838 #endif
 839   return data->nschemata++;
 840 }
 841
 842
 843 void
 844 repodata_internalize(Repodata *data)
 845 {
 846   Repokey *key;
 847   Id id, entry, nentry, *ida;
 848   Id schematacache[256];
 849   Id schemaid, *schema, *sp, oldschema, *keyp, *seen;
 850   unsigned char *dp, *ndp;
 851   int newschema, oldcount;
 852   struct extdata newincore;
 853   struct extdata newvincore;
 854
 855   if (!data->attrs)
 856     return;
 857
 858   newvincore.buf = data->vincore;
 859   newvincore.len = data->vincorelen;
 860
 861   schema = sat_malloc2(data->nkeys, sizeof(Id));
 862   seen = sat_malloc2(data->nkeys, sizeof(Id));
 863
 864   /* Merge the data already existing (in data->schemata, ->incoredata and
 865      friends) with the new attributes in data->attrs[].  */
 866   nentry = data->end - data->start;
 867   addschema_prepare(data, schematacache);
 868   memset(&newincore, 0, sizeof(newincore));
 869   for (entry = 0; entry < nentry; entry++)
 870     {
 871       memset(seen, 0, data->nkeys * sizeof(Id));
 872       sp = schema;
 873       dp = data->incoredata + data->incoreoffset[entry];
 874       if (data->incoredata)
 875         dp = data_read_id(dp, &oldschema);
 876       else
 877         oldschema = 0;
 878 #if 0
 879 fprintf(stderr, "oldschema %d\n", oldschema);
 880 fprintf(stderr, "schemata %d\n", data->schemata[oldschema]);
 881 fprintf(stderr, "schemadata %p\n", data->schemadata);
 882 #endif
 883       /* seen: -1: old data  0: skipped  >0: id + 1 */
 884       newschema = 0;
 885       oldcount = 0;
 886       for (keyp = data->schemadata + data->schemata[oldschema]; *keyp; keyp++)
 887         {
 888           if (seen[*keyp])
 889             {
 890               fprintf(stderr, "Inconsistent old data (key occured twice).\n");
 891               exit(1);
 892             }
 893           seen[*keyp] = -1;
 894           *sp++ = *keyp;
 895           oldcount++;
 896         }
 897       if (data->attrs[entry])
 898         for (keyp = data->attrs[entry]; *keyp; keyp += 2)
 899           {
 900             if (!seen[*keyp])
 901               {
 902                 newschema = 1;
 903                 *sp++ = *keyp;
 904               }
 905             seen[*keyp] = keyp[1] + 1;
 906           }
 907       *sp++ = 0;
 908       if (newschema)
 909         /* Ideally we'd like to sort the new schema here, to ensure
 910            schema equality independend of the ordering.  We can't do that
 911            yet.  For once see below (old ids need to come before new ids).
 912            An additional difficulty is that we also need to move
 913            the values with the keys.  */
 914         schemaid = addschema(data, schema, schematacache);
 915       else
 916         schemaid = oldschema;
 917
 918
 919       /* Now create data blob.  We walk through the (possibly new) schema
 920          and either copy over old data, or insert the new.  */
 921       /* XXX Here we rely on the fact that the (new) schema has the form
 922          o1 o2 o3 o4 ... | n1 n2 n3 ...
 923          (oX being the old keyids (possibly overwritten), and nX being
 924           the new keyids).  This rules out sorting the keyids in order
 925          to ensure a small schema count.  */
 926       data->incoreoffset[entry] = newincore.len;
 927       data_addid(&newincore, schemaid);
 928       for (keyp = data->schemadata + data->schemata[schemaid]; *keyp; keyp++)
 929         {
 930           key = data->keys + *keyp;
 931           ndp = dp;
 932           if (oldcount)
 933             {
 934               /* Skip the data associated with this old key.  */
 935               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 936                 {
 937                   ndp = data_skip(dp, TYPE_ID);
 938                   ndp = data_skip(ndp, TYPE_ID);
 939                 }
 940               else if (key->storage == KEY_STORAGE_INCORE)
 941                 ndp = data_skip(dp, key->type);
 942               oldcount--;
 943             }
 944           if (seen[*keyp] == -1)
 945             {
 946               /* If this key was an old one _and_ was not overwritten with
 947                  a different value copy over the old value (we skipped it
 948                  above).  */
 949               if (dp != ndp)
 950                 data_addblob(&newincore, dp, ndp - dp);
 951               seen[*keyp] = 0;
 952             }
 953           else if (seen[*keyp])
 954             {
 955               /* Otherwise we have a new value.  Parse it into the internal
 956                  form.  */
 957               struct extdata *xd;
 958               unsigned int oldvincorelen = 0;
 959
 960               xd = &newincore;
 961               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 962                 {
 963                   xd = &newvincore;
 964                   oldvincorelen = xd->len;
 965                 }
 966               id = seen[*keyp] - 1;
 967               switch (key->type)
 968                 {
 969                 case TYPE_VOID:
 970                 case TYPE_CONSTANT:
 971                   break;
 972                 case TYPE_STR:
 973                   data_addblob(xd, data->attrdata + id, strlen((char *)(data->attrdata + id)) + 1);
 974                   break;
 975                 case TYPE_ID:
 976                 case TYPE_NUM:
 977                 case TYPE_DIR:
 978                   data_addid(xd, id);
 979                   break;
 980                 case TYPE_DIRNUMNUMARRAY:
 981                   for (ida = data->attriddata + id; *ida; ida += 3)
 982                     {
 983                       data_addid(xd, ida[0]);
 984                       data_addid(xd, ida[1]);
 985                       data_addideof(xd, ida[2], ida[3] ? 0 : 1);
 986                     }
 987                   break;
 988                 default:
 989                   fprintf(stderr, "don't know how to handle type %d\n", key->type);
 990                   exit(1);
 991                 }
 992               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 993                 {
 994                   /* put offset/len in incore */
 995                   data_addid(&newincore, data->lastverticaloffset + oldvincorelen);
 996                   oldvincorelen = xd->len - oldvincorelen;
 997                   data_addid(&newincore, oldvincorelen);
 998                 }
 999             }
1000           dp = ndp;
1001         }
1002     }
1003   data->incoredata = newincore.buf;
1004   data->incoredatalen = newincore.len;
1005   data->incoredatafree = 0;
1006
1007   data->vincore = newvincore.buf;
1008   data->vincorelen = newvincore.len;
1009
1010   data->attrs = sat_free(data->attrs);
1011   data->attrdata = sat_free(data->attrdata);
1012   data->attrdatalen = 0;
1013 }
1014
1015 Id
1016 repodata_str2dir(Repodata *data, const char *dir, int create)
1017 {
1018   Id id, parent;
1019   const char *dire;
1020
1021   parent = 0;
1022   while (*dir == '/' && dir[1] == '/')
1023     dir++;
1024   while (*dir)
1025     {
1026       dire = strchrnul(dir, '/');
1027       if (data->localpool)
1028         id = stringpool_strn2id(&data->spool, dir, dire - dir, create);
1029       else
1030         id = strn2id(data->repo->pool, dir, dire - dir, create);
1031       if (!id)
1032         return 0;
1033       parent = dirpool_add_dir(&data->dirpool, parent, id, create);
1034       if (!parent)
1035         return 0;
1036       if (!*dire)
1037         break;
1038       dir = dire + 1;
1039       while (*dir == '/')
1040         dir++;
1041     }
1042   return parent;
1043 }
1044
/*
 * Compress LEN bytes starting at PAGE into CPAGE, which has room for
 * MAX bytes.  This is only a public front end for compress_buf(); the
 * result of compress_buf() is returned unchanged (presumably the
 * compressed length, with 0 meaning "did not fit" -- confirm against
 * compress_buf).
 */
unsigned int
repodata_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max)
{
  unsigned int clen;

  clen = compress_buf(page, len, cpage, max);
  return clen;
}
1050
1051 #define SOLV_ERROR_EOF              3
1052
/*
 * Read four bytes from FP and combine them into a 32 bit value, first
 * byte most significant (big-endian).
 *
 * If end of file (or a read error) is hit before all four bytes were
 * read, 0 is returned; callers cannot distinguish that from a stored
 * value of 0.
 */
static inline unsigned int
read_u32(FILE *fp)
{
  unsigned int value = 0;
  int remaining = 4;

  while (remaining-- > 0)
    {
      int byte = getc(fp);

      if (byte == EOF)
        return 0;
      value = (value << 8) | byte;
    }
  return value;
}
1068
1069 /* Try to either setup on-demand paging (using FP as backing
1070    file), or in case that doesn't work (FP not seekable) slurps in
1071    all pages and deactivates paging.  */
1072
1073 void
1074 repodata_read_or_setup_pages(Repodata *data, unsigned int pagesz, unsigned int blobsz)
1075 {
1076   FILE *fp = data->fp;
1077   unsigned int npages;
1078   unsigned int i;
1079   unsigned int can_seek;
1080   long cur_file_ofs;
1081   unsigned char buf[BLOB_PAGESIZE];
1082   if (pagesz != BLOB_PAGESIZE)
1083     {
1084       /* We could handle this by slurping in everything.  */
1085       fprintf (stderr, "non matching page size\n");
1086       exit (1);
1087     }
1088   can_seek = 1;
1089   if ((cur_file_ofs = ftell(fp)) < 0)
1090     can_seek = 0;
1091   clearerr (fp);
1092 #ifdef DEBUG_PAGING
1093   fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
1094 #endif
1095   npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
1096
1097   data->num_pages = npages;
1098   data->pages = sat_malloc2(npages, sizeof(data->pages[0]));
1099
1100   /* If we can't seek on our input we have to slurp in everything.  */
1101   if (!can_seek)
1102     data->blob_store = sat_malloc(npages * BLOB_PAGESIZE);
1103   for (i = 0; i < npages; i++)
1104     {
1105       unsigned int in_len = read_u32(fp);
1106       unsigned int compressed = in_len & 1;
1107       Attrblobpage *p = data->pages + i;
1108       in_len >>= 1;
1109 #ifdef DEBUG_PAGING
1110       fprintf (stderr, "page %d: len %d (%scompressed)\n",
1111                i, in_len, compressed ? "" : "not ");
1112 #endif
1113       if (can_seek)
1114         {
1115           cur_file_ofs += 4;
1116           p->mapped_at = -1;
1117           p->file_offset = cur_file_ofs;
1118           p->file_size = in_len * 2 + compressed;
1119           if (fseek(fp, in_len, SEEK_CUR) < 0)
1120             {
1121               perror ("fseek");
1122               fprintf (stderr, "can't seek after we thought we can\n");
1123               /* We can't fall back to non-seeking behaviour as we already
1124                  read over some data pages without storing them away.  */
1125               exit (1);
1126             }
1127           cur_file_ofs += in_len;
1128         }
1129       else
1130         {
1131           unsigned int out_len;
1132           void *dest = data->blob_store + i * BLOB_PAGESIZE;
1133           p->mapped_at = i * BLOB_PAGESIZE;
1134           p->file_offset = 0;
1135           p->file_size = 0;
1136           /* We can't seek, so suck everything in.  */
1137           if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
1138             {
1139               perror ("fread");
1140               exit (1);
1141             }
1142           if (compressed)
1143             {
1144               out_len = unchecked_decompress_buf(buf, in_len, dest, BLOB_PAGESIZE);
1145               if (out_len != BLOB_PAGESIZE
1146                   && i < npages - 1)
1147                 {
1148                   fprintf (stderr, "can't decompress\n");
1149                   exit (1);
1150                 }
1151             }
1152         }
1153     }
1154
1155   if (can_seek)
1156     {
1157       /* If we are here we were able to seek to all page
1158          positions, so activate paging by copying FP into our structure.
1159          We dup() the file, so that our callers can fclose() it and we
1160          still have it open.  But this means that we share file positions
1161          with the input filedesc.  So in case our caller reads it after us,
1162          and calls back into us we might change the file position unexpectedly
1163          to him.  */
1164       int fd = dup (fileno (fp));
1165       if (fd < 0)
1166         {
1167           /* Jeez!  What a bloody system, we can't dup() anymore.  */
1168           perror ("dup");
1169           exit (1);
1170         }
1171       /* XXX we don't close this yet anywhere.  */
1172       data->fp = fdopen (fd, "r");
1173       if (!data->fp)
1174         {
1175           /* My God!  What happened now?  */
1176           perror ("fdopen");
1177           exit (1);
1178         }
1179     }
1180 }