src/repodata.c

   1 /*
   2  * Copyright (c) 2007, Novell Inc.
   3  *
   4  * This program is licensed under the BSD license, read LICENSE.BSD
   5  * for further information
   6  */
   7
   8 /*
   9  * repodata.c
  10  *
  11  * Manage data coming from one repository
  12  *
  13  */
  14
  15 #define _GNU_SOURCE
  16 #include <string.h>
  17
  18 #include <stdio.h>
  19 #include <stdlib.h>
  20 #include <unistd.h>
  21 #include <assert.h>
  22
  23 #include "repo.h"
  24 #include "pool.h"
  25 #include "poolid_private.h"
  26 #include "util.h"
  27
  28 #include "fastlz.c"
  29
  30 unsigned char *
  31 data_read_id(unsigned char *dp, Id *idp)
  32 {
  33   Id x = 0;
  34   unsigned char c;
  35   for (;;)
  36     {
  37       c = *dp++;
  38       if (!(c & 0x80))
  39         {
  40           *idp = (x << 7) ^ c;
  41           return dp;
  42         }
  43       x = (x << 7) ^ c ^ 128;
  44     }
  45 }
  46
  47 static unsigned char *
  48 data_read_ideof(unsigned char *dp, Id *idp, int *eof)
  49 {
  50   Id x = 0;
  51   unsigned char c;
  52   for (;;)
  53     {
  54       c = *dp++;
  55       if (!(c & 0x80))
  56         {
  57           if (c & 0x40)
  58             {
  59               c ^= 0x40;
  60               *eof = 0;
  61             }
  62           else
  63             *eof = 1;
  64           *idp = (x << 6) ^ c;
  65           return dp;
  66         }
  67       x = (x << 7) ^ c ^ 128;
  68     }
  69 }
  70
  71 static unsigned char *
  72 data_skip(unsigned char *dp, int type)
  73 {
  74   unsigned char x;
  75   switch (type)
  76     {
  77     case TYPE_VOID:
  78     case TYPE_CONSTANT:
  79       return dp;
  80     case TYPE_ID:
  81     case TYPE_NUM:
  82     case TYPE_DIR:
  83       while ((*dp & 0x80) != 0)
  84         dp++;
  85       return dp + 1;
  86     case TYPE_IDARRAY:
  87       while ((*dp & 0xc0) != 0)
  88         dp++;
  89       return dp + 1;
  90     case TYPE_STR:
  91       while ((*dp) != 0)
  92         dp++;
  93       return dp + 1;
  94     case TYPE_DIRSTRARRAY:
  95       for (;;)
  96         {
  97           while ((*dp & 0x80) != 0)
  98             dp++;
  99           x = *dp++;
 100           while ((*dp) != 0)
 101             dp++;
 102           dp++;
 103           if (!(x & 0x40))
 104             return dp;
 105         }
 106     case TYPE_DIRNUMNUMARRAY:
 107       for (;;)
 108         {
 109           while ((*dp & 0x80) != 0)
 110             dp++;
 111           dp++;
 112           while ((*dp & 0x80) != 0)
 113             dp++;
 114           dp++;
 115           while ((*dp & 0x80) != 0)
 116             dp++;
 117           if (!(*dp & 0x40))
 118             return dp + 1;
 119           dp++;
 120         }
 121     default:
 122       fprintf(stderr, "unknown type in data_skip\n");
 123       exit(1);
 124     }
 125 }
 126
 127 static unsigned char *
 128 data_fetch(unsigned char *dp, KeyValue *kv, Repokey *key)
 129 {
 130   kv->eof = 1;
 131   if (!dp)
 132     return 0;
 133   switch (key->type)
 134     {
 135     case TYPE_VOID:
 136       return dp;
 137     case TYPE_CONSTANT:
 138       kv->num = key->size;
 139       return dp;
 140     case TYPE_STR:
 141       kv->str = (const char *)dp;
 142       return dp + strlen(kv->str) + 1;
 143     case TYPE_ID:
 144       return data_read_id(dp, &kv->id);
 145     case TYPE_NUM:
 146       return data_read_id(dp, &kv->num);
 147     case TYPE_IDARRAY:
 148       return data_read_ideof(dp, &kv->id, &kv->eof);
 149     case TYPE_DIR:
 150       return data_read_id(dp, &kv->id);
 151     case TYPE_DIRSTRARRAY:
 152       dp = data_read_ideof(dp, &kv->id, &kv->eof);
 153       kv->str = (const char *)dp;
 154       return dp + strlen(kv->str) + 1;
 155     case TYPE_DIRNUMNUMARRAY:
 156       dp = data_read_id(dp, &kv->id);
 157       dp = data_read_id(dp, &kv->num);
 158       return data_read_ideof(dp, &kv->num2, &kv->eof);
 159     default:
 160       return 0;
 161     }
 162 }
 163
 164 static unsigned char *
 165 forward_to_key(Repodata *data, Id key, Id schemaid, unsigned char *dp)
 166 {
 167   Id k, *keyp;
 168
 169   keyp = data->schemadata + data->schemata[schemaid];
 170   while ((k = *keyp++) != 0)
 171     {
 172       if (k == key)
 173         return dp;
 174       if (data->keys[k].storage == KEY_STORAGE_VERTICAL_OFFSET)
 175         {
 176           dp = data_skip(dp, TYPE_ID);  /* skip that offset */
 177           dp = data_skip(dp, TYPE_ID);  /* skip that length */
 178           continue;
 179         }
 180       if (data->keys[k].storage != KEY_STORAGE_INCORE)
 181         continue;
 182       dp = data_skip(dp, data->keys[k].type);
 183     }
 184   return 0;
 185 }
 186
 187 #define BLOB_PAGEBITS 15
 188 #define BLOB_PAGESIZE (1 << BLOB_PAGEBITS)
 189
 190 static unsigned char *
 191 load_page_range(Repodata *data, unsigned int pstart, unsigned int pend)
 192 {
 193 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
 194    and are consecutive.  Return a pointer to the mapping of PSTART.  */
 195   unsigned char buf[BLOB_PAGESIZE];
 196   unsigned int i;
 197
 198   /* Quick check in case all pages are there already and consecutive.  */
 199   for (i = pstart; i <= pend; i++)
 200     if (data->pages[i].mapped_at == -1
 201         || (i > pstart
 202             && data->pages[i].mapped_at
 203                != data->pages[i-1].mapped_at + BLOB_PAGESIZE))
 204       break;
 205   if (i > pend)
 206     return data->blob_store + data->pages[pstart].mapped_at;
 207
 208   /* Ensure that we can map the numbers of pages we need at all.  */
 209   if (pend - pstart + 1 > data->ncanmap)
 210     {
 211       unsigned int oldcan = data->ncanmap;
 212       data->ncanmap = pend - pstart + 1;
 213       if (data->ncanmap < 4)
 214         data->ncanmap = 4;
 215       data->mapped = sat_realloc2(data->mapped, data->ncanmap, sizeof(data->mapped[0]));
 216       memset (data->mapped + oldcan, 0, (data->ncanmap - oldcan) * sizeof (data->mapped[0]));
 217       data->blob_store = sat_realloc2(data->blob_store, data->ncanmap, BLOB_PAGESIZE);
 218 #ifdef DEBUG_PAGING
 219       fprintf (stderr, "PAGE: can map %d pages\n", data->ncanmap);
 220 #endif
 221     }
 222
 223   /* Now search for "cheap" space in our store.  Space is cheap if it's either
 224      free (very cheap) or contains pages we search for anyway.  */
 225
 226   /* Setup cost array.  */
 227   unsigned int cost[data->ncanmap];
 228   for (i = 0; i < data->ncanmap; i++)
 229     {
 230       unsigned int pnum = data->mapped[i];
 231       if (pnum == 0)
 232         cost[i] = 0;
 233       else
 234         {
 235           pnum--;
 236           Attrblobpage *p = data->pages + pnum;
 237           assert (p->mapped_at != -1);
 238           if (pnum >= pstart && pnum <= pend)
 239             cost[i] = 1;
 240           else
 241             cost[i] = 3;
 242         }
 243     }
 244
 245   /* And search for cheapest space.  */
 246   unsigned int best_cost = -1;
 247   unsigned int best = 0;
 248   unsigned int same_cost = 0;
 249   for (i = 0; i + pend - pstart < data->ncanmap; i++)
 250     {
 251       unsigned int c = cost[i];
 252       unsigned int j;
 253       for (j = 0; j < pend - pstart + 1; j++)
 254         c += cost[i+j];
 255       if (c < best_cost)
 256         best_cost = c, best = i;
 257       else if (c == best_cost)
 258         same_cost++;
 259       /* A null cost won't become better.  */
 260       if (c == 0)
 261         break;
 262     }
 263   /* If all places have the same cost we would thrash on slot 0.  Avoid
 264      this by doing a round-robin strategy in this case.  */
 265   if (same_cost == data->ncanmap - pend + pstart - 1)
 266     best = data->rr_counter++ % (data->ncanmap - pend + pstart);
 267
 268   /* So we want to map our pages from [best] to [best+pend-pstart].
 269      Use a very simple strategy, which doesn't make the best use of
 270      our resources, but works.  Throw away all pages in that range
 271      (even ours) then copy around ours (in case they were outside the
 272      range) or read them in.  */
 273   for (i = best; i < best + pend - pstart + 1; i++)
 274     {
 275       unsigned int pnum = data->mapped[i];
 276       if (pnum--
 277           /* If this page is exactly at the right place already,
 278              no need to evict it.  */
 279           && pnum != pstart + i - best)
 280         {
 281           /* Evict this page.  */
 282 #ifdef DEBUG_PAGING
 283           fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
 284 #endif
 285           cost[i] = 0;
 286           data->mapped[i] = 0;
 287           data->pages[pnum].mapped_at = -1;
 288         }
 289     }
 290
 291   /* Everything is free now.  Read in the pages we want.  */
 292   for (i = pstart; i <= pend; i++)
 293     {
 294       Attrblobpage *p = data->pages + i;
 295       unsigned int pnum = i - pstart + best;
 296       void *dest = data->blob_store + pnum * BLOB_PAGESIZE;
 297       if (p->mapped_at != -1)
 298         {
 299           if (p->mapped_at != pnum * BLOB_PAGESIZE)
 300             {
 301 #ifdef DEBUG_PAGING
 302               fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
 303 #endif
 304               /* Still mapped somewhere else, so just copy it from there.  */
 305               memcpy (dest, data->blob_store + p->mapped_at, BLOB_PAGESIZE);
 306               data->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
 307             }
 308         }
 309       else
 310         {
 311           unsigned int in_len = p->file_size;
 312           unsigned int compressed = in_len & 1;
 313           in_len >>= 1;
 314 #ifdef DEBUG_PAGING
 315           fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
 316 #endif
 317           /* Not mapped, so read in this page.  */
 318           if (fseek(data->fp, p->file_offset, SEEK_SET) < 0)
 319             {
 320               perror ("mapping fseek");
 321               exit (1);
 322             }
 323           if (fread(compressed ? buf : dest, in_len, 1, data->fp) != 1)
 324             {
 325               perror ("mapping fread");
 326               exit (1);
 327             }
 328           if (compressed)
 329             {
 330               unsigned int out_len;
 331               out_len = unchecked_decompress_buf(buf, in_len,
 332                                                   dest, BLOB_PAGESIZE);
 333               if (out_len != BLOB_PAGESIZE
 334                   && i < data->num_pages - 1)
 335                 {
 336                   fprintf (stderr, "can't decompress\n");
 337                   exit (1);
 338                 }
 339 #ifdef DEBUG_PAGING
 340               fprintf (stderr, " (expand %d to %d)", in_len, out_len);
 341 #endif
 342             }
 343 #ifdef DEBUG_PAGING
 344           fprintf (stderr, "\n");
 345 #endif
 346         }
 347       p->mapped_at = pnum * BLOB_PAGESIZE;
 348       data->mapped[pnum] = i + 1;
 349     }
 350   return data->blob_store + best * BLOB_PAGESIZE;
 351 }
 352
 353 static unsigned char *
 354 make_vertical_available(Repodata *data, Repokey *key, Id off, Id len)
 355 {
 356   unsigned char *dp;
 357   if (key->type == TYPE_VOID)
 358     return 0;
 359   if (off >= data->lastverticaloffset)
 360     {
 361       off -= data->lastverticaloffset;
 362       if (off + len > data->vincorelen)
 363         return 0;
 364       return data->vincore + off;
 365     }
 366   if (!data->fp)
 367     return 0;
 368   if (off + len > key->size)
 369     return 0;
 370   /* we now have the offset, go into vertical */
 371   off += data->verticaloffset[key - data->keys];
 372   dp = load_page_range(data, off / BLOB_PAGESIZE, (off + len - 1) / BLOB_PAGESIZE);
 373   if (dp)
 374     dp += off % BLOB_PAGESIZE;
 375   return dp;
 376 }
 377
 378 static inline unsigned char *
 379 get_data(Repodata *data, Repokey *key, unsigned char **dpp)
 380 {
 381   unsigned char *dp = *dpp;
 382
 383   if (!dp)
 384     return 0;
 385   if (key->storage == KEY_STORAGE_INCORE)
 386     {
 387       /* hmm, this is a bit expensive */
 388       *dpp = data_skip(dp, key->type);
 389       return dp;
 390     }
 391   else if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 392     {
 393       Id off, len;
 394       dp = data_read_id(dp, &off);
 395       dp = data_read_id(dp, &len);
 396       *dpp = dp;
 397       return make_vertical_available(data, key, off, len);
 398     }
 399   return 0;
 400 }
 401
 402
 403 const char *
 404 repodata_lookup_str(Repodata *data, Id entry, Id keyid)
 405 {
 406   Id schema;
 407   Repokey *key;
 408   Id id, *keyp;
 409   unsigned char *dp;
 410
 411   dp = data->incoredata + data->incoreoffset[entry];
 412   dp = data_read_id(dp, &schema);
 413   /* make sure the schema of this solvable contains the key */
 414   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 415     if (!*keyp)
 416       return 0;
 417   dp = forward_to_key(data, keyid, schema, dp);
 418   key = data->keys + keyid;
 419   dp = get_data(data, key, &dp);
 420   if (!dp)
 421     return 0;
 422   if (key->type == TYPE_STR)
 423     return (const char *)dp;
 424   if (key->type != TYPE_ID)
 425     return 0;
 426   /* id type, must either use global or local string store*/
 427   dp = data_read_id(dp, &id);
 428   if (data->localpool)
 429     return data->spool.stringspace + data->spool.strings[id];
 430   return id2str(data->repo->pool, id);
 431 }
 432
 433 int
 434 repodata_lookup_num(Repodata *data, Id entry, Id keyid, unsigned *value)
 435 {
 436   Id schema;
 437   Repokey *key;
 438   Id *keyp;
 439   KeyValue kv;
 440   unsigned char *dp;
 441
 442   *value = 0;
 443   dp = data->incoredata + data->incoreoffset[entry];
 444   dp = data_read_id(dp, &schema);
 445   /* make sure the schema of this solvable contains the key */
 446   for (keyp = data->schemadata + data->schemata[schema]; *keyp != keyid; keyp++)
 447     if (!*keyp)
 448       return 0;
 449   dp = forward_to_key(data, keyid, schema, dp);
 450   key = data->keys + keyid;
 451   dp = get_data(data, key, &dp);
 452   if (!dp)
 453     return 0;
 454   if (key->type == TYPE_NUM
 455       || key->type == TYPE_U32
 456       || key->type == TYPE_CONSTANT)
 457     {
 458       dp = data_fetch(dp, &kv, key);
 459       *value = kv.num;
 460       return 1;
 461     }
 462   return 0;
 463 }
 464
 465 void
 466 repodata_search(Repodata *data, Id entry, Id keyname, int (*callback)(void *cbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv), void *cbdata)
 467 {
 468   Id schema;
 469   Repokey *key;
 470   Id k, keyid, *kp, *keyp;
 471   unsigned char *dp, *ddp;
 472   int onekey = 0;
 473   int stop;
 474   KeyValue kv;
 475
 476   dp = data->incoredata + data->incoreoffset[entry];
 477   dp = data_read_id(dp, &schema);
 478   keyp = data->schemadata + data->schemata[schema];
 479   if (keyname)
 480     {
 481       /* search in a specific key */
 482       for (kp = keyp; (k = *kp++) != 0; )
 483         if (data->keys[k].name == keyname)
 484           break;
 485       if (k == 0)
 486         return;
 487       dp = forward_to_key(data, k, schema, dp);
 488       if (!dp)
 489         return;
 490       keyp = kp - 1;
 491       onekey = 1;
 492     }
 493   while ((keyid = *keyp++) != 0)
 494     {
 495       stop = 0;
 496       key = data->keys + keyid;
 497       ddp = get_data(data, key, &dp);
 498       do
 499         {
 500           ddp = data_fetch(ddp, &kv, key);
 501           if (!ddp)
 502             break;
 503           stop = callback(cbdata, data->repo->pool->solvables + data->start + entry, data, key, &kv);
 504         }
 505       while (!kv.eof && !stop);
 506       if (onekey || stop > SEARCH_NEXT_KEY)
 507         return;
 508     }
 509 }
 510
 511
 512 /* extend repodata so that it includes solvables p */
 513 void
 514 repodata_extend(Repodata *data, Id p)
 515 {
 516   if (data->start == data->end)
 517     data->start = data->end = p;
 518   if (p >= data->end)
 519     {
 520       int old = data->end - data->start;
 521       int new = p - data->end + 1;
 522       if (data->attrs)
 523         {
 524           data->attrs = sat_realloc2(data->attrs, old + new, sizeof(Id *));
 525           memset(data->attrs + old, 0, new * sizeof(Id *));
 526         }
 527       data->incoreoffset = sat_realloc2(data->incoreoffset, old + new, sizeof(Id));
 528       memset(data->incoreoffset + old, 0, new * sizeof(Id));
 529       data->end = p + 1;
 530     }
 531   if (p < data->start)
 532     {
 533       int old = data->end - data->start;
 534       int new = data->start - p;
 535       if (data->attrs)
 536         {
 537           data->attrs = sat_realloc2(data->attrs, old + new, sizeof(Id *));
 538           memmove(data->attrs + new, data->attrs, old * sizeof(Id *));
 539           memset(data->attrs, 0, new * sizeof(Id *));
 540         }
 541       data->incoreoffset = sat_realloc2(data->incoreoffset, old + new, sizeof(Id));
 542       memmove(data->incoreoffset + new, data->incoreoffset, old * sizeof(Id));
 543       memset(data->incoreoffset, 0, new * sizeof(Id));
 544       data->start = p;
 545     }
 546 }
 547
 548 static void
 549 repodata_insert_keyid(Repodata *data, Id entry, Id keyid, Id val, int overwrite)
 550 {
 551   Id *pp;
 552   int i;
 553   if (!data->attrs)
 554     data->attrs = sat_calloc(data->end - data->start + 1, sizeof(Id *));
 555   i = 0;
 556   if (data->attrs[entry])
 557     {
 558       for (pp = data->attrs[entry]; *pp; pp += 2)
 559         if (*pp == keyid)
 560           break;
 561       if (*pp)
 562         {
 563           if (overwrite)
 564             pp[1] = val;
 565           return;
 566         }
 567       i = pp - data->attrs[entry];
 568     }
 569   data->attrs[entry] = sat_realloc2(data->attrs[entry], i + 3, sizeof(Id));
 570   pp = data->attrs[entry] + i;
 571   *pp++ = keyid;
 572   *pp++ = val;
 573   *pp = 0;
 574 }
 575
 576 void
 577 repodata_set(Repodata *data, Id entry, Repokey *key, Id val)
 578 {
 579   Id keyid;
 580
 581   /* find key in keys */
 582   for (keyid = 1; keyid < data->nkeys; keyid++)
 583     if (data->keys[keyid].name == key->name && data->keys[keyid].type == key->type)
 584       {
 585         if (key->type == TYPE_CONSTANT && key->size != data->keys[keyid].size)
 586           continue;
 587         break;
 588       }
 589   if (keyid == data->nkeys)
 590     {
 591       /* allocate new key */
 592       data->keys = sat_realloc2(data->keys, data->nkeys + 1, sizeof(Repokey));
 593       data->keys[data->nkeys++] = *key;
 594       if (data->verticaloffset)
 595         {
 596           data->verticaloffset = sat_realloc2(data->verticaloffset, data->nkeys, sizeof(Id));
 597           data->verticaloffset[data->nkeys - 1] = 0;
 598         }
 599     }
 600   repodata_insert_keyid(data, entry, keyid, val, 1);
 601 }
 602
 603 void
 604 repodata_set_id(Repodata *data, Id entry, Id keyname, Id id)
 605 {
 606   Repokey key;
 607   key.name = keyname;
 608   key.type = TYPE_ID;
 609   key.size = 0;
 610   key.storage = KEY_STORAGE_INCORE;
 611   repodata_set(data, entry, &key, id);
 612 }
 613
 614 void
 615 repodata_set_num(Repodata *data, Id entry, Id keyname, Id num)
 616 {
 617   Repokey key;
 618   key.name = keyname;
 619   key.type = TYPE_NUM;
 620   key.size = 0;
 621   key.storage = KEY_STORAGE_INCORE;
 622   repodata_set(data, entry, &key, num);
 623 }
 624
 625 void
 626 repodata_set_poolstr(Repodata *data, Id entry, Id keyname, const char *str)
 627 {
 628   Repokey key;
 629   Id id;
 630   if (data->localpool)
 631     id = stringpool_str2id(&data->spool, str, 1);
 632   else
 633     id = str2id(data->repo->pool, str, 1);
 634   key.name = keyname;
 635   key.type = TYPE_ID;
 636   key.size = 0;
 637   key.storage = KEY_STORAGE_INCORE;
 638   repodata_set(data, entry, &key, id);
 639 }
 640
 641 void
 642 repodata_set_constant(Repodata *data, Id entry, Id keyname, Id constant)
 643 {
 644   Repokey key;
 645   key.name = keyname;
 646   key.type = TYPE_CONSTANT;
 647   key.size = constant;
 648   key.storage = KEY_STORAGE_INCORE;
 649   repodata_set(data, entry, &key, 0);
 650 }
 651
 652 void
 653 repodata_set_void(Repodata *data, Id entry, Id keyname)
 654 {
 655   Repokey key;
 656   key.name = keyname;
 657   key.type = TYPE_VOID;
 658   key.size = 0;
 659   key.storage = KEY_STORAGE_INCORE;
 660   repodata_set(data, entry, &key, 0);
 661 }
 662
 663 void
 664 repodata_set_str(Repodata *data, Id entry, Id keyname, const char *str)
 665 {
 666   Repokey key;
 667   int l;
 668
 669   l = strlen(str) + 1;
 670   key.name = keyname;
 671   key.type = TYPE_STR;
 672   key.size = 0;
 673   key.storage = KEY_STORAGE_INCORE;
 674   data->attrdata = sat_realloc(data->attrdata, data->attrdatalen + l);
 675   memcpy(data->attrdata + data->attrdatalen, str, l);
 676   repodata_set(data, entry, &key, data->attrdatalen);
 677   data->attrdatalen += l;
 678 }
 679
 680 void
 681 repodata_add_dirnumnum(Repodata *data, Id entry, Id keyname, Id dir, Id num, Id num2)
 682 {
 683   Id *ida, *pp;
 684   Repokey key;
 685
 686 #if 0
 687 fprintf(stderr, "repodata_add_dirnumnum %d %d %d %d (%d)\n", entry, dir, num, num2, data->attriddatalen);
 688 #endif
 689   if (data->attrs && data->attrs[entry])
 690     {
 691       for (pp = data->attrs[entry]; *pp; pp += 2)
 692         if (data->keys[*pp].name == keyname && data->keys[*pp].type == TYPE_DIRNUMNUMARRAY)
 693           break;
 694       if (*pp)
 695         {
 696           int oldsize = 0;
 697           for (ida = data->attriddata + pp[1]; *ida; ida += 3)
 698             oldsize += 3;
 699           if (ida + 1 == data->attriddata + data->attriddatalen)
 700             {
 701               /* this was the last entry, just append it */
 702               data->attriddata = sat_realloc2(data->attriddata, data->attriddatalen + 3, sizeof(Id));
 703               data->attriddatalen--;    /* overwrite terminating 0  */
 704             }
 705           else
 706             {
 707               /* too bad. move to back. */
 708               data->attriddata = sat_realloc2(data->attriddata, data->attriddatalen + oldsize + 4, sizeof(Id));
 709               memcpy(data->attriddata + data->attriddatalen, data->attriddata + pp[1], oldsize * sizeof(Id));
 710               pp[1] = data->attriddatalen;
 711               data->attriddatalen += oldsize;
 712             }
 713           data->attriddata[data->attriddatalen++] = dir;
 714           data->attriddata[data->attriddatalen++] = num;
 715           data->attriddata[data->attriddatalen++] = num2;
 716           data->attriddata[data->attriddatalen++] = 0;
 717           return;
 718         }
 719     }
 720   key.name = keyname;
 721   key.type = TYPE_DIRNUMNUMARRAY;
 722   key.size = 0;
 723   key.storage = KEY_STORAGE_INCORE;
 724   data->attriddata = sat_realloc2(data->attriddata, data->attriddatalen + 4, sizeof(Id));
 725   repodata_set(data, entry, &key, data->attriddatalen);
 726   data->attriddata[data->attriddatalen++] = dir;
 727   data->attriddata[data->attriddatalen++] = num;
 728   data->attriddata[data->attriddatalen++] = num2;
 729   data->attriddata[data->attriddatalen++] = 0;
 730 }
 731
 732 void
 733 repodata_merge_attrs (Repodata *data, Id dest, Id src)
 734 {
 735   Id *keyp;
 736   for (keyp = data->attrs[src]; *keyp; keyp += 2)
 737     repodata_insert_keyid(data, dest, keyp[0], keyp[1], 0);
 738 }
 739
 740 /*********************************/
 741
 742 /* unify with repo_write! */
 743
 744 #define EXTDATA_BLOCK 1023
 745 #define SCHEMATA_BLOCK 31
 746 #define SCHEMATADATA_BLOCK 255
 747
 748 struct extdata {
 749   unsigned char *buf;
 750   int len;
 751 };
 752
 753 static void
 754 data_addid(struct extdata *xd, Id x)
 755 {
 756   unsigned char *dp;
 757   xd->buf = sat_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
 758   dp = xd->buf + xd->len;
 759
 760   if (x >= (1 << 14))
 761     {
 762       if (x >= (1 << 28))
 763         *dp++ = (x >> 28) | 128;
 764       if (x >= (1 << 21))
 765         *dp++ = (x >> 21) | 128;
 766       *dp++ = (x >> 14) | 128;
 767     }
 768   if (x >= (1 << 7))
 769     *dp++ = (x >> 7) | 128;
 770   *dp++ = x & 127;
 771   xd->len = dp - xd->buf;
 772 }
 773
 774 static void
 775 data_addideof(struct extdata *xd, Id x, int eof)
 776 {
 777   if (x >= 64)
 778     x = (x & 63) | ((x & ~63) << 1);
 779   data_addid(xd, (eof ? x: x | 64));
 780 }
 781
 782 static void
 783 data_addblob(struct extdata *xd, unsigned char *blob, int len)
 784 {
 785   xd->buf = sat_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
 786   memcpy(xd->buf + xd->len, blob, len);
 787   xd->len += len;
 788 }
 789
 790 /*********************************/
 791
 792 static void
 793 addschema_prepare(Repodata *data, Id *schematacache)
 794 {
 795   int h, len, i;
 796   Id *sp;
 797
 798   memset(schematacache, 0, 256 * sizeof(Id));
 799   for (i = 0; i < data->nschemata; i++)
 800     {
 801       for (sp = data->schemadata + data->schemata[i], h = 0; *sp; len++)
 802         h = h * 7 + *sp++;
 803       h &= 255;
 804       schematacache[h] = i + 1;
 805     }
 806   data->schemadata = sat_extend_resize(data->schemadata, data->schemadatalen, sizeof(Id), SCHEMATADATA_BLOCK);
 807   data->schemata = sat_extend_resize(data->schemata, data->nschemata, sizeof(Id), SCHEMATA_BLOCK);
 808 }
 809
 810 static Id
 811 addschema(Repodata *data, Id *schema, Id *schematacache)
 812 {
 813   int h, len;
 814   Id *sp, cid;
 815
 816   for (sp = schema, len = 0, h = 0; *sp; len++)
 817     h = h * 7 + *sp++;
 818   h &= 255;
 819   len++;
 820
 821   cid = schematacache[h];
 822   if (cid)
 823     {
 824       cid--;
 825       if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
 826         return cid;
 827       /* cache conflict */
 828       for (cid = 0; cid < data->nschemata; cid++)
 829         if (!memcmp(data->schemadata + data->schemata[cid], schema, len * sizeof(Id)))
 830           return cid;
 831     }
 832   /* a new one. make room. */
 833   data->schemadata = sat_extend(data->schemadata, data->schemadatalen, len, sizeof(Id), SCHEMATADATA_BLOCK);
 834   data->schemata = sat_extend(data->schemata, data->nschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
 835   /* add schema */
 836   memcpy(data->schemadata + data->schemadatalen, schema, len * sizeof(Id));
 837   data->schemata[data->nschemata] = data->schemadatalen;
 838   data->schemadatalen += len;
 839   schematacache[h] = data->nschemata + 1;
 840 #if 0
 841 fprintf(stderr, "addschema: new schema\n");
 842 #endif
 843   return data->nschemata++;
 844 }
 845
 846
 847 void
 848 repodata_internalize(Repodata *data)
 849 {
 850   Repokey *key;
 851   Id id, entry, nentry, *ida;
 852   Id schematacache[256];
 853   Id schemaid, *schema, *sp, oldschema, *keyp, *seen;
 854   unsigned char *dp, *ndp;
 855   int newschema, oldcount;
 856   struct extdata newincore;
 857   struct extdata newvincore;
 858
 859   if (!data->attrs)
 860     return;
 861
 862   newvincore.buf = data->vincore;
 863   newvincore.len = data->vincorelen;
 864
 865   schema = sat_malloc2(data->nkeys, sizeof(Id));
 866   seen = sat_malloc2(data->nkeys, sizeof(Id));
 867
 868   /* Merge the data already existing (in data->schemata, ->incoredata and
 869      friends) with the new attributes in data->attrs[].  */
 870   nentry = data->end - data->start;
 871   addschema_prepare(data, schematacache);
 872   memset(&newincore, 0, sizeof(newincore));
 873   for (entry = 0; entry < nentry; entry++)
 874     {
 875       memset(seen, 0, data->nkeys * sizeof(Id));
 876       sp = schema;
 877       dp = data->incoredata + data->incoreoffset[entry];
 878       if (data->incoredata)
 879         dp = data_read_id(dp, &oldschema);
 880       else
 881         oldschema = 0;
 882 #if 0
 883 fprintf(stderr, "oldschema %d\n", oldschema);
 884 fprintf(stderr, "schemata %d\n", data->schemata[oldschema]);
 885 fprintf(stderr, "schemadata %p\n", data->schemadata);
 886 #endif
 887       /* seen: -1: old data  0: skipped  >0: id + 1 */
 888       newschema = 0;
 889       oldcount = 0;
 890       for (keyp = data->schemadata + data->schemata[oldschema]; *keyp; keyp++)
 891         {
 892           if (seen[*keyp])
 893             {
 894               fprintf(stderr, "Inconsistent old data (key occured twice).\n");
 895               exit(1);
 896             }
 897           seen[*keyp] = -1;
 898           *sp++ = *keyp;
 899           oldcount++;
 900         }
 901       if (data->attrs[entry])
 902         for (keyp = data->attrs[entry]; *keyp; keyp += 2)
 903           {
 904             if (!seen[*keyp])
 905               {
 906                 newschema = 1;
 907                 *sp++ = *keyp;
 908               }
 909             seen[*keyp] = keyp[1] + 1;
 910           }
 911       *sp++ = 0;
 912       if (newschema)
 913         /* Ideally we'd like to sort the new schema here, to ensure
 914            schema equality independend of the ordering.  We can't do that
 915            yet.  For once see below (old ids need to come before new ids).
 916            An additional difficulty is that we also need to move
 917            the values with the keys.  */
 918         schemaid = addschema(data, schema, schematacache);
 919       else
 920         schemaid = oldschema;
 921
 922
 923       /* Now create data blob.  We walk through the (possibly new) schema
 924          and either copy over old data, or insert the new.  */
 925       /* XXX Here we rely on the fact that the (new) schema has the form
 926          o1 o2 o3 o4 ... | n1 n2 n3 ...
 927          (oX being the old keyids (possibly overwritten), and nX being
 928           the new keyids).  This rules out sorting the keyids in order
 929          to ensure a small schema count.  */
 930       data->incoreoffset[entry] = newincore.len;
 931       data_addid(&newincore, schemaid);
 932       for (keyp = data->schemadata + data->schemata[schemaid]; *keyp; keyp++)
 933         {
 934           key = data->keys + *keyp;
 935           ndp = dp;
 936           if (oldcount)
 937             {
 938               /* Skip the data associated with this old key.  */
 939               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 940                 {
 941                   ndp = data_skip(dp, TYPE_ID);
 942                   ndp = data_skip(ndp, TYPE_ID);
 943                 }
 944               else if (key->storage == KEY_STORAGE_INCORE)
 945                 ndp = data_skip(dp, key->type);
 946               oldcount--;
 947             }
 948           if (seen[*keyp] == -1)
 949             {
 950               /* If this key was an old one _and_ was not overwritten with
 951                  a different value copy over the old value (we skipped it
 952                  above).  */
 953               if (dp != ndp)
 954                 data_addblob(&newincore, dp, ndp - dp);
 955               seen[*keyp] = 0;
 956             }
 957           else if (seen[*keyp])
 958             {
 959               /* Otherwise we have a new value.  Parse it into the internal
 960                  form.  */
 961               struct extdata *xd;
 962               unsigned int oldvincorelen = 0;
 963
 964               xd = &newincore;
 965               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 966                 {
 967                   xd = &newvincore;
 968                   oldvincorelen = xd->len;
 969                 }
 970               id = seen[*keyp] - 1;
 971               switch (key->type)
 972                 {
 973                 case TYPE_VOID:
 974                 case TYPE_CONSTANT:
 975                   break;
 976                 case TYPE_STR:
 977                   data_addblob(xd, data->attrdata + id, strlen((char *)(data->attrdata + id)) + 1);
 978                   break;
 979                 case TYPE_ID:
 980                 case TYPE_NUM:
 981                 case TYPE_DIR:
 982                   data_addid(xd, id);
 983                   break;
 984                 case TYPE_DIRNUMNUMARRAY:
 985                   for (ida = data->attriddata + id; *ida; ida += 3)
 986                     {
 987                       data_addid(xd, ida[0]);
 988                       data_addid(xd, ida[1]);
 989                       data_addideof(xd, ida[2], ida[3] ? 0 : 1);
 990                     }
 991                   break;
 992                 default:
 993                   fprintf(stderr, "don't know how to handle type %d\n", key->type);
 994                   exit(1);
 995                 }
 996               if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
 997                 {
 998                   /* put offset/len in incore */
 999                   data_addid(&newincore, data->lastverticaloffset + oldvincorelen);
1000                   oldvincorelen = xd->len - oldvincorelen;
1001                   data_addid(&newincore, oldvincorelen);
1002                 }
1003             }
1004           dp = ndp;
1005         }
1006     }
1007   data->incoredata = newincore.buf;
1008   data->incoredatalen = newincore.len;
1009   data->incoredatafree = 0;
1010
1011   data->vincore = newvincore.buf;
1012   data->vincorelen = newvincore.len;
1013
1014   data->attrs = sat_free(data->attrs);
1015   data->attrdata = sat_free(data->attrdata);
1016   data->attrdatalen = 0;
1017 }
1018
1019 Id
1020 repodata_str2dir(Repodata *data, const char *dir, int create)
1021 {
1022   Id id, parent;
1023   const char *dire;
1024
1025   parent = 0;
1026   while (*dir == '/' && dir[1] == '/')
1027     dir++;
1028   while (*dir)
1029     {
1030       dire = strchrnul(dir, '/');
1031       if (data->localpool)
1032         id = stringpool_strn2id(&data->spool, dir, dire - dir, create);
1033       else
1034         id = strn2id(data->repo->pool, dir, dire - dir, create);
1035       if (!id)
1036         return 0;
1037       parent = dirpool_add_dir(&data->dirpool, parent, id, create);
1038       if (!parent)
1039         return 0;
1040       if (!*dire)
1041         break;
1042       dir = dire + 1;
1043       while (*dir == '/')
1044         dir++;
1045     }
1046   return parent;
1047 }
1048
/* Public entry point for page compression: forwards the LEN bytes at PAGE
   and the output buffer CPAGE with its limit MAX to compress_buf() and
   returns whatever that call returns.  */
unsigned int
repodata_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max)
{
  unsigned int clen;

  clen = compress_buf(page, len, cpage, max);
  return clen;
}
1054
#define SOLV_ERROR_EOF              3

/* Read four bytes from FP and combine them, most significant byte first,
   into an unsigned 32-bit value.

   Returns 0 if the stream ends (or getc() fails) before all four bytes
   have been read, so a short read cannot be told apart from a stored
   value of zero.  */
static inline unsigned int
read_u32(FILE *fp)
{
  unsigned int value = 0;
  int remaining = 4;

  while (remaining-- > 0)
    {
      int byte = getc(fp);

      if (byte == EOF)
        return 0;
      value = (value << 8) | (unsigned int)byte;
    }
  return value;
}
1072
1073 /* Try to either setup on-demand paging (using FP as backing
1074    file), or in case that doesn't work (FP not seekable) slurps in
1075    all pages and deactivates paging.  */
1076
1077 void
1078 repodata_read_or_setup_pages(Repodata *data, unsigned int pagesz, unsigned int blobsz)
1079 {
1080   FILE *fp = data->fp;
1081   unsigned int npages;
1082   unsigned int i;
1083   unsigned int can_seek;
1084   long cur_file_ofs;
1085   unsigned char buf[BLOB_PAGESIZE];
1086   if (pagesz != BLOB_PAGESIZE)
1087     {
1088       /* We could handle this by slurping in everything.  */
1089       fprintf (stderr, "non matching page size\n");
1090       exit (1);
1091     }
1092   can_seek = 1;
1093   if ((cur_file_ofs = ftell(fp)) < 0)
1094     can_seek = 0;
1095   clearerr (fp);
1096 #ifdef DEBUG_PAGING
1097   fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
1098 #endif
1099   npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
1100
1101   data->num_pages = npages;
1102   data->pages = sat_malloc2(npages, sizeof(data->pages[0]));
1103
1104   /* If we can't seek on our input we have to slurp in everything.  */
1105   if (!can_seek)
1106     data->blob_store = sat_malloc(npages * BLOB_PAGESIZE);
1107   for (i = 0; i < npages; i++)
1108     {
1109       unsigned int in_len = read_u32(fp);
1110       unsigned int compressed = in_len & 1;
1111       Attrblobpage *p = data->pages + i;
1112       in_len >>= 1;
1113 #ifdef DEBUG_PAGING
1114       fprintf (stderr, "page %d: len %d (%scompressed)\n",
1115                i, in_len, compressed ? "" : "not ");
1116 #endif
1117       if (can_seek)
1118         {
1119           cur_file_ofs += 4;
1120           p->mapped_at = -1;
1121           p->file_offset = cur_file_ofs;
1122           p->file_size = in_len * 2 + compressed;
1123           if (fseek(fp, in_len, SEEK_CUR) < 0)
1124             {
1125               perror ("fseek");
1126               fprintf (stderr, "can't seek after we thought we can\n");
1127               /* We can't fall back to non-seeking behaviour as we already
1128                  read over some data pages without storing them away.  */
1129               exit (1);
1130             }
1131           cur_file_ofs += in_len;
1132         }
1133       else
1134         {
1135           unsigned int out_len;
1136           void *dest = data->blob_store + i * BLOB_PAGESIZE;
1137           p->mapped_at = i * BLOB_PAGESIZE;
1138           p->file_offset = 0;
1139           p->file_size = 0;
1140           /* We can't seek, so suck everything in.  */
1141           if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
1142             {
1143               perror ("fread");
1144               exit (1);
1145             }
1146           if (compressed)
1147             {
1148               out_len = unchecked_decompress_buf(buf, in_len, dest, BLOB_PAGESIZE);
1149               if (out_len != BLOB_PAGESIZE
1150                   && i < npages - 1)
1151                 {
1152                   fprintf (stderr, "can't decompress\n");
1153                   exit (1);
1154                 }
1155             }
1156         }
1157     }
1158
1159   if (can_seek)
1160     {
1161       /* If we are here we were able to seek to all page
1162          positions, so activate paging by copying FP into our structure.
1163          We dup() the file, so that our callers can fclose() it and we
1164          still have it open.  But this means that we share file positions
1165          with the input filedesc.  So in case our caller reads it after us,
1166          and calls back into us we might change the file position unexpectedly
1167          to him.  */
1168       int fd = dup (fileno (fp));
1169       if (fd < 0)
1170         {
1171           /* Jeez!  What a bloody system, we can't dup() anymore.  */
1172           perror ("dup");
1173           exit (1);
1174         }
1175       /* XXX we don't close this yet anywhere.  */
1176       data->fp = fdopen (fd, "r");
1177       if (!data->fp)
1178         {
1179           /* My God!  What happened now?  */
1180           perror ("fdopen");
1181           exit (1);
1182         }
1183     }
1184 }