Imported Upstream version 0.6.35

[platform/upstream/libsolv.git] / src / repodata.c
diff --git a/src/repodata.c b/src/repodata.c

index 4d0a46d..4ab5d18 100644 (file)
--- a/src/repodata.c
+++ b/src/repodata.c
@@ -188,7 +188,7 @@ repodata_schema2id(Repodata *data, Id *schema, int create)
        data->schematahash = schematahash = solv_calloc(256, sizeof(Id));
        for (i = 1; i < data->nschemata; i++)
         {
-         for (sp = data->schemadata + data->schemata[i], h = 0; *sp; len++)
+         for (sp = data->schemadata + data->schemata[i], h = 0; *sp;)
             h = h * 7 + *sp++;
           h &= 255;
           schematahash[h] = i;
@@ -268,17 +268,13 @@ repodata_str2dir(Repodata *data, const char *dir, int create)
  #endif
    const char *dire;
  
-  parent = 0;
    if (!*dir)
-    return 0;
+    return data->dirpool.ndirs ? 0 : dirpool_add_dir(&data->dirpool, 0, 0, create);
    while (*dir == '/' && dir[1] == '/')
      dir++;
    if (*dir == '/' && !dir[1])
-    {
-      if (data->dirpool.ndirs)
-        return 1;
-      return dirpool_add_dir(&data->dirpool, 0, 1, create);
-    }
+    return data->dirpool.ndirs ? 1 : dirpool_add_dir(&data->dirpool, 0, 1, create);
+  parent = 0;
  #ifdef DIRCACHE_SIZE
    dirs = dir;
    if (data->dircache)
@@ -355,6 +351,8 @@ repodata_dir2str(Repodata *data, Id did, const char *suf)
  
    if (!did)
      return suf ? suf : "";
+  if (did == 1 && !suf)
+    return "/";
    parent = did;
    while (parent)
      {
@@ -479,11 +477,11 @@ get_vertical_data(Repodata *data, Repokey *key, Id off, Id len)
    if (off >= data->lastverticaloffset)
      {
        off -= data->lastverticaloffset;
-      if (off + len > data->vincorelen)
+      if ((unsigned int)off + len > data->vincorelen)
         return 0;
        return data->vincore + off;
      }
-  if (off + len > key->size)
+  if ((unsigned int)off + len > key->size)
      return 0;
    /* we now have the offset, go into vertical */
    off += data->verticaloffset[key - data->keys];
@@ -597,7 +595,7 @@ solvid2data(Repodata *data, Id solvid, Id *schemap)
   * data lookup
   */
  
-static inline unsigned char *
+static unsigned char *
  find_key_data(Repodata *data, Id solvid, Id keyname, Repokey **keypp)
  {
    unsigned char *dp;
@@ -620,6 +618,8 @@ find_key_data(Repodata *data, Id solvid, Id keyname, Repokey **keypp)
      return 0;
    if (key->type == REPOKEY_TYPE_VOID || key->type == REPOKEY_TYPE_CONSTANT || key->type == REPOKEY_TYPE_CONSTANTID)
      return dp; /* no need to forward... */
+  if (key->storage != KEY_STORAGE_INCORE && key->storage != KEY_STORAGE_VERTICAL_OFFSET)
+    return 0;  /* get_data will not work, no need to forward */
    dp = forward_to_key(data, *kp, keyp, dp);
    if (!dp)
      return 0;
@@ -739,8 +739,13 @@ repodata_lookup_bin_checksum(Repodata *data, Id solvid, Id keyname, Id *typep)
    dp = find_key_data(data, solvid, keyname, &key);
    if (!dp)
      return 0;
-  if (!(key->type == REPOKEY_TYPE_MD5 || key->type == REPOKEY_TYPE_SHA1 || key->type == REPOKEY_TYPE_SHA256))
-    return 0;
+  switch (key->type)
+    {
+    case_CHKSUM_TYPES:
+      break;
+    default:
+      return 0;
+    }
    *typep = key->type;
    return dp;
  }
@@ -757,7 +762,7 @@ repodata_lookup_idarray(Repodata *data, Id solvid, Id keyname, Queue *q)
    dp = find_key_data(data, solvid, keyname, &key);
    if (!dp)
      return 0;
-  if (key->type != REPOKEY_TYPE_IDARRAY && key->type != REPOKEY_TYPE_REL_IDARRAY)
+  if (key->type != REPOKEY_TYPE_IDARRAY)
      return 0;
    for (;;)
      {
@@ -769,6 +774,24 @@ repodata_lookup_idarray(Repodata *data, Id solvid, Id keyname, Queue *q)
    return 1;
  }
  
+const void *
+repodata_lookup_binary(Repodata *data, Id solvid, Id keyname, int *lenp)
+{
+  unsigned char *dp;
+  Repokey *key;
+  Id len;
+
+  dp = find_key_data(data, solvid, keyname, &key);
+  if (!dp || key->type != REPOKEY_TYPE_BINARY)
+    {
+      *lenp = 0;
+      return 0;
+    }
+  dp = data_read_id(dp, &len);
+  *lenp = len;
+  return dp;
+}
+
  Id
  repodata_globalize_id(Repodata *data, Id id, int create)
  {
@@ -824,13 +847,69 @@ repodata_lookup_id_uninternalized(Repodata *data, Id solvid, Id keyname, Id void
    return 0;
  }
  
+const char *
+repodata_lookup_dirstrarray_uninternalized(Repodata *data, Id solvid, Id keyname, Id *didp, Id *iterp)
+{
+  Id *ap, did;
+  Id iter = *iterp;
+  if (iter == 0)       /* find key data */
+    {
+      if (!data->attrs)
+       return 0;
+      ap = data->attrs[solvid - data->start];
+      if (!ap)
+       return 0;
+      for (; *ap; ap += 2)
+       if (data->keys[*ap].name == keyname && data->keys[*ap].type == REPOKEY_TYPE_DIRSTRARRAY)
+         break;
+      if (!*ap)
+       return 0;
+      iter = ap[1];
+    }
+  did = *didp;
+  for (ap = data->attriddata + iter; *ap; ap += 2)
+    {
+      if (did && ap[0] != did)
+       continue;
+      *didp = ap[0];
+      *iterp = ap - data->attriddata + 2;
+      return (const char *)data->attrdata + ap[1];
+    }
+  *iterp = 0;
+  return 0;
+}
+
+const unsigned char *
+repodata_lookup_bin_checksum_uninternalized(Repodata *data, Id solvid, Id keyname, Id *typep)
+{
+  Id *ap;
+  if (!data->attrs)
+    return 0;
+  ap = data->attrs[solvid - data->start];
+  if (!ap)
+    return 0;
+  for (; *ap; ap += 2)
+    {
+      if (data->keys[*ap].name != keyname)
+       continue;
+      switch (data->keys[*ap].type)
+       {
+         case_CHKSUM_TYPES:
+           *typep = data->keys[*ap].type;
+           return (const unsigned char *)data->attrdata + ap[1];
+         default:
+           break;
+       }
+    }
+  return 0;
+}
  
  /************************************************************************
   * data search
   */
  
  
-int
+const char *
  repodata_stringify(Pool *pool, Repodata *data, Repokey *key, KeyValue *kv, int flags)
  {
    switch (key->type)
@@ -850,28 +929,26 @@ repodata_stringify(Pool *pool, Repodata *data, Repokey *key, KeyValue *kv, int f
           if (*s == ':' && s > kv->str)
             kv->str = s + 1;
         }
-      return 1;
+      return kv->str;
      case REPOKEY_TYPE_STR:
-      return 1;
+      return kv->str;
      case REPOKEY_TYPE_DIRSTRARRAY:
        if (!(flags & SEARCH_FILES))
-       return 1;       /* match just the basename */
+       return kv->str; /* match just the basename */
        if (kv->num)
-       return 1;       /* already stringified */
+       return kv->str; /* already stringified */
        /* Put the full filename into kv->str.  */
        kv->str = repodata_dir2str(data, kv->id, kv->str);
        kv->num = 1;     /* mark stringification */
-      return 1;
-    case REPOKEY_TYPE_MD5:
-    case REPOKEY_TYPE_SHA1:
-    case REPOKEY_TYPE_SHA256:
+      return kv->str;
+    case_CHKSUM_TYPES:
        if (!(flags & SEARCH_CHECKSUMS))
         return 0;       /* skip em */
        if (kv->num)
-       return 1;       /* already stringified */
+       return kv->str; /* already stringified */
        kv->str = repodata_chk2str(data, key->type, (const unsigned char *)kv->str);
        kv->num = 1;     /* mark stringification */
-      return 1;
+      return kv->str;
      default:
        return 0;
      }
@@ -939,7 +1016,11 @@ repodata_search(Repodata *data, Id solvid, Id keyname, int flags, int (*callback
        ddp = get_data(data, key, &dp, *keyp ? 1 : 0);
  
        if (key->type == REPOKEY_TYPE_DELETED)
-       continue;
+       {
+         if (onekey)
+           return;
+         continue;
+       }
        if (key->type == REPOKEY_TYPE_FLEXARRAY || key->type == REPOKEY_TYPE_FIXARRAY)
         {
           struct subschema_data subd;
@@ -1071,7 +1152,8 @@ solvabledata_fetch(Solvable *s, KeyValue *kv, Id keyname)
  int
  datamatcher_init(Datamatcher *ma, const char *match, int flags)
  {
-  ma->match = match ? solv_strdup(match) : 0;
+  match = match ? solv_strdup(match) : 0;
+  ma->match = match;
    ma->flags = flags;
    ma->error = 0;
    ma->matchdata = 0;
@@ -1264,10 +1346,7 @@ dataiterator_init_clone(Dataiterator *di, Dataiterator *from)
    if (di->dupstr)
      {
        if (di->dupstr == di->kv.str)
-       {
-         di->dupstr = solv_malloc(di->dupstrn);
-         memcpy(di->dupstr, from->dupstr, di->dupstrn);
-       }
+        di->dupstr = solv_memdup(di->dupstr, di->dupstrn);
        else
         {
           di->dupstr = 0;
@@ -1362,11 +1441,11 @@ dataiterator_free(Dataiterator *di)
      solv_free(di->dupstr);
  }
  
-static inline unsigned char *
+static unsigned char *
  dataiterator_find_keyname(Dataiterator *di, Id keyname)
  {
-  Id *keyp = di->keyp;
-  Repokey *keys = di->data->keys;
+  Id *keyp;
+  Repokey *keys = di->data->keys, *key;
    unsigned char *dp;
  
    for (keyp = di->keyp; *keyp; keyp++)
@@ -1374,6 +1453,11 @@ dataiterator_find_keyname(Dataiterator *di, Id keyname)
        break;
    if (!*keyp)
      return 0;
+  key = keys + *keyp;
+  if (key->type == REPOKEY_TYPE_DELETED)
+    return 0;
+  if (key->storage != KEY_STORAGE_INCORE && key->storage != KEY_STORAGE_VERTICAL_OFFSET)
+    return 0;          /* get_data will not work, no need to forward */
    dp = forward_to_key(di->data, *keyp, di->keyp, di->dp);
    if (!dp)
      return 0;
@@ -1381,6 +1465,25 @@ dataiterator_find_keyname(Dataiterator *di, Id keyname)
    return dp;
  }
  
+static inline int
+is_filelist_extension(Repodata *data)
+{
+  int j;
+  if (!repodata_precheck_keyname(data, SOLVABLE_FILELIST))
+    return 0;
+  for (j = 1; j < data->nkeys; j++)
+    if (data->keys[j].name == SOLVABLE_FILELIST)
+      break;
+  if (j == data->nkeys)
+    return 0;
+  if (data->state != REPODATA_AVAILABLE)
+    return 1;
+  for (j = 1; j < data->nkeys; j++)
+    if (data->keys[j].name != REPOSITORY_SOLVABLES && data->keys[j].name != SOLVABLE_FILELIST)
+      return 0;
+  return 1;
+}
+
  static int
  dataiterator_filelistcheck(Dataiterator *di)
  {
@@ -1388,18 +1491,46 @@ dataiterator_filelistcheck(Dataiterator *di)
    int needcomplete = 0;
    Repodata *data = di->data;
  
-  if ((di->matcher.flags & SEARCH_COMPLETE_FILELIST) != 0)
+  if ((di->flags & SEARCH_COMPLETE_FILELIST) != 0)
      if (!di->matcher.match
         || ((di->matcher.flags & (SEARCH_STRINGMASK|SEARCH_NOCASE)) != SEARCH_STRING
             && (di->matcher.flags & (SEARCH_STRINGMASK|SEARCH_NOCASE)) != SEARCH_GLOB)
-       || !repodata_filelistfilter_matches(di->data, di->matcher.match))
+       || !repodata_filelistfilter_matches(data, di->matcher.match))
        needcomplete = 1;
    if (data->state != REPODATA_AVAILABLE)
      return needcomplete ? 1 : 0;
-  for (j = 1; j < data->nkeys; j++)
-    if (data->keys[j].name != REPOSITORY_SOLVABLES && data->keys[j].name != SOLVABLE_FILELIST)
-      break;
-  return j == data->nkeys && !needcomplete ? 0 : 1;
+  if (!needcomplete)
+    {
+      /* we don't need the complete filelist, so ignore all stubs */
+      if (data->repo->nrepodata == 2)
+       return 1;
+      for (j = 1; j < data->nkeys; j++)
+       if (data->keys[j].name != REPOSITORY_SOLVABLES && data->keys[j].name != SOLVABLE_FILELIST)
+         return 1;
+      return 0;
+    }
+  else
+    {
+      /* we need the complete filelist. check if we have a filtered filelist and there's
+       * an extension with the complete filelist later on */
+      for (j = 1; j < data->nkeys; j++)
+       if (data->keys[j].name == SOLVABLE_FILELIST)
+         break;
+      if (j == data->nkeys)
+       return 0;       /* does not have filelist */
+      for (j = 1; j < data->nkeys; j++)
+       if (data->keys[j].name != REPOSITORY_SOLVABLES && data->keys[j].name != SOLVABLE_FILELIST)
+         break;
+      if (j == data->nkeys)
+       return 1;       /* this is the extension */
+      while (data - data->repo->repodata + 1 < data->repo->nrepodata)
+       {
+         data++;
+         if (is_filelist_extension(data))
+           return 0;
+       }
+      return 1;
+    }
  }
  
  int
@@ -1665,17 +1796,18 @@ dataiterator_step(Dataiterator *di)
  
        if (di->matcher.match)
         {
+         const char *str;
           /* simple pre-check so that we don't need to stringify */
           if (di->keyname == SOLVABLE_FILELIST && di->key->type == REPOKEY_TYPE_DIRSTRARRAY && (di->matcher.flags & SEARCH_FILES) != 0)
             if (!datamatcher_checkbasename(&di->matcher, di->kv.str))
               continue;
-         if (!repodata_stringify(di->pool, di->data, di->key, &di->kv, di->flags))
+         if (!(str = repodata_stringify(di->pool, di->data, di->key, &di->kv, di->flags)))
             {
               if (di->keyname && (di->key->type == REPOKEY_TYPE_FIXARRAY || di->key->type == REPOKEY_TYPE_FLEXARRAY))
                 return 1;
               continue;
             }
-         if (!datamatcher_match(&di->matcher, di->kv.str))
+         if (!datamatcher_match(&di->matcher, str))
             continue;
         }
        else
@@ -1758,8 +1890,7 @@ dataiterator_clonepos(Dataiterator *di, Dataiterator *from)
    if (from->dupstr && from->dupstr == from->kv.str)
      {
        di->dupstrn = from->dupstrn;
-      di->dupstr = solv_malloc(from->dupstrn);
-      memcpy(di->dupstr, from->dupstr, di->dupstrn);
+      di->dupstr = solv_memdup(from->dupstr, from->dupstrn);
      }
  }
  
@@ -1854,19 +1985,19 @@ dataiterator_jump_to_solvid(Dataiterator *di, Id solvid)
           return;
         }
        di->repoid = 0;
-      di->data = di->repo->repodata + di->pool->pos.repodataid;
-      di->repodataid = 0;
-      di->solvid = solvid;
-      di->state = di_enterrepo;
-      di->flags |= SEARCH_THISSOLVID;
-      return;
+      if (!di->pool->pos.repodataid && di->pool->pos.solvid == SOLVID_META) {
+       solvid = SOLVID_META;           /* META pos hack */
+      } else {
+        di->data = di->repo->repodata + di->pool->pos.repodataid;
+        di->repodataid = 0;
+      }
      }
-  if (solvid > 0)
+  else if (solvid > 0)
      {
        di->repo = di->pool->solvables[solvid].repo;
        di->repoid = 0;
      }
-  else if (di->repoid > 0)
+  if (di->repoid > 0)
      {
        if (!di->pool->urepos)
         {
@@ -1876,7 +2007,8 @@ dataiterator_jump_to_solvid(Dataiterator *di, Id solvid)
        di->repoid = 1;
        di->repo = di->pool->repos[di->repoid];
      }
-  di->repodataid = 1;
+  if (solvid != SOLVID_POS)
+    di->repodataid = 1;
    di->solvid = solvid;
    if (solvid)
      di->flags |= SEARCH_THISSOLVID;
@@ -1900,11 +2032,10 @@ dataiterator_jump_to_repo(Dataiterator *di, Repo *repo)
  int
  dataiterator_match(Dataiterator *di, Datamatcher *ma)
  {
-  if (!repodata_stringify(di->pool, di->data, di->key, &di->kv, di->flags))
+  const char *str;
+  if (!(str = repodata_stringify(di->pool, di->data, di->key, &di->kv, di->flags)))
      return 0;
-  if (!ma)
-    return 1;
-  return datamatcher_match(ma, di->kv.str);
+  return ma ? datamatcher_match(ma, str) : 1;
  }
  
  void
@@ -1916,9 +2047,7 @@ dataiterator_strdup(Dataiterator *di)
      return;
    switch (di->key->type)
      {
-    case REPOKEY_TYPE_MD5:
-    case REPOKEY_TYPE_SHA1:
-    case REPOKEY_TYPE_SHA256:
+    case_CHKSUM_TYPES:
      case REPOKEY_TYPE_DIRSTRARRAY:
        if (di->kv.num)  /* was it stringified into tmp space? */
          l = strlen(di->kv.str) + 1;
@@ -1934,14 +2063,8 @@ dataiterator_strdup(Dataiterator *di)
         case REPOKEY_TYPE_DIRSTRARRAY:
           l = strlen(di->kv.str) + 1;
           break;
-       case REPOKEY_TYPE_MD5:
-         l = SIZEOF_MD5;
-         break;
-       case REPOKEY_TYPE_SHA1:
-         l = SIZEOF_SHA1;
-         break;
-       case REPOKEY_TYPE_SHA256:
-         l = SIZEOF_SHA256;
+       case_CHKSUM_TYPES:
+         l = solv_chksum_len(di->key->type);
           break;
         case REPOKEY_TYPE_BINARY:
           l = di->kv.num;
@@ -2040,6 +2163,7 @@ repodata_extend_block(Repodata *data, Id start, Id num)
      return;
    if (!data->incoreoffset)
      {
+      /* this also means that data->attrs is NULL */
        data->incoreoffset = solv_calloc_block(num, sizeof(Id), REPODATA_BLOCK);
        data->start = start;
        data->end = start + num;
@@ -2684,6 +2808,47 @@ repodata_add_flexarray(Repodata *data, Id solvid, Id keyname, Id ghandle)
  }
  
  void
+repodata_set_kv(Repodata *data, Id solvid, Id keyname, Id keytype, KeyValue *kv)
+{
+  switch (keytype)
+    {
+    case REPOKEY_TYPE_ID:
+      repodata_set_id(data, solvid, keyname, kv->id);
+      break;
+    case REPOKEY_TYPE_CONSTANTID:
+      repodata_set_constantid(data, solvid, keyname, kv->id);
+      break;
+    case REPOKEY_TYPE_IDARRAY:
+      repodata_add_idarray(data, solvid, keyname, kv->id);
+      break;
+    case REPOKEY_TYPE_STR:
+      repodata_set_str(data, solvid, keyname, kv->str);
+      break;
+    case REPOKEY_TYPE_VOID:
+      repodata_set_void(data, solvid, keyname);
+      break;
+    case REPOKEY_TYPE_NUM:
+      repodata_set_num(data, solvid, keyname, SOLV_KV_NUM64(kv));
+      break;
+    case REPOKEY_TYPE_CONSTANT:
+      repodata_set_constant(data, solvid, keyname, kv->num);
+      break;
+    case REPOKEY_TYPE_DIRNUMNUMARRAY:
+      if (kv->id)
+        repodata_add_dirnumnum(data, solvid, keyname, kv->id, kv->num, kv->num2);
+      break;
+    case REPOKEY_TYPE_DIRSTRARRAY:
+      repodata_add_dirstr(data, solvid, keyname, kv->id, kv->str);
+      break;
+    case_CHKSUM_TYPES:
+      repodata_set_bin_checksum(data, solvid, keyname, keytype, (const unsigned char *)kv->str);
+      break;
+    default:
+      break;
+    }
+}
+
+void
  repodata_unset_uninternalized(Repodata *data, Id solvid, Id keyname)
  {
    Id *pp, *ap, **app;
@@ -2750,6 +2915,10 @@ repodata_swap_attrs(Repodata *data, Id dest, Id src)
    Id *tmpattrs;
    if (!data->attrs || dest == src)
      return;
+  if (dest < data->start || dest >= data->end)
+    repodata_extend(data, dest);
+  if (src < data->start || src >= data->end)
+    repodata_extend(data, src);
    tmpattrs = data->attrs[dest - data->start];
    data->attrs[dest - data->start] = data->attrs[src - data->start];
    data->attrs[src - data->start] = tmpattrs;
@@ -2840,6 +3009,96 @@ data_addblob(struct extdata *xd, unsigned char *blob, int len)
  
  /*********************************/
  
+/* this is to reduce memory usage when internalizing oversized repos */
+static void
+compact_attrdata(Repodata *data, int entry, int nentry)
+{
+  int i;
+  unsigned int attrdatastart = data->attrdatalen;
+  unsigned int attriddatastart = data->attriddatalen;
+  if (attrdatastart < 1024 * 1024 * 4 && attriddatastart < 1024 * 1024)
+    return;
+  for (i = entry; i < nentry; i++)
+    {
+      Id v, *attrs = data->attrs[i];
+      if (!attrs)
+       continue;
+      for (; *attrs; attrs += 2)
+       {
+         switch (data->keys[*attrs].type)
+           {
+           case REPOKEY_TYPE_STR:
+           case REPOKEY_TYPE_BINARY:
+           case_CHKSUM_TYPES:
+             if ((unsigned int)attrs[1] < attrdatastart)
+                attrdatastart = attrs[1];
+             break;
+           case REPOKEY_TYPE_DIRSTRARRAY:
+             for (v = attrs[1]; data->attriddata[v] ; v += 2)
+               if (data->attriddata[v + 1] < attrdatastart)
+                 attrdatastart = data->attriddata[v + 1];
+             /* FALLTHROUGH */
+           case REPOKEY_TYPE_IDARRAY:
+           case REPOKEY_TYPE_DIRNUMNUMARRAY:
+             if ((unsigned int)attrs[1] < attriddatastart)
+               attriddatastart = attrs[1];
+             break;
+           case REPOKEY_TYPE_FIXARRAY:
+           case REPOKEY_TYPE_FLEXARRAY:
+             return;
+           default:
+             break;
+           }
+       }
+    }
+#if 0
+  printf("compact_attrdata %d %d\n", entry, nentry);
+  printf("attrdatastart: %d\n", attrdatastart);
+  printf("attriddatastart: %d\n", attriddatastart);
+#endif
+  if (attrdatastart < 1024 * 1024 * 4 && attriddatastart < 1024 * 1024)
+    return;
+  for (i = entry; i < nentry; i++)
+    {
+      Id v, *attrs = data->attrs[i];
+      if (!attrs)
+       continue;
+      for (; *attrs; attrs += 2)
+       {
+         switch (data->keys[*attrs].type)
+           {
+           case REPOKEY_TYPE_STR:
+           case REPOKEY_TYPE_BINARY:
+           case_CHKSUM_TYPES:
+             attrs[1] -= attrdatastart;
+             break;
+           case REPOKEY_TYPE_DIRSTRARRAY:
+             for (v = attrs[1]; data->attriddata[v] ; v += 2)
+               data->attriddata[v + 1] -= attrdatastart;
+             /* FALLTHROUGH */
+           case REPOKEY_TYPE_IDARRAY:
+           case REPOKEY_TYPE_DIRNUMNUMARRAY:
+             attrs[1] -= attriddatastart;
+             break;
+           default:
+             break;
+           }
+       }
+    }
+  if (attrdatastart)
+    {
+      data->attrdatalen -= attrdatastart;
+      memmove(data->attrdata, data->attrdata + attrdatastart, data->attrdatalen);
+      data->attrdata = solv_extend_resize(data->attrdata, data->attrdatalen, 1, REPODATA_ATTRDATA_BLOCK);
+    }
+  if (attriddatastart)
+    {
+      data->attriddatalen -= attriddatastart;
+      memmove(data->attriddata, data->attriddata + attriddatastart, data->attriddatalen * sizeof(Id));
+      data->attriddata = solv_extend_resize(data->attriddata, data->attriddatalen, sizeof(Id), REPODATA_ATTRIDDATA_BLOCK);
+    }
+}
+
  /* internalalize some key into incore/vincore data */
  
  static void
@@ -2864,6 +3123,7 @@ repodata_serialize_key(Repodata *data, struct extdata *newincore,
      case REPOKEY_TYPE_VOID:
      case REPOKEY_TYPE_CONSTANT:
      case REPOKEY_TYPE_CONSTANTID:
+    case REPOKEY_TYPE_DELETED:
        break;
      case REPOKEY_TYPE_STR:
        data_addblob(xd, data->attrdata + val, strlen((char *)(data->attrdata + val)) + 1);
@@ -2874,9 +3134,18 @@ repodata_serialize_key(Repodata *data, struct extdata *newincore,
      case REPOKEY_TYPE_SHA1:
        data_addblob(xd, data->attrdata + val, SIZEOF_SHA1);
        break;
+    case REPOKEY_TYPE_SHA224:
+      data_addblob(xd, data->attrdata + val, SIZEOF_SHA224);
+      break;
      case REPOKEY_TYPE_SHA256:
        data_addblob(xd, data->attrdata + val, SIZEOF_SHA256);
        break;
+    case REPOKEY_TYPE_SHA384:
+      data_addblob(xd, data->attrdata + val, SIZEOF_SHA384);
+      break;
+    case REPOKEY_TYPE_SHA512:
+      data_addblob(xd, data->attrdata + val, SIZEOF_SHA512);
+      break;
      case REPOKEY_TYPE_NUM:
        if (val & 0x80000000)
         {
@@ -2925,29 +3194,30 @@ repodata_serialize_key(Repodata *data, struct extdata *newincore,
             sp = schema;
             kp = data->xattrs[-*ida];
             if (!kp)
-             continue;
+             continue;         /* ignore empty elements */
             num++;
-           for (;*kp; kp += 2)
+           for (; *kp; kp += 2)
               *sp++ = *kp;
             *sp = 0;
             if (!schemaid)
               schemaid = repodata_schema2id(data, schema, 1);
             else if (schemaid != repodata_schema2id(data, schema, 0))
               {
-               pool_debug(data->repo->pool, SOLV_FATAL, "fixarray substructs with different schemas\n");
-               exit(1);
+               pool_debug(data->repo->pool, SOLV_ERROR, "repodata_serialize_key: fixarray substructs with different schemas\n");
+               num = 0;
+               break;
               }
           }
+       data_addid(xd, num);
         if (!num)
           break;
-       data_addid(xd, num);
         data_addid(xd, schemaid);
         for (ida = data->attriddata + val; *ida; ida++)
           {
             Id *kp = data->xattrs[-*ida];
             if (!kp)
               continue;
-           for (;*kp; kp += 2)
+           for (; *kp; kp += 2)
               repodata_serialize_key(data, newincore, newvincore, schema, data->keys + *kp, kp[1]);
           }
         break;
@@ -2979,7 +3249,7 @@ repodata_serialize_key(Repodata *data, struct extdata *newincore,
         break;
        }
      default:
-      pool_debug(data->repo->pool, SOLV_FATAL, "don't know how to handle type %d\n", key->type);
+      pool_debug(data->repo->pool, SOLV_FATAL, "repodata_serialize_key: don't know how to handle type %d\n", key->type);
        exit(1);
      }
    if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
@@ -2991,21 +3261,66 @@ repodata_serialize_key(Repodata *data, struct extdata *newincore,
      }
  }
  
+/* create a circular linked list of all keys that share
+ * the same keyname */
+static Id *
+calculate_keylink(Repodata *data)
+{
+  int i, j;
+  Id *link;
+  Id maxkeyname = 0, *keytable = 0;
+  link = solv_calloc(data->nkeys, sizeof(Id));
+  if (data->nkeys <= 2)
+    return link;
+  for (i = 1; i < data->nkeys; i++)
+    {
+      Id n = data->keys[i].name;
+      if (n >= maxkeyname)
+       {
+         keytable = solv_realloc2(keytable, n + 128, sizeof(Id));
+         memset(keytable + maxkeyname, 0, (n + 128 - maxkeyname) * sizeof(Id));
+         maxkeyname = n + 128;
+       }
+      j = keytable[n];
+      if (j)
+       link[i] = link[j];
+      else
+       j = i;
+      link[j] = i;
+      keytable[n] = i;
+    }
+  /* remove links that just point to themselves */
+  for (i = 1; i < data->nkeys; i++)
+    if (link[i] == i)
+      link[i] = 0;
+  solv_free(keytable);
+  return link;
+}
+
  void
  repodata_internalize(Repodata *data)
  {
    Repokey *key, solvkey;
    Id entry, nentry;
-  Id schemaid, *schema, *sp, oldschema, *keyp, *keypstart, *seen;
+  Id schemaid, keyid, *schema, *sp, oldschemaid, *keyp, *seen;
+  Offset *oldincoreoffs = 0;
+  int schemaidx;
    unsigned char *dp, *ndp;
-  int newschema, oldcount;
+  int neednewschema;
    struct extdata newincore;
    struct extdata newvincore;
    Id solvkeyid;
+  Id *keylink;
+  int haveoldkl;
  
    if (!data->attrs && !data->xattrs)
      return;
  
+#if 0
+  printf("repodata_internalize %d\n", data->repodataid);
+  printf("  attr data: %d K\n", data->attrdatalen / 1024);
+  printf("  attrid data: %d K\n", data->attriddatalen / (1024 / 4));
+#endif
    newvincore.buf = data->vincore;
    newvincore.len = data->vincorelen;
  
@@ -3029,143 +3344,197 @@ repodata_internalize(Repodata *data)
    data->mainschema = 0;
    data->mainschemaoffsets = solv_free(data->mainschemaoffsets);
  
+  keylink = calculate_keylink(data);
    /* join entry data */
    /* we start with the meta data, entry -1 */
    for (entry = -1; entry < nentry; entry++)
      {
-      memset(seen, 0, data->nkeys * sizeof(Id));
-      oldschema = 0;
+      oldschemaid = 0;
        dp = data->incoredata;
        if (dp)
         {
           dp += entry >= 0 ? data->incoreoffset[entry] : 1;
-          dp = data_read_id(dp, &oldschema);
+          dp = data_read_id(dp, &oldschemaid);
         }
+      memset(seen, 0, data->nkeys * sizeof(Id));
  #if 0
-fprintf(stderr, "oldschema %d\n", oldschema);
-fprintf(stderr, "schemata %d\n", data->schemata[oldschema]);
+fprintf(stderr, "oldschemaid %d\n", oldschemaid);
+fprintf(stderr, "schemata %d\n", data->schemata[oldschemaid]);
  fprintf(stderr, "schemadata %p\n", data->schemadata);
  #endif
-      /* seen: -1: old data  0: skipped  >0: id + 1 */
-      newschema = 0;
-      oldcount = 0;
+
+      /* seen: -1: old data,  0: skipped,  >0: id + 1 */
+      neednewschema = 0;
        sp = schema;
-      for (keyp = data->schemadata + data->schemata[oldschema]; *keyp; keyp++)
+      haveoldkl = 0;
+      for (keyp = data->schemadata + data->schemata[oldschemaid]; *keyp; keyp++)
         {
           if (seen[*keyp])
             {
-             pool_debug(data->repo->pool, SOLV_FATAL, "Inconsistent old data (key occured twice).\n");
-             exit(1);
+             /* oops, should not happen */
+             neednewschema = 1;
+             continue;
             }
-         seen[*keyp] = -1;
+         seen[*keyp] = -1;     /* use old marker */
           *sp++ = *keyp;
-         oldcount++;
+         if (keylink[*keyp])
+           haveoldkl = 1;      /* potential keylink conflict */
         }
-      if (entry >= 0)
-       keyp = data->attrs ? data->attrs[entry] : 0;
-      else
+
+      /* strip solvables key */
+      if (entry < 0 && solvkeyid && seen[solvkeyid])
         {
-         /* strip solvables key */
           *sp = 0;
           for (sp = keyp = schema; *sp; sp++)
             if (*sp != solvkeyid)
               *keyp++ = *sp;
-           else
-             oldcount--;
           sp = keyp;
           seen[solvkeyid] = 0;
-         keyp = data->xattrs ? data->xattrs[1] : 0;
+         neednewschema = 1;
         }
+
+      /* add new entries */
+      if (entry >= 0)
+       keyp = data->attrs ? data->attrs[entry] : 0;
+      else
+        keyp = data->xattrs ? data->xattrs[1] : 0;
        if (keyp)
          for (; *keyp; keyp += 2)
           {
             if (!seen[*keyp])
               {
-               newschema = 1;
+               neednewschema = 1;
                 *sp++ = *keyp;
+               if (haveoldkl && keylink[*keyp])                /* this should be pretty rare */
+                 {
+                   Id kl;
+                   for (kl = keylink[*keyp]; kl != *keyp; kl = keylink[kl])
+                     if (seen[kl] == -1)
+                       {
+                         /* replacing old key kl, remove from schema and seen */
+                         Id *osp;
+                         for (osp = schema; osp < sp; osp++)
+                           if (*osp == kl)
+                             {
+                               memmove(osp, osp + 1, (sp - osp) * sizeof(Id));
+                               sp--;
+                               seen[kl] = 0;
+                               break;
+                             }
+                       }
+                 }
               }
             seen[*keyp] = keyp[1] + 1;
           }
+
+      /* add solvables key if needed */
        if (entry < 0 && data->end != data->start)
         {
-         *sp++ = solvkeyid;
-         newschema = 1;
+         *sp++ = solvkeyid;    /* always last in schema */
+         neednewschema = 1;
         }
+
+      /* commit schema */
        *sp = 0;
-      if (newschema)
+      if (neednewschema)
          /* Ideally we'd like to sort the new schema here, to ensure
-          schema equality independend of the ordering.  We can't do that
-          yet.  For once see below (old ids need to come before new ids).
-          An additional difficulty is that we also need to move
-          the values with the keys.  */
+          schema equality independent of the ordering. */
         schemaid = repodata_schema2id(data, schema, 1);
        else
-       schemaid = oldschema;
+       schemaid = oldschemaid;
+
+      if (entry < 0)
+       {
+         data->mainschemaoffsets = solv_calloc(sp - schema, sizeof(Id));
+         data->mainschema = schemaid;
+       }
+
+      /* find offsets in old incore data */
+      if (oldschemaid)
+       {
+         Id *lastneeded = 0;
+         for (sp = data->schemadata + data->schemata[oldschemaid]; *sp; sp++)
+           if (seen[*sp] == -1)
+             lastneeded = sp + 1;
+         if (lastneeded)
+           {
+             if (!oldincoreoffs)
+               oldincoreoffs = solv_malloc2(data->nkeys, 2 * sizeof(Offset));
+             for (sp = data->schemadata + data->schemata[oldschemaid]; sp != lastneeded; sp++)
+               {
+                 /* Skip the data associated with this old key.  */
+                 key = data->keys + *sp;
+                 ndp = dp;
+                 if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
+                   {
+                     ndp = data_skip(ndp, REPOKEY_TYPE_ID);
+                     ndp = data_skip(ndp, REPOKEY_TYPE_ID);
+                   }
+                 else if (key->storage == KEY_STORAGE_INCORE)
+                   ndp = data_skip_key(data, ndp, key);
+                 oldincoreoffs[*sp * 2] = dp - data->incoredata;
+                 oldincoreoffs[*sp * 2 + 1] = ndp - dp;
+                 dp = ndp;
+               }
+           }
+       }
  
+      /* just copy over the complete old entry (including the schemaid) if there was no new data */
+      if (entry >= 0 && !neednewschema && oldschemaid && (!data->attrs || !data->attrs[entry]) && dp)
+       {
+         ndp = data->incoredata + data->incoreoffset[entry];
+         data->incoreoffset[entry] = newincore.len;
+         data_addblob(&newincore, ndp, dp - ndp);
+         goto entrydone;
+       }
  
        /* Now create data blob.  We walk through the (possibly new) schema
          and either copy over old data, or insert the new.  */
-      /* XXX Here we rely on the fact that the (new) schema has the form
-        o1 o2 o3 o4 ... | n1 n2 n3 ...
-        (oX being the old keyids (possibly overwritten), and nX being
-         the new keyids).  This rules out sorting the keyids in order
-        to ensure a small schema count.  */
        if (entry >= 0)
          data->incoreoffset[entry] = newincore.len;
        data_addid(&newincore, schemaid);
-      if (entry == -1)
-       {
-         data->mainschema = schemaid;
-         data->mainschemaoffsets = solv_calloc(sp - schema, sizeof(Id));
-       }
-      keypstart = data->schemadata + data->schemata[schemaid];
-      for (keyp = keypstart; *keyp; keyp++)
+
+      /* we don't use a pointer to the schemadata here as repodata_serialize_key
+       * may call repodata_schema2id() which might realloc our schemadata */
+      for (schemaidx = data->schemata[schemaid]; (keyid = data->schemadata[schemaidx]) != 0; schemaidx++)
         {
-         if (entry == -1)
-           data->mainschemaoffsets[keyp - keypstart] = newincore.len;
-         if (*keyp == solvkeyid)
-           {
-             /* add flexarray entry count */
-             data_addid(&newincore, data->end - data->start);
-             break;
-           }
-         key = data->keys + *keyp;
-#if 0
-         fprintf(stderr, "internalize %d(%d):%s:%s\n", entry, entry + data->start, pool_id2str(data->repo->pool, key->name), pool_id2str(data->repo->pool, key->type));
-#endif
-         ndp = dp;
-         if (oldcount)
+         if (entry < 0)
             {
-             /* Skip the data associated with this old key.  */
-             if (key->storage == KEY_STORAGE_VERTICAL_OFFSET)
+             data->mainschemaoffsets[schemaidx - data->schemata[schemaid]] = newincore.len;
+             if (keyid == solvkeyid)
                 {
-                 ndp = data_skip(dp, REPOKEY_TYPE_ID);
-                 ndp = data_skip(ndp, REPOKEY_TYPE_ID);
+                 /* add flexarray entry count */
+                 data_addid(&newincore, data->end - data->start);
+                 break;        /* always the last entry */
                 }
-             else if (key->storage == KEY_STORAGE_INCORE)
-               ndp = data_skip_key(data, dp, key);
-             oldcount--;
             }
-         if (seen[*keyp] == -1)
+         if (seen[keyid] == -1)
             {
-             /* If this key was an old one _and_ was not overwritten with
-                a different value copy over the old value (we skipped it
-                above).  */
-             if (dp != ndp)
-               data_addblob(&newincore, dp, ndp - dp);
-             seen[*keyp] = 0;
+             if (oldincoreoffs[keyid * 2 + 1])
+               data_addblob(&newincore, data->incoredata + oldincoreoffs[keyid * 2], oldincoreoffs[keyid * 2 + 1]);
             }
-         else if (seen[*keyp])
+         else if (seen[keyid])
+           repodata_serialize_key(data, &newincore, &newvincore, schema, data->keys + keyid, seen[keyid] - 1);
+       }
+
+entrydone:
+      /* free memory */
+      if (entry >= 0 && data->attrs)
+       {
+         if (data->attrs[entry])
+           data->attrs[entry] = solv_free(data->attrs[entry]);
+         if (entry && entry % 4096 == 0 && data->nxattrs <= 2 && entry + 64 < nentry)
             {
-             /* Otherwise we have a new value.  Parse it into the internal
-                form.  */
-             repodata_serialize_key(data, &newincore, &newvincore,
-                                    schema, key, seen[*keyp] - 1);
+             compact_attrdata(data, entry + 1, nentry);        /* try to free some memory */
+#if 0
+             printf("  attr data: %d K\n", data->attrdatalen / 1024);
+             printf("  attrid data: %d K\n", data->attriddatalen / (1024 / 4));
+             printf("  incore data: %d K\n", newincore.len / 1024);
+             printf("  sum: %d K\n", (newincore.len + data->attrdatalen + data->attriddatalen * 4) / 1024);
+             /* malloc_stats(); */
+#endif
             }
-         dp = ndp;
         }
-      if (entry >= 0 && data->attrs && data->attrs[entry])
-       data->attrs[entry] = solv_free(data->attrs[entry]);
      }
    /* free all xattrs */
    for (entry = 0; entry < data->nxattrs; entry++)
@@ -3179,6 +3548,8 @@ fprintf(stderr, "schemadata %p\n", data->schemadata);
    data->lastdatalen = 0;
    solv_free(schema);
    solv_free(seen);
+  solv_free(keylink);
+  solv_free(oldincoreoffs);
    repodata_free_schemahash(data);
  
    solv_free(data->incoredata);
@@ -3186,7 +3557,6 @@ fprintf(stderr, "schemadata %p\n", data->schemadata);
    data->incoredatalen = newincore.len;
    data->incoredatafree = 0;
  
-  solv_free(data->vincore);
    data->vincore = newvincore.buf;
    data->vincorelen = newvincore.len;
  
@@ -3197,6 +3567,10 @@ fprintf(stderr, "schemadata %p\n", data->schemadata);
    data->attrdatalen = 0;
    data->attriddatalen = 0;
    data->attrnum64datalen = 0;
+#if 0
+  printf("repodata_internalize %d done\n", data->repodataid);
+  printf("  incore data: %d K\n", data->incoredatalen / 1024);
+#endif
  }
  
  void
@@ -3216,6 +3590,7 @@ repodata_load_stub(Repodata *data)
    Pool *pool = repo->pool;
    int r, i;
    struct _Pool_tmpspace oldtmpspace;
+  Datapos oldpos;
  
    if (!pool->loadcallback)
      {
@@ -3224,21 +3599,53 @@ repodata_load_stub(Repodata *data)
      }
    data->state = REPODATA_LOADING;
  
-  /* save tmp space */
+  /* save tmp space and pos */
    oldtmpspace = pool->tmpspace;
    memset(&pool->tmpspace, 0, sizeof(pool->tmpspace));
+  oldpos = pool->pos;
  
    r = pool->loadcallback(pool, data, pool->loadcallbackdata);
  
-  /* restore tmp space */
+  /* restore tmp space and pos */
    for (i = 0; i < POOL_TMPSPACEBUF; i++)
      solv_free(pool->tmpspace.buf[i]);
    pool->tmpspace = oldtmpspace;
+  if (r && oldpos.repo == repo && oldpos.repodataid == data->repodataid)
+    memset(&oldpos, 0, sizeof(oldpos));
+  pool->pos = oldpos;
  
    data->state = r ? REPODATA_AVAILABLE : REPODATA_ERROR;
  }
  
-void
+static inline void
+repodata_add_stubkey(Repodata *data, Id keyname, Id keytype)
+{
+  Repokey xkey;
+
+  xkey.name = keyname;
+  xkey.type = keytype;
+  xkey.storage = KEY_STORAGE_INCORE;
+  xkey.size = 0;
+  repodata_key2id(data, &xkey, 1);
+}
+
+static Repodata *
+repodata_add_stub(Repodata **datap)
+{
+  Repodata *data = *datap;
+  Repo *repo = data->repo;
+  Id repodataid = data - repo->repodata;
+  Repodata *sdata = repo_add_repodata(repo, 0);
+  data = repo->repodata + repodataid;
+  if (data->end > data->start)
+    repodata_extend_block(sdata, data->start, data->end - data->start);
+  sdata->state = REPODATA_STUB;
+  sdata->loadcallback = repodata_load_stub;
+  *datap = data;
+  return sdata;
+}
+
+Repodata *
  repodata_create_stubs(Repodata *data)
  {
    Repo *repo = data->repo;
@@ -3248,38 +3655,26 @@ repodata_create_stubs(Repodata *data)
    Dataiterator di;
    Id xkeyname = 0;
    int i, cnt = 0;
-  int repodataid;
-  int datastart, dataend;
  
-  repodataid = data - repo->repodata;
-  datastart = data->start;
-  dataend = data->end;
    dataiterator_init(&di, pool, repo, SOLVID_META, REPOSITORY_EXTERNAL, 0, 0);
    while (dataiterator_step(&di))
-    {
-      if (di.data - repo->repodata != repodataid)
-       continue;
+    if (di.data == data)
        cnt++;
-    }
    dataiterator_free(&di);
    if (!cnt)
-    return;
+    return data;
    stubdataids = solv_calloc(cnt, sizeof(*stubdataids));
    for (i = 0; i < cnt; i++)
      {
-      sdata = repo_add_repodata(repo, 0);
-      if (dataend > datastart)
-        repodata_extend_block(sdata, datastart, dataend - datastart);
+      sdata = repodata_add_stub(&data);
        stubdataids[i] = sdata - repo->repodata;
-      sdata->state = REPODATA_STUB;
-      sdata->loadcallback = repodata_load_stub;
      }
    i = 0;
    dataiterator_init(&di, pool, repo, SOLVID_META, REPOSITORY_EXTERNAL, 0, 0);
    sdata = 0;
    while (dataiterator_step(&di))
      {
-      if (di.data - repo->repodata != repodataid)
+      if (di.data != data)
         continue;
        if (di.key->name == REPOSITORY_EXTERNAL && !di.nparents)
         {
@@ -3288,55 +3683,26 @@ repodata_create_stubs(Repodata *data)
           xkeyname = 0;
           continue;
         }
-      switch (di.key->type)
+      repodata_set_kv(sdata, SOLVID_META, di.key->name, di.key->type, &di.kv);
+      if (di.key->name == REPOSITORY_KEYS && di.key->type == REPOKEY_TYPE_IDARRAY)
         {
-        case REPOKEY_TYPE_ID:
-         repodata_set_id(sdata, SOLVID_META, di.key->name, di.kv.id);
-         break;
-       case REPOKEY_TYPE_CONSTANTID:
-         repodata_set_constantid(sdata, SOLVID_META, di.key->name, di.kv.id);
-         break;
-       case REPOKEY_TYPE_STR:
-         repodata_set_str(sdata, SOLVID_META, di.key->name, di.kv.str);
-         break;
-       case REPOKEY_TYPE_VOID:
-         repodata_set_void(sdata, SOLVID_META, di.key->name);
-         break;
-       case REPOKEY_TYPE_NUM:
-         repodata_set_num(sdata, SOLVID_META, di.key->name, SOLV_KV_NUM64(&di.kv));
-         break;
-       case REPOKEY_TYPE_MD5:
-       case REPOKEY_TYPE_SHA1:
-       case REPOKEY_TYPE_SHA256:
-         repodata_set_bin_checksum(sdata, SOLVID_META, di.key->name, di.key->type, (const unsigned char *)di.kv.str);
-         break;
-       case REPOKEY_TYPE_IDARRAY:
-         repodata_add_idarray(sdata, SOLVID_META, di.key->name, di.kv.id);
-         if (di.key->name == REPOSITORY_KEYS)
+         if (!xkeyname)
             {
-             Repokey xkey;
-
-             if (!xkeyname)
-               {
-                 if (!di.kv.eof)
-                   xkeyname = di.kv.id;
-                 continue;
-               }
-             xkey.name = xkeyname;
-              xkey.type = di.kv.id;
-              xkey.storage = KEY_STORAGE_INCORE;
-              xkey.size = 0;
-              repodata_key2id(sdata, &xkey, 1);
-              xkeyname = 0;
+             if (!di.kv.eof)
+               xkeyname = di.kv.id;
+           }
+         else
+           {
+             repodata_add_stubkey(sdata, xkeyname, di.kv.id);
+             xkeyname = 0;
             }
-       default:
-         break;
         }
      }
    dataiterator_free(&di);
    for (i = 0; i < cnt; i++)
      repodata_internalize(repo->repodata + stubdataids[i]);
    solv_free(stubdataids);
+  return data;
  }
  
  unsigned int
@@ -3345,6 +3711,3 @@ repodata_memused(Repodata *data)
    return data->incoredatalen + data->vincorelen;
  }
  
-/*
-vim:cinoptions={.5s,g0,p5,t0,(0,^-0.5s,n-0.5s:tw=78:cindent:sw=4:
-*/