Imported Upstream version 0.7.2

[platform/upstream/libsolv.git] / ext / repo_rpmmd.c
diff --git a/ext/repo_rpmmd.c b/ext/repo_rpmmd.c

index 668c78d..9bb50a0 100644 (file)
--- a/ext/repo_rpmmd.c
+++ b/ext/repo_rpmmd.c
@@ -6,12 +6,9 @@
   */
  
  #include <sys/types.h>
-#include <limits.h>
-#include <fcntl.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
-#include <expat.h>
  
  #include "pool.h"
  #include "repo.h"
@@ -19,7 +16,11 @@
  #include "tools_util.h"
  #include "repo_rpmmd.h"
  #include "chksum.h"
-
+#include "solv_xmlparser.h"
+#ifdef ENABLE_COMPLEX_DEPS
+#include "pool_parserpmrichdep.h"
+#endif
+#include "repodata_diskusage.h"
  
  enum state {
    STATE_START,
@@ -30,7 +31,7 @@ enum state {
    STATE_ARCH,
    STATE_VERSION,
  
-  // package rpm-md
+  /* package rpm-md */
    STATE_LOCATION,
    STATE_CHECKSUM,
    STATE_RPM_GROUP,
@@ -64,7 +65,7 @@ enum state {
    STATE_AFFECTSPKG,
    STATE_REBOOTNEEDED,
  
-  // pattern attributes
+  /* pattern attributes */
    STATE_CATEGORY, /* pattern and patches */
    STATE_ORDER,
    STATE_INCLUDES,
@@ -78,18 +79,12 @@ enum state {
    STATE_INSTALL_TIME,
  
    /* product */
-  STATE_SHORTNAME,
-  STATE_DISTNAME, // obsolete
-  STATE_DISTEDITION, // obsolete
-  STATE_SOURCE,
-  STATE_TYPE,
    STATE_RELNOTESURL,
    STATE_UPDATEURL,
    STATE_OPTIONALURL,
    STATE_FLAG,
  
-  /* rpm-md dependencies inside the
-     format tag */
+  /* rpm-md dependencies inside the format tag */
    STATE_PROVIDES,
    STATE_REQUIRES,
    STATE_OBSOLETES,
@@ -114,22 +109,19 @@ enum state {
  
    STATE_FILE,
  
-  // general
-  NUMSTATES
-};
+  STATE_CHANGELOG,
  
-struct stateswitch {
-  enum state from;
-  char *ename;
-  enum state to;
-  int docontent;
+  /* general */
+  NUMSTATES
  };
  
-static struct stateswitch stateswitches[] = {
-  /** fake tag used to enclose 2 different xml files in one **/
+static struct solv_xmlparser_element stateswitches[] = {
+  /** fake tag used to enclose multiple xml files in one **/
    { STATE_START,       "rpmmd",           STATE_START,    0 },
  
-  /** tags for different package data, we just ignore the tag **/
+  /** tags for different package data, just ignore them **/
+  { STATE_START,       "patterns",        STATE_START,    0 },
+  { STATE_START,       "products",        STATE_START,    0 },
    { STATE_START,       "metadata",        STATE_START,    0 },
    { STATE_START,       "otherdata",       STATE_START,    0 },
    { STATE_START,       "filelists",       STATE_START,    0 },
@@ -141,11 +133,13 @@ static struct stateswitch stateswitches[] = {
    { STATE_START,       "patch",           STATE_SOLVABLE, 0 },
    { STATE_START,       "package",         STATE_SOLVABLE, 0 },
  
+  { STATE_SOLVABLE,    "format",          STATE_SOLVABLE, 0 },
+
    { STATE_SOLVABLE,    "name",            STATE_NAME, 1 },
    { STATE_SOLVABLE,    "arch",            STATE_ARCH, 1 },
    { STATE_SOLVABLE,    "version",         STATE_VERSION, 0 },
  
-  // package attributes rpm-md
+  /* package attributes rpm-md */
    { STATE_SOLVABLE,    "location",        STATE_LOCATION, 0 },
    { STATE_SOLVABLE,    "checksum",        STATE_CHECKSUM, 1 },
  
@@ -156,8 +150,6 @@ static struct stateswitch stateswitches[] = {
    { STATE_SOLVABLE,    "distribution",    STATE_DISTRIBUTION, 1 },
    { STATE_SOLVABLE,    "url",             STATE_URL,          1 },
    { STATE_SOLVABLE,    "packager",        STATE_PACKAGER,     1 },
-  //{ STATE_SOLVABLE,    "???",         STATE_INSNOTIFY, 1 },
-  //{ STATE_SOLVABLE,    "??",     STATE_DELNOTIFY, 1 },
    { STATE_SOLVABLE,    "vendor",          STATE_VENDOR,       1 },
    { STATE_SOLVABLE,    "size",            STATE_SIZE,         0 },
    { STATE_SOLVABLE,    "archive-size",    STATE_DOWNLOADSIZE, 1 },
@@ -170,7 +162,7 @@ static struct stateswitch stateswitches[] = {
    { STATE_SOLVABLE,    "keyword",         STATE_KEYWORD,      1 },
    { STATE_SOLVABLE,    "diskusage",       STATE_DISKUSAGE,    0 },
  
-  // pattern attribute
+  /* pattern attribute */
    { STATE_SOLVABLE,    "script",          STATE_SCRIPT,        1 },
    { STATE_SOLVABLE,    "icon",            STATE_ICON,          1 },
    { STATE_SOLVABLE,    "uservisible",     STATE_USERVISIBLE,   1 },
@@ -184,27 +176,28 @@ static struct stateswitch stateswitches[] = {
    /* product attributes */
    /* note the product type is an attribute */
    { STATE_SOLVABLE,    "release-notes-url", STATE_RELNOTESURL, 1 },
-  { STATE_SOLVABLE,    "update-url",        STATE_UPDATEURL,   1 },
-  { STATE_SOLVABLE,    "optional-url",      STATE_OPTIONALURL, 1 },
-  { STATE_SOLVABLE,    "flag",              STATE_FLAG,        1 },
+  { STATE_SOLVABLE,    "update-url",      STATE_UPDATEURL,   1 },
+  { STATE_SOLVABLE,    "optional-url",    STATE_OPTIONALURL, 1 },
+  { STATE_SOLVABLE,    "flag",            STATE_FLAG,        1 },
  
-  { STATE_SOLVABLE,      "rpm:vendor",      STATE_VENDOR,      1 },
-  { STATE_SOLVABLE,      "rpm:group",       STATE_RPM_GROUP,   1 },
-  { STATE_SOLVABLE,      "rpm:license",     STATE_RPM_LICENSE, 1 },
+  { STATE_SOLVABLE,    "rpm:vendor",      STATE_VENDOR,      1 },
+  { STATE_SOLVABLE,    "rpm:group",       STATE_RPM_GROUP,   1 },
+  { STATE_SOLVABLE,    "rpm:license",     STATE_RPM_LICENSE, 1 },
  
    /* rpm-md dependencies */
-  { STATE_SOLVABLE,      "rpm:provides",    STATE_PROVIDES,     0 },
-  { STATE_SOLVABLE,      "rpm:requires",    STATE_REQUIRES,     0 },
-  { STATE_SOLVABLE,      "rpm:obsoletes",   STATE_OBSOLETES,    0 },
-  { STATE_SOLVABLE,      "rpm:conflicts",   STATE_CONFLICTS,    0 },
-  { STATE_SOLVABLE,      "rpm:recommends",  STATE_RECOMMENDS ,  0 },
-  { STATE_SOLVABLE,      "rpm:supplements", STATE_SUPPLEMENTS,  0 },
-  { STATE_SOLVABLE,      "rpm:suggests",    STATE_SUGGESTS,     0 },
-  { STATE_SOLVABLE,      "rpm:enhances",    STATE_ENHANCES,     0 },
-  { STATE_SOLVABLE,      "rpm:freshens",    STATE_FRESHENS,     0 },
-  { STATE_SOLVABLE,      "rpm:sourcerpm",   STATE_SOURCERPM,    1 },
-  { STATE_SOLVABLE,      "rpm:header-range", STATE_HEADERRANGE, 0 },
-  { STATE_SOLVABLE,      "file",            STATE_FILE, 1 },
+  { STATE_SOLVABLE,    "rpm:provides",    STATE_PROVIDES,     0 },
+  { STATE_SOLVABLE,    "rpm:requires",    STATE_REQUIRES,     0 },
+  { STATE_SOLVABLE,    "rpm:obsoletes",   STATE_OBSOLETES,    0 },
+  { STATE_SOLVABLE,    "rpm:conflicts",   STATE_CONFLICTS,    0 },
+  { STATE_SOLVABLE,    "rpm:recommends",  STATE_RECOMMENDS ,  0 },
+  { STATE_SOLVABLE,    "rpm:supplements", STATE_SUPPLEMENTS,  0 },
+  { STATE_SOLVABLE,    "rpm:suggests",    STATE_SUGGESTS,     0 },
+  { STATE_SOLVABLE,    "rpm:enhances",    STATE_ENHANCES,     0 },
+  { STATE_SOLVABLE,    "rpm:freshens",    STATE_FRESHENS,     0 },
+  { STATE_SOLVABLE,    "rpm:sourcerpm",   STATE_SOURCERPM,    1 },
+  { STATE_SOLVABLE,    "rpm:header-range", STATE_HEADERRANGE, 0 },
+  { STATE_SOLVABLE,    "file",            STATE_FILE, 1 },
+  { STATE_SOLVABLE,    "changelog",       STATE_CHANGELOG, 1 },
  
     /* extended Novell/SUSE diskusage attributes (susedata.xml) */
    { STATE_DISKUSAGE,   "dirs",            STATE_DIRS,         0 },
@@ -226,35 +219,22 @@ static struct stateswitch stateswitches[] = {
    { NUMSTATES}
  };
  
-/* maxmum initial size of
-   the checksum cache */
-#define MAX_CSCACHE 32768
-#define CSREALLOC_STEP 1024
-
  struct parsedata {
+  int ret;
    Pool *pool;
    Repo *repo;
    Repodata *data;
    char *kind;
-  int depth;
-  enum state state;
-  int statedepth;
-  char *content;
-  int lcontent;
-  int acontent;
-  int docontent;
    Solvable *solvable;
    Offset freshens;
-  struct stateswitch *swtab[NUMSTATES];
-  enum state sbtab[NUMSTATES];
+
+  struct solv_xmlparser xmlp;
    struct joindata jd;
    /* temporal to store attribute tag language */
    const char *tmplang;
    Id chksumtype;
    Id handle;
-  XML_Parser *parser;
-  Id (*dirs)[3]; // dirid, size, nfiles
-  int ndirs;
+  Queue diskusageq;
    const char *language;                        /* default language */
    Id langcache[ID_NUM_INTERNAL];       /* cache for the default language */
  
@@ -262,12 +242,17 @@ struct parsedata {
    char *lastdirstr;
    int lastdirstrl;
  
-  /** Hash to maps checksums to solv */
-  Stringpool cspool;
-  /** Cache of known checksums to solvable id */
-  Id *cscache;
-  /* the current longest index in the table */
-  int ncscache;
+  Id changelog_handle;
+
+  int extending;                       /* are we extending an existing solvable? */
+  int first;                           /* first solvable we added */
+  int cshash_filled;                   /* hash is filled with data */
+
+  Hashtable cshash;                    /* checksum hash -> offset into csdata */
+  Hashval cshashm;                     /* hash mask */
+  int ncshash;                         /* entries used */
+  unsigned char *csdata;               /* [len, checksum, id] */
+  int ncsdata;                         /* used bytes */
  };
  
  static Id
@@ -288,74 +273,6 @@ langtag(struct parsedata *pd, Id tag, const char *language)
    return pd->langcache[tag];
  }
  
-static int
-id3_cmp (const void *v1, const void *v2, void *dp)
-{
-  Id *i1 = (Id*)v1;
-  Id *i2 = (Id*)v2;
-  return i1[0] - i2[0];
-}
-
-static void
-commit_diskusage (struct parsedata *pd, unsigned handle)
-{
-  unsigned i;
-  Dirpool *dp = &pd->data->dirpool;
-  /* Now sort in dirid order.  This ensures that parents come before
-     their children.  */
-  if (pd->ndirs > 1)
-    solv_sort(pd->dirs, pd->ndirs, sizeof (pd->dirs[0]), id3_cmp, 0);
-  /* Substract leaf numbers from all parents to make the numbers
-     non-cumulative.  This must be done post-order (i.e. all leafs
-     adjusted before parents).  We ensure this by starting at the end of
-     the array moving to the start, hence seeing leafs before parents.  */
-  for (i = pd->ndirs; i--;)
-    {
-      unsigned p = dirpool_parent(dp, pd->dirs[i][0]);
-      unsigned j = i;
-      for (; p; p = dirpool_parent(dp, p))
-        {
-          for (; j--;)
-           if (pd->dirs[j][0] == p)
-             break;
-         if (j < pd->ndirs)
-           {
-             if (pd->dirs[j][1] < pd->dirs[i][1])
-               pd->dirs[j][1] = 0;
-             else
-               pd->dirs[j][1] -= pd->dirs[i][1];
-             if (pd->dirs[j][2] < pd->dirs[i][2])
-               pd->dirs[j][2] = 0;
-             else
-               pd->dirs[j][2] -= pd->dirs[i][2];
-           }
-         else
-           /* Haven't found this parent in the list, look further if
-              we maybe find the parents parent.  */
-           j = i;
-       }
-    }
-#if 0
-  char sbuf[1024];
-  char *buf = sbuf;
-  unsigned slen = sizeof (sbuf);
-  for (i = 0; i < pd->ndirs; i++)
-    {
-      dir2str (attr, pd->dirs[i][0], &buf, &slen);
-      fprintf (stderr, "have dir %d %d %d %s\n", pd->dirs[i][0], pd->dirs[i][1], pd->dirs[i][2], buf);
-    }
-  if (buf != sbuf)
-    free (buf);
-#endif
-  for (i = 0; i < pd->ndirs; i++)
-    if (pd->dirs[i][1] || pd->dirs[i][2])
-      {
-       repodata_add_dirnumnum(pd->data, handle, SOLVABLE_DISKUSAGE, pd->dirs[i][0], pd->dirs[i][1], pd->dirs[i][2]);
-      }
-  pd->ndirs = 0;
-}
-
-
  /*
   * makeevr_atts
   * parse 'epoch', 'ver' and 'rel', return evr Id
@@ -366,7 +283,7 @@ static Id
  makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts)
  {
    const char *e, *v, *r, *v2;
-  char *c;
+  char *c, *space;
    int l;
  
    e = v = r = 0;
@@ -379,7 +296,7 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts)
        else if (!strcmp(*atts, "rel"))
         r = atts[1];
      }
-  if (e && !strcmp(e, "0"))
+  if (e && (!*e || !strcmp(e, "0")))
      e = 0;
    if (v && !e)
      {
@@ -395,12 +312,7 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts)
      l += strlen(v);
    if (r)
      l += strlen(r) + 1;
-  if (l > pd->acontent)
-    {
-      pd->content = solv_realloc(pd->content, l + 256);
-      pd->acontent = l + 256;
-    }
-  c = pd->content;
+  c = space = solv_xmlparser_contentspace(&pd->xmlp, l);
    if (e)
      {
        strcpy(c, e);
@@ -419,33 +331,12 @@ makeevr_atts(Pool *pool, struct parsedata *pd, const char **atts)
        c += strlen(c);
      }
    *c = 0;
-  if (!*pd->content)
+  if (!*space)
      return 0;
  #if 0
-  fprintf(stderr, "evr: %s\n", pd->content);
+  fprintf(stderr, "evr: %s\n", space);
  #endif
-  return pool_str2id(pool, pd->content, 1);
-}
-
-
-/*
- * find_attr
- * find value for xml attribute
- * I: txt, name of attribute
- * I: atts, list of key/value attributes
- * O: pointer to value of matching key, or NULL
- *
- */
-
-static inline const char *
-find_attr(const char *txt, const char **atts)
-{
-  for (; *atts; atts += 2)
-    {
-      if (!strcmp(*atts, txt))
-        return atts[1];
-    }
-  return 0;
+  return pool_str2id(pool, space, 1);
  }
  
  
@@ -472,7 +363,7 @@ static char *flagtab[] = {
  static unsigned int
  adddep(Pool *pool, struct parsedata *pd, unsigned int olddeps, const char **atts, int isreq)
  {
-  Id id, name, marker;
+  Id id, marker;
    const char *n, *f, *k;
    const char **a;
  
@@ -496,16 +387,20 @@ adddep(Pool *pool, struct parsedata *pd, unsigned int olddeps, const char **atts
    if (k)
      {
        int l = strlen(k) + 1 + strlen(n) + 1;
-      if (l > pd->acontent)
-       {
-         pd->content = solv_realloc(pd->content, l + 256);
-         pd->acontent = l + 256;
-       }
-      sprintf(pd->content, "%s:%s", k, n);
-      name = pool_str2id(pool, pd->content, 1);
+      char *space = solv_xmlparser_contentspace(&pd->xmlp, l);
+      sprintf(space, "%s:%s", k, n);
+      id = pool_str2id(pool, space, 1);
      }
+#ifdef ENABLE_COMPLEX_DEPS
+  else if (!f && n[0] == '(')
+    {
+      id = pool_parserpmrichdep(pool, n);
+      if (!id)
+       return olddeps;
+    }
+#endif
    else
-    name = pool_str2id(pool, (char *)n, 1);
+    id = pool_str2id(pool, (char *)n, 1);
    if (f)
      {
        Id evr = makeevr_atts(pool, pd, atts);
@@ -514,12 +409,10 @@ adddep(Pool *pool, struct parsedata *pd, unsigned int olddeps, const char **atts
         if (!strcmp(f, flagtab[flags]))
           break;
        flags = flags < 6 ? flags + 1 : 0;
-      id = pool_rel2id(pool, name, evr, flags, 1);
+      id = pool_rel2id(pool, id, evr, flags, 1);
      }
-  else
-    id = name;
  #if 0
-  fprintf(stderr, "new dep %s%s%s\n", pool_id2str(pool, d), id2rel(pool, d), id2evr(pool, d));
+  fprintf(stderr, "new dep %s\n", pool_dep2str(pool, id));
  #endif
    return repo_addid_dep(pd->repo, olddeps, id, marker);
  }
@@ -574,53 +467,155 @@ set_description_author(Repodata *data, Id handle, char *str, struct parsedata *p
  }
  
  
-/*
- * set_sourcerpm
+/*-----------------------------------------------*/
+/* checksum hash functions
+ *
+ * used to look up a solvable with the checksum for solvable extension purposes.
   *
   */
  
  static void
-set_sourcerpm(Repodata *data, Solvable *s, Id handle, char *sourcerpm)
+init_cshash(struct parsedata *pd)      /* empty body: put_in_cshash() creates the table on its first insert (assumes pd starts zeroed -- TODO confirm) */
  {
-  const char *p, *sevr, *sarch, *name, *evr;
-  Pool *pool;
+}
  
-  p = strrchr(sourcerpm, '.');
-  if (!p || strcmp(p, ".rpm") != 0)
-    return;
-  p--;
-  while (p > sourcerpm && *p != '.')
-    p--;
-  if (*p != '.' || p == sourcerpm)
-    return;
-  sarch = p-- + 1;
-  while (p > sourcerpm && *p != '-')
-    p--;
-  if (*p != '-' || p == sourcerpm)
-    return;
-  p--;
-  while (p > sourcerpm && *p != '-')
-    p--;
-  if (*p != '-' || p == sourcerpm)
+static void
+free_cshash(struct parsedata *pd)      /* release the checksum hash table and its record buffer, reset all counters */
+{
+  pd->cshash = solv_free(pd->cshash);  /* keep solv_free()'s return value so no stale pointer is left behind */
+  pd->ncshash = 0;
+  pd->cshashm = 0;     /* a zero mask makes the next put_in_cshash() rebuild the table */
+  pd->csdata = solv_free(pd->csdata);  /* the packed [len, checksum, id] records */
+  pd->ncsdata = 0;
+}
+
+static inline Hashval          /* hash value = first four key bytes, most significant byte first */
+hashkey(const unsigned char *key, int keyl)    /* keyl is not read here; callers only pass keys of at least 4 bytes */
+{
+  return (unsigned int)key[0] << 24 | key[1] << 16 | key[2] << 8 | key[3];     /* widen first: key[0] >= 0x80 shifted by 24 would overflow a signed int */
+}
+
+static void
+rebuild_cshash(struct parsedata *pd)   /* allocate a fresh table for mask pd->cshashm and re-insert every record of pd->csdata */
+{
+  Hashval h, hh, hm;
+  Hashtable ht;
+  unsigned char *d, *de;
+
+  hm = pd->cshashm;
+#if 0
+  fprintf(stderr, "rebuild cshash with mask 0x%x\n", hm);
+#endif
+  solv_free(pd->cshash);
+  ht = pd->cshash = (Hashtable)solv_calloc(hm + 1, sizeof(Id));        /* hm + 1 zeroed slots; a zero slot means "empty" */
+  d = pd->csdata;
+  de = d + pd->ncsdata;
+  while (d != de)      /* walk the packed records front to back */
+    {
+      h = hashkey(d + 1, d[0] + 1) & hm;       /* d[0] holds key length - 1, the key bytes start at d + 1 */
+      hh = HASHCHAIN_START;
+      while (ht[h])    /* probe until a free slot is found */
+       h = HASHCHAIN_NEXT(h, hh, hm);
+      ht[h] = d + 1 - pd->csdata;      /* slot = offset of the key bytes; never 0 because the length byte precedes them */
+      d += 2 + d[0] + sizeof(Id);      /* skip length byte + (d[0] + 1) key bytes + stored id */
+    }
+}
+
+static void
+put_in_cshash(struct parsedata *pd, const unsigned char *key, int keyl, Id id) /* remember key -> id unless the key is already stored */
+{
+  Hashtable ht;
+  Hashval h, hh, hm;
+  unsigned char *d;
+
+  if (keyl < 4 || keyl > 256)  /* hashkey() reads 4 bytes; keyl - 1 must fit the one-byte length prefix */
      return;
-  sevr = p + 1;
-  pool = s->repo->pool;
-  if (!strcmp(sarch, "src.rpm"))
-    repodata_set_constantid(data, handle, SOLVABLE_SOURCEARCH, ARCH_SRC);
-  else if (!strcmp(sarch, "nosrc.rpm"))
-    repodata_set_constantid(data, handle, SOLVABLE_SOURCEARCH, ARCH_NOSRC);
-  else
-    repodata_set_constantid(data, handle, SOLVABLE_SOURCEARCH, pool_strn2id(pool, sarch, strlen(sarch) - 4, 1));
-  evr = pool_id2str(pool, s->evr);
-  if (evr && !strncmp(sevr, evr, sarch - sevr - 1) && evr[sarch - sevr - 1] == 0)
-    repodata_set_void(data, handle, SOLVABLE_SOURCEEVR);
-  else
-    repodata_set_id(data, handle, SOLVABLE_SOURCEEVR, pool_strn2id(pool, sevr, sarch - sevr - 1, 1));
-  name = pool_id2str(pool, s->name);
-  if (name && !strncmp(sourcerpm, name, sevr - sourcerpm - 1) && name[sevr - sourcerpm - 1] == 0)
-    repodata_set_void(data, handle, SOLVABLE_SOURCENAME);
+  ht = pd->cshash;
+  hm = pd->cshashm;
+  h = hashkey(key, keyl) & hm;
+  hh = HASHCHAIN_START;
+  if (ht)      /* the table does not exist before the first insert */
+    {
+      while (ht[h])
+       {
+         unsigned char *d = pd->csdata + ht[h];
+         if (d[-1] == keyl - 1 && !memcmp(key, d, keyl))
+           return;             /* XXX: first id wins... */
+         h = HASHCHAIN_NEXT(h, hh, hm);
+       }
+    }
+  /* a new entry. put in csdata */
+  pd->csdata = solv_extend(pd->csdata, pd->ncsdata, 1 + keyl + sizeof(Id), 1, 4095);
+  d = pd->csdata + pd->ncsdata;
+  d[0] = keyl - 1;     /* length prefix stores keyl - 1 */
+  memcpy(d + 1, key, keyl);
+  memcpy(d + 1 + keyl, &id, sizeof(Id));       /* id is stored bytewise right after the key */
+  pd->ncsdata += 1 + keyl + sizeof(Id);
+  if ((Hashval)++pd->ncshash * 2 > hm) /* grow once entries * 2 exceeds the mask; always true for the first insert (hm == 0) */
+    {
+      pd->cshashm = pd->cshashm ? (2 * pd->cshashm + 1) : 4095;
+      rebuild_cshash(pd);      /* re-inserts every record, including the one just appended */
+    }
    else
-    repodata_set_id(data, handle, SOLVABLE_SOURCENAME, pool_strn2id(pool, sourcerpm, sevr - sourcerpm - 1, 1));
+    ht[h] = pd->ncsdata - (keyl + sizeof(Id)); /* offset of the new record's key bytes; h is the free slot found above */
+}
+
+static Id
+lookup_cshash(struct parsedata *pd, const unsigned char *key, int keyl)        /* return the id stored for key, or 0 when it is unknown */
+{
+  Hashtable ht;
+  Hashval h, hh, hm;
+
+  if (keyl < 4 || keyl > 256)  /* same length limits as put_in_cshash(), so such keys are never stored */
+    return 0;
+  ht = pd->cshash;
+  if (!ht)     /* nothing has been inserted yet */
+    return 0;
+  hm = pd->cshashm;
+  h = hashkey(key, keyl) & hm;
+  hh = HASHCHAIN_START;
+  while (ht[h])        /* a zero slot ends the probe sequence */
+    {
+      unsigned char *d = pd->csdata + ht[h];   /* d points at the key bytes, d[-1] is the length prefix */
+      if (d[-1] == keyl - 1 && !memcmp(key, d, keyl))
+       {
+         Id id;
+         memcpy(&id, d + keyl, sizeof(Id));    /* id was stored bytewise directly behind the key */
+         return id;
+       }
+      h = HASHCHAIN_NEXT(h, hh, hm);
+    }
+  return 0;
+}
+
+static void
+fill_cshash_from_repo(struct parsedata *pd)    /* seed the checksum hash from checksums found in pd->repo */
+{
+  Dataiterator di;
+  /* iterate over the SOLVABLE_CHECKSUM entries of the repo and map each checksum to its solvable id */
+  dataiterator_init(&di, pd->pool, pd->repo, 0, SOLVABLE_CHECKSUM, 0, 0);
+  while (dataiterator_step(&di))
+    put_in_cshash(pd, (const unsigned char *)di.kv.str, solv_chksum_len(di.key->type), di.solvid);     /* put_in_cshash() ignores lengths below 4 */
+  dataiterator_free(&di);
+}
+
+static void
+fill_cshash_from_new_solvables(struct parsedata *pd)   /* hash the checksums of the solvables from pd->first onwards that belong to pd->repo */
+{
+  Pool *pool = pd->pool;
+  int i, l;
+  KeyValue kv;
+  Repokey *key;
+
+  for (i = pd->first; i < pool->nsolvables; i++)
+    {
+      if (pool->solvables[i].repo != pd->repo) /* skip solvables owned by another repo */
+       continue;
+      if ((key = repodata_lookup_kv_uninternalized(pd->data, i, SOLVABLE_CHECKSUM, &kv)) == 0) /* no checksum found for this solvable */
+       continue;
+      if ((l = solv_chksum_len(key->type)) != 0)       /* only insert when the key type yields a non-zero checksum length */
+       put_in_cshash(pd, (const unsigned char *)kv.str, l, i);
+    }
  }
  
  /*-----------------------------------------------*/
@@ -628,58 +623,22 @@ set_sourcerpm(Repodata *data, Solvable *s, Id handle, char *sourcerpm)
  
  /*
   * startElement
- * XML callback
- *
   */
  
-static void XMLCALL
-startElement(void *userData, const char *name, const char **atts)
+static void
+startElement(struct solv_xmlparser *xmlp, int state, const char *name, const char **atts)
  {
-  //fprintf(stderr,"+tag: %s\n", name);
-  struct parsedata *pd = userData;
+  struct parsedata *pd = xmlp->userdata;
    Pool *pool = pd->pool;
    Solvable *s = pd->solvable;
-  struct stateswitch *sw;
-  const char *str;
    Id handle = pd->handle;
+  const char *str;
    const char *pkgid;
  
-  // fprintf(stderr, "into %s, from %d, depth %d, statedepth %d\n", name, pd->state, pd->depth, pd->statedepth);
-
-  if (pd->depth != pd->statedepth)
-    {
-      pd->depth++;
-      return;
-    }
-
-  if (pd->state == STATE_START && !strcmp(name, "patterns"))
-    return;
-  if (pd->state == STATE_START && !strcmp(name, "products"))
-    return;
-  //if (pd->state == STATE_START && !strcmp(name, "metadata"))
-  //  return;
-  if (pd->state == STATE_SOLVABLE && !strcmp(name, "format"))
+  if (!s && state != STATE_SOLVABLE)
      return;
  
-  pd->depth++;
-  if (!pd->swtab[pd->state])
-    return;
-  for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)
-    if (!strcmp(sw->ename, name))
-      break;
-  if (sw->from != pd->state)
-    {
-#if 0
-      fprintf(stderr, "into unknown: %s\n", name);
-#endif
-      return;
-    }
-  pd->state = sw->to;
-  pd->docontent = sw->docontent;
-  pd->statedepth = pd->depth;
-  pd->lcontent = 0;
-  *pd->content = 0;
-  switch(pd->state)
+  switch(state)
      {
      case STATE_SOLVABLE:
        pd->kind = 0;
@@ -706,30 +665,60 @@ startElement(void *userData, const char *name, const char **atts)
           a new solvable but just append the attributes to the existing
           one.
        */
-      if ((pkgid = find_attr("pkgid", atts)) != NULL)
+      pd->extending = 0;
+      if ((pkgid = solv_xmlparser_find_attr("pkgid", atts)) != NULL)
          {
-          // look at the checksum cache
-          Id index = stringpool_str2id(&pd->cspool, pkgid, 0);
-          if (!index || index >= pd->ncscache || !pd->cscache[index])
+         unsigned char chk[256];
+         int l;
+         const char *str = pkgid;
+         if (!pd->cshash_filled)
             {
-              fprintf(stderr, "error, the repository specifies extra information about package with checksum '%s', which does not exist in the repository.\n", pkgid);
-              exit(1);
+             pd->cshash_filled = 1;
+             fill_cshash_from_new_solvables(pd);
+           }
+         handle = 0;
+         /* convert into bin checksum */
+         l = solv_hex2bin(&str, chk, sizeof(chk));
+          /* look at the checksum cache */
+         if (l >= 4 && !pkgid[2 * l])
+           handle = lookup_cshash(pd, chk, l);
+#if 0
+         fprintf(stderr, "Lookup %s -> %d\n", pkgid, handle);
+#endif
+         if (!handle)
+           {
+              pool_debug(pool, SOLV_WARN, "the repository specifies extra information about package with checksum '%s', which does not exist in the repository.\n", pkgid);
+             pd->handle = 0;
+             pd->solvable = 0;
+             break;
             }
-         pd->solvable = pool_id2solvable(pool, pd->cscache[index]);
+         pd->extending = 1;
          }
-       else
+      else
          {
            /* this is a new package */
-          pd->solvable = pool_id2solvable(pool, repo_add_solvable(pd->repo));
+         handle = repo_add_solvable(pd->repo);
+         if (!pd->first)
+           pd->first = handle;
            pd->freshens = 0;
          }
-      pd->handle = pd->solvable - pool->solvables;
+      pd->handle = handle;
+      pd->solvable = pool_id2solvable(pool, handle);
+      if (pd->kind && pd->kind[1] == 'r')
+       {
+         /* products can have a type */
+         const char *type = solv_xmlparser_find_attr("type", atts);
+         if (type && *type)
+           repodata_set_str(pd->data, handle, PRODUCT_TYPE, type);
+       }
  #if 0
        fprintf(stderr, "package #%d\n", pd->solvable - pool->solvables);
  #endif
  
        break;
      case STATE_VERSION:
+      if (pd->extending && s->evr)
+       break;          /* ignore version tag repetition in extend data */
        s->evr = makeevr_atts(pool, pd, atts);
        break;
      case STATE_PROVIDES:
@@ -790,70 +779,68 @@ startElement(void *userData, const char *name, const char **atts)
      case STATE_SUMMARY:
      case STATE_CATEGORY:
      case STATE_DESCRIPTION:
-      pd->tmplang = join_dup(&pd->jd, find_attr("lang", atts));
+      pd->tmplang = join_dup(&pd->jd, solv_xmlparser_find_attr("lang", atts));
        break;
      case STATE_USERVISIBLE:
        repodata_set_void(pd->data, handle, SOLVABLE_ISVISIBLE);
        break;
      case STATE_INCLUDESENTRY:
-      {
-       const char *tmp = find_attr("pattern", atts);
-       if (tmp)
-         repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_INCLUDES, join2(&pd->jd, "pattern", ":", tmp));
-        break;
-      }
+      str = solv_xmlparser_find_attr("pattern", atts);
+      if (str)
+       repodata_add_poolstr_array(pd->data, handle, SOLVABLE_INCLUDES, join2(&pd->jd, "pattern", ":", str));
+      break;
      case STATE_EXTENDSENTRY:
-      {
-       const char *tmp = find_attr("pattern", atts);
-       if (tmp)
-         repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_EXTENDS, join2(&pd->jd, "pattern", ":", tmp));
-        break;
-      }
+      str = solv_xmlparser_find_attr("pattern", atts);
+      if (str)
+       repodata_add_poolstr_array(pd->data, handle, SOLVABLE_EXTENDS, join2(&pd->jd, "pattern", ":", str));
+      break;
      case STATE_LOCATION:
-      str = find_attr("href", atts);
+      str = solv_xmlparser_find_attr("href", atts);
        if (str)
-       repodata_set_location(pd->data, handle, 0, 0, str);
+       {
+         int medianr = 0;
+         const char *base = solv_xmlparser_find_attr("xml:base", atts);
+         if (base  && !strncmp(base, "media:", 6))
+           {
+             /* check for the media number in the fragment */
+             int l = strlen(base);
+             while (l && base[l - 1] >= '0' && base[l - 1] <= '9')
+               l--;
+             if (l && base[l - 1] == '#' && base[l])
+               medianr = atoi(base + l);
+           }
+         repodata_set_location(pd->data, handle, medianr, 0, str);
+         if (base)
+           repodata_set_poolstr(pd->data, handle, SOLVABLE_MEDIABASE, base);
+       }
        break;
      case STATE_CHECKSUM:
-      {
-       const char *tmp = find_attr("type", atts);
-       pd->chksumtype = tmp && *tmp ? solv_chksum_str2type(tmp) : 0;
-        if (!pd->chksumtype)
-         {
-           fprintf(stderr, "Unknown checksum type: %d: %s\n", (unsigned int)XML_GetCurrentLineNumber(*pd->parser), tmp ? tmp: "NULL");
-            exit(1);
-         }
-      }
+      str = solv_xmlparser_find_attr("type", atts);
+      pd->chksumtype = str && *str ? solv_chksum_str2type(str) : 0;
+      if (!pd->chksumtype)
+       pd->ret = pool_error(pool, -1, "line %d: unknown checksum type: %s", solv_xmlparser_lineno(xmlp), str ? str : "NULL");
        break;
      case STATE_TIME:
        {
          unsigned int t;
-        str = find_attr("build", atts);
+        str = solv_xmlparser_find_attr("build", atts);
          if (str && (t = atoi(str)) != 0)
            repodata_set_num(pd->data, handle, SOLVABLE_BUILDTIME, t);
         break;
        }
      case STATE_SIZE:
-      {
-        unsigned int k;
-        str = find_attr("installed", atts);
-       if (str && (k = atoi(str)) != 0)
-         repodata_set_num(pd->data, handle, SOLVABLE_INSTALLSIZE, (k + 1023) / 1024);
-       /* XXX the "package" attribute gives the size of the rpm file,
-          i.e. the download size.  Except on packman, there it seems to be
-          something else entirely, it has a value near to the other two
-          values, as if the rpm is uncompressed.  */
-        str = find_attr("package", atts);
-       if (str && (k = atoi(str)) != 0)
-         repodata_set_num(pd->data, handle, SOLVABLE_DOWNLOADSIZE, (k + 1023) / 1024);
-        break;
-      }
+      if ((str = solv_xmlparser_find_attr("installed", atts)) != 0)
+       repodata_set_num(pd->data, handle, SOLVABLE_INSTALLSIZE, strtoull(str, 0, 10));
+      if ((str = solv_xmlparser_find_attr("package", atts)) != 0)
+       repodata_set_num(pd->data, handle, SOLVABLE_DOWNLOADSIZE, strtoull(str, 0, 10));
+      break;
      case STATE_HEADERRANGE:
        {
          unsigned int end;
-        str = find_attr("end", atts);
+        str = solv_xmlparser_find_attr("end", atts);
         if (str && (end = atoi(str)) != 0)
           repodata_set_num(pd->data, handle, SOLVABLE_HEADEREND, end);
+       break;
        }
        /*
          <diskusage>
@@ -868,32 +855,51 @@ startElement(void *userData, const char *name, const char **atts)
        */
      case STATE_DISKUSAGE:
        {
-        /* Really, do nothing, wat for <dir> tag */
+        /* Really, do nothing, wait for <dir> tag */
          break;
        }
      case STATE_DIR:
        {
          long filesz = 0, filenum = 0;
-        unsigned dirid;
-        if ((str = find_attr("name", atts)) != 0)
-          dirid = repodata_str2dir(pd->data, str, 1);
-        else
-          {
-            fprintf( stderr, "<dir .../> tag without 'name' attribute, atts = %p, *atts = %p\n",
-                    (void *)atts, *atts);
+        Id did;
+
+        if ((str = solv_xmlparser_find_attr("name", atts)) == 0)
+         {
+           pd->ret = pool_error(pool, -1, "<dir .../> tag without 'name' attribute");
              break;
-          }
-        if ((str = find_attr("size", atts)) != 0)
+         }
+       if (*str != '/')
+         {
+           if (s->arch == ARCH_SRC || s->arch == ARCH_NOSRC)
+             str = "/usr/src";
+           else
+             {
+               int l = strlen(str) + 2;
+               char *space = solv_xmlparser_contentspace(xmlp, l);
+               space[0] = '/';
+               memcpy(space + 1, str, l - 1);
+               str = space;
+           }
+         }
+        did = repodata_str2dir(pd->data, str, 1);
+        if ((str = solv_xmlparser_find_attr("size", atts)) != 0)
            filesz = strtol(str, 0, 0);
-        if ((str = find_attr("count", atts)) != 0)
+        if ((str = solv_xmlparser_find_attr("count", atts)) != 0)
            filenum = strtol(str, 0, 0);
-        pd->dirs = solv_extend(pd->dirs, pd->ndirs, 1, sizeof(pd->dirs[0]), 31);
-        pd->dirs[pd->ndirs][0] = dirid;
-        pd->dirs[pd->ndirs][1] = filesz;
-        pd->dirs[pd->ndirs][2] = filenum;
-        pd->ndirs++;
+        if (filesz || filenum)
+          {
+            queue_push(&pd->diskusageq, did);
+            queue_push2(&pd->diskusageq, filesz, filenum);
+          }
          break;
        }
+    case STATE_CHANGELOG:
+      pd->changelog_handle = repodata_new_handle(pd->data);
+      if ((str = solv_xmlparser_find_attr("date", atts)) != 0)
+       repodata_set_num(pd->data, pd->changelog_handle, SOLVABLE_CHANGELOG_TIME, strtoull(str, 0, 10));
+      if ((str = solv_xmlparser_find_attr("author", atts)) != 0)
+       repodata_set_str(pd->data, pd->changelog_handle, SOLVABLE_CHANGELOG_AUTHOR, str);
+      break;
      default:
        break;
      }
@@ -902,15 +908,12 @@ startElement(void *userData, const char *name, const char **atts)
  
  /*
   * endElement
- * XML callback
- *
   */
  
-static void XMLCALL
-endElement(void *userData, const char *name)
+static void
+endElement(struct solv_xmlparser *xmlp, int state, char *content)
  {
-  //fprintf(stderr,"-tag: %s\n", name);
-  struct parsedata *pd = userData;
+  struct parsedata *pd = xmlp->userdata;
    Pool *pool = pd->pool;
    Solvable *s = pd->solvable;
    Repo *repo = pd->repo;
@@ -918,28 +921,17 @@ endElement(void *userData, const char *name)
    Id id;
    char *p;
  
-  if (pd->depth != pd->statedepth)
-    {
-      pd->depth--;
-      // printf("back from unknown %d %d %d\n", pd->state, pd->depth, pd->statedepth);
-      return;
-    }
-
-  /* ignore patterns & metadata */
-  if (pd->state == STATE_START && !strcmp(name, "patterns"))
-    return;
-  if (pd->state == STATE_START && !strcmp(name, "products"))
-    return;
-  //if (pd->state == STATE_START && !strcmp(name, "metadata"))
-  //  return;
-  if (pd->state == STATE_SOLVABLE && !strcmp(name, "format"))
+  if (!s)
      return;
  
-  pd->depth--;
-  pd->statedepth--;
-  switch (pd->state)
+  switch (state)
      {
      case STATE_SOLVABLE:
+      if (pd->extending)
+       {
+         pd->solvable = 0;
+         break;
+       }
        if (pd->kind && !s->name) /* add namespace in case of NULL name */
          s->name = pool_str2id(pool, join2(&pd->jd, pd->kind, ":", 0), 1);
        if (!s->arch)
@@ -948,191 +940,159 @@ endElement(void *userData, const char *name)
          s->evr = ID_EMPTY;     /* some patterns have this */
        if (s->name && s->arch != ARCH_SRC && s->arch != ARCH_NOSRC)
          s->provides = repo_addid_dep(repo, s->provides, pool_rel2id(pool, s->name, s->evr, REL_EQ, 1), 0);
-      s->supplements = repo_fix_supplements(repo, s->provides, s->supplements, pd->freshens);
-      s->conflicts = repo_fix_conflicts(repo, s->conflicts);
+      repo_rewrite_suse_deps(s, pd->freshens);
        pd->freshens = 0;
        pd->kind = 0;
+      pd->solvable = 0;
        break;
      case STATE_NAME:
        if (pd->kind)
-        s->name = pool_str2id(pool, join2(&pd->jd, pd->kind, ":", pd->content), 1);
+        s->name = pool_str2id(pool, join2(&pd->jd, pd->kind, ":", content), 1);
        else
-        s->name = pool_str2id(pool, pd->content, 1);
+        s->name = pool_str2id(pool, content, 1);
        break;
      case STATE_ARCH:
-      s->arch = pool_str2id(pool, pd->content, 1);
+      s->arch = pool_str2id(pool, content, 1);
        break;
      case STATE_VENDOR:
-      s->vendor = pool_str2id(pool, pd->content, 1);
+      s->vendor = pool_str2id(pool, content, 1);
        break;
      case STATE_RPM_GROUP:
-      repodata_set_poolstr(pd->data, handle, SOLVABLE_GROUP, pd->content);
+      repodata_set_poolstr(pd->data, handle, SOLVABLE_GROUP, content);
        break;
      case STATE_RPM_LICENSE:
-      repodata_set_poolstr(pd->data, handle, SOLVABLE_LICENSE, pd->content);
+      repodata_set_poolstr(pd->data, handle, SOLVABLE_LICENSE, content);
        break;
      case STATE_CHECKSUM:
        {
-        Id index;
-       
-        if (strlen(pd->content) != 2 * solv_chksum_len(pd->chksumtype))
-          {
-            fprintf(stderr, "Invalid checksum length: %d: for %s\n", (unsigned int)XML_GetCurrentLineNumber(*pd->parser), solv_chksum_type2str(pd->chksumtype));
-            exit(1);
-          }
-        repodata_set_checksum(pd->data, handle, SOLVABLE_CHECKSUM, pd->chksumtype, pd->content);
-        /* we save the checksum to solvable id relationship for extended
-           metadata */
-        index = stringpool_str2id(&pd->cspool, pd->content, 1 /* create it */);
-        if (index >= pd->ncscache)
+       unsigned char chk[256];
+       int l = solv_chksum_len(pd->chksumtype);
+       const char *str = content;
+       if (!l || l > sizeof(chk))
+         break;
+       if (solv_hex2bin(&str, chk, l) != l || content[2 * l])
            {
-            pd->cscache = solv_zextend(pd->cscache, pd->ncscache, index + 1 - pd->ncscache, sizeof(Id), 255);
-            pd->ncscache = index + 1;
+           pd->ret = pool_error(pool, -1, "line %u: invalid %s checksum", solv_xmlparser_lineno(xmlp), solv_chksum_type2str(pd->chksumtype));
+           break;
            }
-        /* add the checksum to the cache */
-        pd->cscache[index] = s - pool->solvables;
+        repodata_set_bin_checksum(pd->data, handle, SOLVABLE_CHECKSUM, pd->chksumtype, chk);
+       /* we save the checksum to solvable id relationship for extending metadata */
+       if (pd->cshash_filled)
+         put_in_cshash(pd, chk, l, s - pool->solvables);
          break;
        }
      case STATE_FILE:
-#if 0
-      id = pool_str2id(pool, pd->content, 1);
-      s->provides = repo_addid_dep(repo, s->provides, id, SOLVABLE_FILEMARKER);
-#endif
-      if ((p = strrchr(pd->content, '/')) != 0)
+      if ((p = strrchr(content, '/')) != 0)
         {
           *p++ = 0;
-         if (pd->lastdir && !strcmp(pd->lastdirstr, pd->content))
+         if (pd->lastdir && !strcmp(pd->lastdirstr, content))
             {
               id = pd->lastdir;
             }
           else
             {
-             int l;
-             id = repodata_str2dir(pd->data, pd->content, 1);
-             l = strlen(pd->content) + 1;
-             if (l > pd->lastdirstrl)
+             int l = p - content;
+             if (l + 1 > pd->lastdirstrl)      /* + 1 for the possible leading / we need to insert */
                 {
                   pd->lastdirstrl = l + 128;
                   pd->lastdirstr = solv_realloc(pd->lastdirstr, pd->lastdirstrl);
                 }
-             strcpy(pd->lastdirstr, pd->content);
+             if (content[0] != '/')
+               {
+                 pd->lastdirstr[0] = '/';
+                 memcpy(pd->lastdirstr + 1, content, l);
+                 id = repodata_str2dir(pd->data, pd->lastdirstr, 1);
+               }
+             else
+               id = repodata_str2dir(pd->data, content, 1);
               pd->lastdir = id;
+             memcpy(pd->lastdirstr, content, l);
             }
         }
        else
         {
-         p = pd->content;
-         id = 0;
+         p = content;
+         id = repodata_str2dir(pd->data, "/", 1);
         }
-      if (!id)
-       id = repodata_str2dir(pd->data, "/", 1);
        repodata_add_dirstr(pd->data, handle, SOLVABLE_FILELIST, id, p);
        break;
      case STATE_SUMMARY:
-      repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_SUMMARY, pd->tmplang), pd->content);
+      repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_SUMMARY, pd->tmplang), content);
        break;
      case STATE_DESCRIPTION:
-      set_description_author(pd->data, handle, pd->content, pd);
+      set_description_author(pd->data, handle, content, pd);
        break;
      case STATE_CATEGORY:
-      repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_CATEGORY, pd->tmplang), pd->content);
+      repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_CATEGORY, pd->tmplang), content);
        break;
      case STATE_DISTRIBUTION:
-        repodata_set_poolstr(pd->data, handle, SOLVABLE_DISTRIBUTION, pd->content);
+        repodata_set_poolstr(pd->data, handle, SOLVABLE_DISTRIBUTION, content);
          break;
      case STATE_URL:
-      if (pd->content[0])
-       repodata_set_str(pd->data, handle, SOLVABLE_URL, pd->content);
+      if (*content)
+       repodata_set_str(pd->data, handle, SOLVABLE_URL, content);
        break;
      case STATE_PACKAGER:
-      if (pd->content[0])
-       repodata_set_poolstr(pd->data, handle, SOLVABLE_PACKAGER, pd->content);
+      if (*content)
+       repodata_set_poolstr(pd->data, handle, SOLVABLE_PACKAGER, content);
        break;
      case STATE_SOURCERPM:
-      set_sourcerpm(pd->data, s, handle, pd->content);
+      if (*content)
+       repodata_set_sourcepkg(pd->data, handle, content);
        break;
      case STATE_RELNOTESURL:
-      if (pd->content[0])
+      if (*content)
          {
-          repodata_add_poolstr_array(pd->data, pd->handle, PRODUCT_URL, pd->content);
-          repodata_add_idarray(pd->data, pd->handle, PRODUCT_URL_TYPE, pool_str2id(pool, "releasenotes", 1));
+          repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, content);
+          repodata_add_idarray(pd->data, handle, PRODUCT_URL_TYPE, pool_str2id(pool, "releasenotes", 1));
          }
        break;
      case STATE_UPDATEURL:
-      if (pd->content[0])
+      if (*content)
          {
-          repodata_add_poolstr_array(pd->data, pd->handle, PRODUCT_URL, pd->content);
-          repodata_add_idarray(pd->data, pd->handle, PRODUCT_URL_TYPE, pool_str2id(pool, "update", 1));
+          repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, content);
+          repodata_add_idarray(pd->data, handle, PRODUCT_URL_TYPE, pool_str2id(pool, "update", 1));
          }
        break;
      case STATE_OPTIONALURL:
-      if (pd->content[0])
+      if (*content)
          {
-          repodata_add_poolstr_array(pd->data, pd->handle, PRODUCT_URL, pd->content);
-          repodata_add_idarray(pd->data, pd->handle, PRODUCT_URL_TYPE, pool_str2id(pool, "optional", 1));
+          repodata_add_poolstr_array(pd->data, handle, PRODUCT_URL, content);
+          repodata_add_idarray(pd->data, handle, PRODUCT_URL_TYPE, pool_str2id(pool, "optional", 1));
          }
        break;
      case STATE_FLAG:
-      if (pd->content[0])
-          repodata_set_poolstr(pd->data, handle, PRODUCT_FLAGS, pd->content);
+      if (*content)
+        repodata_add_poolstr_array(pd->data, handle, PRODUCT_FLAGS, content);
        break;
      case STATE_EULA:
-      if (pd->content[0])
-       repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_EULA, pd->tmplang), pd->content);
+      if (*content)
+       repodata_set_str(pd->data, handle, langtag(pd, SOLVABLE_EULA, pd->tmplang), content);
        break;
      case STATE_KEYWORD:
-      if (pd->content[0])
-        repodata_add_poolstr_array(pd->data, pd->handle, SOLVABLE_KEYWORDS, pd->content);
+      if (*content)
+        repodata_add_poolstr_array(pd->data, handle, SOLVABLE_KEYWORDS, content);
        break;
      case STATE_DISKUSAGE:
-      if (pd->ndirs)
-        commit_diskusage(pd, pd->handle);
+      if (pd->diskusageq.count)
+        repodata_add_diskusage(pd->data, handle, &pd->diskusageq);
        break;
      case STATE_ORDER:
-      if (pd->content[0])
-        repodata_set_str(pd->data, pd->handle, SOLVABLE_ORDER, pd->content);
+      if (*content)
+        repodata_set_str(pd->data, handle, SOLVABLE_ORDER, content);
+      break;
+    case STATE_CHANGELOG:
+      repodata_set_str(pd->data, pd->changelog_handle, SOLVABLE_CHANGELOG_TEXT, content);
+      repodata_add_flexarray(pd->data, handle, SOLVABLE_CHANGELOG, pd->changelog_handle);
+      pd->changelog_handle = 0;
+      break;
      default:
        break;
      }
-  pd->state = pd->sbtab[pd->state];
-  pd->docontent = 0;
-  // fprintf(stderr, "back from known %d %d %d\n", pd->state, pd->depth, pd->statedepth);
-}
-
-
-/*
- * characterData
- * XML callback
- *
- */
-
-static void XMLCALL
-characterData(void *userData, const XML_Char *s, int len)
-{
-  struct parsedata *pd = userData;
-  int l;
-  char *c;
-
-  if (!pd->docontent)
-    return;
-  l = pd->lcontent + len + 1;
-  if (l > pd->acontent)
-    {
-      pd->content = solv_realloc(pd->content, l + 256);
-      pd->acontent = l + 256;
-    }
-  c = pd->content + pd->lcontent;
-  pd->lcontent += len;
-  while (len-- > 0)
-    *c++ = *s++;
-  *c = 0;
  }
  
  
  /*-----------------------------------------------*/
-/* 'main' */
-
-#define BUFF_SIZE 8192
  
  /*
   * repo_add_rpmmd
@@ -1145,88 +1105,60 @@ repo_add_rpmmd(Repo *repo, FILE *fp, const char *language, int flags)
  {
    Pool *pool = repo->pool;
    struct parsedata pd;
-  char buf[BUFF_SIZE];
-  int i, l;
-  struct stateswitch *sw;
    Repodata *data;
    unsigned int now;
-  XML_Parser parser;
  
    now = solv_timems(0);
    data = repo_add_repodata(repo, flags);
  
    memset(&pd, 0, sizeof(pd));
-  for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
-    {
-      if (!pd.swtab[sw->from])
-        pd.swtab[sw->from] = sw;
-      pd.sbtab[sw->to] = sw->from;
-    }
    pd.pool = pool;
    pd.repo = repo;
    pd.data = data;
  
-  pd.content = solv_malloc(256);
-  pd.acontent = 256;
-  pd.lcontent = 0;
    pd.kind = 0;
    pd.language = language && *language && strcmp(language, "en") != 0 ? language : 0;
+  queue_init(&pd.diskusageq);
  
-  /* initialize the string pool where we will store
-     the package checksums we know about, to get an Id
-     we can use in a cache */
-  stringpool_init_empty(&pd.cspool);
+  init_cshash(&pd);
    if ((flags & REPO_EXTEND_SOLVABLES) != 0)
      {
        /* setup join data */
-      Dataiterator di;
-      dataiterator_init(&di, pool, repo, 0, SOLVABLE_CHECKSUM, 0, 0);
-      while (dataiterator_step(&di))
-       {
-         const char *str;
-         int index;
-
-         if (!solv_chksum_len(di.key->type))
-           continue;
-         str = repodata_chk2str(di.data, di.key->type, (const unsigned char *)di.kv.str);
-          index = stringpool_str2id(&pd.cspool, str, 1);
-         if (index >= pd.ncscache)
-           {
-             pd.cscache = solv_zextend(pd.cscache, pd.ncscache, index + 1 - pd.ncscache, sizeof(Id), 255);
-             pd.ncscache = index + 1;
-           }
-          pd.cscache[index] = di.solvid;
-       }
-      dataiterator_free(&di);
+      pd.cshash_filled = 1;
+      fill_cshash_from_repo(&pd);
      }
  
-  parser = XML_ParserCreate(NULL);
-  XML_SetUserData(parser, &pd);
-  pd.parser = &parser;
-  XML_SetElementHandler(parser, startElement, endElement);
-  XML_SetCharacterDataHandler(parser, characterData);
-  for (;;)
+  solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement);
+  if (solv_xmlparser_parse(&pd.xmlp, fp) != SOLV_XMLPARSER_OK)
+    pd.ret = pool_error(pool, -1, "repo_rpmmd: %s at line %u:%u", pd.xmlp.errstr, pd.xmlp.line, pd.xmlp.column);
+  solv_xmlparser_free(&pd.xmlp);
+
+  solv_free(pd.lastdirstr);
+  join_freemem(&pd.jd);
+  free_cshash(&pd);
+  repodata_free_dircache(data);
+  queue_free(&pd.diskusageq);
+
+  if ((flags & REPO_EXTEND_SOLVABLES) != 0)
      {
-      l = fread(buf, 1, sizeof(buf), fp);
-      if (XML_Parse(parser, buf, l, l == 0) == XML_STATUS_ERROR)
+      /* is this a filelist extension? */
+      if (repodata_has_keyname(data, SOLVABLE_FILELIST))
+       repodata_set_filelisttype(data, REPODATA_FILELIST_EXTENSION);
+    }
+  else
+    {
+      /* is this a primary with a filtered filelist? */
+      if (data->end > data->start)
         {
-         pool_debug(pool, SOLV_FATAL, "repo_rpmmd: %s at line %u:%u\n", XML_ErrorString(XML_GetErrorCode(parser)), (unsigned int)XML_GetCurrentLineNumber(parser), (unsigned int)XML_GetCurrentColumnNumber(parser));
-         exit(1);
+         repodata_set_filelisttype(data, REPODATA_FILELIST_FILTERED);
+         repodata_set_void(data, SOLVID_META, REPOSITORY_FILTEREDFILELIST);
         }
-      if (l == 0)
-       break;
      }
-  XML_ParserFree(parser);
-  solv_free(pd.content);
-  solv_free(pd.lastdirstr);
-  join_freemem(&pd.jd);
-  stringpool_free(&pd.cspool);
-  solv_free(pd.cscache);
  
    if (!(flags & REPO_NO_INTERNALIZE))
      repodata_internalize(data);
    POOL_DEBUG(SOLV_DEBUG_STATS, "repo_add_rpmmd took %d ms\n", solv_timems(now));
    POOL_DEBUG(SOLV_DEBUG_STATS, "repo size: %d solvables\n", repo->nsolvables);
-  POOL_DEBUG(SOLV_DEBUG_STATS, "repo memory used: %d K incore, %d K idarray\n", data->incoredatalen/1024, repo->idarraysize / (int)(1024/sizeof(Id)));
-  return 0;
+  POOL_DEBUG(SOLV_DEBUG_STATS, "repo memory used: %d K incore, %d K idarray\n", repodata_memused(data)/1024, repo->idarraysize / (int)(1024/sizeof(Id)));
+  return pd.ret;
  }