Alpha version of new metadata parser
authorTomas Mlcoch <tmlcoch@redhat.com>
Mon, 26 Mar 2012 08:33:46 +0000 (10:33 +0200)
committerTomas Mlcoch <tmlcoch@redhat.com>
Tue, 27 Mar 2012 14:42:03 +0000 (16:42 +0200)
CMakeLists.txt
doc/createrepo_c.8.gz
src/CMakeLists.txt
src/load_metadata_2.c [new file with mode: 0644]
src/load_metadata_2.h [new file with mode: 0644]

index b92b337..c33a19e 100644 (file)
@@ -10,7 +10,7 @@ message("LIBXML2_FLAGS: ${LIBXML2_FLAGS}")
 execute_process(COMMAND xml2-config --libs
     OUTPUT_VARIABLE LIBXML2_LDFLAGS)
 string(REPLACE "\n" "" LIBXML2_LDFLAGS ${LIBXML2_LDFLAGS})
-message("LIBXML2_LFLAGS: ${LIBXML2_LDFLAGS}")
+message("LIBXML2_LDFLAGS: ${LIBXML2_LDFLAGS}")
 
 
 set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wl,--as-needed ${LIBXML2_FLAGS}")
@@ -26,6 +26,7 @@ find_package(BZip2 REQUIRED)
 find_package(GLIB2 REQUIRED)
 find_package(GTHREAD2 REQUIRED)
 find_package(ZLIB REQUIRED)
+find_package(EXPAT REQUIRED)    # Doesn't work!
 #find_package(LibXml2 REQUIRED) # Doesn't work!
 
 include_directories(${GLIB2_INCLUDE_DIRS})
index e2a652d..795c9f7 100644 (file)
Binary files a/doc/createrepo_c.8.gz and b/doc/createrepo_c.8.gz differ
index ece5e4c..5ee04dc 100644 (file)
@@ -15,4 +15,12 @@ SET_TARGET_PROPERTIES(libcreaterepo_c PROPERTIES OUTPUT_NAME "libcreaterepo_c")
 ADD_EXECUTABLE(../createrepo_c createrepo_c.c cmd_parser.c)
 TARGET_LINK_LIBRARIES(../createrepo_c libcreaterepo_c ${GLIB2_LIBRARIES} ${GTHREAD2_LIBRARIES})
 
+SET (parser_test_SRCS load_metadata_2.c)
+ADD_LIBRARY(parser_testlib ${parser_test_SRCS})
+TARGET_LINK_LIBRARIES(parser_testlib libcreaterepo_c)
+TARGET_LINK_LIBRARIES(parser_testlib "-lexpat")
+
+ADD_EXECUTABLE(../parser_test parser_test.c)
+TARGET_LINK_LIBRARIES(../parser_test parser_testlib)
+
 INSTALL_PROGRAMS(/usr/bin/ FILES createrepo_c)
\ No newline at end of file
diff --git a/src/load_metadata_2.c b/src/load_metadata_2.c
new file mode 100644 (file)
index 0000000..c62d02b
--- /dev/null
@@ -0,0 +1,1075 @@
+#include <glib.h>
+#include <string.h>
+#include <assert.h>
+#include <expat.h>
+#include "package.h"
+#include "logging.h"
+#include "misc.h"
+#include "load_metadata.h"
+#include "load_metadata_2.h"
+#include "compression_wrapper.h"
+
+#undef MODULE
+#define MODULE "load_metadata_2: "
+
+#define CHUNK_SIZE              8192
+#define PKGS_REALLOC_STEP       2000
+
+
+/*
+ * Identifies the text-bearing XML element that was opened most recently.
+ * The start handlers set it, the character-data handlers use it to decide
+ * whether incoming text should be buffered, and the end handlers use it to
+ * decide where the buffered text is stored.
+ */
+typedef enum {
+    NONE_ELEM = 0,          /* no text element is currently tracked */
+    NAME_ELEM,              /* <name> */
+    ARCH_ELEM,              /* <arch> */
+    CHECKSUM_ELEM,          /* <checksum> */
+    SUMMARY_ELEM,           /* <summary> */
+    DESCRIPTION_ELEM,       /* <description> */
+    PACKAGER_ELEM,          /* <packager> */
+    URL_ELEM,               /* <url> */
+    RPM_LICENSE_ELEM,       /* <rpm:license> */
+    RPM_VENDOR_ELEM,        /* <rpm:vendor> */
+    RPM_GROUP_ELEM,         /* <rpm:group> */
+    RPM_BUILDHOST_ELEM,     /* <rpm:buildhost> */
+    RPM_SOURCERPM_ELEM,     /* <rpm:sourcerpm> */
+    FILE_ELEM,              /* <file> without a recognized type attribute */
+    FILE_DIR_ELEM,          /* <file type="dir"> */
+    FILE_GHOST_ELEM,        /* <file type="ghost"> */
+    CHANGELOG_ELEM          /* <changelog> */
+} PrimaryTextElement;
+
+
+/*
+ * Coarse position of the parser inside the XML document. The handlers use
+ * it for sanity checks and to decide which dependency list an <rpm:entry>
+ * belongs to.
+ */
+typedef enum {
+    ROOT = 0,       /* outside of the document element */
+    METADATA,       /* primary.xml: inside <metadata> */
+    PACKAGE,        /* inside <package> (all three files) */
+    FORMAT,         /* primary.xml: inside <format> */
+    PROVIDES,       /* primary.xml: inside <rpm:provides> */
+    CONFLICTS,      /* primary.xml: inside <rpm:conflicts> */
+    OBSOLETES,      /* primary.xml: inside <rpm:obsoletes> */
+    REQUIRES,       /* primary.xml: inside <rpm:requires> */
+    FILELISTS,      /* filelists.xml: inside <filelists> */
+    OTHERDATA       /* other.xml: inside <otherdata> */
+} PrimaryParserContext;
+
+
+
+/*
+ * State shared by the primary.xml, filelists.xml and other.xml handlers.
+ * A pointer to this structure is registered as Expat user data.
+ */
+struct PrimaryParserData {
+    /* Bookkeeping for an array-based package store; only referenced from
+     * commented-out code in this file. */
+    int total_pkgs;
+    int actual_pkg;
+    int pkgs_size;
+    Package **pkgs;
+
+    GString *current_string;        /* text collected for the open element */
+    GHashTable *hashtable;          /* pkgId -> Package */
+    Package *pkg;                   /* package being processed, or NULL */
+    PrimaryParserContext context;   /* current position in the document */
+    PrimaryTextElement last_elem;   /* most recently opened text element */
+};
+
+
+
+/*
+ * Value destructor for the metadata hashtable: releases the Package that
+ * is stored as the value.
+ */
+void free_values_2(gpointer data)
+{
+    Package *stored_pkg = data;
+
+    package_free(stored_pkg);
+}
+
+
+
+/*
+ * Creates an empty hashtable for parsed packages.
+ *
+ * Keys are C strings hashed with g_str_hash and compared with g_str_equal;
+ * they are not freed by the table (no key destructor is registered).
+ * Values are Package pointers released through free_values_2() when an
+ * entry is removed or the table is destroyed.
+ *
+ * Returns the newly created table.
+ */
+GHashTable *new_metadata_hashtable(void)
+{
+    return g_hash_table_new_full(g_str_hash, g_str_equal, NULL, free_values_2);
+}
+
+
+
+/*
+ * Destroys a hashtable of parsed packages. Passing NULL is allowed and
+ * does nothing.
+ */
+void destroy_metadata_hashtable(GHashTable *hashtable)
+{
+    if (hashtable == NULL) {
+        return;
+    }
+
+    g_hash_table_destroy(hashtable);
+}
+
+
+// primary.xml parser handlers
+
+/*
+ * Expat start-element handler for primary.xml.
+ *
+ * data - pointer to the shared struct PrimaryParserData
+ * el   - element name
+ * attr - NULL-terminated array of alternating attribute names and values
+ *
+ * Attribute-only elements (<version>, <time>, <size>, <location>,
+ * <rpm:header-range>, <rpm:entry>) are stored into the current package
+ * right away. For text elements only ppd->last_elem is set; the text is
+ * collected by pri_char_handler() and stored by pri_end_handler().
+ * Container elements switch ppd->context.
+ */
+void pri_start_handler(void *data, const char *el, const char **attr) {
+    struct PrimaryParserData *ppd = (struct PrimaryParserData *) data;
+    Package *pkg = ppd->pkg;
+    int i;
+
+    // Everything except <metadata> and <package> describes the current
+    // package. Without one there is nothing to fill in, and continuing would
+    // dereference a NULL package here or later in pri_end_handler().
+    if (!pkg && strcmp(el, "package") && strcmp(el, "metadata")) {
+        g_warning(MODULE"%s: Element \"%s\" found outside of a package", __func__, el);
+        return;
+    }
+
+    // <file> and <rpm:entry> are most frequently used tags in primary.xml
+
+    // <file>
+    if (!strcmp(el, "file")) {
+        ppd->last_elem = FILE_ELEM;
+
+    // <rpm:entry>
+    } else if (!strcmp(el, "rpm:entry")) {
+        Dependency *dependency;
+        dependency = dependency_new();
+
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "name")) {
+                dependency->name = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "flags")) {
+                dependency->flags = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "epoch")) {
+                dependency->epoch = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "ver")) {
+                dependency->version = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "rel")) {
+                dependency->release = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "pre")) {
+                // Only "0" and "FALSE" mean false, anything else is true
+                if (!strcmp(attr[i+1], "0") || !strcmp(attr[i+1], "FALSE")) {
+                    dependency->pre = FALSE;
+                } else {
+                    dependency->pre = TRUE;
+                }
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+        // Lists are built by prepending; pri_end_handler() reverses them
+        switch (ppd->context) {
+            case PROVIDES:
+                pkg->provides = g_slist_prepend(pkg->provides, dependency);
+                break;
+            case CONFLICTS:
+                pkg->conflicts = g_slist_prepend(pkg->conflicts, dependency);
+                break;
+            case OBSOLETES:
+                pkg->obsoletes = g_slist_prepend(pkg->obsoletes, dependency);
+                break;
+            case REQUIRES:
+                pkg->requires = g_slist_prepend(pkg->requires, dependency);
+                break;
+            default:
+                // The strings live in pkg->chunk, only the struct is dropped
+                g_free(dependency);
+                g_warning(MODULE"%s: Bad context (%d) for rpm:entry", __func__, ppd->context);
+                break;
+        }
+
+    // <package>
+    } else if (!strcmp(el, "package")) {
+        // Check sanity
+        if (ppd->context != METADATA) {
+            g_critical(MODULE"%s: Package element: Bad XML context!", __func__);
+            return;
+        }
+        if (ppd->pkg) {
+            g_critical(MODULE"%s: Package element: Pkg pointer is not NULL", __func__);
+            return;
+        }
+
+        ppd->context = PACKAGE;
+        ppd->pkg = package_new();
+
+    // <name>
+    } else if (!strcmp(el, "name")) {
+        ppd->last_elem = NAME_ELEM;
+
+    // <arch>
+    } else if (!strcmp(el, "arch")) {
+        ppd->last_elem = ARCH_ELEM;
+
+    // <version>
+    } else if (!strcmp(el, "version")) {
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "epoch")) {
+                pkg->epoch = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "ver")) {
+                pkg->version = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "rel")) {
+                pkg->release = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+    // <checksum>
+    } else if (!strcmp(el, "checksum")) {
+        ppd->last_elem = CHECKSUM_ELEM;
+
+        // Other attributes of <checksum> are ignored without a warning
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "type")) {
+                pkg->checksum_type = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            }
+        }
+
+    // <summary>
+    } else if (!strcmp(el, "summary")) {
+        ppd->last_elem = SUMMARY_ELEM;
+
+    // <description>
+    } else if (!strcmp(el, "description")) {
+        ppd->last_elem = DESCRIPTION_ELEM;
+
+    // <packager>
+    } else if (!strcmp(el, "packager")) {
+        ppd->last_elem = PACKAGER_ELEM;
+
+    // <url>
+    } else if (!strcmp(el, "url")) {
+        ppd->last_elem = URL_ELEM;
+
+    // <time>
+    } else if (!strcmp(el, "time")) {
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "file")) {
+                pkg->time_file = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else if (!strcmp(attr[i], "build")) {
+                pkg->time_build = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+    // <size>
+    } else if (!strcmp(el, "size")) {
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "package")) {
+                pkg->size_package = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else if (!strcmp(attr[i], "installed")) {
+                pkg->size_installed = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else if (!strcmp(attr[i], "archive")) {
+                pkg->size_archive = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+    // <location>
+    } else if (!strcmp(el, "location")) {
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "href")) {
+                pkg->location_href = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "xml:base")) {
+                pkg->location_base = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+    // <format>
+    } else if (!strcmp(el, "format")) {
+        ppd->context = FORMAT;
+
+    // <rpm:license>
+    } else if (!strcmp(el, "rpm:license")) {
+        ppd->last_elem = RPM_LICENSE_ELEM;
+
+    // <rpm:vendor>
+    } else if (!strcmp(el, "rpm:vendor")) {
+        ppd->last_elem = RPM_VENDOR_ELEM;
+
+    // <rpm:group>
+    } else if (!strcmp(el, "rpm:group")) {
+        ppd->last_elem = RPM_GROUP_ELEM;
+
+    // <rpm:buildhost>
+    } else if (!strcmp(el, "rpm:buildhost")) {
+        ppd->last_elem = RPM_BUILDHOST_ELEM;
+
+    // <rpm:sourcerpm>
+    } else if (!strcmp(el, "rpm:sourcerpm")) {
+        ppd->last_elem = RPM_SOURCERPM_ELEM;
+
+    // <rpm:header-range>
+    } else if (!strcmp(el, "rpm:header-range")) {
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "start")) {
+                pkg->rpm_header_start = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else if (!strcmp(attr[i], "end")) {
+                pkg->rpm_header_end = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+    // <rpm:provides>
+    } else if (!strcmp(el, "rpm:provides")) {
+        ppd->context = PROVIDES;
+
+    // <rpm:conflicts>
+    } else if (!strcmp(el, "rpm:conflicts")) {
+        ppd->context = CONFLICTS;
+
+    // <rpm:obsoletes>
+    } else if (!strcmp(el, "rpm:obsoletes")) {
+        ppd->context = OBSOLETES;
+
+    // <rpm:requires>
+    } else if (!strcmp(el, "rpm:requires")) {
+        ppd->context = REQUIRES;
+
+    // <metadata>
+    } else if (!strcmp(el, "metadata")) {
+        if (ppd->context != ROOT) {
+            g_critical(MODULE"%s: Bad context (%d) for %s element", __func__, ppd->context, el);
+            return;
+        }
+        ppd->context = METADATA;
+
+    // unknown element
+    } else {
+        g_warning(MODULE"%s: Unknown element: %s", __func__, el);
+    }
+}
+
+
+
+/*
+ * Expat character-data handler for primary.xml.
+ *
+ * Appends the received text (txtlen bytes starting at txt) to
+ * ppd->current_string when the most recently opened element is one of the
+ * text elements that primary.xml contributes. Nothing is done when no
+ * package is open or no text element is tracked.
+ */
+void pri_char_handler(void *data, const char *txt, int txtlen) {
+    struct PrimaryParserData *state = data;
+
+    if (state->pkg == NULL) {
+        return;
+    }
+
+    switch (state->last_elem) {
+        case NONE_ELEM:
+            // Text between elements is of no interest
+            return;
+
+        case FILE_ELEM:
+        case FILE_DIR_ELEM:
+        case FILE_GHOST_ELEM:
+            // Files are read from filelists.xml, not from primary.xml -> skip
+            return;
+
+        case NAME_ELEM:
+        case ARCH_ELEM:
+        case CHECKSUM_ELEM:
+        case SUMMARY_ELEM:
+        case DESCRIPTION_ELEM:
+        case PACKAGER_ELEM:
+        case URL_ELEM:
+        case RPM_LICENSE_ELEM:
+        case RPM_VENDOR_ELEM:
+        case RPM_GROUP_ELEM:
+        case RPM_BUILDHOST_ELEM:
+        case RPM_SOURCERPM_ELEM:
+            g_string_append_len(state->current_string, txt, (gsize) txtlen);
+            return;
+
+        default:
+            g_warning(MODULE"%s: Unknown last xml element", __func__);
+            return;
+    }
+}
+
+
+
+/*
+ * Expat end-element handler for primary.xml.
+ *
+ * First stores the text collected in ppd->current_string into the field of
+ * the current package selected by ppd->last_elem, then resets last_elem and
+ * the text buffer. Afterwards it restores ppd->context for the element
+ * that was closed. On </package> the dependency lists are put back into
+ * document order and the finished package is stored into ppd->hashtable
+ * under its pkgId (the text of <checksum>).
+ */
+void pri_end_handler(void *data, const char *el) {
+    struct PrimaryParserData *ppd = (struct PrimaryParserData *) data;
+    Package *pkg = ppd->pkg;
+
+
+    // Store strings
+
+    gchar *txt = ppd->current_string->str;
+    gsize txtlen = ppd->current_string->len;
+
+    if (ppd->last_elem != NONE_ELEM) {
+        if (!pkg) {
+            // A text element was opened outside of any package; there is
+            // no package to store the text into.
+            g_warning(MODULE"%s: Text element closed outside of a package", __func__);
+        } else {
+            switch (ppd->last_elem) {
+                case NAME_ELEM:
+                    pkg->name = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case ARCH_ELEM:
+                    pkg->arch = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case CHECKSUM_ELEM:
+                    pkg->pkgId = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case SUMMARY_ELEM:
+                    pkg->summary = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case DESCRIPTION_ELEM:
+                    pkg->description = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case PACKAGER_ELEM:
+                    pkg->rpm_packager = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case URL_ELEM:
+                    pkg->url = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case RPM_LICENSE_ELEM:
+                    pkg->rpm_license = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case RPM_VENDOR_ELEM:
+                    pkg->rpm_vendor = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case RPM_GROUP_ELEM:
+                    pkg->rpm_group = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case RPM_BUILDHOST_ELEM:
+                    pkg->rpm_buildhost = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case RPM_SOURCERPM_ELEM:
+                    pkg->rpm_sourcerpm = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                    break;
+                case FILE_ELEM:
+                case FILE_DIR_ELEM:
+                case FILE_GHOST_ELEM:
+                default:
+                    // Files are taken from filelists.xml, nothing to store
+                    break;
+            }
+        }
+
+        ppd->last_elem = NONE_ELEM;
+    }
+
+    g_string_erase(ppd->current_string, 0, -1);
+
+
+    // Set proper context
+
+    if (!strcmp(el, "package")) {
+        ppd->context = METADATA;
+
+        if (pkg) {
+            // The lists were built by prepending -> restore document order
+            pkg->requires  = g_slist_reverse(pkg->requires);
+            pkg->provides  = g_slist_reverse(pkg->provides);
+            pkg->conflicts = g_slist_reverse(pkg->conflicts);
+            pkg->obsoletes = g_slist_reverse(pkg->obsoletes);
+
+            // Store package into the hashtable
+            char *key = pkg->pkgId;
+            if (key && key[0] != '\0') {
+                // The key points into the package's own string chunk.
+                // g_hash_table_replace() also swaps the stored key, so a
+                // duplicate pkgId cannot leave the table holding a key that
+                // points into the old, already freed package.
+                g_hash_table_replace(ppd->hashtable, key, pkg);
+            } else {
+                // Nothing else references a package without a key
+                g_warning(MODULE"%s: Empty hashtable key!", __func__);
+                package_free(pkg);
+            }
+
+            // Update PrimaryParserData
+            ppd->pkg = NULL;
+        }
+    } else if (!strcmp(el, "rpm:provides")) {
+        ppd->context = FORMAT;
+    } else if (!strcmp(el, "rpm:conflicts")) {
+        ppd->context = FORMAT;
+    } else if (!strcmp(el, "rpm:obsoletes")) {
+        ppd->context = FORMAT;
+    } else if (!strcmp(el, "rpm:requires")) {
+        ppd->context = FORMAT;
+    } else if (!strcmp(el, "format")) {
+        ppd->context = PACKAGE;
+    } else if (!strcmp(el, "metadata")) {
+        ppd->context = ROOT;
+    }
+}
+
+
+// filelists.xml parser handlers
+
+/*
+ * Expat start-element handler for filelists.xml.
+ *
+ * data - pointer to the shared struct PrimaryParserData
+ * el   - element name
+ * attr - NULL-terminated array of alternating attribute names and values
+ *
+ * <package> looks up the package previously loaded from primary.xml by its
+ * "pkgid" attribute and makes it the current package. <file> records the
+ * file type in ppd->last_elem; the file name itself is collected by
+ * fil_char_handler() and stored by fil_end_handler().
+ */
+void fil_start_handler(void *data, const char *el, const char **attr) {
+    int i;
+    struct PrimaryParserData *ppd = (struct PrimaryParserData *) data;
+    Package *pkg = ppd->pkg;
+
+
+    // <file>
+    if (!strcmp(el, "file")) {
+        if (!pkg) {
+            // No package to attach the file to (e.g. the enclosing
+            // <package> had an unknown pkgid). last_elem is left untouched
+            // so that the end handler does not try to store anything.
+            return;
+        }
+        ppd->last_elem = FILE_ELEM;
+
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "type")) {
+                if (!strcmp(attr[i+1], "dir")) {
+                    ppd->last_elem = FILE_DIR_ELEM;
+                } else if (!strcmp(attr[i+1], "ghost")) {
+                    ppd->last_elem = FILE_GHOST_ELEM;
+                }
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+    // <package>
+    } else if (!strcmp(el, "package")) {
+        // Check sanity
+        if (ppd->context != FILELISTS) {
+            g_critical(MODULE"%s: Package element: Bad XML context!", __func__);
+            return;
+        }
+        if (ppd->pkg) {
+            g_critical(MODULE"%s: Package element: Pkg pointer is not NULL", __func__);
+            return;
+        }
+
+        ppd->context = PACKAGE;
+
+        // Must start as NULL: the attribute may be missing
+        const gchar *key = NULL;
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "pkgid")) {
+                key = attr[i+1];
+            }
+        }
+
+        if (key) {
+            ppd->pkg = (Package *) g_hash_table_lookup(ppd->hashtable, (gconstpointer) key);
+            if (!ppd->pkg) {
+                g_critical(MODULE"%s: Unknown package (package ID: %s)", __func__, key);
+            }
+        } else {
+            g_critical(MODULE"%s: Package withou pkgid attribute found!", __func__);
+        }
+
+    // <version>
+    } else if (!strcmp(el, "version")) {
+        // Version is already known from primary.xml
+        ;
+
+    // <filelists>
+    } else if (!strcmp(el, "filelists")) {
+        if (ppd->context != ROOT) {
+            g_critical(MODULE"%s: Bad context (%d) for %s element", __func__, ppd->context, el);
+            return;
+        }
+        ppd->context = FILELISTS;
+
+    // Unknown element
+    } else {
+        g_warning(MODULE"%s: Unknown element: %s", __func__, el);
+    }
+}
+
+
+
+/*
+ * Expat character-data handler for filelists.xml.
+ *
+ * Buffers the received text (txtlen bytes starting at txt) in
+ * ppd->current_string while a <file> element of any type is open.
+ * Text elements that belong to primary.xml are reported with a warning.
+ * Nothing is done when no package is open or no text element is tracked.
+ */
+void fil_char_handler(void *data, const char *txt, int txtlen) {
+    struct PrimaryParserData *state = data;
+
+    if (state->pkg == NULL) {
+        return;
+    }
+
+    switch (state->last_elem) {
+        case NONE_ELEM:
+            // Text between elements is of no interest
+            return;
+
+        case FILE_ELEM:
+        case FILE_DIR_ELEM:
+        case FILE_GHOST_ELEM:
+            g_string_append_len(state->current_string, txt, (gsize) txtlen);
+            return;
+
+        case NAME_ELEM:
+        case ARCH_ELEM:
+        case CHECKSUM_ELEM:
+        case SUMMARY_ELEM:
+        case DESCRIPTION_ELEM:
+        case PACKAGER_ELEM:
+        case URL_ELEM:
+        case RPM_LICENSE_ELEM:
+        case RPM_VENDOR_ELEM:
+        case RPM_GROUP_ELEM:
+        case RPM_BUILDHOST_ELEM:
+        case RPM_SOURCERPM_ELEM:
+            // These elements are set only by the primary.xml handlers
+            g_warning(MODULE"%s: unsupported last xml element", __func__);
+            return;
+
+        default:
+            g_warning(MODULE"%s: Unknown last xml element", __func__);
+            return;
+    }
+}
+
+
+
+/*
+ * Expat end-element handler for filelists.xml.
+ *
+ * When a <file> element is closed, the collected path is split into a
+ * directory part and a file name (via get_filename()) and a new
+ * PackageFile is prepended to the current package's file list. On
+ * </package> the list is reversed back into document order and the
+ * current package is cleared.
+ */
+void fil_end_handler(void *data, const char *el) {
+    struct PrimaryParserData *ppd = (struct PrimaryParserData *) data;
+    Package *pkg = ppd->pkg;
+
+
+    // Store strings
+
+    PackageFile *file;
+    gchar *filename;
+    gchar *txt = ppd->current_string->str;
+    gsize txtlen = ppd->current_string->len;
+
+    if (ppd->last_elem != NONE_ELEM) {
+        switch (ppd->last_elem) {
+            case NAME_ELEM:
+            case ARCH_ELEM:
+            case CHECKSUM_ELEM:
+            case SUMMARY_ELEM:
+            case DESCRIPTION_ELEM:
+            case PACKAGER_ELEM:
+            case URL_ELEM:
+            case RPM_LICENSE_ELEM:
+            case RPM_VENDOR_ELEM:
+            case RPM_GROUP_ELEM:
+            case RPM_BUILDHOST_ELEM:
+            case RPM_SOURCERPM_ELEM:
+                g_warning(MODULE"%s: Bad last xml element state", __func__);
+                break;
+            case FILE_ELEM:
+            case FILE_DIR_ELEM:
+            case FILE_GHOST_ELEM:
+                if (!pkg) {
+                    // No package to attach the file to
+                    break;
+                }
+                if (!txt || txtlen == 0) {
+                    g_warning(MODULE"%s: File with empty filename found!", __func__);
+                    break;
+                }
+
+                file = package_file_new();
+                filename = get_filename(txt);
+                file->name = g_string_chunk_insert(pkg->chunk, filename);
+                // The path is whatever precedes the file name in txt
+                file->path = g_string_chunk_insert_len(pkg->chunk, txt, (txtlen - strlen(filename)));
+
+                if (ppd->last_elem == FILE_ELEM) {
+                    file->type = NULL;
+                } else if (ppd->last_elem == FILE_DIR_ELEM) {
+                    file->type = "dir";
+                } else if (ppd->last_elem == FILE_GHOST_ELEM) {
+                    file->type = "ghost";
+                }
+
+                pkg->files = g_slist_prepend(pkg->files, file);
+                break;
+
+            default:
+                break;
+        }
+
+        ppd->last_elem = NONE_ELEM;
+    }
+
+    g_string_erase(ppd->current_string, 0, -1);
+
+
+    // Set proper context
+
+    if (!strcmp(el, "package")) {
+        ppd->context = FILELISTS;
+        ppd->pkg = NULL;
+
+        // Reverse list of files. pkg is NULL when the package was not
+        // found in the hashtable by fil_start_handler().
+        if (pkg) {
+            pkg->files = g_slist_reverse(pkg->files);
+        }
+
+    } else if (!strcmp(el, "filelists")) {
+        ppd->context = ROOT;
+    }
+}
+
+
+// other.xml parser handlers
+
+/*
+ * Expat start-element handler for other.xml.
+ *
+ * data - pointer to the shared struct PrimaryParserData
+ * el   - element name
+ * attr - NULL-terminated array of alternating attribute names and values
+ *
+ * <package> looks up the package previously loaded from primary.xml by its
+ * "pkgid" attribute and makes it the current package. <changelog> creates
+ * a new ChangelogEntry (author and date come from attributes) and prepends
+ * it to the package's changelog list; the changelog text is collected by
+ * oth_char_handler() and stored by oth_end_handler().
+ */
+void oth_start_handler(void *data, const char *el, const char **attr) {
+    int i;
+    struct PrimaryParserData *ppd = (struct PrimaryParserData *) data;
+    Package *pkg = ppd->pkg;
+
+
+    // <changelog>
+    if (!strcmp(el, "changelog")) {
+        if (!pkg) {
+            // No package to attach the changelog to (e.g. the enclosing
+            // <package> had an unknown pkgid). last_elem is left untouched
+            // so that the end handler does not try to store anything.
+            return;
+        }
+        ppd->last_elem = CHANGELOG_ELEM;
+
+        ChangelogEntry *changelog_entry;
+        changelog_entry = changelog_entry_new();
+
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "author")) {
+                changelog_entry->author = g_string_chunk_insert(pkg->chunk, attr[i+1]);
+            } else if (!strcmp(attr[i], "date")) {
+                changelog_entry->date = g_ascii_strtoll(attr[i+1], NULL, 10);
+            } else {
+                g_warning(MODULE"%s: Unknown attribute \"%s\"", __func__, attr[i]);
+            }
+        }
+
+        // The new entry becomes the list head; oth_end_handler() relies on it
+        pkg->changelogs = g_slist_prepend(pkg->changelogs, changelog_entry);
+
+    // <package>
+    } else if (!strcmp(el, "package")) {
+        // Check sanity
+        if (ppd->context != OTHERDATA) {
+            g_critical(MODULE"%s: Package element: Bad XML context (%d)!", __func__, ppd->context);
+            return;
+        }
+        if (ppd->pkg) {
+            g_critical(MODULE"%s: Package element: Pkg pointer is not NULL", __func__);
+            return;
+        }
+
+        ppd->context = PACKAGE;
+
+        // Must start as NULL: the attribute may be missing
+        const gchar *key = NULL;
+        for (i = 0; attr[i]; i += 2) {
+            if (!strcmp(attr[i], "pkgid")) {
+                key = attr[i+1];
+            }
+        }
+
+        if (key) {
+            ppd->pkg = (Package *) g_hash_table_lookup(ppd->hashtable, (gconstpointer) key);
+            if (!ppd->pkg) {
+                g_critical(MODULE"%s: Unknown package (package ID: %s)", __func__, key);
+            }
+        } else {
+            g_critical(MODULE"%s: Package withou pkgid attribute found!", __func__);
+        }
+
+    // <version>
+    } else if (!strcmp(el, "version")) {
+        // Version is already known from primary.xml
+        ;
+
+    // <otherdata>
+    } else if (!strcmp(el, "otherdata")) {
+        if (ppd->context != ROOT) {
+            g_critical(MODULE"%s: Bad context (%d) for %s element", __func__, ppd->context, el);
+            return;
+        }
+        ppd->context = OTHERDATA;
+
+    // Unknown element
+    } else {
+        g_warning(MODULE"%s: Unknown element: %s", __func__, el);
+    }
+}
+
+
+
+/*
+ * Expat character-data handler for other.xml.
+ *
+ * Buffers the received text (txtlen bytes starting at txt) in
+ * ppd->current_string while a <changelog> element is open. Text elements
+ * that belong to primary.xml or filelists.xml are reported with a warning.
+ * Nothing is done when no package is open or no text element is tracked.
+ */
+void oth_char_handler(void *data, const char *txt, int txtlen) {
+    struct PrimaryParserData *state = data;
+
+    if (state->pkg == NULL) {
+        return;
+    }
+
+    switch (state->last_elem) {
+        case NONE_ELEM:
+            // Text between elements is of no interest
+            return;
+
+        case CHANGELOG_ELEM:
+            g_string_append_len(state->current_string, txt, (gsize) txtlen);
+            return;
+
+        case NAME_ELEM:
+        case ARCH_ELEM:
+        case CHECKSUM_ELEM:
+        case SUMMARY_ELEM:
+        case DESCRIPTION_ELEM:
+        case PACKAGER_ELEM:
+        case URL_ELEM:
+        case RPM_LICENSE_ELEM:
+        case RPM_VENDOR_ELEM:
+        case RPM_GROUP_ELEM:
+        case RPM_BUILDHOST_ELEM:
+        case RPM_SOURCERPM_ELEM:
+        case FILE_ELEM:
+        case FILE_DIR_ELEM:
+        case FILE_GHOST_ELEM:
+            // These elements are set only by the other parsers' handlers
+            g_warning(MODULE"%s: unsupported last xml element", __func__);
+            return;
+
+        default:
+            g_warning(MODULE"%s: Unknown last xml element", __func__);
+            return;
+    }
+}
+
+
+
+/*
+ * Expat end-element handler for other.xml.
+ *
+ * When a <changelog> element is closed, the collected text is stored into
+ * the entry at the head of the current package's changelog list (the one
+ * prepended by oth_start_handler()). On </package> the list is reversed
+ * back into document order and the current package is cleared.
+ */
+void oth_end_handler(void *data, const char *el) {
+    struct PrimaryParserData *ppd = (struct PrimaryParserData *) data;
+    Package *pkg = ppd->pkg;
+
+
+    // Store strings
+
+    gchar *txt = ppd->current_string->str;
+    gsize txtlen = ppd->current_string->len;
+
+    if (ppd->last_elem != NONE_ELEM) {
+        switch (ppd->last_elem) {
+            case CHANGELOG_ELEM:
+                if (!pkg || !pkg->changelogs) {
+                    // No entry to store the text into
+                    break;
+                }
+                ((ChangelogEntry *) pkg->changelogs->data)->changelog = g_string_chunk_insert_len(pkg->chunk, txt, txtlen);
+                break;
+            case NAME_ELEM:
+            case ARCH_ELEM:
+            case CHECKSUM_ELEM:
+            case SUMMARY_ELEM:
+            case DESCRIPTION_ELEM:
+            case PACKAGER_ELEM:
+            case URL_ELEM:
+            case RPM_LICENSE_ELEM:
+            case RPM_VENDOR_ELEM:
+            case RPM_GROUP_ELEM:
+            case RPM_BUILDHOST_ELEM:
+            case RPM_SOURCERPM_ELEM:
+            case FILE_ELEM:
+            case FILE_DIR_ELEM:
+            case FILE_GHOST_ELEM:
+                g_warning(MODULE"%s: Bad last xml element state", __func__);
+                break;
+            default:
+                break;
+        }
+
+        ppd->last_elem = NONE_ELEM;
+    }
+
+    g_string_erase(ppd->current_string, 0, -1);
+
+
+    // Set proper context
+
+    if (!strcmp(el, "package")) {
+        ppd->context = OTHERDATA;
+        ppd->pkg = NULL;
+
+        // Reverse list of changelogs. pkg is NULL when the package was not
+        // found in the hashtable by oth_start_handler().
+        if (pkg) {
+            pkg->changelogs = g_slist_reverse(pkg->changelogs);
+        }
+
+    } else if (!strcmp(el, "otherdata")) {
+        ppd->context = ROOT;
+    }
+}
+
+
+
+/*
+ * Reads an opened metadata file in CHUNK_SIZE pieces and feeds it to the
+ * given Expat parser until end of file. Messages are logged under the
+ * caller's name. Returns 1 on success, 0 on allocation/read/parse error.
+ */
+static int parse_metadata_cwfile_2(XML_Parser parser, CW_FILE *cwfile, const char *caller)
+{
+    for (;;) {
+        void *buff = XML_GetBuffer(parser, CHUNK_SIZE);
+        if (!buff) {
+            g_critical(MODULE"%s: Ran out of memory for parse", caller);
+            return 0;
+        }
+
+        int len = cw_read(cwfile, buff, CHUNK_SIZE);
+        if (len < 0) {
+            g_critical(MODULE"%s: Read error", caller);
+            return 0;
+        }
+
+        // A zero-length read marks the end of input for the parser
+        if (! XML_ParseBuffer(parser, len, len == 0)) {
+            g_critical(MODULE"%s: Parse error at line: %d (%s)", caller,
+                                (int) XML_GetCurrentLineNumber(parser),
+                                (char *) XML_ErrorString(XML_GetErrorCode(parser)));
+            return 0;
+        }
+
+        if (len == 0) {
+            return 1;
+        }
+    }
+}
+
+
+
+/*
+ * Loads packages from primary.xml, filelists.xml and other.xml into the
+ * given hashtable (key: pkgId, value: Package).
+ *
+ * primary.xml is parsed first and creates the packages; filelists.xml and
+ * other.xml are parsed afterwards and add files and changelogs to packages
+ * found by their pkgid.
+ *
+ * Returns 1 on success and 0 on failure. On failure the hashtable may
+ * already contain the packages parsed so far. All files, parsers and
+ * buffers opened here are released on every path.
+ */
+int load_xml_metadata_2(GHashTable *hashtable, const char *primary_xml_path, const char *filelists_xml_path, const char *other_xml_path)
+{
+    int ret = 0;
+    CompressionType compression_type;
+    CW_FILE *pri_xml_cwfile = NULL, *fil_xml_cwfile = NULL, *oth_xml_cwfile = NULL;
+    XML_Parser pri_p = NULL, fil_p = NULL, oth_p = NULL;
+    struct PrimaryParserData pri_pd;
+
+
+    // Parser state shared by all three parsers
+
+    // XXX: Maybe try rely on package order and do not use hashtable
+    pri_pd.total_pkgs = 0;
+    pri_pd.actual_pkg = 0;
+    pri_pd.pkgs_size = 0;
+    pri_pd.pkgs = NULL;
+    // XXX
+
+    pri_pd.current_string = NULL;
+    pri_pd.hashtable = hashtable;
+    pri_pd.pkg = NULL;
+    pri_pd.context = ROOT;
+    pri_pd.last_elem = NONE_ELEM;
+
+
+    // Detect compression type
+    // NOTE(review): the type detected for primary.xml is used for all three
+    // files -- confirm that they are always compressed the same way.
+
+    compression_type = detect_compression(primary_xml_path);
+    if (compression_type == UNKNOWN_COMPRESSION) {
+        g_debug(MODULE"%s: Unknown compression", __func__);
+        goto cleanup;
+    }
+
+
+    // Open files
+
+    if (!(pri_xml_cwfile = cw_open(primary_xml_path, CW_MODE_READ, compression_type))) {
+        g_debug(MODULE"%s: Cannot open file: %s", __func__, primary_xml_path);
+        goto cleanup;
+    }
+
+    if (!(fil_xml_cwfile = cw_open(filelists_xml_path, CW_MODE_READ, compression_type))) {
+        g_debug(MODULE"%s: Cannot open file: %s", __func__, filelists_xml_path);
+        goto cleanup;
+    }
+
+    if (!(oth_xml_cwfile = cw_open(other_xml_path, CW_MODE_READ, compression_type))) {
+        g_debug(MODULE"%s: Cannot open file: %s", __func__, other_xml_path);
+        goto cleanup;
+    }
+
+
+    // Prepare parsers
+
+    pri_pd.current_string = g_string_sized_new(1024);
+
+    pri_p = XML_ParserCreate(NULL);
+    fil_p = XML_ParserCreate(NULL);
+    oth_p = XML_ParserCreate(NULL);
+    if (!pri_p || !fil_p || !oth_p) {
+        g_critical(MODULE"%s: Cannot create XML parser", __func__);
+        goto cleanup;
+    }
+
+    XML_SetUserData(pri_p, (void *) &pri_pd);
+    XML_SetElementHandler(pri_p, pri_start_handler, pri_end_handler);
+    XML_SetCharacterDataHandler(pri_p, pri_char_handler);
+
+    XML_SetUserData(fil_p, (void *) &pri_pd);
+    XML_SetElementHandler(fil_p, fil_start_handler, fil_end_handler);
+    XML_SetCharacterDataHandler(fil_p, fil_char_handler);
+
+    XML_SetUserData(oth_p, (void *) &pri_pd);
+    XML_SetElementHandler(oth_p, oth_start_handler, oth_end_handler);
+    XML_SetCharacterDataHandler(oth_p, oth_char_handler);
+
+
+    // Parse
+
+    // primary.xml
+    if (!parse_metadata_cwfile_2(pri_p, pri_xml_cwfile, __func__)) {
+        // A package under construction is not in the hashtable yet, so
+        // nobody else would free it.
+        if (pri_pd.pkg) {
+            package_free(pri_pd.pkg);
+            pri_pd.pkg = NULL;
+        }
+        goto cleanup;
+    }
+
+    assert(!pri_pd.pkg);
+    pri_pd.context = ROOT;
+    pri_pd.last_elem = NONE_ELEM;
+
+    // filelists.xml (packages referenced here are owned by the hashtable)
+    if (!parse_metadata_cwfile_2(fil_p, fil_xml_cwfile, __func__)) {
+        goto cleanup;
+    }
+
+    assert(!pri_pd.pkg);
+    pri_pd.context = ROOT;
+    pri_pd.last_elem = NONE_ELEM;
+
+    // other.xml (packages referenced here are owned by the hashtable)
+    if (!parse_metadata_cwfile_2(oth_p, oth_xml_cwfile, __func__)) {
+        goto cleanup;
+    }
+
+    ret = 1;
+
+
+    // Cleanup
+
+cleanup:
+    if (pri_p) {
+        XML_ParserFree(pri_p);
+    }
+    if (fil_p) {
+        XML_ParserFree(fil_p);
+    }
+    if (oth_p) {
+        XML_ParserFree(oth_p);
+    }
+    if (pri_xml_cwfile) {
+        cw_close(pri_xml_cwfile);
+    }
+    if (fil_xml_cwfile) {
+        cw_close(fil_xml_cwfile);
+    }
+    if (oth_xml_cwfile) {
+        cw_close(oth_xml_cwfile);
+    }
+    if (pri_pd.current_string) {
+        g_string_free(pri_pd.current_string, TRUE);
+    }
+
+    return ret;
+}
+
+
+
+/*
+ * Locates the metadata files of the repository at repopath via its repomd
+ * and loads all packages into the given hashtable.
+ *
+ * hashtable - destination table; packages are stored under their
+ *             location_href. A package whose key is already present in the
+ *             table (or which has no location_href) is dropped.
+ * repopath  - path to the repository directory
+ *
+ * Returns 0 when an argument is invalid, the metadata cannot be located or
+ * one of the three files is missing; otherwise the result of
+ * load_xml_metadata_2() (1 on success, 0 on failure).
+ */
+int locate_and_load_xml_metadata_2(GHashTable *hashtable, const char *repopath)
+{
+    // g_file_test() returns TRUE if ANY of the given tests passes, so only
+    // IS_DIR is tested here (a directory necessarily exists).
+    if (!hashtable || !repopath || !g_file_test(repopath, G_FILE_TEST_IS_DIR)) {
+        return 0;
+    }
+
+
+    // Get paths of old metadata files from repomd
+
+    struct MetadataLocation *ml;
+    ml = locate_metadata_via_repomd(repopath);
+    if (!ml) {
+        return 0;
+    }
+
+
+    if (!ml->pri_xml_href || !ml->fil_xml_href || !ml->oth_xml_href) {
+        // Some file(s) is/are missing
+        free_metadata_location(ml);
+        return 0;
+    }
+
+
+    // Load metadata
+
+    int result;
+    GHashTable *intern_hashtable; // key is checksum (pkgId)
+
+    intern_hashtable = new_metadata_hashtable();
+    result = load_xml_metadata_2(intern_hashtable, ml->pri_xml_href, ml->fil_xml_href, ml->oth_xml_href);
+    g_debug(MODULE"%s: Parsed items: %u\n", __func__, g_hash_table_size(intern_hashtable));
+
+    // Fill user hashtable and use user selected key
+
+    GHashTableIter iter;
+    gpointer key, value;
+
+    g_hash_table_iter_init (&iter, intern_hashtable);
+    while (g_hash_table_iter_next (&iter, &key, &value)) {
+        Package *pkg = (Package *) value;
+        gpointer new_key;
+
+        // TODO: Switch and param for key selection
+//        new_key = pkg->name;
+//        new_key = pkg->pkgId;
+        new_key = pkg->location_href;
+
+        if (!new_key) {
+            // A string hash function cannot take NULL; drop (and free)
+            // the package instead.
+            g_debug(MODULE"%s: Package without key skipped\n", __func__);
+            g_hash_table_iter_remove(&iter);
+        } else if (g_hash_table_lookup(hashtable, new_key)) {
+            g_debug(MODULE"%s: Key \"%s\" already exists in hashtable\n", __func__, (char *) new_key);
+            // Removes the entry and frees the duplicate package
+            g_hash_table_iter_remove(&iter);
+        } else {
+            g_hash_table_insert(hashtable, new_key, value);
+            // Ownership moved to the user's table: detach without freeing
+            g_hash_table_iter_steal(&iter);
+        }
+    }
+
+
+    // Cleanup
+
+    destroy_metadata_hashtable(intern_hashtable);
+    free_metadata_location(ml);
+
+    return result;
+}
diff --git a/src/load_metadata_2.h b/src/load_metadata_2.h
new file mode 100644 (file)
index 0000000..2968cc4
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __C_CREATEREPOLIB_LOAD_METADATA_2_H__
+#define __C_CREATEREPOLIB_LOAD_METADATA_2_H__
+
+#include <glib.h>
+#include "constants.h"
+
+/*
+ * Creates an empty hashtable for parsed packages. Keys are C strings that
+ * are not freed by the table; values are Package pointers that are freed
+ * when an entry is removed or the table is destroyed.
+ */
+GHashTable *new_metadata_hashtable(void);
+
+/*
+ * Destroys a table created by new_metadata_hashtable(). NULL is allowed.
+ */
+void destroy_metadata_hashtable(GHashTable *hashtable);
+
+/*
+ * Locates the metadata of the repository at repopath and loads its
+ * packages into hashtable, keyed by the package's location href.
+ * Returns 1 on success, 0 on failure.
+ */
+int locate_and_load_xml_metadata_2(GHashTable *hashtable, const char *repopath);
+
+#endif /* __C_CREATEREPOLIB_LOAD_METADATA_2_H__ */