From 074e517f4b56b8dd78abd6534885b4858fa7ab08 Mon Sep 17 00:00:00 2001 From: Tomas Mlcoch Date: Fri, 31 May 2013 14:11:06 +0200 Subject: [PATCH] load_metadata: Switched from internal (ugly) xml parser to new xml_parser module. --- src/load_metadata.c | 1264 ++++----------------------------------------------- 1 file changed, 101 insertions(+), 1163 deletions(-) diff --git a/src/load_metadata.c b/src/load_metadata.c index 7e2013f..59fd2ce 100644 --- a/src/load_metadata.c +++ b/src/load_metadata.c @@ -23,91 +23,20 @@ #include #include #include -#include #include "error.h" #include "package.h" #include "logging.h" #include "misc.h" #include "load_metadata.h" -#include "compression_wrapper.h" #include "locate_metadata.h" - -#define STRINGCHUNK_SIZE 16384 -#define CHUNK_SIZE 8192 -#define PKGS_REALLOC_STEP 2000 - +#include "xml_parser.h" /** TODO: - * This module has one known issue about a memory management. - * The issue acts in cases when you are using one single string chunk - * for all parsed packages. - * (use_single_chunk != 0 during call of cr_metadata_new()) - * - * Description of issue: - * During parsing of primary.xml, all string from obtained during parsing are - * stored into the chunk. When we have all the information from primary.xml and - * we found out that we don't want the package (according the pkglist passed - * via cr_metadata_new) and we drop the package (package is not inserted - * into the hashtable of metadatas), all strings from primary.xml are yet - * stored in the chunk and they remains there! - * - * This issue is not so important, but it shoud be fixed in future. - * Tomas - **/ - -typedef enum { - NONE_ELEM, - NAME_ELEM, - ARCH_ELEM, - CHECKSUM_ELEM, - SUMMARY_ELEM, - DESCRIPTION_ELEM, - PACKAGER_ELEM, - URL_ELEM, - RPM_LICENSE_ELEM, - RPM_VENDOR_ELEM, - RPM_GROUP_ELEM, - RPM_BUILDHOST_ELEM, - RPM_SOURCERPM_ELEM, - FILE_ELEM, - FILE_DIR_ELEM, - FILE_GHOST_ELEM, - CHANGELOG_ELEM -} TextElement; - - - -typedef enum { - ROOT, - METADATA, - PACKAGE, - FORMAT, - PROVIDES, - CONFLICTS, - OBSOLETES, - REQUIRES, - // filelists - FILELISTS, - // other - OTHERDATA -} ParserContext; - - - -struct ParserData { - GString *current_string; - GHashTable *hashtable; - cr_Package *pkg; - ParserContext context; - TextElement last_elem; - - GStringChunk *chunk; - GHashTable *pkglist; - - gboolean error; -}; - + * - Add support for single chunk (?) + * - Support for warning cbs + */ +#define STRINGCHUNK_SIZE 16384 void cr_free_values(gpointer data) @@ -115,8 +44,6 @@ cr_free_values(gpointer data) cr_package_free((cr_Package *) data); } - - GHashTable * cr_new_metadata_hashtable() { @@ -125,8 +52,6 @@ cr_new_metadata_hashtable() return hashtable; } - - void cr_destroy_metadata_hashtable(GHashTable *hashtable) { @@ -134,8 +59,6 @@ cr_destroy_metadata_hashtable(GHashTable *hashtable) g_hash_table_destroy (hashtable); } - - cr_Metadata cr_metadata_new(cr_HashTableKey key, int use_single_chunk, GSList *pkglist) { @@ -170,8 +93,6 @@ cr_metadata_new(cr_HashTableKey key, int use_single_chunk, GSList *pkglist) return md; } - - void cr_metadata_free(cr_Metadata md) { @@ -186,869 +107,68 @@ cr_metadata_free(cr_Metadata md) g_free(md); } +// Callbacks for XML parsers +typedef struct { + GHashTable *ht; + GStringChunk *chunk; + GHashTable *pkglist_ht; +} cr_CbData; -static inline gchar * -cr_chunk_insert_len_or_null (GStringChunk *chunk, const gchar *str, gssize len) -{ - if (!str || len <= 0) - return NULL; - - return g_string_chunk_insert_len(chunk, str, len); -} - - - -// primary.xml parser handlers - -void -cr_pri_start_handler(void *data, const char *el, const char **attr) -{ - struct ParserData *ppd = (struct ParserData *) data; - cr_Package *pkg = ppd->pkg; - int i; - - // and are most frequently used tags in primary.xml - - // - if (!strcmp(el, "file")) { - ppd->last_elem = FILE_ELEM; - - // - } else if (!strcmp(el, "rpm:entry")) { - if (!pkg) { - ppd->error = TRUE; - g_critical("%s: Have but pkg object doesn't exist!", - __func__); - return; - } - - cr_Dependency *dependency; - dependency = cr_dependency_new(); - - for (i = 0; attr[i]; i += 2) { - if (!strcmp(attr[i], "name")) { - dependency->name = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else if (!strcmp(attr[i], "flags")) { - dependency->flags = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else if (!strcmp(attr[i], "epoch")) { - dependency->epoch = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else if (!strcmp(attr[i], "ver")) { - dependency->version = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else if (!strcmp(attr[i], "rel")) { - dependency->release = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else if (!strcmp(attr[i], "pre")) { - if (!strcmp(attr[i+1], "0") || !strcmp(attr[i+1], "FALSE")) { - dependency->pre = FALSE; - } else { - dependency->pre = TRUE; - } - } else { - g_warning("%s: Unknown attribute \"%s\"", __func__, attr[i]); - } - } - - switch (ppd->context) { - case PROVIDES: - pkg->provides = g_slist_prepend(pkg->provides, dependency); - break; - case CONFLICTS: - pkg->conflicts = g_slist_prepend(pkg->conflicts, dependency); - break; - case OBSOLETES: - pkg->obsoletes = g_slist_prepend(pkg->obsoletes, dependency); - break; - case REQUIRES: - pkg->requires = g_slist_prepend(pkg->requires, dependency); - break; - default: - g_free(dependency); - g_warning("%s: Bad context (%d) for rpm:entry", __func__, - ppd->context); - break; - } - - // - } else if (!strcmp(el, "package")) { - // Check sanity - if (ppd->context != METADATA) { - ppd->error = TRUE; - g_critical("%s: Package element: Bad XML context!", __func__); - return; - } - if (ppd->pkg) { - ppd->error = TRUE; - g_critical("%s: Package element: Pkg pointer is not NULL", - __func__); - return; - } - - ppd->context = PACKAGE; - if (ppd->chunk) { - ppd->pkg = cr_package_new_without_chunk(); - ppd->pkg->chunk = ppd->chunk; - } else - ppd->pkg = cr_package_new(); - - // - } else if (!strcmp(el, "name")) { - ppd->last_elem = NAME_ELEM; - - // - } else if (!strcmp(el, "arch")) { - ppd->last_elem = ARCH_ELEM; - - // - } else if (!strcmp(el, "version")) { - if (!pkg) { - ppd->error = TRUE; - g_critical("%s: Have but pkg object doesn't exist!", - __func__); - return; - } - - for (i = 0; attr[i]; i += 2) { - if (!strcmp(attr[i], "epoch")) { - pkg->epoch = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else if (!strcmp(attr[i], "ver")) { - pkg->version = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else if (!strcmp(attr[i], "rel")) { - pkg->release = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } else { - g_warning("%s: Unknown attribute \"%s\"", __func__, attr[i]); - } - } - - // - } else if (!strcmp(el, "checksum")) { - ppd->last_elem = CHECKSUM_ELEM; - - if (!pkg) { - ppd->error = TRUE; - g_critical("%s: Have but pkg object doesn't exist!", - __func__); - return; - } - - for (i = 0; attr[i]; i += 2) { - if (!strcmp(attr[i], "type")) { - pkg->checksum_type = g_string_chunk_insert(pkg->chunk, attr[i+1]); - } - } - - // - } else if (!strcmp(el, "summary")) { - ppd->last_elem = SUMMARY_ELEM; - - // - } else if (!strcmp(el, "description")) { - ppd->last_elem = DESCRIPTION_ELEM; - - // - } else if (!strcmp(el, "packager")) { - ppd->last_elem = PACKAGER_ELEM; - - // - } else if (!strcmp(el, "url")) { - ppd->last_elem = URL_ELEM; - - //