xml_parser: Add other.xml parser.
author Tomas Mlcoch <tmlcoch@redhat.com>
Thu, 30 May 2013 12:13:40 +0000 (14:13 +0200)
committer Tomas Mlcoch <tmlcoch@redhat.com>
Thu, 30 May 2013 12:13:40 +0000 (14:13 +0200)
src/xml_parser_other.c

index e69de29..f149e7e 100644 (file)
@@ -0,0 +1,347 @@
+/* createrepo_c - Library of routines for manipulation with repodata
+ * Copyright (C) 2013  Tomas Mlcoch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA.
+ */
+
+#include <glib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <expat.h>
+#include "xml_parser_internal.h"
+#include "xml_parser.h"
+#include "error.h"
+#include "package.h"
+#include "logging.h"
+#include "misc.h"
+
+#define ERR_DOMAIN      CR_XML_PARSER_OTH_ERROR  // GError domain passed to g_set_error() in this file
+#define ERR_CODE_XML    CRE_BADXMLOTHER  // GError code used when a package has no "pkgid" attribute
+
+typedef enum {          // States of the other.xml parser state machine
+    STATE_START,        // Initial state, set before the parsing begins
+    STATE_OTHERDATA,    // Entered on the "otherdata" element
+    STATE_PACKAGE,      // Entered on a "package" element
+    STATE_VERSION,      // Entered on a "version" element of a package
+    STATE_CHANGELOG,    // Entered on a "changelog" element of a package
+    NUMSTATES,          // Number of states; also terminates the stateswitches table
+} cr_OthState;
+
+/* State transition table of the other.xml parser.
+ * Columns: source state, element name, target state, and a flag
+ * (docontent) that is copied to pd->docontent when the element starts.
+ *
+ * NOTE: Rows with the same state in the first column must be together!!!
+ * The start handler begins at the first row of the current state and
+ * scans forward only while the first column still matches that state.
+ *
+ * Performance tip: More frequent elements should be listed
+ * first in their group (eg: element "package" (STATE_PACKAGE)
+ * has a "changelog" element listed first, because it is expected to be
+ * more frequent than a "version" element).
+ *
+ * The last row (NUMSTATES, NULL, ...) is the terminator of the table. */
+static cr_StatesSwitch stateswitches[] = {
+    { STATE_START,      "otherdata",    STATE_OTHERDATA,    0 },
+    { STATE_OTHERDATA,  "package",      STATE_PACKAGE,      0 },
+    { STATE_PACKAGE,    "changelog",    STATE_CHANGELOG,    1 },
+    { STATE_PACKAGE,    "version",      STATE_VERSION,      0 },
+    { NUMSTATES,        NULL,           NUMSTATES,          0 },
+};
+
+/* Expat start-element callback of the other.xml parser.
+ *
+ * pdata   - user data of the parser, used here as a cr_ParserData
+ * element - name of the element that was just opened
+ * attr    - attributes of the element, looked up via cr_find_attr()
+ *
+ * Tracks the element depth, moves the state machine according to the
+ * switch table (pd->swtab) and stores attribute values of the "package",
+ * "version" and "changelog" elements. Errors are reported by setting
+ * pd->err; once it is set, the handler does nothing.
+ */
+static void XMLCALL
+cr_start_handler(void *pdata, const char *element, const char **attr)
+{
+    GError *tmp_err = NULL;
+    cr_ParserData *pd = pdata;
+    cr_StatesSwitch *sw;
+
+    if (pd->err)
+        return;  // There was an error -> do nothing
+
+    if (pd->depth != pd->statedepth) {
+        // We are inside of unknown element
+        pd->depth++;
+        return;
+    }
+    pd->depth++;
+
+    if (!pd->swtab[pd->state]) {
+        // Current element should not have any sub elements
+        return;
+    }
+
+    if (!pd->pkg && pd->state != STATE_OTHERDATA && pd->state != STATE_START)
+        return;  // Do not parse current package tag and its content
+
+    // Find current state by its name. The scan starts at the first table
+    // row of the current state and ends when the rows of that state end.
+    for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)
+        if (!strcmp(element, sw->ename))
+            break;
+    if (sw->from != pd->state) {
+        // No state for current element (unknown element)
+        cr_xml_parser_warning(pd, CR_XML_WARNING_UNKNOWNTAG,
+                              "Unknown element \"%s\"", element);
+        return;
+    }
+
+    // Update parser data
+    pd->state      = sw->to;
+    pd->docontent  = sw->docontent;
+    pd->statedepth = pd->depth;
+    pd->lcontent   = 0;
+    pd->content[0] = '\0';
+
+    switch(pd->state) {
+    case STATE_START:
+    case STATE_OTHERDATA:
+        break;
+
+    case STATE_PACKAGE: {
+        const char *pkgId = cr_find_attr("pkgid", attr);
+        const char *name  = cr_find_attr("name", attr);
+        const char *arch  = cr_find_attr("arch", attr);
+
+        if (!pkgId) {
+            // Package without a pkgid attr is error
+            g_set_error(&pd->err, ERR_DOMAIN, ERR_CODE_XML,
+                        "Package pkgid attributte is missing!");
+            break;
+        }
+
+        if (!name)
+            cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+                           "Missing attribute \"name\" of a package element");
+
+        if (!arch)
+            cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+                           "Missing attribute \"arch\" of a package element");
+
+        // Get package object to store current package or NULL if
+        // current XML package element should be skipped/ignored.
+        if (pd->newpkgcb(&pd->pkg,
+                         pkgId,
+                         name,
+                         arch,
+                         pd->newpkgcb_data,
+                         &tmp_err))
+        {
+            // The prefix ends with a space so that the message of the
+            // callback is not glued directly to the colon.
+            if (tmp_err)
+                g_propagate_prefixed_error(&pd->err,
+                                           tmp_err,
+                                           "Parsing interrupted: ");
+            else
+                g_set_error(&pd->err, ERR_DOMAIN, CRE_CBINTERRUPTED,
+                            "Parsing interrupted");
+            break;
+        } else {
+            // If callback return CRE_OK but it simultaneously set
+            // the tmp_err then it's a programming error.
+            assert(tmp_err == NULL);
+        }
+
+        // Fill in only the values the callback did not set itself
+        if (pd->pkg) {
+            if (!pd->pkg->pkgId)
+                pd->pkg->pkgId = g_string_chunk_insert(pd->pkg->chunk, pkgId);
+            if (!pd->pkg->name && name)
+                pd->pkg->name = g_string_chunk_insert(pd->pkg->chunk, name);
+            if (!pd->pkg->arch && arch)
+                pd->pkg->arch = g_string_chunk_insert(pd->pkg->chunk, arch);
+        }
+        break;
+    }
+
+    case STATE_VERSION:
+        assert(pd->pkg);
+
+        // Insert the version strings only if they don't already exist
+
+        if (!pd->pkg->epoch)
+            pd->pkg->epoch = cr_safe_string_chunk_insert(pd->pkg->chunk,
+                                            cr_find_attr("epoch", attr));
+        if (!pd->pkg->version)
+            pd->pkg->version = cr_safe_string_chunk_insert(pd->pkg->chunk,
+                                            cr_find_attr("ver", attr));
+        if (!pd->pkg->release)
+            pd->pkg->release = cr_safe_string_chunk_insert(pd->pkg->chunk,
+                                            cr_find_attr("rel", attr));
+        break;
+
+    case STATE_CHANGELOG: {
+        const char *val;
+        cr_ChangelogEntry *changelog;
+
+        assert(pd->pkg);
+        assert(!pd->changelog);
+
+        changelog = cr_changelog_entry_new();
+
+        val = cr_find_attr("author", attr);
+        if (!val)
+            cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+                        "Missing attribute \"author\" of a changelog element");
+        else
+            changelog->author = g_string_chunk_insert(pd->pkg->chunk, val);
+
+        val = cr_find_attr("date", attr);
+        if (!val)
+            cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+                        "Missing attribute \"date\" of a changelog element");
+        else
+            changelog->date = cr_xml_parser_strtoll(pd, val, 10);
+
+        // Entries are prepended here; the list is reversed by the end
+        // handler when the package element is closed.
+        pd->pkg->changelogs = g_slist_prepend(pd->pkg->changelogs, changelog);
+        // Remembered so the end handler can attach the element's text
+        pd->changelog = changelog;
+
+        break;
+    }
+
+    default:
+        break;
+    }
+}
+
+/* Expat end-element callback of the other.xml parser.
+ *
+ * pdata   - user data of the parser, used here as a cr_ParserData
+ * element - name of the closed element (unused)
+ *
+ * Moves the state machine back to the parent state (pd->sbtab) and
+ * finishes the element that was just closed: a "package" element is
+ * handed over to pd->pkgcb (if set), a "changelog" element gets its
+ * text content stored. Errors are reported by setting pd->err; once it
+ * is set, the handler does nothing.
+ */
+static void XMLCALL
+cr_end_handler(void *pdata, const char *element)
+{
+    cr_ParserData *pd = pdata;
+    GError *tmp_err = NULL;
+    unsigned int state = pd->state;  // State of the element being closed
+
+    CR_UNUSED(element);
+
+    if (pd->err)
+        return; // There was an error -> do nothing
+
+    if (pd->depth != pd->statedepth) {
+        // Back from the unknown state
+        pd->depth--;
+        return;
+    }
+
+    pd->depth--;
+    pd->statedepth--;
+    pd->state = pd->sbtab[pd->state];
+    pd->docontent = 0;
+
+    switch (state) {
+    case STATE_START:
+    case STATE_OTHERDATA:
+    case STATE_VERSION:
+        break;
+
+    case STATE_PACKAGE:
+        if (!pd->pkg)
+            return;
+
+        // Changelogs were prepended by the start handler, so reverse
+        // the list before the package is handed over.
+        pd->pkg->changelogs = g_slist_reverse(pd->pkg->changelogs);
+
+        if (pd->pkgcb && pd->pkgcb(pd->pkg, pd->pkgcb_data, &tmp_err)) {
+            if (tmp_err)
+                g_propagate_prefixed_error(&pd->err,
+                                           tmp_err,
+                                           "Parsing interrupted: ");
+            else
+                g_set_error(&pd->err, ERR_DOMAIN, CRE_CBINTERRUPTED,
+                            "Parsing interrupted");
+        } else {
+            // If callback return CRE_OK but it simultaneously set
+            // the tmp_err then it's a programming error.
+            assert(tmp_err == NULL);
+        }
+
+        pd->pkg = NULL;
+        break;
+
+    case STATE_CHANGELOG:
+        assert(pd->pkg);
+        assert(pd->changelog);
+
+        if (pd->content)
+            pd->changelog->changelog = g_string_chunk_insert(pd->pkg->chunk,
+                                                             pd->content);
+
+        // Always forget the current entry, even when there is no content
+        // to store; the start handler asserts that pd->changelog is NULL
+        // when the next changelog element begins.
+        pd->changelog = NULL;
+        break;
+
+    default:
+        break;
+    }
+}
+
+/* Parse the other.xml file located at "path".
+ *
+ * path           - file to parse (must not be NULL)
+ * newpkgcb       - callback invoked when a "package" element starts;
+ *                  when NULL, cr_newpkgcb is used instead
+ * newpkgcb_data  - user data passed to newpkgcb
+ * pkgcb          - callback invoked when a "package" element ends;
+ *                  may be NULL, but newpkgcb and pkgcb must not both be NULL
+ * pkgcb_data     - user data passed to pkgcb
+ * warningcb      - stored in the parser data together with warningcb_data
+ *                  (presumably used by cr_xml_parser_warning() - confirm)
+ * warningcb_data - user data for warningcb
+ * err            - GError return location; must be NULL or point to NULL
+ *
+ * Returns the value returned by cr_xml_parser_generic().
+ */
+int
+cr_xml_parse_other(const char *path,
+                   cr_XmlParserNewPkgCb newpkgcb,
+                   void *newpkgcb_data,
+                   cr_XmlParserPkgCb pkgcb,
+                   void *pkgcb_data,
+                   cr_XmlParserWarningCb warningcb,
+                   void *warningcb_data,
+                   GError **err)
+{
+    assert(path);
+    assert(newpkgcb || pkgcb);
+    assert(!err || *err == NULL);
+
+    // Fall back to the default callback when the caller gave none
+    cr_XmlParserNewPkgCb used_newpkgcb = newpkgcb ? newpkgcb : cr_newpkgcb;
+
+    // Set up the expat parser
+
+    XML_Parser xml = XML_ParserCreate(NULL);
+    XML_SetElementHandler(xml, cr_start_handler, cr_end_handler);
+    XML_SetCharacterDataHandler(xml, cr_char_handler);
+
+    // Set up the parser data shared by the handlers
+
+    cr_ParserData *data = cr_xml_parser_data(NUMSTATES);
+    data->parser         = &xml;
+    data->state          = STATE_START;
+    data->newpkgcb       = used_newpkgcb;
+    data->newpkgcb_data  = newpkgcb_data;
+    data->pkgcb          = pkgcb;
+    data->pkgcb_data     = pkgcb_data;
+    data->warningcb      = warningcb;
+    data->warningcb_data = warningcb_data;
+
+    // Build the lookup tables: swtab points to the first table row of
+    // each state, sbtab maps a state back to the state it came from.
+    cr_StatesSwitch *row = stateswitches;
+    while (row->from != NUMSTATES) {
+        if (!data->swtab[row->from])
+            data->swtab[row->from] = row;
+        data->sbtab[row->to] = row->from;
+        row++;
+    }
+
+    XML_SetUserData(xml, data);
+
+    // Run the parser
+
+    GError *parse_err = NULL;
+    int rc = cr_xml_parser_generic(xml, data, path, &parse_err);
+    if (parse_err)
+        g_propagate_error(err, parse_err);
+
+    // Release resources
+
+    if (rc != CRE_OK) {
+        // Avoid a leak when the parsing was interrupted by an error.
+        // A package created by cr_newpkgcb is referenced only by the
+        // parser (data->pkg), so it is freed here. A package created by
+        // a caller-supplied newpkgcb has to be freed by the caller.
+        if (used_newpkgcb == cr_newpkgcb)
+            cr_package_free(data->pkg);
+    }
+
+    cr_xml_parser_data_free(data);
+    XML_ParserFree(xml);
+
+    return rc;
+}