+/* createrepo_c - Library of routines for manipulation with repodata
+ * Copyright (C) 2013 Tomas Mlcoch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ */
+
+#include <glib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <expat.h>
+#include "xml_parser_internal.h"
+#include "xml_parser.h"
+#include "error.h"
+#include "package.h"
+#include "logging.h"
+#include "misc.h"
+
+#define ERR_DOMAIN CR_XML_PARSER_OTH_ERROR
+#define ERR_CODE_XML CRE_BADXMLOTHER
+
+typedef enum {
+ STATE_START,
+ STATE_OTHERDATA,
+ STATE_PACKAGE,
+ STATE_VERSION,
+ STATE_CHANGELOG,
+ NUMSTATES,
+} cr_OthState;
+
+/* NOTE: Same states in the first column must be together!!!
+ * Performance tip: More frequent elements shoud be listed
+ * first in its group (eg: element "package" (STATE_PACKAGE)
+ * has a "file" element listed first, because it is more frequent
+ * than a "version" element). */
+static cr_StatesSwitch stateswitches[] = {
+ { STATE_START, "otherdata", STATE_OTHERDATA, 0 },
+ { STATE_OTHERDATA, "package", STATE_PACKAGE, 0 },
+ { STATE_PACKAGE, "changelog", STATE_CHANGELOG, 1 },
+ { STATE_PACKAGE, "version", STATE_VERSION, 0 },
+ { NUMSTATES, NULL, NUMSTATES, 0 },
+};
+
+static void XMLCALL
+cr_start_handler(void *pdata, const char *element, const char **attr)
+{
+ GError *tmp_err = NULL;
+ cr_ParserData *pd = pdata;
+ cr_StatesSwitch *sw;
+
+ if (pd->err)
+ return; // There was an error -> do nothing
+
+ if (pd->depth != pd->statedepth) {
+ // We are inside of unknown element
+ pd->depth++;
+ return;
+ }
+ pd->depth++;
+
+ if (!pd->swtab[pd->state]) {
+ // Current element should not have any sub elements
+ return;
+ }
+
+ if (!pd->pkg && pd->state != STATE_OTHERDATA && pd->state != STATE_START)
+ return; // Do not parse current package tag and its content
+
+ // Find current state by its name
+ for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)
+ if (!strcmp(element, sw->ename))
+ break;
+ if (sw->from != pd->state) {
+ // No state for current element (unknown element)
+ cr_xml_parser_warning(pd, CR_XML_WARNING_UNKNOWNTAG,
+ "Unknown element \"%s\"", element);
+ return;
+ }
+
+ // Update parser data
+ pd->state = sw->to;
+ pd->docontent = sw->docontent;
+ pd->statedepth = pd->depth;
+ pd->lcontent = 0;
+ pd->content[0] = '\0';
+
+ const char *val;
+
+ switch(pd->state) {
+ case STATE_START:
+ case STATE_OTHERDATA:
+ break;
+
+ case STATE_PACKAGE: {
+ const char *pkgId = cr_find_attr("pkgid", attr);
+ const char *name = cr_find_attr("name", attr);
+ const char *arch = cr_find_attr("arch", attr);
+
+ if (!pkgId) {
+ // Package without a pkgid attr is error
+ g_set_error(&pd->err, ERR_DOMAIN, ERR_CODE_XML,
+ "Package pkgid attributte is missing!");
+ break;
+ }
+
+ if (!name)
+ cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+ "Missing attribute \"name\" of a package element");
+
+ if (!arch)
+ cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+ "Missing attribute \"arch\" of a package element");
+
+ // Get package object to store current package or NULL if
+ // current XML package element shoud be skipped/ignored.
+ if (pd->newpkgcb(&pd->pkg,
+ pkgId,
+ name,
+ arch,
+ pd->newpkgcb_data,
+ &tmp_err))
+ {
+ if (tmp_err)
+ g_propagate_prefixed_error(&pd->err,
+ tmp_err,
+ "Parsing interrupted:");
+ else
+ g_set_error(&pd->err, ERR_DOMAIN, CRE_CBINTERRUPTED,
+ "Parsing interrupted");
+ break;
+ } else {
+ // If callback return CRE_OK but it simultaneously set
+ // the tmp_err then it's a programming error.
+ assert(tmp_err == NULL);
+ }
+
+ if (pd->pkg) {
+ if (!pd->pkg->pkgId)
+ pd->pkg->pkgId = g_string_chunk_insert(pd->pkg->chunk, pkgId);
+ if (!pd->pkg->name && name)
+ pd->pkg->name = g_string_chunk_insert(pd->pkg->chunk, name);
+ if (!pd->pkg->arch && arch)
+ pd->pkg->arch = g_string_chunk_insert(pd->pkg->chunk, arch);
+ }
+ break;
+ }
+
+ case STATE_VERSION:
+ assert(pd->pkg);
+
+ // Version string insert only if them don't already exists
+
+ if (!pd->pkg->epoch)
+ pd->pkg->epoch = cr_safe_string_chunk_insert(pd->pkg->chunk,
+ cr_find_attr("epoch", attr));
+ if (!pd->pkg->version)
+ pd->pkg->version = cr_safe_string_chunk_insert(pd->pkg->chunk,
+ cr_find_attr("ver", attr));
+ if (!pd->pkg->release)
+ pd->pkg->release = cr_safe_string_chunk_insert(pd->pkg->chunk,
+ cr_find_attr("rel", attr));
+ break;
+
+ case STATE_CHANGELOG:
+ assert(pd->pkg);
+ assert(!pd->changelog);
+
+ cr_ChangelogEntry *changelog = cr_changelog_entry_new();
+
+ val = cr_find_attr("author", attr);
+ if (!val)
+ cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+ "Missing attribute \"author\" of a package element");
+ else
+ changelog->author = g_string_chunk_insert(pd->pkg->chunk, val);
+
+ val = cr_find_attr("date", attr);
+ if (!val)
+ cr_xml_parser_warning(pd, CR_XML_WARNING_MISSINGATTR,
+ "Missing attribute \"date\" of a package element");
+ else
+ changelog->date = cr_xml_parser_strtoll(pd, val, 10);
+
+ pd->pkg->changelogs = g_slist_prepend(pd->pkg->changelogs, changelog);
+ pd->changelog = changelog;
+
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void XMLCALL
+cr_end_handler(void *pdata, const char *element)
+{
+ cr_ParserData *pd = pdata;
+ GError *tmp_err = NULL;
+ unsigned int state = pd->state;
+
+ CR_UNUSED(element);
+
+ if (pd->err)
+ return; // There was an error -> do nothing
+
+ if (pd->depth != pd->statedepth) {
+ // Back from the unknown state
+ pd->depth--;
+ return;
+ }
+
+ pd->depth--;
+ pd->statedepth--;
+ pd->state = pd->sbtab[pd->state];
+ pd->docontent = 0;
+
+ switch (state) {
+ case STATE_START:
+ case STATE_OTHERDATA:
+ case STATE_VERSION:
+ break;
+
+ case STATE_PACKAGE:
+ if (!pd->pkg)
+ return;
+
+ // Reverse list of changelogs
+ pd->pkg->changelogs = g_slist_reverse(pd->pkg->changelogs);
+
+ if (pd->pkgcb && pd->pkgcb(pd->pkg, pd->pkgcb_data, &tmp_err)) {
+ if (tmp_err)
+ g_propagate_prefixed_error(&pd->err,
+ tmp_err,
+ "Parsing interrupted: ");
+ else
+ g_set_error(&pd->err, ERR_DOMAIN, CRE_CBINTERRUPTED,
+ "Parsing interrupted");
+ } else {
+ // If callback return CRE_OK but it simultaneously set
+ // the tmp_err then it's a programming error.
+ assert(tmp_err == NULL);
+ }
+
+ pd->pkg = NULL;
+ break;
+
+ case STATE_CHANGELOG: {
+ assert(pd->pkg);
+ assert(pd->changelog);
+
+ if (!pd->content)
+ break;
+
+ pd->changelog->changelog = g_string_chunk_insert(pd->pkg->chunk,
+ pd->content);
+ pd->changelog = NULL;
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+int
+cr_xml_parse_other(const char *path,
+ cr_XmlParserNewPkgCb newpkgcb,
+ void *newpkgcb_data,
+ cr_XmlParserPkgCb pkgcb,
+ void *pkgcb_data,
+ cr_XmlParserWarningCb warningcb,
+ void *warningcb_data,
+ GError **err)
+{
+ int ret = CRE_OK;
+ cr_ParserData *pd;
+ XML_Parser parser;
+ GError *tmp_err = NULL;
+
+ assert(path);
+ assert(newpkgcb || pkgcb);
+ assert(!err || *err == NULL);
+
+ if (!newpkgcb) // Use default newpkgcb
+ newpkgcb = cr_newpkgcb;
+
+ // Init
+
+ parser = XML_ParserCreate(NULL);
+ XML_SetElementHandler(parser, cr_start_handler, cr_end_handler);
+ XML_SetCharacterDataHandler(parser, cr_char_handler);
+
+ pd = cr_xml_parser_data(NUMSTATES);
+ pd->parser = &parser;
+ pd->state = STATE_START;
+ pd->newpkgcb_data = newpkgcb_data;
+ pd->newpkgcb = newpkgcb;
+ pd->pkgcb_data = pkgcb_data;
+ pd->pkgcb = pkgcb;
+ pd->warningcb = warningcb;
+ pd->warningcb_data = warningcb_data;
+ for (cr_StatesSwitch *sw = stateswitches; sw->from != NUMSTATES; sw++) {
+ if (!pd->swtab[sw->from])
+ pd->swtab[sw->from] = sw;
+ pd->sbtab[sw->to] = sw->from;
+ }
+
+ XML_SetUserData(parser, pd);
+
+ // Parsing
+
+ ret = cr_xml_parser_generic(parser, pd, path, &tmp_err);
+ if (tmp_err)
+ g_propagate_error(err, tmp_err);
+
+ // Clean up
+
+ if (ret != CRE_OK && newpkgcb == cr_newpkgcb) {
+ // Prevent memory leak when the parsing is interrupted by an error.
+ // If a new package object was created by the cr_newpkgcb then
+ // is obvious that there is no other reference to the package
+ // except of the parser reference in pd->pkg.
+ // If a caller supplied its own newpkgcb, then the freeing
+ // of the currently parsed package is the caller responsibility.
+ cr_package_free(pd->pkg);
+ }
+
+ cr_xml_parser_data_free(pd);
+ XML_ParserFree(parser);
+
+ return ret;
+}