xml_dump_filelists.c
xml_dump_other.c
xml_dump_primary.c
- xml_file.c)
+ xml_file.c
+ xml_parser.c
+ xml_parser_filelists.c
+ xml_parser_other.c
+ xml_parser_primary.c)
SET(headers
compression_wrapper.h
sqlite.h
version.h
xml_dump.h
- xml_file.h)
+ xml_file.h
+ xml_parser.h)
ADD_LIBRARY(libcreaterepo_c SHARED ${createrepo_c_SRCS})
TARGET_LINK_LIBRARIES(libcreaterepo_c ${BZIP2_LIBRARIES})
--- /dev/null
+#include <assert.h>
+#include "error.h"
+#include "xml_parser.h"
+#include "xml_parser_internal.h"
+#include "misc.h"
+
+/** Allocate and initialize a new parser data structure.
+ * The structure is zeroed, the return code is set to CRE_OK, the content
+ * buffer gets CONTENT_REALLOC_STEP bytes holding an empty string and the
+ * message string is created empty.
+ * @return      Newly allocated cr_ParserData. Release it with
+ *              cr_xml_parser_data_free().
+ */
+cr_ParserData *
+cr_xml_parser_data(void)
+{
+    cr_ParserData *pd = g_new0(cr_ParserData, 1);
+
+    pd->ret = CRE_OK;
+    pd->content = g_malloc(CONTENT_REALLOC_STEP);
+    pd->content[0] = '\0';  /* Keep the buffer a valid C string from the start */
+    pd->acontent = CONTENT_REALLOC_STEP;
+    pd->msgs = g_string_new(NULL);
+
+    return pd;
+}
+
+/** Free a parser data structure created by cr_xml_parser_data().
+ * Releases the content buffer, the message string and the structure
+ * itself. The swtab and sbtab tables are not freed by this function.
+ * @param pd    Parser data to free. NULL is accepted and ignored.
+ */
+void
+cr_xml_parser_data_free(cr_ParserData *pd)
+{
+    if (!pd)
+        return;  /* Nothing to free */
+
+    g_free(pd->content);
+    g_string_free(pd->msgs, TRUE);
+    g_free(pd);
+}
+
+/** Expat character data handler: append a chunk of text to pd->content.
+ * Nothing is stored when an error was already recorded (pd->ret) or when
+ * the current element does not collect its content (pd->docontent).
+ * The buffer is grown when needed and is always kept NUL terminated.
+ * @param pdata Parser data (cr_ParserData *) registered as expat user data.
+ * @param s     Chunk of character data (not NUL terminated).
+ * @param len   Number of characters in the chunk.
+ */
+void XMLCALL
+cr_char_handler(void *pdata, const XML_Char *s, int len)
+{
+    cr_ParserData *pd = pdata;
+    int needed;
+
+    if (pd->ret != CRE_OK)
+        return; /* There was an error -> do nothing */
+
+    if (!pd->docontent)
+        return; /* Do not store the content */
+
+    if (len <= 0)
+        return; /* Nothing to append */
+
+    /* Room for the stored text, the new chunk and the terminating NUL */
+    needed = pd->lcontent + len + 1;
+    if (needed > pd->acontent) {
+        /* The buffer was allocated by g_malloc(), so it has to be grown
+         * by g_realloc() and not by the libc realloc() */
+        pd->acontent = needed + CONTENT_REALLOC_STEP;
+        pd->content = g_realloc(pd->content, pd->acontent);
+    }
+
+    memcpy(pd->content + pd->lcontent, s, (size_t) len);
+    pd->lcontent += len;
+    pd->content[pd->lcontent] = '\0';
+}
+
+/** Default "new package" callback.
+ * Stores a fresh package object created by cr_package_new() into *pkg.
+ * The pkgId, name, arch and cbdata arguments are ignored.
+ * @param pkg       Output location; *pkg has to be NULL on entry.
+ * @param pkgId     Unused.
+ * @param name      Unused.
+ * @param arch      Unused.
+ * @param cbdata    Unused.
+ * @param err       GError **; if given, *err has to be NULL on entry.
+ * @return          CRE_OK
+ */
+int
+cr_newpkgcb(cr_Package **pkg,
+            const char *pkgId,
+            const char *name,
+            const char *arch,
+            void *cbdata,
+            GError **err)
+{
+    assert(pkg && *pkg == NULL);
+    assert(!err || *err == NULL);
+
+    /* The default callback needs neither the attributes nor user data */
+    CR_UNUSED(cbdata);
+    CR_UNUSED(arch);
+    CR_UNUSED(name);
+    CR_UNUSED(pkgId);
+
+    *pkg = cr_package_new();
+    return CRE_OK;
+}
--- /dev/null
+/* createrepo_c - Library of routines for manipulation with repodata
+ * Copyright (C) 2013 Tomas Mlcoch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ */
+
+#ifndef __C_CREATEREPOLIB_XML_PARSER_H__
+#define __C_CREATEREPOLIB_XML_PARSER_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "package.h"
+
+/** \defgroup xml_parser XML parser API.
+ * \addtogroup xml_parser
+ * @{
+ */
+
+/** Callback for XML parser which is called when a package element is
+ * completely parsed (on its closing tag).
+ * @param pkg Currently parsed package.
+ * @param cbdata User data.
+ * @param err GError **
+ * @return 0 - OK, non-zero - ERROR (stops the parsing)
+ */
+typedef int (*cr_XmlParserPkgCb)(cr_Package *pkg,
+ void *cbdata,
+ GError **err);
+
+/** Callback for XML parser which is called when a new package object parsing
+ * is started. This function has to set *pkg to package object which will
+ * be populated by parser. The object could be empty, or already partially
+ * filled (by other XML parsers) package object.
+ * If the pointer is set to NULL, current package will be skipped.
+ * Note: For the primary.xml file pkgId, name and arch are NULL!
+ * @param pkg Package that will be populated.
+ * @param pkgId pkgId (hash) of the new package.
+ * @param name Name of the new package.
+ * @param arch Arch of the new package.
+ * @param cbdata User data.
+ * @param err GError **
+ * @return 0 - OK, non-zero - ERR (stops the parsing)
+ */
+typedef int (*cr_XmlParserNewPkgCb)(cr_Package **pkg,
+ const char *pkgId,
+ const char *name,
+ const char *arch,
+ void *cbdata,
+ GError **err);
+
+/** Default implementation of the cr_XmlParserNewPkgCb callback.
+ * It ignores pkgId, name, arch and cbdata and stores a new package object
+ * created by cr_package_new() into *pkg.
+ * cr_xml_parse_filelists() uses it when no newpkgcb is passed.
+ * @param pkg Package that will be populated (*pkg has to be NULL).
+ * @param pkgId Ignored.
+ * @param name Ignored.
+ * @param arch Ignored.
+ * @param cbdata Ignored.
+ * @param err GError **
+ * @return CRE_OK
+ */
+int cr_newpkgcb(cr_Package **pkg,
+ const char *pkgId,
+ const char *name,
+ const char *arch,
+ void *cbdata,
+ GError **err);
+
+/** Parse a filelists.xml file.
+ * The file is opened with automatic detection of its compression.
+ * @param path Path to the filelists.xml file. Must not be NULL.
+ * @param newpkgcb Callback called when parsing of a new package
+ * starts. If NULL, cr_newpkgcb is used.
+ * @param newpkgcb_data User data for the newpkgcb.
+ * @param pkgcb Callback called when a package is completely
+ * parsed. Must not be NULL.
+ * @param pkgcb_data User data for the pkgcb.
+ * @param err GError **
+ * @return CRE_OK on success, otherwise an error code
+ * (e.g. CRE_IO, CRE_MEMORY, CRE_XMLPARSER).
+ */
+int cr_xml_parse_filelists(const char *path,
+ cr_XmlParserNewPkgCb newpkgcb,
+ void *newpkgcb_data,
+ cr_XmlParserPkgCb pkgcb,
+ void *pkgcb_data,
+ GError **err);
+
+
+/** @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __C_CREATEREPOLIB_XML_PARSER_H__ */
--- /dev/null
+/* createrepo_c - Library of routines for manipulation with repodata
+ * Copyright (C) 2013 Tomas Mlcoch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ */
+
+#include <glib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <expat.h>
+#include "xml_parser_internal.h"
+#include "xml_parser.h"
+#include "error.h"
+#include "package.h"
+#include "logging.h"
+#include "misc.h"
+
+/* States of the filelists parser. Each state (except the initial one)
+ * is entered when the start tag of the corresponding element is seen. */
+typedef enum {
+ STATE_START, /* Initial state, no element entered yet */
+ STATE_FILELISTS, /* Inside the "filelists" element */
+ STATE_PACKAGE, /* Inside a "package" element */
+ STATE_VERSION, /* Inside a "version" element */
+ STATE_FILE, /* Inside a "file" element */
+ NUMSTATES, /* Number of states; also terminates stateswitches */
+} cr_FilState;
+
+/* Table of allowed state transitions.
+ * Columns: current state, name of the sub-element, state of the
+ * sub-element, flag whether text content of the sub-element is stored.
+ * The row with NUMSTATES terminates the table.
+ * NOTE: Same states in the first column must be together!!!
+ * Performance tip: More frequent elements should be listed
+ * first in its group (eg: element "package" (STATE_PACKAGE)
+ * has a "file" element listed first, because it is more frequent
+ * than a "version" element). */
+static cr_StatesSwitch stateswitches[] = {
+ { STATE_START, "filelists", STATE_FILELISTS, 0 },
+ { STATE_FILELISTS, "package", STATE_PACKAGE, 0 },
+ { STATE_PACKAGE, "file", STATE_FILE, 1 },
+ { STATE_PACKAGE, "version", STATE_VERSION, 0 },
+ { NUMSTATES, NULL, NUMSTATES, 0 },
+};
+
+/* Expat start-element handler of the filelists parser.
+ * Looks up the transition from the current state for the given element
+ * name and, if there is one, enters the new state and processes the
+ * attributes of the element. Elements without a matching transition are
+ * only counted in pd->depth, so their whole subtree gets skipped.
+ * pdata   - parser data (cr_ParserData *) registered as expat user data
+ * element - name of the element
+ * attr    - NULL terminated array of attribute names and values
+ */
+static void XMLCALL
+cr_start_handler(void *pdata, const char *element, const char **attr)
+{
+ GError *tmp_err = NULL;
+ cr_ParserData *pd = pdata;
+ cr_StatesSwitch *sw;
+
+ if (pd->ret != CRE_OK)
+ return; // There was an error -> do nothing
+
+ // depth counts every open element, statedepth only the elements
+ // which were entered as a state
+ if (pd->depth != pd->statedepth) {
+ // There probably was an unknown element
+ pd->depth++;
+ return;
+ }
+ pd->depth++;
+
+ if (!pd->swtab[pd->state])
+ return; // Current element should not have any sub elements
+
+ /* TODO TEST THIS */
+ if (!pd->pkg && pd->state != STATE_FILELISTS && pd->state != STATE_START)
+ return; // Do not parse current package tag and its content
+
+ // Find current state by its name
+ // (rows with the same "from" state are adjacent in the table)
+ for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)
+ if (!strcmp(element, sw->ename))
+ break;
+ if (sw->from != pd->state)
+ return; // There is no state for the name -> skip
+
+ // Update parser data
+ pd->state = sw->to;
+ pd->docontent = sw->docontent;
+ pd->statedepth = pd->depth;
+ // Start with an empty content for the new element
+ pd->lcontent = 0;
+ pd->content[0] = '\0';
+
+ switch(pd->state) {
+ case STATE_START:
+ case STATE_FILELISTS:
+ break;
+
+ case STATE_PACKAGE: {
+ /* TODO: Parse all attrs in single loop instead of use cr_find_attr */
+ const char *pkgId = cr_find_attr("pkgid", attr);
+ const char *name = cr_find_attr("name", attr);
+ const char *arch = cr_find_attr("arch", attr);
+
+ // The pkgid attribute is mandatory, name and arch may be NULL
+ if (!pkgId) {
+ pd->ret = CRE_BADXMLFILELISTS;
+ g_set_error(pd->err, CR_XML_PARSER_FIL_ERROR, CRE_BADXMLFILELISTS,
+ "Package pkgid attributte is missing!");
+ break;
+ }
+
+ // Ask the callback for the package object to fill; a non-zero
+ // return value interrupts the parsing
+ if (pd->newpkgcb(&pd->pkg,
+ pkgId,
+ name,
+ arch,
+ pd->newpkgcb_data,
+ &tmp_err))
+ {
+ pd->ret = CRE_CBINTERRUPTED;
+ if (tmp_err)
+ g_propagate_prefixed_error(pd->err,
+ tmp_err,
+ "Parsing interrupted:");
+ else
+ g_set_error(pd->err, CR_XML_PARSER_FIL_ERROR, CRE_CBINTERRUPTED,
+ "Parsing interrupted");
+ }
+
+ /* TODO: Insert name and pkg id to the package */
+ break;
+ }
+
+ case STATE_VERSION:
+ /* TODO: Parse version */
+ break;
+
+ case STATE_FILE: {
+ // Remember the type of the file for the end handler; a missing
+ // or unknown "type" attribute means a regular file
+ const char *type = cr_find_attr("type", attr);
+ pd->last_file_type = FILE_FILE;
+ if (type) {
+ if (!strcmp(type, "dir"))
+ pd->last_file_type = FILE_DIR;
+ else if (!strcmp(type, "ghost"))
+ pd->last_file_type = FILE_GHOST;
+ else
+ g_string_append_printf(pd->msgs,
+ "Unknown file type \"%s\";",
+ type);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+/* Expat end-element handler of the filelists parser.
+ * Returns the parser to the parent state of the element that is being
+ * closed. When a "package" element ends, the package is passed to the
+ * pkgcb callback; when a "file" element ends, the collected content is
+ * stored as a new file record of the current package.
+ * pdata   - parser data (cr_ParserData *) registered as expat user data
+ * element - name of the closed element (unused)
+ */
+static void XMLCALL
+cr_end_handler(void *pdata, const char *element)
+{
+    cr_ParserData *pd = pdata;
+    GError *cb_err = NULL;
+    unsigned int closed_state = pd->state;
+
+    CR_UNUSED(element);
+
+    if (pd->ret != CRE_OK)
+        return; /* There was an error -> do nothing */
+
+    if (pd->depth != pd->statedepth) {
+        /* Back from the unknown state */
+        pd->depth--;
+        return;
+    }
+
+    /* Step back to the parent state */
+    pd->depth--;
+    pd->statedepth--;
+    pd->state = pd->sbtab[closed_state];
+    pd->docontent = 0;
+
+    if (closed_state == STATE_PACKAGE) {
+        if (!pd->pkg)
+            return; /* The package was skipped */
+
+        /* Hand the finished package over; non-zero interrupts parsing */
+        if (pd->pkgcb(pd->pkg, pd->pkgcb_data, &cb_err)) {
+            pd->ret = CRE_CBINTERRUPTED;
+            if (cb_err)
+                g_propagate_prefixed_error(pd->err,
+                                           cb_err,
+                                           "Parsing interrupted:");
+            else
+                g_set_error(pd->err, CR_XML_PARSER_FIL_ERROR,
+                            CRE_CBINTERRUPTED,
+                            "Parsing interrupted");
+        }
+        pd->pkg = NULL;
+        return;
+    }
+
+    if (closed_state != STATE_FILE)
+        return; /* Other states need no processing on their end tag */
+
+    if (!pd->pkg || !pd->content)
+        return;
+
+    cr_PackageFile *entry = cr_package_file_new();
+    entry->name = cr_safe_string_chunk_insert(pd->pkg->chunk,
+                                              cr_get_filename(pd->content));
+    entry->path = cr_safe_string_chunk_insert(pd->pkg->chunk,
+                                              pd->content);
+
+    if (pd->last_file_type == FILE_FILE)
+        entry->type = NULL;     /* NULL => "file" */
+    else if (pd->last_file_type == FILE_DIR)
+        entry->type = "dir";
+    else if (pd->last_file_type == FILE_GHOST)
+        entry->type = "ghost";
+
+    pd->pkg->files = g_slist_prepend(pd->pkg->files, entry);
+}
+
+/* Parse a filelists.xml file (compression is detected automatically).
+ * path          - path to the file, must not be NULL
+ * newpkgcb      - callback called when a "package" element starts;
+ *                 cr_newpkgcb is used when NULL
+ * newpkgcb_data - user data for newpkgcb
+ * pkgcb         - callback called when a "package" element ends,
+ *                 must not be NULL
+ * pkgcb_data    - user data for pkgcb
+ * err           - GError **, set when an error code is returned
+ * Returns CRE_OK on success; CRE_IO when the file cannot be opened or
+ * read, CRE_MEMORY when the parser or its buffer cannot be allocated,
+ * CRE_XMLPARSER on malformed XML, or the code stored to pd->ret by the
+ * element handlers (bad filelists data, interruption by a callback).
+ */
+int
+cr_xml_parse_filelists(const char *path,
+                       cr_XmlParserNewPkgCb newpkgcb,
+                       void *newpkgcb_data,
+                       cr_XmlParserPkgCb pkgcb,
+                       void *pkgcb_data,
+                       GError **err)
+{
+    int ret = CRE_OK;
+    CR_FILE *f;
+    cr_ParserData *pd;
+    XML_Parser parser;
+
+    assert(path);
+    assert(pkgcb);
+    assert(!err || *err == NULL);
+
+    if (!newpkgcb)
+        newpkgcb = cr_newpkgcb;
+
+    f = cr_open(path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION);
+    if (!f) {
+        g_set_error(err, CR_XML_PARSER_FIL_ERROR, CRE_IO, "Cannot open %s", path);
+        return CRE_IO;
+    }
+
+    parser = XML_ParserCreate(NULL);
+    if (!parser) {
+        g_set_error(err, CR_XML_PARSER_FIL_ERROR, CRE_MEMORY,
+                    "Out of memory: Cannot create xml parser");
+        cr_close(f);
+        return CRE_MEMORY;
+    }
+    XML_SetElementHandler(parser, cr_start_handler, cr_end_handler);
+    XML_SetCharacterDataHandler(parser, cr_char_handler);
+
+    pd = cr_xml_parser_data();
+    pd->parser = &parser;
+    pd->state = STATE_START;
+    pd->newpkgcb_data = newpkgcb_data;
+    pd->newpkgcb = newpkgcb;
+    pd->pkgcb_data = pkgcb_data;
+    pd->pkgcb = pkgcb;
+    /* Let the handlers report their errors directly to the caller */
+    pd->err = err;
+
+    /* swtab[state] -> first row of stateswitches for the state,
+     * sbtab[state] -> parent state of the state */
+    pd->swtab = g_new0(cr_StatesSwitch *, NUMSTATES);
+    pd->sbtab = g_new0(unsigned int, NUMSTATES);
+    for (cr_StatesSwitch *sw = stateswitches; sw->from != NUMSTATES; sw++) {
+        if (!pd->swtab[sw->from])
+            pd->swtab[sw->from] = sw;
+        pd->sbtab[sw->to] = sw->from;
+    }
+
+    XML_SetUserData(parser, pd);
+
+    while (1) {
+        int len;
+        int parsed;
+        void *buf = XML_GetBuffer(parser, XML_BUFFER_SIZE);
+        if (!buf) {
+            ret = CRE_MEMORY;
+            g_set_error(err, CR_XML_PARSER_FIL_ERROR, CRE_MEMORY,
+                        "Out of memory: Cannot allocate buffer for xml parser");
+            break;
+        }
+
+        len = cr_read(f, buf, XML_BUFFER_SIZE);
+        if (len < 0) {
+            ret = CRE_IO;
+            g_critical("%s: Cannot read for parsing : %s\n",
+                       __func__, strerror(errno));
+            g_set_error(err, CR_XML_PARSER_FIL_ERROR, CRE_IO,
+                        "Error while reading xml");
+            break;
+        }
+
+        /* A zero length read means the end of input (final chunk) */
+        parsed = XML_ParseBuffer(parser, len, len == 0);
+
+        /* An error recorded by a handler takes precedence: err is already
+         * set by the handler and must not be set for the second time.
+         * This check has to precede the end-of-input check, otherwise
+         * an error raised during the final call would be lost. */
+        if (pd->ret != CRE_OK) {
+            ret = pd->ret;
+            break;
+        }
+
+        if (!parsed) {
+            ret = CRE_XMLPARSER;
+            g_critical("%s: parsing error: %s\n",
+                       __func__, XML_ErrorString(XML_GetErrorCode(parser)));
+            g_set_error(err, CR_XML_PARSER_FIL_ERROR, CRE_XMLPARSER,
+                        "Parse error at line: %d (%s)",
+                        (int) XML_GetCurrentLineNumber(parser),
+                        (char *) XML_ErrorString(XML_GetErrorCode(parser)));
+            break;
+        }
+
+        if (len == 0)
+            break;
+    }
+
+    /* The state tables are allocated here, so they are released here;
+     * cr_xml_parser_data_free() does not know about them */
+    g_free(pd->swtab);
+    g_free(pd->sbtab);
+    cr_xml_parser_data_free(pd);
+    XML_ParserFree(parser);
+    /* NOTE(review): assumes the single-argument cr_close() that pairs with
+     * the three-argument cr_open() used above - confirm against
+     * compression_wrapper.h */
+    cr_close(f);
+
+    return ret;
+}
--- /dev/null
+/* createrepo_c - Library of routines for manipulation with repodata
+ * Copyright (C) 2013 Tomas Mlcoch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ */
+
+#ifndef __C_CREATEREPOLIB_XML_PARSER_INTERNAL_H__
+#define __C_CREATEREPOLIB_XML_PARSER_INTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+#include <expat.h>
+#include "xml_parser.h"
+#include "error.h"
+#include "package.h"
+
+/** Size (in bytes) of the buffer requested from expat for a single read. */
+#define XML_BUFFER_SIZE 8192
+/** Initial size of the element content buffer and the number of extra
+ * bytes added each time the buffer has to grow. */
+#define CONTENT_REALLOC_STEP 256
+
+/** Type of a file listed in the filelists.xml ("type" attribute of the
+ * "file" element).
+ */
+typedef enum {
+ FILE_FILE, /*!< Regular file (no or unknown "type" attribute) */
+ FILE_DIR, /*!< type="dir" */
+ FILE_GHOST, /*!< type="ghost" */
+ FILE_SENTINEL, /*!< Last item of the enum */
+} cr_FileType;
+
+/** Single allowed transition of the parser state machine: when the
+ * parser is in the state "from" and a sub-element named "ename" starts,
+ * the parser switches to the state "to".
+ */
+typedef struct {
+ unsigned int from; /*!< State (current tag) */
+ char *ename; /*!< String name of sub-tag */
+ unsigned int to; /*!< State of sub-tag */
+ int docontent; /*!< Read text content of element? */
+} cr_StatesSwitch;
+
+/** Data shared by the expat handlers during parsing of a single file.
+ */
+typedef struct _cr_ParserData {
+ int ret; /*!< status of parsing (return code) */
+ int depth; /*!< number of currently open elements */
+ int statedepth; /*!< depth at which the current state was entered */
+ unsigned int state; /*!< current state */
+
+ /* Tag content related values */
+
+ int docontent; /*!< Store text content of the current element? */
+ char *content; /*!< Text content of the element */
+ int lcontent; /*!< The content length */
+ int acontent; /*!< Allocated size of the content buffer (bytes) */
+
+ XML_Parser *parser; /*!< The parser */
+ cr_StatesSwitch **swtab; /*!< Pointers to statesswitches table */
+ unsigned int *sbtab; /*!< sbtab[to_state] = from_state */
+
+ /* Package stuff */
+
+ void *newpkgcb_data; /*!<
+ User data for the newpkgcb. */
+ cr_XmlParserNewPkgCb newpkgcb; /*!<
+ Callback called to get (create new or use existing from a previous
+ parsing of other or primary xml file) pkg object for the currently
+ loaded pkg. */
+ void *pkgcb_data; /*!<
+ User data for the pkgcb. */
+ cr_XmlParserPkgCb pkgcb; /*!<
+ Callback called when a single pkg data are completely parsed. */
+ cr_Package *pkg; /*!<
+ The package which is currently loaded. */
+ GString *msgs; /*!<
+ Messages from xml parser (warnings about unknown elements etc.) */
+ GError **err; /*!<
+ Error message */
+
+ /* Filelists related stuff */
+
+ int last_file_type; /*!<
+ Type (cr_FileType) of the "file" element which is being parsed. */
+} cr_ParserData;
+
+/** Allocate a new parser data structure with the return code set to
+ * CRE_OK, a content buffer of CONTENT_REALLOC_STEP bytes and an empty
+ * message string.
+ * @return Newly allocated cr_ParserData.
+ */
+cr_ParserData *cr_xml_parser_data();
+
+/** Free the parser data structure together with its content buffer and
+ * message string.
+ * @param pd Parser data to free.
+ */
+void cr_xml_parser_data_free(cr_ParserData *pd);
+
+/** Find the value of an attribute in an attribute array.
+ * @param name  Name of the attribute to look for.
+ * @param attr  NULL terminated array of alternating attribute names
+ *              and values.
+ * @return      Value of the first attribute with the given name or NULL
+ *              if there is no such attribute.
+ */
+static inline const char *
+cr_find_attr(const char *name, const char **attr)
+{
+    for (const char **pair = attr; pair[0] != NULL; pair += 2) {
+        if (strcmp(name, pair[0]) == 0)
+            return pair[1];
+    }
+
+    return NULL;
+}
+
+/** Expat character data handler which appends the passed characters to
+ * the content buffer of the parser data. Nothing is stored if an error
+ * was already recorded or if the content of the current element is not
+ * requested (docontent is 0).
+ * @param pdata Parser data (cr_ParserData *).
+ * @param s Characters to append (not NUL terminated).
+ * @param len Number of characters in s.
+ */
+void XMLCALL cr_char_handler(void *pdata, const XML_Char *s, int len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __C_CREATEREPOLIB_XML_PARSER_INTERNAL_H__ */
TARGET_LINK_LIBRARIES(test_xml_file libcreaterepo_c ${GLIB2_LIBRARIES})
ADD_DEPENDENCIES(tests test_xml_file)
+ADD_EXECUTABLE(test_xml_parser_filelists test_xml_parser_filelists.c)
+TARGET_LINK_LIBRARIES(test_xml_parser_filelists libcreaterepo_c ${GLIB2_LIBRARIES})
+ADD_DEPENDENCIES(tests test_xml_parser_filelists)
+
CONFIGURE_FILE("run_gtester.sh.in" "run_gtester.sh")
ADD_TEST(test_main run_gtester.sh)
--- /dev/null
+/* createrepo_c - Library of routines for manipulation with repodata
+ * Copyright (C) 2013 Tomas Mlcoch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ */
+
+#include <glib.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "fixtures.h"
+#include "createrepo/error.h"
+#include "createrepo/package.h"
+#include "createrepo/misc.h"
+#include "createrepo/xml_parser.h"
+
+/* Package callback used by the tests.
+ * If cbdata is given, it is treated as a pointer to an int counter which
+ * is incremented for each parsed package. Always returns CRE_OK. */
+static int
+pkgcb(cr_Package *pkg, void *cbdata, GError **err)
+{
+    int *counter = cbdata;
+
+    g_assert(!err || *err == NULL);
+
+    if (counter)
+        *counter += 1;
+
+    return CRE_OK;
+}
+
+/* Filelists of the test repo 00 must be parsed without any error
+ * (default newpkgcb, no counter for the package callback). */
+static void test_cr_xml_parse_filelists_00(void)
+{
+    GError *tmp_err = NULL;
+    int ret = cr_xml_parse_filelists(TEST_REPO_00_FILELISTS,
+                                     NULL, NULL,    /* default newpkgcb */
+                                     pkgcb, NULL,
+                                     &tmp_err);
+
+    g_assert(tmp_err == NULL);
+    g_assert_cmpint(ret, ==, CRE_OK);
+}
+
+/* Filelists of the test repo 01 must be parsed without any error and
+ * the package callback must be called exactly once. */
+static void test_cr_xml_parse_filelists_01(void)
+{
+    GError *tmp_err = NULL;
+    int parsed = 0;
+    int ret = cr_xml_parse_filelists(TEST_REPO_01_FILELISTS,
+                                     NULL, NULL,    /* default newpkgcb */
+                                     pkgcb, &parsed,
+                                     &tmp_err);
+
+    g_assert(tmp_err == NULL);
+    g_assert_cmpint(ret, ==, CRE_OK);
+    g_assert_cmpint(parsed, ==, 1);
+}
+
+/* Filelists of the test repo 02 must be parsed without any error and
+ * the package callback must be called exactly twice. */
+static void test_cr_xml_parse_filelists_02(void)
+{
+    GError *tmp_err = NULL;
+    int parsed = 0;
+    int ret = cr_xml_parse_filelists(TEST_REPO_02_FILELISTS,
+                                     NULL, NULL,    /* default newpkgcb */
+                                     pkgcb, &parsed,
+                                     &tmp_err);
+
+    g_assert(tmp_err == NULL);
+    g_assert_cmpint(ret, ==, CRE_OK);
+    g_assert_cmpint(parsed, ==, 2);
+}
+
+/* Register all filelists parser test cases and run them. */
+int main(int argc, char *argv[])
+{
+    static const struct {
+        const char *path;
+        void (*func)(void);
+    } cases[] = {
+        { "/xml_parser_filelists/test_cr_xml_parse_filelists_00",
+          test_cr_xml_parse_filelists_00 },
+        { "/xml_parser_filelists/test_cr_xml_parse_filelists_01",
+          test_cr_xml_parse_filelists_01 },
+        { "/xml_parser_filelists/test_cr_xml_parse_filelists_02",
+          test_cr_xml_parse_filelists_02 },
+    };
+
+    g_test_init(&argc, &argv, NULL);
+
+    for (unsigned int i = 0; i < G_N_ELEMENTS(cases); i++)
+        g_test_add_func(cases[i].path, cases[i].func);
+
+    return g_test_run();
+}