4 * XML parser abstraction
6 * Copyright (c) 2017, Novell Inc.
8 * This program is licensed under the BSD license, read LICENSE.BSD
9 * for further information
12 #include <sys/types.h>
18 #include <libxml/parser.h>
25 #include "solv_xmlparser.h"
/*
 * Make sure the content buffer can take l more bytes on top of the
 * lcontent bytes it already holds, plus one byte for a trailing zero.
 * If the current allocation (acontent) is too small, the buffer is
 * reallocated to the required size plus 256 bytes of slack.
 *
 * NOTE(review): the required size is computed in the int parameter l, so
 * very large inputs could overflow it -- confirm callers bound the size.
 */
28 add_contentspace(struct solv_xmlparser *xmlp, int l)
/* turn the "additional bytes" argument into the total size required */
30 l += xmlp->lcontent + 1; /* plus room for trailing zero */
31 if (l > xmlp->acontent)
/* over-allocate by 256 bytes; presumably to avoid a realloc on every small append */
33 xmlp->acontent = l + 256;
34 xmlp->content = solv_realloc(xmlp->content, xmlp->acontent);
/*
 * Character-data callback (installed as sax.characters for libxml2 and via
 * XML_SetCharacterDataHandler for expat).  Appends the len bytes at s to
 * xmlp->content and advances xmlp->lcontent accordingly.
 *
 * Two signatures follow, one typed with xmlChar and one with XML_Char.
 * NOTE(review): presumably only one of them is compiled, selected by a
 * preprocessor conditional -- confirm.
 */
41 character_data(void *userData, const xmlChar *s, int len)
44 character_data(void *userData, const XML_Char *s, int len)
/* userData is the struct solv_xmlparser that was registered with the XML library */
47 struct solv_xmlparser *xmlp = userData;
/*
 * Test for "nothing to collect": content collection is switched off for the
 * current element, or the chunk is empty.
 * NOTE(review): presumably the guarded statement is an early return -- confirm.
 */
49 if (!xmlp->docontent || !len)
51 add_contentspace(xmlp, len);
/* raw byte copy; no terminator is written here, end_element stores the trailing zero */
52 memcpy(xmlp->content + xmlp->lcontent, s, len);
53 xmlp->lcontent += len;
/*
 * Replace every "&#38;" character reference in the NUL-terminated string
 * `at` with a plain '&', editing the string in place.
 *
 * The string can only get shorter, so no extra space is needed.  A '&'
 * that is not followed by "#38;" is left untouched, and the '&' produced
 * by a replacement is not examined again, so "&#38;#38;" becomes "&#38;"
 * rather than "&".
 */
static void fixup_att_inplace(char *at)
{
  while ((at = strchr(at, '&')) != 0)
    {
      /* step past the '&' so that the search makes progress */
      at++;
      /*
       * strncmp() stops at the string terminator, so a '&' located fewer
       * than four bytes before the end of the string is checked without
       * reading beyond the buffer (memcmp() would inspect all four bytes).
       */
      if (!strncmp(at, "#38;", 4))
        {
          /* drop the four bytes of "#38;"; the +1 moves the terminator too */
          memmove(at, at + 4, strlen(at + 4) + 1);
        }
    }
}
/*
 * Build an attribute array in which the strings containing '&' have been
 * run through fixup_att_inplace().
 *
 * atts is a NULL-terminated array of attribute strings.  Strings that
 * contain '&' are copied into storage owned by xmlp and fixed up there;
 * every other entry keeps pointing at the caller's string.
 *
 * xmlp->attsdata is one allocation: the pointer array (natts + 1 entries)
 * comes first, followed by the space for the copied strings.  The block is
 * realloc'ed on later calls, so a returned array is only valid until then.
 */
67 static const xmlChar **fixup_atts(struct solv_xmlparser *xmlp, const xmlChar **atts)
/* pass 1: count the entries and add up the bytes needed for the copies */
73 for (natts = 0; atts[natts]; natts++)
74 if (strchr((char *)atts[natts], '&'))
75 needsize += strlen((const char *)atts[natts]) + 1;
/* a single block: (natts + 1) pointers, then needsize bytes of string storage */
78 at = xmlp->attsdata = solv_realloc(xmlp->attsdata, (natts + 1) * sizeof(xmlChar *) + needsize);
/* needsize is reused as the byte offset of the string area, which starts right after the pointers */
79 needsize = (natts + 1) * sizeof(xmlChar *);
/* pass 2: fill the pointer array */
80 for (natts = 0; atts[natts]; natts++)
/* default: share the caller's string */
82 at[natts] = (char *)atts[natts];
83 if (strchr(at[natts], '&'))
/* this string needs fixing: copy it, terminator included, into our own storage */
85 size_t l = strlen(at[natts]) + 1;
86 memcpy((char *)at + needsize, at[natts], l);
87 at[natts] = (char *)at + needsize;
/*
 * NOTE(review): the offset has to advance by l for each copied string;
 * presumably done by a statement not shown here -- confirm.
 */
/* decode the references in the private copy, leaving the caller's string untouched */
89 fixup_att_inplace(at[natts]);
93 return (const xmlChar **)at;
/*
 * Start-tag callback (installed as sax.startElement for libxml2 and through
 * XML_SetElementHandler for expat).
 *
 * Looks up `name` among the elements that may start in the current state.
 * On a match the current state is pushed on xmlp->elementq, the parser
 * switches to the element's tostate and docontent setting, and the user's
 * startelement callback is called if the state actually changed.
 *
 * Two signatures follow (xmlChar based and plain char based).
 * NOTE(review): presumably only one is compiled, selected by a preprocessor
 * conditional -- confirm.
 */
99 start_element(void *userData, const xmlChar *name, const xmlChar **atts)
102 start_element(void *userData, const char *name, const char **atts)
105 struct solv_xmlparser *xmlp = userData;
106 struct solv_xmlparser_element *elements;
108 struct solv_xmlparser_element *el;
/*
 * NOTE(review): a non-zero unknowncnt presumably means we are nested inside
 * an element that was not recognised -- confirm what the guarded code does.
 */
111 if (xmlp->unknowncnt)
116 elementhelper = xmlp->elementhelper;
117 elements = xmlp->elements;
118 oldstate = xmlp->state;
/*
 * Walk the list of elements whose fromstate is oldstate:
 * elementhelper[nelements + state] holds the 1-based index of the first
 * candidate, elementhelper[i - 1] the next one, and 0 ends the list.
 */
119 for (i = elementhelper[xmlp->nelements + oldstate]; i; i = elementhelper[i - 1])
120 if (!strcmp(elements[i - 1].element, (char *)name))
/* diagnostic naming the tag; NOTE(review): the condition guarding it is not visible -- presumably the no-match case */
125 fprintf(stderr, "into unknown: %s\n", name);
/* i is 1-based, hence the - 1 */
130 el = xmlp->elements + i - 1;
/* remember the state to go back to; end_element pops it */
131 queue_push(&xmlp->elementq, xmlp->state);
132 xmlp->state = el->tostate;
133 xmlp->docontent = el->docontent;
/*
 * Substitute an empty attribute list: nullattr is a static (hence NULL)
 * pointer, so its address works as a NULL-terminated array with no entries.
 * NOTE(review): presumably used when the library passes atts == NULL -- confirm.
 */
138 static const char *nullattr;
139 atts = (const xmlChar **)&nullattr;
/* only do the attribute fixup when the callback below is going to run */
141 else if (xmlp->state != oldstate)
142 atts = fixup_atts(xmlp, atts);
/* elements that leave the state unchanged are not reported to the user */
144 if (xmlp->state != oldstate)
145 xmlp->startelement(xmlp, xmlp->state, el->element, (const char **)atts);
/*
 * End-tag callback (installed as sax.endElement for libxml2 and through
 * XML_SetElementHandler for expat).
 *
 * Terminates the collected content, reports the element to the user's
 * endelement callback if it had changed the state, and restores the state
 * that was saved by start_element.
 *
 * Two signatures follow (xmlChar based and plain char based).
 * NOTE(review): presumably only one is compiled, selected by a preprocessor
 * conditional -- confirm.
 */
150 end_element(void *userData, const xmlChar *name)
153 end_element(void *userData, const char *name)
156 struct solv_xmlparser *xmlp = userData;
/*
 * NOTE(review): a non-zero unknowncnt presumably means we are leaving an
 * element that was not recognised -- confirm what the guarded code does.
 */
158 if (xmlp->unknowncnt)
/* terminate the collected character data; add_contentspace reserves the byte for this */
165 xmlp->content[xmlp->lcontent] = 0;
/*
 * Call the user only if this element changed the state, i.e. the state
 * saved on top of the queue differs from the current one.
 */
166 if (xmlp->elementq.count && xmlp->state != xmlp->elementq.elements[xmlp->elementq.count - 1])
167 xmlp->endelement(xmlp, xmlp->state, xmlp->content);
/*
 * Go back to the state that was active before the matching start tag.
 * NOTE(review): the pop is not guarded by the elementq.count test above;
 * confirm queue_pop tolerates an empty queue.
 */
168 xmlp->state = queue_pop(&xmlp->elementq);
/*
 * Initialise *xmlp for parsing with the given element table.
 *
 *   elements      array of element descriptions, ended by an entry whose
 *                 element name is NULL; the array is referenced, not copied
 *   userdata      stored in xmlp->userdata
 *   startelement  called by the start-tag handler with the new state, the
 *                 element name and the attribute array
 *   endelement    called by the end-tag handler with the state being left
 *                 and the collected content
 *
 * The structure is zeroed first, so anything it held before is discarded
 * without being freed.  Release with solv_xmlparser_free().
 */
174 solv_xmlparser_init(struct solv_xmlparser *xmlp,
175 struct solv_xmlparser_element *elements,
177 void (*startelement)(struct solv_xmlparser *, int state, const char *name, const char **atts),
178 void (*endelement)(struct solv_xmlparser *, int state, char *content))
180 int i, nstates, nelements;
181 struct solv_xmlparser_element *el;
184 memset(xmlp, 0, sizeof(*xmlp));
/* scan the table up to its NULL-named end marker, tracking the largest state number used */
187 for (el = elements; el->element; el++)
190 if (el->fromstate > nstates)
191 nstates = el->fromstate;
192 if (el->tostate > nstates)
193 nstates = el->tostate;
197 xmlp->elements = elements;
198 xmlp->nelements = nelements;
/*
 * elementhelper layout:
 *   [0, nelements)                     per-element "next" links
 *   [nelements, nelements + nstates)   per-state list heads
 * Links and heads are 1-based element indexes; 0 (the calloc default)
 * means "end of list".
 */
199 elementhelper = solv_calloc(nelements + nstates, sizeof(Id));
/* inserting at the head while walking backwards leaves each state's list in table order */
200 for (i = nelements - 1; i >= 0; i--)
202 int fromstate = elements[i].fromstate;
203 elementhelper[i] = elementhelper[nelements + fromstate];
204 elementhelper[nelements + fromstate] = i + 1;
206 xmlp->elementhelper = elementhelper;
207 queue_init(&xmlp->elementq);
/* initial content buffer; add_contentspace() grows it on demand */
208 xmlp->acontent = 256;
209 xmlp->content = solv_malloc(xmlp->acontent);
211 xmlp->userdata = userdata;
212 xmlp->startelement = startelement;
213 xmlp->endelement = endelement;
/*
 * Release everything the parser structure owns: the element lookup table,
 * the state queue, the content buffer, the stored error string and the
 * attribute fixup storage.  The element table passed to
 * solv_xmlparser_init() and the structure itself are not freed.
 *
 * Each pointer is overwritten with the return value of solv_free();
 * NOTE(review): presumably NULL, which would leave the structure without
 * dangling pointers -- confirm.
 */
217 solv_xmlparser_free(struct solv_xmlparser *xmlp)
219 xmlp->elementhelper = solv_free(xmlp->elementhelper);
220 queue_free(&xmlp->elementq);
221 xmlp->content = solv_free(xmlp->content);
222 xmlp->errstr = solv_free(xmlp->errstr);
223 xmlp->attsdata = solv_free(xmlp->attsdata);
/*
 * Record an error on xmlp: any previously stored message is freed and
 * replaced by a private copy of errstr, and the column is stored.
 * NOTE(review): the line argument is presumably stored as well, by a
 * statement not shown here -- confirm.
 */
227 set_error(struct solv_xmlparser *xmlp, const char *errstr, unsigned int line, unsigned int column)
/* drop the old message first so repeated errors do not leak */
229 solv_free(xmlp->errstr);
/* copy: the caller's string may belong to the XML library */
230 xmlp->errstr = solv_strdup(errstr);
232 xmlp->column = column;
/*
 * create_parser, apparently the libxml2 flavour: no parser context is
 * created here, parse_block creates it when the first block arrives.
 */
238 create_parser(struct solv_xmlparser *xmlp)
240 /* delayed to parse_block so that we have the first bytes */
/* free_parser, libxml2 flavour: release the parser context held in xmlp->parser. */
245 free_parser(struct solv_xmlparser *xmlp)
248 xmlFreeParserCtxt(xmlp->parser);
/*
 * Create a libxml2 push-parser context that reports start tags, end tags
 * and character data to the handlers of this file, with xmlp as the user
 * data handed to them.  buf/l are the initial bytes passed to
 * xmlCreatePushParserCtxt(); no file name is given (last argument NULL).
 *
 * Returns whatever xmlCreatePushParserCtxt() returns.
 */
252 static xmlParserCtxtPtr
253 create_parser_ctx(struct solv_xmlparser *xmlp, char *buf, int l)
/* start from an all-zero handler table: every callback not set below stays NULL */
256 memset(&sax, 0, sizeof(sax));
257 sax.startElement = start_element;
258 sax.endElement = end_element;
259 sax.characters = character_data;
260 return xmlCreatePushParserCtxt(&sax, xmlp, buf, l, NULL);
/*
 * parse_block, libxml2 flavour: feed one block of l bytes to the push
 * parser.  l == 0 marks the end of the input and is passed on as the
 * terminate flag of xmlParseChunk().  Failures are recorded with
 * set_error().
 */
264 parse_block(struct solv_xmlparser *xmlp, char *buf, int l)
/*
 * Context creation (delayed from create_parser): it is given the first 4
 * bytes of the block when the block is longer than 4 bytes, otherwise no
 * initial bytes at all.
 */
268 int l2 = l > 4 ? 4 : 0;
269 xmlp->parser = create_parser_ctx(xmlp, buf, l2);
272 set_error(xmlp, "could not create parser", 0, 0);
/* xmlParseChunk() returns non-zero on a parse error */
280 if (xmlParseChunk(xmlp->parser, buf, l, l == 0 ? 1 : 0))
/*
 * NOTE(review): xmlCtxtGetLastError() is documented to return NULL when no
 * error is recorded; err is dereferenced unchecked on the next line --
 * confirm this cannot happen after a failed xmlParseChunk().
 */
282 xmlErrorPtr err = xmlCtxtGetLastError(xmlp->parser);
/* int2 is passed as the column argument of set_error() */
283 set_error(xmlp, err->message, err->line, err->int2);
/*
 * solv_xmlparser_lineno, libxml2 flavour: return the parser's current line
 * number as reported by xmlSAX2GetLineNumber(), converted to unsigned int.
 */
290 solv_xmlparser_lineno(struct solv_xmlparser *xmlp)
292 return (unsigned int)xmlSAX2GetLineNumber(xmlp->parser);
/*
 * create_parser, expat flavour: create the parser (no encoding override,
 * the argument is NULL) and register xmlp as user data together with the
 * element and character-data handlers of this file.
 */
298 create_parser(struct solv_xmlparser *xmlp)
300 xmlp->parser = XML_ParserCreate(NULL);
/* xmlp is what the handlers receive as their userData argument */
303 XML_SetUserData(xmlp->parser, xmlp);
304 XML_SetElementHandler(xmlp->parser, start_element, end_element);
305 XML_SetCharacterDataHandler(xmlp->parser, character_data);
/* free_parser, expat flavour: release the parser held in xmlp->parser. */
310 free_parser(struct solv_xmlparser *xmlp)
312 XML_ParserFree(xmlp->parser);
/*
 * parse_block, expat flavour: feed one block of l bytes to expat.  l == 0
 * is passed as the isFinal flag, i.e. an empty block ends the document.
 * On XML_STATUS_ERROR the expat error text and the parser's current line
 * and column are recorded with set_error().
 */
317 parse_block(struct solv_xmlparser *xmlp, char *buf, int l)
319 if (XML_Parse(xmlp->parser, buf, l, l == 0) == XML_STATUS_ERROR)
321 set_error(xmlp, XML_ErrorString(XML_GetErrorCode(xmlp->parser)), XML_GetCurrentLineNumber(xmlp->parser), XML_GetCurrentColumnNumber(xmlp->parser));
/*
 * solv_xmlparser_lineno, expat flavour: return the parser's current line
 * number as reported by XML_GetCurrentLineNumber(), converted to unsigned int.
 */
328 solv_xmlparser_lineno(struct solv_xmlparser *xmlp)
330 return (unsigned int)XML_GetCurrentLineNumber(xmlp->parser);
/*
 * Parse the XML document read from fp, driving the callbacks that were
 * given to solv_xmlparser_init().
 *
 * The input is read in buffer-sized blocks and handed to parse_block().
 * ret starts out as SOLV_XMLPARSER_OK and becomes SOLV_XMLPARSER_ERROR when
 * a block fails to parse; SOLV_XMLPARSER_ERROR is also returned right away
 * when the parser cannot be created.  Error details are stored with
 * set_error().
 */
336 solv_xmlparser_parse(struct solv_xmlparser *xmlp, FILE *fp)
339 int l, ret = SOLV_XMLPARSER_OK;
/* reset per-run state so that the same structure can be used for another document */
342 xmlp->unknowncnt = 0;
345 queue_empty(&xmlp->elementq);
347 if (!create_parser(xmlp))
349 set_error(xmlp, "could not create parser", 0, 0);
350 return SOLV_XMLPARSER_ERROR;
/*
 * The byte count is passed on as is; parse_block treats l == 0 as the end
 * of the input.
 * NOTE(review): a read error also shows up as a short count and is not
 * told apart from end-of-file (no ferror() check in the visible code).
 */
354 l = fread(buf, 1, sizeof(buf), fp);
355 if (!parse_block(xmlp, buf, l))
357 ret = SOLV_XMLPARSER_ERROR;
/*
 * Make sure the content buffer is at least l bytes large and return it.
 *
 * Unlike add_contentspace(), l is compared directly against the current
 * allocation: the bytes already collected (lcontent) are not added to it.
 * When the buffer has to grow it is reallocated to l + 256 bytes, so a
 * pointer obtained from an earlier call must not be used afterwards.
 */
368 solv_xmlparser_contentspace(struct solv_xmlparser *xmlp, int l)
371 if (l > xmlp->acontent)
373 xmlp->acontent = l + 256;
374 xmlp->content = solv_realloc(xmlp->content, xmlp->acontent);
376 return xmlp->content;