1 /* EINA - EFL data type library
2 * Copyright (C) 2011 Gustavo Sverzut Barbieri
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library;
17 * if not, see <http://www.gnu.org/licenses/>.
20 #ifndef EINA_SIMPLE_XML_H_
21 #define EINA_SIMPLE_XML_H_
23 #include "eina_config.h"
25 #include "eina_types.h"
26 #include "eina_magic.h"
27 #include "eina_inlist.h"
30 * @page eina_simple_xml_parser_example_01_page
31 * @dontinclude eina_simple_xml_parser_01.c
33 * We are going to parse an XML sample file and print the data to stdout.
35 * Like all examples we start by including Eina:
38 * We declare 2 booleans to keep track of tags:
40 * @skipline tag_message
42 * Here we declare some variables and initialize eina:
45 * We fill buffer with the XML data from chat.xml:
48 * We will use an Eina_Array to store the data:
51 * Here we call eina_simple_xml_parse(). We pass the buffer with data, its size,
52 * we ask to strip leading and trailing whitespace, we give the callback
53 * function and the array to store the formatted data:
56 * This will loop over the array and print the data using _print callback:
59 * This is the main XML parser callback, it will check for known tags and get
60 * the corresponding values:
64 * We first check for opening tag:
67 * If we know the tag should have attributes, then we find them using
68 * eina_simple_xml_tag_attributes_find() and give them to another parsing
69 * function using eina_simple_xml_attributes_parse():
72 * We check for other known tags:
75 * We then check data for corresponding tag:
78 * We do the formatting at the same time and put all the \<post\> children
82 * Finally, we store our string in the array:
85 * This is the callback to parse the attributes, we check for key name and keep
90 * This is the function that simply prints the items of the array:
93 * You can see the full source code
94 * @ref eina_simple_xml_parser_example_01 "here".
98 * @page eina_simple_xml_parser_example_01
99 * @include eina_simple_xml_parser_01.c
100 * @example eina_simple_xml_parser_01.c
104 * @defgroup Eina_Simple_XML_Group Simple_XML
106 * Simplistic relaxed SAX-like XML parser.
108 * This parser is far from being compliant with the XML standard, but will
109 * do for most XMLs out there. If you know that your format is simple
110 * and will not vary in future with strange corner cases, then you can
113 * The parser is SAX like, that is, it will tokenize contents and call
114 * you back so you can take some action. No content is allocated
115 * while the parser works and it's not recursive, so you can use it
116 * with a very large document without worries.
118 * It will not validate the document in any way, nor will it create a
119 * tree hierarchy. That's up to you.
121 * According to XML, open tags may contain attributes. This parser
122 * will not tokenize this. If you want you can use
123 * eina_simple_xml_tag_attributes_find() and then
124 * eina_simple_xml_attributes_parse().
126 * For more information, see
127 * @ref eina_simple_xml_parser_example_01_page "this example".
131 * @addtogroup Eina_Tools_Group Tools
137 * @defgroup Eina_Simple_XML_Group Simple_XML
142 typedef struct _Eina_Simple_XML_Node Eina_Simple_XML_Node; /*!< generic node; embedded as @c base in tag and data nodes */
143 typedef struct _Eina_Simple_XML_Node_Tag Eina_Simple_XML_Node_Root; /*!< document root; same struct as Eina_Simple_XML_Node_Tag */
144 typedef struct _Eina_Simple_XML_Node_Tag Eina_Simple_XML_Node_Tag; /*!< tag node holding children and attributes lists */
145 typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Data; /*!< data node */
146 typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_CData; /*!< cdata node; same struct as Eina_Simple_XML_Node_Data */
147 typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Processing; /*!< processing node; same struct as Eina_Simple_XML_Node_Data */
148 typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Doctype; /*!< doctype node; same struct as Eina_Simple_XML_Node_Data */
149 typedef struct _Eina_Simple_XML_Node_Data Eina_Simple_XML_Node_Comment; /*!< comment node; same struct as Eina_Simple_XML_Node_Data */
150 typedef struct _Eina_Simple_XML_Attribute Eina_Simple_XML_Attribute; /*!< attribute that can be attached to a tag node */
152 struct _Eina_Simple_XML_Attribute /* attribute of a tag; created by eina_simple_xml_attribute_new() */
157 Eina_Simple_XML_Node_Tag *parent; /*!< tag given as parent to eina_simple_xml_attribute_new(); presumably NULL when none was given - TODO confirm */
162 typedef enum _Eina_Simple_XML_Node_Type /* kind of node, as held in the @c type member of Eina_Simple_XML_Node */
164 EINA_SIMPLE_XML_NODE_ROOT = 0, /*!< presumably an Eina_Simple_XML_Node_Root - TODO confirm */
165 EINA_SIMPLE_XML_NODE_TAG, /*!< presumably an Eina_Simple_XML_Node_Tag - TODO confirm */
166 EINA_SIMPLE_XML_NODE_DATA, /*!< presumably an Eina_Simple_XML_Node_Data - TODO confirm */
167 EINA_SIMPLE_XML_NODE_CDATA, /*!< presumably an Eina_Simple_XML_Node_CData - TODO confirm */
168 EINA_SIMPLE_XML_NODE_PROCESSING, /*!< presumably an Eina_Simple_XML_Node_Processing - TODO confirm */
169 EINA_SIMPLE_XML_NODE_DOCTYPE, /*!< presumably an Eina_Simple_XML_Node_Doctype - TODO confirm */
170 EINA_SIMPLE_XML_NODE_COMMENT /*!< presumably an Eina_Simple_XML_Node_Comment - TODO confirm */
171 } Eina_Simple_XML_Node_Type;
173 struct _Eina_Simple_XML_Node /* generic node; embedded as @c base in the tag and data node structs */
178 Eina_Simple_XML_Node_Tag *parent; /*!< tag this node belongs to; presumably NULL for a node created without parent - TODO confirm */
179 Eina_Simple_XML_Node_Type type; /*!< kind of node, one of Eina_Simple_XML_Node_Type */
182 struct _Eina_Simple_XML_Node_Tag /* tag node; also used for the document root (Eina_Simple_XML_Node_Root) */
184 Eina_Simple_XML_Node base; /*!< generic node part (parent and type) */
185 Eina_Inlist *children; /*!< children list; nodes created with this tag as parent are appended here */
186 Eina_Inlist *attributes; /*!< attributes list; attributes created with this tag as parent are appended here */
190 struct _Eina_Simple_XML_Node_Data /* node struct shared by data, cdata, processing, doctype and comment nodes */
192 Eina_Simple_XML_Node base; /*!< generic node part (parent and type) */
197 * @typedef Eina_Simple_XML_Type
200 typedef enum _Eina_Simple_XML_Type /* token kind handed to an Eina_Simple_XML_Cb as its @a type argument */
202 EINA_SIMPLE_XML_OPEN = 0, /*!< \<tag attribute="value"\> */
203 EINA_SIMPLE_XML_OPEN_EMPTY, /*!< \<tag attribute="value" /\> */
204 EINA_SIMPLE_XML_CLOSE, /*!< \</tag\> */
205 EINA_SIMPLE_XML_DATA, /*!< tag text data */
206 EINA_SIMPLE_XML_CDATA, /*!< \<![CDATA[something]]\> */
207 EINA_SIMPLE_XML_ERROR, /*!< error contents */
208 EINA_SIMPLE_XML_PROCESSING, /*!< \<?xml ... ?\> \<?php .. ?\> */
209 EINA_SIMPLE_XML_DOCTYPE, /*!< \<!DOCTYPE html\> */
210 EINA_SIMPLE_XML_COMMENT, /*!< \<!-- something --\> */
211 EINA_SIMPLE_XML_IGNORED /*!< whatever is ignored by parser, like whitespace */
212 } Eina_Simple_XML_Type;
214 typedef Eina_Bool (*Eina_Simple_XML_Cb)(void *data, Eina_Simple_XML_Type type, const char *content, unsigned offset, unsigned length); /*!< token callback for eina_simple_xml_parse(): @a content is not null-terminated and spans @a length bytes, @a offset locates it inside the parsed buffer; return #EINA_FALSE to abort parsing */
215 typedef Eina_Bool (*Eina_Simple_XML_Attribute_Cb)(void *data, const char *key, const char *value); /*!< attribute callback for eina_simple_xml_attributes_parse(): @a key and @a value are null-terminated, must not be modified and are only valid during the call */
219 * Parse a section of XML string text
221 * @param buf the input string. Need not be \0 terminated.
222 * @param buflen the input string size.
223 * @param strip whether this parser should strip leading and trailing
224 * whitespace. This whitespace will still be issued, but as type
225 * #EINA_SIMPLE_XML_IGNORED.
226 * @param func what to call back while parsing to do some action. The
227 * first parameter is the given user @a data, the second is the
228 * token type, the third is the pointer to content start (it's
229 * not a NULL terminated string!), the fourth is where this
230 * content is located inside @a buf (does not include tag
231 * start, for instance "<!DOCTYPE value>" the offset points at
232 * "value"), the fifth is the size of the content. Whenever this
233 * function returns #EINA_FALSE the parser will abort.
234 * @param data what to give as context to @a func.
236 * @return #EINA_TRUE on success or #EINA_FALSE if it was aborted by user or
239 EAPI Eina_Bool eina_simple_xml_parse(const char *buf, unsigned buflen,
241 Eina_Simple_XML_Cb func, const void *data);
245 * Given the contents of a tag, find where the attributes start.
247 * @param buf the input string. Need not be \0 terminated.
248 * @param buflen the input string size.
249 * @return pointer to the start of attributes, it can be used
250 * to feed eina_simple_xml_attributes_parse(). @c NULL is returned
251 * if no attributes were found.
253 * The tag contents is returned by eina_simple_xml_parse() when
254 * type is #EINA_SIMPLE_XML_OPEN or #EINA_SIMPLE_XML_OPEN_EMPTY.
257 EAPI const char * eina_simple_xml_tag_attributes_find(const char *buf, unsigned buflen);
260 * Given a buffer with xml attributes, parse them to key=value pairs.
262 * @param buf the input string. Need not be \0 terminated.
263 * @param buflen the input string size.
264 * @param func what to call back while parsing to do some action. The
265 * first parameter is the given user @a data, the second is the
266 * key (null-terminated) and the last is the value (null
267 * terminated). These strings should not be modified and
268 * the reference is only valid until the function returns.
269 * @param data data to pass to the callback function.
271 * @return #EINA_TRUE on success or #EINA_FALSE if it was aborted by user or
274 EAPI Eina_Bool eina_simple_xml_attributes_parse(const char *buf, unsigned buflen,
275 Eina_Simple_XML_Attribute_Cb func, const void *data);
278 * Create (and append) new attribute to tag.
280 * @param parent if provided, will be set in the resulting structure
281 * as well as the attribute will be appended to attributes list.
282 * @param key Null-terminated string. Must not be @c NULL.
283 * @param value Null-terminated string. If @c NULL, the empty string will be used.
285 * @return Newly allocated memory or @c NULL on error. This memory should be
286 * released with eina_simple_xml_attribute_free() or indirectly
287 * with eina_simple_xml_node_tag_free().
289 EAPI Eina_Simple_XML_Attribute * eina_simple_xml_attribute_new(Eina_Simple_XML_Node_Tag *parent, const char *key, const char *value);
292 * Remove attribute from parent and delete it.
294 * @param attr attribute to release memory.
296 EAPI void eina_simple_xml_attribute_free(Eina_Simple_XML_Attribute *attr);
300 * Create new tag. If parent is provided, it is automatically appended.
302 * @param parent if provided, will be set in the resulting structure
303 * as well as the tag will be appended to children list.
304 * @param name Null-terminated string. Must not be @c NULL.
306 * @return Newly allocated memory or @c NULL on error. This memory should be
307 * released with eina_simple_xml_node_tag_free() or indirectly
308 * with eina_simple_xml_node_tag_free() of the parent.
310 EAPI Eina_Simple_XML_Node_Tag * eina_simple_xml_node_tag_new(Eina_Simple_XML_Node_Tag *parent, const char *name);
313 * Remove tag from parent and delete it.
315 * @param tag tag to release memory.
317 EAPI void eina_simple_xml_node_tag_free(Eina_Simple_XML_Node_Tag *tag);
321 * Create new data. If parent is provided, it is automatically appended.
323 * @param parent if provided, will be set in the resulting structure
324 * as well as the data will be appended to children list.
325 * @param contents String to be used. Must not be @c NULL.
326 * @param length size in bytes of @a contents.
328 * @return Newly allocated memory or @c NULL on error. This memory should be
329 * released with eina_simple_xml_node_data_free() or indirectly
330 * with eina_simple_xml_node_tag_free() of the parent.
332 EAPI Eina_Simple_XML_Node_Data * eina_simple_xml_node_data_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);
335 * Remove data from parent and delete it.
337 * @param node data to release memory.
339 EAPI void eina_simple_xml_node_data_free(Eina_Simple_XML_Node_Data *node);
343 * Create new cdata. If parent is provided, it is automatically appended.
345 * @param parent if provided, will be set in the resulting structure
346 * as well as the cdata will be appended to children list.
347 * @param contents String to be used. Must not be @c NULL.
348 * @param length size in bytes of @a contents.
350 * @return Newly allocated memory or @c NULL on error. This memory should be
351 * released with eina_simple_xml_node_cdata_free() or indirectly
352 * with eina_simple_xml_node_tag_free() of the parent.
354 EAPI Eina_Simple_XML_Node_CData * eina_simple_xml_node_cdata_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);
357 * Remove cdata from parent and delete it.
359 * @param node cdata to release memory.
361 EAPI void eina_simple_xml_node_cdata_free(Eina_Simple_XML_Node_Data *node);
365 * Create new processing. If parent is provided, it is automatically appended.
367 * @param parent if provided, will be set in the resulting structure
368 * as well as the processing will be appended to children list.
369 * @param contents String to be used. Must not be @c NULL.
370 * @param length size in bytes of @a contents.
372 * @return Newly allocated memory or @c NULL on error. This memory should be
373 * released with eina_simple_xml_node_processing_free() or indirectly
374 * with eina_simple_xml_node_tag_free() of the parent.
376 EAPI Eina_Simple_XML_Node_Processing * eina_simple_xml_node_processing_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);
379 * Remove processing from parent and delete it.
381 * @param node processing to release memory.
383 EAPI void eina_simple_xml_node_processing_free(Eina_Simple_XML_Node_Data *node);
387 * Create new doctype. If parent is provided, it is automatically appended.
389 * @param parent if provided, will be set in the resulting structure
390 * as well as the doctype will be appended to children list.
391 * @param contents String to be used. Must not be @c NULL.
392 * @param length size in bytes of @a contents.
394 * @return Newly allocated memory or @c NULL on error. This memory should be
395 * released with eina_simple_xml_node_doctype_free() or indirectly
396 * with eina_simple_xml_node_tag_free() of the parent.
398 EAPI Eina_Simple_XML_Node_Doctype * eina_simple_xml_node_doctype_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);
401 * Remove doctype from parent and delete it.
403 * @param node doctype to release memory.
405 EAPI void eina_simple_xml_node_doctype_free(Eina_Simple_XML_Node_Data *node);
409 * Create new comment. If parent is provided, it is automatically appended.
411 * @param parent if provided, will be set in the resulting structure
412 * as well as the comment will be appended to children list.
413 * @param contents String to be used. Must not be @c NULL.
414 * @param length size in bytes of @a contents.
416 * @return Newly allocated memory or @c NULL on error. This memory should be
417 * released with eina_simple_xml_node_comment_free() or indirectly
418 * with eina_simple_xml_node_tag_free() of the parent.
420 EAPI Eina_Simple_XML_Node_Comment * eina_simple_xml_node_comment_new(Eina_Simple_XML_Node_Tag *parent, const char *contents, size_t length);
423 * Remove comment from parent and delete it.
425 * @param node comment to release memory.
427 EAPI void eina_simple_xml_node_comment_free(Eina_Simple_XML_Node_Data *node);
431 * Load an XML node tree based on the given string.
433 * @param buf the input string. Need not be \0 terminated.
434 * @param buflen the input string size.
435 * @param strip whether this parser should strip leading and trailing
438 * @return Document root with children tags, or @c NULL on errors.
439 * Document with errors may return partial tree instead of @c NULL,
440 * we'll do our best to avoid returning nothing.
442 EAPI Eina_Simple_XML_Node_Root * eina_simple_xml_node_load(const char *buf, unsigned buflen, Eina_Bool strip);
445 * Free node tree built with eina_simple_xml_node_load()
447 * @param root memory returned by eina_simple_xml_node_load()
449 EAPI void eina_simple_xml_node_root_free(Eina_Simple_XML_Node_Root *root);
452 * Converts the node tree under the given element to an XML string.
454 * @param node the base node to convert.
455 * @param indent Indentation string, or @c NULL to disable it.
457 * @return @c NULL on errors or a newly allocated string on success.
459 EAPI char * eina_simple_xml_node_dump(Eina_Simple_XML_Node *node, const char *indent);
470 #endif /* EINA_SIMPLE_XML_H_ */