2 * Summary: the core parser module
3 * Description: Interfaces, constants and types related to the XML parser
5 * Copy: See Copyright for the status of this software.
7 * Author: Daniel Veillard
10 #ifndef __XML_PARSER_H__
11 #define __XML_PARSER_H__
15 #include <libxml/xmlversion.h>
16 #include <libxml/tree.h>
17 #include <libxml/dict.h>
18 #include <libxml/hash.h>
19 #include <libxml/valid.h>
20 #include <libxml/entities.h>
21 #include <libxml/xmlerror.h>
22 #include <libxml/xmlstring.h>
29 * XML_DEFAULT_VERSION:
31 * The default version of XML used: 1.0
33 #define XML_DEFAULT_VERSION "1.0"
38 * An xmlParserInput is an input flow for the XML processor.
39 * Each entity parsed is associated an xmlParserInput (except the
40 * few predefined ones). This is the case both for internal entities
41 * - in which case the flow is already completely in memory - or
42 * external entities - in which case we use the buf structure for
43 * progressive reading and I18N conversions to the internal UTF-8 format.
47 * xmlParserInputDeallocate:
48 * @str: the string to deallocate
50 * Callback for freeing some parser input allocations.
52 typedef void (* xmlParserInputDeallocate)(xmlChar *str);
54 struct _xmlParserInput {
56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
58 const char *filename; /* The file analyzed, if any */
59 const char *directory; /* the directory/base of the file */
60 const xmlChar *base; /* Base of the array to parse */
61 const xmlChar *cur; /* Current char being parsed */
62 const xmlChar *end; /* end of the array to parse */
63 int length; /* length if known */
64 int line; /* Current line */
65 int col; /* Current column */
67 * NOTE: consumed is only tested for equality in the parser code,
68 * so even if there is an overflow this should not give troubles
69 * for parsing very large instances.
71 unsigned long consumed; /* How many xmlChars already consumed */
72 xmlParserInputDeallocate free; /* function to deallocate the base */
73 const xmlChar *encoding; /* the encoding string for entity */
74 const xmlChar *version; /* the version string for entity */
75 int standalone; /* Was that entity marked standalone */
76 int id; /* a unique identifier for the entity */
82 * The parser can be asked to collect node information, i.e. at what
83 * place in the file each node was detected.
84 * NOTE: This is off by default and not very well tested.
86 typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
87 typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
89 struct _xmlParserNodeInfo {
90 const struct _xmlNode* node;
91 /* Position & line # at which the text that created the node begins & ends */
92 unsigned long begin_pos;
93 unsigned long begin_line;
94 unsigned long end_pos;
95 unsigned long end_line;
98 typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
99 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
100 struct _xmlParserNodeInfoSeq {
101 unsigned long maximum;
102 unsigned long length;
103 xmlParserNodeInfo* buffer;
107 * xmlParserInputState:
109 * The parser now also works as a state-based parser.
110 * The recursive one uses the state info for entity processing.
113 XML_PARSER_EOF = -1, /* nothing is to be parsed */
114 XML_PARSER_START = 0, /* nothing has been parsed */
115 XML_PARSER_MISC, /* Misc* before int subset */
116 XML_PARSER_PI, /* Within a processing instruction */
117 XML_PARSER_DTD, /* within some DTD content */
118 XML_PARSER_PROLOG, /* Misc* after internal subset */
119 XML_PARSER_COMMENT, /* within a comment */
120 XML_PARSER_START_TAG, /* within a start tag */
121 XML_PARSER_CONTENT, /* within the content */
122 XML_PARSER_CDATA_SECTION, /* within a CDATA section */
123 XML_PARSER_END_TAG, /* within a closing tag */
124 XML_PARSER_ENTITY_DECL, /* within an entity declaration */
125 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
126 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
127 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
128 XML_PARSER_EPILOG, /* the Misc* after the last end tag */
129 XML_PARSER_IGNORE, /* within an IGNORED section */
130 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */
131 } xmlParserInputState;
136 * Bit in the loadsubset context field to tell to do ID/REFs lookups.
137 * Use it to initialize xmlLoadExtDtdDefaultValue.
139 #define XML_DETECT_IDS 2
142 * XML_COMPLETE_ATTRS:
144 * Bit in the loadsubset context field to tell the parser to complete the
145 * elements' attribute lists with the ones defaulted from the DTDs.
146 * Use it to initialize xmlLoadExtDtdDefaultValue.
148 #define XML_COMPLETE_ATTRS 4
153 * Bit in the loadsubset context field to tell to not do ID/REFs registration.
154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
156 #define XML_SKIP_IDS 8
161 * A parser can operate in various modes
164 XML_PARSE_UNKNOWN = 0,
167 XML_PARSE_PUSH_DOM = 3,
168 XML_PARSE_PUSH_SAX = 4,
175 * The parser context.
176 * NOTE: This doesn't completely define the parser state; the (current?)
177 * design of the parser uses recursive function calls since this allows
178 * an easy mapping from the production rules of the specification
179 * to the actual code. The drawback is that the actual function calls
180 * also reflect the parser state. However, most of the parsing routines
181 * take the parser context pointer as their only argument, so migrating
182 * to a state-based parser for progressive parsing shouldn't be too hard.
184 struct _xmlParserCtxt {
185 struct _xmlSAXHandler *sax; /* The SAX handler */
186 void *userData; /* For SAX interface only, used by DOM build */
187 xmlDocPtr myDoc; /* the document being built */
188 int wellFormed; /* is the document well formed */
189 int replaceEntities; /* shall we replace entities ? */
190 const xmlChar *version; /* the XML version string */
191 const xmlChar *encoding; /* the declared encoding, if any */
192 int standalone; /* standalone document */
193 int html; /* an HTML(1)/Docbook(2) document
194 * 3 is HTML after <head>
195 * 10 is HTML after <body>
198 /* Input stream stack */
199 xmlParserInputPtr input; /* Current input stream */
200 int inputNr; /* Number of current input streams */
201 int inputMax; /* Max number of input streams */
202 xmlParserInputPtr *inputTab; /* stack of inputs */
204 /* Node analysis stack only used for DOM building */
205 xmlNodePtr node; /* Current parsed Node */
206 int nodeNr; /* Depth of the parsing stack */
207 int nodeMax; /* Max depth of the parsing stack */
208 xmlNodePtr *nodeTab; /* array of nodes */
210 int record_info; /* Whether node info should be kept */
211 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
213 int errNo; /* error code */
215 int hasExternalSubset; /* reference and external subset */
216 int hasPErefs; /* the internal subset has PE refs */
217 int external; /* are we parsing an external entity */
219 int valid; /* is the document valid */
220 int validate; /* shall we try to validate ? */
221 xmlValidCtxt vctxt; /* The validity context */
223 xmlParserInputState instate; /* current type of input */
224 int token; /* next char look-ahead */
226 char *directory; /* the data directory */
228 /* Node name stack */
229 const xmlChar *name; /* Current parsed Node */
230 int nameNr; /* Depth of the parsing stack */
231 int nameMax; /* Max depth of the parsing stack */
232 const xmlChar * *nameTab; /* array of nodes */
234 long nbChars; /* number of xmlChar processed */
235 long checkIndex; /* used by progressive parsing lookup */
236 int keepBlanks; /* ugly but ... */
237 int disableSAX; /* SAX callbacks are disabled */
238 int inSubset; /* Parsing is in int 1/ext 2 subset */
239 const xmlChar * intSubName; /* name of subset */
240 xmlChar * extSubURI; /* URI of external subset */
241 xmlChar * extSubSystem; /* SYSTEM ID of external subset */
243 /* xml:space values */
244 int * space; /* Should the parser preserve spaces */
245 int spaceNr; /* Depth of the parsing stack */
246 int spaceMax; /* Max depth of the parsing stack */
247 int * spaceTab; /* array of space infos */
249 int depth; /* to prevent entity substitution loops */
250 xmlParserInputPtr entity; /* used to check entity boundaries */
251 int charset; /* encoding of the in-memory content
252 actually an xmlCharEncoding */
253 int nodelen; /* Those two fields are there to */
254 int nodemem; /* Speed up large node parsing */
255 int pedantic; /* signal pedantic warnings */
256 void *_private; /* For user data, libxml won't touch it */
258 int loadsubset; /* should the external subset be loaded */
259 int linenumbers; /* set line number in element content */
260 void *catalogs; /* document's own catalog */
261 int recovery; /* run in recovery mode */
262 int progressive; /* is this a progressive parsing */
263 xmlDictPtr dict; /* dictionary for the parser */
264 const xmlChar * *atts; /* array for the attributes callbacks */
265 int maxatts; /* the size of the array */
266 int docdict; /* use strings from dict to build tree */
269 * pre-interned strings
271 const xmlChar *str_xml;
272 const xmlChar *str_xmlns;
273 const xmlChar *str_xml_ns;
276 * Everything below is used only by the new SAX mode
278 int sax2; /* operating in the new SAX mode */
279 int nsNr; /* the number of inherited namespaces */
280 int nsMax; /* the size of the arrays */
281 const xmlChar * *nsTab; /* the array of prefix/namespace name */
282 int *attallocs; /* which attributes were allocated */
283 void * *pushTab; /* array of data for push */
284 xmlHashTablePtr attsDefault; /* defaulted attributes if any */
285 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
286 int nsWellFormed; /* is the document XML Namespace okay */
287 int options; /* Extra options */
290 * Those fields are needed only for treaming parsing so far
292 int dictNames; /* Use dictionary names for the tree */
293 int freeElemsNr; /* number of freed element nodes */
294 xmlNodePtr freeElems; /* List of freed element nodes */
295 int freeAttrsNr; /* number of freed attribute nodes */
296 xmlAttrPtr freeAttrs; /* List of freed attribute nodes */
299 * the complete error informations for the last error.
302 xmlParserMode parseMode; /* the parser mode */
303 unsigned long nbentities; /* number of entity references */
304 unsigned long sizeentities; /* size of parsed entities */
306 /* for use by HTML non-recursive parser */
307 xmlParserNodeInfo *nodeInfo; /* Current NodeInfo */
308 int nodeInfoNr; /* Depth of the parsing stack */
309 int nodeInfoMax; /* Max depth of the parsing stack */
310 xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */
312 int input_id; /* we need to label inputs */
313 unsigned long sizeentcopy; /* volume of entity copy */
321 struct _xmlSAXLocator {
322 const xmlChar *(*getPublicId)(void *ctx);
323 const xmlChar *(*getSystemId)(void *ctx);
324 int (*getLineNumber)(void *ctx);
325 int (*getColumnNumber)(void *ctx);
331 * A SAX handler is a bunch of callbacks called by the parser when processing
332 * of the input generates data or structure information.
336 * resolveEntitySAXFunc:
337 * @ctx: the user data (XML parser context)
338 * @publicId: The public ID of the entity
339 * @systemId: The system ID of the entity
342 * The entity loader, to control the loading of external entities,
343 * the application can either:
344 * - override this resolveEntity() callback in the SAX block
345 * - or better use the xmlSetExternalEntityLoader() function to
346 * set up its own entity resolution routine
348 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
350 typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
351 const xmlChar *publicId,
352 const xmlChar *systemId);
354 * internalSubsetSAXFunc:
355 * @ctx: the user data (XML parser context)
356 * @name: the root element name
357 * @ExternalID: the external ID
358 * @SystemID: the SYSTEM ID (e.g. filename or URL)
360 * Callback on internal subset declaration.
362 typedef void (*internalSubsetSAXFunc) (void *ctx,
364 const xmlChar *ExternalID,
365 const xmlChar *SystemID);
367 * externalSubsetSAXFunc:
368 * @ctx: the user data (XML parser context)
369 * @name: the root element name
370 * @ExternalID: the external ID
371 * @SystemID: the SYSTEM ID (e.g. filename or URL)
373 * Callback on external subset declaration.
375 typedef void (*externalSubsetSAXFunc) (void *ctx,
377 const xmlChar *ExternalID,
378 const xmlChar *SystemID);
381 * @ctx: the user data (XML parser context)
382 * @name: The entity name
384 * Get an entity by name.
386 * Returns the xmlEntityPtr if found.
388 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
389 const xmlChar *name);
391 * getParameterEntitySAXFunc:
392 * @ctx: the user data (XML parser context)
393 * @name: The entity name
395 * Get a parameter entity by name.
397 * Returns the xmlEntityPtr if found.
399 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
400 const xmlChar *name);
403 * @ctx: the user data (XML parser context)
404 * @name: the entity name
405 * @type: the entity type
406 * @publicId: The public ID of the entity
407 * @systemId: The system ID of the entity
408 * @content: the entity value (without processing).
410 * An entity definition has been parsed.
412 typedef void (*entityDeclSAXFunc) (void *ctx,
415 const xmlChar *publicId,
416 const xmlChar *systemId,
419 * notationDeclSAXFunc:
420 * @ctx: the user data (XML parser context)
421 * @name: The name of the notation
422 * @publicId: The public ID of the entity
423 * @systemId: The system ID of the entity
425 * What to do when a notation declaration has been parsed.
427 typedef void (*notationDeclSAXFunc)(void *ctx,
429 const xmlChar *publicId,
430 const xmlChar *systemId);
432 * attributeDeclSAXFunc:
433 * @ctx: the user data (XML parser context)
434 * @elem: the name of the element
435 * @fullname: the attribute name
436 * @type: the attribute type
437 * @def: the type of default value
438 * @defaultValue: the attribute default value
439 * @tree: the tree of enumerated value set
441 * An attribute definition has been parsed.
443 typedef void (*attributeDeclSAXFunc)(void *ctx,
445 const xmlChar *fullname,
448 const xmlChar *defaultValue,
449 xmlEnumerationPtr tree);
451 * elementDeclSAXFunc:
452 * @ctx: the user data (XML parser context)
453 * @name: the element name
454 * @type: the element type
455 * @content: the element value tree
457 * An element definition has been parsed.
459 typedef void (*elementDeclSAXFunc)(void *ctx,
462 xmlElementContentPtr content);
464 * unparsedEntityDeclSAXFunc:
465 * @ctx: the user data (XML parser context)
466 * @name: The name of the entity
467 * @publicId: The public ID of the entity
468 * @systemId: The system ID of the entity
469 * @notationName: the name of the notation
471 * What to do when an unparsed entity declaration is parsed.
473 typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
475 const xmlChar *publicId,
476 const xmlChar *systemId,
477 const xmlChar *notationName);
479 * setDocumentLocatorSAXFunc:
480 * @ctx: the user data (XML parser context)
481 * @loc: A SAX Locator
483 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
484 * Everything is available on the context, so this is useless in our case.
486 typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
487 xmlSAXLocatorPtr loc);
489 * startDocumentSAXFunc:
490 * @ctx: the user data (XML parser context)
492 * Called when the document starts being processed.
494 typedef void (*startDocumentSAXFunc) (void *ctx);
496 * endDocumentSAXFunc:
497 * @ctx: the user data (XML parser context)
499 * Called when the document end has been detected.
501 typedef void (*endDocumentSAXFunc) (void *ctx);
503 * startElementSAXFunc:
504 * @ctx: the user data (XML parser context)
505 * @name: The element name, including namespace prefix
506 * @atts: An array of name/value attributes pairs, NULL terminated
508 * Called when an opening tag has been processed.
510 typedef void (*startElementSAXFunc) (void *ctx,
512 const xmlChar **atts);
515 * @ctx: the user data (XML parser context)
516 * @name: The element name
518 * Called when the end of an element has been detected.
520 typedef void (*endElementSAXFunc) (void *ctx,
521 const xmlChar *name);
524 * @ctx: the user data (XML parser context)
525 * @name: The attribute name, including namespace prefix
526 * @value: The attribute value
528 * Handle an attribute that has been read by the parser.
529 * The default handling is to convert the attribute into a
530 * DOM subtree and paste it in a new xmlAttr element added to
533 typedef void (*attributeSAXFunc) (void *ctx,
535 const xmlChar *value);
538 * @ctx: the user data (XML parser context)
539 * @name: The entity name
541 * Called when an entity reference is detected.
543 typedef void (*referenceSAXFunc) (void *ctx,
544 const xmlChar *name);
547 * @ctx: the user data (XML parser context)
548 * @ch: a xmlChar string
549 * @len: the number of xmlChar
551 * Receiving some chars from the parser.
553 typedef void (*charactersSAXFunc) (void *ctx,
557 * ignorableWhitespaceSAXFunc:
558 * @ctx: the user data (XML parser context)
559 * @ch: a xmlChar string
560 * @len: the number of xmlChar
562 * Receiving some ignorable whitespaces from the parser.
563 * UNUSED: by default the DOM building will use characters.
565 typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
569 * processingInstructionSAXFunc:
570 * @ctx: the user data (XML parser context)
571 * @target: the target name
572 * @data: the PI data
574 * A processing instruction has been parsed.
576 typedef void (*processingInstructionSAXFunc) (void *ctx,
577 const xmlChar *target,
578 const xmlChar *data);
581 * @ctx: the user data (XML parser context)
582 * @value: the comment content
584 * A comment has been parsed.
586 typedef void (*commentSAXFunc) (void *ctx,
587 const xmlChar *value);
590 * @ctx: the user data (XML parser context)
591 * @value: The pcdata content
592 * @len: the block length
594 * Called when a pcdata block has been parsed.
596 typedef void (*cdataBlockSAXFunc) (
598 const xmlChar *value,
602 * @ctx: an XML parser context
603 * @msg: the message to display/transmit
604 * @...: extra parameters for the message display
606 * Display and format a warning message, callback.
608 typedef void (XMLCDECL *warningSAXFunc) (void *ctx,
609 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
612 * @ctx: an XML parser context
613 * @msg: the message to display/transmit
614 * @...: extra parameters for the message display
616 * Display and format an error message, callback.
618 typedef void (XMLCDECL *errorSAXFunc) (void *ctx,
619 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
622 * @ctx: an XML parser context
623 * @msg: the message to display/transmit
624 * @...: extra parameters for the message display
626 * Display and format fatal error messages, callback.
627 * Note: so far fatalError() SAX callbacks are not used, error()
628 * gets all the callbacks for errors.
630 typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx,
631 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
633 * isStandaloneSAXFunc:
634 * @ctx: the user data (XML parser context)
636 * Is this document tagged standalone?
640 typedef int (*isStandaloneSAXFunc) (void *ctx);
642 * hasInternalSubsetSAXFunc:
643 * @ctx: the user data (XML parser context)
645 * Does this document have an internal subset?
649 typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
652 * hasExternalSubsetSAXFunc:
653 * @ctx: the user data (XML parser context)
655 * Does this document have an external subset?
659 typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
661 /************************************************************************
663 * The SAX version 2 API extensions *
665 ************************************************************************/
669 * Special constant found in SAX2 blocks initialized fields
671 #define XML_SAX2_MAGIC 0xDEEDBEAF
674 * startElementNsSAX2Func:
675 * @ctx: the user data (XML parser context)
676 * @localname: the local name of the element
677 * @prefix: the element namespace prefix if available
678 * @URI: the element namespace name if available
679 * @nb_namespaces: number of namespace definitions on that node
680 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions
681 * @nb_attributes: the number of attributes on that node
682 * @nb_defaulted: the number of defaulted attributes. The defaulted
683 * ones are at the end of the array
684 * @attributes: pointer to the array of (localname/prefix/URI/value/end)
687 * SAX2 callback when an element start has been detected by the parser.
688 * It provides the namespace information for the element, as well as
689 * the new namespace declarations on the element.
692 typedef void (*startElementNsSAX2Func) (void *ctx,
693 const xmlChar *localname,
694 const xmlChar *prefix,
697 const xmlChar **namespaces,
700 const xmlChar **attributes);
703 * endElementNsSAX2Func:
704 * @ctx: the user data (XML parser context)
705 * @localname: the local name of the element
706 * @prefix: the element namespace prefix if available
707 * @URI: the element namespace name if available
709 * SAX2 callback when an element end has been detected by the parser.
710 * It provides the namespace information for the element.
713 typedef void (*endElementNsSAX2Func) (void *ctx,
714 const xmlChar *localname,
715 const xmlChar *prefix,
719 struct _xmlSAXHandler {
720 internalSubsetSAXFunc internalSubset;
721 isStandaloneSAXFunc isStandalone;
722 hasInternalSubsetSAXFunc hasInternalSubset;
723 hasExternalSubsetSAXFunc hasExternalSubset;
724 resolveEntitySAXFunc resolveEntity;
725 getEntitySAXFunc getEntity;
726 entityDeclSAXFunc entityDecl;
727 notationDeclSAXFunc notationDecl;
728 attributeDeclSAXFunc attributeDecl;
729 elementDeclSAXFunc elementDecl;
730 unparsedEntityDeclSAXFunc unparsedEntityDecl;
731 setDocumentLocatorSAXFunc setDocumentLocator;
732 startDocumentSAXFunc startDocument;
733 endDocumentSAXFunc endDocument;
734 startElementSAXFunc startElement;
735 endElementSAXFunc endElement;
736 referenceSAXFunc reference;
737 charactersSAXFunc characters;
738 ignorableWhitespaceSAXFunc ignorableWhitespace;
739 processingInstructionSAXFunc processingInstruction;
740 commentSAXFunc comment;
741 warningSAXFunc warning;
743 fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
744 getParameterEntitySAXFunc getParameterEntity;
745 cdataBlockSAXFunc cdataBlock;
746 externalSubsetSAXFunc externalSubset;
747 unsigned int initialized;
748 /* The following fields are extensions available only in version 2 */
750 startElementNsSAX2Func startElementNs;
751 endElementNsSAX2Func endElementNs;
752 xmlStructuredErrorFunc serror;
758 typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1;
759 typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;
760 struct _xmlSAXHandlerV1 {
761 internalSubsetSAXFunc internalSubset;
762 isStandaloneSAXFunc isStandalone;
763 hasInternalSubsetSAXFunc hasInternalSubset;
764 hasExternalSubsetSAXFunc hasExternalSubset;
765 resolveEntitySAXFunc resolveEntity;
766 getEntitySAXFunc getEntity;
767 entityDeclSAXFunc entityDecl;
768 notationDeclSAXFunc notationDecl;
769 attributeDeclSAXFunc attributeDecl;
770 elementDeclSAXFunc elementDecl;
771 unparsedEntityDeclSAXFunc unparsedEntityDecl;
772 setDocumentLocatorSAXFunc setDocumentLocator;
773 startDocumentSAXFunc startDocument;
774 endDocumentSAXFunc endDocument;
775 startElementSAXFunc startElement;
776 endElementSAXFunc endElement;
777 referenceSAXFunc reference;
778 charactersSAXFunc characters;
779 ignorableWhitespaceSAXFunc ignorableWhitespace;
780 processingInstructionSAXFunc processingInstruction;
781 commentSAXFunc comment;
782 warningSAXFunc warning;
784 fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
785 getParameterEntitySAXFunc getParameterEntity;
786 cdataBlockSAXFunc cdataBlock;
787 externalSubsetSAXFunc externalSubset;
788 unsigned int initialized;
793 * xmlExternalEntityLoader:
794 * @URL: The System ID of the resource requested
795 * @ID: The Public ID of the resource requested
796 * @context: the XML parser context
798 * External entity loaders types.
800 * Returns the entity input parser.
802 typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
804 xmlParserCtxtPtr context);
810 #include <libxml/encoding.h>
811 #include <libxml/xmlIO.h>
812 #include <libxml/globals.h>
822 XMLPUBFUN void XMLCALL
823 xmlInitParser (void);
824 XMLPUBFUN void XMLCALL
825 xmlCleanupParser (void);
830 XMLPUBFUN int XMLCALL
831 xmlParserInputRead (xmlParserInputPtr in,
833 XMLPUBFUN int XMLCALL
834 xmlParserInputGrow (xmlParserInputPtr in,
838 * Basic parsing Interfaces
840 #ifdef LIBXML_SAX1_ENABLED
841 XMLPUBFUN xmlDocPtr XMLCALL
842 xmlParseDoc (const xmlChar *cur);
843 XMLPUBFUN xmlDocPtr XMLCALL
844 xmlParseFile (const char *filename);
845 XMLPUBFUN xmlDocPtr XMLCALL
846 xmlParseMemory (const char *buffer,
848 #endif /* LIBXML_SAX1_ENABLED */
849 XMLPUBFUN int XMLCALL
850 xmlSubstituteEntitiesDefault(int val);
851 XMLPUBFUN int XMLCALL
852 xmlKeepBlanksDefault (int val);
853 XMLPUBFUN void XMLCALL
854 xmlStopParser (xmlParserCtxtPtr ctxt);
855 XMLPUBFUN int XMLCALL
856 xmlPedanticParserDefault(int val);
857 XMLPUBFUN int XMLCALL
858 xmlLineNumbersDefault (int val);
860 #ifdef LIBXML_SAX1_ENABLED
864 XMLPUBFUN xmlDocPtr XMLCALL
865 xmlRecoverDoc (const xmlChar *cur);
866 XMLPUBFUN xmlDocPtr XMLCALL
867 xmlRecoverMemory (const char *buffer,
869 XMLPUBFUN xmlDocPtr XMLCALL
870 xmlRecoverFile (const char *filename);
871 #endif /* LIBXML_SAX1_ENABLED */
874 * Less common routines and SAX interfaces
876 XMLPUBFUN int XMLCALL
877 xmlParseDocument (xmlParserCtxtPtr ctxt);
878 XMLPUBFUN int XMLCALL
879 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
880 #ifdef LIBXML_SAX1_ENABLED
881 XMLPUBFUN int XMLCALL
882 xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
884 const char *filename);
885 XMLPUBFUN int XMLCALL
886 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
890 XMLPUBFUN xmlDocPtr XMLCALL
891 xmlSAXParseDoc (xmlSAXHandlerPtr sax,
894 XMLPUBFUN xmlDocPtr XMLCALL
895 xmlSAXParseMemory (xmlSAXHandlerPtr sax,
899 XMLPUBFUN xmlDocPtr XMLCALL
900 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
905 XMLPUBFUN xmlDocPtr XMLCALL
906 xmlSAXParseFile (xmlSAXHandlerPtr sax,
907 const char *filename,
909 XMLPUBFUN xmlDocPtr XMLCALL
910 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
911 const char *filename,
914 XMLPUBFUN xmlDocPtr XMLCALL
915 xmlSAXParseEntity (xmlSAXHandlerPtr sax,
916 const char *filename);
917 XMLPUBFUN xmlDocPtr XMLCALL
918 xmlParseEntity (const char *filename);
919 #endif /* LIBXML_SAX1_ENABLED */
921 #ifdef LIBXML_VALID_ENABLED
922 XMLPUBFUN xmlDtdPtr XMLCALL
923 xmlSAXParseDTD (xmlSAXHandlerPtr sax,
924 const xmlChar *ExternalID,
925 const xmlChar *SystemID);
926 XMLPUBFUN xmlDtdPtr XMLCALL
927 xmlParseDTD (const xmlChar *ExternalID,
928 const xmlChar *SystemID);
929 XMLPUBFUN xmlDtdPtr XMLCALL
930 xmlIOParseDTD (xmlSAXHandlerPtr sax,
931 xmlParserInputBufferPtr input,
932 xmlCharEncoding enc);
933 #endif /* LIBXML_VALID_ENABLED */
934 #ifdef LIBXML_SAX1_ENABLED
935 XMLPUBFUN int XMLCALL
936 xmlParseBalancedChunkMemory(xmlDocPtr doc,
937 xmlSAXHandlerPtr sax,
940 const xmlChar *string,
942 #endif /* LIBXML_SAX1_ENABLED */
943 XMLPUBFUN xmlParserErrors XMLCALL
944 xmlParseInNodeContext (xmlNodePtr node,
949 #ifdef LIBXML_SAX1_ENABLED
950 XMLPUBFUN int XMLCALL
951 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
952 xmlSAXHandlerPtr sax,
955 const xmlChar *string,
958 XMLPUBFUN int XMLCALL
959 xmlParseExternalEntity (xmlDocPtr doc,
960 xmlSAXHandlerPtr sax,
966 #endif /* LIBXML_SAX1_ENABLED */
967 XMLPUBFUN int XMLCALL
968 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
974 * Parser contexts handling.
976 XMLPUBFUN xmlParserCtxtPtr XMLCALL
977 xmlNewParserCtxt (void);
978 XMLPUBFUN int XMLCALL
979 xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
980 XMLPUBFUN void XMLCALL
981 xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
982 XMLPUBFUN void XMLCALL
983 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
984 #ifdef LIBXML_SAX1_ENABLED
985 XMLPUBFUN void XMLCALL
986 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
987 const xmlChar* buffer,
988 const char *filename);
989 #endif /* LIBXML_SAX1_ENABLED */
990 XMLPUBFUN xmlParserCtxtPtr XMLCALL
991 xmlCreateDocParserCtxt (const xmlChar *cur);
993 #ifdef LIBXML_LEGACY_ENABLED
995 * Reading/setting optional parsing features.
997 XMLPUBFUN int XMLCALL
998 xmlGetFeaturesList (int *len,
999 const char **result);
1000 XMLPUBFUN int XMLCALL
1001 xmlGetFeature (xmlParserCtxtPtr ctxt,
1004 XMLPUBFUN int XMLCALL
1005 xmlSetFeature (xmlParserCtxtPtr ctxt,
1008 #endif /* LIBXML_LEGACY_ENABLED */
1010 #ifdef LIBXML_PUSH_ENABLED
1012 * Interfaces for the Push mode.
1014 XMLPUBFUN xmlParserCtxtPtr XMLCALL
1015 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
1019 const char *filename);
1020 XMLPUBFUN int XMLCALL
1021 xmlParseChunk (xmlParserCtxtPtr ctxt,
1025 #endif /* LIBXML_PUSH_ENABLED */
1031 XMLPUBFUN xmlParserCtxtPtr XMLCALL
1032 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
1034 xmlInputReadCallback ioread,
1035 xmlInputCloseCallback ioclose,
1037 xmlCharEncoding enc);
1039 XMLPUBFUN xmlParserInputPtr XMLCALL
1040 xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
1041 xmlParserInputBufferPtr input,
1042 xmlCharEncoding enc);
1047 XMLPUBFUN const xmlParserNodeInfo* XMLCALL
1048 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt,
1049 const xmlNodePtr node);
1050 XMLPUBFUN void XMLCALL
1051 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1052 XMLPUBFUN void XMLCALL
1053 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1054 XMLPUBFUN unsigned long XMLCALL
1055 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1056 const xmlNodePtr node);
1057 XMLPUBFUN void XMLCALL
1058 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
1059 const xmlParserNodeInfoPtr info);
1062 * External entities handling actually implemented in xmlIO.
1065 XMLPUBFUN void XMLCALL
1066 xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
1067 XMLPUBFUN xmlExternalEntityLoader XMLCALL
1068 xmlGetExternalEntityLoader(void);
1069 XMLPUBFUN xmlParserInputPtr XMLCALL
1070 xmlLoadExternalEntity (const char *URL,
1072 xmlParserCtxtPtr ctxt);
1075 * Index lookup, actually implemented in the encoding module
1077 XMLPUBFUN long XMLCALL
1078 xmlByteConsumed (xmlParserCtxtPtr ctxt);
1081 * New set of simpler/more flexible APIs
1086 * This is the set of XML parser options that can be passed down
1087 * to the xmlReadDoc() and similar calls.
1090 XML_PARSE_RECOVER = 1<<0, /* recover on errors */
1091 XML_PARSE_NOENT = 1<<1, /* substitute entities */
1092 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */
1093 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */
1094 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */
1095 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */
1096 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */
1097 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
1098 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
1099 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */
1100 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitution */
1101 XML_PARSE_NONET = 1<<11,/* Forbid network access */
1102 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionary */
1103 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespace declarations */
1104 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */
1105 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */
1106 XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of
1107 the tree allowed afterwards (will possibly
1108 crash if you try to modify the tree) */
1109 XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */
1110 XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */
1111 XML_PARSE_HUGE = 1<<19,/* relax any hardcoded limit from the parser */
1112 XML_PARSE_OLDSAX = 1<<20,/* parse using SAX2 interface before 2.7.0 */
1113 XML_PARSE_IGNORE_ENC= 1<<21,/* ignore internal document encoding hint */
1114 XML_PARSE_BIG_LINES = 1<<22 /* Store big line numbers in text PSVI field */
1117 XMLPUBFUN void XMLCALL
1118 xmlCtxtReset (xmlParserCtxtPtr ctxt);
1119 XMLPUBFUN int XMLCALL
1120 xmlCtxtResetPush (xmlParserCtxtPtr ctxt,
1123 const char *filename,
1124 const char *encoding);
1125 XMLPUBFUN int XMLCALL
1126 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt,
1128 XMLPUBFUN xmlDocPtr XMLCALL
1129 xmlReadDoc (const xmlChar *cur,
1131 const char *encoding,
1133 XMLPUBFUN xmlDocPtr XMLCALL
1134 xmlReadFile (const char *URL,
1135 const char *encoding,
1137 XMLPUBFUN xmlDocPtr XMLCALL
1138 xmlReadMemory (const char *buffer,
1141 const char *encoding,
1143 XMLPUBFUN xmlDocPtr XMLCALL
1146 const char *encoding,
1148 XMLPUBFUN xmlDocPtr XMLCALL
1149 xmlReadIO (xmlInputReadCallback ioread,
1150 xmlInputCloseCallback ioclose,
1153 const char *encoding,
1155 XMLPUBFUN xmlDocPtr XMLCALL
1156 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
1159 const char *encoding,
1161 XMLPUBFUN xmlDocPtr XMLCALL
1162 xmlCtxtReadFile (xmlParserCtxtPtr ctxt,
1163 const char *filename,
1164 const char *encoding,
1166 XMLPUBFUN xmlDocPtr XMLCALL
1167 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
1171 const char *encoding,
1173 XMLPUBFUN xmlDocPtr XMLCALL
1174 xmlCtxtReadFd (xmlParserCtxtPtr ctxt,
1177 const char *encoding,
1179 XMLPUBFUN xmlDocPtr XMLCALL
1180 xmlCtxtReadIO (xmlParserCtxtPtr ctxt,
1181 xmlInputReadCallback ioread,
1182 xmlInputCloseCallback ioclose,
1185 const char *encoding,
1189 * Library wide options
1194 * Used to examine the existence of features that can be enabled
1195 * or disabled at compile-time.
1196 * They used to be called XML_FEATURE_xxx but this clashed with Expat
1199 XML_WITH_THREAD = 1,
1201 XML_WITH_OUTPUT = 3,
1203 XML_WITH_READER = 5,
1204 XML_WITH_PATTERN = 6,
1205 XML_WITH_WRITER = 7,
1209 XML_WITH_VALID = 11,
1211 XML_WITH_LEGACY = 13,
1213 XML_WITH_CATALOG = 15,
1214 XML_WITH_XPATH = 16,
1216 XML_WITH_XINCLUDE = 18,
1217 XML_WITH_ICONV = 19,
1218 XML_WITH_ISO8859X = 20,
1219 XML_WITH_UNICODE = 21,
1220 XML_WITH_REGEXP = 22,
1221 XML_WITH_AUTOMATA = 23,
1223 XML_WITH_SCHEMAS = 25,
1224 XML_WITH_SCHEMATRON = 26,
1225 XML_WITH_MODULES = 27,
1226 XML_WITH_DEBUG = 28,
1227 XML_WITH_DEBUG_MEM = 29,
1228 XML_WITH_DEBUG_RUN = 30,
1232 XML_WITH_NONE = 99999 /* just to be sure of allocation size */
1235 XMLPUBFUN int XMLCALL
1236 xmlHasFeature (xmlFeature feature);
1241 #endif /* __XML_PARSER_H__ */