2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
70 #ifdef HAVE_SYS_STAT_H
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
90 /************************************************************************
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
94 ************************************************************************/
96 #define XML_PARSER_BIG_ENTITY 1000
97 #define XML_PARSER_LOT_ENTITY 5000
100 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
101 * replacement over the size in byte of the input indicates that you have
102 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
103 * replacement per byte of input.
105 #define XML_PARSER_NON_LINEAR 10
108 * xmlParserEntityCheck
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential (non-linear) entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
120 unsigned long consumed = 0;
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
128 * Do the check based on the replacement size of the entity
130 if (size < XML_PARSER_BIG_ENTITY)
134 * A limit on the amount of text data reasonably used
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
140 consumed += ctxt->sizeentities;
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
145 } else if (ent != NULL) {
147 * use the number of parsed entities in the replacement
152 * The amount of data parsed counting entities size only once
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
158 consumed += ctxt->sizeentities;
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
168 * strange we got no data for checking just return
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
185 unsigned int xmlParserMaxDepth = 256;
190 #define XML_PARSER_BIG_BUFFER_SIZE 300
191 #define XML_PARSER_BUFFER_SIZE 100
192 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
195 * List of XML prefixed PI allowed by W3C specs
198 static const char *xmlW3CPIs[] = {
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
211 void *user_data, int depth, const xmlChar *URL,
212 const xmlChar *ID, xmlNodePtr *list);
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
217 #ifdef LIBXML_LEGACY_ENABLED
219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
221 #endif /* LIBXML_LEGACY_ENABLED */
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
230 /************************************************************************
232 * Some factorized error routines *
234 ************************************************************************/
237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
242 * Handle a redefinition of attribute error
245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @info: extra information string
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
289 case XML_ERR_INVALID_HEX_CHARREF:
290 errmsg = "CharRef: invalid hexadecimal value\n";
292 case XML_ERR_INVALID_DEC_CHARREF:
293 errmsg = "CharRef: invalid decimal value\n";
295 case XML_ERR_INVALID_CHARREF:
296 errmsg = "CharRef: invalid value\n";
298 case XML_ERR_INTERNAL_ERROR:
299 errmsg = "internal error";
301 case XML_ERR_PEREF_AT_EOF:
302 errmsg = "PEReference at end of document\n";
304 case XML_ERR_PEREF_IN_PROLOG:
305 errmsg = "PEReference in prolog\n";
307 case XML_ERR_PEREF_IN_EPILOG:
308 errmsg = "PEReference in epilog\n";
310 case XML_ERR_PEREF_NO_NAME:
311 errmsg = "PEReference: no name\n";
313 case XML_ERR_PEREF_SEMICOL_MISSING:
314 errmsg = "PEReference: expecting ';'\n";
316 case XML_ERR_ENTITY_LOOP:
317 errmsg = "Detected an entity reference loop\n";
319 case XML_ERR_ENTITY_NOT_STARTED:
320 errmsg = "EntityValue: \" or ' expected\n";
322 case XML_ERR_ENTITY_PE_INTERNAL:
323 errmsg = "PEReferences forbidden in internal subset\n";
325 case XML_ERR_ENTITY_NOT_FINISHED:
326 errmsg = "EntityValue: \" or ' expected\n";
328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
329 errmsg = "AttValue: \" or ' expected\n";
331 case XML_ERR_LT_IN_ATTRIBUTE:
332 errmsg = "Unescaped '<' not allowed in attributes values\n";
334 case XML_ERR_LITERAL_NOT_STARTED:
335 errmsg = "SystemLiteral \" or ' expected\n";
337 case XML_ERR_LITERAL_NOT_FINISHED:
338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
340 case XML_ERR_MISPLACED_CDATA_END:
341 errmsg = "Sequence ']]>' not allowed in content\n";
343 case XML_ERR_URI_REQUIRED:
344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
346 case XML_ERR_PUBID_REQUIRED:
347 errmsg = "PUBLIC, the Public Identifier is missing\n";
349 case XML_ERR_HYPHEN_IN_COMMENT:
350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
352 case XML_ERR_PI_NOT_STARTED:
353 errmsg = "xmlParsePI : no target name\n";
355 case XML_ERR_RESERVED_XML_NAME:
356 errmsg = "Invalid PI name\n";
358 case XML_ERR_NOTATION_NOT_STARTED:
359 errmsg = "NOTATION: Name expected here\n";
361 case XML_ERR_NOTATION_NOT_FINISHED:
362 errmsg = "'>' required to close NOTATION declaration\n";
364 case XML_ERR_VALUE_REQUIRED:
365 errmsg = "Entity value required\n";
367 case XML_ERR_URI_FRAGMENT:
368 errmsg = "Fragment not allowed";
370 case XML_ERR_ATTLIST_NOT_STARTED:
371 errmsg = "'(' required to start ATTLIST enumeration\n";
373 case XML_ERR_NMTOKEN_REQUIRED:
374 errmsg = "NmToken expected in ATTLIST enumeration\n";
376 case XML_ERR_ATTLIST_NOT_FINISHED:
377 errmsg = "')' required to finish ATTLIST enumeration\n";
379 case XML_ERR_MIXED_NOT_STARTED:
380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
382 case XML_ERR_PCDATA_REQUIRED:
383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
386 errmsg = "ContentDecl : Name or '(' expected\n";
388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
391 case XML_ERR_PEREF_IN_INT_SUBSET:
393 "PEReference: forbidden within markup decl in internal subset\n";
395 case XML_ERR_GT_REQUIRED:
396 errmsg = "expected '>'\n";
398 case XML_ERR_CONDSEC_INVALID:
399 errmsg = "XML conditional section '[' expected\n";
401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
402 errmsg = "Content error in the external subset\n";
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
406 "conditional section INCLUDE or IGNORE keyword expected\n";
408 case XML_ERR_CONDSEC_NOT_FINISHED:
409 errmsg = "XML conditional section not closed\n";
411 case XML_ERR_XMLDECL_NOT_STARTED:
412 errmsg = "Text declaration '<?xml' required\n";
414 case XML_ERR_XMLDECL_NOT_FINISHED:
415 errmsg = "parsing XML declaration: '?>' expected\n";
417 case XML_ERR_EXT_ENTITY_STANDALONE:
418 errmsg = "external parsed entities cannot be standalone\n";
420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
421 errmsg = "EntityRef: expecting ';'\n";
423 case XML_ERR_DOCTYPE_NOT_FINISHED:
424 errmsg = "DOCTYPE improperly terminated\n";
426 case XML_ERR_LTSLASH_REQUIRED:
427 errmsg = "EndTag: '</' not found\n";
429 case XML_ERR_EQUAL_REQUIRED:
430 errmsg = "expected '='\n";
432 case XML_ERR_STRING_NOT_CLOSED:
433 errmsg = "String not closed expecting \" or '\n";
435 case XML_ERR_STRING_NOT_STARTED:
436 errmsg = "String not started expecting ' or \"\n";
438 case XML_ERR_ENCODING_NAME:
439 errmsg = "Invalid XML encoding name\n";
441 case XML_ERR_STANDALONE_VALUE:
442 errmsg = "standalone accepts only 'yes' or 'no'\n";
444 case XML_ERR_DOCUMENT_EMPTY:
445 errmsg = "Document is empty\n";
447 case XML_ERR_DOCUMENT_END:
448 errmsg = "Extra content at the end of the document\n";
450 case XML_ERR_NOT_WELL_BALANCED:
451 errmsg = "chunk is not well balanced\n";
453 case XML_ERR_EXTRA_CONTENT:
454 errmsg = "extra content at the end of well balanced chunk\n";
456 case XML_ERR_VERSION_MISSING:
457 errmsg = "Malformed declaration expecting version\n";
465 errmsg = "Unregistered error message\n";
469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
519 xmlStructuredErrorFunc schannel = NULL;
521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
526 schannel = ctxt->sax->serror;
528 __xmlRaiseError(schannel,
529 (ctxt->sax) ? ctxt->sax->warning : NULL,
531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
551 * Handle a validity error.
554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
555 const char *msg, const xmlChar *str1, const xmlChar *str2)
557 xmlStructuredErrorFunc schannel = NULL;
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
568 __xmlRaiseError(schannel,
569 ctxt->vctxt.error, ctxt->vctxt.userData,
570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, int val)
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
602 __xmlRaiseError(NULL, NULL, NULL,
603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: a string info
618 * @val: an integer value
619 * @str2: a string info
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
633 __xmlRaiseError(NULL, NULL, NULL,
634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
655 const char *msg, const xmlChar * val)
657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
680 * Handle a non fatal parser error
683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
699 * @ctxt: an XML parser context
700 * @error: the error number
702 * @info1: extra information string
703 * @info2: extra information string
705 * Handle a namespace error (reported at error level in the namespace domain)
708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
723 ctxt->nsWellFormed = 0;
728 * @ctxt: an XML parser context
729 * @error: the error number
731 * @info1: extra information string
732 * @info2: extra information string
734 * Handle a namespace warning
737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
751 /************************************************************************
753 * Library wide options *
755 ************************************************************************/
759 * @feature: the feature to be examined
761 * Examines if the library has been compiled with a given feature.
763 * Returns a non-zero value if the feature exists, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an
765 * unknown feature is requested, non-zero otherwise.
768 xmlHasFeature(xmlFeature feature)
771 case XML_WITH_THREAD:
772 #ifdef LIBXML_THREAD_ENABLED
778 #ifdef LIBXML_TREE_ENABLED
783 case XML_WITH_OUTPUT:
784 #ifdef LIBXML_OUTPUT_ENABLED
790 #ifdef LIBXML_PUSH_ENABLED
795 case XML_WITH_READER:
796 #ifdef LIBXML_READER_ENABLED
801 case XML_WITH_PATTERN:
802 #ifdef LIBXML_PATTERN_ENABLED
807 case XML_WITH_WRITER:
808 #ifdef LIBXML_WRITER_ENABLED
814 #ifdef LIBXML_SAX1_ENABLED
820 #ifdef LIBXML_FTP_ENABLED
826 #ifdef LIBXML_HTTP_ENABLED
832 #ifdef LIBXML_VALID_ENABLED
838 #ifdef LIBXML_HTML_ENABLED
843 case XML_WITH_LEGACY:
844 #ifdef LIBXML_LEGACY_ENABLED
850 #ifdef LIBXML_C14N_ENABLED
855 case XML_WITH_CATALOG:
856 #ifdef LIBXML_CATALOG_ENABLED
862 #ifdef LIBXML_XPATH_ENABLED
868 #ifdef LIBXML_XPTR_ENABLED
873 case XML_WITH_XINCLUDE:
874 #ifdef LIBXML_XINCLUDE_ENABLED
880 #ifdef LIBXML_ICONV_ENABLED
885 case XML_WITH_ISO8859X:
886 #ifdef LIBXML_ISO8859X_ENABLED
891 case XML_WITH_UNICODE:
892 #ifdef LIBXML_UNICODE_ENABLED
897 case XML_WITH_REGEXP:
898 #ifdef LIBXML_REGEXP_ENABLED
903 case XML_WITH_AUTOMATA:
904 #ifdef LIBXML_AUTOMATA_ENABLED
910 #ifdef LIBXML_EXPR_ENABLED
915 case XML_WITH_SCHEMAS:
916 #ifdef LIBXML_SCHEMAS_ENABLED
921 case XML_WITH_SCHEMATRON:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
927 case XML_WITH_MODULES:
928 #ifdef LIBXML_MODULES_ENABLED
934 #ifdef LIBXML_DEBUG_ENABLED
939 case XML_WITH_DEBUG_MEM:
940 #ifdef DEBUG_MEMORY_LOCATION
945 case XML_WITH_DEBUG_RUN:
946 #ifdef LIBXML_DEBUG_RUNTIME
952 #ifdef LIBXML_ZLIB_ENABLED
958 #ifdef LIBXML_ICU_ENABLED
969 /************************************************************************
971 * SAX2 defaulted attributes handling *
973 ************************************************************************/
977 * @ctxt: an XML parser context
979 * Do the SAX2 detection and specific initialization
982 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
983 if (ctxt == NULL) return;
984 #ifdef LIBXML_SAX1_ENABLED
985 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
986 ((ctxt->sax->startElementNs != NULL) ||
987 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
990 #endif /* LIBXML_SAX1_ENABLED */
992 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
993 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
994 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
995 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
996 (ctxt->str_xml_ns == NULL)) {
997 xmlErrMemory(ctxt, NULL);
1001 typedef struct _xmlDefAttrs xmlDefAttrs;
1002 typedef xmlDefAttrs *xmlDefAttrsPtr;
1003 struct _xmlDefAttrs {
1004 int nbAttrs; /* number of defaulted attributes on that element */
1005 int maxAttrs; /* the size of the array */
1006 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1010 * xmlAttrNormalizeSpace:
1011 * @src: the source string
1012 * @dst: the target string
1014 * Normalize the space in non CDATA attribute values:
1015 * If the attribute type is not CDATA, then the XML processor MUST further
1016 * process the normalized attribute value by discarding any leading and
1017 * trailing space (#x20) characters, and by replacing sequences of space
1018 * (#x20) characters by a single space (#x20) character.
1019 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1020 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021 * passing src as dst is just fine.
1023 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1027 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1029 if ((src == NULL) || (dst == NULL))
1032 while (*src == 0x20) src++;
1035 while (*src == 0x20) src++;
1049 * xmlAttrNormalizeSpace2:
1050 * @src: the source string
1052 * Normalize the space in non CDATA attribute values, a slightly more complex
1053 * front end to avoid allocation problems when running on attribute values
1054 * coming from the input.
1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1059 static const xmlChar *
1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1063 int remove_head = 0;
1064 int need_realloc = 0;
1067 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1074 while (*cur == 0x20) {
1081 if ((*cur == 0x20) || (*cur == 0)) {
1091 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1093 xmlErrMemory(ctxt, NULL);
1096 xmlAttrNormalizeSpace(ret, ret);
1097 *len = (int) strlen((const char *)ret);
1099 } else if (remove_head) {
1100 *len -= remove_head;
1101 memmove(src, src + remove_head, 1 + *len);
1109 * @ctxt: an XML parser context
1110 * @fullname: the element fullname
1111 * @fullattr: the attribute fullname
1112 * @value: the attribute value
1114 * Add a defaulted attribute for an element
1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118 const xmlChar *fullname,
1119 const xmlChar *fullattr,
1120 const xmlChar *value) {
1121 xmlDefAttrsPtr defaults;
1123 const xmlChar *name;
1124 const xmlChar *prefix;
1127 * Allows to detect attribute redefinitions
1129 if (ctxt->attsSpecial != NULL) {
1130 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1134 if (ctxt->attsDefault == NULL) {
1135 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1136 if (ctxt->attsDefault == NULL)
1141 * split the element name into prefix:localname , the string found
1142 * are within the DTD and then not associated to namespace names.
1144 name = xmlSplitQName3(fullname, &len);
1146 name = xmlDictLookup(ctxt->dict, fullname, -1);
1149 name = xmlDictLookup(ctxt->dict, name, -1);
1150 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1154 * make sure there is some storage
1156 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157 if (defaults == NULL) {
1158 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1159 (4 * 5) * sizeof(const xmlChar *));
1160 if (defaults == NULL)
1162 defaults->nbAttrs = 0;
1163 defaults->maxAttrs = 4;
1164 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165 defaults, NULL) < 0) {
1169 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1170 xmlDefAttrsPtr temp;
1172 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1173 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1177 defaults->maxAttrs *= 2;
1178 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 defaults, NULL) < 0) {
1186 * Split the attribute name into prefix:localname , the string found
1187 * are within the DTD and then not associated to namespace names.
1189 name = xmlSplitQName3(fullattr, &len);
1191 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1194 name = xmlDictLookup(ctxt->dict, name, -1);
1195 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1198 defaults->values[5 * defaults->nbAttrs] = name;
1199 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1200 /* intern the string and precompute the end */
1201 len = xmlStrlen(value);
1202 value = xmlDictLookup(ctxt->dict, value, len);
1203 defaults->values[5 * defaults->nbAttrs + 2] = value;
1204 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1206 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1208 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1209 defaults->nbAttrs++;
1214 xmlErrMemory(ctxt, NULL);
1219 * xmlAddSpecialAttr:
1220 * @ctxt: an XML parser context
1221 * @fullname: the element fullname
1222 * @fullattr: the attribute fullname
1223 * @type: the attribute type
1225 * Register this attribute type
1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229 const xmlChar *fullname,
1230 const xmlChar *fullattr,
1233 if (ctxt->attsSpecial == NULL) {
1234 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1235 if (ctxt->attsSpecial == NULL)
1239 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1242 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243 (void *) (long) type);
1247 xmlErrMemory(ctxt, NULL);
1252 * xmlCleanSpecialAttrCallback:
1254 * Removes CDATA attributes from the special attribute table
1257 xmlCleanSpecialAttrCallback(void *payload, void *data,
1258 const xmlChar *fullname, const xmlChar *fullattr,
1259 const xmlChar *unused ATTRIBUTE_UNUSED) {
1260 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1262 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1263 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1268 * xmlCleanSpecialAttr:
1269 * @ctxt: an XML parser context
1271 * Trim the list of attributes defined to remove all those of type
1272 * CDATA as they are not special. This call should be made once parsing
1273 * of the DTD is finished and before starting to parse the document root.
1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1278 if (ctxt->attsSpecial == NULL)
1281 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1283 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284 xmlHashFree(ctxt->attsSpecial, NULL);
1285 ctxt->attsSpecial = NULL;
1291 * xmlCheckLanguageID:
1292 * @lang: pointer to the string value
1294 * Checks that the value conforms to the LanguageID production:
1296 * NOTE: this is somewhat deprecated, those productions were removed from
1297 * the XML Second edition.
1299 * [33] LanguageID ::= Langcode ('-' Subcode)*
1300 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1301 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304 * [38] Subcode ::= ([a-z] | [A-Z])+
1306 * The current REC references the successors of RFC 1766, currently 5646
1308 * http://www.rfc-editor.org/rfc/rfc5646.txt
1309 * langtag = language
1315 * language = 2*3ALPHA ; shortest ISO 639 code
1316 * ["-" extlang] ; sometimes followed by
1317 * ; extended language subtags
1318 * / 4ALPHA ; or reserved for future use
1319 * / 5*8ALPHA ; or registered language subtag
1321 * extlang = 3ALPHA ; selected ISO 639 codes
1322 * *2("-" 3ALPHA) ; permanently reserved
1324 * script = 4ALPHA ; ISO 15924 code
1326 * region = 2ALPHA ; ISO 3166-1 code
1327 * / 3DIGIT ; UN M.49 code
1329 * variant = 5*8alphanum ; registered variants
1330 * / (DIGIT 3alphanum)
1332 * extension = singleton 1*("-" (2*8alphanum))
1334 * ; Single alphanumerics
1335 * ; "x" reserved for private use
1336 * singleton = DIGIT ; 0 - 9
1342 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1343 * The parser below doesn't try to cope with extension or privateuse
1344 * that could be added but that's not interoperable anyway
1346 * Returns 1 if correct 0 otherwise
1349 xmlCheckLanguageID(const xmlChar * lang)
1351 const xmlChar *cur = lang, *nxt;
1355 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1356 ((cur[0] == 'I') && (cur[1] == '-')) ||
1357 ((cur[0] == 'x') && (cur[1] == '-')) ||
1358 ((cur[0] == 'X') && (cur[1] == '-'))) {
1360 * Still allow IANA code and user code which were coming
1361 * from the previous version of the XML-1.0 specification
1362 * it's deprecated but we should not fail
1365 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1366 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1368 return(cur[0] == 0);
1371 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1374 if (nxt - cur >= 4) {
1378 if ((nxt - cur > 8) || (nxt[0] != 0))
1384 /* we got an ISO 639 code */
1392 /* now we can have extlang or script or region or variant */
1393 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1396 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1397 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1403 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1407 /* we parsed an extlang */
1415 /* now we can have script or region or variant */
1416 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1419 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1420 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1424 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1428 /* we parsed a script */
1437 /* now we can have region or variant */
1438 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1441 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1442 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1445 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1449 /* we parsed a region */
1458 /* now we can just have a variant */
1459 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1460 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1463 if ((nxt - cur < 5) || (nxt - cur > 8))
1466 /* we parsed a variant */
1472 /* extensions and private use subtags not checked */
1476 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1477 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1484 /************************************************************************
1486 * Parser stacks related functions and macros *
1488 ************************************************************************/
1490 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1491 const xmlChar ** str);
1496 * @ctxt: an XML parser context
1497 * @prefix: the namespace prefix or NULL
1498 * @URL: the namespace name
1500 * Pushes a new parser namespace on top of the ns stack
1502 * Returns -1 in case of error, -2 if the namespace should be discarded
1503 * and the index in the stack otherwise.
1506 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1508 if (ctxt->options & XML_PARSE_NSCLEAN) {
1510 for (i = 0;i < ctxt->nsNr;i += 2) {
1511 if (ctxt->nsTab[i] == prefix) {
1513 if (ctxt->nsTab[i + 1] == URL)
1515 /* out of scope keep it */
1520 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1523 ctxt->nsTab = (const xmlChar **)
1524 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1525 if (ctxt->nsTab == NULL) {
1526 xmlErrMemory(ctxt, NULL);
1530 } else if (ctxt->nsNr >= ctxt->nsMax) {
1531 const xmlChar ** tmp;
1533 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1534 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1536 xmlErrMemory(ctxt, NULL);
1542 ctxt->nsTab[ctxt->nsNr++] = prefix;
1543 ctxt->nsTab[ctxt->nsNr++] = URL;
1544 return (ctxt->nsNr);
1548 * @ctxt: an XML parser context
1549 * @nr: the number to pop
1551 * Pops the top @nr parser prefix/namespace from the ns stack
1553 * Returns the number of namespaces removed
1556 nsPop(xmlParserCtxtPtr ctxt, int nr)
1560 if (ctxt->nsTab == NULL) return(0);
1561 if (ctxt->nsNr < nr) {
1562 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1565 if (ctxt->nsNr <= 0)
1568 for (i = 0;i < nr;i++) {
1570 ctxt->nsTab[ctxt->nsNr] = NULL;
1577 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1578 const xmlChar **atts;
1582 if (ctxt->atts == NULL) {
1583 maxatts = 55; /* allow for 10 attrs by default */
1584 atts = (const xmlChar **)
1585 xmlMalloc(maxatts * sizeof(xmlChar *));
1586 if (atts == NULL) goto mem_error;
1588 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1589 if (attallocs == NULL) goto mem_error;
1590 ctxt->attallocs = attallocs;
1591 ctxt->maxatts = maxatts;
1592 } else if (nr + 5 > ctxt->maxatts) {
1593 maxatts = (nr + 5) * 2;
1594 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1595 maxatts * sizeof(const xmlChar *));
1596 if (atts == NULL) goto mem_error;
1598 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1599 (maxatts / 5) * sizeof(int));
1600 if (attallocs == NULL) goto mem_error;
1601 ctxt->attallocs = attallocs;
1602 ctxt->maxatts = maxatts;
1604 return(ctxt->maxatts);
1606 xmlErrMemory(ctxt, NULL);
1612 * @ctxt: an XML parser context
1613 * @value: the parser input
1615 * Pushes a new parser input on top of the input stack
1617 * Returns -1 in case of error, the index in the stack otherwise
1620 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1622 if ((ctxt == NULL) || (value == NULL))
1624 if (ctxt->inputNr >= ctxt->inputMax) {
1625 ctxt->inputMax *= 2;
1627 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1629 sizeof(ctxt->inputTab[0]));
1630 if (ctxt->inputTab == NULL) {
1631 xmlErrMemory(ctxt, NULL);
1632 xmlFreeInputStream(value);
1633 ctxt->inputMax /= 2;
1638 ctxt->inputTab[ctxt->inputNr] = value;
1639 ctxt->input = value;
1640 return (ctxt->inputNr++);
1644 * @ctxt: an XML parser context
1646 * Pops the top parser input from the input stack
1648 * Returns the input just removed
1651 inputPop(xmlParserCtxtPtr ctxt)
1653 xmlParserInputPtr ret;
1657 if (ctxt->inputNr <= 0)
1660 if (ctxt->inputNr > 0)
1661 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1664 ret = ctxt->inputTab[ctxt->inputNr];
1665 ctxt->inputTab[ctxt->inputNr] = NULL;
1670 * @ctxt: an XML parser context
1671 * @value: the element node
1673 * Pushes a new element node on top of the node stack
1675 * Returns -1 in case of error, the index in the stack otherwise
1678 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1680 if (ctxt == NULL) return(0);
1681 if (ctxt->nodeNr >= ctxt->nodeMax) {
1684 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1686 sizeof(ctxt->nodeTab[0]));
1688 xmlErrMemory(ctxt, NULL);
1691 ctxt->nodeTab = tmp;
1694 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1695 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1696 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1697 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1699 ctxt->instate = XML_PARSER_EOF;
1702 ctxt->nodeTab[ctxt->nodeNr] = value;
1704 return (ctxt->nodeNr++);
1709 * @ctxt: an XML parser context
1711 * Pops the top element node from the node stack
1713 * Returns the node just removed
1716 nodePop(xmlParserCtxtPtr ctxt)
1720 if (ctxt == NULL) return(NULL);
1721 if (ctxt->nodeNr <= 0)
1724 if (ctxt->nodeNr > 0)
1725 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1728 ret = ctxt->nodeTab[ctxt->nodeNr];
1729 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1733 #ifdef LIBXML_PUSH_ENABLED
1736 * @ctxt: an XML parser context
1737 * @value: the element name
1738 * @prefix: the element prefix
1739 * @URI: the element namespace name
1741 * Pushes a new element name/prefix/URL on top of the name stack
1743 * Returns -1 in case of error, the index in the stack otherwise
1746 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1747 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1749 if (ctxt->nameNr >= ctxt->nameMax) {
1750 const xmlChar * *tmp;
1753 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1755 sizeof(ctxt->nameTab[0]));
1760 ctxt->nameTab = tmp;
1761 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1763 sizeof(ctxt->pushTab[0]));
1768 ctxt->pushTab = tmp2;
1770 ctxt->nameTab[ctxt->nameNr] = value;
1772 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1773 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1774 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1775 return (ctxt->nameNr++);
1777 xmlErrMemory(ctxt, NULL);
1782 * @ctxt: an XML parser context
1784 * Pops the top element/prefix/URI name from the name stack
1786 * Returns the name just removed
1788 static const xmlChar *
1789 nameNsPop(xmlParserCtxtPtr ctxt)
1793 if (ctxt->nameNr <= 0)
1796 if (ctxt->nameNr > 0)
1797 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1800 ret = ctxt->nameTab[ctxt->nameNr];
1801 ctxt->nameTab[ctxt->nameNr] = NULL;
1804 #endif /* LIBXML_PUSH_ENABLED */
1808 * @ctxt: an XML parser context
1809 * @value: the element name
1811 * Pushes a new element name on top of the name stack
1813 * Returns -1 in case of error, the index in the stack otherwise
1816 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1818 if (ctxt == NULL) return (-1);
1820 if (ctxt->nameNr >= ctxt->nameMax) {
1821 const xmlChar * *tmp;
1823 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1825 sizeof(ctxt->nameTab[0]));
1830 ctxt->nameTab = tmp;
1832 ctxt->nameTab[ctxt->nameNr] = value;
1834 return (ctxt->nameNr++);
1836 xmlErrMemory(ctxt, NULL);
1841 * @ctxt: an XML parser context
1843 * Pops the top element name from the name stack
1845 * Returns the name just removed
1848 namePop(xmlParserCtxtPtr ctxt)
1852 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1855 if (ctxt->nameNr > 0)
1856 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1859 ret = ctxt->nameTab[ctxt->nameNr];
1860 ctxt->nameTab[ctxt->nameNr] = NULL;
1864 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1865 if (ctxt->spaceNr >= ctxt->spaceMax) {
1868 ctxt->spaceMax *= 2;
1869 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1870 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1872 xmlErrMemory(ctxt, NULL);
1876 ctxt->spaceTab = tmp;
1878 ctxt->spaceTab[ctxt->spaceNr] = val;
1879 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1880 return(ctxt->spaceNr++);
1883 static int spacePop(xmlParserCtxtPtr ctxt) {
1885 if (ctxt->spaceNr <= 0) return(0);
1887 if (ctxt->spaceNr > 0)
1888 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1890 ctxt->space = &ctxt->spaceTab[0];
1891 ret = ctxt->spaceTab[ctxt->spaceNr];
1892 ctxt->spaceTab[ctxt->spaceNr] = -1;
1897 * Macros for accessing the content. Those should be used only by the parser,
1900 * Dirty macros, i.e. one often need to make assumption on the context to
1903 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1904 * To be used with extreme caution since operations consuming
1905 * characters may move the input buffer to a different location !
1906 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1907 * This should be used internally by the parser
1908 * only to compare to ASCII values otherwise it would break when
1909 * running with UTF-8 encoding.
1910 * RAW same as CUR but in the input buffer, bypass any token
1911 * extraction that may have been done
1912 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1913 * to compare on ASCII based substring.
1914 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1915 * strings without newlines within the parser.
1916 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1917 * defined char within the parser.
1918 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1920 * NEXT Skip to the next character, this does the proper decoding
1921 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1922 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1923 * CUR_CHAR(l) returns the current unicode character (int), set l
1924 * to the number of xmlChars used for the encoding [0-5].
1925 * CUR_SCHAR same but operate on a string instead of the context
1926 * COPY_BUF copy the current unicode char to the target buffer, increment
1928 * GROW, SHRINK handling of input buffers
1931 #define RAW (*ctxt->input->cur)
1932 #define CUR (*ctxt->input->cur)
1933 #define NXT(val) ctxt->input->cur[(val)]
1934 #define CUR_PTR ctxt->input->cur
/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are exactly
 * c1 .. cn. Bytes are read as unsigned char. The comparisons are chained
 * with &&, so nothing past the first mismatching byte is read.
 * Every argument is parenthesized so that expressions such as "p + 1" can
 * be passed safely; note that s is still evaluated once per byte tested,
 * so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1954 #define SKIP(val) do { \
1955 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1956 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1957 if ((*ctxt->input->cur == 0) && \
1958 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1959 xmlPopInput(ctxt); \
1962 #define SKIPL(val) do { \
1964 for(skipl=0; skipl<val; skipl++) { \
1965 if (*(ctxt->input->cur) == '\n') { \
1966 ctxt->input->line++; ctxt->input->col = 1; \
1967 } else ctxt->input->col++; \
1969 ctxt->input->cur++; \
1971 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1972 if ((*ctxt->input->cur == 0) && \
1973 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1974 xmlPopInput(ctxt); \
1977 #define SHRINK if ((ctxt->progressive == 0) && \
1978 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1979 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1982 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1983 xmlParserInputShrink(ctxt->input);
1984 if ((*ctxt->input->cur == 0) &&
1985 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1989 #define GROW if ((ctxt->progressive == 0) && \
1990 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1993 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1994 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1995 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1996 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2000 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2002 #define NEXT xmlNextChar(ctxt)
2005 ctxt->input->col++; \
2006 ctxt->input->cur++; \
2008 if (*ctxt->input->cur == 0) \
2009 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2012 #define NEXTL(l) do { \
2013 if (*(ctxt->input->cur) == '\n') { \
2014 ctxt->input->line++; ctxt->input->col = 1; \
2015 } else ctxt->input->col++; \
2016 ctxt->input->cur += l; \
2017 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2020 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2021 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2023 #define COPY_BUF(l,b,i,v) \
2024 if (l == 1) b[i++] = (xmlChar) v; \
2025 else i += xmlCopyCharMultiByte(&b[i],v)
2028 * xmlSkipBlankChars:
2029 * @ctxt: the XML parser context
2031 * skip all blanks character found at that point in the input streams.
2032 * It pops up finished entities in the process if allowable at that point.
2034 * Returns the number of space chars skipped
/* Skips blank characters at the current input position; two strategies are used depending on the parser state. */
2038 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2042 * It's Okay to use CUR/NEXT here since all the blanks are on
/* Fast path: a single input on the stack and the parser is not in the DTD state, so the buffer is scanned through a local pointer. */
2045 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2048 * if we are in the document content, go really fast
2050 cur = ctxt->input->cur;
2051 while (IS_BLANK_CH(*cur)) {
/* line/column bookkeeping when a line is finished */
2053 ctxt->input->line++; ctxt->input->col = 1;
/* publish the scan position, request more data, then reload the local pointer from the input */
2058 ctxt->input->cur = cur;
2059 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2060 cur = ctxt->input->cur;
2063 ctxt->input->cur = cur;
/* General path: here cur holds a character value, not a pointer. */
2068 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
/* only relevant while nested inputs are stacked and the parser is not inside a comment */
2073 while ((cur == 0) && (ctxt->inputNr > 1) &&
2074 (ctxt->instate != XML_PARSER_COMMENT)) {
2079 * Need to handle support of entities branching here
/* a '%' at the current position is handed to the parameter-entity reference handler */
2081 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2082 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2087 /************************************************************************
2089 * Commodity functions to handle entities *
2091 ************************************************************************/
2095 * @ctxt: an XML parser context
2097 * xmlPopInput: the current input pointed by ctxt->input came to an end
2098 * pop it and return the next char.
2100 * Returns the current xmlChar in the parser context
2103 xmlPopInput(xmlParserCtxtPtr ctxt) {
2104 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2105 if (xmlParserDebugEntities)
2106 xmlGenericError(xmlGenericErrorContext,
2107 "Popping input %d\n", ctxt->inputNr);
2108 xmlFreeInputStream(inputPop(ctxt));
2109 if ((*ctxt->input->cur == 0) &&
2110 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2111 return(xmlPopInput(ctxt));
2117 * @ctxt: an XML parser context
2118 * @input: an XML parser input fragment (entity, XML fragment ...).
2120 * xmlPushInput: switch to a new input stream which is stacked on top
2121 * of the previous one(s).
2122 * Returns -1 in case of error or the index in the input stack
2125 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2127 if (input == NULL) return(-1);
2129 if (xmlParserDebugEntities) {
2130 if ((ctxt->input != NULL) && (ctxt->input->filename))
2131 xmlGenericError(xmlGenericErrorContext,
2132 "%s(%d): ", ctxt->input->filename,
2134 xmlGenericError(xmlGenericErrorContext,
2135 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2137 ret = inputPush(ctxt, input);
2144 * @ctxt: an XML parser context
2146 * parse Reference declarations
2148 * [66] CharRef ::= '&#' [0-9]+ ';' |
2149 * '&#x' [0-9a-fA-F]+ ';'
2151 * [ WFC: Legal Character ]
2152 * Characters referred to using character references must match the
2153 * production for Char.
2155 * Returns the value parsed (as an int), 0 in case of error
/* Parses a character reference at the current input position and validates the resulting code point. */
2158 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
/* val accumulates the referenced code point; outofrange is tested, together with IS_CHAR(val), before the value is accepted */
2159 unsigned int val = 0;
2161 unsigned int outofrange = 0;
2164 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2166 if ((RAW == '&') && (NXT(1) == '#') &&
2170 while (RAW != ';') { /* loop blocked by count */
/* in this branch each accepted digit scales the accumulator by 16; letters are accepted in both cases */
2175 if ((RAW >= '0') && (RAW <= '9'))
2176 val = val * 16 + (CUR - '0');
2177 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2178 val = val * 16 + (CUR - 'a') + 10;
2179 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2180 val = val * 16 + (CUR - 'A') + 10;
/* any other character is reported as an invalid hexadecimal character reference */
2182 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2193 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2198 } else if ((RAW == '&') && (NXT(1) == '#')) {
2201 while (RAW != ';') { /* loop blocked by count */
/* in this branch only decimal digits are accepted and the accumulator is scaled by 10 */
2206 if ((RAW >= '0') && (RAW <= '9'))
2207 val = val * 10 + (CUR - '0');
2209 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2220 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
/* input that does not start with "&#" is not a character reference at all */
2226 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2230 * [ WFC: Legal Character ]
2231 * Characters referred to using character references must match the
2232 * production for Char.
/* the value is accepted only if it is a legal XML Char and no overflow was flagged; otherwise XML_ERR_INVALID_CHAR is raised */
2234 if ((IS_CHAR(val) && (outofrange == 0))) {
2237 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2238 "xmlParseCharRef: invalid xmlChar value %d\n",
2245 * xmlParseStringCharRef:
2246 * @ctxt: an XML parser context
2247 * @str: a pointer to an index in the string
2249 * parse Reference declarations, variant parsing from a string rather
2250 * than an input flow.
2252 * [66] CharRef ::= '&#' [0-9]+ ';' |
2253 * '&#x' [0-9a-fA-F]+ ';'
2255 * [ WFC: Legal Character ]
2256 * Characters referred to using character references must match the
2257 * production for Char.
2259 * Returns the value parsed (as an int), 0 in case of error, str will be
2260 * updated to the current value of the index
/* String variant of the character reference parser: reads from *str instead of the parser input. */
2263 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
/* val accumulates the referenced code point; outofrange is tested, together with IS_CHAR(val), before the value is accepted */
2266 unsigned int val = 0;
2267 unsigned int outofrange = 0;
/* nothing to parse without a string */
2269 if ((str == NULL) || (*str == NULL)) return(0);
/* "&#x" prefix: hexadecimal form, digits scale the accumulator by 16 */
2272 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2275 while (cur != ';') { /* Non input consuming loop */
2276 if ((cur >= '0') && (cur <= '9'))
2277 val = val * 16 + (cur - '0');
2278 else if ((cur >= 'a') && (cur <= 'f'))
2279 val = val * 16 + (cur - 'a') + 10;
2280 else if ((cur >= 'A') && (cur <= 'F'))
2281 val = val * 16 + (cur - 'A') + 10;
/* any other character is reported as an invalid hexadecimal character reference */
2283 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
/* "&#" prefix without 'x': decimal form, digits scale the accumulator by 10 */
2295 } else if ((cur == '&') && (ptr[1] == '#')){
2298 while (cur != ';') { /* Non input consuming loops */
2299 if ((cur >= '0') && (cur <= '9'))
2300 val = val * 10 + (cur - '0');
2302 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
/* the string does not start with "&#" */
2315 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2321 * [ WFC: Legal Character ]
2322 * Characters referred to using character references must match the
2323 * production for Char.
/* the value is accepted only if it is a legal XML Char and no overflow was flagged; otherwise XML_ERR_INVALID_CHAR is raised */
2325 if ((IS_CHAR(val) && (outofrange == 0))) {
2328 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2329 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2336 * xmlNewBlanksWrapperInputStream:
2337 * @ctxt: an XML parser context
2338 * @entity: an Entity pointer
2340 * Create a new input stream for wrapping
2341 * blanks around a PEReference
2343 * Returns the new input stream or NULL
2346 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2348 static xmlParserInputPtr
2349 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2350 xmlParserInputPtr input;
2353 if (entity == NULL) {
2354 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2355 "xmlNewBlanksWrapperInputStream entity\n");
2358 if (xmlParserDebugEntities)
2359 xmlGenericError(xmlGenericErrorContext,
2360 "new blanks wrapper for entity: %s\n", entity->name);
2361 input = xmlNewInputStream(ctxt);
2362 if (input == NULL) {
2365 length = xmlStrlen(entity->name) + 5;
2366 buffer = xmlMallocAtomic(length);
2367 if (buffer == NULL) {
2368 xmlErrMemory(ctxt, NULL);
2374 buffer [length-3] = ';';
2375 buffer [length-2] = ' ';
2376 buffer [length-1] = 0;
2377 memcpy(buffer + 2, entity->name, length - 5);
2378 input->free = deallocblankswrapper;
2379 input->base = buffer;
2380 input->cur = buffer;
2381 input->length = length;
2382 input->end = &buffer[length];
2387 * xmlParserHandlePEReference:
2388 * @ctxt: the parser context
2390 * [69] PEReference ::= '%' Name ';'
2392 * [ WFC: No Recursion ]
2393 * A parsed entity must not contain a recursive
2394 * reference to itself, either directly or indirectly.
2396 * [ WFC: Entity Declared ]
2397 * In a document without any DTD, a document with only an internal DTD
2398 * subset which contains no parameter entity references, or a document
2399 * with "standalone='yes'", ... ... The declaration of a parameter
2400 * entity must precede any reference to it...
2402 * [ VC: Entity Declared ]
2403 * In a document with an external subset or external parameter entities
2404 * with "standalone='no'", ... ... The declaration of a parameter entity
2405 * must precede any reference to it...
2408 * Parameter-entity references may only appear in the DTD.
2409 * NOTE: misleading but this is handled.
2411 * A PEReference may have been detected in the current input stream
2412 * the handling is done accordingly to
2413 * http://www.w3.org/TR/REC-xml#entproc
2415 * - Included in literal in entity values
2416 * - Included as Parameter Entity reference within DTDs
/* Handles a parameter-entity reference ('%' Name ';') found at the current input position, depending on the parser state. */
2419 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2420 const xmlChar *name;
2421 xmlEntityPtr entity = NULL;
2422 xmlParserInputPtr input;
/* nothing to do unless the current character starts a PE reference */
2424 if (RAW != '%') return;
/* what happens next depends on where in the document the '%' was met; the EOF, prolog/start/misc and epilog states raise a fatal error */
2425 switch(ctxt->instate) {
2426 case XML_PARSER_CDATA_SECTION:
2428 case XML_PARSER_COMMENT:
2430 case XML_PARSER_START_TAG:
2432 case XML_PARSER_END_TAG:
2434 case XML_PARSER_EOF:
2435 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2437 case XML_PARSER_PROLOG:
2438 case XML_PARSER_START:
2439 case XML_PARSER_MISC:
2440 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2442 case XML_PARSER_ENTITY_DECL:
2443 case XML_PARSER_CONTENT:
2444 case XML_PARSER_ATTRIBUTE_VALUE:
2446 case XML_PARSER_SYSTEM_LITERAL:
2447 case XML_PARSER_PUBLIC_LITERAL:
2448 /* we just ignore it there */
2450 case XML_PARSER_EPILOG:
2451 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2453 case XML_PARSER_ENTITY_VALUE:
2455 * NOTE: in the case of entity values, we don't do the
2456 * substitution here since we need the literal
2457 * entity value to be able to save the internal
2458 * subset of the document.
2459 * This will be handled by xmlStringDecodeEntities
2462 case XML_PARSER_DTD:
2464 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2465 * In the internal DTD subset, parameter-entity references
2466 * can occur only where markup declarations can occur, not
2467 * within markup declarations.
2468 * In that case this is handled in xmlParseMarkupDecl
/* internal subset read from the single base input */
2470 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
/* a '%' followed by a blank or by the end of the buffer */
2472 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2475 case XML_PARSER_IGNORE:
/* read the entity name that follows the '%' */
2480 name = xmlParseName(ctxt);
2481 if (xmlParserDebugEntities)
2482 xmlGenericError(xmlGenericErrorContext,
2483 "PEReference: %s\n", name);
2485 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
/* the entity is resolved through the SAX getParameterEntity callback when one is installed */
2489 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2490 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2491 if (entity == NULL) {
2494 * [ WFC: Entity Declared ]
2495 * In a document without any DTD, a document with only an
2496 * internal DTD subset which contains no parameter entity
2497 * references, or a document with "standalone='yes'", ...
2498 * ... The declaration of a parameter entity must precede
2499 * any reference to it...
/* unknown entity: a well-formedness error in the standalone / no-external-subset-and-no-PE-reference case */
2501 if ((ctxt->standalone == 1) ||
2502 ((ctxt->hasExternalSubset == 0) &&
2503 (ctxt->hasPErefs == 0))) {
2504 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2505 "PEReference: %%%s; not found\n", name);
2508 * [ VC: Entity Declared ]
2509 * In a document with an external subset or external
2510 * parameter entities with "standalone='no'", ...
2511 * ... The declaration of a parameter entity must precede
2512 * any reference to it...
/* otherwise it is a validity error when validating with an error handler set, and a warning in the remaining case */
2514 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2515 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2516 "PEReference: %%%s; not found\n",
2519 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2520 "PEReference: %%%s; not found\n",
/* known entity and the current input is not already a blanks wrapper (recognised by its free callback): push a wrapper input first */
2524 } else if (ctxt->input->free != deallocblankswrapper) {
2525 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2526 if (xmlPushInput(ctxt, input) < 0)
/* current input is the wrapper: only internal or external parameter entities may be expanded */
2529 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2530 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2532 xmlCharEncoding enc;
2535 * handle the extra spaces added before and after
2536 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2537 * this is done independently.
2539 input = xmlNewEntityInputStream(ctxt, entity);
2540 if (xmlPushInput(ctxt, input) < 0)
2544 * Get the 4 first bytes and decode the charset
2545 * if enc != XML_CHAR_ENCODING_NONE
2546 * plug some encoding conversion routines.
2547 * Note that, since we may have some non-UTF8
2548 * encoding (like UTF16, bug 135229), the 'length'
2549 * is not known, but we can calculate based upon
2550 * the amount of data in the buffer.
/* encoding detection needs at least four bytes in the buffer */
2553 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2558 enc = xmlDetectCharEncoding(start, 4);
2559 if (enc != XML_CHAR_ENCODING_NONE) {
2560 xmlSwitchEncoding(ctxt, enc);
/* an external parameter entity may start with a text declaration ("<?xml" followed by a blank) */
2564 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2565 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2566 (IS_BLANK_CH(NXT(5)))) {
2567 xmlParseTextDecl(ctxt);
/* any other entity type cannot be referenced with '%' */
2570 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2571 "PEReference: %s is not a parameter entity\n",
2576 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2582 * Macro used to grow the current buffer.
2584 #define growBuffer(buffer, n) { \
2586 buffer##_size *= 2; \
2587 buffer##_size += n; \
2589 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
2590 if (tmp == NULL) goto mem_error; \
2595 * xmlStringLenDecodeEntities:
2596 * @ctxt: the parser context
2597 * @str: the input string
2598 * @len: the string length
2599 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2600 * @end: an end marker xmlChar, 0 if none
2601 * @end2: an end marker xmlChar, 0 if none
2602 * @end3: an end marker xmlChar, 0 if none
2604 * Takes an entity string content and processes it to do the adequate substitutions.
2606 * [67] Reference ::= EntityRef | CharRef
2608 * [69] PEReference ::= '%' Name ';'
2610 * Returns A newly allocated string with the substitution done. The caller
2611 * must deallocate it !
/* Builds a new string from str in which character references, and (depending on the what flags) entity and parameter-entity references, are substituted. */
2614 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2616 xmlChar *buffer = NULL;
2617 int buffer_size = 0;
2619 xmlChar *current = NULL;
2620 xmlChar *rep = NULL;
2621 const xmlChar *last;
/* reject missing arguments and negative lengths */
2626 if ((ctxt == NULL) || (str == NULL) || (len < 0))
/* recursion guard: depth above 40 is refused unless XML_PARSE_HUGE is set, and depth above 1024 is always refused */
2630 if (((ctxt->depth > 40) &&
2631 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2632 (ctxt->depth > 1024)) {
2633 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2638 * allocate a translation buffer.
2640 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2641 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2642 if (buffer == NULL) goto mem_error;
2645 * OK loop until we reach one of the ending char or a size limit.
2646 * we are operating on already parsed values.
2649 c = CUR_SCHAR(str, l);
2652 while ((c != 0) && (c != end) && /* non input consuming loop */
2653 (c != end2) && (c != end3)) {
/* "&#": character reference, always substituted regardless of the what flags */
2656 if ((c == '&') && (str[1] == '#')) {
2657 int val = xmlParseStringCharRef(ctxt, &str);
2659 COPY_BUF(0,buffer,nbchars,val);
/* keep XML_PARSER_BUFFER_SIZE bytes of headroom in the output buffer */
2661 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2662 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* general entity reference, handled only when XML_SUBSTITUTE_REF is requested */
2664 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2665 if (xmlParserDebugEntities)
2666 xmlGenericError(xmlGenericErrorContext,
2667 "String decoding Entity Reference: %.30s\n",
2669 ent = xmlParseStringEntityRef(ctxt, &str);
2670 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2671 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
/* NOTE(review): ent is dereferenced here before the ent != NULL tests below; a guard may sit on a line not visible in this excerpt - confirm */
2674 ctxt->nbentities += ent->checked;
/* predefined entities contribute the first character of their content */
2675 if ((ent != NULL) &&
2676 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2677 if (ent->content != NULL) {
2678 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2679 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2680 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2683 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2684 "predefined entity has no content\n");
/* other entities with content: decode the content recursively and append the result */
2686 } else if ((ent != NULL) && (ent->content != NULL)) {
2688 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2694 while (*current != 0) { /* non input consuming loop */
2695 buffer[nbchars++] = *current++;
2697 buffer_size - XML_PARSER_BUFFER_SIZE) {
2698 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2700 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* entity without content: the reference is written back as "&name;" */
2706 } else if (ent != NULL) {
2707 int i = xmlStrlen(ent->name);
2708 const xmlChar *cur = ent->name;
2710 buffer[nbchars++] = '&';
2711 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2712 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2715 buffer[nbchars++] = *cur++;
2716 buffer[nbchars++] = ';';
/* parameter-entity reference, handled only when XML_SUBSTITUTE_PEREF is requested */
2718 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2719 if (xmlParserDebugEntities)
2720 xmlGenericError(xmlGenericErrorContext,
2721 "String decoding PE Reference: %.30s\n", str);
2722 ent = xmlParseStringPEReference(ctxt, &str);
2723 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2726 ctxt->nbentities += ent->checked;
/* the content is loaded on demand before being decoded */
2728 if (ent->content == NULL) {
2729 xmlLoadEntityContent(ctxt, ent);
2732 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2737 while (*current != 0) { /* non input consuming loop */
2738 buffer[nbchars++] = *current++;
2740 buffer_size - XML_PARSER_BUFFER_SIZE) {
2741 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2743 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* ordinary character: copied as is, using l as its encoded length */
2751 COPY_BUF(l,buffer,nbchars,c);
2753 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2754 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2758 c = CUR_SCHAR(str, l);
/* terminate the result string */
2762 buffer[nbchars] = 0;
2766 xmlErrMemory(ctxt, NULL);
2776 * xmlStringDecodeEntities:
2777 * @ctxt: the parser context
2778 * @str: the input string
2779 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2780 * @end: an end marker xmlChar, 0 if none
2781 * @end2: an end marker xmlChar, 0 if none
2782 * @end3: an end marker xmlChar, 0 if none
2784 * Takes a entity string content and process to do the adequate substitutions.
2786 * [67] Reference ::= EntityRef | CharRef
2788 * [69] PEReference ::= '%' Name ';'
2790 * Returns A newly allocated string with the substitution done. The caller
2791 * must deallocate it !
2794 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2795 xmlChar end, xmlChar end2, xmlChar end3) {
2796 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2797 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2801 /************************************************************************
2803 * Commodity functions, cleanup needed ? *
2805 ************************************************************************/
2809 * @ctxt: an XML parser context
2811 * @len: the size of @str
2812 * @blank_chars: we know the chars are blanks
2814 * Is this a sequence of blank chars that one can ignore ?
2816 * Returns 1 if ignorable 0 otherwise.
2819 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2822 xmlNodePtr lastChild;
2825 * Don't spend time trying to differentiate them, the same callback is
2828 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2832 * Check for xml:space value.
2834 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2835 (*(ctxt->space) == -2))
2839 * Check that the string is made of blanks
2841 if (blank_chars == 0) {
2842 for (i = 0;i < len;i++)
2843 if (!(IS_BLANK_CH(str[i]))) return(0);
2847 * Look if the element is mixed content in the DTD if available
2849 if (ctxt->node == NULL) return(0);
2850 if (ctxt->myDoc != NULL) {
2851 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2852 if (ret == 0) return(1);
2853 if (ret == 1) return(0);
2857 * Otherwise, heuristic :-\
2859 if ((RAW != '<') && (RAW != 0xD)) return(0);
2860 if ((ctxt->node->children == NULL) &&
2861 (RAW == '<') && (NXT(1) == '/')) return(0);
2863 lastChild = xmlGetLastChild(ctxt->node);
2864 if (lastChild == NULL) {
2865 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2866 (ctxt->node->content != NULL)) return(0);
2867 } else if (xmlNodeIsText(lastChild))
2869 else if ((ctxt->node->children != NULL) &&
2870 (xmlNodeIsText(ctxt->node->children)))
2875 /************************************************************************
2877 * Extra stuff for namespace support *
2878 * Relates to http://www.w3.org/TR/WD-xml-names *
2880 ************************************************************************/
2884 * @ctxt: an XML parser context
2885 * @name: the XML qualified name string to split
2886 * @prefix: a xmlChar **
2888 * parse an UTF8 encoded XML qualified name string
2890 * [NS 5] QName ::= (Prefix ':')? LocalPart
2892 * [NS 6] Prefix ::= NCName
2894 * [NS 7] LocalPart ::= NCName
2896 * Returns the local part, and prefix is updated
2897 * to get the Prefix if any.
2901 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2902 xmlChar buf[XML_MAX_NAMELEN + 5];
2903 xmlChar *buffer = NULL;
2905 int max = XML_MAX_NAMELEN;
2906 xmlChar *ret = NULL;
2907 const xmlChar *cur = name;
2910 if (prefix == NULL) return(NULL);
2913 if (cur == NULL) return(NULL);
2915 #ifndef XML_XML_NAMESPACE
2916 /* xml: prefix is not really a namespace */
2917 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2918 (cur[2] == 'l') && (cur[3] == ':'))
2919 return(xmlStrdup(name));
2922 /* nasty but well-formed */
2924 return(xmlStrdup(name));
2927 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2933 * Okay someone managed to make a huge name, so he's ready to pay
2934 * for the processing speed.
2938 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2939 if (buffer == NULL) {
2940 xmlErrMemory(ctxt, NULL);
2943 memcpy(buffer, buf, len);
2944 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2945 if (len + 10 > max) {
2949 tmp = (xmlChar *) xmlRealloc(buffer,
2950 max * sizeof(xmlChar));
2953 xmlErrMemory(ctxt, NULL);
2964 if ((c == ':') && (*cur == 0)) {
2968 return(xmlStrdup(name));
2972 ret = xmlStrndup(buf, len);
2976 max = XML_MAX_NAMELEN;
2984 return(xmlStrndup(BAD_CAST "", 0));
2989 * Check that the first character is proper to start
2992 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2993 ((c >= 0x41) && (c <= 0x5A)) ||
2994 (c == '_') || (c == ':'))) {
2996 int first = CUR_SCHAR(cur, l);
2998 if (!IS_LETTER(first) && (first != '_')) {
2999 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3000 "Name %s is not XML Namespace compliant\n",
3006 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3012 * Okay someone managed to make a huge name, so he's ready to pay
3013 * for the processing speed.
3017 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3018 if (buffer == NULL) {
3019 xmlErrMemory(ctxt, NULL);
3022 memcpy(buffer, buf, len);
3023 while (c != 0) { /* tested bigname2.xml */
3024 if (len + 10 > max) {
3028 tmp = (xmlChar *) xmlRealloc(buffer,
3029 max * sizeof(xmlChar));
3031 xmlErrMemory(ctxt, NULL);
3044 ret = xmlStrndup(buf, len);
3053 /************************************************************************
3055 * The parser itself *
3056 * Relates to http://www.w3.org/TR/REC-xml *
3058 ************************************************************************/
3060 /************************************************************************
3062 * Routines to parse Name, NCName and NmToken *
3064 ************************************************************************/
3066 static unsigned long nbParseName = 0;
3067 static unsigned long nbParseNmToken = 0;
3068 static unsigned long nbParseNCName = 0;
3069 static unsigned long nbParseNCNameComplex = 0;
3070 static unsigned long nbParseNameComplex = 0;
3071 static unsigned long nbParseStringName = 0;
3075 * The two following functions are related to the change of accepted
3076 * characters for Name and NmToken in the Revision 5 of XML-1.0
3077 * They correspond to the modified production [4] and the new production [4a]
3078 * changes in that revision. Also note that the macros used for the
3079 * productions Letter, Digit, CombiningChar and Extender are not needed
3081 * We still keep compatibility to pre-revision5 parsing semantic if the
3082 * new XML_PARSE_OLD10 option is given to the parser.
3085 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3086 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3088 * Use the new checks of production [4] [4a] and [5] of the
3089 * Update 5 of XML-1.0
3091 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3092 (((c >= 'a') && (c <= 'z')) ||
3093 ((c >= 'A') && (c <= 'Z')) ||
3094 (c == '_') || (c == ':') ||
3095 ((c >= 0xC0) && (c <= 0xD6)) ||
3096 ((c >= 0xD8) && (c <= 0xF6)) ||
3097 ((c >= 0xF8) && (c <= 0x2FF)) ||
3098 ((c >= 0x370) && (c <= 0x37D)) ||
3099 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100 ((c >= 0x200C) && (c <= 0x200D)) ||
3101 ((c >= 0x2070) && (c <= 0x218F)) ||
3102 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3103 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3104 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3105 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3106 ((c >= 0x10000) && (c <= 0xEFFFF))))
3109 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3116 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3117 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3119 * Use the new checks of production [4] [4a] and [5] of the
3120 * Update 5 of XML-1.0
3122 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3123 (((c >= 'a') && (c <= 'z')) ||
3124 ((c >= 'A') && (c <= 'Z')) ||
3125 ((c >= '0') && (c <= '9')) || /* !start */
3126 (c == '_') || (c == ':') ||
3127 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3128 ((c >= 0xC0) && (c <= 0xD6)) ||
3129 ((c >= 0xD8) && (c <= 0xF6)) ||
3130 ((c >= 0xF8) && (c <= 0x2FF)) ||
3131 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3132 ((c >= 0x370) && (c <= 0x37D)) ||
3133 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3134 ((c >= 0x200C) && (c <= 0x200D)) ||
3135 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3136 ((c >= 0x2070) && (c <= 0x218F)) ||
3137 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3138 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3139 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3140 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3141 ((c >= 0x10000) && (c <= 0xEFFFF))))
3144 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3145 (c == '.') || (c == '-') ||
3146 (c == '_') || (c == ':') ||
3147 (IS_COMBINING(c)) ||
3154 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3155 int *len, int *alloc, int normalize);
3157 static const xmlChar *
3158 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3164 nbParseNameComplex++;
3168 * Handler for more complex cases
3172 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3174 * Use the new checks of production [4] [4a] and [5] of the
3175 * Update 5 of XML-1.0
3177 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3178 (!(((c >= 'a') && (c <= 'z')) ||
3179 ((c >= 'A') && (c <= 'Z')) ||
3180 (c == '_') || (c == ':') ||
3181 ((c >= 0xC0) && (c <= 0xD6)) ||
3182 ((c >= 0xD8) && (c <= 0xF6)) ||
3183 ((c >= 0xF8) && (c <= 0x2FF)) ||
3184 ((c >= 0x370) && (c <= 0x37D)) ||
3185 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3186 ((c >= 0x200C) && (c <= 0x200D)) ||
3187 ((c >= 0x2070) && (c <= 0x218F)) ||
3188 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3198 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3199 (((c >= 'a') && (c <= 'z')) ||
3200 ((c >= 'A') && (c <= 'Z')) ||
3201 ((c >= '0') && (c <= '9')) || /* !start */
3202 (c == '_') || (c == ':') ||
3203 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3204 ((c >= 0xC0) && (c <= 0xD6)) ||
3205 ((c >= 0xD8) && (c <= 0xF6)) ||
3206 ((c >= 0xF8) && (c <= 0x2FF)) ||
3207 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3208 ((c >= 0x370) && (c <= 0x37D)) ||
3209 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3210 ((c >= 0x200C) && (c <= 0x200D)) ||
3211 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3212 ((c >= 0x2070) && (c <= 0x218F)) ||
3213 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3214 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3215 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3216 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3217 ((c >= 0x10000) && (c <= 0xEFFFF))
3219 if (count++ > 100) {
3228 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3229 (!IS_LETTER(c) && (c != '_') &&
3237 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3238 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3239 (c == '.') || (c == '-') ||
3240 (c == '_') || (c == ':') ||
3241 (IS_COMBINING(c)) ||
3242 (IS_EXTENDER(c)))) {
3243 if (count++ > 100) {
3252 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3253 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3254 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3259 * @ctxt: an XML parser context
3261 * parse an XML name.
3263 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3264 * CombiningChar | Extender
3266 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3268 * [6] Names ::= Name (#x20 Name)*
3270 * Returns the Name parsed or NULL
3274 xmlParseName(xmlParserCtxtPtr ctxt) {
3286 * Accelerator for simple ASCII names
3288 in = ctxt->input->cur;
3289 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3290 ((*in >= 0x41) && (*in <= 0x5A)) ||
3291 (*in == '_') || (*in == ':')) {
3293 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3294 ((*in >= 0x41) && (*in <= 0x5A)) ||
3295 ((*in >= 0x30) && (*in <= 0x39)) ||
3296 (*in == '_') || (*in == '-') ||
3297 (*in == ':') || (*in == '.'))
3299 if ((*in > 0) && (*in < 0x80)) {
3300 count = in - ctxt->input->cur;
3301 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3302 ctxt->input->cur = in;
3303 ctxt->nbChars += count;
3304 ctxt->input->col += count;
3306 xmlErrMemory(ctxt, NULL);
3310 /* accelerator for special cases */
3311 return(xmlParseNameComplex(ctxt));
3314 static const xmlChar *
3315 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3321 nbParseNCNameComplex++;
3325 * Handler for more complex cases
3329 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3330 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3334 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3336 if (count++ > 100) {
3344 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3349 * @ctxt: an XML parser context
3350 * @len: length of the string parsed
3352 * parse an XML name.
3354 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3355 * CombiningChar | Extender
3357 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3359 * Returns the Name parsed or NULL
3362 static const xmlChar *
3363 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3373 * Accelerator for simple ASCII names
3375 in = ctxt->input->cur;
3376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3381 ((*in >= 0x41) && (*in <= 0x5A)) ||
3382 ((*in >= 0x30) && (*in <= 0x39)) ||
3383 (*in == '_') || (*in == '-') ||
3386 if ((*in > 0) && (*in < 0x80)) {
3387 count = in - ctxt->input->cur;
3388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 ctxt->input->cur = in;
3390 ctxt->nbChars += count;
3391 ctxt->input->col += count;
3393 xmlErrMemory(ctxt, NULL);
3398 return(xmlParseNCNameComplex(ctxt));
3402 * xmlParseNameAndCompare:
3403 * @ctxt: an XML parser context
3405 * parse an XML name and compares for match
3406 * (specialized for endtag parsing)
3408 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3409 * and the name for mismatch
3412 static const xmlChar *
3413 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3414 register const xmlChar *cmp = other;
3415 register const xmlChar *in;
3420 in = ctxt->input->cur;
3421 while (*in != 0 && *in == *cmp) {
3426 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3428 ctxt->input->cur = in;
3429 return (const xmlChar*) 1;
3431 /* failure (or end of input buffer), check with full function */
3432 ret = xmlParseName (ctxt);
3433 /* strings coming from the dictionary, direct compare possible */
3435 return (const xmlChar*) 1;
3441 * xmlParseStringName:
3442 * @ctxt: an XML parser context
3443 * @str: a pointer to the string pointer (IN/OUT)
3445 * parse an XML name.
3447 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3448 * CombiningChar | Extender
3450 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3452 * [6] Names ::= Name (#x20 Name)*
3454 * Returns the Name parsed or NULL. The @str pointer
3455 * is updated to the current location in the string.
3459 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3460 xmlChar buf[XML_MAX_NAMELEN + 5];
3461 const xmlChar *cur = *str;
3466 nbParseStringName++;
3469 c = CUR_SCHAR(cur, l);
3470 if (!xmlIsNameStartChar(ctxt, c)) {
3474 COPY_BUF(l,buf,len,c);
3476 c = CUR_SCHAR(cur, l);
3477 while (xmlIsNameChar(ctxt, c)) {
3478 COPY_BUF(l,buf,len,c);
3480 c = CUR_SCHAR(cur, l);
3481 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3483 * Okay someone managed to make a huge name, so he's ready to pay
3484 * for the processing speed.
3489 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3490 if (buffer == NULL) {
3491 xmlErrMemory(ctxt, NULL);
3494 memcpy(buffer, buf, len);
3495 while (xmlIsNameChar(ctxt, c)) {
3496 if (len + 10 > max) {
3499 tmp = (xmlChar *) xmlRealloc(buffer,
3500 max * sizeof(xmlChar));
3502 xmlErrMemory(ctxt, NULL);
3508 COPY_BUF(l,buffer,len,c);
3510 c = CUR_SCHAR(cur, l);
3518 return(xmlStrndup(buf, len));
3523 * @ctxt: an XML parser context
3525 * parse an XML Nmtoken.
3527 * [7] Nmtoken ::= (NameChar)+
3529 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3531 * Returns the Nmtoken parsed or NULL
3535 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3536 xmlChar buf[XML_MAX_NAMELEN + 5];
3548 while (xmlIsNameChar(ctxt, c)) {
3549 if (count++ > 100) {
3553 COPY_BUF(l,buf,len,c);
3556 if (len >= XML_MAX_NAMELEN) {
3558 * Okay someone managed to make a huge token, so he's ready to pay
3559 * for the processing speed.
3564 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3565 if (buffer == NULL) {
3566 xmlErrMemory(ctxt, NULL);
3569 memcpy(buffer, buf, len);
3570 while (xmlIsNameChar(ctxt, c)) {
3571 if (count++ > 100) {
3575 if (len + 10 > max) {
3579 tmp = (xmlChar *) xmlRealloc(buffer,
3580 max * sizeof(xmlChar));
3582 xmlErrMemory(ctxt, NULL);
3588 COPY_BUF(l,buffer,len,c);
3598 return(xmlStrndup(buf, len));
3602 * xmlParseEntityValue:
3603 * @ctxt: an XML parser context
3604 * @orig: if non-NULL store a copy of the original entity value
3606 * parse a value for ENTITY declarations
3608 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3609 * "'" ([^%&'] | PEReference | Reference)* "'"
3611 * Returns the EntityValue parsed with reference substituted or NULL
3615 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3616 xmlChar *buf = NULL;
3618 int size = XML_PARSER_BUFFER_SIZE;
3621 xmlChar *ret = NULL;
3622 const xmlChar *cur = NULL;
3623 xmlParserInputPtr input;
3625 if (RAW == '"') stop = '"';
3626 else if (RAW == '\'') stop = '\'';
3628 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3631 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3633 xmlErrMemory(ctxt, NULL);
3638 * The content of the entity definition is copied in a buffer.
3641 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3642 input = ctxt->input;
3647 * NOTE: 4.4.5 Included in Literal
3648 * When a parameter entity reference appears in a literal entity
3649 * value, ... a single or double quote character in the replacement
3650 * text is always treated as a normal data character and will not
3651 * terminate the literal.
3652 * In practice it means we stop the loop only when back at parsing
3653 * the initial entity and the quote is found
3655 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3656 (ctxt->input != input))) {
3657 if (len + 5 >= size) {
3661 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3663 xmlErrMemory(ctxt, NULL);
3669 COPY_BUF(l,buf,len,c);
3672 * Pop-up of finished entities.
3674 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3687 * Raise problem w.r.t. '&' and '%' being used in non-entities
3688 * reference constructs. Note Charref will be handled in
3689 * xmlStringDecodeEntities()
3692 while (*cur != 0) { /* non input consuming */
3693 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3698 name = xmlParseStringName(ctxt, &cur);
3699 if ((name == NULL) || (*cur != ';')) {
3700 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3701 "EntityValue: '%c' forbidden except for entities references\n",
3704 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3705 (ctxt->inputNr == 1)) {
3706 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3717 * Then PEReference entities are substituted.
3720 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3725 * NOTE: 4.4.7 Bypassed
3726 * When a general entity reference appears in the EntityValue in
3727 * an entity declaration, it is bypassed and left as is.
3728 * so XML_SUBSTITUTE_REF is not set here.
3730 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3742 * xmlParseAttValueComplex:
3743 * @ctxt: an XML parser context
3744 * @attlen: the resulting attribute length
3745 * @normalize: whether to apply the inner normalization
3747 * parse a value for an attribute, this is the fallback function
3748 * of xmlParseAttValue() when the attribute parsing requires handling
3749 * of non-ASCII characters, or normalization compaction.
3751 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3754 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3756 xmlChar *buf = NULL;
3757 xmlChar *rep = NULL;
3760 int c, l, in_space = 0;
3761 xmlChar *current = NULL;
3764 if (NXT(0) == '"') {
3765 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3768 } else if (NXT(0) == '\'') {
3770 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3773 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3778 * allocate a translation buffer.
3780 buf_size = XML_PARSER_BUFFER_SIZE;
3781 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3782 if (buf == NULL) goto mem_error;
3785 * OK loop until we reach one of the ending char or a size limit.
3788 while ((NXT(0) != limit) && /* checked */
3789 (IS_CHAR(c)) && (c != '<')) {
3793 if (NXT(1) == '#') {
3794 int val = xmlParseCharRef(ctxt);
3797 if (ctxt->replaceEntities) {
3798 if (len > buf_size - 10) {
3799 growBuffer(buf, 10);
3804 * The reparsing will be done in xmlStringGetNodeList()
3805 * called by the attribute() function in SAX.c
3807 if (len > buf_size - 10) {
3808 growBuffer(buf, 10);
3816 } else if (val != 0) {
3817 if (len > buf_size - 10) {
3818 growBuffer(buf, 10);
3820 len += xmlCopyChar(0, &buf[len], val);
3823 ent = xmlParseEntityRef(ctxt);
3826 ctxt->nbentities += ent->owner;
3827 if ((ent != NULL) &&
3828 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3829 if (len > buf_size - 10) {
3830 growBuffer(buf, 10);
3832 if ((ctxt->replaceEntities == 0) &&
3833 (ent->content[0] == '&')) {
3840 buf[len++] = ent->content[0];
3842 } else if ((ent != NULL) &&
3843 (ctxt->replaceEntities != 0)) {
3844 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3845 rep = xmlStringDecodeEntities(ctxt, ent->content,
3850 while (*current != 0) { /* non input consuming */
3851 if ((*current == 0xD) || (*current == 0xA) ||
3852 (*current == 0x9)) {
3856 buf[len++] = *current++;
3857 if (len > buf_size - 10) {
3858 growBuffer(buf, 10);
3865 if (len > buf_size - 10) {
3866 growBuffer(buf, 10);
3868 if (ent->content != NULL)
3869 buf[len++] = ent->content[0];
3871 } else if (ent != NULL) {
3872 int i = xmlStrlen(ent->name);
3873 const xmlChar *cur = ent->name;
3876 * This may look absurd but is needed to detect
3879 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3880 (ent->content != NULL)) {
3881 rep = xmlStringDecodeEntities(ctxt, ent->content,
3882 XML_SUBSTITUTE_REF, 0, 0, 0);
3890 * Just output the reference
3893 while (len > buf_size - i - 10) {
3894 growBuffer(buf, i + 10);
3897 buf[len++] = *cur++;
3902 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3903 if ((len != 0) || (!normalize)) {
3904 if ((!normalize) || (!in_space)) {
3905 COPY_BUF(l,buf,len,0x20);
3906 while (len > buf_size - 10) {
3907 growBuffer(buf, 10);
3914 COPY_BUF(l,buf,len,c);
3915 if (len > buf_size - 10) {
3916 growBuffer(buf, 10);
3924 if ((in_space) && (normalize)) {
3925 while (buf[len - 1] == 0x20) len--;
3929 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3930 } else if (RAW != limit) {
3931 if ((c != 0) && (!IS_CHAR(c))) {
3932 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3933 "invalid character in attribute value\n");
3935 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3936 "AttValue: ' expected\n");
3940 if (attlen != NULL) *attlen = len;
3944 xmlErrMemory(ctxt, NULL);
3954 * @ctxt: an XML parser context
3956 * parse a value for an attribute
3957 * Note: the parser won't do substitution of entities here, this
3958 * will be handled later in xmlStringGetNodeList
3960 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3961 * "'" ([^<&'] | Reference)* "'"
3963 * 3.3.3 Attribute-Value Normalization:
3964 * Before the value of an attribute is passed to the application or
3965 * checked for validity, the XML processor must normalize it as follows:
3966 * - a character reference is processed by appending the referenced
3967 * character to the attribute value
3968 * - an entity reference is processed by recursively processing the
3969 * replacement text of the entity
3970 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3971 * appending #x20 to the normalized value, except that only a single
3972 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3973 * parsed entity or the literal entity value of an internal parsed entity
3974 * - other characters are processed by appending them to the normalized value
3975 * If the declared value is not CDATA, then the XML processor must further
3976 * process the normalized attribute value by discarding any leading and
3977 * trailing space (#x20) characters, and by replacing sequences of space
3978 * (#x20) characters by a single space (#x20) character.
3979 * All attributes for which no declaration has been read should be treated
3980 * by a non-validating parser as if declared CDATA.
3982 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3987 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3988 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3989 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3993 * xmlParseSystemLiteral:
3994 * @ctxt: an XML parser context
3996 * parse an XML Literal
3998 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4000 * Returns the SystemLiteral parsed or NULL
4004 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4005 xmlChar *buf = NULL;
4007 int size = XML_PARSER_BUFFER_SIZE;
4010 int state = ctxt->instate;
4017 } else if (RAW == '\'') {
4021 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4025 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4027 xmlErrMemory(ctxt, NULL);
4030 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4032 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4033 if (len + 5 >= size) {
4037 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4040 xmlErrMemory(ctxt, NULL);
4041 ctxt->instate = (xmlParserInputState) state;
4051 COPY_BUF(l,buf,len,cur);
4061 ctxt->instate = (xmlParserInputState) state;
4062 if (!IS_CHAR(cur)) {
4063 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4071 * xmlParsePubidLiteral:
4072 * @ctxt: an XML parser context
4074 * parse an XML public literal
4076 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4078 * Returns the PubidLiteral parsed or NULL.
4082 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4083 xmlChar *buf = NULL;
4085 int size = XML_PARSER_BUFFER_SIZE;
4089 xmlParserInputState oldstate = ctxt->instate;
4095 } else if (RAW == '\'') {
4099 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4102 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4104 xmlErrMemory(ctxt, NULL);
4107 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4109 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4110 if (len + 1 >= size) {
4114 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4116 xmlErrMemory(ctxt, NULL);
4138 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4142 ctxt->instate = oldstate;
4146 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4149 * used for the test in the inner loop of the char data testing
4151 static const unsigned char test_char_data[256] = {
4152 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4153 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4154 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4155 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4156 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4157 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4158 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4159 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4160 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4161 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4162 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4163 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4164 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4165 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4166 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4167 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4168 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4169 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4170 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4171 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4172 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4173 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4174 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4175 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4176 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4177 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4178 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4179 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4180 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4181 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4182 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4183 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4188 * @ctxt: an XML parser context
4189 * @cdata: int indicating whether we are within a CDATA section
4191 * parse a CharData section.
4192 * if we are within a CDATA section ']]>' marks an end of section.
4194 * The right angle bracket (>) may be represented using the string "&gt;",
4195 * and must, for compatibility, be escaped using "&gt;" or a character
4196 * reference when it appears in the string "]]>" in content, when that
4197 * string is not marking the end of a CDATA section.
4199 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4203 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4206 int line = ctxt->input->line;
4207 int col = ctxt->input->col;
4213 * Accelerated common case where the input doesn't need to be
4214 * modified before passing it to the handler.
4217 in = ctxt->input->cur;
4220 while (*in == 0x20) { in++; ctxt->input->col++; }
4223 ctxt->input->line++; ctxt->input->col = 1;
4225 } while (*in == 0xA);
4226 goto get_more_space;
4229 nbchar = in - ctxt->input->cur;
4231 const xmlChar *tmp = ctxt->input->cur;
4232 ctxt->input->cur = in;
4234 if ((ctxt->sax != NULL) &&
4235 (ctxt->sax->ignorableWhitespace !=
4236 ctxt->sax->characters)) {
4237 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4238 if (ctxt->sax->ignorableWhitespace != NULL)
4239 ctxt->sax->ignorableWhitespace(ctxt->userData,
4242 if (ctxt->sax->characters != NULL)
4243 ctxt->sax->characters(ctxt->userData,
4245 if (*ctxt->space == -1)
4248 } else if ((ctxt->sax != NULL) &&
4249 (ctxt->sax->characters != NULL)) {
4250 ctxt->sax->characters(ctxt->userData,
4258 ccol = ctxt->input->col;
4259 while (test_char_data[*in]) {
4263 ctxt->input->col = ccol;
4266 ctxt->input->line++; ctxt->input->col = 1;
4268 } while (*in == 0xA);
4272 if ((in[1] == ']') && (in[2] == '>')) {
4273 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4274 ctxt->input->cur = in;
4281 nbchar = in - ctxt->input->cur;
4283 if ((ctxt->sax != NULL) &&
4284 (ctxt->sax->ignorableWhitespace !=
4285 ctxt->sax->characters) &&
4286 (IS_BLANK_CH(*ctxt->input->cur))) {
4287 const xmlChar *tmp = ctxt->input->cur;
4288 ctxt->input->cur = in;
4290 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4291 if (ctxt->sax->ignorableWhitespace != NULL)
4292 ctxt->sax->ignorableWhitespace(ctxt->userData,
4295 if (ctxt->sax->characters != NULL)
4296 ctxt->sax->characters(ctxt->userData,
4298 if (*ctxt->space == -1)
4301 line = ctxt->input->line;
4302 col = ctxt->input->col;
4303 } else if (ctxt->sax != NULL) {
4304 if (ctxt->sax->characters != NULL)
4305 ctxt->sax->characters(ctxt->userData,
4306 ctxt->input->cur, nbchar);
4307 line = ctxt->input->line;
4308 col = ctxt->input->col;
4310 /* something really bad happened in the SAX callback */
4311 if (ctxt->instate != XML_PARSER_CONTENT)
4314 ctxt->input->cur = in;
4318 ctxt->input->cur = in;
4320 ctxt->input->line++; ctxt->input->col = 1;
4321 continue; /* while */
4333 in = ctxt->input->cur;
4334 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4337 ctxt->input->line = line;
4338 ctxt->input->col = col;
4339 xmlParseCharDataComplex(ctxt, cdata);
4343 * xmlParseCharDataComplex:
4344 * @ctxt: an XML parser context
4345 * @cdata: int indicating whether we are within a CDATA section
4347 * parse a CharData section. This is the fallback function
4348 * of xmlParseCharData() when the parsing requires handling
4349 * of non-ASCII characters.
4352 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4353 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4361 while ((cur != '<') && /* checked */
4363 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4364 if ((cur == ']') && (NXT(1) == ']') &&
4368 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4371 COPY_BUF(l,buf,nbchar,cur);
4372 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4376 * OK the segment is to be consumed as chars.
4378 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4379 if (areBlanks(ctxt, buf, nbchar, 0)) {
4380 if (ctxt->sax->ignorableWhitespace != NULL)
4381 ctxt->sax->ignorableWhitespace(ctxt->userData,
4384 if (ctxt->sax->characters != NULL)
4385 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4386 if ((ctxt->sax->characters !=
4387 ctxt->sax->ignorableWhitespace) &&
4388 (*ctxt->space == -1))
4393 /* something really bad happened in the SAX callback */
4394 if (ctxt->instate != XML_PARSER_CONTENT)
4408 * OK the segment is to be consumed as chars.
4410 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4411 if (areBlanks(ctxt, buf, nbchar, 0)) {
4412 if (ctxt->sax->ignorableWhitespace != NULL)
4413 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4415 if (ctxt->sax->characters != NULL)
4416 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4417 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4418 (*ctxt->space == -1))
4423 if ((cur != 0) && (!IS_CHAR(cur))) {
4424 /* Generate the error and skip the offending character */
4425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426 "PCDATA invalid Char value %d\n",
4433 * xmlParseExternalID:
4434 * @ctxt: an XML parser context
4435 * @publicID: a xmlChar** receiving PubidLiteral
4436 * @strict: indicate whether we should restrict parsing to only
4437 * production [75], see NOTE below
4439 * Parse an External ID or a Public ID
4441 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4442 * 'PUBLIC' S PubidLiteral S SystemLiteral
4444 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4445 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4447 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4449 * Returns the SystemLiteral; in the second case publicID receives
4450 * the PubidLiteral. If strict is off
4451 * it is possible to return NULL and have publicID set.
/*
 * Parses either a SYSTEM or a PUBLIC identifier. The system literal is
 * collected in URI; the public literal is stored through publicID.
 * NOTE(review): several lines (keyword skipping, the strict test, return
 * statements) are not visible in this view.
 */
4455 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4456 xmlChar *URI = NULL;
/* SYSTEM branch: a blank is required, then the system literal. */
4461 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4463 if (!IS_BLANK_CH(CUR)) {
4464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4465 "Space required after 'SYSTEM'\n");
4468 URI = xmlParseSystemLiteral(ctxt);
4470 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
/* PUBLIC branch: a blank is required, then the public literal. */
4472 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4474 if (!IS_BLANK_CH(CUR)) {
4475 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4476 "Space required after 'PUBLIC'\n");
/* From here on the caller-visible *publicID is already assigned, even on
 * the early NULL returns further down. */
4479 *publicID = xmlParsePubidLiteral(ctxt);
4480 if (*publicID == NULL) {
4481 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4485 * We don't handle [83] so "S SystemLiteral" is required.
4487 if (!IS_BLANK_CH(CUR)) {
4488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489 "Space required after the Public Identifier\n");
4493 * We handle [83] so we return immediately, if
4494 * "S SystemLiteral" is not detected. From a purely parsing
4495 * point of view that's a nice mess.
/* Lookahead through ptr: NULL is returned when no blank follows the public
 * literal or when the first non-blank character is not a quote. */
4501 if (!IS_BLANK_CH(*ptr)) return(NULL);
4503 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4504 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4507 URI = xmlParseSystemLiteral(ctxt);
4509 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4516 * xmlParseCommentComplex:
4517 * @ctxt: an XML parser context
4518 * @buf: the already parsed part of the buffer
4519 * @len: number of bytes filled in the buffer
4520 * @size: allocated size of the buffer
4522 * Skip an XML (SGML) comment <!-- .... -->
4523 * The spec says that "For compatibility, the string "--" (double-hyphen)
4524 * must not occur within comments. "
4525 * This is the slow routine in case the accelerator for ascii didn't work
4527 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Slow path of comment parsing: continues from a partially filled buffer
 * (buf, len, size) and finally passes the text to the SAX comment callback.
 * NOTE(review): local declarations, several conditions and the cleanup code
 * are not visible in this view.
 */
4530 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
/* Remember the input id; compared again before the comment is delivered. */
4537 inputid = ctxt->input->id;
/* Buffer creation with the default size; the guarding condition is not
 * visible here (presumably buf == NULL - confirm). */
4541 size = XML_PARSER_BUFFER_SIZE;
4542 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4544 xmlErrMemory(ctxt, NULL);
4548 GROW; /* Assure there's enough input data */
4551 goto not_terminated;
4553 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4554 "xmlParseComment: invalid xmlChar value %d\n",
4562 goto not_terminated;
4564 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4565 "xmlParseComment: invalid xmlChar value %d\n",
4573 goto not_terminated;
/* q, r and cur appear to form a sliding window of three characters; the
 * loop runs until the window no longer allows continuation (part of the
 * condition is not visible). */
4574 while (IS_CHAR(cur) && /* checked */
4576 (r != '-') || (q != '-'))) {
/* Two hyphens in a row inside the comment body are reported. */
4577 if ((r == '-') && (q == '-')) {
4578 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
/* Grow when fewer than 5 bytes of headroom remain. The realloc result is
 * taken in new_buf rather than overwriting buf. */
4580 if (len + 5 >= size) {
4583 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4584 if (new_buf == NULL) {
4586 xmlErrMemory(ctxt, NULL);
4591 COPY_BUF(ql,buf,len,q);
/* The error text shows at most 50 characters of the collected comment. */
4612 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4613 "Comment not terminated \n<!--%.50s\n", buf);
4614 } else if (!IS_CHAR(cur)) {
4615 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616 "xmlParseComment: invalid xmlChar value %d\n",
4619 if (inputid != ctxt->input->id) {
4620 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4621 "Comment doesn't start and stop in the same entity\n");
/* Deliver the comment only when SAX is present, has a comment handler and
 * is not disabled. */
4624 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4625 (!ctxt->disableSAX))
4626 ctxt->sax->comment(ctxt->userData, buf);
4631 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4632 "Comment not terminated\n", NULL);
4639 * @ctxt: an XML parser context
4641 * Skip an XML (SGML) comment <!-- .... -->
4642 * The spec says that "For compatibility, the string "--" (double-hyphen)
4643 * must not occur within comments. "
4645 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Fast path of comment parsing: scans plain ASCII directly in the input
 * buffer, accumulates it in buf and defers to xmlParseCommentComplex for
 * anything else.
 * NOTE(review): many lines (declarations, loop heads, closing braces) are
 * not visible in this view.
 */
4648 xmlParseComment(xmlParserCtxtPtr ctxt) {
4649 xmlChar *buf = NULL;
4650 int size = XML_PARSER_BUFFER_SIZE;
4652 xmlParserInputState state;
4654 int nbchar = 0, ccol;
4658 * Check that there is a comment right here.
/* Return without any error when the input does not start with <!-- . */
4660 if ((RAW != '<') || (NXT(1) != '!') ||
4661 (NXT(2) != '-') || (NXT(3) != '-')) return;
/* The previous parser state is saved here and written back on the exits
 * visible below. */
4662 state = ctxt->instate;
4663 ctxt->instate = XML_PARSER_COMMENT;
4664 inputid = ctxt->input->id;
4670 * Accelerated common case where input don't need to be
4671 * modified before passing it to the handler.
4673 in = ctxt->input->cur;
/* Line feeds (0xA) advance the line counter and reset the column. */
4677 ctxt->input->line++; ctxt->input->col = 1;
4679 } while (*in == 0xA);
4682 ccol = ctxt->input->col;
/* Scan bytes in the ranges above the hyphen up to 0x7F and from 0x20 up to
 * just below the hyphen; a further alternative of this condition is not
 * visible here. */
4683 while (((*in > '-') && (*in <= 0x7F)) ||
4684 ((*in >= 0x20) && (*in < '-')) ||
4689 ctxt->input->col = ccol;
4692 ctxt->input->line++; ctxt->input->col = 1;
4694 } while (*in == 0xA);
/* Number of bytes scanned since the current input position. */
4697 nbchar = in - ctxt->input->cur;
4699 * save current set of data
4702 if ((ctxt->sax != NULL) &&
4703 (ctxt->sax->comment != NULL)) {
4705 if ((*in == '-') && (in[1] == '-'))
/* First allocation: default size plus the bytes about to be copied. */
4708 size = XML_PARSER_BUFFER_SIZE + nbchar;
4709 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4711 xmlErrMemory(ctxt, NULL);
4712 ctxt->instate = state;
/* Growth path: taken when the pending bytes plus one more would not fit. */
4716 } else if (len + nbchar + 1 >= size) {
4718 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4719 new_buf = (xmlChar *) xmlRealloc(buf,
4720 size * sizeof(xmlChar));
4721 if (new_buf == NULL) {
4723 xmlErrMemory(ctxt, NULL);
4724 ctxt->instate = state;
/* Append the scanned bytes straight from the input buffer. */
4729 memcpy(&buf[len], ctxt->input->cur, nbchar);
4734 ctxt->input->cur = in;
4737 ctxt->input->line++; ctxt->input->col = 1;
4742 ctxt->input->cur = in;
4744 ctxt->input->line++; ctxt->input->col = 1;
4745 continue; /* while */
4751 in = ctxt->input->cur;
4755 if (ctxt->input->id != inputid) {
4756 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4757 "comment doesn't start and stop in the same entity\n");
/* Deliver either the collected text or an empty string; the test choosing
 * between the two calls is not visible (presumably buf != NULL - confirm). */
4760 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4761 (!ctxt->disableSAX)) {
4763 ctxt->sax->comment(ctxt->userData, buf);
4765 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4769 ctxt->instate = state;
4773 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4774 "Comment not terminated \n<!--%.50s\n",
4777 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4778 "Comment not terminated \n", NULL);
/* The fast loop continues while the next byte is printable ASCII or a tab;
 * afterwards the slow routine takes over with the accumulated buffer. */
4786 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4787 xmlParseCommentComplex(ctxt, buf, len, size);
4788 ctxt->instate = state;
4795 * @ctxt: an XML parser context
4797 * parse the name of a PI
4799 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4801 * Returns the PITarget name or NULL
/*
 * Parses the target name of a processing instruction and diagnoses names
 * that start with the reserved letters x, m, l in any case.
 * NOTE(review): the loop head over xmlW3CPIs and the return statements are
 * not visible in this view.
 */
4805 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4806 const xmlChar *name;
4808 name = xmlParseName(ctxt);
/* Case-insensitive test of the first three characters. */
4809 if ((name != NULL) &&
4810 ((name[0] == 'x') || (name[0] == 'X')) &&
4811 ((name[1] == 'm') || (name[1] == 'M')) &&
4812 ((name[2] == 'l') || (name[2] == 'L'))) {
/* Exactly the lower-case three-letter name: an XML declaration that is not
 * at the start of the document. */
4814 if ((name[0] == 'x') && (name[1] == 'm') &&
4815 (name[2] == 'l') && (name[3] == 0)) {
4816 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4817 "XML declaration allowed only at the start of the document\n");
/* Any other three-letter case variant is a reserved name. */
4819 } else if (name[3] == 0) {
4820 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
/* Longer names are looked up in the NULL-terminated xmlW3CPIs table; what
 * happens on a match is not visible here. */
4824 if (xmlW3CPIs[i] == NULL) break;
4825 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
/* Names with the reserved prefix only produce a warning. */
4828 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4829 "xmlParsePITarget: invalid name prefix 'xml'\n",
/* A colon anywhere in the target is reported as a namespace error. */
4832 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4833 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4834 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4839 #ifdef LIBXML_CATALOG_ENABLED
4841 * xmlParseCatalogPI:
4842 * @ctxt: an XML parser context
4843 * @catalog: the PI value string
4845 * parse an XML Catalog Processing Instruction.
4847 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4849 * Occurs only if allowed by the user and if happening in the Misc
4850 * part of the document before any doctype information
4851 * This will add the given catalog to the parsing context in order
4852 * to be used if there is a resolution need further down in the document
/*
 * Extracts the quoted value of the catalog pseudo-attribute from the PI
 * content string and registers it with xmlCatalogAddLocal.
 * NOTE(review): the statements between the visible lines (pointer setup,
 * gotos, the equal sign check, cleanup) are not visible in this view.
 */
4856 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4857 xmlChar *URL = NULL;
4858 const xmlChar *tmp, *base;
4862 while (IS_BLANK_CH(*tmp)) tmp++;
/* xmlStrncmp returns non-zero on mismatch: the content must start with the
 * 7-character keyword catalog. */
4863 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4866 while (IS_BLANK_CH(*tmp)) tmp++;
4871 while (IS_BLANK_CH(*tmp)) tmp++;
/* The value has to be delimited by a single or a double quote. */
4873 if ((marker != '\'') && (marker != '"'))
/* Advance to the matching quote or to the end of the string. */
4877 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4880 URL = xmlStrndup(base, tmp - base);
4882 while (IS_BLANK_CH(*tmp)) tmp++;
/* The per-context catalog list is replaced by the value returned here. */
4887 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4893 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4894 "Catalog PI syntax error: %s\n",
4903 * @ctxt: an XML parser context
4905 * parse an XML Processing Instruction.
4907 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4909 * The processing is transferred to SAX once parsed.
/*
 * Parses a processing instruction starting at <? and reports it through the
 * SAX processingInstruction callback.
 * NOTE(review): local declarations, skips, frees and closing braces are not
 * visible in this view.
 */
4913 xmlParsePI(xmlParserCtxtPtr ctxt) {
4914 xmlChar *buf = NULL;
4916 int size = XML_PARSER_BUFFER_SIZE;
4918 const xmlChar *target;
4919 xmlParserInputState state;
4922 if ((RAW == '<') && (NXT(1) == '?')) {
/* Remember the input and the parser state for the boundary check and for
 * restoring the state on the exits below. */
4923 xmlParserInputPtr input = ctxt->input;
4924 state = ctxt->instate;
4925 ctxt->instate = XML_PARSER_PI;
4927 * this is a Processing Instruction.
4933 * Parse the target name and check for special support like
4936 target = xmlParsePITarget(ctxt);
4937 if (target != NULL) {
/* Target immediately followed by ?> : a PI without content. */
4938 if ((RAW == '?') && (NXT(1) == '>')) {
4939 if (input != ctxt->input) {
4940 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4941 "PI declaration doesn't start and stop in the same entity\n");
4948 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4949 (ctxt->sax->processingInstruction != NULL))
4950 ctxt->sax->processingInstruction(ctxt->userData,
4952 ctxt->instate = state;
4955 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4957 xmlErrMemory(ctxt, NULL);
4958 ctxt->instate = state;
/* A blank must separate the target from the content. */
4962 if (!IS_BLANK(cur)) {
4963 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4964 "ParsePI: PI %s space expected\n", target);
/* Collect characters until ?> or an invalid character. */
4968 while (IS_CHAR(cur) && /* checked */
4969 ((cur != '?') || (NXT(1) != '>'))) {
/* Grow when fewer than 5 bytes of headroom remain; the realloc result is
 * taken in tmp rather than overwriting buf. */
4970 if (len + 5 >= size) {
4974 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4976 xmlErrMemory(ctxt, NULL);
4978 ctxt->instate = state;
4988 COPY_BUF(l,buf,len,cur);
4999 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5000 "ParsePI: PI %s never end ...\n", target);
/* NOTE(review): this entity-boundary message is raised with
 * XML_ERR_SPACE_REQUIRED, whereas the empty-PI branch above uses
 * XML_ERR_ENTITY_BOUNDARY for the same message - likely the wrong code,
 * confirm. */
5002 if (input != ctxt->input) {
5003 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5004 "PI declaration doesn't start and stop in the same entity\n");
5008 #ifdef LIBXML_CATALOG_ENABLED
/* The catalog PI is honoured only when the saved state was MISC or START
 * and the catalog defaults allow document or all catalogs. */
5009 if (((state == XML_PARSER_MISC) ||
5010 (state == XML_PARSER_START)) &&
5011 (xmlStrEqual(target, XML_CATALOG_PI))) {
5012 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5013 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5014 (allow == XML_CATA_ALLOW_ALL))
5015 xmlParseCatalogPI(ctxt, buf);
5023 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5024 (ctxt->sax->processingInstruction != NULL))
5025 ctxt->sax->processingInstruction(ctxt->userData,
5030 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5032 ctxt->instate = state;
5037 * xmlParseNotationDecl:
5038 * @ctxt: an XML parser context
5040 * parse a notation declaration
5042 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5044 * Hence there are actually 3 choices:
5045 * 'PUBLIC' S PubidLiteral
5046 * 'PUBLIC' S PubidLiteral S SystemLiteral
5047 * and 'SYSTEM' S SystemLiteral
5049 * See the NOTE on xmlParseExternalID().
/*
 * Parses a <!NOTATION declaration and reports it through the SAX
 * notationDecl callback.
 * NOTE(review): local declarations, skips, returns and closing braces are
 * not visible in this view.
 */
5053 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5054 const xmlChar *name;
5058 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
/* Input at the start of the declaration, compared again at its end. */
5059 xmlParserInputPtr input = ctxt->input;
5062 if (!IS_BLANK_CH(CUR)) {
5063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5064 "Space required after '<!NOTATION'\n");
5069 name = xmlParseName(ctxt);
5071 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5074 if (!IS_BLANK_CH(CUR)) {
5075 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076 "Space required after the NOTATION name'\n");
/* A colon in the notation name is reported as a namespace error. */
5079 if (xmlStrchr(name, ':') != NULL) {
5080 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5081 "colon are forbidden from notation names '%s'\n",
/* strict is passed as 0, so a PUBLIC identifier without a system literal
 * is accepted (Systemid may be NULL while Pubid is set). */
5089 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
/* NOTE(review): this entity-boundary message is raised with
 * XML_ERR_SPACE_REQUIRED; XML_ERR_ENTITY_BOUNDARY looks like the intended
 * code - confirm. */
5093 if (input != ctxt->input) {
5094 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5095 "Notation declaration doesn't start and stop in the same entity\n");
5098 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5099 (ctxt->sax->notationDecl != NULL))
5100 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5102 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
/* Both literals are released here; name is not freed in the visible code. */
5104 if (Systemid != NULL) xmlFree(Systemid);
5105 if (Pubid != NULL) xmlFree(Pubid);
5110 * xmlParseEntityDecl:
5111 * @ctxt: an XML parser context
5113 * parse <!ENTITY declarations
5115 * [70] EntityDecl ::= GEDecl | PEDecl
5117 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5119 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5121 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5123 * [74] PEDef ::= EntityValue | ExternalID
5125 * [76] NDataDecl ::= S 'NDATA' S Name
5127 * [ VC: Notation Declared ]
5128 * The Name must match the declared name of a notation.
/*
 * Parses an <!ENTITY declaration, for both parameter and general entities,
 * each either internal (quoted value) or external (SYSTEM or PUBLIC id),
 * and reports it through the SAX entityDecl or unparsedEntityDecl callback.
 * NOTE(review): many lines (branch heads, skips, frees, closing braces) are
 * not visible in this view; the notes describe the visible lines only.
 */
5132 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5133 const xmlChar *name = NULL;
5134 xmlChar *value = NULL;
5135 xmlChar *URI = NULL, *literal = NULL;
5136 const xmlChar *ndata = NULL;
5137 int isParameter = 0;
5138 xmlChar *orig = NULL;
5141 /* GROW; done in the caller */
5142 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
/* Input at the start of the declaration, compared again at its end. */
5143 xmlParserInputPtr input = ctxt->input;
5146 skipped = SKIP_BLANKS;
5148 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "Space required after '<!ENTITY'\n");
5154 skipped = SKIP_BLANKS;
5156 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5157 "Space required after '%'\n");
5162 name = xmlParseName(ctxt);
5164 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5165 "xmlParseEntityDecl: no name\n");
/* A colon in the entity name is reported as a namespace error. */
5168 if (xmlStrchr(name, ':') != NULL) {
5169 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5170 "colon are forbidden from entities names '%s'\n",
5173 skipped = SKIP_BLANKS;
5175 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176 "Space required after the entity name\n");
5179 ctxt->instate = XML_PARSER_ENTITY_DECL;
5181 * handle the various case of definitions...
/* Quoted value: internal parameter entity. orig receives the raw text. */
5184 if ((RAW == '"') || (RAW == '\'')) {
5185 value = xmlParseEntityValue(ctxt, &orig);
5187 if ((ctxt->sax != NULL) &&
5188 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5189 ctxt->sax->entityDecl(ctxt->userData, name,
5190 XML_INTERNAL_PARAMETER_ENTITY,
/* Otherwise an external id, parsed in strict mode (third argument 1). */
5194 URI = xmlParseExternalID(ctxt, &literal, 1);
5195 if ((URI == NULL) && (literal == NULL)) {
5196 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
/* The system literal is checked as a URI; a fragment part is rejected. */
5201 uri = xmlParseURI((const char *) URI);
5203 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5204 "Invalid URI: %s\n", URI);
5206 * This really ought to be a well formedness error
5207 * but the XML Core WG decided otherwise c.f. issue
5208 * E26 of the XML erratas.
5211 if (uri->fragment != NULL) {
5213 * Okay this is foolish to block those but not
5216 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5218 if ((ctxt->sax != NULL) &&
5219 (!ctxt->disableSAX) &&
5220 (ctxt->sax->entityDecl != NULL))
5221 ctxt->sax->entityDecl(ctxt->userData, name,
5222 XML_EXTERNAL_PARAMETER_ENTITY,
5223 literal, URI, NULL);
/* General entity with a quoted value: internal general entity. */
5230 if ((RAW == '"') || (RAW == '\'')) {
5231 value = xmlParseEntityValue(ctxt, &orig);
5232 if ((ctxt->sax != NULL) &&
5233 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5234 ctxt->sax->entityDecl(ctxt->userData, name,
5235 XML_INTERNAL_GENERAL_ENTITY,
5238 * For expat compatibility in SAX mode.
/* When no document exists, or the document carries the SAX_COMPAT_MODE
 * version marker, a document and a placeholder internal subset are created
 * so that xmlSAX2EntityDecl can record the entity. */
5240 if ((ctxt->myDoc == NULL) ||
5241 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5242 if (ctxt->myDoc == NULL) {
5243 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5244 if (ctxt->myDoc == NULL) {
5245 xmlErrMemory(ctxt, "New Doc failed");
5248 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5250 if (ctxt->myDoc->intSubset == NULL)
5251 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5252 BAD_CAST "fake", NULL, NULL);
5254 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
/* General entity with an external id, again parsed in strict mode. */
5258 URI = xmlParseExternalID(ctxt, &literal, 1);
5259 if ((URI == NULL) && (literal == NULL)) {
5260 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5265 uri = xmlParseURI((const char *)URI);
5267 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5268 "Invalid URI: %s\n", URI);
5270 * This really ought to be a well formedness error
5271 * but the XML Core WG decided otherwise c.f. issue
5272 * E26 of the XML erratas.
5275 if (uri->fragment != NULL) {
5277 * Okay this is foolish to block those but not
5280 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
/* Anything other than > must be preceded by a blank. */
5285 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5286 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5287 "Space required before 'NDATA'\n");
/* NDATA present: unparsed entity, reported through unparsedEntityDecl. */
5290 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5292 if (!IS_BLANK_CH(CUR)) {
5293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294 "Space required after 'NDATA'\n");
5297 ndata = xmlParseName(ctxt);
5298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->unparsedEntityDecl != NULL))
5300 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5301 literal, URI, ndata);
/* No NDATA: external parsed general entity. */
5303 if ((ctxt->sax != NULL) &&
5304 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5305 ctxt->sax->entityDecl(ctxt->userData, name,
5306 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5307 literal, URI, NULL);
5309 * For expat compatibility in SAX mode.
5310 * assuming the entity repalcement was asked for
/* Same compatibility path as for internal entities, but additionally
 * conditioned on replaceEntities being non-zero. */
5312 if ((ctxt->replaceEntities != 0) &&
5313 ((ctxt->myDoc == NULL) ||
5314 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5315 if (ctxt->myDoc == NULL) {
5316 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5317 if (ctxt->myDoc == NULL) {
5318 xmlErrMemory(ctxt, "New Doc failed");
5321 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5324 if (ctxt->myDoc->intSubset == NULL)
5325 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5326 BAD_CAST "fake", NULL, NULL);
5327 xmlSAX2EntityDecl(ctxt, name,
5328 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5329 literal, URI, NULL);
5336 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5337 "xmlParseEntityDecl: entity %s not terminated\n", name);
5339 if (input != ctxt->input) {
5340 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5341 "Entity declaration doesn't start and stop in the same entity\n");
5347 * Ugly mechanism to save the raw entity value.
/* The just-declared entity is looked up again (parameter or general,
 * with xmlSAX2GetEntity as fallback when userData is the context itself);
 * the statements touching cur->orig are only partly visible. */
5349 xmlEntityPtr cur = NULL;
5352 if ((ctxt->sax != NULL) &&
5353 (ctxt->sax->getParameterEntity != NULL))
5354 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5356 if ((ctxt->sax != NULL) &&
5357 (ctxt->sax->getEntity != NULL))
5358 cur = ctxt->sax->getEntity(ctxt->userData, name);
5359 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5360 cur = xmlSAX2GetEntity(ctxt, name);
5364 if (cur->orig != NULL)
/* Release the strings allocated while parsing this declaration. */
5371 if (value != NULL) xmlFree(value);
5372 if (URI != NULL) xmlFree(URI);
5373 if (literal != NULL) xmlFree(literal);
5378 * xmlParseDefaultDecl:
5379 * @ctxt: an XML parser context
5380 * @value: Receive a possible fixed default value for the attribute
5382 * Parse an attribute default declaration
5384 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5386 * [ VC: Required Attribute ]
5387 * if the default declaration is the keyword #REQUIRED, then the
5388 * attribute must be specified for all elements of the type in the
5389 * attribute-list declaration.
5391 * [ VC: Attribute Default Legal ]
5392 * The declared default value must meet the lexical constraints of
5393 * the declared attribute type c.f. xmlValidateAttributeDecl()
5395 * [ VC: Fixed Attribute Default ]
5396 * if an attribute has a default value declared with the #FIXED
5397 * keyword, instances of that attribute must match the default value.
5399 * [ WFC: No < in Attribute Values ]
5400 * handled in xmlParseAttValue()
5402 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5403 * or XML_ATTRIBUTE_FIXED.
/*
 * Parses the default declaration of an attribute and returns one of the
 * XML_ATTRIBUTE_* default kinds.
 * NOTE(review): local declarations, skips, the store through value and the
 * final return are not visible in this view.
 */
5407 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
/* The two keyword-only forms return immediately. */
5412 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5414 return(XML_ATTRIBUTE_REQUIRED);
5416 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5418 return(XML_ATTRIBUTE_IMPLIED);
/* Default kind unless the #FIXED keyword is found next. */
5420 val = XML_ATTRIBUTE_NONE;
5421 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5423 val = XML_ATTRIBUTE_FIXED;
5424 if (!IS_BLANK_CH(CUR)) {
5425 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5426 "Space required after '#FIXED'\n");
/* The parser state is set to XML_PARSER_DTD right after the attribute
 * value has been parsed. */
5430 ret = xmlParseAttValue(ctxt);
5431 ctxt->instate = XML_PARSER_DTD;
/* The error is raised with whatever code is currently in ctxt->errNo. */
5433 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5434 "Attribute default value declaration error\n");
5441 * xmlParseNotationType:
5442 * @ctxt: an XML parser context
5444 * parse a Notation attribute type.
5446 * Note: the leading 'NOTATION' S part has already been parsed...
5448 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5450 * [ VC: Notation Attributes ]
5451 * Values of this type must match one of the notation names included
5452 * in the declaration; all notation names in the declaration must be declared.
5454 * Returns: the notation attribute tree built while parsing
/*
 * Parses the parenthesised, | separated list of notation names of a
 * NOTATION attribute type and builds an xmlEnumeration list.
 * NOTE(review): the loop head, skips, list linking and returns are not
 * visible in this view.
 */
5458 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5459 const xmlChar *name;
5460 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5463 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5470 name = xmlParseName(ctxt);
/* A missing name discards the list built so far. */
5472 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5473 "Name expected in NOTATION declaration\n");
5474 xmlFreeEnumeration(ret);
/* Walk the existing entries to detect a duplicated token. */
5478 while (tmp != NULL) {
5479 if (xmlStrEqual(name, tmp->name)) {
5480 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5481 "standalone: attribute notation value token %s duplicated\n",
/* The duplicate name is freed only when the dictionary does not own it. */
5483 if (!xmlDictOwns(ctxt->dict, name))
5484 xmlFree((xmlChar *) name);
5490 cur = xmlCreateEnumeration(name);
5492 xmlFreeEnumeration(ret);
/* First element becomes both head and tail of the list. */
5495 if (last == NULL) ret = last = cur;
5502 } while (RAW == '|');
5504 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5505 xmlFreeEnumeration(ret);
5513 * xmlParseEnumerationType:
5514 * @ctxt: an XML parser context
5516 * parse an Enumeration attribute type.
5518 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5520 * [ VC: Enumeration ]
5521 * Values of this type must match one of the Nmtoken tokens in
5524 * Returns: the enumeration attribute tree built while parsing
/*
 * Parses the parenthesised, | separated list of name tokens of an
 * enumeration attribute type and builds an xmlEnumeration list.
 * NOTE(review): the loop head, skips, frees, list linking and returns are
 * not visible in this view.
 */
5528 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5530 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5533 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5540 name = xmlParseNmtoken(ctxt);
5542 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
/* Walk the existing entries to detect a duplicated token. */
5546 while (tmp != NULL) {
5547 if (xmlStrEqual(name, tmp->name)) {
5548 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5549 "standalone: attribute enumeration value token %s duplicated\n",
/* Dictionary ownership is tested before the token is released; the
 * release statement itself is not visible here. */
5551 if (!xmlDictOwns(ctxt->dict, name))
5558 cur = xmlCreateEnumeration(name);
5559 if (!xmlDictOwns(ctxt->dict, name))
5562 xmlFreeEnumeration(ret);
/* First element becomes both head and tail of the list. */
5565 if (last == NULL) ret = last = cur;
5572 } while (RAW == '|');
5574 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5582 * xmlParseEnumeratedType:
5583 * @ctxt: an XML parser context
5584 * @tree: the enumeration tree built while parsing
5586 * parse an Enumerated attribute type.
5588 * [57] EnumeratedType ::= NotationType | Enumeration
5590 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5593 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
/*
 * Dispatches between a NOTATION type and a plain enumeration. The parsed
 * list is stored through tree; 0 is returned when the list is NULL.
 * NOTE(review): skips and closing braces are not visible in this view.
 */
5597 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5598 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5600 if (!IS_BLANK_CH(CUR)) {
5601 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5602 "Space required after 'NOTATION'\n");
5606 *tree = xmlParseNotationType(ctxt);
5607 if (*tree == NULL) return(0);
5608 return(XML_ATTRIBUTE_NOTATION);
/* No NOTATION keyword: treat the input as a plain enumeration. */
5610 *tree = xmlParseEnumerationType(ctxt);
5611 if (*tree == NULL) return(0);
5612 return(XML_ATTRIBUTE_ENUMERATION);
5616 * xmlParseAttributeType:
5617 * @ctxt: an XML parser context
5618 * @tree: the enumeration tree built while parsing
5620 * parse the Attribute list def for an element
5622 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5624 * [55] StringType ::= 'CDATA'
5626 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5627 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5629 * Validity constraints for attribute values syntax are checked in
5630 * xmlValidateAttributeValue()
5633 * Values of type ID must match the Name production. A name must not
5634 * appear more than once in an XML document as a value of this type;
5635 * i.e., ID values must uniquely identify the elements which bear them.
5637 * [ VC: One ID per Element Type ]
5638 * No element type may have more than one ID attribute specified.
5640 * [ VC: ID Attribute Default ]
5641 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5644 * Values of type IDREF must match the Name production, and values
5645 * of type IDREFS must match Names; each IDREF Name must match the value
5646 * of an ID attribute on some element in the XML document; i.e. IDREF
5647 * values must match the value of some ID attribute.
5649 * [ VC: Entity Name ]
5650 * Values of type ENTITY must match the Name production, values
5651 * of type ENTITIES must match Names; each Entity Name must match the
5652 * name of an unparsed entity declared in the DTD.
5654 * [ VC: Name Token ]
5655 * Values of type NMTOKEN must match the Nmtoken production; values
5656 * of type NMTOKENS must match Nmtokens.
5658 * Returns the attribute type
/*
 * Recognises the attribute type keyword at the current position and returns
 * the matching XML_ATTRIBUTE_* value; anything else is handed to
 * xmlParseEnumeratedType.
 * Keywords that are a prefix of a longer keyword are tested after the
 * longer one (IDREFS, IDREF, ID and NMTOKENS, NMTOKEN), so the order of the
 * tests matters.
 * NOTE(review): the skip statements of each branch are not visible here.
 */
5661 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5663 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5665 return(XML_ATTRIBUTE_CDATA);
5666 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5668 return(XML_ATTRIBUTE_IDREFS);
5669 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5671 return(XML_ATTRIBUTE_IDREF);
5672 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5674 return(XML_ATTRIBUTE_ID);
5675 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5677 return(XML_ATTRIBUTE_ENTITY);
5678 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5680 return(XML_ATTRIBUTE_ENTITIES);
5681 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5683 return(XML_ATTRIBUTE_NMTOKENS);
5684 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5686 return(XML_ATTRIBUTE_NMTOKEN);
5688 return(xmlParseEnumeratedType(ctxt, tree));
5692 * xmlParseAttributeListDecl:
5693 * @ctxt: an XML parser context
5695 * parse the Attribute list def for an element
5697 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5699 * [53] AttDef ::= S Name S AttType S DefaultDecl
/*
 * Parses an <!ATTLIST declaration: the element name followed by any number
 * of attribute definitions, each reported through the SAX attributeDecl
 * callback.
 * NOTE(review): skips, breaks, some conditions and closing braces are not
 * visible in this view.
 */
5703 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5704 const xmlChar *elemName;
5705 const xmlChar *attrName;
5706 xmlEnumerationPtr tree;
5708 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Input at the start of the declaration, compared again at its end. */
5709 xmlParserInputPtr input = ctxt->input;
5712 if (!IS_BLANK_CH(CUR)) {
5713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714 "Space required after '<!ATTLIST'\n");
5717 elemName = xmlParseName(ctxt);
5718 if (elemName == NULL) {
5719 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5720 "ATTLIST: no name for Element\n");
/* One iteration per attribute definition, until the closing > . */
5725 while (RAW != '>') {
/* Position at the start of the iteration, used below to detect an
 * iteration that consumed no input. */
5726 const xmlChar *check = CUR_PTR;
5729 xmlChar *defaultValue = NULL;
5733 attrName = xmlParseName(ctxt);
5734 if (attrName == NULL) {
5735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5736 "ATTLIST: no name for Attribute\n");
5740 if (!IS_BLANK_CH(CUR)) {
5741 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5742 "Space required after the attribute name\n");
5747 type = xmlParseAttributeType(ctxt, &tree);
/* Each error exit below releases the enumeration tree and, once parsed,
 * the default value. */
5753 if (!IS_BLANK_CH(CUR)) {
5754 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5755 "Space required after the attribute type\n");
5757 xmlFreeEnumeration(tree);
5762 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5764 if (defaultValue != NULL)
5765 xmlFree(defaultValue);
5767 xmlFreeEnumeration(tree);
/* Defaults of non-CDATA attributes are whitespace-normalised in place
 * (source and destination are the same buffer). */
5770 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5771 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5775 if (!IS_BLANK_CH(CUR)) {
5776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5777 "Space required after the attribute default value\n");
5778 if (defaultValue != NULL)
5779 xmlFree(defaultValue);
5781 xmlFreeEnumeration(tree);
/* No input consumed in this iteration: report an internal error. */
5786 if (check == CUR_PTR) {
5787 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5788 "in xmlParseAttributeListDecl\n");
5789 if (defaultValue != NULL)
5790 xmlFree(defaultValue);
5792 xmlFreeEnumeration(tree);
/* The tree is handed to the callback when one is invoked and freed here
 * otherwise; presumably the callback takes ownership - confirm. */
5795 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5796 (ctxt->sax->attributeDecl != NULL))
5797 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5798 type, def, defaultValue, tree);
5799 else if (tree != NULL)
5800 xmlFreeEnumeration(tree);
/* In SAX2 mode a default value whose kind is neither IMPLIED nor REQUIRED
 * is also recorded through xmlAddDefAttrs. */
5802 if ((ctxt->sax2) && (defaultValue != NULL) &&
5803 (def != XML_ATTRIBUTE_IMPLIED) &&
5804 (def != XML_ATTRIBUTE_REQUIRED)) {
5805 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5808 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5810 if (defaultValue != NULL)
5811 xmlFree(defaultValue);
/* Unlike the sibling declarations, the boundary problem is reported here
 * as a validity error. */
5815 if (input != ctxt->input) {
5816 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5817 "Attribute list declaration doesn't start and stop in the same entity\n",
5826 * xmlParseElementMixedContentDecl:
5827 * @ctxt: an XML parser context
5828 * @inputchk: the input used for the current entity, needed for boundary checks
5830 * parse the declaration for a Mixed Element content
5831 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5833 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5834 * '(' S? '#PCDATA' S? ')'
5836 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5838 * [ VC: No Duplicate Types ]
5839 * The same name must not appear more than once in a single
5840 * mixed-content declaration.
5842 * returns: the list of the xmlElementContentPtr describing the element choices
/*
 * Parses a mixed content model (one starting with #PCDATA) and builds the
 * corresponding xmlElementContent tree.
 * NOTE(review): skips, returns, tree linking statements and closing braces
 * are not visible in this view.
 */
5844 xmlElementContentPtr
5845 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5846 xmlElementContentPtr ret = NULL, cur = NULL, n;
5847 const xmlChar *elem = NULL;
5850 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
/* The entity boundary is checked only when validating. */
5855 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5856 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5857 "Element content declaration doesn't start and stop in the same entity\n",
/* Content model consisting of #PCDATA alone. */
5861 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5865 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5870 if ((RAW == '(') || (RAW == '|')) {
5871 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5872 if (ret == NULL) return(NULL);
/* Each | introduces one more alternative, chained through OR nodes. */
5874 while (RAW == '|') {
5877 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5878 if (ret == NULL) return(NULL);
5884 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5885 if (n == NULL) return(NULL);
5886 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5895 elem = xmlParseName(ctxt);
5897 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5898 "xmlParseElementMixedContentDecl : Name expected\n");
5899 xmlFreeDocElementContent(ctxt->myDoc, cur);
/* Proper termination: the last parsed name becomes the second child of the
 * current node and the whole model is marked as repeatable. */
5905 if ((RAW == ')') && (NXT(1) == '*')) {
5907 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5908 XML_ELEMENT_CONTENT_ELEMENT);
5909 if (cur->c2 != NULL)
5910 cur->c2->parent = cur;
5913 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5914 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5915 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5916 "Element content declaration doesn't start and stop in the same entity\n",
/* Any other termination discards the tree and reports an error. */
5921 xmlFreeDocElementContent(ctxt->myDoc, ret);
5922 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5927 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5933 * xmlParseElementChildrenContentDeclPriv:
5934 * @ctxt: an XML parser context
5935 * @inputchk: the input used for the current entity, needed for boundary checks
5936 * @depth: the level of recursion
5938 * parse the declaration for an element children (element-only) content model
5939 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5942 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5944 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5946 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5948 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5950 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5951 * TODO Parameter-entity replacement text must be properly nested
5952 * with parenthesized groups. That is to say, if either of the
5953 * opening or closing parentheses in a choice, seq, or Mixed
5954 * construct is contained in the replacement text for a parameter
5955 * entity, both must be contained in the same replacement text. For
5956 * interoperability, if a parameter-entity reference appears in a
5957 * choice, seq, or Mixed construct, its replacement text should not
5958 * be empty, and neither the first nor last non-blank character of
5959 * the replacement text should be a connector (| or ,).
5961 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Recursive worker for the children (element-only) content model.
 * Nested groups are handled by calling this function again with the id of
 * the current input.  Items are either names (ELEMENT leaves) or nested
 * groups; separators produce SEQ or OR nodes, and mixing the two kinds of
 * separator inside one group raises XML_ERR_SEPARATOR_REQUIRED.  On the
 * visible error paths the partially built tree (ret, plus last when it is
 * distinct from ret) is released with xmlFreeDocElementContent().
 */
5964 static xmlElementContentPtr
5965 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5967 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5968 const xmlChar *elem;
/*
 * Recursion guard: a depth above 128 is refused unless XML_PARSE_HUGE is
 * set.  NOTE(review): a second condition is or-ed in; verify it against
 * the complete source.
 */
5971 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5973 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5974 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
/* The id of the input holding the nested group is what the callee checks. */
5981 int inputid = ctxt->input->id;
5983 /* Recurse on first child */
5986 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5991 elem = xmlParseName(ctxt);
5993 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5996 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5998 xmlErrMemory(ctxt, NULL);
/*
 * Occurrence indicator of the first item: '*' gives MULT, '+' gives PLUS,
 * no indicator gives ONCE; the OPT case is presumably selected by '?'.
 */
6003 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6005 } else if (RAW == '*') {
6006 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6008 } else if (RAW == '+') {
6009 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6012 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
/* Main loop: keeps consuming "separator item" pairs until the closing ')'. */
6018 while (RAW != ')') {
6020 * Each loop we parse one separator and one element.
/* type remembers the first separator seen so later ones can be compared. */
6023 if (type == 0) type = CUR;
6026 * Detect "Name | Name , Name" error
6028 else if (type != CUR) {
6029 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6030 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6032 if ((last != NULL) && (last != ret))
6033 xmlFreeDocElementContent(ctxt->myDoc, last);
6035 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* This first separator branch builds a sequence (SEQ) operator node. */
6040 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6042 if ((last != NULL) && (last != ret))
6043 xmlFreeDocElementContent(ctxt->myDoc, last);
6044 xmlFreeDocElementContent(ctxt->myDoc, ret);
6062 } else if (RAW == '|') {
6063 if (type == 0) type = CUR;
6066 * Detect "Name , Name | Name" error
6068 else if (type != CUR) {
6069 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6070 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6072 if ((last != NULL) && (last != ret))
6073 xmlFreeDocElementContent(ctxt->myDoc, last);
6075 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* A '|' separator builds a choice (OR) operator node. */
6080 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6082 if ((last != NULL) && (last != ret))
6083 xmlFreeDocElementContent(ctxt->myDoc, last);
6085 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Neither separator matched: the group is reported as not finished. */
6104 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6105 if ((last != NULL) && (last != ret))
6106 xmlFreeDocElementContent(ctxt->myDoc, last);
6108 xmlFreeDocElementContent(ctxt->myDoc, ret);
6115 int inputid = ctxt->input->id;
6116 /* Recurse on second child */
6119 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6123 elem = xmlParseName(ctxt);
6125 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6127 xmlFreeDocElementContent(ctxt->myDoc, ret);
6130 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6133 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Same occurrence-indicator mapping as for the first item, applied to last. */
6137 last->ocur = XML_ELEMENT_CONTENT_OPT;
6139 } else if (RAW == '*') {
6140 last->ocur = XML_ELEMENT_CONTENT_MULT;
6142 } else if (RAW == '+') {
6143 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6146 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6152 if ((cur != NULL) && (last != NULL)) {
/* Entity-boundary check for the group, validating mode only. */
6157 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6158 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159 "Element content declaration doesn't start and stop in the same entity\n",
/*
 * Occurrence indicator applied to the whole group.  It is merged with the
 * value already held in ret->ocur: in this first branch (presumably '?')
 * an existing PLUS or MULT becomes MULT, anything else becomes OPT.
 */
6165 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6166 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6167 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6169 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6172 } else if (RAW == '*') {
6174 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6177 * Some normalization:
6178 * (a | b* | c?)* == (a | b | c)*
/* Walk the OR chain and reset OPT/MULT children to ONCE. */
6180 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6181 if ((cur->c1 != NULL) &&
6182 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6183 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6184 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6185 if ((cur->c2 != NULL) &&
6186 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6187 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6188 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6193 } else if (RAW == '+') {
/* '+' on the group: an existing OPT or MULT becomes MULT, otherwise PLUS. */
6197 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6199 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6201 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6203 * Some normalization:
6204 * (a | b*)+ == (a | b)*
6205 * (a | b?)+ == (a | b)*
6207 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6208 if ((cur->c1 != NULL) &&
6209 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6211 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6214 if ((cur->c2 != NULL) &&
6215 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6216 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6217 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
/*
 * Presumably applied only when the walk above reset at least one child,
 * matching the "(a | b*)+ == (a | b)*" rule -- TODO confirm the guard.
 */
6223 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6231 * xmlParseElementChildrenContentDecl:
6232 * @ctxt: an XML parser context
6233 * @inputchk: the input used for the current entity, needed for boundary checks
6235 * parse the declaration for an element children (element-only) content model
6236 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6238 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6240 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6242 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6244 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6246 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6247 * TODO Parameter-entity replacement text must be properly nested
6248 * with parenthesized groups. That is to say, if either of the
6249 * opening or closing parentheses in a choice, seq, or Mixed
6250 * construct is contained in the replacement text for a parameter
6251 * entity, both must be contained in the same replacement text. For
6252 * interoperability, if a parameter-entity reference appears in a
6253 * choice, seq, or Mixed construct, its replacement text should not
6254 * be empty, and neither the first nor last non-blank character of
6255 * the replacement text should be a connector (| or ,).
6257 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Public entry point kept for API/ABI compatibility: it only forwards to
 * xmlParseElementChildrenContentDeclPriv() with an initial depth of 1 and
 * returns whatever tree that call produces.
 */
6260 xmlElementContentPtr
6261 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6262 /* stub left for API/ABI compat */
6263 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6267 * xmlParseElementContentDecl:
6268 * @ctxt: an XML parser context
6269 * @name: the name of the element being defined.
6270 * @result: the Element Content pointer will be stored here if any
6272 * parse the declaration for an Element content either Mixed or Children,
6273 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6275 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6277 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/*
 * Dispatcher between the two content-model parsers.  @name is only used in
 * the error message raised when the opening '(' is missing.  The id of the
 * input current at entry is handed to the sub-parsers for their
 * entity-boundary checks.  The tree is presumably stored through @result
 * on a line not shown in this excerpt -- TODO confirm.
 */
6281 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6282 xmlElementContentPtr *result) {
6284 xmlElementContentPtr tree = NULL;
6285 int inputid = ctxt->input->id;
6291 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6292 "xmlParseElementContentDecl : %s '(' expected\n", name);
/* "#PCDATA" selects the Mixed parser; anything else the children parser. */
6298 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6299 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6300 res = XML_ELEMENT_TYPE_MIXED;
/* Children model: recursion depth starts at 1. */
6302 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6303 res = XML_ELEMENT_TYPE_ELEMENT;
6311 * xmlParseElementDecl:
6312 * @ctxt: an XML parser context
6314 * parse an Element declaration.
6316 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6318 * [ VC: Unique Element Type Declaration ]
6319 * No element type may be declared more than once
6321 * Returns the type of the element, or -1 in case of error
/*
 * Parser for a "<!ELEMENT" declaration: element name, then a content
 * specification (EMPTY, ANY or a parenthesized model).  The result is
 * reported through the SAX elementDecl callback when one is installed and
 * SAX is not disabled.  ret carries the XML_ELEMENT_TYPE_* value chosen
 * below; content holds the parsed model tree, if any.
 */
6324 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6325 const xmlChar *name;
6327 xmlElementContentPtr content = NULL;
6329 /* GROW; done in the caller */
6330 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Remembered to detect a declaration that ends in another input. */
6331 xmlParserInputPtr input = ctxt->input;
6334 if (!IS_BLANK_CH(CUR)) {
6335 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6336 "Space required after 'ELEMENT'\n");
6339 name = xmlParseName(ctxt);
6341 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6342 "xmlParseElementDecl: no name for Element\n");
/* Loop while the current input is exhausted and more inputs are stacked. */
6345 while ((RAW == 0) && (ctxt->inputNr > 1))
6347 if (!IS_BLANK_CH(CUR)) {
6348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6349 "Space required after the element name\n");
6352 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6355 * Element must always be empty.
6357 ret = XML_ELEMENT_TYPE_EMPTY;
6358 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6362 * Element is a generic container.
6364 ret = XML_ELEMENT_TYPE_ANY;
6365 } else if (RAW == '(') {
/* Parenthesized model: the tree is returned through &content. */
6366 ret = xmlParseElementContentDecl(ctxt, name, &content);
6369 * [ WFC: PEs in Internal Subset ] error handling.
/* A '%' here with ctxt->external == 0 on the bottom input is fatal. */
6371 if ((RAW == '%') && (ctxt->external == 0) &&
6372 (ctxt->inputNr == 1)) {
6373 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6374 "PEReference: forbidden within markup decl in internal subset\n");
6376 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6377 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6384 * Pop-up of finished entities.
6386 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Missing '>': report it and drop the content tree that was built. */
6391 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6392 if (content != NULL) {
6393 xmlFreeDocElementContent(ctxt->myDoc, content);
6396 if (input != ctxt->input) {
6397 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398 "Element declaration doesn't start and stop in the same entity\n");
6402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6403 (ctxt->sax->elementDecl != NULL)) {
/*
 * content->parent is cleared before the callback and inspected after it:
 * if it is still NULL the tree is freed here (see the "trick" note below).
 */
6404 if (content != NULL)
6405 content->parent = NULL;
6406 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6408 if ((content != NULL) && (content->parent == NULL)) {
6410 * this is a trick: if xmlAddElementDecl is called,
6411 * instead of copying the full tree it is plugged directly
6412 * if called from the parser. Avoid duplicating the
6413 * interfaces or change the API/ABI
6415 xmlFreeDocElementContent(ctxt->myDoc, content);
/* No callback was invoked, so the tree is released here. */
6417 } else if (content != NULL) {
6418 xmlFreeDocElementContent(ctxt->myDoc, content);
6426 * xmlParseConditionalSections
6427 * @ctxt: an XML parser context
6429 * [61] conditionalSect ::= includeSect | ignoreSect
6430 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6431 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6432 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6433 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * Parser for INCLUDE and IGNORE conditional sections.  The id of the input
 * current at entry is kept so that markup ending up in a different input
 * can be reported as an XML_ERR_ENTITY_BOUNDARY validity error.  Any other
 * keyword raises XML_ERR_CONDSEC_INVALID_KEYWORD.  When
 * xmlParserDebugEntities is set, entering and leaving a section is traced
 * through xmlGenericError().
 */
6437 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6438 int id = ctxt->input->id;
6442 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6446 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6448 if (ctxt->input->id != id) {
6449 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6450 "All markup of the conditional section is not in the same entity\n",
6455 if (xmlParserDebugEntities) {
6456 if ((ctxt->input != NULL) && (ctxt->input->filename))
6457 xmlGenericError(xmlGenericErrorContext,
6458 "%s(%d): ", ctxt->input->filename,
6460 xmlGenericError(xmlGenericErrorContext,
6461 "Entering INCLUDE Conditional Section\n");
/*
 * INCLUDE body: the content is parsed like external-subset declarations
 * until the input ends or the section terminator is reached.
 */
6464 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
/* Snapshot of the position, used below to detect a pass without progress. */
6466 const xmlChar *check = CUR_PTR;
6467 unsigned int cons = ctxt->input->consumed;
6469 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6470 xmlParseConditionalSections(ctxt);
6471 } else if (IS_BLANK_CH(CUR)) {
6473 } else if (RAW == '%') {
6474 xmlParsePEReference(ctxt);
6476 xmlParseMarkupDecl(ctxt);
6479 * Pop-up of finished entities.
6481 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Nothing was consumed during this pass: report an error. */
6484 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6485 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6489 if (xmlParserDebugEntities) {
6490 if ((ctxt->input != NULL) && (ctxt->input->filename))
6491 xmlGenericError(xmlGenericErrorContext,
6492 "%s(%d): ", ctxt->input->filename,
6494 xmlGenericError(xmlGenericErrorContext,
6495 "Leaving INCLUDE Conditional Section\n");
6498 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6500 xmlParserInputState instate;
6506 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6508 if (ctxt->input->id != id) {
6509 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6510 "All markup of the conditional section is not in the same entity\n",
6515 if (xmlParserDebugEntities) {
6516 if ((ctxt->input != NULL) && (ctxt->input->filename))
6517 xmlGenericError(xmlGenericErrorContext,
6518 "%s(%d): ", ctxt->input->filename,
6520 xmlGenericError(xmlGenericErrorContext,
6521 "Entering IGNORE Conditional Section\n");
6525 * Parse up to the end of the conditional section
6526 * But disable SAX event generating DTD building in the meantime
/*
 * disableSAX and instate are saved here and restored after the scan.  SAX
 * is left enabled when ctxt->recovery is non-zero.
 */
6528 state = ctxt->disableSAX;
6529 instate = ctxt->instate;
6530 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6531 ctxt->instate = XML_PARSER_IGNORE;
/*
 * depth tracks nested "<![" ... "]]>" pairs; the scan stops once it goes
 * negative or the input is exhausted.  The terminator that makes depth
 * negative is not skipped by the SKIP(3) below.
 */
6533 while ((depth >= 0) && (RAW != 0)) {
6534 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6539 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6540 if (--depth >= 0) SKIP(3);
6547 ctxt->disableSAX = state;
6548 ctxt->instate = instate;
6550 if (xmlParserDebugEntities) {
6551 if ((ctxt->input != NULL) && (ctxt->input->filename))
6552 xmlGenericError(xmlGenericErrorContext,
6553 "%s(%d): ", ctxt->input->filename,
6555 xmlGenericError(xmlGenericErrorContext,
6556 "Leaving IGNORE Conditional Section\n");
/* Neither INCLUDE nor IGNORE. */
6560 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
/* Presumably reached when the input ended before "]]>" -- TODO confirm. */
6567 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6569 if (ctxt->input->id != id) {
6570 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6571 "All markup of the conditional section is not in the same entity\n",
6579 * xmlParseMarkupDecl:
6580 * @ctxt: an XML parser context
6582 * parse Markup declarations
6584 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6585 * NotationDecl | PI | Comment
6587 * [ VC: Proper Declaration/PE Nesting ]
6588 * Parameter-entity replacement text must be properly nested with
6589 * markup declarations. That is to say, if either the first character
6590 * or the last character of a markup declaration (markupdecl above) is
6591 * contained in the replacement text for a parameter-entity reference,
6592 * both must be contained in the same replacement text.
6594 * [ WFC: PEs in Internal Subset ]
6595 * In the internal DTD subset, parameter-entity references can occur
6596 * only where markup declarations can occur, not within markup declarations.
6597 * (This does not apply to references that occur in external parameter
6598 * entities or to the external subset.)
/*
 * Dispatcher for one markup declaration.  After "<!" it selects among the
 * element, entity, attribute-list, notation and comment parsers by looking
 * at the following characters; "<?" is handled separately.  Afterwards it
 * deals with parameter-entity references and conditional sections for
 * content where ctxt->external is 0, and finally sets ctxt->instate to
 * XML_PARSER_DTD.
 */
6601 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6604 if (NXT(1) == '!') {
6608 xmlParseElementDecl(ctxt);
6609 else if (NXT(3) == 'N')
6610 xmlParseEntityDecl(ctxt);
6613 xmlParseAttributeListDecl(ctxt);
6616 xmlParseNotationDecl(ctxt);
6619 xmlParseComment(ctxt);
6622 /* there is an error but it will be detected later */
6625 } else if (NXT(1) == '?') {
6630 * This is only for internal subset. On external entities,
6631 * the replacement is done before parsing stage
/* inputNr == 1: only one input is on the stack at this point. */
6633 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6634 xmlParsePEReference(ctxt);
6637 * Conditional sections are allowed from entities included
6638 * by PE References in the internal subset.
/* inputNr > 1: an additional input has been pushed on the stack. */
6640 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6642 xmlParseConditionalSections(ctxt);
6646 ctxt->instate = XML_PARSER_DTD;
6651 * @ctxt: an XML parser context
6653 * parse an XML declaration header for external entities
6655 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
/*
 * Parser for the text declaration of an external entity.  It requires
 * "<?xml" followed by a blank, reads the optional version (falling back to
 * XML_DEFAULT_VERSION) and stores it in ctxt->input->version, then reads
 * the encoding declaration, and finally expects the closing "?>".
 */
6659 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6661 const xmlChar *encoding;
6664 * We know that '<?xml' is here.
6666 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6669 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6673 if (!IS_BLANK_CH(CUR)) {
6674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6675 "Space needed after '<?xml'\n");
6680 * We may have the VersionInfo here.
6682 version = xmlParseVersionInfo(ctxt);
/* No version given: a copy of the default version string is used instead. */
6683 if (version == NULL)
6684 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6686 if (!IS_BLANK_CH(CUR)) {
6687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 "Space needed here\n");
/* The version string is handed over to the current input. */
6691 ctxt->input->version = version;
6694 * We must have the encoding declaration
6696 encoding = xmlParseEncodingDecl(ctxt);
6697 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6699 * The XML REC instructs us to stop parsing right here
/* Reported only when no earlier error is recorded in ctxt->errNo. */
6703 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6704 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6705 "Missing encoding in text declaration\n");
6709 if ((RAW == '?') && (NXT(1) == '>')) {
6711 } else if (RAW == '>') {
6712 /* Deprecated old WD ... */
6713 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
/* Anything else: report, then advance with MOVETO_ENDTAG. */
6716 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6717 MOVETO_ENDTAG(CUR_PTR);
6723 * xmlParseExternalSubset:
6724 * @ctxt: an XML parser context
6725 * @ExternalID: the external identifier
6726 * @SystemID: the system identifier (or URL)
6728 * parse Markup declarations from an external subset
6730 * [30] extSubset ::= textDecl? extSubsetDecl
6732 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/*
 * Parser for an external DTD subset.  Steps visible here: optional
 * encoding detection from the first 4 bytes, optional text declaration,
 * creation of ctxt->myDoc and of its internal subset when they are
 * missing, then a loop over declarations, conditional sections, PE
 * references and blanks.  XML_ERR_EXT_SUBSET_NOT_FINISHED is raised when a
 * pass of the loop consumes nothing, and again after the loop (presumably
 * when input remains -- TODO confirm the guard).
 */
6735 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6736 const xmlChar *SystemID) {
6737 xmlDetectSAX2(ctxt);
/* Detection is attempted only without a known encoding and with >= 4 bytes. */
6740 if ((ctxt->encoding == NULL) &&
6741 (ctxt->input->end - ctxt->input->cur >= 4)) {
6743 xmlCharEncoding enc;
6749 enc = xmlDetectCharEncoding(start, 4);
6750 if (enc != XML_CHAR_ENCODING_NONE)
6751 xmlSwitchEncoding(ctxt, enc);
6754 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6755 xmlParseTextDecl(ctxt);
6756 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6758 * The XML REC instructs us to stop parsing right here
6760 ctxt->instate = XML_PARSER_EOF;
/* A document is created on demand and flagged XML_DOC_INTERNAL. */
6764 if (ctxt->myDoc == NULL) {
6765 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6766 if (ctxt->myDoc == NULL) {
6767 xmlErrMemory(ctxt, "New Doc failed");
6770 ctxt->myDoc->properties = XML_DOC_INTERNAL;
/* The document gets an internal subset node if it does not have one yet. */
6772 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6773 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6775 ctxt->instate = XML_PARSER_DTD;
6777 while (((RAW == '<') && (NXT(1) == '?')) ||
6778 ((RAW == '<') && (NXT(1) == '!')) ||
6779 (RAW == '%') || IS_BLANK_CH(CUR)) {
/* Snapshot of the position, used below to detect a pass without progress. */
6780 const xmlChar *check = CUR_PTR;
6781 unsigned int cons = ctxt->input->consumed;
6784 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6785 xmlParseConditionalSections(ctxt);
6786 } else if (IS_BLANK_CH(CUR)) {
6788 } else if (RAW == '%') {
6789 xmlParsePEReference(ctxt);
6791 xmlParseMarkupDecl(ctxt);
6794 * Pop-up of finished entities.
6796 while ((RAW == 0) && (ctxt->inputNr > 1))
6799 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6800 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6806 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6812 * xmlParseReference:
6813 * @ctxt: an XML parser context
6815 * parse and handle entity references in content. Depending on the SAX
6816 * interface, this may end up in a call to characters() if this is a
6817 * CharRef or a predefined entity, if there is no reference() callback,
6818 * or if the parser was asked to switch to that mode.
6820 * [67] Reference ::= EntityRef | CharRef
/*
 * Handler for a reference met in content.
 *
 * Character reference (NXT(1) == '#'): the value is delivered through the
 * SAX characters() callback.  The exception is a context charset that is
 * not UTF-8 combined with a value above 0xFF: the reference is then passed
 * as a "#x.." or "#.." name to the reference() callback.
 *
 * Entity reference: the entity is resolved with xmlParseEntityRef().  On
 * first use (ent->checked == 0) its content is parsed, the number of
 * entities this consumed is recorded in ent->checked, and any resulting
 * node list may become ent->children.  The content is then given to the
 * application through SAX callbacks and/or by attaching or copying nodes
 * under ctxt->node, depending on ctxt->replaceEntities and
 * ctxt->parseMode.
 */
6823 xmlParseReference(xmlParserCtxtPtr ctxt) {
6827 xmlNodePtr list = NULL;
6828 xmlParserErrors ret = XML_ERR_OK;
6835 * Simple case of a CharRef
6837 if (NXT(1) == '#') {
6841 int value = xmlParseCharRef(ctxt);
6845 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6847 * So we are using non-UTF-8 buffers
6848 * Check that the char fit on 8bits, if not
6849 * generate a CharRef.
6851 if (value <= 0xFF) {
6854 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6855 (!ctxt->disableSAX))
6856 ctxt->sax->characters(ctxt->userData, out, 1);
/* Rebuild the textual form of the reference, hexadecimal or decimal. */
6858 if ((hex == 'x') || (hex == 'X'))
6859 snprintf((char *)out, sizeof(out), "#x%X", value);
6861 snprintf((char *)out, sizeof(out), "#%d", value);
6862 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6863 (!ctxt->disableSAX))
6864 ctxt->sax->reference(ctxt->userData, out);
6868 * Just encode the value in UTF-8
6870 COPY_BUF(0 ,out, i, value);
6872 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6873 (!ctxt->disableSAX))
6874 ctxt->sax->characters(ctxt->userData, out, i);
6880 * We are seeing an entity reference
6882 ent = xmlParseEntityRef(ctxt);
6883 if (ent == NULL) return;
6884 if (!ctxt->wellFormed)
/* Saved before the first-use parse below can change ent->checked. */
6886 was_checked = ent->checked;
6888 /* special case of predefined entities */
6889 if ((ent->name == NULL) ||
6890 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6892 if (val == NULL) return;
6894 * inline the entity.
6896 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897 (!ctxt->disableSAX))
6898 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6903 * The first reference to the entity trigger a parsing phase
6904 * where the ent->children is filled with the result from
6907 if (ent->checked == 0) {
/* Baseline used to compute how many entities this content consumes. */
6908 unsigned long oldnbent = ctxt->nbentities;
6911 * This is a bit hackish but this seems the best
6912 * way to make sure both SAX and DOM entity support
6916 if (ctxt->userData == ctxt)
6919 user_data = ctxt->userData;
6922 * Check that this entity is well formed
6923 * 4.3.2: An internal general parsed entity is well-formed
6924 * if its replacement text matches the production labeled
6927 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6929 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6933 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6935 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6936 user_data, ctxt->depth, ent->URI,
6937 ent->ExternalID, &list);
6940 ret = XML_ERR_ENTITY_PE_INTERNAL;
6941 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6942 "invalid entity type found\n", NULL);
6946 * Store the number of entities needing parsing for this entity
6947 * content and do checkings
6949 ent->checked = ctxt->nbentities - oldnbent;
6950 if (ret == XML_ERR_ENTITY_LOOP) {
6951 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6952 xmlFreeNodeList(list);
6955 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6956 xmlFreeNodeList(list);
6960 if ((ret == XML_ERR_OK) && (list != NULL)) {
/* The parsed list becomes ent->children only if there were none before. */
6961 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6962 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6963 (ent->children == NULL)) {
6964 ent->children = list;
6965 if (ctxt->replaceEntities) {
6967 * Prune it directly in the generated document
6968 * except for single text nodes.
6970 if (((list->type == XML_TEXT_NODE) &&
6971 (list->next == NULL)) ||
6972 (ctxt->parseMode == XML_PARSE_READER)) {
6973 list->parent = (xmlNodePtr) ent;
/* Otherwise the nodes are re-parented to ctxt->node within ctxt->myDoc. */
6978 while (list != NULL) {
6979 list->parent = (xmlNodePtr) ctxt->node;
6980 list->doc = ctxt->myDoc;
6981 if (list->next == NULL)
6985 list = ent->children;
6986 #ifdef LIBXML_LEGACY_ENABLED
6987 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6988 xmlAddEntityReference(ent, list, NULL);
6989 #endif /* LIBXML_LEGACY_ENABLED */
/* Here the nodes get the entity itself as parent. */
6993 while (list != NULL) {
6994 list->parent = (xmlNodePtr) ent;
6995 if (list->next == NULL)
7001 xmlFreeNodeList(list);
7004 } else if ((ret != XML_ERR_OK) &&
7005 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7006 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7007 "Entity '%s' failed to parse\n", ent->name);
7008 } else if (list != NULL) {
7009 xmlFreeNodeList(list);
7012 if (ent->checked == 0)
/* Already checked: add the recorded count again, except for the value 1. */
7014 } else if (ent->checked != 1) {
7015 ctxt->nbentities += ent->checked;
7019 * Now that the entity content has been gathered
7020 * provide it to the application, this can take different forms based
7021 * on the parsing modes.
7023 if (ent->children == NULL) {
7025 * Probably running in SAX mode and the callbacks don't
7026 * build the entity content. So unless we already went
7027 * though parsing for first checking go though the entity
7028 * content to generate callbacks associated to the entity
/* was_checked is the value ent->checked had on entry to this call. */
7030 if (was_checked != 0) {
7033 * This is a bit hackish but this seems the best
7034 * way to make sure both SAX and DOM entity support
7037 if (ctxt->userData == ctxt)
7040 user_data = ctxt->userData;
7042 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7044 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7045 ent->content, user_data, NULL);
7047 } else if (ent->etype ==
7048 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7050 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7051 ctxt->sax, user_data, ctxt->depth,
7052 ent->URI, ent->ExternalID, NULL);
7055 ret = XML_ERR_ENTITY_PE_INTERNAL;
7056 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7057 "invalid entity type found\n", NULL);
7059 if (ret == XML_ERR_ENTITY_LOOP) {
7060 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7064 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7065 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7067 * Entity reference callback comes second, it's somewhat
7068 * superfluous but a compatibility to historical behaviour
7070 ctxt->sax->reference(ctxt->userData, ent->name);
7076 * If we didn't get any children for the entity being built
7078 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7079 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7083 ctxt->sax->reference(ctxt->userData, ent->name);
7087 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7089 * There is a problem on the handling of _private for entities
7090 * (bug 155816): Should we copy the content of the field from
7091 * the entity (possibly overwriting some value set by the user
7092 * when a copy is created), should we leave it alone, or should
7093 * we try to take care of different situations? The problem
7094 * is exacerbated by the usage of this field by the xmlReader.
7095 * To fix this bug, we look at _private on the created node
7096 * and, if it's NULL, we copy in whatever was in the entity.
7097 * If it's not NULL we leave it alone. This is somewhat of a
7098 * hack - maybe we should have further tests to determine
7101 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7103 * Seems we are generating the DOM content, do
7104 * a simple tree copy for all references except the first
7105 * In the first occurrence list contains the replacement.
7106 * progressive == 2 means we are operating on the Reader
7107 * and since nodes are discarded we must copy all the time.
7109 if (((list == NULL) && (ent->owner == 0)) ||
7110 (ctxt->parseMode == XML_PARSE_READER)) {
7111 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7114 * when operating on a reader, the entities definitions
7115 * are always owning the entities subtree.
7116 if (ctxt->parseMode == XML_PARSE_READER)
/* Copy each child of the entity and add the copy under ctxt->node. */
7120 cur = ent->children;
7121 while (cur != NULL) {
7122 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7124 if (nw->_private == NULL)
7125 nw->_private = cur->_private;
7126 if (firstChild == NULL){
7129 nw = xmlAddChild(ctxt->node, nw);
7131 if (cur == ent->last) {
7133 * needed to detect some strange empty
7134 * node cases in the reader tests
7136 if ((ctxt->parseMode == XML_PARSE_READER) &&
7138 (nw->type == XML_ELEMENT_NODE) &&
7139 (nw->children == NULL))
7146 #ifdef LIBXML_LEGACY_ENABLED
7147 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7148 xmlAddEntityReference(ent, firstChild, nw);
7149 #endif /* LIBXML_LEGACY_ENABLED */
7150 } else if (list == NULL) {
7151 xmlNodePtr nw = NULL, cur, next, last,
7154 * Copy the entity child list and make it the new
7155 * entity child list. The goal is to make sure any
7156 * ID or REF referenced will be the one from the
7157 * document content and not the entity copy.
7159 cur = ent->children;
7160 ent->children = NULL;
7163 while (cur != NULL) {
7167 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7169 if (nw->_private == NULL)
7170 nw->_private = cur->_private;
7171 if (firstChild == NULL){
/* The copy goes to the entity; the original node moves under ctxt->node. */
7174 xmlAddChild((xmlNodePtr) ent, nw);
7175 xmlAddChild(ctxt->node, cur);
7181 if (ent->owner == 0)
7183 #ifdef LIBXML_LEGACY_ENABLED
7184 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7185 xmlAddEntityReference(ent, firstChild, nw);
7186 #endif /* LIBXML_LEGACY_ENABLED */
7188 const xmlChar *nbktext;
7191 * the name change is to avoid coalescing of the
7192 * node with a possible previous text one which
7193 * would make ent->children a dangling pointer
7195 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7197 if (ent->children->type == XML_TEXT_NODE)
7198 ent->children->name = nbktext;
7199 if ((ent->last != ent->children) &&
7200 (ent->last->type == XML_TEXT_NODE))
7201 ent->last->name = nbktext;
7202 xmlAddChildList(ctxt->node, ent->children);
7206 * This is to avoid a nasty side effect, see
7207 * characters() in SAX.c
7217 * xmlParseEntityRef:
7218 * @ctxt: an XML parser context
7220 * parse ENTITY references declarations
7222 * [68] EntityRef ::= '&' Name ';'
7224 * [ WFC: Entity Declared ]
7225 * In a document without any DTD, a document with only an internal DTD
7226 * subset which contains no parameter entity references, or a document
7227 * with "standalone='yes'", the Name given in the entity reference
7228 * must match that in an entity declaration, except that well-formed
7229 * documents need not declare any of the following entities: amp, lt,
7230 * gt, apos, quot. The declaration of a parameter entity must precede
7231 * any reference to it. Similarly, the declaration of a general entity
7232 * must precede any reference to it which appears in a default value in an
7233 * attribute-list declaration. Note that if entities are declared in the
7234 * external subset or in external parameter entities, a non-validating
7235 * processor is not obligated to read and process their declarations;
7236 * for such documents, the rule that an entity must be declared is a
7237 * well-formedness constraint only if standalone='yes'.
7239 * [ WFC: Parsed Entity ]
7240 * An entity reference must not contain the name of an unparsed entity
7242 * Returns the xmlEntityPtr if found, or NULL otherwise.
7245 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7246 const xmlChar *name;
7247 xmlEntityPtr ent = NULL;
7254 name = xmlParseName(ctxt);
7256 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7257 "xmlParseEntityRef: no name\n");
7261 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7267 * Predefined entities override any extra definition
7269 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7270 ent = xmlGetPredefinedEntity(name);
7276 * Increase the number of entity references parsed
7281 * Ask first SAX for entity resolution, otherwise try the
7282 * entities which may have stored in the parser context.
7284 if (ctxt->sax != NULL) {
7285 if (ctxt->sax->getEntity != NULL)
7286 ent = ctxt->sax->getEntity(ctxt->userData, name);
7287 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7288 (ctxt->options & XML_PARSE_OLDSAX))
7289 ent = xmlGetPredefinedEntity(name);
7290 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7291 (ctxt->userData==ctxt)) {
7292 ent = xmlSAX2GetEntity(ctxt, name);
7296 * [ WFC: Entity Declared ]
7297 * In a document without any DTD, a document with only an
7298 * internal DTD subset which contains no parameter entity
7299 * references, or a document with "standalone='yes'", the
7300 * Name given in the entity reference must match that in an
7301 * entity declaration, except that well-formed documents
7302 * need not declare any of the following entities: amp, lt,
7304 * The declaration of a parameter entity must precede any
7306 * Similarly, the declaration of a general entity must
7307 * precede any reference to it which appears in a default
7308 * value in an attribute-list declaration. Note that if
7309 * entities are declared in the external subset or in
7310 * external parameter entities, a non-validating processor
7311 * is not obligated to read and process their declarations;
7312 * for such documents, the rule that an entity must be
7313 * declared is a well-formedness constraint only if
7317 if ((ctxt->standalone == 1) ||
7318 ((ctxt->hasExternalSubset == 0) &&
7319 (ctxt->hasPErefs == 0))) {
7320 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7321 "Entity '%s' not defined\n", name);
7323 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7324 "Entity '%s' not defined\n", name);
7325 if ((ctxt->inSubset == 0) &&
7326 (ctxt->sax != NULL) &&
7327 (ctxt->sax->reference != NULL)) {
7328 ctxt->sax->reference(ctxt->userData, name);
7335 * [ WFC: Parsed Entity ]
7336 * An entity reference must not contain the name of an
7339 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7340 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7341 "Entity reference to unparsed entity %s\n", name);
7345 * [ WFC: No External Entity References ]
7346 * Attribute values cannot contain direct or indirect
7347 * entity references to external entities.
7349 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7350 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7351 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7352 "Attribute references external entity '%s'\n", name);
7355 * [ WFC: No < in Attribute Values ]
7356 * The replacement text of any entity referred to directly or
7357 * indirectly in an attribute value (other than "<") must
7360 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7361 (ent != NULL) && (ent->content != NULL) &&
7362 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7363 (xmlStrchr(ent->content, '<'))) {
7364 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7365 "'<' in entity '%s' is not allowed in attributes values\n", name);
7369 * Internal check, no parameter entities here ...
7372 switch (ent->etype) {
7373 case XML_INTERNAL_PARAMETER_ENTITY:
7374 case XML_EXTERNAL_PARAMETER_ENTITY:
7375 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7376 "Attempt to reference the parameter entity '%s'\n",
7385 * [ WFC: No Recursion ]
7386 * A parsed entity must not contain a recursive reference
7387 * to itself, either directly or indirectly.
7388 * Done somewhere else
7394 * xmlParseStringEntityRef:
7395 * @ctxt: an XML parser context
7396 * @str: a pointer to an index in the string
7398 * parse an entity reference, but this version parses it from
7401 * [68] EntityRef ::= '&' Name ';'
7403 * [ WFC: Entity Declared ]
7404 * In a document without any DTD, a document with only an internal DTD
7405 * subset which contains no parameter entity references, or a document
7406 * with "standalone='yes'", the Name given in the entity reference
7407 * must match that in an entity declaration, except that well-formed
7408 * documents need not declare any of the following entities: amp, lt,
7409 * gt, apos, quot. The declaration of a parameter entity must precede
7410 * any reference to it. Similarly, the declaration of a general entity
7411 * must precede any reference to it which appears in a default value in an
7412 * attribute-list declaration. Note that if entities are declared in the
7413 * external subset or in external parameter entities, a non-validating
7414 * processor is not obligated to read and process their declarations;
7415 * for such documents, the rule that an entity must be declared is a
7416 * well-formedness constraint only if standalone='yes'.
7418 * [ WFC: Parsed Entity ]
7419 * An entity reference must not contain the name of an unparsed entity
7421 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7422 * is updated to the current location in the string.
/*
 * xmlParseStringEntityRef: string-based counterpart of xmlParseEntityRef.
 * The name is read with xmlParseStringName(ctxt, &ptr) rather than from the
 * parser input, and a NULL str or *str is rejected up front.
 * Entity lookup and the well-formedness checks follow the same sequence as
 * xmlParseEntityRef, except that the fallbacks here do not test
 * ctxt->wellFormed and no SAX reference() callback is visible.
 * NOTE(review): the embedded line numbers are not contiguous; the
 * declarations of name/ptr, the return statements and the update of *str
 * are not visible in this listing.
 */
7425 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7429 xmlEntityPtr ent = NULL;
7431 if ((str == NULL) || (*str == NULL))
7439 name = xmlParseStringName(ctxt, &ptr);
7441 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7442 "xmlParseStringEntityRef: no name\n");
7447 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7456 * Predefined entites override any extra definition
7458 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7459 ent = xmlGetPredefinedEntity(name);
7468 * Increate the number of entity references parsed
7473 * Ask first SAX for entity resolution, otherwise try the
7474 * entities which may have stored in the parser context.
7476 if (ctxt->sax != NULL) {
7477 if (ctxt->sax->getEntity != NULL)
7478 ent = ctxt->sax->getEntity(ctxt->userData, name);
/* Fallbacks: predefined entities in OLDSAX mode, then xmlSAX2GetEntity()
 * when userData is the parser context itself. */
7479 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7480 ent = xmlGetPredefinedEntity(name);
7481 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7482 ent = xmlSAX2GetEntity(ctxt, name);
7487 * [ WFC: Entity Declared ]
7488 * In a document without any DTD, a document with only an
7489 * internal DTD subset which contains no parameter entity
7490 * references, or a document with "standalone='yes'", the
7491 * Name given in the entity reference must match that in an
7492 * entity declaration, except that well-formed documents
7493 * need not declare any of the following entities: amp, lt,
7495 * The declaration of a parameter entity must precede any
7497 * Similarly, the declaration of a general entity must
7498 * precede any reference to it which appears in a default
7499 * value in an attribute-list declaration. Note that if
7500 * entities are declared in the external subset or in
7501 * external parameter entities, a non-validating processor
7502 * is not obligated to read and process their declarations;
7503 * for such documents, the rule that an entity must be
7504 * declared is a well-formedness constraint only if
7508 if ((ctxt->standalone == 1) ||
7509 ((ctxt->hasExternalSubset == 0) &&
7510 (ctxt->hasPErefs == 0))) {
7511 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7512 "Entity '%s' not defined\n", name);
7514 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7515 "Entity '%s' not defined\n",
7518 /* TODO ? check regressions ctxt->valid = 0; */
7522 * [ WFC: Parsed Entity ]
7523 * An entity reference must not contain the name of an
7526 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7528 "Entity reference to unparsed entity %s\n", name);
7532 * [ WFC: No External Entity References ]
7533 * Attribute values cannot contain direct or indirect
7534 * entity references to external entities.
7536 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7537 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7538 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7539 "Attribute references external entity '%s'\n", name);
7542 * [ WFC: No < in Attribute Values ]
7543 * The replacement text of any entity referred to directly or
7544 * indirectly in an attribute value (other than "<") must
7547 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7548 (ent != NULL) && (ent->content != NULL) &&
7549 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7550 (xmlStrchr(ent->content, '<'))) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7552 "'<' in entity '%s' is not allowed in attributes values\n",
7557 * Internal check, no parameter entities here ...
7560 switch (ent->etype) {
7561 case XML_INTERNAL_PARAMETER_ENTITY:
7562 case XML_EXTERNAL_PARAMETER_ENTITY:
7563 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7564 "Attempt to reference the parameter entity '%s'\n",
7573 * [ WFC: No Recursion ]
7574 * A parsed entity must not contain a recursive reference
7575 * to itself, either directly or indirectly.
7576 * Done somewhere else
7585 * xmlParsePEReference:
7586 * @ctxt: an XML parser context
7588 * parse a parameter-entity reference (PEReference)
7589 * The entity content is handled directly by pushing its content as
7590 * a new input stream.
7592 * [69] PEReference ::= '%' Name ';'
7594 * [ WFC: No Recursion ]
7595 * A parsed entity must not contain a recursive
7596 * reference to itself, either directly or indirectly.
7598 * [ WFC: Entity Declared ]
7599 * In a document without any DTD, a document with only an internal DTD
7600 * subset which contains no parameter entity references, or a document
7601 * with "standalone='yes'", ... ... The declaration of a parameter
7602 * entity must precede any reference to it...
7604 * [ VC: Entity Declared ]
7605 * In a document with an external subset or external parameter entities
7606 * with "standalone='no'", ... ... The declaration of a parameter entity
7607 * must precede any reference to it...
7610 * Parameter-entity references may only appear in the DTD.
7611 * NOTE: misleading but this is handled.
/*
 * xmlParsePEReference: reads the parameter-entity name with xmlParseName()
 * and asks the SAX getParameterEntity callback for the entity.
 * When none is returned, the condition is reported as fatal or as a
 * warning depending on standalone / hasExternalSubset / hasPErefs.
 * Otherwise the entity type is checked and a new input stream for the
 * entity is pushed with xmlPushInput(). For an external parameter entity
 * whose content starts with "<?xml" plus a blank, xmlParseTextDecl() is
 * called. ctxt->hasPErefs is set to 1.
 * NOTE(review): the embedded line numbers are not contiguous; several
 * guards, else branches and returns are not visible in this listing.
 */
7614 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7616 const xmlChar *name;
7617 xmlEntityPtr entity = NULL;
7618 xmlParserInputPtr input;
7623 name = xmlParseName(ctxt);
7625 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7626 "xmlParsePEReference: no name\n");
7630 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7637 * Increate the number of entity references parsed
7642 * Request the entity from SAX
7644 if ((ctxt->sax != NULL) &&
7645 (ctxt->sax->getParameterEntity != NULL))
7646 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7648 if (entity == NULL) {
7650 * [ WFC: Entity Declared ]
7651 * In a document without any DTD, a document with only an
7652 * internal DTD subset which contains no parameter entity
7653 * references, or a document with "standalone='yes'", ...
7654 * ... The declaration of a parameter entity must precede
7655 * any reference to it...
7657 if ((ctxt->standalone == 1) ||
7658 ((ctxt->hasExternalSubset == 0) &&
7659 (ctxt->hasPErefs == 0))) {
7660 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7661 "PEReference: %%%s; not found\n",
7665 * [ VC: Entity Declared ]
7666 * In a document with an external subset or external
7667 * parameter entities with "standalone='no'", ...
7668 * ... The declaration of a parameter entity must
7669 * precede any reference to it...
7671 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7672 "PEReference: %%%s; not found\n",
7678 * Internal checking in case the entity quest barfed
7680 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7681 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7682 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7683 "Internal: %%%s; is not a parameter entity\n",
7685 } else if (ctxt->input->free != deallocblankswrapper) {
/* The current input's free hook is not deallocblankswrapper: push a
 * blanks-wrapper stream for the entity. */
7686 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7687 if (xmlPushInput(ctxt, input) < 0)
7692 * handle the extra spaces added before and after
7693 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7695 input = xmlNewEntityInputStream(ctxt, entity);
7696 if (xmlPushInput(ctxt, input) < 0)
7698 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7699 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7700 (IS_BLANK_CH(NXT(5)))) {
7701 xmlParseTextDecl(ctxt);
7703 XML_ERR_UNSUPPORTED_ENCODING) {
7705 * The XML REC instructs us to stop parsing
7708 ctxt->instate = XML_PARSER_EOF;
/* hasPErefs is one of the flags tested by the "Entity Declared" checks
 * (see the hasPErefs == 0 test above). */
7714 ctxt->hasPErefs = 1;
7718 * xmlLoadEntityContent:
7719 * @ctxt: an XML parser context
7720 * @entity: an unloaded system entity
7722 * Load the original content of the given system entity from the
7723 * ExternalID/SystemID given. This is to be used for Included in Literal
7724 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7726 * Returns 0 in case of success and -1 in case of failure
/*
 * xmlLoadEntityContent: fills entity->content for an external entity.
 * Accepted input: non-NULL ctxt and entity, etype either
 * XML_EXTERNAL_PARAMETER_ENTITY or XML_EXTERNAL_GENERAL_PARSED_ENTITY, and
 * entity->content still NULL; anything else is reported as an internal
 * error.
 * The entity is opened with xmlNewEntityInputStream(), pushed as the
 * current input, copied into an xmlBuffer with xmlBufferAdd(), and the
 * buffer's content pointer is finally moved into entity->content.
 * NOTE(review): the parameter check also fires when ctxt == NULL, yet the
 * error call right after it still passes ctxt; whether xmlFatalErr accepts
 * a NULL context is not visible here.
 * NOTE(review): the embedded line numbers are not contiguous; the local
 * declarations of buf/l/c/count, part of the read loop and all return
 * statements are not visible in this listing.
 */
7729 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7730 xmlParserInputPtr input;
7735 if ((ctxt == NULL) || (entity == NULL) ||
7736 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7737 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7738 (entity->content != NULL)) {
7739 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7740 "xmlLoadEntityContent parameter error");
/* Optional trace, emitted only when xmlParserDebugEntities is non-zero. */
7744 if (xmlParserDebugEntities)
7745 xmlGenericError(xmlGenericErrorContext,
7746 "Reading %s entity content input\n", entity->name);
7748 buf = xmlBufferCreate();
/* NOTE(review): this report reuses the "parameter error" text although it
 * follows the buffer creation -- presumably the buf == NULL path. */
7750 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7751 "xmlLoadEntityContent parameter error");
7755 input = xmlNewEntityInputStream(ctxt, entity);
7756 if (input == NULL) {
7757 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7758 "xmlLoadEntityContent input error");
7764 * Push the entity as the current input, read char by char
7765 * saving to the buffer until the end of the entity or an error
7767 if (xmlPushInput(ctxt, input) < 0) {
7774 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7776 xmlBufferAdd(buf, ctxt->input->cur, l);
7777 if (count++ > 100) {
7785 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7787 } else if (!IS_CHAR(c)) {
7788 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7789 "xmlLoadEntityContent: invalid char value %d\n",
7794 entity->content = buf->content;
/* The buffer no longer references the bytes now stored in entity->content. */
7795 buf->content = NULL;
7802 * xmlParseStringPEReference:
7803 * @ctxt: an XML parser context
7804 * @str: a pointer to an index in the string
7806 * parse a parameter-entity reference (PEReference) from a string
7808 * [69] PEReference ::= '%' Name ';'
7810 * [ WFC: No Recursion ]
7811 * A parsed entity must not contain a recursive
7812 * reference to itself, either directly or indirectly.
7814 * [ WFC: Entity Declared ]
7815 * In a document without any DTD, a document with only an internal DTD
7816 * subset which contains no parameter entity references, or a document
7817 * with "standalone='yes'", ... ... The declaration of a parameter
7818 * entity must precede any reference to it...
7820 * [ VC: Entity Declared ]
7821 * In a document with an external subset or external parameter entities
7822 * with "standalone='no'", ... ... The declaration of a parameter entity
7823 * must precede any reference to it...
7826 * Parameter-entity references may only appear in the DTD.
7827 * NOTE: misleading but this is handled.
7829 * Returns the string of the entity content.
7830 * str is updated to the current value of the index
/*
 * xmlParseStringPEReference: string-based counterpart of
 * xmlParsePEReference. Returns NULL at once when str or *str is NULL, reads
 * the name with xmlParseStringName(ctxt, &ptr), asks the SAX
 * getParameterEntity callback for the entity, reports a missing or
 * non-parameter entity, and sets ctxt->hasPErefs to 1.
 * No input stream is pushed in the lines visible here.
 * NOTE(review): the header comment says the entity content string is
 * returned, but the only result tracked in the visible code is the
 * xmlEntityPtr; the return type line is not shown -- confirm.
 * NOTE(review): the embedded line numbers are not contiguous; the
 * declarations of name/ptr and the final return are not visible.
 */
7833 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7837 xmlEntityPtr entity = NULL;
7839 if ((str == NULL) || (*str == NULL)) return(NULL);
7845 name = xmlParseStringName(ctxt, &ptr);
7847 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7848 "xmlParseStringPEReference: no name\n");
7854 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7862 * Increate the number of entity references parsed
7867 * Request the entity from SAX
7869 if ((ctxt->sax != NULL) &&
7870 (ctxt->sax->getParameterEntity != NULL))
7871 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7873 if (entity == NULL) {
7875 * [ WFC: Entity Declared ]
7876 * In a document without any DTD, a document with only an
7877 * internal DTD subset which contains no parameter entity
7878 * references, or a document with "standalone='yes'", ...
7879 * ... The declaration of a parameter entity must precede
7880 * any reference to it...
7882 if ((ctxt->standalone == 1) ||
7883 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7884 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7885 "PEReference: %%%s; not found\n", name);
7888 * [ VC: Entity Declared ]
7889 * In a document with an external subset or external
7890 * parameter entities with "standalone='no'", ...
7891 * ... The declaration of a parameter entity must
7892 * precede any reference to it...
7894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895 "PEReference: %%%s; not found\n",
7901 * Internal checking in case the entity quest barfed
7903 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7904 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7905 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7906 "%%%s; is not a parameter entity\n",
7910 ctxt->hasPErefs = 1;
7917 * xmlParseDocTypeDecl:
7918 * @ctxt: an XML parser context
7920 * parse a DOCTYPE declaration
7922 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7923 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7925 * [ VC: Root Element Type ]
7926 * The Name in the document type declaration must match the element
7927 * type of the root element.
/*
 * xmlParseDocTypeDecl: parses the DOCTYPE name with xmlParseName() and the
 * optional external identifier with xmlParseExternalID().
 * Results are recorded on the context (intSubName, hasExternalSubset,
 * extSubURI, extSubSystem) and passed to the SAX internalSubset callback
 * when one is set and SAX is not disabled.
 * Internal subset declarations are not parsed here (see the comment
 * pointing at xmlParseInternalSubset()).
 * NOTE(review): the embedded line numbers are not contiguous; the input
 * skipping and the guards around the error calls are not visible.
 */
7931 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7932 const xmlChar *name = NULL;
7933 xmlChar *ExternalID = NULL;
7934 xmlChar *URI = NULL;
7937 * We know that '<!DOCTYPE' has been detected.
7944 * Parse the DOCTYPE name.
7946 name = xmlParseName(ctxt);
7948 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7949 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7951 ctxt->intSubName = name;
7956 * Check for SystemID and ExternalID
7958 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7960 if ((URI != NULL) || (ExternalID != NULL)) {
7961 ctxt->hasExternalSubset = 1;
/* The context stores the two pointers obtained above directly; no copy is
 * made in the visible code. */
7963 ctxt->extSubURI = URI;
7964 ctxt->extSubSystem = ExternalID;
7969 * Create and update the internal subset.
7971 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7972 (!ctxt->disableSAX))
7973 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7976 * Is there any internal subset declarations ?
7977 * they are handled separately in xmlParseInternalSubset()
7983 * We should be at the end of the DOCTYPE declaration.
7986 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7992 * xmlParseInternalSubset:
7993 * @ctxt: an XML parser context
7995 * parse the internal subset declaration
7997 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/*
 * xmlParseInternalSubset: switches the parser to XML_PARSER_DTD and loops
 * until the current character is ']', calling xmlParseMarkupDecl() and
 * xmlParsePEReference() on each pass.
 * A pass that moves neither CUR_PTR nor ctxt->input->consumed is reported
 * as an internal error.
 * NOTE(review): the embedded line numbers are not contiguous; the test for
 * the opening '[', the blank skipping, the body of the inner while loop and
 * the loop exit after the error are not visible in this listing.
 */
8001 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8003 * Is there any DTD definition ?
8006 ctxt->instate = XML_PARSER_DTD;
8009 * Parse the succession of Markup declarations and
8011 * Subsequence (markupdecl | PEReference | S)*
8013 while (RAW != ']') {
8014 const xmlChar *check = CUR_PTR;
8015 unsigned int cons = ctxt->input->consumed;
/* check/cons snapshot the position so the end of the pass can tell whether
 * any input was consumed. */
8018 xmlParseMarkupDecl(ctxt);
8019 xmlParsePEReference(ctxt);
8022 * Pop-up of finished entities.
8024 while ((RAW == 0) && (ctxt->inputNr > 1))
8027 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8028 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8029 "xmlParseInternalSubset: error detected in Markup declaration\n");
8040 * We should be at the end of the DOCTYPE declaration.
8043 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8048 #ifdef LIBXML_SAX1_ENABLED
8050 * xmlParseAttribute:
8051 * @ctxt: an XML parser context
8052 * @value: a xmlChar ** used to store the value of the attribute
8054 * parse an attribute
8056 * [41] Attribute ::= Name Eq AttValue
8058 * [ WFC: No External Entity References ]
8059 * Attribute values cannot contain direct or indirect entity references
8060 * to external entities.
8062 * [ WFC: No < in Attribute Values ]
8063 * The replacement text of any entity referred to directly or indirectly in
8064 * an attribute value (other than "<") must not contain a <.
8066 * [ VC: Attribute Value Type ]
8067 * The attribute must have been declared; the value must be of the type
8070 * [25] Eq ::= S? '=' S?
8074 * [NS 11] Attribute ::= QName Eq AttValue
8076 * Also the case QName == xmlns:??? is handled independently as a namespace
8079 * Returns the attribute name, and the value in *value.
/*
 * xmlParseAttribute (SAX1 path): parses the attribute name with
 * xmlParseName() and its value with xmlParseAttValue(), then checks the two
 * special attributes:
 *  - xml:lang is validated with xmlCheckLanguageID() only in pedantic mode
 *    and produces a warning, not an error;
 *  - xml:space accepts "default" or "preserve", any other value produces a
 *    warning.
 * NOTE(review): the embedded line numbers are not contiguous; the
 * declaration of val, the '=' handling, the statements run for
 * "default"/"preserve", the store into *value and the return are not
 * visible in this listing.
 */
8083 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8084 const xmlChar *name;
8089 name = xmlParseName(ctxt);
8091 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8092 "error parsing attribute name\n");
8103 val = xmlParseAttValue(ctxt);
/* Value parsing is over: the parser state is set to content again. */
8104 ctxt->instate = XML_PARSER_CONTENT;
8106 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8107 "Specification mandate value for attribute %s\n", name);
8112 * Check that xml:lang conforms to the specification
8113 * No more registered as an error, just generate a warning now
8114 * since this was deprecated in XML second edition
8116 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8117 if (!xmlCheckLanguageID(val)) {
8118 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8119 "Malformed value for xml:lang : %s\n",
8125 * Check that xml:space conforms to the specification
8127 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8128 if (xmlStrEqual(val, BAD_CAST "default"))
8130 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8133 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8134 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8145 * @ctxt: an XML parser context
8147 * parse a start of tag either for rule element or
8148 * EmptyElement. In both case we don't parse the tag closing chars.
8150 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8152 * [ WFC: Unique Att Spec ]
8153 * No attribute name may appear more than once in the same start-tag or
8154 * empty-element tag.
8156 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8158 * [ WFC: Unique Att Spec ]
8159 * No attribute name may appear more than once in the same start-tag or
8160 * empty-element tag.
8164 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8166 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8168 * Returns the element name parsed
/*
 * xmlParseStartTag (SAX1 path): returns NULL unless the current character
 * is '<', parses the element name with xmlParseName(), then loops over the
 * attributes with xmlParseAttribute() until '>' or "/>" is reached.
 * Each name/value pair is checked against the names already collected
 * (xmlErrAttributeDup on a repeat) and appended to the atts array, which
 * starts out as ctxt->atts and is allocated or grown on demand.
 * The SAX startElement callback then receives the name and either atts or
 * NULL, and the attribute values are freed afterwards.
 * NOTE(review): the embedded line numbers are not contiguous; the
 * declarations of attvalue/i/nbatts, the allocation-failure paths, the loop
 * exits and the final return are not visible in this listing.
 */
8172 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8173 const xmlChar *name;
8174 const xmlChar *attname;
8176 const xmlChar **atts = ctxt->atts;
8178 int maxatts = ctxt->maxatts;
8181 if (RAW != '<') return(NULL);
8184 name = xmlParseName(ctxt);
8186 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8187 "xmlParseStartTag: invalid element name\n");
8192 * Now parse the attributes, it ends up with the ending
8199 while ((RAW != '>') &&
8200 ((RAW != '/') || (NXT(1) != '>')) &&
8201 (IS_BYTE_CHAR(RAW))) {
8202 const xmlChar *q = CUR_PTR;
8203 unsigned int cons = ctxt->input->consumed;
8205 attname = xmlParseAttribute(ctxt, &attvalue);
8206 if ((attname != NULL) && (attvalue != NULL)) {
8208 * [ WFC: Unique Att Spec ]
8209 * No attribute name may appear more than once in the same
8210 * start-tag or empty-element tag.
8212 for (i = 0; i < nbatts;i += 2) {
8213 if (xmlStrEqual(atts[i], attname)) {
8214 xmlErrAttributeDup(ctxt, NULL, attname);
8220 * Add the pair to atts
8223 maxatts = 22; /* 10 name/value pairs plus the 2 NULL terminator slots */
8224 atts = (const xmlChar **)
8225 xmlMalloc(maxatts * sizeof(xmlChar *));
8227 xmlErrMemory(ctxt, NULL);
8228 if (attvalue != NULL)
8233 ctxt->maxatts = maxatts;
8234 } else if (nbatts + 4 > maxatts) {
8238 n = (const xmlChar **) xmlRealloc((void *) atts,
8239 maxatts * sizeof(const xmlChar *));
8241 xmlErrMemory(ctxt, NULL);
8242 if (attvalue != NULL)
8248 ctxt->maxatts = maxatts;
/* Names go to even slots and values to odd slots; two NULL slots always
 * follow the last pair. */
8250 atts[nbatts++] = attname;
8251 atts[nbatts++] = attvalue;
8252 atts[nbatts] = NULL;
8253 atts[nbatts + 1] = NULL;
8255 if (attvalue != NULL)
8262 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8264 if (!IS_BLANK_CH(RAW)) {
8265 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8266 "attributes construct error\n");
8269 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8270 (attname == NULL) && (attvalue == NULL)) {
8271 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8272 "xmlParseStartTag: problem parsing attributes\n");
8280 * SAX: Start of Element !
8282 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8283 (!ctxt->disableSAX)) {
8285 ctxt->sax->startElement(ctxt->userData, name, atts);
8287 ctxt->sax->startElement(ctxt->userData, name, NULL);
8291 /* Free only the attribute values (odd slots); the names in the even slots are not freed here */
8292 for (i = 1;i < nbatts;i+=2)
8293 if (atts[i] != NULL)
8294 xmlFree((xmlChar *) atts[i]);
8301 * @ctxt: an XML parser context
8302 * @line: line of the start tag
8303 * @nsNr: number of namespaces on the start tag
8305 * parse an end of tag
8307 * [42] ETag ::= '</' Name S? '>'
8311 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * xmlParseEndTag1 (SAX1 path): requires "</" at the current position,
 * compares the following name with ctxt->name through
 * xmlParseNameAndCompare(), requires a closing '>', reports a tag mismatch
 * when the comparison result is not (xmlChar*)1, and finally calls the SAX
 * endElement callback with ctxt->name.
 * @line is only used in the mismatch message.
 * NOTE(review): (xmlChar*)1 appears to be the "names matched" sentinel of
 * xmlParseNameAndCompare -- that function is not visible here, confirm.
 * NOTE(review): the embedded line numbers are not contiguous; the input
 * skipping and the returns after the error calls are not visible.
 */
8315 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8316 const xmlChar *name;
8319 if ((RAW != '<') || (NXT(1) != '/')) {
8320 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8321 "xmlParseEndTag: '</' not found\n");
8326 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8329 * We should definitely be at the ending "S? '>'" part
8333 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8334 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8339 * [ WFC: Element Type Match ]
8340 * The Name in an element's end-tag must match the element type in the
8344 if (name != (xmlChar*)1) {
/* A NULL result is replaced by a placeholder so the message below always
 * has a string to print. */
8345 if (name == NULL) name = BAD_CAST "unparseable";
8346 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8347 "Opening and ending tag mismatch: %s line %d and %s\n",
8348 ctxt->name, line, name);
8354 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8355 (!ctxt->disableSAX))
8356 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8365 * @ctxt: an XML parser context
8367 * parse an end of tag
8369 * [42] ETag ::= '</' Name S? '>'
8373 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * xmlParseEndTag: thin wrapper that calls xmlParseEndTag1() with 0 as the
 * line argument.
 */
8377 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8378 xmlParseEndTag1(ctxt, 0);
8380 #endif /* LIBXML_SAX1_ENABLED */
8382 /************************************************************************
8384 * SAX 2 specific operations *
8386 ************************************************************************/
8390 * @ctxt: an XML parser context
8391 * @prefix: the prefix to lookup
8393 * Lookup the namespace name for the @prefix (which can be NULL)
8394 * The prefix must come from the @ctxt->dict dictionary
8396 * Returns the namespace name or NULL if not bound
/*
 * xmlGetNamespace: maps a prefix to its namespace name.
 * The ctxt->str_xml prefix always yields ctxt->str_xml_ns. Any other
 * prefix is searched in ctxt->nsTab, which is read as (prefix, name) pairs
 * from index nsNr - 2 down to 0; prefixes are compared by pointer, not by
 * content.
 * NOTE(review): the embedded line numbers are not contiguous; the
 * declaration of i, the statement taken when the NULL prefix is bound to an
 * empty name, and the final return are not visible in this listing.
 */
8398 static const xmlChar *
8399 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8402 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8403 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8404 if (ctxt->nsTab[i] == prefix) {
8405 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8407 return(ctxt->nsTab[i + 1]);
8414 * @ctxt: an XML parser context
8415 * @prefix: pointer to store the prefix part
8417 * parse an XML Namespace QName
8419 * [6] QName ::= (Prefix ':')? LocalPart
8420 * [7] Prefix ::= NCName
8421 * [8] LocalPart ::= NCName
8423 * Returns the Name parsed or NULL
/*
 * xmlParseQName: parses a QName using xmlParseNCName() for its parts.
 * The error paths visible here report XML_NS_ERR_QNAME and then try to
 * recover with xmlParseName() / xmlParseNmtoken(); the recovered pieces are
 * glued together with xmlBuildQName(), looked up in ctxt->dict, and the
 * temporary string is freed.
 * NOTE(review): the embedded line numbers are not contiguous; the ':'
 * tests, the conditions selecting each recovery path, the stores into
 * *prefix and the returns are not visible, so the exact control flow
 * cannot be stated from this listing.
 */
8426 static const xmlChar *
8427 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8428 const xmlChar *l, *p;
8432 l = xmlParseNCName(ctxt);
8435 l = xmlParseName(ctxt);
8437 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8438 "Failed to parse QName '%s'\n", l, NULL, NULL);
8448 l = xmlParseNCName(ctxt);
8452 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8453 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8454 l = xmlParseNmtoken(ctxt);
8456 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8458 tmp = xmlBuildQName(l, p, NULL, 0);
/* The rebuilt name is looked up in ctxt->dict and the temporary string is
 * released right after. */
8461 p = xmlDictLookup(ctxt->dict, tmp, -1);
8462 if (tmp != NULL) xmlFree(tmp);
8469 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8470 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8472 tmp = (xmlChar *) xmlParseName(ctxt);
8474 tmp = xmlBuildQName(tmp, l, NULL, 0);
8475 l = xmlDictLookup(ctxt->dict, tmp, -1);
8476 if (tmp != NULL) xmlFree(tmp);
8480 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8481 l = xmlDictLookup(ctxt->dict, tmp, -1);
8482 if (tmp != NULL) xmlFree(tmp);
8493 * xmlParseQNameAndCompare:
8494 * @ctxt: an XML parser context
8495 * @name: the localname
8496 * @prefix: the prefix, if any.
8498 * parse an XML name and compares for match
8499 * (specialized for endtag parsing)
8501 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8502 * and the name for mismatch
/*
 * xmlParseQNameAndCompare: end-tag helper that checks whether the input
 * holds "prefix:name".
 * With a NULL prefix it defers to xmlParseNameAndCompare(). Otherwise it
 * first compares the raw input bytes against the expected text; when the
 * match is followed by '>' or a blank, the input cursor is advanced and
 * (const xmlChar*) 1 is returned.
 * If that comparison does not succeed, the QName is parsed with
 * xmlParseQName() and both parts are compared by pointer.
 * NOTE(review): the embedded line numbers are not contiguous; the
 * declarations and initialisation of cmp/in/ret, the loop bodies and the
 * final return are not visible in this listing.
 */
8505 static const xmlChar *
8506 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8507 xmlChar const *prefix) {
8511 const xmlChar *prefix2;
8513 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8516 in = ctxt->input->cur;
8519 while (*in != 0 && *in == *cmp) {
8523 if ((*cmp == 0) && (*in == ':')) {
8526 while (*in != 0 && *in == *cmp) {
8530 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8532 ctxt->input->cur = in;
8533 return((const xmlChar*) 1);
8537 * all strings coms from the dictionary, equality can be done directly
8539 ret = xmlParseQName (ctxt, &prefix2);
8540 if ((ret == name) && (prefix == prefix2))
8541 return((const xmlChar*) 1);
8546 * xmlParseAttValueInternal:
8547 * @ctxt: an XML parser context
8548 * @len: attribute len result
8549 * @alloc: whether the attribute was reallocated as a new string
8550 * @normalize: if 1 then further non-CDATA normalization must be done
8552 * parse a value for an attribute.
8553 * NOTE: if no normalization is needed, the routine will return pointers
8554 * directly from the data buffer.
8556 * 3.3.3 Attribute-Value Normalization:
8557 * Before the value of an attribute is passed to the application or
8558 * checked for validity, the XML processor must normalize it as follows:
8559 * - a character reference is processed by appending the referenced
8560 * character to the attribute value
8561 * - an entity reference is processed by recursively processing the
8562 * replacement text of the entity
8563 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8564 * appending #x20 to the normalized value, except that only a single
8565 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8566 * parsed entity or the literal entity value of an internal parsed entity
8567 * - other characters are processed by appending them to the normalized value
8568 * If the declared value is not CDATA, then the XML processor must further
8569 * process the normalized attribute value by discarding any leading and
8570 * trailing space (#x20) characters, and by replacing sequences of space
8571 * (#x20) characters by a single space (#x20) character.
8572 * All attributes for which no declaration has been read should be treated
8573 * by a non-validating parser as if declared CDATA.
8575 * Returns the AttValue parsed or NULL. The value has to be freed by the
8576 * caller if it was copied, this can be detected by val[*len] == 0.
/*
 * xmlParseAttValueInternal: attribute-value parser with an in-place path.
 * It requires an opening single or double quote, sets the parser state to
 * XML_PARSER_ATTRIBUTE_VALUE, and scans the input buffer directly while the
 * bytes are in 0x20..0x7f and are neither '&' nor '<'.
 * Whenever ctxt->input->base differs from the saved oldbase, the
 * start/last pointers are shifted by the same delta and end is reloaded.
 * If the scan does not stop on the closing quote, control jumps to
 * need_complex and the work is handed to xmlParseAttValueComplex().
 * On the in-place path the result is either the start pointer with
 * *len = last - start, or an xmlStrndup() copy.
 * NOTE(review): the embedded line numbers are not contiguous; the
 * declaration of limit, the buffer-refill calls between the oldbase
 * snapshots and tests, the normalize branch selection, the labels and the
 * plain return are not visible, so the *alloc value on each exit path
 * cannot be confirmed from this listing.
 */
8580 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8584 const xmlChar *in = NULL, *start, *end, *last;
8585 xmlChar *ret = NULL;
8588 in = (xmlChar *) CUR_PTR;
8589 if (*in != '"' && *in != '\'') {
8590 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8593 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8596 * try to handle in this routine the most common case where no
8597 * allocation of a new string is required and where content is
8601 end = ctxt->input->end;
8604 const xmlChar *oldbase = ctxt->input->base;
8606 if (oldbase != ctxt->input->base) {
8607 long delta = ctxt->input->base - oldbase;
8608 start = start + delta;
8611 end = ctxt->input->end;
8615 * Skip any leading spaces
8617 while ((in < end) && (*in != limit) &&
8618 ((*in == 0x20) || (*in == 0x9) ||
8619 (*in == 0xA) || (*in == 0xD))) {
8623 const xmlChar *oldbase = ctxt->input->base;
8625 if (oldbase != ctxt->input->base) {
8626 long delta = ctxt->input->base - oldbase;
8627 start = start + delta;
8630 end = ctxt->input->end;
8633 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8634 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8635 if ((*in++ == 0x20) && (*in == 0x20)) break;
8637 const xmlChar *oldbase = ctxt->input->base;
8639 if (oldbase != ctxt->input->base) {
8640 long delta = ctxt->input->base - oldbase;
8641 start = start + delta;
8644 end = ctxt->input->end;
8649 * skip the trailing blanks
8651 while ((last[-1] == 0x20) && (last > start)) last--;
/* NOTE(review): in the loop just above, last[-1] is read before the
 * last > start bound is tested; swapping the two operands would test the
 * bound first. */
8652 while ((in < end) && (*in != limit) &&
8653 ((*in == 0x20) || (*in == 0x9) ||
8654 (*in == 0xA) || (*in == 0xD))) {
8657 const xmlChar *oldbase = ctxt->input->base;
8659 if (oldbase != ctxt->input->base) {
8660 long delta = ctxt->input->base - oldbase;
8661 start = start + delta;
8663 last = last + delta;
8665 end = ctxt->input->end;
8668 if (*in != limit) goto need_complex;
8670 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8671 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8674 const xmlChar *oldbase = ctxt->input->base;
8676 if (oldbase != ctxt->input->base) {
8677 long delta = ctxt->input->base - oldbase;
8678 start = start + delta;
8681 end = ctxt->input->end;
8685 if (*in != limit) goto need_complex;
8689 *len = last - start;
8690 ret = (xmlChar *) start;
8692 if (alloc) *alloc = 1;
8693 ret = xmlStrndup(start, last - start);
8696 if (alloc) *alloc = 0;
8699 if (alloc) *alloc = 1;
8700 return xmlParseAttValueComplex(ctxt, len, normalize);
8704 * xmlParseAttribute2:
8705 * @ctxt: an XML parser context
8706 * @pref: the element prefix
8707 * @elem: the element name
8708 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8709 * @value: a xmlChar ** used to store the value of the attribute
8710 * @len: an int * to save the length of the attribute
8711 * @alloc: an int * to indicate if the attribute was allocated
8713 * parse an attribute in the new SAX2 framework.
8715 * Returns the attribute name, and the value in *value.
8718 static const xmlChar *
8719 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8720 const xmlChar * pref, const xmlChar * elem,
8721 const xmlChar ** prefix, xmlChar ** value,
8722 int *len, int *alloc)
8724 const xmlChar *name;
8725 xmlChar *val, *internal_val = NULL;
8730 name = xmlParseQName(ctxt, prefix);
8732 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8733 "error parsing attribute name\n");
8738 * get the type if needed
8740 if (ctxt->attsSpecial != NULL) {
8743 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8744 pref, elem, *prefix, name);
8756 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8759 * Sometimes a second normalisation pass for spaces is needed
8760 * but that only happens if charrefs or entity references
8761 * have been used in the attribute value, i.e. the attribute
8762 * value has been extracted in an allocated string already.
8765 const xmlChar *val2;
8767 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8768 if ((val2 != NULL) && (val2 != val)) {
8770 val = (xmlChar *) val2;
8774 ctxt->instate = XML_PARSER_CONTENT;
8776 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8777 "Specification mandate value for attribute %s\n",
8782 if (*prefix == ctxt->str_xml) {
8784 * Check that xml:lang conforms to the specification
8785 * No longer registered as an error, just generate a warning now
8786 * since this was deprecated in XML second edition
8788 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8789 internal_val = xmlStrndup(val, *len);
8790 if (!xmlCheckLanguageID(internal_val)) {
8791 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8792 "Malformed value for xml:lang : %s\n",
8793 internal_val, NULL);
8798 * Check that xml:space conforms to the specification
8800 if (xmlStrEqual(name, BAD_CAST "space")) {
8801 internal_val = xmlStrndup(val, *len);
8802 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8804 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8807 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8808 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8809 internal_val, NULL);
8813 xmlFree(internal_val);
8821 * xmlParseStartTag2:
8822 * @ctxt: an XML parser context
8824 * parse a start of tag either for rule element or
8825 * EmptyElement. In both cases we don't parse the tag closing chars.
8826 * This routine is called when running SAX2 parsing
8828 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8830 * [ WFC: Unique Att Spec ]
8831 * No attribute name may appear more than once in the same start-tag or
8832 * empty-element tag.
8834 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8836 * [ WFC: Unique Att Spec ]
8837 * No attribute name may appear more than once in the same start-tag or
8838 * empty-element tag.
8842 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8844 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8846 * Returns the element name parsed
8849 static const xmlChar *
8850 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8851 const xmlChar **URI, int *tlen) {
8852 const xmlChar *localname;
8853 const xmlChar *prefix;
8854 const xmlChar *attname;
8855 const xmlChar *aprefix;
8856 const xmlChar *nsname;
8858 const xmlChar **atts = ctxt->atts;
8859 int maxatts = ctxt->maxatts;
8860 int nratts, nbatts, nbdef;
8861 int i, j, nbNs, attval, oldline, oldcol;
8862 const xmlChar *base;
8864 int nsNr = ctxt->nsNr;
8866 if (RAW != '<') return(NULL);
8870 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8871 * point since the attribute values may be stored as pointers to
8872 * the buffer and calling SHRINK would destroy them !
8873 * The Shrinking is only possible once the full set of attribute
8874 * callbacks have been done.
8878 base = ctxt->input->base;
8879 cur = ctxt->input->cur - ctxt->input->base;
8880 oldline = ctxt->input->line;
8881 oldcol = ctxt->input->col;
8887 /* Forget any namespaces added during an earlier parse of this element. */
8890 localname = xmlParseQName(ctxt, &prefix);
8891 if (localname == NULL) {
8892 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8893 "StartTag: invalid element name\n");
8896 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8899 * Now parse the attributes, it ends up with the ending
8905 if (ctxt->input->base != base) goto base_changed;
8907 while ((RAW != '>') &&
8908 ((RAW != '/') || (NXT(1) != '>')) &&
8909 (IS_BYTE_CHAR(RAW))) {
8910 const xmlChar *q = CUR_PTR;
8911 unsigned int cons = ctxt->input->consumed;
8912 int len = -1, alloc = 0;
8914 attname = xmlParseAttribute2(ctxt, prefix, localname,
8915 &aprefix, &attvalue, &len, &alloc);
8916 if (ctxt->input->base != base) {
8917 if ((attvalue != NULL) && (alloc != 0))
8922 if ((attname != NULL) && (attvalue != NULL)) {
8923 if (len < 0) len = xmlStrlen(attvalue);
8924 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8925 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8929 uri = xmlParseURI((const char *) URL);
8931 xmlNsErr(ctxt, XML_WAR_NS_URI,
8932 "xmlns: '%s' is not a valid URI\n",
8935 if (uri->scheme == NULL) {
8936 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8937 "xmlns: URI %s is not absolute\n",
8942 if (URL == ctxt->str_xml_ns) {
8943 if (attname != ctxt->str_xml) {
8944 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8945 "xml namespace URI cannot be the default namespace\n",
8948 goto skip_default_ns;
8952 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8953 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8954 "reuse of the xmlns namespace name is forbidden\n",
8956 goto skip_default_ns;
8960 * check that it's not a defined namespace
8962 for (j = 1;j <= nbNs;j++)
8963 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8966 xmlErrAttributeDup(ctxt, NULL, attname);
8968 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8970 if (alloc != 0) xmlFree(attvalue);
8974 if (aprefix == ctxt->str_xmlns) {
8975 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8978 if (attname == ctxt->str_xml) {
8979 if (URL != ctxt->str_xml_ns) {
8980 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8981 "xml namespace prefix mapped to wrong URI\n",
8985 * Do not keep a namespace definition node
8989 if (URL == ctxt->str_xml_ns) {
8990 if (attname != ctxt->str_xml) {
8991 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8992 "xml namespace URI mapped to wrong prefix\n",
8997 if (attname == ctxt->str_xmlns) {
8998 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8999 "redefinition of the xmlns prefix is forbidden\n",
9005 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9006 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9007 "reuse of the xmlns namespace name is forbidden\n",
9011 if ((URL == NULL) || (URL[0] == 0)) {
9012 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9013 "xmlns:%s: Empty XML namespace is not allowed\n",
9014 attname, NULL, NULL);
9017 uri = xmlParseURI((const char *) URL);
9019 xmlNsErr(ctxt, XML_WAR_NS_URI,
9020 "xmlns:%s: '%s' is not a valid URI\n",
9021 attname, URL, NULL);
9023 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9024 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9025 "xmlns:%s: URI %s is not absolute\n",
9026 attname, URL, NULL);
9033 * check that it's not a defined namespace
9035 for (j = 1;j <= nbNs;j++)
9036 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9039 xmlErrAttributeDup(ctxt, aprefix, attname);
9041 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9043 if (alloc != 0) xmlFree(attvalue);
9045 if (ctxt->input->base != base) goto base_changed;
9050 * Add the pair to atts
9052 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9053 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9054 if (attvalue[len] == 0)
9058 maxatts = ctxt->maxatts;
9061 ctxt->attallocs[nratts++] = alloc;
9062 atts[nbatts++] = attname;
9063 atts[nbatts++] = aprefix;
9064 atts[nbatts++] = NULL; /* the URI will be fetched later */
9065 atts[nbatts++] = attvalue;
9067 atts[nbatts++] = attvalue;
9069 * tag if some deallocation is needed
9071 if (alloc != 0) attval = 1;
9073 if ((attvalue != NULL) && (attvalue[len] == 0))
9080 if (ctxt->input->base != base) goto base_changed;
9081 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9083 if (!IS_BLANK_CH(RAW)) {
9084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9085 "attributes construct error\n");
9089 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9090 (attname == NULL) && (attvalue == NULL)) {
9091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9092 "xmlParseStartTag: problem parsing attributes\n");
9096 if (ctxt->input->base != base) goto base_changed;
9100 * The attributes defaulting
9102 if (ctxt->attsDefault != NULL) {
9103 xmlDefAttrsPtr defaults;
9105 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9106 if (defaults != NULL) {
9107 for (i = 0;i < defaults->nbAttrs;i++) {
9108 attname = defaults->values[5 * i];
9109 aprefix = defaults->values[5 * i + 1];
9112 * special work for namespaces defaulted defs
9114 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9116 * check that it's not a defined namespace
9118 for (j = 1;j <= nbNs;j++)
9119 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9121 if (j <= nbNs) continue;
9123 nsname = xmlGetNamespace(ctxt, NULL);
9124 if (nsname != defaults->values[5 * i + 2]) {
9125 if (nsPush(ctxt, NULL,
9126 defaults->values[5 * i + 2]) > 0)
9129 } else if (aprefix == ctxt->str_xmlns) {
9131 * check that it's not a defined namespace
9133 for (j = 1;j <= nbNs;j++)
9134 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9136 if (j <= nbNs) continue;
9138 nsname = xmlGetNamespace(ctxt, attname);
9139 if (nsname != defaults->values[2]) {
9140 if (nsPush(ctxt, attname,
9141 defaults->values[5 * i + 2]) > 0)
9146 * check that it's not a defined attribute
9148 for (j = 0;j < nbatts;j+=5) {
9149 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9152 if (j < nbatts) continue;
9154 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9155 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9158 maxatts = ctxt->maxatts;
9161 atts[nbatts++] = attname;
9162 atts[nbatts++] = aprefix;
9163 if (aprefix == NULL)
9164 atts[nbatts++] = NULL;
9166 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9167 atts[nbatts++] = defaults->values[5 * i + 2];
9168 atts[nbatts++] = defaults->values[5 * i + 3];
9169 if ((ctxt->standalone == 1) &&
9170 (defaults->values[5 * i + 4] != NULL)) {
9171 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9172 "standalone: attribute %s on %s defaulted from external subset\n",
9173 attname, localname);
9182 * The attributes checkings
9184 for (i = 0; i < nbatts;i += 5) {
9186 * The default namespace does not apply to attribute names.
9188 if (atts[i + 1] != NULL) {
9189 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9190 if (nsname == NULL) {
9191 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9192 "Namespace prefix %s for %s on %s is not defined\n",
9193 atts[i + 1], atts[i], localname);
9195 atts[i + 2] = nsname;
9199 * [ WFC: Unique Att Spec ]
9200 * No attribute name may appear more than once in the same
9201 * start-tag or empty-element tag.
9202 * As extended by the Namespace in XML REC.
9204 for (j = 0; j < i;j += 5) {
9205 if (atts[i] == atts[j]) {
9206 if (atts[i+1] == atts[j+1]) {
9207 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9210 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9211 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9212 "Namespaced Attribute %s in '%s' redefined\n",
9213 atts[i], nsname, NULL);
9220 nsname = xmlGetNamespace(ctxt, prefix);
9221 if ((prefix != NULL) && (nsname == NULL)) {
9222 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9223 "Namespace prefix %s on %s is not defined\n",
9224 prefix, localname, NULL);
9230 * SAX: Start of Element !
9232 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9233 (!ctxt->disableSAX)) {
9235 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9236 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9237 nbatts / 5, nbdef, atts);
9239 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9240 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9244 * Free up attribute allocated strings if needed
9247 for (i = 3,j = 0; j < nratts;i += 5,j++)
9248 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9249 xmlFree((xmlChar *) atts[i]);
9256 * the attribute strings are valid iff the base didn't change
9259 for (i = 3,j = 0; j < nratts;i += 5,j++)
9260 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9261 xmlFree((xmlChar *) atts[i]);
9263 ctxt->input->cur = ctxt->input->base + cur;
9264 ctxt->input->line = oldline;
9265 ctxt->input->col = oldcol;
9266 if (ctxt->wellFormed == 1) {
9274 * @ctxt: an XML parser context
9275 * @line: line of the start tag
9276 * @nsNr: number of namespaces on the start tag
9278 * parse an end of tag
9280 * [42] ETag ::= '</' Name S? '>'
9284 * [NS 9] ETag ::= '</' QName S? '>'
9288 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9289 const xmlChar *URI, int line, int nsNr, int tlen) {
9290 const xmlChar *name;
9293 if ((RAW != '<') || (NXT(1) != '/')) {
9294 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9299 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9300 if (ctxt->input->cur[tlen] == '>') {
9301 ctxt->input->cur += tlen + 1;
9304 ctxt->input->cur += tlen;
9308 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9310 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9314 * We should definitely be at the ending "S? '>'" part
9318 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9319 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9324 * [ WFC: Element Type Match ]
9325 * The Name in an element's end-tag must match the element type in the
9329 if (name != (xmlChar*)1) {
9330 if (name == NULL) name = BAD_CAST "unparseable";
9331 if ((line == 0) && (ctxt->node != NULL))
9332 line = ctxt->node->line;
9333 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9334 "Opening and ending tag mismatch: %s line %d and %s\n",
9335 ctxt->name, line, name);
9342 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9343 (!ctxt->disableSAX))
9344 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9354 * @ctxt: an XML parser context
9356 * Parse escaped pure raw content.
9358 * [18] CDSect ::= CDStart CData CDEnd
9360 * [19] CDStart ::= '<![CDATA['
9362 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9364 * [21] CDEnd ::= ']]>'
9367 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9368 xmlChar *buf = NULL;
9370 int size = XML_PARSER_BUFFER_SIZE;
9376 /* Check 2.6.0 was NXT(0) not RAW */
9377 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9382 ctxt->instate = XML_PARSER_CDATA_SECTION;
9385 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9386 ctxt->instate = XML_PARSER_CONTENT;
9392 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9393 ctxt->instate = XML_PARSER_CONTENT;
9398 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9400 xmlErrMemory(ctxt, NULL);
9403 while (IS_CHAR(cur) &&
9404 ((r != ']') || (s != ']') || (cur != '>'))) {
9405 if (len + 5 >= size) {
9409 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9412 xmlErrMemory(ctxt, NULL);
9417 COPY_BUF(rl,buf,len,r);
9431 ctxt->instate = XML_PARSER_CONTENT;
9433 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9434 "CData section not finished\n%.50s\n", buf);
9441 * OK the buffer is to be consumed as cdata.
9443 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9444 if (ctxt->sax->cdataBlock != NULL)
9445 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9446 else if (ctxt->sax->characters != NULL)
9447 ctxt->sax->characters(ctxt->userData, buf, len);
9454 * @ctxt: an XML parser context
9458 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9462 xmlParseContent(xmlParserCtxtPtr ctxt) {
9464 while ((RAW != 0) &&
9465 ((RAW != '<') || (NXT(1) != '/')) &&
9466 (ctxt->instate != XML_PARSER_EOF)) {
9467 const xmlChar *test = CUR_PTR;
9468 unsigned int cons = ctxt->input->consumed;
9469 const xmlChar *cur = ctxt->input->cur;
9472 * First case : a Processing Instruction.
9474 if ((*cur == '<') && (cur[1] == '?')) {
9479 * Second case : a CDSection
9481 /* 2.6.0 test was *cur not RAW */
9482 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9483 xmlParseCDSect(ctxt);
9487 * Third case : a comment
9489 else if ((*cur == '<') && (NXT(1) == '!') &&
9490 (NXT(2) == '-') && (NXT(3) == '-')) {
9491 xmlParseComment(ctxt);
9492 ctxt->instate = XML_PARSER_CONTENT;
9496 * Fourth case : a sub-element.
9498 else if (*cur == '<') {
9499 xmlParseElement(ctxt);
9503 * Fifth case : a reference. If if has not been resolved,
9504 * parsing returns it's Name, create the node
9507 else if (*cur == '&') {
9508 xmlParseReference(ctxt);
9512 * Last case, text. Note that References are handled directly.
9515 xmlParseCharData(ctxt, 0);
9520 * Pop-up of finished entities.
9522 while ((RAW == 0) && (ctxt->inputNr > 1))
9526 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9527 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9528 "detected an error in element content\n");
9529 ctxt->instate = XML_PARSER_EOF;
9537 * @ctxt: an XML parser context
9539 * parse an XML element, this is highly recursive
9541 * [39] element ::= EmptyElemTag | STag content ETag
9543 * [ WFC: Element Type Match ]
9544 * The Name in an element's end-tag must match the element type in the
9550 xmlParseElement(xmlParserCtxtPtr ctxt) {
9551 const xmlChar *name;
9552 const xmlChar *prefix = NULL;
9553 const xmlChar *URI = NULL;
9554 xmlParserNodeInfo node_info;
9557 int nsNr = ctxt->nsNr;
9559 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9560 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9561 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9562 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9564 ctxt->instate = XML_PARSER_EOF;
9568 /* Capture start position */
9569 if (ctxt->record_info) {
9570 node_info.begin_pos = ctxt->input->consumed +
9571 (CUR_PTR - ctxt->input->base);
9572 node_info.begin_line = ctxt->input->line;
9575 if (ctxt->spaceNr == 0)
9576 spacePush(ctxt, -1);
9577 else if (*ctxt->space == -2)
9578 spacePush(ctxt, -1);
9580 spacePush(ctxt, *ctxt->space);
9582 line = ctxt->input->line;
9583 #ifdef LIBXML_SAX1_ENABLED
9585 #endif /* LIBXML_SAX1_ENABLED */
9586 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9587 #ifdef LIBXML_SAX1_ENABLED
9589 name = xmlParseStartTag(ctxt);
9590 #endif /* LIBXML_SAX1_ENABLED */
9595 namePush(ctxt, name);
9598 #ifdef LIBXML_VALID_ENABLED
9600 * [ VC: Root Element Type ]
9601 * The Name in the document type declaration must match the element
9602 * type of the root element.
9604 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9605 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9606 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9607 #endif /* LIBXML_VALID_ENABLED */
9610 * Check for an Empty Element.
9612 if ((RAW == '/') && (NXT(1) == '>')) {
9615 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9616 (!ctxt->disableSAX))
9617 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9618 #ifdef LIBXML_SAX1_ENABLED
9620 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9621 (!ctxt->disableSAX))
9622 ctxt->sax->endElement(ctxt->userData, name);
9623 #endif /* LIBXML_SAX1_ENABLED */
9627 if (nsNr != ctxt->nsNr)
9628 nsPop(ctxt, ctxt->nsNr - nsNr);
9629 if ( ret != NULL && ctxt->record_info ) {
9630 node_info.end_pos = ctxt->input->consumed +
9631 (CUR_PTR - ctxt->input->base);
9632 node_info.end_line = ctxt->input->line;
9633 node_info.node = ret;
9634 xmlParserAddNodeInfo(ctxt, &node_info);
9641 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9642 "Couldn't find end of Start Tag %s line %d\n",
9646 * end of parsing of this node.
9651 if (nsNr != ctxt->nsNr)
9652 nsPop(ctxt, ctxt->nsNr - nsNr);
9655 * Capture end position and add node
9657 if ( ret != NULL && ctxt->record_info ) {
9658 node_info.end_pos = ctxt->input->consumed +
9659 (CUR_PTR - ctxt->input->base);
9660 node_info.end_line = ctxt->input->line;
9661 node_info.node = ret;
9662 xmlParserAddNodeInfo(ctxt, &node_info);
9668 * Parse the content of the element:
9670 xmlParseContent(ctxt);
9671 if (!IS_BYTE_CHAR(RAW)) {
9672 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9673 "Premature end of data in tag %s line %d\n",
9677 * end of parsing of this node.
9682 if (nsNr != ctxt->nsNr)
9683 nsPop(ctxt, ctxt->nsNr - nsNr);
9688 * parse the end of tag: '</' should be here.
9691 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9694 #ifdef LIBXML_SAX1_ENABLED
9696 xmlParseEndTag1(ctxt, line);
9697 #endif /* LIBXML_SAX1_ENABLED */
9700 * Capture end position and add node
9702 if ( ret != NULL && ctxt->record_info ) {
9703 node_info.end_pos = ctxt->input->consumed +
9704 (CUR_PTR - ctxt->input->base);
9705 node_info.end_line = ctxt->input->line;
9706 node_info.node = ret;
9707 xmlParserAddNodeInfo(ctxt, &node_info);
9712 * xmlParseVersionNum:
9713 * @ctxt: an XML parser context
9715 * parse the XML version value.
9717 * [26] VersionNum ::= '1.' [0-9]+
9719 * In practice allow [0-9].[0-9]+ at that level
9721 * Returns the string giving the XML version number, or NULL
9724 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9725 xmlChar *buf = NULL;
9730 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9732 xmlErrMemory(ctxt, NULL);
9736 if (!((cur >= '0') && (cur <= '9'))) {
9750 while ((cur >= '0') && (cur <= '9')) {
9751 if (len + 1 >= size) {
9755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9758 xmlErrMemory(ctxt, NULL);
9772 * xmlParseVersionInfo:
9773 * @ctxt: an XML parser context
9775 * parse the XML version.
9777 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9779 * [25] Eq ::= S? '=' S?
9781 * Returns the version string, e.g. "1.0"
9785 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9786 xmlChar *version = NULL;
9788 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9792 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9799 version = xmlParseVersionNum(ctxt);
9801 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9804 } else if (RAW == '\''){
9806 version = xmlParseVersionNum(ctxt);
9808 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9812 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9820 * @ctxt: an XML parser context
9822 * parse the XML encoding name
9824 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9826 * Returns the encoding name value or NULL
9829 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9830 xmlChar *buf = NULL;
9836 if (((cur >= 'a') && (cur <= 'z')) ||
9837 ((cur >= 'A') && (cur <= 'Z'))) {
9838 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9840 xmlErrMemory(ctxt, NULL);
9847 while (((cur >= 'a') && (cur <= 'z')) ||
9848 ((cur >= 'A') && (cur <= 'Z')) ||
9849 ((cur >= '0') && (cur <= '9')) ||
9850 (cur == '.') || (cur == '_') ||
9852 if (len + 1 >= size) {
9856 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9858 xmlErrMemory(ctxt, NULL);
9875 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9881 * xmlParseEncodingDecl:
9882 * @ctxt: an XML parser context
9884 * parse the XML encoding declaration
9886 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9888 * this sets up the conversion filters.
9890 * Returns the encoding value or NULL
9894 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9895 xmlChar *encoding = NULL;
9898 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9902 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9909 encoding = xmlParseEncName(ctxt);
9911 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9914 } else if (RAW == '\''){
9916 encoding = xmlParseEncName(ctxt);
9918 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9922 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9925 * UTF-16 encoding switch has already taken place at this stage,
9926 * moreover the little-endian/big-endian selection is already done
9928 if ((encoding != NULL) &&
9929 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9930 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9932 * If no encoding was passed to the parser, that we are
9933 * using UTF-16 and no decoder is present i.e. the
9934 * document is apparently UTF-8 compatible, then raise an
9935 * encoding mismatch fatal error
9937 if ((ctxt->encoding == NULL) &&
9938 (ctxt->input->buf != NULL) &&
9939 (ctxt->input->buf->encoder == NULL)) {
9940 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9941 "Document labelled UTF-16 but has UTF-8 content\n");
9943 if (ctxt->encoding != NULL)
9944 xmlFree((xmlChar *) ctxt->encoding);
9945 ctxt->encoding = encoding;
9948 * UTF-8 encoding is handled natively
9950 else if ((encoding != NULL) &&
9951 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9952 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9953 if (ctxt->encoding != NULL)
9954 xmlFree((xmlChar *) ctxt->encoding);
9955 ctxt->encoding = encoding;
9957 else if (encoding != NULL) {
9958 xmlCharEncodingHandlerPtr handler;
9960 if (ctxt->input->encoding != NULL)
9961 xmlFree((xmlChar *) ctxt->input->encoding);
9962 ctxt->input->encoding = encoding;
9964 handler = xmlFindCharEncodingHandler((const char *) encoding);
9965 if (handler != NULL) {
9966 xmlSwitchToEncoding(ctxt, handler);
9968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9969 "Unsupported encoding %s\n", encoding);
9979 * @ctxt: an XML parser context
9981 * parse the XML standalone declaration
9983 * [32] SDDecl ::= S 'standalone' Eq
9984 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9986 * [ VC: Standalone Document Declaration ]
9987 * TODO The standalone document declaration must have the value "no"
9988 * if any external markup declarations contain declarations of:
9989 * - attributes with default values, if elements to which these
9990 * attributes apply appear in the document without specifications
9991 * of values for these attributes, or
9992 * - entities (other than amp, lt, gt, apos, quot), if references
9993 * to those entities appear in the document, or
9994 * - attributes with values subject to normalization, where the
9995 * attribute appears in the document with a value which will change
9996 * as a result of normalization, or
9997 * - element types with element content, if white space occurs directly
9998 * within any instance of those types.
10001 * 1 if standalone="yes"
10002 * 0 if standalone="no"
10003 * -2 if standalone attribute is missing or invalid
10004 * (A standalone value of -2 means that the XML declaration was found,
10005 * but no value was specified for the standalone attribute).
10009 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10010 int standalone = -2;
10013 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10017 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10018 return(standalone);
10024 if ((RAW == 'n') && (NXT(1) == 'o')) {
10027 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10032 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10035 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10038 } else if (RAW == '"'){
10040 if ((RAW == 'n') && (NXT(1) == 'o')) {
10043 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10048 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10051 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10055 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10058 return(standalone);
10063 * @ctxt: an XML parser context
10065 * parse an XML declaration header
10067 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10071 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10075 * This value for standalone indicates that the document has an
10076 * XML declaration but it does not have a standalone attribute.
10077 * It will be overwritten later if a standalone attribute is found.
10079 ctxt->input->standalone = -2;
10082 * We know that '<?xml' is here.
10086 if (!IS_BLANK_CH(RAW)) {
10087 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10088 "Blank needed after '<?xml'\n");
10093 * We must have the VersionInfo here.
10095 version = xmlParseVersionInfo(ctxt);
10096 if (version == NULL) {
10097 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10099 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10101 * Changed here for XML-1.0 5th edition
10103 if (ctxt->options & XML_PARSE_OLD10) {
10104 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10105 "Unsupported version '%s'\n",
10108 if ((version[0] == '1') && ((version[1] == '.'))) {
10109 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10110 "Unsupported version '%s'\n",
10113 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10114 "Unsupported version '%s'\n",
10119 if (ctxt->version != NULL)
10120 xmlFree((void *) ctxt->version);
10121 ctxt->version = version;
10125 * We may have the encoding declaration
10127 if (!IS_BLANK_CH(RAW)) {
10128 if ((RAW == '?') && (NXT(1) == '>')) {
10132 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10134 xmlParseEncodingDecl(ctxt);
10135 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10137 * The XML REC instructs us to stop parsing right here
10143 * We may have the standalone status.
10145 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10146 if ((RAW == '?') && (NXT(1) == '>')) {
10150 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10154 * We can grow the input buffer freely at that point
10159 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10162 if ((RAW == '?') && (NXT(1) == '>')) {
10164 } else if (RAW == '>') {
10165 /* Deprecated old WD ... */
10166 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10169 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10170 MOVETO_ENDTAG(CUR_PTR);
10177 * @ctxt: an XML parser context
10179 * parse an XML Misc* optional field.
10181 * [27] Misc ::= Comment | PI | S
10185 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10186 while (((RAW == '<') && (NXT(1) == '?')) ||
10187 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10188 IS_BLANK_CH(CUR)) {
10189 if ((RAW == '<') && (NXT(1) == '?')) {
10191 } else if (IS_BLANK_CH(CUR)) {
10194 xmlParseComment(ctxt);
10199 * xmlParseDocument:
10200 * @ctxt: an XML parser context
10202 * parse an XML document (and build a tree if using the standard SAX
10205 * [1] document ::= prolog element Misc*
10207 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10209 * Returns 0, -1 in case of error. the parser context is augmented
10210 * as a result of the parsing.
/*
 * xmlParseDocument: parse a whole XML document from ctxt->input.
 *
 * Sequence of work that is visible in this listing:
 *   1. test for a NULL context or a context without an input stream;
 *   2. call xmlDetectSAX2(), then pass xmlDefaultSAXLocator to the SAX
 *      setDocumentLocator callback when one is installed;
 *   3. when ctxt->encoding is NULL and at least 4 bytes are buffered,
 *      guess the encoding with xmlDetectCharEncoding() and switch to it
 *      unless the guess is XML_CHAR_ENCODING_NONE;
 *   4. when the input starts with <?xml followed by a blank, parse the
 *      XML declaration and copy the standalone flag from the input to the
 *      context; XML_ERR_UNSUPPORTED_ENCODING is tested right after that
 *      call. A copy of XML_DEFAULT_VERSION is also stored in
 *      ctxt->version (presumably in an else branch that is not shown);
 *   5. fire startDocument unless SAX is disabled, then parse Misc, an
 *      optional DOCTYPE declaration and internal subset, call the
 *      externalSubset callback with inSubset set to 2 around it, clean
 *      the special attributes, and parse Misc again;
 *   6. parse the root element with instate moving through
 *      XML_PARSER_CONTENT, XML_PARSER_EPILOG and XML_PARSER_EOF, parse
 *      the trailing Misc, and fire endDocument;
 *   7. free ctxt->myDoc when its version equals SAX_COMPAT_MODE, then,
 *      for a well-formed document that still has a tree, record
 *      XML_DOC_WELLFORMED, XML_DOC_DTDVALID, XML_DOC_NSVALID and
 *      XML_DOC_OLD10 in myDoc->properties (the condition guarding the
 *      DTDVALID flag is not shown).
 *
 * Errors reported here: XML_ERR_DOCUMENT_EMPTY (twice, the second time
 * with a message about a missing start tag) and XML_ERR_DOCUMENT_END.
 * The conditions guarding those reports are not shown.
 *
 * @ctxt: an XML parser context
 *
 * Documented result: 0, or -1 in case of error.
 *
 * NOTE(review): the embedded numbering has gaps throughout (for instance
 * 10216 -> 10220 and 10249 -> 10255), so return statements, braces, the
 * declaration and filling of start[] and several conditions are missing
 * from this listing. Confirm details against the complete source.
 */
10214 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10216 xmlCharEncoding enc;
/* A context without an input stream cannot be parsed. */
10220 if ((ctxt == NULL) || (ctxt->input == NULL))
10226 * SAX: detecting the level.
10228 xmlDetectSAX2(ctxt);
10231 * SAX: beginning of the document processing.
/* Hand the default locator to the application if it installed a callback. */
10233 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10234 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
/* Sniff the encoding only if none is recorded and 4 bytes are buffered. */
10236 if ((ctxt->encoding == NULL) &&
10237 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10239 * Get the 4 first bytes and decode the charset
10240 * if enc != XML_CHAR_ENCODING_NONE
10241 * plug some encoding conversion routines.
/* NOTE(review): the lines that fill start[] are not shown in this listing. */
10247 enc = xmlDetectCharEncoding(&start[0], 4);
/* XML_CHAR_ENCODING_NONE means nothing was detected: no switch is made. */
10248 if (enc != XML_CHAR_ENCODING_NONE) {
10249 xmlSwitchEncoding(ctxt, enc);
10255 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10259 * Check for the XMLDecl in the Prolog.
10260 * do not GROW here to avoid the detected encoder to decode more
10261 * than just the first line, unless the amount of data is really
10262 * too small to hold "<?xml version="1.0" encoding="foo"
10264 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10267 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10270 * Note that we will switch encoding on the fly.
10272 xmlParseXMLDecl(ctxt);
10273 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10275 * The XML REC instructs us to stop parsing right here
10279 ctxt->standalone = ctxt->input->standalone;
10282 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10284 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10285 ctxt->sax->startDocument(ctxt->userData);
10288 * The Misc part of the Prolog
10291 xmlParseMisc(ctxt);
10294 * Then possibly doc type declaration(s) and more Misc
10295 * (doctypedecl Misc*)?
10298 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10300 ctxt->inSubset = 1;
10301 xmlParseDocTypeDecl(ctxt);
10303 ctxt->instate = XML_PARSER_DTD;
10304 xmlParseInternalSubset(ctxt);
10308 * Create and update the external subset.
10310 ctxt->inSubset = 2;
10311 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10312 (!ctxt->disableSAX))
10313 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10314 ctxt->extSubSystem, ctxt->extSubURI);
10315 ctxt->inSubset = 0;
10317 xmlCleanSpecialAttr(ctxt);
10319 ctxt->instate = XML_PARSER_PROLOG;
10320 xmlParseMisc(ctxt);
10324 * Time to start parsing the tree itself
10328 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10329 "Start tag expected, '<' not found\n");
10331 ctxt->instate = XML_PARSER_CONTENT;
10332 xmlParseElement(ctxt);
10333 ctxt->instate = XML_PARSER_EPILOG;
10337 * The Misc part at the end
10339 xmlParseMisc(ctxt);
10342 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10344 ctxt->instate = XML_PARSER_EOF;
10348 * SAX: end of the document processing.
10350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10351 ctxt->sax->endDocument(ctxt->userData);
10354 * Remove locally kept entity definitions if the tree was not built
10356 if ((ctxt->myDoc != NULL) &&
10357 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10358 xmlFreeDoc(ctxt->myDoc);
10359 ctxt->myDoc = NULL;
10362 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10363 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10365 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10366 if (ctxt->nsWellFormed)
10367 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10368 if (ctxt->options & XML_PARSE_OLD10)
10369 ctxt->myDoc->properties |= XML_DOC_OLD10;
10371 if (! ctxt->wellFormed) {
10379 * xmlParseExtParsedEnt:
10380 * @ctxt: an XML parser context
10382 * parse a general parsed entity
10383 * An external general parsed entity is well-formed if it matches the
10384 * production labeled extParsedEnt.
10386 * [78] extParsedEnt ::= TextDecl? content
10388 * Returns 0, or -1 in case of error. The parser context is augmented
10389 * as a result of the parsing.
/*
 * xmlParseExtParsedEnt: parse an external general parsed entity, that is
 * an optional text declaration followed by content.
 *
 * Sequence of work that is visible in this listing:
 *   1. test for a NULL context or a context without an input stream;
 *   2. call xmlDefaultSAXHandlerInit() and xmlDetectSAX2(), then pass
 *      xmlDefaultSAXLocator to setDocumentLocator when it is installed;
 *   3. when at least 4 bytes are buffered, guess the encoding with
 *      xmlDetectCharEncoding() and switch to it unless the guess is
 *      XML_CHAR_ENCODING_NONE. No test on ctxt->encoding is visible on
 *      this condition;
 *   4. when the input starts with <?xml followed by a blank, parse the
 *      declaration with xmlParseXMLDecl() and test for
 *      XML_ERR_UNSUPPORTED_ENCODING; a copy of XML_DEFAULT_VERSION is
 *      also stored in ctxt->version (presumably in an else branch that is
 *      not shown);
 *   5. fire startDocument unless SAX is disabled;
 *   6. set instate to XML_PARSER_CONTENT, clear ctxt->validate and
 *      ctxt->loadsubset, and call xmlParseContent();
 *   7. after the content, a leftover end tag opener (the characters < and
 *      / in a row) is reported as XML_ERR_NOT_WELL_BALANCED and any other
 *      leftover non-zero character as XML_ERR_EXTRA_CONTENT;
 *   8. fire endDocument, then return -1 when the context is not
 *      well-formed.
 *
 * @ctxt: an XML parser context
 *
 * Documented result: 0, or -1 in case of error.
 *
 * NOTE(review): the embedded numbering has gaps (for instance
 * 10417 -> 10422 and 10445 -> 10451), so the declaration and filling of
 * start[], several return statements, braces and at least one condition
 * are missing from this listing. Confirm against the complete source.
 */
10393 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10395 xmlCharEncoding enc;
/* A context without an input stream cannot be parsed. */
10397 if ((ctxt == NULL) || (ctxt->input == NULL))
10400 xmlDefaultSAXHandlerInit();
10402 xmlDetectSAX2(ctxt);
10407 * SAX: beginning of the document processing.
/* Hand the default locator to the application if it installed a callback. */
10409 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10410 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10413 * Get the 4 first bytes and decode the charset
10414 * if enc != XML_CHAR_ENCODING_NONE
10415 * plug some encoding conversion routines.
10417 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
/* NOTE(review): the lines that fill start[] are not shown in this listing. */
10422 enc = xmlDetectCharEncoding(start, 4);
/* XML_CHAR_ENCODING_NONE means nothing was detected: no switch is made. */
10423 if (enc != XML_CHAR_ENCODING_NONE) {
10424 xmlSwitchEncoding(ctxt, enc);
10430 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10434 * Check for the XMLDecl in the Prolog.
10437 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10440 * Note that we will switch encoding on the fly.
10442 xmlParseXMLDecl(ctxt);
10443 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10445 * The XML REC instructs us to stop parsing right here
10451 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10453 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10454 ctxt->sax->startDocument(ctxt->userData);
10457 * Doing validity checking on chunk doesn't make sense
10459 ctxt->instate = XML_PARSER_CONTENT;
10460 ctxt->validate = 0;
10461 ctxt->loadsubset = 0;
10464 xmlParseContent(ctxt);
10466 if ((RAW == '<') && (NXT(1) == '/')) {
10467 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10468 } else if (RAW != 0) {
10469 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10473 * SAX: end of the document processing.
10475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10476 ctxt->sax->endDocument(ctxt->userData);
10478 if (! ctxt->wellFormed) return(-1);
10482 #ifdef LIBXML_PUSH_ENABLED
10483 /************************************************************************
10485 * Progressive parsing interfaces *
10487 ************************************************************************/
10490 * xmlParseLookupSequence:
10491 * @ctxt: an XML parser context
10492 * @first: the first char to lookup
10493 * @next: the next char to lookup or zero
10494 * @third: the third char to lookup or zero
10496 * Try to find if a sequence (first, next, third) or just (first next) or
10497 * (first) is available in the input stream.
10498 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10499 * to avoid rescanning sequences of bytes, it DOES change the state of the
10500 * parser, do not use liberally.
10502 * Returns the index to the current parsing point if the full sequence
10503 * is available, -1 otherwise.
/*
 * xmlParseLookupSequence: look for a one, two or three character
 * sequence in the data that is already buffered for the current input.
 *
 * @ctxt:  an XML parser context; ctxt->checkIndex is read and written
 * @first: first character of the sequence
 * @next:  second character, or zero for a one character sequence
 * @third: third character, or zero for a sequence of at most two
 *
 * The scan starts at the read cursor (in->cur - in->base) or at
 * ctxt->checkIndex, whichever is larger. On a match ctxt->checkIndex is
 * reset to 0 and the distance from the cursor to the match is returned.
 * When nothing matches, the position reached by the scan is stored in
 * ctxt->checkIndex so that a later call can resume from there. -1 is
 * returned for a NULL input or a negative cursor offset; the header
 * comment of this function documents -1 for the not-found case as well.
 *
 * The xmlGenericError() calls only print PP: lookup traces.
 *
 * NOTE(review): the embedded numbering has gaps (10507 -> 10509,
 * 10510 -> 10513, 10518 -> 10522, 10529 -> 10531, ...). The declaration
 * of base and len, the assignment of in, the branch taken when
 * in->buf is NULL, the test that opens the three character comparison,
 * the final return and any preprocessor guard around the traces are not
 * part of this listing. Confirm against the complete source.
 */
10506 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10507 xmlChar next, xmlChar third) {
10509 xmlParserInputPtr in;
10510 const xmlChar *buf;
10513 if (in == NULL) return(-1);
/* Offset of the read cursor inside the buffer; a negative one is rejected. */
10514 base = in->cur - in->base;
10515 if (base < 0) return(-1);
/* Resume where an earlier unsuccessful lookup stopped. */
10516 if (ctxt->checkIndex > base)
10517 base = ctxt->checkIndex;
10518 if (in->buf == NULL) {
/* Input backed by a parser input buffer: scan its content up to use. */
10522 buf = in->buf->buffer->content;
10523 len = in->buf->buffer->use;
/* Shrink len so that buf[base + 1] and buf[base + 2] stay below the end. */
10525 /* take into account the sequence length */
10526 if (third) len -= 2;
10527 else if (next) len --;
10528 for (;base < len;base++) {
10529 if (buf[base] == first) {
10531 if ((buf[base + 1] != next) ||
10532 (buf[base + 2] != third)) continue;
10533 } else if (next != 0) {
10534 if (buf[base + 1] != next) continue;
/* Match found: the next lookup has to start from the cursor again. */
10536 ctxt->checkIndex = 0;
10539 xmlGenericError(xmlGenericErrorContext,
10540 "PP: lookup '%c' found at %d\n",
10542 else if (third == 0)
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: lookup '%c%c' found at %d\n",
10545 first, next, base);
10547 xmlGenericError(xmlGenericErrorContext,
10548 "PP: lookup '%c%c%c' found at %d\n",
10549 first, next, third, base);
/* Distance from the read cursor to the first character of the match. */
10551 return(base - (in->cur - in->base));
/* No match: remember how far the scan went. */
10554 ctxt->checkIndex = base;
10557 xmlGenericError(xmlGenericErrorContext,
10558 "PP: lookup '%c' failed\n", first);
10559 else if (third == 0)
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: lookup '%c%c' failed\n", first, next);
10563 xmlGenericError(xmlGenericErrorContext,
10564 "PP: lookup '%c%c%c' failed\n", first, next, third);
10570 * xmlParseGetLasts:
10571 * @ctxt: an XML parser context
10572 * @lastlt: pointer to store the last '<' from the input
10573 * @lastgt: pointer to store the last '>' from the input
10575 * Lookup the last < and > in the current chunk
/*
 * xmlParseGetLasts: locate the last opening angle bracket and a matching
 * closing angle bracket in the data currently held by ctxt->input.
 *
 * @ctxt:   an XML parser context
 * @lastlt: where the position of the last opening bracket is reported
 * @lastgt: where the position of the closing bracket is reported
 *
 * Visible behaviour:
 *   - a NULL ctxt, lastlt or lastgt is reported through xmlGenericError()
 *     as an internal error;
 *   - the scan is only attempted when ctxt->progressive is non-zero and
 *     exactly one input is on the stack (ctxt->inputNr == 1);
 *   - starting from input->end, tmp walks backwards until it meets an
 *     opening bracket or drops below input->base;
 *   - a forward walk then looks for a closing bracket before input->end,
 *     jumping over sections delimited by single or by double quotes so
 *     that a bracket inside a quoted value is not taken for the end of
 *     the tag;
 *   - when the forward walk reaches input->end, a second backward walk
 *     looks for a closing bracket towards input->base.
 *
 * NOTE(review): the embedded numbering has gaps (10584 -> 10587,
 * 10591 -> 10597, 10609 -> 10614, ...). None of the stores through
 * lastlt and lastgt, the adjustments of tmp between the walks, nor the
 * else branches are part of this listing; confirm them against the
 * complete source.
 *
 * NOTE(review): both backward walks leave the loop with tmp one element
 * below input->base when nothing is found, and then compare it with
 * input->base. If input->base is the start of its allocation, forming
 * that pointer is undefined behaviour in ISO C; worth revisiting.
 */
10578 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10579 const xmlChar **lastgt) {
10580 const xmlChar *tmp;
/* Defensive check of the three pointers handed in by the caller. */
10582 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10583 xmlGenericError(xmlGenericErrorContext,
10584 "Internal error: xmlParseGetLasts\n");
/* Only meaningful in progressive mode with a single input on the stack. */
10587 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10588 tmp = ctxt->input->end;
/* Backward walk towards the last opening bracket. */
10590 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10591 if (tmp < ctxt->input->base) {
/* Forward walk towards a closing bracket, jumping over quoted sections. */
10597 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10598 if (*tmp == '\'') {
10600 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10601 if (tmp < ctxt->input->end) tmp++;
10602 } else if (*tmp == '"') {
10604 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10605 if (tmp < ctxt->input->end) tmp++;
10609 if (tmp < ctxt->input->end)
10614 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10615 if (tmp >= ctxt->input->base)
10627 * xmlCheckCdataPush:
10628 * @utf: pointer to the block of characters
10629 * @len: length of the block in bytes
10631 * Check that the block of characters is okay as CData content [20]
10633 * Returns the number of bytes to pass if okay, a negative index where a
10634 * UTF-8 error occurred otherwise
/*
 * xmlCheckCdataPush: walk a block of bytes as UTF-8 and check every
 * decoded character.
 *
 * @utf: pointer to the bytes to check
 * @len: number of bytes available at utf
 *
 * Each iteration classifies the lead byte by its high bits into a 1, 2, 3
 * or 4 byte sequence. For multi-byte sequences the code:
 *   - returns ix, the count of bytes validated so far, when the sequence
 *     would extend past len (an incomplete character at the end of the
 *     block is therefore not an error here);
 *   - requires every continuation byte to match the bit pattern 10xxxxxx;
 *   - rebuilds the code point from the payload bits and submits it to
 *     xmlIsCharQ().
 * For 1 byte codes the values 0xA, 0xD and 0x9 are singled out.
 *
 * NOTE(review): the embedded numbering has gaps (10637 -> 10642,
 * 10642 -> 10645, 10647 -> 10650, 10656 -> 10658, ...). The declarations
 * of ix, c and codepoint, the load of c, the first test of the 1 byte
 * branch, every increment of ix, the value returned for a NULL or empty
 * block and all error returns are not part of this listing. The header
 * comment of this function documents a negative index for the error case;
 * confirm against the complete source.
 *
 * NOTE(review): no explicit rejection of overlong encodings is visible;
 * whether xmlIsCharQ() is enough for the callers should be confirmed.
 */
10637 xmlCheckCdataPush(const xmlChar *utf, int len) {
10642 if ((utf == NULL) || (len <= 0))
10645 for (ix = 0; ix < len;) { /* bounded by len, no terminator is relied upon */
10647 if ((c & 0x80) == 0x00) { /* 1-byte code, top bit clear (starts with 0) */
10650 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10654 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
/* Sequence cut short by len: report the bytes validated so far. */
10655 if (ix + 2 > len) return(ix);
10656 if ((utf[ix+1] & 0xc0 ) != 0x80)
/* 5 payload bits from the lead byte, 6 from the continuation byte. */
10658 codepoint = (utf[ix] & 0x1f) << 6;
10659 codepoint |= utf[ix+1] & 0x3f;
10660 if (!xmlIsCharQ(codepoint))
10663 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10664 if (ix + 3 > len) return(ix);
10665 if (((utf[ix+1] & 0xc0) != 0x80) ||
10666 ((utf[ix+2] & 0xc0) != 0x80))
/* 4 payload bits from the lead byte, 6 from each continuation byte. */
10668 codepoint = (utf[ix] & 0xf) << 12;
10669 codepoint |= (utf[ix+1] & 0x3f) << 6;
10670 codepoint |= utf[ix+2] & 0x3f;
10671 if (!xmlIsCharQ(codepoint))
10674 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10675 if (ix + 4 > len) return(ix);
10676 if (((utf[ix+1] & 0xc0) != 0x80) ||
10677 ((utf[ix+2] & 0xc0) != 0x80) ||
10678 ((utf[ix+3] & 0xc0) != 0x80))
/* 3 payload bits from the lead byte, 6 from each continuation byte. */
10680 codepoint = (utf[ix] & 0x7) << 18;
10681 codepoint |= (utf[ix+1] & 0x3f) << 12;
10682 codepoint |= (utf[ix+2] & 0x3f) << 6;
10683 codepoint |= utf[ix+3] & 0x3f;
10684 if (!xmlIsCharQ(codepoint))
10687 } else /* lead byte matches none of the four UTF-8 patterns */
10694 * xmlParseTryOrFinish:
10695 * @ctxt: an XML parser context
10696 * @terminate: last chunk indicator
10698 * Try to progress on parsing
10700 * Returns zero if no parsing was possible
10703 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10707 const xmlChar *lastlt, *lastgt;
10709 if (ctxt->input == NULL)
10713 switch (ctxt->instate) {
10714 case XML_PARSER_EOF:
10715 xmlGenericError(xmlGenericErrorContext,
10716 "PP: try EOF\n"); break;
10717 case XML_PARSER_START:
10718 xmlGenericError(xmlGenericErrorContext,
10719 "PP: try START\n"); break;
10720 case XML_PARSER_MISC:
10721 xmlGenericError(xmlGenericErrorContext,
10722 "PP: try MISC\n");break;
10723 case XML_PARSER_COMMENT:
10724 xmlGenericError(xmlGenericErrorContext,
10725 "PP: try COMMENT\n");break;
10726 case XML_PARSER_PROLOG:
10727 xmlGenericError(xmlGenericErrorContext,
10728 "PP: try PROLOG\n");break;
10729 case XML_PARSER_START_TAG:
10730 xmlGenericError(xmlGenericErrorContext,
10731 "PP: try START_TAG\n");break;
10732 case XML_PARSER_CONTENT:
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: try CONTENT\n");break;
10735 case XML_PARSER_CDATA_SECTION:
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: try CDATA_SECTION\n");break;
10738 case XML_PARSER_END_TAG:
10739 xmlGenericError(xmlGenericErrorContext,
10740 "PP: try END_TAG\n");break;
10741 case XML_PARSER_ENTITY_DECL:
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: try ENTITY_DECL\n");break;
10744 case XML_PARSER_ENTITY_VALUE:
10745 xmlGenericError(xmlGenericErrorContext,
10746 "PP: try ENTITY_VALUE\n");break;
10747 case XML_PARSER_ATTRIBUTE_VALUE:
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: try ATTRIBUTE_VALUE\n");break;
10750 case XML_PARSER_DTD:
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: try DTD\n");break;
10753 case XML_PARSER_EPILOG:
10754 xmlGenericError(xmlGenericErrorContext,
10755 "PP: try EPILOG\n");break;
10756 case XML_PARSER_PI:
10757 xmlGenericError(xmlGenericErrorContext,
10758 "PP: try PI\n");break;
10759 case XML_PARSER_IGNORE:
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: try IGNORE\n");break;
10765 if ((ctxt->input != NULL) &&
10766 (ctxt->input->cur - ctxt->input->base > 4096)) {
10768 ctxt->checkIndex = 0;
10770 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10773 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10778 * Pop-up of finished entities.
10780 while ((RAW == 0) && (ctxt->inputNr > 1))
10783 if (ctxt->input == NULL) break;
10784 if (ctxt->input->buf == NULL)
10785 avail = ctxt->input->length -
10786 (ctxt->input->cur - ctxt->input->base);
10789 * If we are operating on converted input, try to flush
10790 * remaining chars to avoid them stalling in the non-converted
10793 if ((ctxt->input->buf->raw != NULL) &&
10794 (ctxt->input->buf->raw->use > 0)) {
10795 int base = ctxt->input->base -
10796 ctxt->input->buf->buffer->content;
10797 int current = ctxt->input->cur - ctxt->input->base;
10799 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10800 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10801 ctxt->input->cur = ctxt->input->base + current;
10803 &ctxt->input->buf->buffer->content[
10804 ctxt->input->buf->buffer->use];
10806 avail = ctxt->input->buf->buffer->use -
10807 (ctxt->input->cur - ctxt->input->base);
10811 switch (ctxt->instate) {
10812 case XML_PARSER_EOF:
10814 * Document parsing is done !
10817 case XML_PARSER_START:
10818 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10820 xmlCharEncoding enc;
10823 * Very first chars read from the document flow.
10829 * Get the 4 first bytes and decode the charset
10830 * if enc != XML_CHAR_ENCODING_NONE
10831 * plug some encoding conversion routines,
10832 * else xmlSwitchEncoding will set to (default)
10839 enc = xmlDetectCharEncoding(start, 4);
10840 xmlSwitchEncoding(ctxt, enc);
10846 cur = ctxt->input->cur[0];
10847 next = ctxt->input->cur[1];
10849 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10850 ctxt->sax->setDocumentLocator(ctxt->userData,
10851 &xmlDefaultSAXLocator);
10852 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10853 ctxt->instate = XML_PARSER_EOF;
10855 xmlGenericError(xmlGenericErrorContext,
10856 "PP: entering EOF\n");
10858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859 ctxt->sax->endDocument(ctxt->userData);
10862 if ((cur == '<') && (next == '?')) {
10863 /* PI or XML decl */
10864 if (avail < 5) return(ret);
10865 if ((!terminate) &&
10866 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10868 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10869 ctxt->sax->setDocumentLocator(ctxt->userData,
10870 &xmlDefaultSAXLocator);
10871 if ((ctxt->input->cur[2] == 'x') &&
10872 (ctxt->input->cur[3] == 'm') &&
10873 (ctxt->input->cur[4] == 'l') &&
10874 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10877 xmlGenericError(xmlGenericErrorContext,
10878 "PP: Parsing XML Decl\n");
10880 xmlParseXMLDecl(ctxt);
10881 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10883 * The XML REC instructs us to stop parsing right
10886 ctxt->instate = XML_PARSER_EOF;
10889 ctxt->standalone = ctxt->input->standalone;
10890 if ((ctxt->encoding == NULL) &&
10891 (ctxt->input->encoding != NULL))
10892 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10893 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10894 (!ctxt->disableSAX))
10895 ctxt->sax->startDocument(ctxt->userData);
10896 ctxt->instate = XML_PARSER_MISC;
10898 xmlGenericError(xmlGenericErrorContext,
10899 "PP: entering MISC\n");
10902 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10903 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10904 (!ctxt->disableSAX))
10905 ctxt->sax->startDocument(ctxt->userData);
10906 ctxt->instate = XML_PARSER_MISC;
10908 xmlGenericError(xmlGenericErrorContext,
10909 "PP: entering MISC\n");
10913 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10914 ctxt->sax->setDocumentLocator(ctxt->userData,
10915 &xmlDefaultSAXLocator);
10916 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917 if (ctxt->version == NULL) {
10918 xmlErrMemory(ctxt, NULL);
10921 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10922 (!ctxt->disableSAX))
10923 ctxt->sax->startDocument(ctxt->userData);
10924 ctxt->instate = XML_PARSER_MISC;
10926 xmlGenericError(xmlGenericErrorContext,
10927 "PP: entering MISC\n");
10931 case XML_PARSER_START_TAG: {
10932 const xmlChar *name;
10933 const xmlChar *prefix = NULL;
10934 const xmlChar *URI = NULL;
10935 int nsNr = ctxt->nsNr;
10937 if ((avail < 2) && (ctxt->inputNr == 1))
10939 cur = ctxt->input->cur[0];
10941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10942 ctxt->instate = XML_PARSER_EOF;
10943 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10944 ctxt->sax->endDocument(ctxt->userData);
10948 if (ctxt->progressive) {
10949 /* > can be found unescaped in attribute values */
10950 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10952 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10956 if (ctxt->spaceNr == 0)
10957 spacePush(ctxt, -1);
10958 else if (*ctxt->space == -2)
10959 spacePush(ctxt, -1);
10961 spacePush(ctxt, *ctxt->space);
10962 #ifdef LIBXML_SAX1_ENABLED
10964 #endif /* LIBXML_SAX1_ENABLED */
10965 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10966 #ifdef LIBXML_SAX1_ENABLED
10968 name = xmlParseStartTag(ctxt);
10969 #endif /* LIBXML_SAX1_ENABLED */
10970 if (name == NULL) {
10972 ctxt->instate = XML_PARSER_EOF;
10973 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10974 ctxt->sax->endDocument(ctxt->userData);
10977 #ifdef LIBXML_VALID_ENABLED
10979 * [ VC: Root Element Type ]
10980 * The Name in the document type declaration must match
10981 * the element type of the root element.
10983 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10984 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10985 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10986 #endif /* LIBXML_VALID_ENABLED */
10989 * Check for an Empty Element.
10991 if ((RAW == '/') && (NXT(1) == '>')) {
10995 if ((ctxt->sax != NULL) &&
10996 (ctxt->sax->endElementNs != NULL) &&
10997 (!ctxt->disableSAX))
10998 ctxt->sax->endElementNs(ctxt->userData, name,
11000 if (ctxt->nsNr - nsNr > 0)
11001 nsPop(ctxt, ctxt->nsNr - nsNr);
11002 #ifdef LIBXML_SAX1_ENABLED
11004 if ((ctxt->sax != NULL) &&
11005 (ctxt->sax->endElement != NULL) &&
11006 (!ctxt->disableSAX))
11007 ctxt->sax->endElement(ctxt->userData, name);
11008 #endif /* LIBXML_SAX1_ENABLED */
11011 if (ctxt->nameNr == 0) {
11012 ctxt->instate = XML_PARSER_EPILOG;
11014 ctxt->instate = XML_PARSER_CONTENT;
11021 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11022 "Couldn't find end of Start Tag %s\n",
11028 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11029 #ifdef LIBXML_SAX1_ENABLED
11031 namePush(ctxt, name);
11032 #endif /* LIBXML_SAX1_ENABLED */
11034 ctxt->instate = XML_PARSER_CONTENT;
11037 case XML_PARSER_CONTENT: {
11038 const xmlChar *test;
11040 if ((avail < 2) && (ctxt->inputNr == 1))
11042 cur = ctxt->input->cur[0];
11043 next = ctxt->input->cur[1];
11046 cons = ctxt->input->consumed;
11047 if ((cur == '<') && (next == '/')) {
11048 ctxt->instate = XML_PARSER_END_TAG;
11050 } else if ((cur == '<') && (next == '?')) {
11051 if ((!terminate) &&
11052 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11055 } else if ((cur == '<') && (next != '!')) {
11056 ctxt->instate = XML_PARSER_START_TAG;
11058 } else if ((cur == '<') && (next == '!') &&
11059 (ctxt->input->cur[2] == '-') &&
11060 (ctxt->input->cur[3] == '-')) {
11065 ctxt->input->cur += 4;
11066 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11067 ctxt->input->cur -= 4;
11068 if ((!terminate) && (term < 0))
11070 xmlParseComment(ctxt);
11071 ctxt->instate = XML_PARSER_CONTENT;
11072 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11073 (ctxt->input->cur[2] == '[') &&
11074 (ctxt->input->cur[3] == 'C') &&
11075 (ctxt->input->cur[4] == 'D') &&
11076 (ctxt->input->cur[5] == 'A') &&
11077 (ctxt->input->cur[6] == 'T') &&
11078 (ctxt->input->cur[7] == 'A') &&
11079 (ctxt->input->cur[8] == '[')) {
11081 ctxt->instate = XML_PARSER_CDATA_SECTION;
11083 } else if ((cur == '<') && (next == '!') &&
11086 } else if (cur == '&') {
11087 if ((!terminate) &&
11088 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11090 xmlParseReference(ctxt);
11092 /* TODO Avoid the extra copy, handle directly !!! */
11094 * Goal of the following test is:
11095 * - minimize calls to the SAX 'character' callback
11096 * when they are mergeable
11097 * - handle a problem for isBlank when we only parse
11098 * a sequence of blank chars and the next one is
11099 * not available to check against '<' presence.
11100 * - tries to homogenize the differences in SAX
11101 * callbacks between the push and pull versions
11104 if ((ctxt->inputNr == 1) &&
11105 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11107 if (ctxt->progressive) {
11108 if ((lastlt == NULL) ||
11109 (ctxt->input->cur > lastlt))
11111 } else if (xmlParseLookupSequence(ctxt,
11117 ctxt->checkIndex = 0;
11118 xmlParseCharData(ctxt, 0);
11121 * Pop-up of finished entities.
11123 while ((RAW == 0) && (ctxt->inputNr > 1))
11125 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11126 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11127 "detected an error in element content\n");
11128 ctxt->instate = XML_PARSER_EOF;
11133 case XML_PARSER_END_TAG:
11137 if (ctxt->progressive) {
11138 /* > can be found unescaped in attribute values */
11139 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11141 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11146 xmlParseEndTag2(ctxt,
11147 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11148 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11149 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11152 #ifdef LIBXML_SAX1_ENABLED
11154 xmlParseEndTag1(ctxt, 0);
11155 #endif /* LIBXML_SAX1_ENABLED */
11156 if (ctxt->nameNr == 0) {
11157 ctxt->instate = XML_PARSER_EPILOG;
11159 ctxt->instate = XML_PARSER_CONTENT;
11162 case XML_PARSER_CDATA_SECTION: {
11164 * The Push mode need to have the SAX callback for
11165 * cdataBlock merge back contiguous callbacks.
11169 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11171 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11174 tmp = xmlCheckCdataPush(ctxt->input->cur,
11175 XML_PARSER_BIG_BUFFER_SIZE);
11178 ctxt->input->cur += tmp;
11179 goto encoding_error;
11181 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11182 if (ctxt->sax->cdataBlock != NULL)
11183 ctxt->sax->cdataBlock(ctxt->userData,
11184 ctxt->input->cur, tmp);
11185 else if (ctxt->sax->characters != NULL)
11186 ctxt->sax->characters(ctxt->userData,
11187 ctxt->input->cur, tmp);
11190 ctxt->checkIndex = 0;
11196 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11197 if ((tmp < 0) || (tmp != base)) {
11199 ctxt->input->cur += tmp;
11200 goto encoding_error;
11202 if ((ctxt->sax != NULL) && (base == 0) &&
11203 (ctxt->sax->cdataBlock != NULL) &&
11204 (!ctxt->disableSAX)) {
11206 * Special case to provide identical behaviour
11207 * between pull and push parsers on empty CDATA
11210 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11211 (!strncmp((const char *)&ctxt->input->cur[-9],
11213 ctxt->sax->cdataBlock(ctxt->userData,
11215 } else if ((ctxt->sax != NULL) && (base > 0) &&
11216 (!ctxt->disableSAX)) {
11217 if (ctxt->sax->cdataBlock != NULL)
11218 ctxt->sax->cdataBlock(ctxt->userData,
11219 ctxt->input->cur, base);
11220 else if (ctxt->sax->characters != NULL)
11221 ctxt->sax->characters(ctxt->userData,
11222 ctxt->input->cur, base);
11225 ctxt->checkIndex = 0;
11226 ctxt->instate = XML_PARSER_CONTENT;
11228 xmlGenericError(xmlGenericErrorContext,
11229 "PP: entering CONTENT\n");
11234 case XML_PARSER_MISC:
11236 if (ctxt->input->buf == NULL)
11237 avail = ctxt->input->length -
11238 (ctxt->input->cur - ctxt->input->base);
11240 avail = ctxt->input->buf->buffer->use -
11241 (ctxt->input->cur - ctxt->input->base);
11244 cur = ctxt->input->cur[0];
11245 next = ctxt->input->cur[1];
11246 if ((cur == '<') && (next == '?')) {
11247 if ((!terminate) &&
11248 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11251 xmlGenericError(xmlGenericErrorContext,
11252 "PP: Parsing PI\n");
11255 ctxt->checkIndex = 0;
11256 } else if ((cur == '<') && (next == '!') &&
11257 (ctxt->input->cur[2] == '-') &&
11258 (ctxt->input->cur[3] == '-')) {
11259 if ((!terminate) &&
11260 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11263 xmlGenericError(xmlGenericErrorContext,
11264 "PP: Parsing Comment\n");
11266 xmlParseComment(ctxt);
11267 ctxt->instate = XML_PARSER_MISC;
11268 ctxt->checkIndex = 0;
11269 } else if ((cur == '<') && (next == '!') &&
11270 (ctxt->input->cur[2] == 'D') &&
11271 (ctxt->input->cur[3] == 'O') &&
11272 (ctxt->input->cur[4] == 'C') &&
11273 (ctxt->input->cur[5] == 'T') &&
11274 (ctxt->input->cur[6] == 'Y') &&
11275 (ctxt->input->cur[7] == 'P') &&
11276 (ctxt->input->cur[8] == 'E')) {
11277 if ((!terminate) &&
11278 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11281 xmlGenericError(xmlGenericErrorContext,
11282 "PP: Parsing internal subset\n");
11284 ctxt->inSubset = 1;
11285 xmlParseDocTypeDecl(ctxt);
11287 ctxt->instate = XML_PARSER_DTD;
11289 xmlGenericError(xmlGenericErrorContext,
11290 "PP: entering DTD\n");
11294 * Create and update the external subset.
11296 ctxt->inSubset = 2;
11297 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11298 (ctxt->sax->externalSubset != NULL))
11299 ctxt->sax->externalSubset(ctxt->userData,
11300 ctxt->intSubName, ctxt->extSubSystem,
11302 ctxt->inSubset = 0;
11303 xmlCleanSpecialAttr(ctxt);
11304 ctxt->instate = XML_PARSER_PROLOG;
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: entering PROLOG\n");
11310 } else if ((cur == '<') && (next == '!') &&
11314 ctxt->instate = XML_PARSER_START_TAG;
11315 ctxt->progressive = 1;
11316 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11318 xmlGenericError(xmlGenericErrorContext,
11319 "PP: entering START_TAG\n");
11323 case XML_PARSER_PROLOG:
11325 if (ctxt->input->buf == NULL)
11326 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11328 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11331 cur = ctxt->input->cur[0];
11332 next = ctxt->input->cur[1];
11333 if ((cur == '<') && (next == '?')) {
11334 if ((!terminate) &&
11335 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: Parsing PI\n");
11342 } else if ((cur == '<') && (next == '!') &&
11343 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11344 if ((!terminate) &&
11345 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11348 xmlGenericError(xmlGenericErrorContext,
11349 "PP: Parsing Comment\n");
11351 xmlParseComment(ctxt);
11352 ctxt->instate = XML_PARSER_PROLOG;
11353 } else if ((cur == '<') && (next == '!') &&
11357 ctxt->instate = XML_PARSER_START_TAG;
11358 if (ctxt->progressive == 0)
11359 ctxt->progressive = 1;
11360 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: entering START_TAG\n");
11367 case XML_PARSER_EPILOG:
11369 if (ctxt->input->buf == NULL)
11370 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11372 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11375 cur = ctxt->input->cur[0];
11376 next = ctxt->input->cur[1];
11377 if ((cur == '<') && (next == '?')) {
11378 if ((!terminate) &&
11379 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11382 xmlGenericError(xmlGenericErrorContext,
11383 "PP: Parsing PI\n");
11386 ctxt->instate = XML_PARSER_EPILOG;
11387 } else if ((cur == '<') && (next == '!') &&
11388 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11389 if ((!terminate) &&
11390 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11393 xmlGenericError(xmlGenericErrorContext,
11394 "PP: Parsing Comment\n");
11396 xmlParseComment(ctxt);
11397 ctxt->instate = XML_PARSER_EPILOG;
11398 } else if ((cur == '<') && (next == '!') &&
11402 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11403 ctxt->instate = XML_PARSER_EOF;
11405 xmlGenericError(xmlGenericErrorContext,
11406 "PP: entering EOF\n");
11408 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11409 ctxt->sax->endDocument(ctxt->userData);
11413 case XML_PARSER_DTD: {
11415 * Sorry but progressive parsing of the internal subset
11416 * is not expected to be supported. We first check that
11417 * the full content of the internal subset is available and
11418 * the parsing is launched only at that point.
11419 * Internal subset ends up with "']' S? '>'" in an unescaped
11420 * section and not in a ']]>' sequence which are conditional
11421 * sections (whoever argued to keep that crap in XML deserve
11422 * a place in hell !).
11428 base = ctxt->input->cur - ctxt->input->base;
11429 if (base < 0) return(0);
11430 if (ctxt->checkIndex > base)
11431 base = ctxt->checkIndex;
11432 buf = ctxt->input->buf->buffer->content;
11433 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11436 if (buf[base] == quote)
11440 if ((quote == 0) && (buf[base] == '<')) {
11442 /* special handling of comments */
11443 if (((unsigned int) base + 4 <
11444 ctxt->input->buf->buffer->use) &&
11445 (buf[base + 1] == '!') &&
11446 (buf[base + 2] == '-') &&
11447 (buf[base + 3] == '-')) {
11448 for (;(unsigned int) base + 3 <
11449 ctxt->input->buf->buffer->use; base++) {
11450 if ((buf[base] == '-') &&
11451 (buf[base + 1] == '-') &&
11452 (buf[base + 2] == '>')) {
11460 fprintf(stderr, "unfinished comment\n");
11467 if (buf[base] == '"') {
11471 if (buf[base] == '\'') {
11475 if (buf[base] == ']') {
11477 fprintf(stderr, "%c%c%c%c: ", buf[base],
11478 buf[base + 1], buf[base + 2], buf[base + 3]);
11480 if ((unsigned int) base +1 >=
11481 ctxt->input->buf->buffer->use)
11483 if (buf[base + 1] == ']') {
11484 /* conditional crap, skip both ']' ! */
11489 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11491 if (buf[base + i] == '>') {
11493 fprintf(stderr, "found\n");
11495 goto found_end_int_subset;
11497 if (!IS_BLANK_CH(buf[base + i])) {
11499 fprintf(stderr, "not found\n");
11501 goto not_end_of_int_subset;
11505 fprintf(stderr, "end of stream\n");
11510 not_end_of_int_subset:
11511 continue; /* for */
11514 * We didn't find the end of the Internal subset
11518 xmlGenericError(xmlGenericErrorContext,
11519 "PP: lookup of int subset end filed\n");
11523 found_end_int_subset:
11524 xmlParseInternalSubset(ctxt);
11525 ctxt->inSubset = 2;
11526 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11527 (ctxt->sax->externalSubset != NULL))
11528 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11529 ctxt->extSubSystem, ctxt->extSubURI);
11530 ctxt->inSubset = 0;
11531 xmlCleanSpecialAttr(ctxt);
11532 ctxt->instate = XML_PARSER_PROLOG;
11533 ctxt->checkIndex = 0;
11535 xmlGenericError(xmlGenericErrorContext,
11536 "PP: entering PROLOG\n");
11540 case XML_PARSER_COMMENT:
11541 xmlGenericError(xmlGenericErrorContext,
11542 "PP: internal error, state == COMMENT\n");
11543 ctxt->instate = XML_PARSER_CONTENT;
11545 xmlGenericError(xmlGenericErrorContext,
11546 "PP: entering CONTENT\n");
11549 case XML_PARSER_IGNORE:
11550 xmlGenericError(xmlGenericErrorContext,
11551 "PP: internal error, state == IGNORE");
11552 ctxt->instate = XML_PARSER_DTD;
11554 xmlGenericError(xmlGenericErrorContext,
11555 "PP: entering DTD\n");
11558 case XML_PARSER_PI:
11559 xmlGenericError(xmlGenericErrorContext,
11560 "PP: internal error, state == PI\n");
11561 ctxt->instate = XML_PARSER_CONTENT;
11563 xmlGenericError(xmlGenericErrorContext,
11564 "PP: entering CONTENT\n");
11567 case XML_PARSER_ENTITY_DECL:
11568 xmlGenericError(xmlGenericErrorContext,
11569 "PP: internal error, state == ENTITY_DECL\n");
11570 ctxt->instate = XML_PARSER_DTD;
11572 xmlGenericError(xmlGenericErrorContext,
11573 "PP: entering DTD\n");
11576 case XML_PARSER_ENTITY_VALUE:
11577 xmlGenericError(xmlGenericErrorContext,
11578 "PP: internal error, state == ENTITY_VALUE\n");
11579 ctxt->instate = XML_PARSER_CONTENT;
11581 xmlGenericError(xmlGenericErrorContext,
11582 "PP: entering DTD\n");
11585 case XML_PARSER_ATTRIBUTE_VALUE:
11586 xmlGenericError(xmlGenericErrorContext,
11587 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11588 ctxt->instate = XML_PARSER_START_TAG;
11590 xmlGenericError(xmlGenericErrorContext,
11591 "PP: entering START_TAG\n");
11594 case XML_PARSER_SYSTEM_LITERAL:
11595 xmlGenericError(xmlGenericErrorContext,
11596 "PP: internal error, state == SYSTEM_LITERAL\n");
11597 ctxt->instate = XML_PARSER_START_TAG;
11599 xmlGenericError(xmlGenericErrorContext,
11600 "PP: entering START_TAG\n");
11603 case XML_PARSER_PUBLIC_LITERAL:
11604 xmlGenericError(xmlGenericErrorContext,
11605 "PP: internal error, state == PUBLIC_LITERAL\n");
11606 ctxt->instate = XML_PARSER_START_TAG;
11608 xmlGenericError(xmlGenericErrorContext,
11609 "PP: entering START_TAG\n");
11616 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11623 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11624 ctxt->input->cur[0], ctxt->input->cur[1],
11625 ctxt->input->cur[2], ctxt->input->cur[3]);
11626 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11627 "Input is not proper UTF-8, indicate encoding !\n%s",
11628 BAD_CAST buffer, NULL);
11635 * @ctxt: an XML parser context
11636 * @chunk: an char array
11637 * @size: the size in byte of the chunk
11638 * @terminate: last chunk indicator
11640 * Parse a Chunk of memory
11642 * Returns zero if no error, the xmlParserErrors otherwise.
11645 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11651 return(XML_ERR_INTERNAL_ERROR);
11652 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11653 return(ctxt->errNo);
11654 if (ctxt->instate == XML_PARSER_START)
11655 xmlDetectSAX2(ctxt);
11656 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11657 (chunk[size - 1] == '\r')) {
11664 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11665 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11666 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11667 int cur = ctxt->input->cur - ctxt->input->base;
11671 * Specific handling if we autodetected an encoding, we should not
11672 * push more than the first line ... which depend on the encoding
11673 * And only push the rest once the final encoding was detected
11675 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11676 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11677 unsigned int len = 45;
11679 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11680 BAD_CAST "UTF-16")) ||
11681 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11682 BAD_CAST "UTF16")))
11684 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11685 BAD_CAST "UCS-4")) ||
11686 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11690 if (ctxt->input->buf->rawconsumed < len)
11691 len -= ctxt->input->buf->rawconsumed;
11694 * Change size for reading the initial declaration only
11695 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11696 * will blindly copy extra bytes from memory.
11698 if ((unsigned int) size > len) {
11699 remain = size - len;
11705 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11707 ctxt->errNo = XML_PARSER_EOF;
11708 ctxt->disableSAX = 1;
11709 return (XML_PARSER_EOF);
11711 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11712 ctxt->input->cur = ctxt->input->base + cur;
11714 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11716 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11719 } else if (ctxt->instate != XML_PARSER_EOF) {
11720 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11721 xmlParserInputBufferPtr in = ctxt->input->buf;
11722 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11723 (in->raw != NULL)) {
11726 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11729 xmlGenericError(xmlGenericErrorContext,
11730 "xmlParseChunk: encoder error\n");
11731 return(XML_ERR_INVALID_ENCODING);
11737 xmlParseTryOrFinish(ctxt, 0);
11739 xmlParseTryOrFinish(ctxt, terminate);
11740 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11741 return(ctxt->errNo);
11749 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11750 (ctxt->input->buf != NULL)) {
11751 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11755 * Check for termination
11759 if (ctxt->input != NULL) {
11760 if (ctxt->input->buf == NULL)
11761 avail = ctxt->input->length -
11762 (ctxt->input->cur - ctxt->input->base);
11764 avail = ctxt->input->buf->buffer->use -
11765 (ctxt->input->cur - ctxt->input->base);
11768 if ((ctxt->instate != XML_PARSER_EOF) &&
11769 (ctxt->instate != XML_PARSER_EPILOG)) {
11770 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11772 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11773 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11775 if (ctxt->instate != XML_PARSER_EOF) {
11776 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11777 ctxt->sax->endDocument(ctxt->userData);
11779 ctxt->instate = XML_PARSER_EOF;
11781 return((xmlParserErrors) ctxt->errNo);
11784 /************************************************************************
11786 * I/O front end functions to the parser *
11788 ************************************************************************/
11791 * xmlCreatePushParserCtxt:
11792 * @sax: a SAX handler
11793 * @user_data: The user data returned on SAX callbacks
11794 * @chunk: a pointer to an array of chars
11795 * @size: number of chars in the array
11796 * @filename: an optional file name or URI
11798 * Create a parser context for using the XML parser in push mode.
11799 * If @buffer and @size are non-NULL, the data is used to detect
11800 * the encoding. The remaining characters will be parsed so they
11801 * don't need to be fed in again through xmlParseChunk.
11802 * To allow content encoding detection, @size should be >= 4
11803 * The value of @filename is used for fetching external entities
11804 * and error/warning reports.
11806 * Returns the new parser context or NULL
11810 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11811 const char *chunk, int size, const char *filename) {
11812 xmlParserCtxtPtr ctxt;
11813 xmlParserInputPtr inputStream;
11814 xmlParserInputBufferPtr buf;
11815 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11818 * plug some encoding conversion routines
11820 if ((chunk != NULL) && (size >= 4))
11821 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11823 buf = xmlAllocParserInputBuffer(enc);
11824 if (buf == NULL) return(NULL);
11826 ctxt = xmlNewParserCtxt();
11827 if (ctxt == NULL) {
11828 xmlErrMemory(NULL, "creating parser: out of memory\n");
11829 xmlFreeParserInputBuffer(buf);
11832 ctxt->dictNames = 1;
11833 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11834 if (ctxt->pushTab == NULL) {
11835 xmlErrMemory(ctxt, NULL);
11836 xmlFreeParserInputBuffer(buf);
11837 xmlFreeParserCtxt(ctxt);
11841 #ifdef LIBXML_SAX1_ENABLED
11842 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11843 #endif /* LIBXML_SAX1_ENABLED */
11844 xmlFree(ctxt->sax);
11845 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11846 if (ctxt->sax == NULL) {
11847 xmlErrMemory(ctxt, NULL);
11848 xmlFreeParserInputBuffer(buf);
11849 xmlFreeParserCtxt(ctxt);
11852 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11853 if (sax->initialized == XML_SAX2_MAGIC)
11854 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11856 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11857 if (user_data != NULL)
11858 ctxt->userData = user_data;
11860 if (filename == NULL) {
11861 ctxt->directory = NULL;
11863 ctxt->directory = xmlParserGetDirectory(filename);
11866 inputStream = xmlNewInputStream(ctxt);
11867 if (inputStream == NULL) {
11868 xmlFreeParserCtxt(ctxt);
11869 xmlFreeParserInputBuffer(buf);
11873 if (filename == NULL)
11874 inputStream->filename = NULL;
11876 inputStream->filename = (char *)
11877 xmlCanonicPath((const xmlChar *) filename);
11878 if (inputStream->filename == NULL) {
11879 xmlFreeParserCtxt(ctxt);
11880 xmlFreeParserInputBuffer(buf);
11884 inputStream->buf = buf;
11885 inputStream->base = inputStream->buf->buffer->content;
11886 inputStream->cur = inputStream->buf->buffer->content;
11888 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11890 inputPush(ctxt, inputStream);
11893 * If the caller didn't provide an initial 'chunk' for determining
11894 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11895 * that it can be automatically determined later
11897 if ((size == 0) || (chunk == NULL)) {
11898 ctxt->charset = XML_CHAR_ENCODING_NONE;
11899 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11900 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11901 int cur = ctxt->input->cur - ctxt->input->base;
11903 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11905 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11906 ctxt->input->cur = ctxt->input->base + cur;
11908 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11910 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11914 if (enc != XML_CHAR_ENCODING_NONE) {
11915 xmlSwitchEncoding(ctxt, enc);
11920 #endif /* LIBXML_PUSH_ENABLED */
11924 * @ctxt: an XML parser context
11926 * Blocks further parser processing
11929 xmlStopParser(xmlParserCtxtPtr ctxt) {
11932 ctxt->instate = XML_PARSER_EOF;
11933 ctxt->disableSAX = 1;
11934 if (ctxt->input != NULL) {
11935 ctxt->input->cur = BAD_CAST"";
11936 ctxt->input->base = ctxt->input->cur;
11941 * xmlCreateIOParserCtxt:
11942 * @sax: a SAX handler
11943 * @user_data: The user data returned on SAX callbacks
11944 * @ioread: an I/O read function
11945 * @ioclose: an I/O close function
11946 * @ioctx: an I/O handler
11947 * @enc: the charset encoding if known
11949 * Create a parser context for using the XML parser with an existing
11952 * Returns the new parser context or NULL
11955 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11956 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11957 void *ioctx, xmlCharEncoding enc) {
11958 xmlParserCtxtPtr ctxt;
11959 xmlParserInputPtr inputStream;
11960 xmlParserInputBufferPtr buf;
11962 if (ioread == NULL) return(NULL);
11964 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11965 if (buf == NULL) return(NULL);
11967 ctxt = xmlNewParserCtxt();
11968 if (ctxt == NULL) {
11969 xmlFreeParserInputBuffer(buf);
11973 #ifdef LIBXML_SAX1_ENABLED
11974 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11975 #endif /* LIBXML_SAX1_ENABLED */
11976 xmlFree(ctxt->sax);
11977 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11978 if (ctxt->sax == NULL) {
11979 xmlErrMemory(ctxt, NULL);
11980 xmlFreeParserCtxt(ctxt);
11983 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11984 if (sax->initialized == XML_SAX2_MAGIC)
11985 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11987 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11988 if (user_data != NULL)
11989 ctxt->userData = user_data;
11992 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11993 if (inputStream == NULL) {
11994 xmlFreeParserCtxt(ctxt);
11997 inputPush(ctxt, inputStream);
12002 #ifdef LIBXML_VALID_ENABLED
12003 /************************************************************************
12005 * Front ends when parsing a DTD *
12007 ************************************************************************/
12011 * @sax: the SAX handler block or NULL
12012 * @input: an Input Buffer
12013 * @enc: the charset encoding if known
12015 * Load and parse a DTD
12017 * Returns the resulting xmlDtdPtr or NULL in case of error.
12018 * @input will be freed by the function in any case.
12022 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12023 xmlCharEncoding enc) {
12024 xmlDtdPtr ret = NULL;
12025 xmlParserCtxtPtr ctxt;
12026 xmlParserInputPtr pinput = NULL;
12032 ctxt = xmlNewParserCtxt();
12033 if (ctxt == NULL) {
12034 xmlFreeParserInputBuffer(input);
12039 * Set-up the SAX context
12042 if (ctxt->sax != NULL)
12043 xmlFree(ctxt->sax);
12045 ctxt->userData = ctxt;
12047 xmlDetectSAX2(ctxt);
12050 * generate a parser input from the I/O handler
12053 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12054 if (pinput == NULL) {
12055 if (sax != NULL) ctxt->sax = NULL;
12056 xmlFreeParserInputBuffer(input);
12057 xmlFreeParserCtxt(ctxt);
12062 * plug some encoding conversion routines here.
12064 if (xmlPushInput(ctxt, pinput) < 0) {
12065 if (sax != NULL) ctxt->sax = NULL;
12066 xmlFreeParserCtxt(ctxt);
12069 if (enc != XML_CHAR_ENCODING_NONE) {
12070 xmlSwitchEncoding(ctxt, enc);
12073 pinput->filename = NULL;
12076 pinput->base = ctxt->input->cur;
12077 pinput->cur = ctxt->input->cur;
12078 pinput->free = NULL;
12081 * let's parse that entity knowing it's an external subset.
12083 ctxt->inSubset = 2;
12084 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12085 if (ctxt->myDoc == NULL) {
12086 xmlErrMemory(ctxt, "New Doc failed");
12089 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12090 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12091 BAD_CAST "none", BAD_CAST "none");
12093 if ((enc == XML_CHAR_ENCODING_NONE) &&
12094 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12096 * Get the 4 first bytes and decode the charset
12097 * if enc != XML_CHAR_ENCODING_NONE
12098 * plug some encoding conversion routines.
12104 enc = xmlDetectCharEncoding(start, 4);
12105 if (enc != XML_CHAR_ENCODING_NONE) {
12106 xmlSwitchEncoding(ctxt, enc);
12110 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12112 if (ctxt->myDoc != NULL) {
12113 if (ctxt->wellFormed) {
12114 ret = ctxt->myDoc->extSubset;
12115 ctxt->myDoc->extSubset = NULL;
12120 tmp = ret->children;
12121 while (tmp != NULL) {
12129 xmlFreeDoc(ctxt->myDoc);
12130 ctxt->myDoc = NULL;
12132 if (sax != NULL) ctxt->sax = NULL;
12133 xmlFreeParserCtxt(ctxt);
12140 * @sax: the SAX handler block
12141 * @ExternalID: a NAME* containing the External ID of the DTD
12142 * @SystemID: a NAME* containing the URL to the DTD
12144 * Load and parse an external subset.
12146 * Returns the resulting xmlDtdPtr or NULL in case of error.
12150 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12151 const xmlChar *SystemID) {
12152 xmlDtdPtr ret = NULL;
12153 xmlParserCtxtPtr ctxt;
12154 xmlParserInputPtr input = NULL;
12155 xmlCharEncoding enc;
12156 xmlChar* systemIdCanonic;
12158 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12160 ctxt = xmlNewParserCtxt();
12161 if (ctxt == NULL) {
12166 * Set-up the SAX context
12169 if (ctxt->sax != NULL)
12170 xmlFree(ctxt->sax);
12172 ctxt->userData = ctxt;
12176 * Canonicalise the system ID
12178 systemIdCanonic = xmlCanonicPath(SystemID);
12179 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12180 xmlFreeParserCtxt(ctxt);
12185 * Ask the Entity resolver to load the damn thing
12188 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12189 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12191 if (input == NULL) {
12192 if (sax != NULL) ctxt->sax = NULL;
12193 xmlFreeParserCtxt(ctxt);
12194 if (systemIdCanonic != NULL)
12195 xmlFree(systemIdCanonic);
12200 * plug some encoding conversion routines here.
12202 if (xmlPushInput(ctxt, input) < 0) {
12203 if (sax != NULL) ctxt->sax = NULL;
12204 xmlFreeParserCtxt(ctxt);
12205 if (systemIdCanonic != NULL)
12206 xmlFree(systemIdCanonic);
12209 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12210 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12211 xmlSwitchEncoding(ctxt, enc);
12214 if (input->filename == NULL)
12215 input->filename = (char *) systemIdCanonic;
12217 xmlFree(systemIdCanonic);
12220 input->base = ctxt->input->cur;
12221 input->cur = ctxt->input->cur;
12222 input->free = NULL;
12225 * let's parse that entity knowing it's an external subset.
12227 ctxt->inSubset = 2;
12228 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12229 if (ctxt->myDoc == NULL) {
12230 xmlErrMemory(ctxt, "New Doc failed");
12231 if (sax != NULL) ctxt->sax = NULL;
12232 xmlFreeParserCtxt(ctxt);
12235 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12236 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12237 ExternalID, SystemID);
12238 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12240 if (ctxt->myDoc != NULL) {
12241 if (ctxt->wellFormed) {
12242 ret = ctxt->myDoc->extSubset;
12243 ctxt->myDoc->extSubset = NULL;
12248 tmp = ret->children;
12249 while (tmp != NULL) {
12257 xmlFreeDoc(ctxt->myDoc);
12258 ctxt->myDoc = NULL;
12260 if (sax != NULL) ctxt->sax = NULL;
12261 xmlFreeParserCtxt(ctxt);
12269 * @ExternalID: a NAME* containing the External ID of the DTD
12270 * @SystemID: a NAME* containing the URL to the DTD
12272 * Load and parse an external subset.
12274 * Returns the resulting xmlDtdPtr or NULL in case of error.
12278 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12279 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12281 #endif /* LIBXML_VALID_ENABLED */
12283 /************************************************************************
12285 * Front ends when parsing an Entity *
12287 ************************************************************************/
12290 * xmlParseCtxtExternalEntity:
12291 * @ctx: the existing parsing context
12292 * @URL: the URL for the entity to load
12293 * @ID: the System ID for the entity to load
12294 * @lst: the return value for the set of parsed nodes
12296 * Parse an external general entity within an existing parsing context
12297 * An external general parsed entity is well-formed if it matches the
12298 * production labeled extParsedEnt.
12300 * [78] extParsedEnt ::= TextDecl? content
12302 * Returns 0 if the entity is well formed, -1 in case of args problem and
12303 * the parser error code otherwise
12307 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12308 const xmlChar *ID, xmlNodePtr *lst) {
12309 xmlParserCtxtPtr ctxt;
12311 xmlNodePtr newRoot;
12312 xmlSAXHandlerPtr oldsax = NULL;
12315 xmlCharEncoding enc;
12317 if (ctx == NULL) return(-1);
12319 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12320 (ctx->depth > 1024)) {
12321 return(XML_ERR_ENTITY_LOOP);
12326 if ((URL == NULL) && (ID == NULL))
12328 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12331 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12332 if (ctxt == NULL) {
12336 oldsax = ctxt->sax;
12337 ctxt->sax = ctx->sax;
12338 xmlDetectSAX2(ctxt);
12339 newDoc = xmlNewDoc(BAD_CAST "1.0");
12340 if (newDoc == NULL) {
12341 xmlFreeParserCtxt(ctxt);
12344 newDoc->properties = XML_DOC_INTERNAL;
12345 if (ctx->myDoc->dict) {
12346 newDoc->dict = ctx->myDoc->dict;
12347 xmlDictReference(newDoc->dict);
12349 if (ctx->myDoc != NULL) {
12350 newDoc->intSubset = ctx->myDoc->intSubset;
12351 newDoc->extSubset = ctx->myDoc->extSubset;
12353 if (ctx->myDoc->URL != NULL) {
12354 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12356 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12357 if (newRoot == NULL) {
12358 ctxt->sax = oldsax;
12359 xmlFreeParserCtxt(ctxt);
12360 newDoc->intSubset = NULL;
12361 newDoc->extSubset = NULL;
12362 xmlFreeDoc(newDoc);
12365 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12366 nodePush(ctxt, newDoc->children);
12367 if (ctx->myDoc == NULL) {
12368 ctxt->myDoc = newDoc;
12370 ctxt->myDoc = ctx->myDoc;
12371 newDoc->children->doc = ctx->myDoc;
12375 * Get the 4 first bytes and decode the charset
12376 * if enc != XML_CHAR_ENCODING_NONE
12377 * plug some encoding conversion routines.
12380 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12385 enc = xmlDetectCharEncoding(start, 4);
12386 if (enc != XML_CHAR_ENCODING_NONE) {
12387 xmlSwitchEncoding(ctxt, enc);
12392 * Parse a possible text declaration first
12394 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12395 xmlParseTextDecl(ctxt);
12397 * An XML-1.0 document can't reference an entity not XML-1.0
12399 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12400 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12401 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12402 "Version mismatch between document and entity\n");
12407 * Doing validity checking on chunk doesn't make sense
12409 ctxt->instate = XML_PARSER_CONTENT;
12410 ctxt->validate = ctx->validate;
12411 ctxt->valid = ctx->valid;
12412 ctxt->loadsubset = ctx->loadsubset;
12413 ctxt->depth = ctx->depth + 1;
12414 ctxt->replaceEntities = ctx->replaceEntities;
12415 if (ctxt->validate) {
12416 ctxt->vctxt.error = ctx->vctxt.error;
12417 ctxt->vctxt.warning = ctx->vctxt.warning;
12419 ctxt->vctxt.error = NULL;
12420 ctxt->vctxt.warning = NULL;
12422 ctxt->vctxt.nodeTab = NULL;
12423 ctxt->vctxt.nodeNr = 0;
12424 ctxt->vctxt.nodeMax = 0;
12425 ctxt->vctxt.node = NULL;
12426 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12427 ctxt->dict = ctx->dict;
12428 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12429 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12430 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12431 ctxt->dictNames = ctx->dictNames;
12432 ctxt->attsDefault = ctx->attsDefault;
12433 ctxt->attsSpecial = ctx->attsSpecial;
12434 ctxt->linenumbers = ctx->linenumbers;
12436 xmlParseContent(ctxt);
12438 ctx->validate = ctxt->validate;
12439 ctx->valid = ctxt->valid;
12440 if ((RAW == '<') && (NXT(1) == '/')) {
12441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12442 } else if (RAW != 0) {
12443 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12445 if (ctxt->node != newDoc->children) {
12446 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12449 if (!ctxt->wellFormed) {
12450 if (ctxt->errNo == 0)
12459 * Return the newly created nodeset after unlinking it from
12460 * they pseudo parent.
12462 cur = newDoc->children->children;
12464 while (cur != NULL) {
12465 cur->parent = NULL;
12468 newDoc->children->children = NULL;
12472 ctxt->sax = oldsax;
12474 ctxt->attsDefault = NULL;
12475 ctxt->attsSpecial = NULL;
12476 xmlFreeParserCtxt(ctxt);
12477 newDoc->intSubset = NULL;
12478 newDoc->extSubset = NULL;
12479 xmlFreeDoc(newDoc);
12485 * xmlParseExternalEntityPrivate:
12486 * @doc: the document the chunk pertains to
12487 * @oldctxt: the previous parser context if available
12488 * @sax: the SAX handler bloc (possibly NULL)
12489 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12490 * @depth: Used for loop detection, use 0
12491 * @URL: the URL for the entity to load
12492 * @ID: the System ID for the entity to load
12493 * @list: the return value for the set of parsed nodes
12495 * Private version of xmlParseExternalEntity()
12497 * Returns 0 if the entity is well formed, -1 in case of args problem and
12498 * the parser error code otherwise
12501 static xmlParserErrors
12502 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12503 xmlSAXHandlerPtr sax,
12504 void *user_data, int depth, const xmlChar *URL,
12505 const xmlChar *ID, xmlNodePtr *list) {
12506 xmlParserCtxtPtr ctxt;
12508 xmlNodePtr newRoot;
12509 xmlSAXHandlerPtr oldsax = NULL;
12510 xmlParserErrors ret = XML_ERR_OK;
12512 xmlCharEncoding enc;
12514 if (((depth > 40) &&
12515 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12517 return(XML_ERR_ENTITY_LOOP);
12522 if ((URL == NULL) && (ID == NULL))
12523 return(XML_ERR_INTERNAL_ERROR);
12525 return(XML_ERR_INTERNAL_ERROR);
12528 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12529 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12530 ctxt->userData = ctxt;
12531 if (oldctxt != NULL) {
12532 ctxt->_private = oldctxt->_private;
12533 ctxt->loadsubset = oldctxt->loadsubset;
12534 ctxt->validate = oldctxt->validate;
12535 ctxt->external = oldctxt->external;
12536 ctxt->record_info = oldctxt->record_info;
12537 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12538 ctxt->node_seq.length = oldctxt->node_seq.length;
12539 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12542 * Doing validity checking on chunk without context
12543 * doesn't make sense
12545 ctxt->_private = NULL;
12546 ctxt->validate = 0;
12547 ctxt->external = 2;
12548 ctxt->loadsubset = 0;
12551 oldsax = ctxt->sax;
12553 if (user_data != NULL)
12554 ctxt->userData = user_data;
12556 xmlDetectSAX2(ctxt);
12557 newDoc = xmlNewDoc(BAD_CAST "1.0");
12558 if (newDoc == NULL) {
12559 ctxt->node_seq.maximum = 0;
12560 ctxt->node_seq.length = 0;
12561 ctxt->node_seq.buffer = NULL;
12562 xmlFreeParserCtxt(ctxt);
12563 return(XML_ERR_INTERNAL_ERROR);
12565 newDoc->properties = XML_DOC_INTERNAL;
12566 newDoc->intSubset = doc->intSubset;
12567 newDoc->extSubset = doc->extSubset;
12568 newDoc->dict = doc->dict;
12569 xmlDictReference(newDoc->dict);
12571 if (doc->URL != NULL) {
12572 newDoc->URL = xmlStrdup(doc->URL);
12574 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12575 if (newRoot == NULL) {
12577 ctxt->sax = oldsax;
12578 ctxt->node_seq.maximum = 0;
12579 ctxt->node_seq.length = 0;
12580 ctxt->node_seq.buffer = NULL;
12581 xmlFreeParserCtxt(ctxt);
12582 newDoc->intSubset = NULL;
12583 newDoc->extSubset = NULL;
12584 xmlFreeDoc(newDoc);
12585 return(XML_ERR_INTERNAL_ERROR);
12587 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12588 nodePush(ctxt, newDoc->children);
12590 newRoot->doc = doc;
12593 * Get the 4 first bytes and decode the charset
12594 * if enc != XML_CHAR_ENCODING_NONE
12595 * plug some encoding conversion routines.
12598 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12603 enc = xmlDetectCharEncoding(start, 4);
12604 if (enc != XML_CHAR_ENCODING_NONE) {
12605 xmlSwitchEncoding(ctxt, enc);
12610 * Parse a possible text declaration first
12612 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12613 xmlParseTextDecl(ctxt);
12616 ctxt->instate = XML_PARSER_CONTENT;
12617 ctxt->depth = depth;
12619 xmlParseContent(ctxt);
12621 if ((RAW == '<') && (NXT(1) == '/')) {
12622 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12623 } else if (RAW != 0) {
12624 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12626 if (ctxt->node != newDoc->children) {
12627 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12630 if (!ctxt->wellFormed) {
12631 if (ctxt->errNo == 0)
12632 ret = XML_ERR_INTERNAL_ERROR;
12634 ret = (xmlParserErrors)ctxt->errNo;
12636 if (list != NULL) {
12640 * Return the newly created nodeset after unlinking it from
12641 * they pseudo parent.
12643 cur = newDoc->children->children;
12645 while (cur != NULL) {
12646 cur->parent = NULL;
12649 newDoc->children->children = NULL;
12655 * Record in the parent context the number of entities replacement
12656 * done when parsing that reference.
12658 if (oldctxt != NULL)
12659 oldctxt->nbentities += ctxt->nbentities;
12662 * Also record the size of the entity parsed
12664 if (ctxt->input != NULL) {
12665 oldctxt->sizeentities += ctxt->input->consumed;
12666 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12669 * And record the last error if any
12671 if (ctxt->lastError.code != XML_ERR_OK)
12672 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12675 ctxt->sax = oldsax;
12676 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12677 oldctxt->node_seq.length = ctxt->node_seq.length;
12678 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12679 ctxt->node_seq.maximum = 0;
12680 ctxt->node_seq.length = 0;
12681 ctxt->node_seq.buffer = NULL;
12682 xmlFreeParserCtxt(ctxt);
12683 newDoc->intSubset = NULL;
12684 newDoc->extSubset = NULL;
12685 xmlFreeDoc(newDoc);
12690 #ifdef LIBXML_SAX1_ENABLED
12692 * xmlParseExternalEntity:
12693 * @doc: the document the chunk pertains to
12694 * @sax: the SAX handler bloc (possibly NULL)
12695 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12696 * @depth: Used for loop detection, use 0
12697 * @URL: the URL for the entity to load
12698 * @ID: the System ID for the entity to load
12699 * @lst: the return value for the set of parsed nodes
12701 * Parse an external general entity
12702 * An external general parsed entity is well-formed if it matches the
12703 * production labeled extParsedEnt.
12705 * [78] extParsedEnt ::= TextDecl? content
12707 * Returns 0 if the entity is well formed, -1 in case of args problem and
12708 * the parser error code otherwise
12712 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12713 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12714 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12719 * xmlParseBalancedChunkMemory:
12720 * @doc: the document the chunk pertains to
12721 * @sax: the SAX handler bloc (possibly NULL)
12722 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12723 * @depth: Used for loop detection, use 0
12724 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12725 * @lst: the return value for the set of parsed nodes
12727 * Parse a well-balanced chunk of an XML document
12728 * called by the parser
12729 * The allowed sequence for the Well Balanced Chunk is the one defined by
12730 * the content production in the XML grammar:
12732 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12734 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12735 * the parser error code otherwise
12739 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12740 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12741 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12742 depth, string, lst, 0 );
12744 #endif /* LIBXML_SAX1_ENABLED */
12747 * xmlParseBalancedChunkMemoryInternal:
12748 * @oldctxt: the existing parsing context
12749 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12750 * @user_data: the user data field for the parser context
12751 * @lst: the return value for the set of parsed nodes
12754 * Parse a well-balanced chunk of an XML document
12755 * called by the parser
12756 * The allowed sequence for the Well Balanced Chunk is the one defined by
12757 * the content production in the XML grammar:
12759 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12761 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12762 * error code otherwise
12764 * In case recover is set to 1, the nodelist will not be empty even if
12765 * the parsed chunk is not well balanced.
12767 static xmlParserErrors
12768 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12769 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
/* Parses `string` as element content in a fresh memory parser context that
 * borrows its dictionary, SAX handler, options and attribute-default tables
 * from `oldctxt`. The content is built under a temporary "pseudoroot"
 * element, which is freed again before the function returns. */
12770 xmlParserCtxtPtr ctxt;
12771 xmlDocPtr newDoc = NULL;
12772 xmlNodePtr newRoot;
12773 xmlSAXHandlerPtr oldsax = NULL;
12774 xmlNodePtr content = NULL;
12775 xmlNodePtr last = NULL;
12777 xmlParserErrors ret = XML_ERR_OK;
/* Nesting guard: give up above depth 40 unless XML_PARSE_HUGE is set, and
 * above depth 1024 unconditionally. */
12782 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12783 (oldctxt->depth > 1024)) {
12784 return(XML_ERR_ENTITY_LOOP);
12790 if (string == NULL)
12791 return(XML_ERR_INTERNAL_ERROR);
12793 size = xmlStrlen(string);
12795 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
/* NOTE(review): a failed context creation is reported with the warning code
 * XML_WAR_UNDECLARED_ENTITY rather than a memory/internal error code.
 * Presumably callers depend on this value - confirm before changing it. */
12796 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12797 if (user_data != NULL)
12798 ctxt->userData = user_data;
12800 ctxt->userData = ctxt;
/* Replace the new context's own dictionary with the parent's one, then
 * re-intern the three well-known strings in that dictionary. */
12801 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12802 ctxt->dict = oldctxt->dict;
12803 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12804 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12805 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12808 /* propagate namespaces down the entity */
12809 for (i = 0;i < oldctxt->nsNr;i += 2) {
12810 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
/* Swap in the parent's SAX handler; the context's original handler is kept
 * in oldsax and put back before each xmlFreeParserCtxt() call below. */
12814 oldsax = ctxt->sax;
12815 ctxt->sax = oldctxt->sax;
12816 xmlDetectSAX2(ctxt);
12817 ctxt->replaceEntities = oldctxt->replaceEntities;
12818 ctxt->options = oldctxt->options;
12820 ctxt->_private = oldctxt->_private;
/* Without a parent document a temporary one (newDoc) is created and shares
 * the dictionary by reference; otherwise the parent's document is reused
 * and its current children/last pointers are saved for restoration. */
12821 if (oldctxt->myDoc == NULL) {
12822 newDoc = xmlNewDoc(BAD_CAST "1.0");
12823 if (newDoc == NULL) {
12824 ctxt->sax = oldsax;
12826 xmlFreeParserCtxt(ctxt);
12827 return(XML_ERR_INTERNAL_ERROR);
12829 newDoc->properties = XML_DOC_INTERNAL;
12830 newDoc->dict = ctxt->dict;
12831 xmlDictReference(newDoc->dict);
12832 ctxt->myDoc = newDoc;
12834 ctxt->myDoc = oldctxt->myDoc;
12835 content = ctxt->myDoc->children;
12836 last = ctxt->myDoc->last;
12838 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12839 if (newRoot == NULL) {
12840 ctxt->sax = oldsax;
12842 xmlFreeParserCtxt(ctxt);
12843 if (newDoc != NULL) {
12844 xmlFreeDoc(newDoc);
12846 return(XML_ERR_INTERNAL_ERROR);
/* Make the pseudoroot the only child of the document while parsing and
 * push it as the current node of the new context. */
12848 ctxt->myDoc->children = NULL;
12849 ctxt->myDoc->last = NULL;
12850 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12851 nodePush(ctxt, ctxt->myDoc->children);
12852 ctxt->instate = XML_PARSER_CONTENT;
12853 ctxt->depth = oldctxt->depth + 1;
/* Validation is switched off in the child context; elements are validated
 * against the parent context in the loop further down instead. */
12855 ctxt->validate = 0;
12856 ctxt->loadsubset = oldctxt->loadsubset;
12857 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12859 * ID/IDREF registration will be done in xmlValidateElement below
12861 ctxt->loadsubset |= XML_SKIP_IDS;
/* The attribute-default tables are borrowed from the parent; they are
 * reset to NULL again just before the context is freed. */
12863 ctxt->dictNames = oldctxt->dictNames;
12864 ctxt->attsDefault = oldctxt->attsDefault;
12865 ctxt->attsSpecial = oldctxt->attsSpecial;
12867 xmlParseContent(ctxt);
/* Anything left in the input after the content production is an error:
 * a stray end tag means "not well balanced", any other byte is extra
 * content. The current node must also be back at the pseudoroot. */
12868 if ((RAW == '<') && (NXT(1) == '/')) {
12869 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12870 } else if (RAW != 0) {
12871 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12873 if (ctxt->node != ctxt->myDoc->children) {
12874 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
/* Translate the well-formedness flag into a return code; errNo == 0 on a
 * non-well-formed parse is mapped to XML_ERR_INTERNAL_ERROR. */
12877 if (!ctxt->wellFormed) {
12878 if (ctxt->errNo == 0)
12879 ret = XML_ERR_INTERNAL_ERROR;
12881 ret = (xmlParserErrors)ctxt->errNo;
12886 if ((lst != NULL) && (ret == XML_ERR_OK)) {
12890 * Return the newly created nodeset after unlinking it from
12891 * they pseudo parent.
12893 cur = ctxt->myDoc->children->children;
12895 while (cur != NULL) {
12896 #ifdef LIBXML_VALID_ENABLED
12897 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12898 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12899 (cur->type == XML_ELEMENT_NODE)) {
12900 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12901 oldctxt->myDoc, cur);
12903 #endif /* LIBXML_VALID_ENABLED */
12904 cur->parent = NULL;
12907 ctxt->myDoc->children->children = NULL;
/* Free the pseudoroot and restore the document's saved child pointers. */
12909 if (ctxt->myDoc != NULL) {
12910 xmlFreeNode(ctxt->myDoc->children);
12911 ctxt->myDoc->children = content;
12912 ctxt->myDoc->last = last;
12916 * Record in the parent context the number of entities replacement
12917 * done when parsing that reference.
/* NOTE(review): oldctxt has already been dereferenced unconditionally at
 * the top of the function, so this NULL test looks redundant. */
12919 if (oldctxt != NULL)
12920 oldctxt->nbentities += ctxt->nbentities;
12923 * Also record the last error if any
12925 if (ctxt->lastError.code != XML_ERR_OK)
12926 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
/* Undo the borrowing done above before releasing the context - presumably
 * so that xmlFreeParserCtxt() does not release objects still owned by
 * oldctxt (TODO confirm against xmlFreeParserCtxt). */
12928 ctxt->sax = oldsax;
12930 ctxt->attsDefault = NULL;
12931 ctxt->attsSpecial = NULL;
12932 xmlFreeParserCtxt(ctxt);
12933 if (newDoc != NULL) {
12934 xmlFreeDoc(newDoc);
12941 * xmlParseInNodeContext:
12942 * @node: the context node
12943 * @data: the input string
12944 * @datalen: the input string length in bytes
12945 * @options: a combination of xmlParserOption
12946 * @lst: the return value for the set of parsed nodes
12948 * Parse a well-balanced chunk of an XML document
12949 * within the context (DTD, namespaces, etc ...) of the given node.
12951 * The allowed sequence for the data is a Well Balanced Chunk defined by
12952 * the content production in the XML grammar:
12954 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12956 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12957 * error code otherwise
12960 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12961 int options, xmlNodePtr *lst) {
/* Parses `data` (datalen bytes) in a new parser context configured from the
 * document that owns `node`: an XML memory context for XML documents, an
 * HTML memory context for HTML documents when LIBXML_HTML_ENABLED is set.
 * A temporary comment node ("fake") is appended to the context node before
 * parsing and unlinked again afterwards. */
12963 xmlParserCtxtPtr ctxt;
12964 xmlDocPtr doc = NULL;
12965 xmlNodePtr fake, cur;
12968 xmlParserErrors ret = XML_ERR_OK;
12971 * check all input parameters, grab the document
12973 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12974 return(XML_ERR_INTERNAL_ERROR);
12975 switch (node->type) {
12976 case XML_ELEMENT_NODE:
12977 case XML_ATTRIBUTE_NODE:
12978 case XML_TEXT_NODE:
12979 case XML_CDATA_SECTION_NODE:
12980 case XML_ENTITY_REF_NODE:
12982 case XML_COMMENT_NODE:
12983 case XML_DOCUMENT_NODE:
12984 case XML_HTML_DOCUMENT_NODE:
12987 return(XML_ERR_INTERNAL_ERROR);
/* Climb the parent chain until an element or a (HTML) document node is
 * reached; that node becomes the parsing context. */
12990 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12991 (node->type != XML_DOCUMENT_NODE) &&
12992 (node->type != XML_HTML_DOCUMENT_NODE))
12993 node = node->parent;
12995 return(XML_ERR_INTERNAL_ERROR);
12996 if (node->type == XML_ELEMENT_NODE)
12999 doc = (xmlDocPtr) node;
13001 return(XML_ERR_INTERNAL_ERROR);
13004 * allocate a context and set-up everything not related to the
13005 * node position in the tree
13007 if (doc->type == XML_DOCUMENT_NODE)
13008 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13009 #ifdef LIBXML_HTML_ENABLED
13010 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13011 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13013 * When parsing in context, it makes no sense to add implied
13014 * elements like html/body/etc...
13016 options |= HTML_PARSE_NOIMPLIED;
13020 return(XML_ERR_INTERNAL_ERROR);
13023 return(XML_ERR_NO_MEMORY);
13026 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13027 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13028 * we must wait until the last moment to free the original one.
13030 if (doc->dict != NULL) {
13031 if (ctxt->dict != NULL)
13032 xmlDictFree(ctxt->dict);
13033 ctxt->dict = doc->dict;
13035 options |= XML_PARSE_NODICT;
/* Mirror the document's declared encoding in the new context and switch
 * the context to the matching encoding handler. */
13037 if (doc->encoding != NULL) {
13038 xmlCharEncodingHandlerPtr hdlr;
13040 if (ctxt->encoding != NULL)
13041 xmlFree((xmlChar *) ctxt->encoding);
13042 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13044 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13045 if (hdlr != NULL) {
13046 xmlSwitchToEncoding(ctxt, hdlr);
/* NOTE(review): no xmlFreeParserCtxt(ctxt) is visible before this return,
 * unlike the failure path after xmlNewComment() below - looks like the
 * context is leaked here. At this point ctxt->dict may alias doc->dict, so
 * a fix would have to detach the dictionary before freeing. TODO confirm. */
13048 return(XML_ERR_UNSUPPORTED_ENCODING);
13052 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13053 xmlDetectSAX2(ctxt);
/* Marker node: an empty comment appended to the context node before the
 * parse starts. */
13056 fake = xmlNewComment(NULL);
13057 if (fake == NULL) {
13058 xmlFreeParserCtxt(ctxt);
13059 return(XML_ERR_NO_MEMORY);
13061 xmlAddChild(node, fake);
13063 if (node->type == XML_ELEMENT_NODE) {
13064 nodePush(ctxt, node);
13066 * initialize the SAX2 namespaces stack
/* For each element visited, every declaration in nsDef is pushed unless
 * xmlGetNamespace() already finds its prefix in the context. */
13069 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13070 xmlNsPtr ns = cur->nsDef;
13071 const xmlChar *iprefix, *ihref;
13073 while (ns != NULL) {
13075 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13076 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13078 iprefix = ns->prefix;
13082 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13083 nsPush(ctxt, iprefix, ihref);
13090 ctxt->instate = XML_PARSER_CONTENT;
13093 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13095 * ID/IDREF registration will be done in xmlValidateElement below
13097 ctxt->loadsubset |= XML_SKIP_IDS;
13100 #ifdef LIBXML_HTML_ENABLED
13101 if (doc->type == XML_HTML_DOCUMENT_NODE)
13102 __htmlParseContent(ctxt);
13105 xmlParseContent(ctxt);
/* Leftover input after the content production is an error; so is ending
 * with a current node other than the context node. */
13108 if ((RAW == '<') && (NXT(1) == '/')) {
13109 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13110 } else if (RAW != 0) {
13111 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13113 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13114 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13115 ctxt->wellFormed = 0;
13118 if (!ctxt->wellFormed) {
13119 if (ctxt->errNo == 0)
13120 ret = XML_ERR_INTERNAL_ERROR;
13122 ret = (xmlParserErrors)ctxt->errNo;
13128 * Return the newly created nodeset after unlinking it from
13129 * the pseudo sibling.
13142 while (cur != NULL) {
13143 cur->parent = NULL;
13147 xmlUnlinkNode(fake);
/* On any error the node list handed back through lst is freed. */
13151 if (ret != XML_ERR_OK) {
13152 xmlFreeNodeList(*lst);
13156 if (doc->dict != NULL)
13158 xmlFreeParserCtxt(ctxt);
13162 return(XML_ERR_INTERNAL_ERROR);
13166 #ifdef LIBXML_SAX1_ENABLED
13168 * xmlParseBalancedChunkMemoryRecover:
13169 * @doc: the document the chunk pertains to
13170 * @sax: the SAX handler block (possibly NULL)
13171 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13172 * @depth: Used for loop detection, use 0
13173 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13174 * @lst: the return value for the set of parsed nodes
13175 * @recover: return nodes even if the data is broken (use 0)
13178 * Parse a well-balanced chunk of an XML document
13179 * called by the parser
13180 * The allowed sequence for the Well Balanced Chunk is the one defined by
13181 * the content production in the XML grammar:
13183 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13185 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13186 * the parser error code otherwise
13188 * In case recover is set to 1, the nodelist will not be empty even if
13189 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13193 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13194 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13196 xmlParserCtxtPtr ctxt;
13198 xmlSAXHandlerPtr oldsax = NULL;
13199 xmlNodePtr content, newRoot;
/* Parses `string` as element content in a new memory parser context. The
 * nodes are built under a "pseudoroot" element of a temporary document
 * (newDoc), which is freed before returning. */
13204 return(XML_ERR_ENTITY_LOOP);
13210 if (string == NULL)
13213 size = xmlStrlen(string);
13215 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13216 if (ctxt == NULL) return(-1);
13217 ctxt->userData = ctxt;
13219 oldsax = ctxt->sax;
13221 if (user_data != NULL)
13222 ctxt->userData = user_data;
13224 newDoc = xmlNewDoc(BAD_CAST "1.0");
13225 if (newDoc == NULL) {
13226 xmlFreeParserCtxt(ctxt);
13229 newDoc->properties = XML_DOC_INTERNAL;
/* When the caller's document has a dictionary, the context switches to it
 * (taking a reference) and re-interns the well-known strings; otherwise
 * the context is put in XML_PARSE_NODICT mode. */
13230 if ((doc != NULL) && (doc->dict != NULL)) {
13231 xmlDictFree(ctxt->dict);
13232 ctxt->dict = doc->dict;
13233 xmlDictReference(ctxt->dict);
13234 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13235 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13236 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13237 ctxt->dictNames = 1;
13239 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
/* newDoc only borrows the subsets of doc: both pointers are reset to NULL
 * before every xmlFreeDoc(newDoc) call in this function. */
13242 newDoc->intSubset = doc->intSubset;
13243 newDoc->extSubset = doc->extSubset;
13245 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13246 if (newRoot == NULL) {
13248 ctxt->sax = oldsax;
13249 xmlFreeParserCtxt(ctxt);
13250 newDoc->intSubset = NULL;
13251 newDoc->extSubset = NULL;
13252 xmlFreeDoc(newDoc);
13255 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13256 nodePush(ctxt, newRoot);
13258 ctxt->myDoc = newDoc;
13260 ctxt->myDoc = newDoc;
13261 newDoc->children->doc = doc;
13262 /* Ensure that doc has XML spec namespace */
13263 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13264 newDoc->oldNs = doc->oldNs;
13266 ctxt->instate = XML_PARSER_CONTENT;
13267 ctxt->depth = depth;
13270 * Doing validity checking on chunk doesn't make sense
13272 ctxt->validate = 0;
13273 ctxt->loadsubset = 0;
13274 xmlDetectSAX2(ctxt);
/* With a caller document, its children pointer is set to NULL for the
 * duration of the parse and restored right afterwards. */
13276 if ( doc != NULL ){
13277 content = doc->children;
13278 doc->children = NULL;
13279 xmlParseContent(ctxt);
13280 doc->children = content;
13283 xmlParseContent(ctxt);
/* Leftover input or a current node other than the pseudoroot is reported
 * as a fatal error. */
13285 if ((RAW == '<') && (NXT(1) == '/')) {
13286 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13287 } else if (RAW != 0) {
13288 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13290 if (ctxt->node != newDoc->children) {
13291 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13294 if (!ctxt->wellFormed) {
13295 if (ctxt->errNo == 0)
/* Nodes are handed back on success, or on failure too when recover == 1. */
13303 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13307 * Return the newly created nodeset after unlinking it from
13308 * they pseudo parent.
13310 cur = newDoc->children->children;
13312 while (cur != NULL) {
13313 xmlSetTreeDoc(cur, doc);
13314 cur->parent = NULL;
13317 newDoc->children->children = NULL;
/* Restore the original SAX handler and detach everything borrowed from doc
 * (subsets, oldNs) before the temporary document is freed. */
13321 ctxt->sax = oldsax;
13322 xmlFreeParserCtxt(ctxt);
13323 newDoc->intSubset = NULL;
13324 newDoc->extSubset = NULL;
13325 newDoc->oldNs = NULL;
13326 xmlFreeDoc(newDoc);
13332 * xmlSAXParseEntity:
13333 * @sax: the SAX handler block
13334 * @filename: the filename
13336 * parse an XML external entity out of context and build a tree.
13337 * It uses the given SAX function block to handle the parsing callback.
13338 * If sax is NULL, fallback to the default DOM tree building routines.
13340 * [78] extParsedEnt ::= TextDecl? content
13342 * This corresponds to a "Well Balanced" chunk
13344 * Returns the resulting document tree
13348 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
/* Creates a file parser context for `filename`, runs
 * xmlParseExtParsedEnt() on it and releases the context afterwards. */
13350 xmlParserCtxtPtr ctxt;
13352 ctxt = xmlCreateFileParserCtxt(filename);
13353 if (ctxt == NULL) {
/* The handler allocated with the context is released here - presumably
 * because the caller-supplied `sax` is installed instead (TODO confirm). */
13357 if (ctxt->sax != NULL)
13358 xmlFree(ctxt->sax);
13360 ctxt->userData = NULL;
13363 xmlParseExtParsedEnt(ctxt);
13365 if (ctxt->wellFormed)
/* The built document is freed and the pointer cleared - presumably only on
 * the not-well-formed branch (TODO confirm). */
13369 xmlFreeDoc(ctxt->myDoc);
13370 ctxt->myDoc = NULL;
13374 xmlFreeParserCtxt(ctxt);
13381 * @filename: the filename
13383 * parse an XML external entity out of context and build a tree.
13385 * [78] extParsedEnt ::= TextDecl? content
13387 * This corresponds to a "Well Balanced" chunk
13389 * Returns the resulting document tree
13393 xmlParseEntity(const char *filename) {
/* Convenience wrapper: calls xmlSAXParseEntity() with a NULL SAX handler. */
13394 return(xmlSAXParseEntity(NULL, filename));
13396 #endif /* LIBXML_SAX1_ENABLED */
13399 * xmlCreateEntityParserCtxtInternal:
13400 * @URL: the entity URL
13401 * @ID: the entity PUBLIC ID
13402 * @base: a possible base for the target URI
13403 * @pctx: parser context used to set options on new context
13405 * Create a parser context for an external entity
13406 * Automatic support for ZLIB/Compress compressed document is provided
13407 * by default if found at compile-time.
13409 * Returns the new parser context or NULL
13411 static xmlParserCtxtPtr
13412 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13413 const xmlChar *base, xmlParserCtxtPtr pctx) {
/* Allocates a new parser context, optionally copies options and _private
 * from `pctx`, loads the external entity and pushes it as the context's
 * input. The entity's directory is recorded in ctxt->directory when the
 * context does not already have one. */
13414 xmlParserCtxtPtr ctxt;
13415 xmlParserInputPtr inputStream;
13416 char *directory = NULL;
13419 ctxt = xmlNewParserCtxt();
13420 if (ctxt == NULL) {
13424 if (pctx != NULL) {
13425 ctxt->options = pctx->options;
13426 ctxt->_private = pctx->_private;
/* Resolve URL against base; the entity is then loaded either from the raw
 * URL or from the resolved uri - presumably depending on whether
 * xmlBuildURI() returned NULL (TODO confirm). */
13429 uri = xmlBuildURI(URL, base);
13432 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13433 if (inputStream == NULL) {
13434 xmlFreeParserCtxt(ctxt);
13438 inputPush(ctxt, inputStream);
13440 if ((ctxt->directory == NULL) && (directory == NULL))
13441 directory = xmlParserGetDirectory((char *)URL);
13442 if ((ctxt->directory == NULL) && (directory != NULL))
13443 ctxt->directory = directory;
13445 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13446 if (inputStream == NULL) {
13448 xmlFreeParserCtxt(ctxt);
13452 inputPush(ctxt, inputStream);
13454 if ((ctxt->directory == NULL) && (directory == NULL))
13455 directory = xmlParserGetDirectory((char *)uri);
13456 if ((ctxt->directory == NULL) && (directory != NULL))
13457 ctxt->directory = directory;
13464 * xmlCreateEntityParserCtxt:
13465 * @URL: the entity URL
13466 * @ID: the entity PUBLIC ID
13467 * @base: a possible base for the target URI
13469 * Create a parser context for an external entity
13470 * Automatic support for ZLIB/Compress compressed document is provided
13471 * by default if found at compile-time.
13473 * Returns the new parser context or NULL
13476 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13477 const xmlChar *base) {
/* Public entry point: same as the internal variant with no parent parser
 * context (pctx == NULL), so no options or _private value are inherited. */
13478 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13482 /************************************************************************
13484 * Front ends when parsing from a file *
13486 ************************************************************************/
13489 * xmlCreateURLParserCtxt:
13490 * @filename: the filename or URL
13491 * @options: a combination of xmlParserOption
13493 * Create a parser context for a file or URL content.
13494 * Automatic support for ZLIB/Compress compressed document is provided
13495 * by default if found at compile-time and for file accesses
13497 * Returns the new parser context or NULL
13500 xmlCreateURLParserCtxt(const char *filename, int options)
/* Allocates a parser context, applies `options`, enables line numbers,
 * loads `filename` through xmlLoadExternalEntity() and pushes it as the
 * context's input. */
13502 xmlParserCtxtPtr ctxt;
13503 xmlParserInputPtr inputStream;
13504 char *directory = NULL;
13506 ctxt = xmlNewParserCtxt();
13507 if (ctxt == NULL) {
13508 xmlErrMemory(NULL, "cannot allocate parser context");
13513 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13514 ctxt->linenumbers = 1;
13516 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
/* A load failure releases the freshly created context. */
13517 if (inputStream == NULL) {
13518 xmlFreeParserCtxt(ctxt);
13522 inputPush(ctxt, inputStream);
/* Record the file's directory only if the context has none yet. */
13523 if ((ctxt->directory == NULL) && (directory == NULL))
13524 directory = xmlParserGetDirectory(filename);
13525 if ((ctxt->directory == NULL) && (directory != NULL))
13526 ctxt->directory = directory;
13532 * xmlCreateFileParserCtxt:
13533 * @filename: the filename
13535 * Create a parser context for a file content.
13536 * Automatic support for ZLIB/Compress compressed document is provided
13537 * by default if found at compile-time.
13539 * Returns the new parser context or NULL
13542 xmlCreateFileParserCtxt(const char *filename)
/* Same as xmlCreateURLParserCtxt() with an options value of 0. */
13544 return(xmlCreateURLParserCtxt(filename, 0));
13547 #ifdef LIBXML_SAX1_ENABLED
13549 * xmlSAXParseFileWithData:
13550 * @sax: the SAX handler block
13551 * @filename: the filename
13552 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13554 * @data: the userdata
13556 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13557 * compressed document is provided by default if found at compile-time.
13558 * It uses the given SAX function block to handle the parsing callback.
13559 * If sax is NULL, fallback to the default DOM tree building routines.
13561 * User data (void *) is stored within the parser context in the
13562 * context's _private member, so it is available nearly everywhere in libxml
13564 * Returns the resulting document tree
13568 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13569 int recovery, void *data) {
/* Creates a file parser context for `filename`, stores `data` in the
 * context's _private field, parses the whole document and frees the
 * context before returning. */
13571 xmlParserCtxtPtr ctxt;
13575 ctxt = xmlCreateFileParserCtxt(filename);
13576 if (ctxt == NULL) {
13580 if (ctxt->sax != NULL)
13581 xmlFree(ctxt->sax);
13584 xmlDetectSAX2(ctxt);
13586 ctxt->_private = data;
13589 if (ctxt->directory == NULL)
13590 ctxt->directory = xmlParserGetDirectory(filename);
13592 ctxt->recovery = recovery;
13594 xmlParseDocument(ctxt);
/* The document is kept when the input was well formed or recovery mode
 * was requested. */
13596 if ((ctxt->wellFormed) || recovery) {
/* A positive "compressed" value on the input buffer is recorded as
 * compression level 9 on the document; otherwise the value is copied. */
13599 if (ctxt->input->buf->compressed > 0)
13600 ret->compression = 9;
13602 ret->compression = ctxt->input->buf->compressed;
13607 xmlFreeDoc(ctxt->myDoc);
13608 ctxt->myDoc = NULL;
13612 xmlFreeParserCtxt(ctxt);
13619 * @sax: the SAX handler block
13620 * @filename: the filename
13621 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13624 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13625 * compressed document is provided by default if found at compile-time.
13626 * It uses the given SAX function block to handle the parsing callback.
13627 * If sax is NULL, fallback to the default DOM tree building routines.
13629 * Returns the resulting document tree
13633 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
/* Wrapper around xmlSAXParseFileWithData() passing NULL as user data. */
13635 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13640 * @cur: a pointer to an array of xmlChar
13642 * parse an XML in-memory document and build a tree.
13643 * In the case the document is not Well Formed, an attempt to build a
13644 * tree is tried anyway
13646 * Returns the resulting document tree or NULL in case of failure
13650 xmlRecoverDoc(const xmlChar *cur) {
/* xmlSAXParseDoc() with a NULL SAX handler and recovery set to 1. */
13651 return(xmlSAXParseDoc(NULL, cur, 1));
13656 * @filename: the filename
13658 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13659 * compressed document is provided by default if found at compile-time.
13661 * Returns the resulting document tree if the file was wellformed,
13666 xmlParseFile(const char *filename) {
/* xmlSAXParseFile() with a NULL SAX handler and recovery set to 0. */
13667 return(xmlSAXParseFile(NULL, filename, 0));
13672 * @filename: the filename
13674 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13675 * compressed document is provided by default if found at compile-time.
13676 * In the case the document is not Well Formed, it attempts to build
13679 * Returns the resulting document tree or NULL in case of failure
13683 xmlRecoverFile(const char *filename) {
/* xmlSAXParseFile() with a NULL SAX handler and recovery set to 1. */
13684 return(xmlSAXParseFile(NULL, filename, 1));
13689 * xmlSetupParserForBuffer:
13690 * @ctxt: an XML parser context
13691 * @buffer: a xmlChar * buffer
13692 * @filename: a file name
13694 * Setup the parser context to parse a new buffer; Clears any prior
13695 * contents from the parser context. The buffer parameter must not be
13696 * NULL, but the filename parameter can be
13699 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13700 const char* filename)
/* Clears `ctxt` and installs a new input stream that reads from `buffer`.
 * Nothing is done when ctxt or buffer is NULL. */
13702 xmlParserInputPtr input;
13704 if ((ctxt == NULL) || (buffer == NULL))
13707 input = xmlNewInputStream(ctxt);
13708 if (input == NULL) {
13709 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13710 xmlClearParserCtxt(ctxt);
13714 xmlClearParserCtxt(ctxt);
13715 if (filename != NULL)
13716 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
/* The input points straight into the caller's buffer (no copy is made
 * here); its end is found with xmlStrlen(), so the buffer has to be
 * zero-terminated. */
13717 input->base = buffer;
13718 input->cur = buffer;
13719 input->end = &buffer[xmlStrlen(buffer)];
13720 inputPush(ctxt, input);
13724 * xmlSAXUserParseFile:
13725 * @sax: a SAX handler
13726 * @user_data: The user data returned on SAX callbacks
13727 * @filename: a file name
13729 * parse an XML file and call the given SAX handler routines.
13730 * Automatic support for ZLIB/Compress compressed document is provided
13732 * Returns 0 in case of success or an error number otherwise
13735 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13736 const char *filename) {
/* Parses `filename` with a file parser context; any document built during
 * the parse is freed before the context is released. */
13738 xmlParserCtxtPtr ctxt;
13740 ctxt = xmlCreateFileParserCtxt(filename);
13741 if (ctxt == NULL) return -1;
/* The context's own handler is freed unless it is the static default
 * handler object. */
13742 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13743 xmlFree(ctxt->sax);
13745 xmlDetectSAX2(ctxt);
/* userData is only overridden when the caller supplied a value. */
13747 if (user_data != NULL)
13748 ctxt->userData = user_data;
13750 xmlParseDocument(ctxt);
13752 if (ctxt->wellFormed)
13755 if (ctxt->errNo != 0)
13762 if (ctxt->myDoc != NULL) {
13763 xmlFreeDoc(ctxt->myDoc);
13764 ctxt->myDoc = NULL;
13766 xmlFreeParserCtxt(ctxt);
13770 #endif /* LIBXML_SAX1_ENABLED */
13772 /************************************************************************
13774 * Front ends when parsing from memory *
13776 ************************************************************************/
13779 * xmlCreateMemoryParserCtxt:
13780 * @buffer: a pointer to a char array
13781 * @size: the size of the array
13783 * Create a parser context for an XML in-memory document.
13785 * Returns the new parser context or NULL
13788 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
/* Builds a parser context whose single input stream reads from a parser
 * input buffer created over `buffer`/`size`. */
13789 xmlParserCtxtPtr ctxt;
13790 xmlParserInputPtr input;
13791 xmlParserInputBufferPtr buf;
13793 if (buffer == NULL)
13798 ctxt = xmlNewParserCtxt();
13802 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
/* The input buffer is created with XML_CHAR_ENCODING_NONE. */
13803 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13805 xmlFreeParserCtxt(ctxt);
13809 input = xmlNewInputStream(ctxt);
/* If the stream cannot be allocated, both the input buffer and the
 * context created above are released. */
13810 if (input == NULL) {
13811 xmlFreeParserInputBuffer(buf);
13812 xmlFreeParserCtxt(ctxt);
13816 input->filename = NULL;
/* base/cur start at the beginning of the buffered content, end points
 * just past the `use` bytes currently held. */
13818 input->base = input->buf->buffer->content;
13819 input->cur = input->buf->buffer->content;
13820 input->end = &input->buf->buffer->content[input->buf->buffer->use];
13822 inputPush(ctxt, input);
13826 #ifdef LIBXML_SAX1_ENABLED
13828 * xmlSAXParseMemoryWithData:
13829 * @sax: the SAX handler block
13830 * @buffer: a pointer to a char array
13831 * @size: the size of the array
13832 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13834 * @data: the userdata
13836 * parse an XML in-memory block and use the given SAX function block
13837 * to handle the parsing callback. If sax is NULL, fallback to the default
13838 * DOM tree building routines.
13840 * User data (void *) is stored within the parser context in the
13841 * context's _private member, so it is available nearly everywhere in libxml
13843 * Returns the resulting document tree
13847 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13848 int size, int recovery, void *data) {
/* Parses the in-memory block `buffer`/`size` as a complete document. `data`
 * is stored in the context's _private field; the context is freed before
 * returning. */
13850 xmlParserCtxtPtr ctxt;
13854 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13855 if (ctxt == NULL) return(NULL);
13857 if (ctxt->sax != NULL)
13858 xmlFree(ctxt->sax);
13861 xmlDetectSAX2(ctxt);
13863 ctxt->_private=data;
13866 ctxt->recovery = recovery;
13868 xmlParseDocument(ctxt);
/* The document is returned when the input was well formed or recovery
 * mode was requested. */
13870 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13873 xmlFreeDoc(ctxt->myDoc);
13874 ctxt->myDoc = NULL;
13878 xmlFreeParserCtxt(ctxt);
13884 * xmlSAXParseMemory:
13885 * @sax: the SAX handler block
13886 * @buffer: a pointer to a char array
13887 * @size: the size of the array
13888 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13891 * parse an XML in-memory block and use the given SAX function block
13892 * to handle the parsing callback. If sax is NULL, fallback to the default
13893 * DOM tree building routines.
13895 * Returns the resulting document tree
13898 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13899 int size, int recovery) {
/* Wrapper around xmlSAXParseMemoryWithData() passing NULL as user data. */
13900 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13905 * @buffer: a pointer to a char array
13906 * @size: the size of the array
13908 * parse an XML in-memory block and build a tree.
13910 * Returns the resulting document tree
13913 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
/* xmlSAXParseMemory() with a NULL SAX handler and recovery set to 0. */
13914 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13918 * xmlRecoverMemory:
13919 * @buffer: a pointer to a char array
13920 * @size: the size of the array
13922 * parse an XML in-memory block and build a tree.
13923 * In the case the document is not Well Formed, an attempt to
13924 * build a tree is tried anyway
13926 * Returns the resulting document tree or NULL in case of error
13929 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
/* xmlSAXParseMemory() with a NULL SAX handler and recovery set to 1. */
13930 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13934 * xmlSAXUserParseMemory:
13935 * @sax: a SAX handler
13936 * @user_data: The user data returned on SAX callbacks
13937 * @buffer: an in-memory XML document input
13938 * @size: the length of the XML document in bytes
13940 * A better SAX parsing routine.
13941 * parse an XML in-memory buffer and call the given SAX handler routines.
13943 * Returns 0 in case of success or an error number otherwise
13945 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13946 const char *buffer, int size) {
/* Parses the in-memory block `buffer`/`size`; any document built during
 * the parse is freed before the context is released. */
13948 xmlParserCtxtPtr ctxt;
13952 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13953 if (ctxt == NULL) return -1;
/* The context's own handler is freed unless it is the static default
 * handler object. */
13954 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13955 xmlFree(ctxt->sax);
13957 xmlDetectSAX2(ctxt);
/* userData is only overridden when the caller supplied a value. */
13959 if (user_data != NULL)
13960 ctxt->userData = user_data;
13962 xmlParseDocument(ctxt);
13964 if (ctxt->wellFormed)
13967 if (ctxt->errNo != 0)
13974 if (ctxt->myDoc != NULL) {
13975 xmlFreeDoc(ctxt->myDoc);
13976 ctxt->myDoc = NULL;
13978 xmlFreeParserCtxt(ctxt);
13982 #endif /* LIBXML_SAX1_ENABLED */
13985 * xmlCreateDocParserCtxt:
13986 * @cur: a pointer to an array of xmlChar
13988 * Creates a parser context for an XML in-memory document.
13990 * Returns the new parser context or NULL
13993 xmlCreateDocParserCtxt(const xmlChar *cur) {
/* Measures `cur` with xmlStrlen() and delegates to
 * xmlCreateMemoryParserCtxt() with that length. */
13998 len = xmlStrlen(cur);
13999 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14002 #ifdef LIBXML_SAX1_ENABLED
14005 * @sax: the SAX handler block
14006 * @cur: a pointer to an array of xmlChar
14007 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14010 * parse an XML in-memory document and build a tree.
14011 * It uses the given SAX function block to handle the parsing callback.
14012 * If sax is NULL, fallback to the default DOM tree building routines.
14014 * Returns the resulting document tree
14018 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
/* Parses the zero-terminated string `cur` as a complete document and frees
 * the parser context before returning. */
14020 xmlParserCtxtPtr ctxt;
14021 xmlSAXHandlerPtr oldsax = NULL;
14023 if (cur == NULL) return(NULL);
14026 ctxt = xmlCreateDocParserCtxt(cur);
14027 if (ctxt == NULL) return(NULL);
/* The context's original handler is remembered in oldsax and put back
 * before the context is freed. */
14029 oldsax = ctxt->sax;
14031 ctxt->userData = NULL;
14033 xmlDetectSAX2(ctxt);
14035 xmlParseDocument(ctxt);
/* The document is returned when the input was well formed or recovery
 * mode was requested. */
14036 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14039 xmlFreeDoc(ctxt->myDoc);
14040 ctxt->myDoc = NULL;
14043 ctxt->sax = oldsax;
14044 xmlFreeParserCtxt(ctxt);
14051 * @cur: a pointer to an array of xmlChar
14053 * parse an XML in-memory document and build a tree.
14055 * Returns the resulting document tree
14059 xmlParseDoc(const xmlChar *cur) {
/* xmlSAXParseDoc() with a NULL SAX handler and recovery set to 0. */
14060 return(xmlSAXParseDoc(NULL, cur, 0));
14062 #endif /* LIBXML_SAX1_ENABLED */
14064 #ifdef LIBXML_LEGACY_ENABLED
14065 /************************************************************************
14067 * Specific function to keep track of entities references *
14068 * and used by the XSLT debugger *
14070 ************************************************************************/
14072 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14075 * xmlAddEntityReference:
14076 * @ent : A valid entity
14077 * @firstNode : A valid first node for children of entity
14078 * @lastNode : A valid last node of children entity
14080 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14083 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14084 xmlNodePtr lastNode)
/* Forwards the three arguments to the registered xmlEntityRefFunc
 * callback; does nothing when no callback has been set. */
14086 if (xmlEntityRefFunc != NULL) {
14087 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14093 * xmlSetEntityReferenceFunc:
14094 * @func: A valid function
14096 * Set the function to call back when an XML entity reference has been made
14099 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
/* Stores `func` in the file-level xmlEntityRefFunc variable, which
 * xmlAddEntityReference() invokes. */
14101 xmlEntityRefFunc = func;
14103 #endif /* LIBXML_LEGACY_ENABLED */
14105 /************************************************************************
14109 ************************************************************************/
14111 #ifdef LIBXML_XPATH_ENABLED
14112 #include <libxml/xpath.h>
14115 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14116 static int xmlParserInitialized = 0;
14121 * Initialization function for the XML parser.
14122 * This is not reentrant. Call once before processing in case of
14123 * use in multithreaded programs.
14127 xmlInitParser(void) {
/* One-time library initialisation, guarded by the file-level
 * xmlParserInitialized flag.
 * NOTE(review): the first test of the flag happens before the global init
 * mutex is taken and the flag is a plain static int, so that read is not
 * synchronised with the write below - confirm this is acceptable on the
 * supported platforms. */
14128 if (xmlParserInitialized != 0)
14131 #ifdef LIBXML_THREAD_ENABLED
14132 __xmlGlobalInitMutexLock();
/* Re-check under the lock. */
14133 if (xmlParserInitialized == 0) {
/* Install the default generic error function only when none, or the
 * default one, is currently set. */
14137 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14138 (xmlGenericError == NULL))
14139 initGenericErrorDefaultFunc(NULL);
14141 xmlInitCharEncodingHandlers();
14142 xmlDefaultSAXHandlerInit();
14143 xmlRegisterDefaultInputCallbacks();
14144 #ifdef LIBXML_OUTPUT_ENABLED
14145 xmlRegisterDefaultOutputCallbacks();
14146 #endif /* LIBXML_OUTPUT_ENABLED */
14147 #ifdef LIBXML_HTML_ENABLED
14148 htmlInitAutoClose();
14149 htmlDefaultSAXHandlerInit();
14151 #ifdef LIBXML_XPATH_ENABLED
14154 xmlParserInitialized = 1;
14155 #ifdef LIBXML_THREAD_ENABLED
14157 __xmlGlobalInitMutexUnlock();
14162 * xmlCleanupParser:
14164 * This function name is somewhat misleading. It does not clean up
14165 * parser state, it cleans up memory allocated by the library itself.
14166 * It is a cleanup function for the XML library. It tries to reclaim all
14167 * related global memory allocated for the library processing.
14168 * It doesn't deallocate any document related memory. One should
14169 * call xmlCleanupParser() only when the process has finished using
14170 * the library and all XML/HTML documents built with it.
14171 * See also xmlInitParser() which has the opposite function of preparing
14172 * the library for operations.
14174 * WARNING: if your application is multithreaded or has plugin support
14175 * calling this may crash the application if another thread or
14176 * a plugin is still using libxml2. It's sometimes very hard to
14177 * guess if libxml2 is in use in the application, some libraries
14178 * or plugins may use it without notice. In case of doubt abstain
14179 * from calling this function or do it just before calling exit()
14180 * to avoid leak reports from valgrind !
14184 xmlCleanupParser(void) {
14185 if (!xmlParserInitialized)
14188 xmlCleanupCharEncodingHandlers();
14189 #ifdef LIBXML_CATALOG_ENABLED
14190 xmlCatalogCleanup();
14193 xmlCleanupInputCallbacks();
14194 #ifdef LIBXML_OUTPUT_ENABLED
14195 xmlCleanupOutputCallbacks();
14197 #ifdef LIBXML_SCHEMAS_ENABLED
14198 xmlSchemaCleanupTypes();
14199 xmlRelaxNGCleanupTypes();
14201 xmlCleanupGlobals();
14202 xmlResetLastError();
14203 xmlCleanupThreads(); /* must be last if called not from the main thread */
14204 xmlCleanupMemory();
14205 xmlParserInitialized = 0;
14208 /************************************************************************
14210 * New set (2.6.0) of simpler and more flexible APIs *
14212 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is a single statement (do/while(0)) so the macro can
 * be used safely as the body of an if/else; the caller supplies the
 * terminating semicolon.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
14228 * @ctxt: an XML parser context
14230 * Reset a parser context
14233 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14235 xmlParserInputPtr input;
14243 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14244 xmlFreeInputStream(input);
14247 ctxt->input = NULL;
14250 if (ctxt->spaceTab != NULL) {
14251 ctxt->spaceTab[0] = -1;
14252 ctxt->space = &ctxt->spaceTab[0];
14254 ctxt->space = NULL;
14264 DICT_FREE(ctxt->version);
14265 ctxt->version = NULL;
14266 DICT_FREE(ctxt->encoding);
14267 ctxt->encoding = NULL;
14268 DICT_FREE(ctxt->directory);
14269 ctxt->directory = NULL;
14270 DICT_FREE(ctxt->extSubURI);
14271 ctxt->extSubURI = NULL;
14272 DICT_FREE(ctxt->extSubSystem);
14273 ctxt->extSubSystem = NULL;
14274 if (ctxt->myDoc != NULL)
14275 xmlFreeDoc(ctxt->myDoc);
14276 ctxt->myDoc = NULL;
14278 ctxt->standalone = -1;
14279 ctxt->hasExternalSubset = 0;
14280 ctxt->hasPErefs = 0;
14282 ctxt->external = 0;
14283 ctxt->instate = XML_PARSER_START;
14286 ctxt->wellFormed = 1;
14287 ctxt->nsWellFormed = 1;
14288 ctxt->disableSAX = 0;
14291 ctxt->vctxt.userData = ctxt;
14292 ctxt->vctxt.error = xmlParserValidityError;
14293 ctxt->vctxt.warning = xmlParserValidityWarning;
14295 ctxt->record_info = 0;
14297 ctxt->checkIndex = 0;
14298 ctxt->inSubset = 0;
14299 ctxt->errNo = XML_ERR_OK;
14301 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14302 ctxt->catalogs = NULL;
14303 ctxt->nbentities = 0;
14304 ctxt->sizeentities = 0;
14305 xmlInitNodeInfoSeq(&ctxt->node_seq);
14307 if (ctxt->attsDefault != NULL) {
14308 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14309 ctxt->attsDefault = NULL;
14311 if (ctxt->attsSpecial != NULL) {
14312 xmlHashFree(ctxt->attsSpecial, NULL);
14313 ctxt->attsSpecial = NULL;
14316 #ifdef LIBXML_CATALOG_ENABLED
14317 if (ctxt->catalogs != NULL)
14318 xmlCatalogFreeLocal(ctxt->catalogs);
14320 if (ctxt->lastError.code != XML_ERR_OK)
14321 xmlResetError(&ctxt->lastError);
14325 * xmlCtxtResetPush:
14326 * @ctxt: an XML parser context
14327 * @chunk: a pointer to an array of chars
14328 * @size: number of chars in the array
14329 * @filename: an optional file name or URI
14330 * @encoding: the document encoding, or NULL
14332 * Reset a push parser context
14334 * Returns 0 in case of success and 1 in case of error
14337 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14338 int size, const char *filename, const char *encoding)
14340 xmlParserInputPtr inputStream;
14341 xmlParserInputBufferPtr buf;
14342 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14347 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14348 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14350 buf = xmlAllocParserInputBuffer(enc);
14354 if (ctxt == NULL) {
14355 xmlFreeParserInputBuffer(buf);
14359 xmlCtxtReset(ctxt);
14361 if (ctxt->pushTab == NULL) {
14362 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14363 sizeof(xmlChar *));
14364 if (ctxt->pushTab == NULL) {
14365 xmlErrMemory(ctxt, NULL);
14366 xmlFreeParserInputBuffer(buf);
14371 if (filename == NULL) {
14372 ctxt->directory = NULL;
14374 ctxt->directory = xmlParserGetDirectory(filename);
14377 inputStream = xmlNewInputStream(ctxt);
14378 if (inputStream == NULL) {
14379 xmlFreeParserInputBuffer(buf);
14383 if (filename == NULL)
14384 inputStream->filename = NULL;
14386 inputStream->filename = (char *)
14387 xmlCanonicPath((const xmlChar *) filename);
14388 inputStream->buf = buf;
14389 inputStream->base = inputStream->buf->buffer->content;
14390 inputStream->cur = inputStream->buf->buffer->content;
14392 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14394 inputPush(ctxt, inputStream);
14396 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14397 (ctxt->input->buf != NULL)) {
14398 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14399 int cur = ctxt->input->cur - ctxt->input->base;
14401 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14403 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14404 ctxt->input->cur = ctxt->input->base + cur;
14406 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14409 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14413 if (encoding != NULL) {
14414 xmlCharEncodingHandlerPtr hdlr;
14416 if (ctxt->encoding != NULL)
14417 xmlFree((xmlChar *) ctxt->encoding);
14418 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14420 hdlr = xmlFindCharEncodingHandler(encoding);
14421 if (hdlr != NULL) {
14422 xmlSwitchToEncoding(ctxt, hdlr);
14424 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14425 "Unsupported encoding %s\n", BAD_CAST encoding);
14427 } else if (enc != XML_CHAR_ENCODING_NONE) {
14428 xmlSwitchEncoding(ctxt, enc);
14436 * xmlCtxtUseOptionsInternal:
14437 * @ctxt: an XML parser context
14438 * @options: a combination of xmlParserOption
14439 * @encoding: the user provided encoding to use
14441 * Applies the options to the parser context
14443 * Returns 0 in case of success, the set of unknown or unimplemented options
14444 * in case of error.
14447 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14451 if (encoding != NULL) {
14452 if (ctxt->encoding != NULL)
14453 xmlFree((xmlChar *) ctxt->encoding);
14454 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14456 if (options & XML_PARSE_RECOVER) {
14457 ctxt->recovery = 1;
14458 options -= XML_PARSE_RECOVER;
14459 ctxt->options |= XML_PARSE_RECOVER;
14461 ctxt->recovery = 0;
14462 if (options & XML_PARSE_DTDLOAD) {
14463 ctxt->loadsubset = XML_DETECT_IDS;
14464 options -= XML_PARSE_DTDLOAD;
14465 ctxt->options |= XML_PARSE_DTDLOAD;
14467 ctxt->loadsubset = 0;
14468 if (options & XML_PARSE_DTDATTR) {
14469 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14470 options -= XML_PARSE_DTDATTR;
14471 ctxt->options |= XML_PARSE_DTDATTR;
14473 if (options & XML_PARSE_NOENT) {
14474 ctxt->replaceEntities = 1;
14475 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14476 options -= XML_PARSE_NOENT;
14477 ctxt->options |= XML_PARSE_NOENT;
14479 ctxt->replaceEntities = 0;
14480 if (options & XML_PARSE_PEDANTIC) {
14481 ctxt->pedantic = 1;
14482 options -= XML_PARSE_PEDANTIC;
14483 ctxt->options |= XML_PARSE_PEDANTIC;
14485 ctxt->pedantic = 0;
14486 if (options & XML_PARSE_NOBLANKS) {
14487 ctxt->keepBlanks = 0;
14488 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14489 options -= XML_PARSE_NOBLANKS;
14490 ctxt->options |= XML_PARSE_NOBLANKS;
14492 ctxt->keepBlanks = 1;
14493 if (options & XML_PARSE_DTDVALID) {
14494 ctxt->validate = 1;
14495 if (options & XML_PARSE_NOWARNING)
14496 ctxt->vctxt.warning = NULL;
14497 if (options & XML_PARSE_NOERROR)
14498 ctxt->vctxt.error = NULL;
14499 options -= XML_PARSE_DTDVALID;
14500 ctxt->options |= XML_PARSE_DTDVALID;
14502 ctxt->validate = 0;
14503 if (options & XML_PARSE_NOWARNING) {
14504 ctxt->sax->warning = NULL;
14505 options -= XML_PARSE_NOWARNING;
14507 if (options & XML_PARSE_NOERROR) {
14508 ctxt->sax->error = NULL;
14509 ctxt->sax->fatalError = NULL;
14510 options -= XML_PARSE_NOERROR;
14512 #ifdef LIBXML_SAX1_ENABLED
14513 if (options & XML_PARSE_SAX1) {
14514 ctxt->sax->startElement = xmlSAX2StartElement;
14515 ctxt->sax->endElement = xmlSAX2EndElement;
14516 ctxt->sax->startElementNs = NULL;
14517 ctxt->sax->endElementNs = NULL;
14518 ctxt->sax->initialized = 1;
14519 options -= XML_PARSE_SAX1;
14520 ctxt->options |= XML_PARSE_SAX1;
14522 #endif /* LIBXML_SAX1_ENABLED */
14523 if (options & XML_PARSE_NODICT) {
14524 ctxt->dictNames = 0;
14525 options -= XML_PARSE_NODICT;
14526 ctxt->options |= XML_PARSE_NODICT;
14528 ctxt->dictNames = 1;
14530 if (options & XML_PARSE_NOCDATA) {
14531 ctxt->sax->cdataBlock = NULL;
14532 options -= XML_PARSE_NOCDATA;
14533 ctxt->options |= XML_PARSE_NOCDATA;
14535 if (options & XML_PARSE_NSCLEAN) {
14536 ctxt->options |= XML_PARSE_NSCLEAN;
14537 options -= XML_PARSE_NSCLEAN;
14539 if (options & XML_PARSE_NONET) {
14540 ctxt->options |= XML_PARSE_NONET;
14541 options -= XML_PARSE_NONET;
14543 if (options & XML_PARSE_COMPACT) {
14544 ctxt->options |= XML_PARSE_COMPACT;
14545 options -= XML_PARSE_COMPACT;
14547 if (options & XML_PARSE_OLD10) {
14548 ctxt->options |= XML_PARSE_OLD10;
14549 options -= XML_PARSE_OLD10;
14551 if (options & XML_PARSE_NOBASEFIX) {
14552 ctxt->options |= XML_PARSE_NOBASEFIX;
14553 options -= XML_PARSE_NOBASEFIX;
14555 if (options & XML_PARSE_HUGE) {
14556 ctxt->options |= XML_PARSE_HUGE;
14557 options -= XML_PARSE_HUGE;
14559 if (options & XML_PARSE_OLDSAX) {
14560 ctxt->options |= XML_PARSE_OLDSAX;
14561 options -= XML_PARSE_OLDSAX;
14563 ctxt->linenumbers = 1;
14568 * xmlCtxtUseOptions:
14569 * @ctxt: an XML parser context
14570 * @options: a combination of xmlParserOption
14572 * Applies the options to the parser context
14574 * Returns 0 in case of success, the set of unknown or unimplemented options
14575 * in case of error.
14578 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14580 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14585 * @ctxt: an XML parser context
14586 * @URL: the base URL to use for the document
14587 * @encoding: the document encoding, or NULL
14588 * @options: a combination of xmlParserOption
14589 * @reuse: keep the context for reuse
14591 * Common front-end for the xmlRead functions
14593 * Returns the resulting document tree or NULL
14596 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14597 int options, int reuse)
14601 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14602 if (encoding != NULL) {
14603 xmlCharEncodingHandlerPtr hdlr;
14605 hdlr = xmlFindCharEncodingHandler(encoding);
14607 xmlSwitchToEncoding(ctxt, hdlr);
14609 if ((URL != NULL) && (ctxt->input != NULL) &&
14610 (ctxt->input->filename == NULL))
14611 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14612 xmlParseDocument(ctxt);
14613 if ((ctxt->wellFormed) || ctxt->recovery)
14617 if (ctxt->myDoc != NULL) {
14618 xmlFreeDoc(ctxt->myDoc);
14621 ctxt->myDoc = NULL;
14623 xmlFreeParserCtxt(ctxt);
14631 * @cur: a pointer to a zero terminated string
14632 * @URL: the base URL to use for the document
14633 * @encoding: the document encoding, or NULL
14634 * @options: a combination of xmlParserOption
14636 * parse an XML in-memory document and build a tree.
14638 * Returns the resulting document tree
14641 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14643 xmlParserCtxtPtr ctxt;
14648 ctxt = xmlCreateDocParserCtxt(cur);
14651 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14656 * @filename: a file or URL
14657 * @encoding: the document encoding, or NULL
14658 * @options: a combination of xmlParserOption
14660 * parse an XML file from the filesystem or the network.
14662 * Returns the resulting document tree
14665 xmlReadFile(const char *filename, const char *encoding, int options)
14667 xmlParserCtxtPtr ctxt;
14669 ctxt = xmlCreateURLParserCtxt(filename, options);
14672 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14677 * @buffer: a pointer to a char array
14678 * @size: the size of the array
14679 * @URL: the base URL to use for the document
14680 * @encoding: the document encoding, or NULL
14681 * @options: a combination of xmlParserOption
14683 * parse an XML in-memory document and build a tree.
14685 * Returns the resulting document tree
14688 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14690 xmlParserCtxtPtr ctxt;
14692 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14695 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14700 * @fd: an open file descriptor
14701 * @URL: the base URL to use for the document
14702 * @encoding: the document encoding, or NULL
14703 * @options: a combination of xmlParserOption
14705 * parse an XML from a file descriptor and build a tree.
14706 * NOTE that the file descriptor will not be closed when the
14707 * reader is closed or reset.
14709 * Returns the resulting document tree
14712 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14714 xmlParserCtxtPtr ctxt;
14715 xmlParserInputBufferPtr input;
14716 xmlParserInputPtr stream;
14721 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14724 input->closecallback = NULL;
14725 ctxt = xmlNewParserCtxt();
14726 if (ctxt == NULL) {
14727 xmlFreeParserInputBuffer(input);
14730 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14731 if (stream == NULL) {
14732 xmlFreeParserInputBuffer(input);
14733 xmlFreeParserCtxt(ctxt);
14736 inputPush(ctxt, stream);
14737 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14742 * @ioread: an I/O read function
14743 * @ioclose: an I/O close function
14744 * @ioctx: an I/O handler
14745 * @URL: the base URL to use for the document
14746 * @encoding: the document encoding, or NULL
14747 * @options: a combination of xmlParserOption
14749 * parse an XML document from I/O functions and source and build a tree.
14751 * Returns the resulting document tree
14754 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14755 void *ioctx, const char *URL, const char *encoding, int options)
14757 xmlParserCtxtPtr ctxt;
14758 xmlParserInputBufferPtr input;
14759 xmlParserInputPtr stream;
14761 if (ioread == NULL)
14764 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14765 XML_CHAR_ENCODING_NONE);
14768 ctxt = xmlNewParserCtxt();
14769 if (ctxt == NULL) {
14770 xmlFreeParserInputBuffer(input);
14773 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14774 if (stream == NULL) {
14775 xmlFreeParserInputBuffer(input);
14776 xmlFreeParserCtxt(ctxt);
14779 inputPush(ctxt, stream);
14780 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14785 * @ctxt: an XML parser context
14786 * @cur: a pointer to a zero terminated string
14787 * @URL: the base URL to use for the document
14788 * @encoding: the document encoding, or NULL
14789 * @options: a combination of xmlParserOption
14791 * parse an XML in-memory document and build a tree.
14792 * This reuses the existing @ctxt parser context
14794 * Returns the resulting document tree
14797 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14798 const char *URL, const char *encoding, int options)
14800 xmlParserInputPtr stream;
14807 xmlCtxtReset(ctxt);
14809 stream = xmlNewStringInputStream(ctxt, cur);
14810 if (stream == NULL) {
14813 inputPush(ctxt, stream);
14814 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14819 * @ctxt: an XML parser context
14820 * @filename: a file or URL
14821 * @encoding: the document encoding, or NULL
14822 * @options: a combination of xmlParserOption
14824 * parse an XML file from the filesystem or the network.
14825 * This reuses the existing @ctxt parser context
14827 * Returns the resulting document tree
14830 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14831 const char *encoding, int options)
14833 xmlParserInputPtr stream;
14835 if (filename == NULL)
14840 xmlCtxtReset(ctxt);
14842 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14843 if (stream == NULL) {
14846 inputPush(ctxt, stream);
14847 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14851 * xmlCtxtReadMemory:
14852 * @ctxt: an XML parser context
14853 * @buffer: a pointer to a char array
14854 * @size: the size of the array
14855 * @URL: the base URL to use for the document
14856 * @encoding: the document encoding, or NULL
14857 * @options: a combination of xmlParserOption
14859 * parse an XML in-memory document and build a tree.
14860 * This reuses the existing @ctxt parser context
14862 * Returns the resulting document tree
14865 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14866 const char *URL, const char *encoding, int options)
14868 xmlParserInputBufferPtr input;
14869 xmlParserInputPtr stream;
14873 if (buffer == NULL)
14876 xmlCtxtReset(ctxt);
14878 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14879 if (input == NULL) {
14883 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14884 if (stream == NULL) {
14885 xmlFreeParserInputBuffer(input);
14889 inputPush(ctxt, stream);
14890 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14895 * @ctxt: an XML parser context
14896 * @fd: an open file descriptor
14897 * @URL: the base URL to use for the document
14898 * @encoding: the document encoding, or NULL
14899 * @options: a combination of xmlParserOption
14901 * parse an XML from a file descriptor and build a tree.
14902 * This reuses the existing @ctxt parser context
14903 * NOTE that the file descriptor will not be closed when the
14904 * reader is closed or reset.
14906 * Returns the resulting document tree
14909 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14910 const char *URL, const char *encoding, int options)
14912 xmlParserInputBufferPtr input;
14913 xmlParserInputPtr stream;
14920 xmlCtxtReset(ctxt);
14923 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14926 input->closecallback = NULL;
14927 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14928 if (stream == NULL) {
14929 xmlFreeParserInputBuffer(input);
14932 inputPush(ctxt, stream);
14933 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14938 * @ctxt: an XML parser context
14939 * @ioread: an I/O read function
14940 * @ioclose: an I/O close function
14941 * @ioctx: an I/O handler
14942 * @URL: the base URL to use for the document
14943 * @encoding: the document encoding, or NULL
14944 * @options: a combination of xmlParserOption
14946 * parse an XML document from I/O functions and source and build a tree.
14947 * This reuses the existing @ctxt parser context
14949 * Returns the resulting document tree
14952 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14953 xmlInputCloseCallback ioclose, void *ioctx,
14955 const char *encoding, int options)
14957 xmlParserInputBufferPtr input;
14958 xmlParserInputPtr stream;
14960 if (ioread == NULL)
14965 xmlCtxtReset(ctxt);
14967 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14968 XML_CHAR_ENCODING_NONE);
14971 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14972 if (stream == NULL) {
14973 xmlFreeParserInputBuffer(input);
14976 inputPush(ctxt, stream);
14977 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14980 #define bottom_parser
14981 #include "elfgcchack.h"