2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
71 #ifdef HAVE_SYS_STAT_H
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
99 /************************************************************************
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 ************************************************************************/
105 #define XML_PARSER_BIG_ENTITY 1000
106 #define XML_PARSER_LOT_ENTITY 5000
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in byte of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacement per byte of input.
114 #define XML_PARSER_NON_LINEAR 10
117 * xmlParserEntityCheck
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential (non-linear) entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
137 * This may look absurd but is needed to detect
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0)) {
142 unsigned long oldnbent = ctxt->nbentities;
147 rep = xmlStringDecodeEntities(ctxt, ent->content,
148 XML_SUBSTITUTE_REF, 0, 0, 0);
150 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
152 if (xmlStrchr(rep, '<'))
158 if (replacement != 0) {
159 if (replacement < XML_MAX_TEXT_LENGTH)
163 * If the volume of entity copy reaches 10 times the
164 * amount of parsed data and over the large text threshold
165 * then that's very likely to be an abuse.
167 if (ctxt->input != NULL) {
168 consumed = ctxt->input->consumed +
169 (ctxt->input->cur - ctxt->input->base);
171 consumed += ctxt->sizeentities;
173 if (replacement < XML_PARSER_NON_LINEAR * consumed)
175 } else if (size != 0) {
177 * Do the check based on the replacement size of the entity
179 if (size < XML_PARSER_BIG_ENTITY)
183 * A limit on the amount of text data reasonably used
185 if (ctxt->input != NULL) {
186 consumed = ctxt->input->consumed +
187 (ctxt->input->cur - ctxt->input->base);
189 consumed += ctxt->sizeentities;
191 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
192 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
194 } else if (ent != NULL) {
196 * use the number of parsed entities in the replacement
198 size = ent->checked / 2;
201 * The amount of data parsed counting entities size only once
203 if (ctxt->input != NULL) {
204 consumed = ctxt->input->consumed +
205 (ctxt->input->cur - ctxt->input->base);
207 consumed += ctxt->sizeentities;
210 * Check the density of entities for the amount of data
211 * knowing an entity reference will take at least 3 bytes
213 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
217 * strange we got no data for checking
219 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
220 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
221 (ctxt->nbentities <= 10000))
224 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 * arbitrary depth limit for the XML documents that we allow to
232 * process. This is not a limitation of the parser but a safety
233 * boundary feature. It can be disabled with the XML_PARSE_HUGE
236 unsigned int xmlParserMaxDepth = 256;
241 #define XML_PARSER_BIG_BUFFER_SIZE 300
242 #define XML_PARSER_BUFFER_SIZE 100
243 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
246 * XML_PARSER_CHUNK_SIZE
248 * When calling GROW that's the minimal amount of data
249 * the parser expected to have received. It is not a hard
250 * limit but an optimization when reading strings like Names
251 * It is not strictly needed as long as inputs available characters
252 * are followed by 0, which should be provided by the I/O level
254 #define XML_PARSER_CHUNK_SIZE 100
257 * List of XML prefixed PI allowed by W3C specs
260 static const char *xmlW3CPIs[] = {
267 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
268 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
269 const xmlChar **str);
271 static xmlParserErrors
272 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
273 xmlSAXHandlerPtr sax,
274 void *user_data, int depth, const xmlChar *URL,
275 const xmlChar *ID, xmlNodePtr *list);
278 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
279 const char *encoding);
280 #ifdef LIBXML_LEGACY_ENABLED
282 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
283 xmlNodePtr lastNode);
284 #endif /* LIBXML_LEGACY_ENABLED */
286 static xmlParserErrors
287 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
288 const xmlChar *string, void *user_data, xmlNodePtr *lst);
291 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
293 /************************************************************************
295 * Some factorized error routines *
297 ************************************************************************/
300 * xmlErrAttributeDup:
301 * @ctxt: an XML parser context
302 * @prefix: the attribute prefix
303 * @localname: the attribute localname
305 * Handle a redefinition of attribute error
308 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
309 const xmlChar * localname)
311 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312 (ctxt->instate == XML_PARSER_EOF))
315 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
318 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
319 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
320 (const char *) localname, NULL, NULL, 0, 0,
321 "Attribute %s redefined\n", localname);
323 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
324 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
325 (const char *) prefix, (const char *) localname,
326 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
329 ctxt->wellFormed = 0;
330 if (ctxt->recovery == 0)
331 ctxt->disableSAX = 1;
337 * @ctxt: an XML parser context
338 * @error: the error number
339 * @info: extra information string
341 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
344 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
347 char errstr[129] = "";
349 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350 (ctxt->instate == XML_PARSER_EOF))
353 case XML_ERR_INVALID_HEX_CHARREF:
354 errmsg = "CharRef: invalid hexadecimal value";
356 case XML_ERR_INVALID_DEC_CHARREF:
357 errmsg = "CharRef: invalid decimal value";
359 case XML_ERR_INVALID_CHARREF:
360 errmsg = "CharRef: invalid value";
362 case XML_ERR_INTERNAL_ERROR:
363 errmsg = "internal error";
365 case XML_ERR_PEREF_AT_EOF:
366 errmsg = "PEReference at end of document";
368 case XML_ERR_PEREF_IN_PROLOG:
369 errmsg = "PEReference in prolog";
371 case XML_ERR_PEREF_IN_EPILOG:
372 errmsg = "PEReference in epilog";
374 case XML_ERR_PEREF_NO_NAME:
375 errmsg = "PEReference: no name";
377 case XML_ERR_PEREF_SEMICOL_MISSING:
378 errmsg = "PEReference: expecting ';'";
380 case XML_ERR_ENTITY_LOOP:
381 errmsg = "Detected an entity reference loop";
383 case XML_ERR_ENTITY_NOT_STARTED:
384 errmsg = "EntityValue: \" or ' expected";
386 case XML_ERR_ENTITY_PE_INTERNAL:
387 errmsg = "PEReferences forbidden in internal subset";
389 case XML_ERR_ENTITY_NOT_FINISHED:
390 errmsg = "EntityValue: \" or ' expected";
392 case XML_ERR_ATTRIBUTE_NOT_STARTED:
393 errmsg = "AttValue: \" or ' expected";
395 case XML_ERR_LT_IN_ATTRIBUTE:
396 errmsg = "Unescaped '<' not allowed in attributes values";
398 case XML_ERR_LITERAL_NOT_STARTED:
399 errmsg = "SystemLiteral \" or ' expected";
401 case XML_ERR_LITERAL_NOT_FINISHED:
402 errmsg = "Unfinished System or Public ID \" or ' expected";
404 case XML_ERR_MISPLACED_CDATA_END:
405 errmsg = "Sequence ']]>' not allowed in content";
407 case XML_ERR_URI_REQUIRED:
408 errmsg = "SYSTEM or PUBLIC, the URI is missing";
410 case XML_ERR_PUBID_REQUIRED:
411 errmsg = "PUBLIC, the Public Identifier is missing";
413 case XML_ERR_HYPHEN_IN_COMMENT:
414 errmsg = "Comment must not contain '--' (double-hyphen)";
416 case XML_ERR_PI_NOT_STARTED:
417 errmsg = "xmlParsePI : no target name";
419 case XML_ERR_RESERVED_XML_NAME:
420 errmsg = "Invalid PI name";
422 case XML_ERR_NOTATION_NOT_STARTED:
423 errmsg = "NOTATION: Name expected here";
425 case XML_ERR_NOTATION_NOT_FINISHED:
426 errmsg = "'>' required to close NOTATION declaration";
428 case XML_ERR_VALUE_REQUIRED:
429 errmsg = "Entity value required";
431 case XML_ERR_URI_FRAGMENT:
432 errmsg = "Fragment not allowed";
434 case XML_ERR_ATTLIST_NOT_STARTED:
435 errmsg = "'(' required to start ATTLIST enumeration";
437 case XML_ERR_NMTOKEN_REQUIRED:
438 errmsg = "NmToken expected in ATTLIST enumeration";
440 case XML_ERR_ATTLIST_NOT_FINISHED:
441 errmsg = "')' required to finish ATTLIST enumeration";
443 case XML_ERR_MIXED_NOT_STARTED:
444 errmsg = "MixedContentDecl : '|' or ')*' expected";
446 case XML_ERR_PCDATA_REQUIRED:
447 errmsg = "MixedContentDecl : '#PCDATA' expected";
449 case XML_ERR_ELEMCONTENT_NOT_STARTED:
450 errmsg = "ContentDecl : Name or '(' expected";
452 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
453 errmsg = "ContentDecl : ',' '|' or ')' expected";
455 case XML_ERR_PEREF_IN_INT_SUBSET:
457 "PEReference: forbidden within markup decl in internal subset";
459 case XML_ERR_GT_REQUIRED:
460 errmsg = "expected '>'";
462 case XML_ERR_CONDSEC_INVALID:
463 errmsg = "XML conditional section '[' expected";
465 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
466 errmsg = "Content error in the external subset";
468 case XML_ERR_CONDSEC_INVALID_KEYWORD:
470 "conditional section INCLUDE or IGNORE keyword expected";
472 case XML_ERR_CONDSEC_NOT_FINISHED:
473 errmsg = "XML conditional section not closed";
475 case XML_ERR_XMLDECL_NOT_STARTED:
476 errmsg = "Text declaration '<?xml' required";
478 case XML_ERR_XMLDECL_NOT_FINISHED:
479 errmsg = "parsing XML declaration: '?>' expected";
481 case XML_ERR_EXT_ENTITY_STANDALONE:
482 errmsg = "external parsed entities cannot be standalone";
484 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
485 errmsg = "EntityRef: expecting ';'";
487 case XML_ERR_DOCTYPE_NOT_FINISHED:
488 errmsg = "DOCTYPE improperly terminated";
490 case XML_ERR_LTSLASH_REQUIRED:
491 errmsg = "EndTag: '</' not found";
493 case XML_ERR_EQUAL_REQUIRED:
494 errmsg = "expected '='";
496 case XML_ERR_STRING_NOT_CLOSED:
497 errmsg = "String not closed expecting \" or '";
499 case XML_ERR_STRING_NOT_STARTED:
500 errmsg = "String not started expecting ' or \"";
502 case XML_ERR_ENCODING_NAME:
503 errmsg = "Invalid XML encoding name";
505 case XML_ERR_STANDALONE_VALUE:
506 errmsg = "standalone accepts only 'yes' or 'no'";
508 case XML_ERR_DOCUMENT_EMPTY:
509 errmsg = "Document is empty";
511 case XML_ERR_DOCUMENT_END:
512 errmsg = "Extra content at the end of the document";
514 case XML_ERR_NOT_WELL_BALANCED:
515 errmsg = "chunk is not well balanced";
517 case XML_ERR_EXTRA_CONTENT:
518 errmsg = "extra content at the end of well balanced chunk";
520 case XML_ERR_VERSION_MISSING:
521 errmsg = "Malformed declaration expecting version";
523 case XML_ERR_NAME_TOO_LONG:
524 errmsg = "Name too long use XML_PARSE_HUGE option";
532 errmsg = "Unregistered error message";
535 snprintf(errstr, 128, "%s\n", errmsg);
537 snprintf(errstr, 128, "%s: %%s\n", errmsg);
540 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
541 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
544 ctxt->wellFormed = 0;
545 if (ctxt->recovery == 0)
546 ctxt->disableSAX = 1;
552 * @ctxt: an XML parser context
553 * @error: the error number
554 * @msg: the error message
556 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
559 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
562 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
563 (ctxt->instate == XML_PARSER_EOF))
567 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
568 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
570 ctxt->wellFormed = 0;
571 if (ctxt->recovery == 0)
572 ctxt->disableSAX = 1;
578 * @ctxt: an XML parser context
579 * @error: the error number
580 * @msg: the error message
587 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
588 const char *msg, const xmlChar *str1, const xmlChar *str2)
590 xmlStructuredErrorFunc schannel = NULL;
592 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
593 (ctxt->instate == XML_PARSER_EOF))
595 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
596 (ctxt->sax->initialized == XML_SAX2_MAGIC))
597 schannel = ctxt->sax->serror;
599 __xmlRaiseError(schannel,
600 (ctxt->sax) ? ctxt->sax->warning : NULL,
602 ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_WARNING, NULL, 0,
604 (const char *) str1, (const char *) str2, NULL, 0, 0,
605 msg, (const char *) str1, (const char *) str2);
607 __xmlRaiseError(schannel, NULL, NULL,
608 ctxt, NULL, XML_FROM_PARSER, error,
609 XML_ERR_WARNING, NULL, 0,
610 (const char *) str1, (const char *) str2, NULL, 0, 0,
611 msg, (const char *) str1, (const char *) str2);
617 * @ctxt: an XML parser context
618 * @error: the error number
619 * @msg: the error message
622 * Handle a validity error.
625 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
626 const char *msg, const xmlChar *str1, const xmlChar *str2)
628 xmlStructuredErrorFunc schannel = NULL;
630 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
631 (ctxt->instate == XML_PARSER_EOF))
635 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
636 schannel = ctxt->sax->serror;
639 __xmlRaiseError(schannel,
640 ctxt->vctxt.error, ctxt->vctxt.userData,
641 ctxt, NULL, XML_FROM_DTD, error,
642 XML_ERR_ERROR, NULL, 0, (const char *) str1,
643 (const char *) str2, NULL, 0, 0,
644 msg, (const char *) str1, (const char *) str2);
647 __xmlRaiseError(schannel, NULL, NULL,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @val: an integer value
662 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
666 const char *msg, int val)
668 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
669 (ctxt->instate == XML_PARSER_EOF))
673 __xmlRaiseError(NULL, NULL, NULL,
674 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
675 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
677 ctxt->wellFormed = 0;
678 if (ctxt->recovery == 0)
679 ctxt->disableSAX = 1;
684 * xmlFatalErrMsgStrIntStr:
685 * @ctxt: an XML parser context
686 * @error: the error number
687 * @msg: the error message
688 * @str1: a string info
689 * @val: an integer value
690 * @str2: a string info
692 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
696 const char *msg, const xmlChar *str1, int val,
699 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
700 (ctxt->instate == XML_PARSER_EOF))
704 __xmlRaiseError(NULL, NULL, NULL,
705 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
706 NULL, 0, (const char *) str1, (const char *) str2,
707 NULL, val, 0, msg, str1, val, str2);
709 ctxt->wellFormed = 0;
710 if (ctxt->recovery == 0)
711 ctxt->disableSAX = 1;
717 * @ctxt: an XML parser context
718 * @error: the error number
719 * @msg: the error message
720 * @val: a string value
722 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
725 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
726 const char *msg, const xmlChar * val)
728 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
729 (ctxt->instate == XML_PARSER_EOF))
733 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
734 XML_FROM_PARSER, error, XML_ERR_FATAL,
735 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
738 ctxt->wellFormed = 0;
739 if (ctxt->recovery == 0)
740 ctxt->disableSAX = 1;
746 * @ctxt: an XML parser context
747 * @error: the error number
748 * @msg: the error message
749 * @val: a string value
751 * Handle a non fatal parser error
754 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
755 const char *msg, const xmlChar * val)
757 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
758 (ctxt->instate == XML_PARSER_EOF))
762 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
763 XML_FROM_PARSER, error, XML_ERR_ERROR,
764 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
770 * @ctxt: an XML parser context
771 * @error: the error number
773 * @info1: extra information string
774 * @info2: extra information string
776 * Handle a namespace error (raised at XML_ERR_ERROR level, clears nsWellFormed)
779 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const xmlChar * info1, const xmlChar * info2,
782 const xmlChar * info3)
784 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
785 (ctxt->instate == XML_PARSER_EOF))
789 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
790 XML_ERR_ERROR, NULL, 0, (const char *) info1,
791 (const char *) info2, (const char *) info3, 0, 0, msg,
792 info1, info2, info3);
794 ctxt->nsWellFormed = 0;
799 * @ctxt: an XML parser context
800 * @error: the error number
802 * @info1: extra information string
803 * @info2: extra information string
805 * Handle a namespace warning
808 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
810 const xmlChar * info1, const xmlChar * info2,
811 const xmlChar * info3)
813 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
814 (ctxt->instate == XML_PARSER_EOF))
816 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
817 XML_ERR_WARNING, NULL, 0, (const char *) info1,
818 (const char *) info2, (const char *) info3, 0, 0, msg,
819 info1, info2, info3);
822 /************************************************************************
824 * Library wide options *
826 ************************************************************************/
830 * @feature: the feature to be examined
832 * Examines if the library has been compiled with a given feature.
834 * Returns a non-zero value if the feature exists, otherwise zero.
835 * Returns zero (0) if the feature does not exist or an
836 * unknown feature is requested, non-zero otherwise.
839 xmlHasFeature(xmlFeature feature)
842 case XML_WITH_THREAD:
843 #ifdef LIBXML_THREAD_ENABLED
849 #ifdef LIBXML_TREE_ENABLED
854 case XML_WITH_OUTPUT:
855 #ifdef LIBXML_OUTPUT_ENABLED
861 #ifdef LIBXML_PUSH_ENABLED
866 case XML_WITH_READER:
867 #ifdef LIBXML_READER_ENABLED
872 case XML_WITH_PATTERN:
873 #ifdef LIBXML_PATTERN_ENABLED
878 case XML_WITH_WRITER:
879 #ifdef LIBXML_WRITER_ENABLED
885 #ifdef LIBXML_SAX1_ENABLED
891 #ifdef LIBXML_FTP_ENABLED
897 #ifdef LIBXML_HTTP_ENABLED
903 #ifdef LIBXML_VALID_ENABLED
909 #ifdef LIBXML_HTML_ENABLED
914 case XML_WITH_LEGACY:
915 #ifdef LIBXML_LEGACY_ENABLED
921 #ifdef LIBXML_C14N_ENABLED
926 case XML_WITH_CATALOG:
927 #ifdef LIBXML_CATALOG_ENABLED
933 #ifdef LIBXML_XPATH_ENABLED
939 #ifdef LIBXML_XPTR_ENABLED
944 case XML_WITH_XINCLUDE:
945 #ifdef LIBXML_XINCLUDE_ENABLED
951 #ifdef LIBXML_ICONV_ENABLED
956 case XML_WITH_ISO8859X:
957 #ifdef LIBXML_ISO8859X_ENABLED
962 case XML_WITH_UNICODE:
963 #ifdef LIBXML_UNICODE_ENABLED
968 case XML_WITH_REGEXP:
969 #ifdef LIBXML_REGEXP_ENABLED
974 case XML_WITH_AUTOMATA:
975 #ifdef LIBXML_AUTOMATA_ENABLED
981 #ifdef LIBXML_EXPR_ENABLED
986 case XML_WITH_SCHEMAS:
987 #ifdef LIBXML_SCHEMAS_ENABLED
992 case XML_WITH_SCHEMATRON:
993 #ifdef LIBXML_SCHEMATRON_ENABLED
998 case XML_WITH_MODULES:
999 #ifdef LIBXML_MODULES_ENABLED
1004 case XML_WITH_DEBUG:
1005 #ifdef LIBXML_DEBUG_ENABLED
1010 case XML_WITH_DEBUG_MEM:
1011 #ifdef DEBUG_MEMORY_LOCATION
1016 case XML_WITH_DEBUG_RUN:
1017 #ifdef LIBXML_DEBUG_RUNTIME
1023 #ifdef LIBXML_ZLIB_ENABLED
1029 #ifdef LIBXML_LZMA_ENABLED
1035 #ifdef LIBXML_ICU_ENABLED
1046 /************************************************************************
1048 * SAX2 defaulted attributes handling *
1050 ************************************************************************/
1054 * @ctxt: an XML parser context
1056 * Do the SAX2 detection and specific initialization
1059 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1060 if (ctxt == NULL) return;
1061 #ifdef LIBXML_SAX1_ENABLED
1062 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1063 ((ctxt->sax->startElementNs != NULL) ||
1064 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1067 #endif /* LIBXML_SAX1_ENABLED */
1069 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1070 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1071 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1072 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1073 (ctxt->str_xml_ns == NULL)) {
1074 xmlErrMemory(ctxt, NULL);
1078 typedef struct _xmlDefAttrs xmlDefAttrs;
1079 typedef xmlDefAttrs *xmlDefAttrsPtr;
1080 struct _xmlDefAttrs {
1081 int nbAttrs; /* number of defaulted attributes on that element */
1082 int maxAttrs; /* the size of the array */
1083 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1087 * xmlAttrNormalizeSpace:
1088 * @src: the source string
1089 * @dst: the target string
1091 * Normalize the space in non CDATA attribute values:
1092 * If the attribute type is not CDATA, then the XML processor MUST further
1093 * process the normalized attribute value by discarding any leading and
1094 * trailing space (#x20) characters, and by replacing sequences of space
1095 * (#x20) characters by a single space (#x20) character.
1096 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1097 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1098 * passing src as dst is just fine.
1100 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1104 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1106 if ((src == NULL) || (dst == NULL))
1109 while (*src == 0x20) src++;
1112 while (*src == 0x20) src++;
1126 * xmlAttrNormalizeSpace2:
1127 * @src: the source string
1129 * Normalize the space in non CDATA attribute values, a slightly more complex
1130 * front end to avoid allocation problems when running on attribute values
1131 * coming from the input.
1133 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1136 static const xmlChar *
1137 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1140 int remove_head = 0;
1141 int need_realloc = 0;
1144 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1151 while (*cur == 0x20) {
1158 if ((*cur == 0x20) || (*cur == 0)) {
1168 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1170 xmlErrMemory(ctxt, NULL);
1173 xmlAttrNormalizeSpace(ret, ret);
1174 *len = (int) strlen((const char *)ret);
1176 } else if (remove_head) {
1177 *len -= remove_head;
1178 memmove(src, src + remove_head, 1 + *len);
1186 * @ctxt: an XML parser context
1187 * @fullname: the element fullname
1188 * @fullattr: the attribute fullname
1189 * @value: the attribute value
1191 * Add a defaulted attribute for an element
1194 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1195 const xmlChar *fullname,
1196 const xmlChar *fullattr,
1197 const xmlChar *value) {
1198 xmlDefAttrsPtr defaults;
1200 const xmlChar *name;
1201 const xmlChar *prefix;
1204 * Allows to detect attribute redefinitions
1206 if (ctxt->attsSpecial != NULL) {
1207 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1211 if (ctxt->attsDefault == NULL) {
1212 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1213 if (ctxt->attsDefault == NULL)
1218 * split the element name into prefix:localname , the string found
1219 * are within the DTD and then not associated to namespace names.
1221 name = xmlSplitQName3(fullname, &len);
1223 name = xmlDictLookup(ctxt->dict, fullname, -1);
1226 name = xmlDictLookup(ctxt->dict, name, -1);
1227 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1231 * make sure there is some storage
1233 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1234 if (defaults == NULL) {
1235 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1236 (4 * 5) * sizeof(const xmlChar *));
1237 if (defaults == NULL)
1239 defaults->nbAttrs = 0;
1240 defaults->maxAttrs = 4;
1241 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1242 defaults, NULL) < 0) {
1246 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1247 xmlDefAttrsPtr temp;
1249 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1250 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1254 defaults->maxAttrs *= 2;
1255 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1256 defaults, NULL) < 0) {
1263 * Split the attribute name into prefix:localname, the strings found
1264 * are within the DTD and then not associated to namespace names.
1266 name = xmlSplitQName3(fullattr, &len);
1268 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1271 name = xmlDictLookup(ctxt->dict, name, -1);
1272 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1275 defaults->values[5 * defaults->nbAttrs] = name;
1276 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1277 /* intern the string and precompute the end */
1278 len = xmlStrlen(value);
1279 value = xmlDictLookup(ctxt->dict, value, len);
1280 defaults->values[5 * defaults->nbAttrs + 2] = value;
1281 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1283 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1285 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1286 defaults->nbAttrs++;
1291 xmlErrMemory(ctxt, NULL);
1296 * xmlAddSpecialAttr:
1297 * @ctxt: an XML parser context
1298 * @fullname: the element fullname
1299 * @fullattr: the attribute fullname
1300 * @type: the attribute type
1302 * Register this attribute type
1305 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1306 const xmlChar *fullname,
1307 const xmlChar *fullattr,
1310 if (ctxt->attsSpecial == NULL) {
1311 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1312 if (ctxt->attsSpecial == NULL)
1316 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1319 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1320 (void *) (long) type);
1324 xmlErrMemory(ctxt, NULL);
1329 * xmlCleanSpecialAttrCallback:
1331 * Removes CDATA attributes from the special attribute table
1334 xmlCleanSpecialAttrCallback(void *payload, void *data,
1335 const xmlChar *fullname, const xmlChar *fullattr,
1336 const xmlChar *unused ATTRIBUTE_UNUSED) {
1337 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1339 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1340 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1345 * xmlCleanSpecialAttr:
1346 * @ctxt: an XML parser context
1348 * Trim the list of attributes defined to remove all those of type
1349 * CDATA as they are not special. This call should be done when finishing
1350 * to parse the DTD and before starting to parse the document root.
1353 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1355 if (ctxt->attsSpecial == NULL)
1358 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1360 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1361 xmlHashFree(ctxt->attsSpecial, NULL);
1362 ctxt->attsSpecial = NULL;
1368 * xmlCheckLanguageID:
1369 * @lang: pointer to the string value
1371 * Checks that the value conforms to the LanguageID production:
1373 * NOTE: this is somewhat deprecated, those productions were removed from
1374 * the XML Second edition.
1376 * [33] LanguageID ::= Langcode ('-' Subcode)*
1377 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1378 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1379 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1380 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1381 * [38] Subcode ::= ([a-z] | [A-Z])+
1383 * The current REC references the successors of RFC 1766, currently 5646
1385 * http://www.rfc-editor.org/rfc/rfc5646.txt
1386 * langtag = language
1392 * language = 2*3ALPHA ; shortest ISO 639 code
1393 * ["-" extlang] ; sometimes followed by
1394 * ; extended language subtags
1395 * / 4ALPHA ; or reserved for future use
1396 * / 5*8ALPHA ; or registered language subtag
1398 * extlang = 3ALPHA ; selected ISO 639 codes
1399 * *2("-" 3ALPHA) ; permanently reserved
1401 * script = 4ALPHA ; ISO 15924 code
1403 * region = 2ALPHA ; ISO 3166-1 code
1404 * / 3DIGIT ; UN M.49 code
1406 * variant = 5*8alphanum ; registered variants
1407 * / (DIGIT 3alphanum)
1409 * extension = singleton 1*("-" (2*8alphanum))
1411 * ; Single alphanumerics
1412 * ; "x" reserved for private use
1413 * singleton = DIGIT ; 0 - 9
1419 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1420 * The parser below doesn't try to cope with extension or privateuse
1421 * that could be added but that's not interoperable anyway
1423 * Returns 1 if correct 0 otherwise
/*
 * Validate a language tag (such as an xml:lang value) passed as a
 * NUL-terminated string.  Only ASCII letters and digits are tested.
 *
 * Two syntaxes are recognised:
 *  - a tag starting with "i-", "I-", "x-" or "X-" (the old IANA and user
 *    codes): the run of letters scanned afterwards must stop exactly on
 *    the terminating NUL;
 *  - otherwise a run of letters is scanned as the primary language
 *    subtag.  A run of 4 letters or more is tested against a maximum of 8
 *    letters and against the end of the string.  The subtags that follow
 *    are classified by their length (5 to 8 letters is taken as a variant)
 *    or by a leading digit; for a numeric region the two characters after
 *    the first digit must be digits as well.
 *
 * Extension and private-use subtags are not checked.
 */
1426 xmlCheckLanguageID(const xmlChar * lang)
1428 const xmlChar *cur = lang, *nxt;
1432 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1433 ((cur[0] == 'I') && (cur[1] == '-')) ||
1434 ((cur[0] == 'x') && (cur[1] == '-')) ||
1435 ((cur[0] == 'X') && (cur[1] == '-'))) {
1437 * Still allow IANA code and user code which were coming
1438 * from the previous version of the XML-1.0 specification
1439 * it's deprecated but we should not fail
1442 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1443 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1445 return(cur[0] == 0);
1448 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1449 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1451 if (nxt - cur >= 4) {
1455 if ((nxt - cur > 8) || (nxt[0] != 0))
1461 /* we got an ISO 639 code */
1469 /* now we can have extlang or script or region or variant */
1470 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1473 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1480 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1484 /* we parsed an extlang */
1492 /* now we can have script or region or variant */
1493 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1501 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1505 /* we parsed a script */
1514 /* now we can have region or variant */
1515 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1518 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1519 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1522 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1526 /* we parsed a region */
1535 /* now we can just have a variant */
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1540 if ((nxt - cur < 5) || (nxt - cur > 8))
1543 /* we parsed a variant */
1549 /* extensions and private use subtags not checked */
1553 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1554 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1561 /************************************************************************
1563 * Parser stacks related functions and macros *
1565 ************************************************************************/
1567 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1568 const xmlChar ** str);
1573 * @ctxt: an XML parser context
1574 * @prefix: the namespace prefix or NULL
1575 * @URL: the namespace name
1577 * Pushes a new parser namespace on top of the ns stack
1579 * Returns -1 in case of error, -2 if the namespace should be discarded
1580 * and the index in the stack otherwise.
/*
 * Push one namespace binding on ctxt->nsTab.  A binding occupies two
 * consecutive slots, the prefix followed by the URL, so ctxt->nsNr grows
 * by two for every binding stored.
 */
1583 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
/* With XML_PARSE_NSCLEAN set, walk the bindings from the most recent one
 * looking for the same prefix.  Prefix and URL are compared as pointers,
 * not as string contents (presumably both are interned -- confirm). */
1585 if (ctxt->options & XML_PARSE_NSCLEAN) {
1587 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1588 if (ctxt->nsTab[i] == prefix) {
1590 if (ctxt->nsTab[i + 1] == URL)
1592 /* out of scope keep it */
/* First use: no table has been allocated yet. */
1597 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1600 ctxt->nsTab = (const xmlChar **)
1601 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1602 if (ctxt->nsTab == NULL) {
1603 xmlErrMemory(ctxt, NULL);
/* Table full: the reallocation result is first received in a temporary. */
1607 } else if (ctxt->nsNr >= ctxt->nsMax) {
1608 const xmlChar ** tmp;
1610 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1611 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1613 xmlErrMemory(ctxt, NULL);
/* Store the pair.  The value returned is ctxt->nsNr after both slots have
 * been written, i.e. the new number of used slots. */
1619 ctxt->nsTab[ctxt->nsNr++] = prefix;
1620 ctxt->nsTab[ctxt->nsNr++] = URL;
1621 return (ctxt->nsNr);
1625 * @ctxt: an XML parser context
1626 * @nr: the number to pop
1628 * Pops the top @nr parser prefix/namespace from the ns stack
1630 * Returns the number of namespaces removed
/*
 * Remove nr slots from the top of ctxt->nsTab, clearing each vacated slot
 * to NULL.  nr counts table slots, and nsPush uses two slots per binding.
 */
1633 nsPop(xmlParserCtxtPtr ctxt, int nr)
/* Nothing was ever pushed. */
1637 if (ctxt->nsTab == NULL) return(0);
/* Asking for more than is stored is reported through the generic error
 * channel. */
1638 if (ctxt->nsNr < nr) {
1639 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1642 if (ctxt->nsNr <= 0)
1645 for (i = 0;i < nr;i++) {
1647 ctxt->nsTab[ctxt->nsNr] = NULL;
/*
 * Size the attribute arrays of the context so that nr + 5 entries fit.
 * The atts array holds maxatts pointers and the attallocs array holds
 * maxatts / 5 ints, i.e. one attallocs entry for five atts slots.
 * The normal path returns ctxt->maxatts; allocation failures jump to
 * mem_error, which reports through xmlErrMemory.
 */
1654 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1655 const xmlChar **atts;
/* First call: allocate both arrays with the default size (55 / 5 gives
 * 11 attallocs entries). */
1659 if (ctxt->atts == NULL) {
1660 maxatts = 55; /* allow for 10 attrs by default */
1661 atts = (const xmlChar **)
1662 xmlMalloc(maxatts * sizeof(xmlChar *));
1663 if (atts == NULL) goto mem_error;
1665 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1666 if (attallocs == NULL) goto mem_error;
1667 ctxt->attallocs = attallocs;
1668 ctxt->maxatts = maxatts;
/* Too small for the request: grow to twice the needed size.  Both results
 * are received in locals and checked before being used. */
1669 } else if (nr + 5 > ctxt->maxatts) {
1670 maxatts = (nr + 5) * 2;
1671 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1672 maxatts * sizeof(const xmlChar *));
1673 if (atts == NULL) goto mem_error;
1675 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1676 (maxatts / 5) * sizeof(int));
1677 if (attallocs == NULL) goto mem_error;
1678 ctxt->attallocs = attallocs;
1679 ctxt->maxatts = maxatts;
1681 return(ctxt->maxatts);
1683 xmlErrMemory(ctxt, NULL);
1689 * @ctxt: an XML parser context
1690 * @value: the parser input
1692 * Pushes a new parser input on top of the input stack
1694 * Returns -1 in case of error, the index in the stack otherwise
1697 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1699 if ((ctxt == NULL) || (value == NULL))
1701 if (ctxt->inputNr >= ctxt->inputMax) {
1702 ctxt->inputMax *= 2;
1704 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1706 sizeof(ctxt->inputTab[0]));
1707 if (ctxt->inputTab == NULL) {
1708 xmlErrMemory(ctxt, NULL);
1709 xmlFreeInputStream(value);
1710 ctxt->inputMax /= 2;
1715 ctxt->inputTab[ctxt->inputNr] = value;
1716 ctxt->input = value;
1717 return (ctxt->inputNr++);
1721 * @ctxt: an XML parser context
1723 * Pops the top parser input from the input stack
1725 * Returns the input just removed
/*
 * Take the top entry off the input stack.  ctxt->input is re-pointed at
 * the entry below it when one remains, and the vacated slot is cleared.
 */
1728 inputPop(xmlParserCtxtPtr ctxt)
1730 xmlParserInputPtr ret;
/* Empty stack: nothing to pop. */
1734 if (ctxt->inputNr <= 0)
/* The reads below use inputNr - 1 for the new top and inputNr for the
 * removed entry, so inputNr is expected to have been decremented by now. */
1737 if (ctxt->inputNr > 0)
1738 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1741 ret = ctxt->inputTab[ctxt->inputNr];
1742 ctxt->inputTab[ctxt->inputNr] = NULL;
1747 * @ctxt: an XML parser context
1748 * @value: the element node
1750 * Pushes a new element node on top of the node stack
1752 * Returns -1 in case of error, the index in the stack otherwise
/*
 * Push an element node on ctxt->nodeTab and return the index it was
 * stored at.
 */
1755 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
/* NOTE(review): a NULL context yields 0, which is also a valid stack
 * index, although the function is documented to return -1 on error. */
1757 if (ctxt == NULL) return(0);
/* Table full: grow it; the new pointer is received in tmp first. */
1758 if (ctxt->nodeNr >= ctxt->nodeMax) {
1761 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1763 sizeof(ctxt->nodeTab[0]));
1765 xmlErrMemory(ctxt, NULL);
1768 ctxt->nodeTab = tmp;
/* Depth guard: unless XML_PARSE_HUGE is set, a stack deeper than
 * xmlParserMaxDepth raises a fatal error and halts the parser. */
1771 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1772 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1773 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1774 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1776 xmlHaltParser(ctxt);
1779 ctxt->nodeTab[ctxt->nodeNr] = value;
/* Post-increment: the caller gets the index just filled. */
1781 return (ctxt->nodeNr++);
1786 * @ctxt: an XML parser context
1788 * Pops the top element node from the node stack
1790 * Returns the node just removed
/*
 * Take the top entry off the node stack.  ctxt->node is re-pointed at the
 * entry below it when one remains, and the vacated slot is cleared.
 */
1793 nodePop(xmlParserCtxtPtr ctxt)
1797 if (ctxt == NULL) return(NULL);
/* Empty stack: nothing to pop. */
1798 if (ctxt->nodeNr <= 0)
/* The reads below use nodeNr - 1 for the new top and nodeNr for the
 * removed entry, so nodeNr is expected to have been decremented by now. */
1801 if (ctxt->nodeNr > 0)
1802 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1805 ret = ctxt->nodeTab[ctxt->nodeNr];
1806 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1810 #ifdef LIBXML_PUSH_ENABLED
1813 * @ctxt: an XML parser context
1814 * @value: the element name
1815 * @prefix: the element prefix
1816 * @URI: the element namespace name
1818 * Pushes a new element name/prefix/URL on top of the name stack
1820 * Returns -1 in case of error, the index in the stack otherwise
/*
 * Push an element name on ctxt->nameTab together with its namespace data.
 * ctxt->pushTab runs in parallel with three slots per name: the prefix,
 * the URI, and nsNr stored as a pointer-sized integer.
 * Returns the index the name was stored at; failures go through the
 * xmlErrMemory call at the end.
 */
1823 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1824 const xmlChar *prefix, const xmlChar *URI, int nsNr)
/* Both tables are grown together when nameTab is full; each reallocation
 * result is received in a temporary before being stored. */
1826 if (ctxt->nameNr >= ctxt->nameMax) {
1827 const xmlChar * *tmp;
1830 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1832 sizeof(ctxt->nameTab[0]));
1837 ctxt->nameTab = tmp;
1838 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1840 sizeof(ctxt->pushTab[0]));
1845 ctxt->pushTab = tmp2;
1847 ctxt->nameTab[ctxt->nameNr] = value;
1849 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1850 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
/* The int is widened to long before the pointer cast. */
1851 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1852 return (ctxt->nameNr++);
1854 xmlErrMemory(ctxt, NULL);
1859 * @ctxt: an XML parser context
1861 * Pops the top element/prefix/URI name from the name stack
1863 * Returns the name just removed
/*
 * Take the top entry off the name stack.  ctxt->name is re-pointed at the
 * entry below it when one remains, and the vacated nameTab slot is
 * cleared.  No access to ctxt->pushTab appears in the statements below.
 */
1865 static const xmlChar *
1866 nameNsPop(xmlParserCtxtPtr ctxt)
/* Empty stack: nothing to pop. */
1870 if (ctxt->nameNr <= 0)
/* The reads below use nameNr - 1 for the new top and nameNr for the
 * removed entry, so nameNr is expected to have been decremented by now. */
1873 if (ctxt->nameNr > 0)
1874 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1877 ret = ctxt->nameTab[ctxt->nameNr];
1878 ctxt->nameTab[ctxt->nameNr] = NULL;
1881 #endif /* LIBXML_PUSH_ENABLED */
1885 * @ctxt: an XML parser context
1886 * @value: the element name
1888 * Pushes a new element name on top of the name stack
1890 * Returns -1 in case of error, the index in the stack otherwise
/*
 * Push an element name on ctxt->nameTab and return the index it was
 * stored at.  A NULL context returns -1; failures go through the
 * xmlErrMemory call at the end.
 */
1893 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1895 if (ctxt == NULL) return (-1);
/* Table full: grow it; the new pointer is received in tmp first. */
1897 if (ctxt->nameNr >= ctxt->nameMax) {
1898 const xmlChar * *tmp;
1899 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1901 sizeof(ctxt->nameTab[0]));
1905 ctxt->nameTab = tmp;
1908 ctxt->nameTab[ctxt->nameNr] = value;
/* Post-increment: the caller gets the index just filled. */
1910 return (ctxt->nameNr++);
1912 xmlErrMemory(ctxt, NULL);
1917 * @ctxt: an XML parser context
1919 * Pops the top element name from the name stack
1921 * Returns the name just removed
/*
 * Take the top entry off the name stack.  ctxt->name is re-pointed at the
 * entry below it when one remains, and the vacated slot is cleared.
 */
1924 namePop(xmlParserCtxtPtr ctxt)
/* NULL context or empty stack: nothing to pop. */
1928 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
/* The reads below use nameNr - 1 for the new top and nameNr for the
 * removed entry, so nameNr is expected to have been decremented by now. */
1931 if (ctxt->nameNr > 0)
1932 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1935 ret = ctxt->nameTab[ctxt->nameNr];
1936 ctxt->nameTab[ctxt->nameNr] = NULL;
/*
 * Push an int on ctxt->spaceTab and point ctxt->space at the new top
 * entry.  Returns the index the value was stored at.
 */
1940 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
/* Table full: double the capacity; the reallocation result is received
 * in tmp before being stored. */
1941 if (ctxt->spaceNr >= ctxt->spaceMax) {
1944 ctxt->spaceMax *= 2;
1945 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1946 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1948 xmlErrMemory(ctxt, NULL);
1952 ctxt->spaceTab = tmp;
1954 ctxt->spaceTab[ctxt->spaceNr] = val;
/* ctxt->space is a pointer into the table, so it is refreshed on every
 * push (the table may just have moved). */
1955 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1956 return(ctxt->spaceNr++);
/*
 * Take the top int off ctxt->spaceTab.  An empty stack returns 0.
 * ctxt->space is re-pointed at the new top entry, or at slot 0 when no
 * entry remains, and the vacated slot is overwritten with -1.
 */
1959 static int spacePop(xmlParserCtxtPtr ctxt) {
1961 if (ctxt->spaceNr <= 0) return(0);
/* The reads below use spaceNr - 1 for the new top and spaceNr for the
 * removed entry, so spaceNr is expected to have been decremented by now. */
1963 if (ctxt->spaceNr > 0)
1964 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1966 ctxt->space = &ctxt->spaceTab[0];
1967 ret = ctxt->spaceTab[ctxt->spaceNr];
1968 ctxt->spaceTab[ctxt->spaceNr] = -1;
1973 * Macros for accessing the content. Those should be used only by the parser,
1976 * Dirty macros, i.e. one often needs to make assumptions on the context to
1979 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1980 * To be used with extreme caution since operations consuming
1981 * characters may move the input buffer to a different location !
1982 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1983 * This should be used internally by the parser
1984 * only to compare to ASCII values otherwise it would break when
1985 * running with UTF-8 encoding.
1986 * RAW same as CUR but in the input buffer, bypass any token
1987 * extraction that may have been done
1988 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1989 * to compare on ASCII based substring.
1990 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1991 * strings without newlines within the parser.
1992 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1993 * defined char within the parser.
1994 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1996 * NEXT Skip to the next character, this does the proper decoding
1997 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1998 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1999 * CUR_CHAR(l) returns the current unicode character (int), set l
2000 * to the number of xmlChars used for the encoding [0-5].
2001 * CUR_SCHAR same but operate on a string instead of the context
2002 * COPY_BUF copy the current unicode char to the target buffer, increment
2004 * GROW, SHRINK handling of input buffers
/*
 * Direct accessors on the current input.  RAW and CUR both expand to the
 * byte at ctxt->input->cur, NXT(val) indexes relative to that position and
 * CUR_PTR is the cursor pointer itself.  All of them rely on a variable
 * named ctxt being in scope at the point of use.
 */
2007 #define RAW (*ctxt->input->cur)
2008 #define CUR (*ctxt->input->cur)
2009 #define NXT(val) ctxt->input->cur[(val)]
2010 #define CUR_PTR ctxt->input->cur
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order.  Each macro is built on the
 * previous one and adds a single comparison.  The argument s is expanded
 * once per byte compared and is not parenthesized in the expansion, so it
 * should be a simple pointer expression without side effects.
 */
2012 #define CMP4( s, c1, c2, c3, c4 ) \
2013 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2014 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2015 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2016 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2017 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2018 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2019 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2020 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2021 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2022 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2023 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2024 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2025 ((unsigned char *) s)[ 8 ] == c9 )
2026 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2027 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2028 ((unsigned char *) s)[ 9 ] == c10 )
/*
 * SKIP(val): advance the cursor by val bytes, adding val to ctxt->nbChars
 * and to the column counter (the line counter is not touched).  A '%' at
 * the new position is handed to xmlParserHandlePEReference, and when the
 * new position holds a 0 byte and the input cannot be grown the input is
 * popped with xmlPopInput.
 */
2030 #define SKIP(val) do { \
2031 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2032 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2033 if ((*ctxt->input->cur == 0) && \
2034 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2035 xmlPopInput(ctxt); \
/*
 * SKIPL(val): advance the cursor one byte at a time, val times, keeping
 * the position counters exact: a newline bumps ctxt->input->line and
 * resets the column to 1, any other byte bumps the column.  After the
 * loop the same '%' and end-of-input handling as in SKIP is applied.
 * The argument val is used unparenthesized in the loop condition.
 */
2038 #define SKIPL(val) do { \
2040 for(skipl=0; skipl<val; skipl++) { \
2041 if (*(ctxt->input->cur) == '\n') { \
2042 ctxt->input->line++; ctxt->input->col = 1; \
2043 } else ctxt->input->col++; \
2045 ctxt->input->cur++; \
2047 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2048 if ((*ctxt->input->cur == 0) && \
2049 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2050 xmlPopInput(ctxt); \
/*
 * SHRINK: guard evaluated at the call site.  It is true only when
 * ctxt->progressive is 0, more than 2 * INPUT_CHUNK bytes lie between the
 * buffer base and the cursor, and fewer than 2 * INPUT_CHUNK bytes remain
 * between the cursor and the end.
 *
 * xmlSHRINK: calls xmlParserInputShrink on the current input, then tests
 * whether the cursor sits on a 0 byte and the input cannot be grown.
 */
2053 #define SHRINK if ((ctxt->progressive == 0) && \
2054 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2055 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2058 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2059 xmlParserInputShrink(ctxt->input);
2060 if ((*ctxt->input->cur == 0) &&
2061 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
/*
 * GROW: guard evaluated at the call site.  It is true only when
 * ctxt->progressive is 0 and fewer than INPUT_CHUNK bytes remain between
 * the cursor and the end of the input.
 *
 * xmlGROW: reads more data into the current input, with two safety checks
 * around the xmlParserInputGrow call.
 */
2065 #define GROW if ((ctxt->progressive == 0) && \
2066 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2069 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2070 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2071 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
/* Size limit: when either distance exceeds XML_MAX_LOOKUP_LIMIT, the input
 * has a buffer whose read callback is not xmlNop, and XML_PARSE_HUGE is
 * not set, a fatal error is raised and the parser is halted. */
2073 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2074 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2075 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2076 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2077 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2078 xmlHaltParser(ctxt);
2081 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/* Sanity check after growing: the cursor must still lie within
 * [base, end]; otherwise the parser is halted and an error is reported. */
2082 if ((ctxt->input->cur > ctxt->input->end) ||
2083 (ctxt->input->cur < ctxt->input->base)) {
2084 xmlHaltParser(ctxt);
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
/* Still sitting on a 0 byte: try once more to get data. */
2088 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2089 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
/*
 * Cursor movement and character access shorthands; all of them rely on a
 * variable named ctxt being in scope.
 *  - SKIP_BLANKS and NEXT forward to xmlSkipBlankChars and xmlNextChar.
 *  - The unnamed macro body that follows NEXT bumps the column and the
 *    cursor by one and calls xmlParserInputGrow when a 0 byte is reached.
 *  - NEXTL(l) updates line and column from the byte under the cursor
 *    (newline: next line, column 1), moves the cursor by l bytes, and
 *    hands a '%' at the new position to xmlParserHandlePEReference.
 *  - CUR_CHAR(l) and CUR_SCHAR(s, l) forward to xmlCurrentChar and
 *    xmlStringCurrentChar, passing the address of l to receive a length.
 *  - COPY_BUF(l,b,i,v) stores v in b at index i: as a single xmlChar when
 *    l is 1, through xmlCopyCharMultiByte otherwise, advancing i either
 *    way.  Its expansion is a bare if/else statement with no enclosing
 *    braces, so care is needed when it is used under another if.
 */
2093 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2095 #define NEXT xmlNextChar(ctxt)
2098 ctxt->input->col++; \
2099 ctxt->input->cur++; \
2101 if (*ctxt->input->cur == 0) \
2102 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2105 #define NEXTL(l) do { \
2106 if (*(ctxt->input->cur) == '\n') { \
2107 ctxt->input->line++; ctxt->input->col = 1; \
2108 } else ctxt->input->col++; \
2109 ctxt->input->cur += l; \
2110 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2113 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2114 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2116 #define COPY_BUF(l,b,i,v) \
2117 if (l == 1) b[i++] = (xmlChar) v; \
2118 else i += xmlCopyCharMultiByte(&b[i],v)
2121 * xmlSkipBlankChars:
2122 * @ctxt: the XML parser context
2124 * Skip all blank characters found at that point in the input stream.
2125 * It pops up finished entities in the process if allowable at that point.
2127 * Returns the number of space chars skipped
/*
 * Skip blank characters at the current input position.
 *
 * Two paths are used:
 *  - fast path, taken when only one input is stacked (inputNr == 1) and
 *    the parser is not in the XML_PARSER_DTD state: a local pointer walks
 *    the buffer directly, line and column are maintained by hand, the
 *    input is refilled with xmlParserInputGrow, and the pointer is written
 *    back to ctxt->input->cur;
 *  - general path: a loop that also stops on XML_PARSER_EOF, deals with
 *    reaching a 0 byte while several inputs are stacked (except in the
 *    XML_PARSER_COMMENT state), and hands a '%' to
 *    xmlParserHandlePEReference.
 */
2131 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2135 * It's Okay to use CUR/NEXT here since all the blanks are on
2138 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2141 * if we are in the document content, go really fast
2143 cur = ctxt->input->cur;
2144 while (IS_BLANK_CH(*cur)) {
2146 ctxt->input->line++; ctxt->input->col = 1;
2153 ctxt->input->cur = cur;
2154 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2155 cur = ctxt->input->cur;
2158 ctxt->input->cur = cur;
2163 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2164 (ctxt->instate != XML_PARSER_EOF))) {
2169 while ((cur == 0) && (ctxt->inputNr > 1) &&
2170 (ctxt->instate != XML_PARSER_COMMENT)) {
2175 * Need to handle support of entities branching here
2177 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2178 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2179 (ctxt->instate != XML_PARSER_EOF));
2184 /************************************************************************
2186 * Commodity functions to handle entities *
2188 ************************************************************************/
2192 * @ctxt: an XML parser context
2194 * xmlPopInput: the current input pointed by ctxt->input came to an end
2195 * pop it and return the next char.
2197 * Returns the current xmlChar in the parser context
/*
 * Drop the current input once it is finished and continue with the one
 * below it on the input stack.
 */
2200 xmlPopInput(xmlParserCtxtPtr ctxt) {
/* The bottom input is never popped: with one input or fewer the call
 * returns 0 without touching the stack. */
2201 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2202 if (xmlParserDebugEntities)
2203 xmlGenericError(xmlGenericErrorContext,
2204 "Popping input %d\n", ctxt->inputNr);
/* The popped stream is freed immediately. */
2205 xmlFreeInputStream(inputPop(ctxt));
/* If the input now on top is exhausted too, pop again (recursively). */
2206 if ((*ctxt->input->cur == 0) &&
2207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2208 return(xmlPopInput(ctxt));
2214 * @ctxt: an XML parser context
2215 * @input: an XML parser input fragment (entity, XML fragment ...).
2217 * xmlPushInput: switch to a new input stream which is stacked on top
2218 * of the previous one(s).
2219 * Returns -1 in case of error or the index in the input stack
/*
 * Make input the current input by stacking it with inputPush.
 * A NULL input returns -1.
 */
2222 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2224 if (input == NULL) return(-1);
/* Optional trace: the location of the current input (when it has a
 * filename), then the depth and the first 30 bytes of the new input. */
2226 if (xmlParserDebugEntities) {
2227 if ((ctxt->input != NULL) && (ctxt->input->filename))
2228 xmlGenericError(xmlGenericErrorContext,
2229 "%s(%d): ", ctxt->input->filename,
2231 xmlGenericError(xmlGenericErrorContext,
2232 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2234 ret = inputPush(ctxt, input);
/* The parser state is checked for XML_PARSER_EOF after the push. */
2235 if (ctxt->instate == XML_PARSER_EOF)
2243 * @ctxt: an XML parser context
2245 * parse Reference declarations
2247 * [66] CharRef ::= '&#' [0-9]+ ';' |
2248 * '&#x' [0-9a-fA-F]+ ';'
2250 * [ WFC: Legal Character ]
2251 * Characters referred to using character references must match the
2252 * production for Char.
2254 * Returns the value parsed (as an int), 0 in case of error
/*
 * Parse a character reference at the current input position and compute
 * its numeric value in val.  Two forms are handled: a hexadecimal one
 * (first branch, base 16 accumulation) and a decimal one (second branch,
 * base 10 accumulation).  Anything else reports XML_ERR_INVALID_CHARREF.
 * The final value is accepted only when it satisfies IS_CHAR and
 * outofrange is still 0.
 */
2257 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2258 unsigned int val = 0;
2260 unsigned int outofrange = 0;
2263 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
/* Hexadecimal form. */
2265 if ((RAW == '&') && (NXT(1) == '#') &&
2269 while (RAW != ';') { /* loop blocked by count */
2273 if (ctxt->instate == XML_PARSER_EOF)
/* Accumulate one hex digit.  The letter branches carry an additional
 * count < 20 condition that the decimal-digit branch does not have. */
2276 if ((RAW >= '0') && (RAW <= '9'))
2277 val = val * 16 + (CUR - '0');
2278 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2279 val = val * 16 + (CUR - 'a') + 10;
2280 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2281 val = val * 16 + (CUR - 'A') + 10;
2283 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2294 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
/* Decimal form. */
2299 } else if ((RAW == '&') && (NXT(1) == '#')) {
2302 while (RAW != ';') { /* loop blocked by count */
2306 if (ctxt->instate == XML_PARSER_EOF)
2309 if ((RAW >= '0') && (RAW <= '9'))
2310 val = val * 10 + (CUR - '0');
2312 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2323 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2329 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2333 * [ WFC: Legal Character ]
2334 * Characters referred to using character references must match the
2335 * production for Char.
2337 if ((IS_CHAR(val) && (outofrange == 0))) {
2340 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2341 "xmlParseCharRef: invalid xmlChar value %d\n",
2348 * xmlParseStringCharRef:
2349 * @ctxt: an XML parser context
2350 * @str: a pointer to an index in the string
2352 * parse Reference declarations, variant parsing from a string rather
2353 * than an input flow.
2355 * [66] CharRef ::= '&#' [0-9]+ ';' |
2356 * '&#x' [0-9a-fA-F]+ ';'
2358 * [ WFC: Legal Character ]
2359 * Characters referred to using character references must match the
2360 * production for Char.
2362 * Returns the value parsed (as an int), 0 in case of error, str will be
2363 * updated to the current value of the index
/*
 * String variant of the character reference parser: the reference is read
 * through the pointer held in *str instead of the parser input.  A NULL
 * str or *str returns 0.  As in the input-based variant there is a
 * hexadecimal branch and a decimal branch, XML_ERR_INVALID_CHARREF for
 * anything else, and a final IS_CHAR / outofrange test on the value.
 */
2366 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2369 unsigned int val = 0;
2370 unsigned int outofrange = 0;
2372 if ((str == NULL) || (*str == NULL)) return(0);
/* Hexadecimal form. */
2375 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2378 while (cur != ';') { /* Non input consuming loop */
2379 if ((cur >= '0') && (cur <= '9'))
2380 val = val * 16 + (cur - '0');
2381 else if ((cur >= 'a') && (cur <= 'f'))
2382 val = val * 16 + (cur - 'a') + 10;
2383 else if ((cur >= 'A') && (cur <= 'F'))
2384 val = val * 16 + (cur - 'A') + 10;
2386 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
/* Decimal form. */
2398 } else if ((cur == '&') && (ptr[1] == '#')){
2401 while (cur != ';') { /* Non input consuming loops */
2402 if ((cur >= '0') && (cur <= '9'))
2403 val = val * 10 + (cur - '0');
2405 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2418 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2424 * [ WFC: Legal Character ]
2425 * Characters referred to using character references must match the
2426 * production for Char.
2428 if ((IS_CHAR(val) && (outofrange == 0))) {
2431 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2432 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2439 * xmlNewBlanksWrapperInputStream:
2440 * @ctxt: an XML parser context
2441 * @entity: an Entity pointer
2443 * Create a new input stream for wrapping
2444 * blanks around a PEReference
2446 * Returns the new input stream or NULL
/* Free callback installed on blanks-wrapper inputs; it releases the buffer
 * with xmlFree.  xmlParserHandlePEReference also compares input->free with
 * this function to recognise an input that is such a wrapper. */
2449 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
/*
 * Build a small in-memory input stream that wraps the name of entity.
 * The buffer is xmlStrlen(name) + 5 bytes: two leading bytes, the name
 * copied at offset 2, then ';', ' ' and a terminating 0.
 * A NULL entity is reported as XML_ERR_INTERNAL_ERROR.
 */
2451 static xmlParserInputPtr
2452 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2453 xmlParserInputPtr input;
2456 if (entity == NULL) {
2457 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2458 "xmlNewBlanksWrapperInputStream entity\n");
2461 if (xmlParserDebugEntities)
2462 xmlGenericError(xmlGenericErrorContext,
2463 "new blanks wrapper for entity: %s\n", entity->name);
2464 input = xmlNewInputStream(ctxt);
2465 if (input == NULL) {
/* 5 extra bytes: 2 before the name, 3 after it (';', ' ', 0). */
2468 length = xmlStrlen(entity->name) + 5;
2469 buffer = xmlMallocAtomic(length);
2470 if (buffer == NULL) {
2471 xmlErrMemory(ctxt, NULL);
2477 buffer [length-3] = ';';
2478 buffer [length-2] = ' ';
2479 buffer [length-1] = 0;
/* length - 5 is the length of the name without its terminator. */
2480 memcpy(buffer + 2, entity->name, length - 5);
/* The free callback also tags this input as a blanks wrapper. */
2481 input->free = deallocblankswrapper;
2482 input->base = buffer;
2483 input->cur = buffer;
2484 input->length = length;
/* NOTE(review): end is one past the terminating 0 byte, so that byte lies
 * inside [cur, end). */
2485 input->end = &buffer[length];
2490 * xmlParserHandlePEReference:
2491 * @ctxt: the parser context
2493 * [69] PEReference ::= '%' Name ';'
2495 * [ WFC: No Recursion ]
2496 * A parsed entity must not contain a recursive
2497 * reference to itself, either directly or indirectly.
2499 * [ WFC: Entity Declared ]
2500 * In a document without any DTD, a document with only an internal DTD
2501 * subset which contains no parameter entity references, or a document
2502 * with "standalone='yes'", ... ... The declaration of a parameter
2503 * entity must precede any reference to it...
2505 * [ VC: Entity Declared ]
2506 * In a document with an external subset or external parameter entities
2507 * with "standalone='no'", ... ... The declaration of a parameter entity
2508 * must precede any reference to it...
2511 * Parameter-entity references may only appear in the DTD.
2512 * NOTE: misleading but this is handled.
2514 * A PEReference may have been detected in the current input stream
2515 * the handling is done according to
2516 * http://www.w3.org/TR/REC-xml#entproc
2518 * - Included in literal in entity values
2519 * - Included as Parameter Entity reference within DTDs
/*
 * Handle a parameter-entity reference found at the current input position.
 *
 * Outline of the statements below:
 *  1. Nothing is done unless the current byte is '%'.
 *  2. A switch on ctxt->instate decides whether a reference may be handled
 *     in the current state.  Fatal errors are raised for the EOF state
 *     (XML_ERR_PEREF_AT_EOF), for PROLOG / START / MISC
 *     (XML_ERR_PEREF_IN_PROLOG) and for EPILOG (XML_ERR_PEREF_IN_EPILOG).
 *     In the DTD state two extra tests are made: internal subset with a
 *     single stacked input, and a '%' followed by a blank or a 0 byte.
 *  3. The name is read with xmlParseName; a missing name reports
 *     XML_ERR_PEREF_NO_NAME.  The entity is looked up through the SAX
 *     getParameterEntity callback when one is installed.
 *  4. Unknown entity: fatal XML_ERR_UNDECLARED_ENTITY when the document is
 *     standalone, or has neither an external subset nor earlier PE
 *     references; otherwise a validity error (when validating with an
 *     error callback) or a warning.  xmlParserEntityCheck is then called.
 *  5. Known entity while the current input is not a blanks wrapper: a
 *     blanks wrapper input is created and pushed.
 *  6. Known entity of a parameter type: external ones are considered only
 *     when one of NOENT / DTDVALID / DTDLOAD / DTDATTR, replaceEntities or
 *     validate is set.  An entity input stream is pushed, the encoding is
 *     detected from the first 4 bytes when at least 4 are available, and
 *     for an external entity starting with "<?xml" plus a blank the text
 *     declaration is parsed.
 *  7. Any other entity type reports XML_ERR_ENTITY_IS_PARAMETER, and
 *     XML_ERR_PEREF_SEMICOL_MISSING is reported by the last statement.
 */
2522 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2523 const xmlChar *name;
2524 xmlEntityPtr entity = NULL;
2525 xmlParserInputPtr input;
2527 if (RAW != '%') return;
2528 switch(ctxt->instate) {
2529 case XML_PARSER_CDATA_SECTION:
2531 case XML_PARSER_COMMENT:
2533 case XML_PARSER_START_TAG:
2535 case XML_PARSER_END_TAG:
2537 case XML_PARSER_EOF:
2538 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2540 case XML_PARSER_PROLOG:
2541 case XML_PARSER_START:
2542 case XML_PARSER_MISC:
2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2545 case XML_PARSER_ENTITY_DECL:
2546 case XML_PARSER_CONTENT:
2547 case XML_PARSER_ATTRIBUTE_VALUE:
2549 case XML_PARSER_SYSTEM_LITERAL:
2550 case XML_PARSER_PUBLIC_LITERAL:
2551 /* we just ignore it there */
2553 case XML_PARSER_EPILOG:
2554 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2556 case XML_PARSER_ENTITY_VALUE:
2558 * NOTE: in the case of entity values, we don't do the
2559 * substitution here since we need the literal
2560 * entity value to be able to save the internal
2561 * subset of the document.
2562 * This will be handled by xmlStringDecodeEntities
2565 case XML_PARSER_DTD:
2567 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2568 * In the internal DTD subset, parameter-entity references
2569 * can occur only where markup declarations can occur, not
2570 * within markup declarations.
2571 * In that case this is handled in xmlParseMarkupDecl
2573 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2575 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578 case XML_PARSER_IGNORE:
2583 name = xmlParseName(ctxt);
2584 if (xmlParserDebugEntities)
2585 xmlGenericError(xmlGenericErrorContext,
2586 "PEReference: %s\n", name);
2588 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2592 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2593 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2594 if (ctxt->instate == XML_PARSER_EOF)
2596 if (entity == NULL) {
2599 * [ WFC: Entity Declared ]
2600 * In a document without any DTD, a document with only an
2601 * internal DTD subset which contains no parameter entity
2602 * references, or a document with "standalone='yes'", ...
2603 * ... The declaration of a parameter entity must precede
2604 * any reference to it...
2606 if ((ctxt->standalone == 1) ||
2607 ((ctxt->hasExternalSubset == 0) &&
2608 (ctxt->hasPErefs == 0))) {
2609 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2610 "PEReference: %%%s; not found\n", name);
2613 * [ VC: Entity Declared ]
2614 * In a document with an external subset or external
2615 * parameter entities with "standalone='no'", ...
2616 * ... The declaration of a parameter entity must precede
2617 * any reference to it...
2619 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2620 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2621 "PEReference: %%%s; not found\n",
2624 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2625 "PEReference: %%%s; not found\n",
2629 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2630 } else if (ctxt->input->free != deallocblankswrapper) {
2631 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2632 if (xmlPushInput(ctxt, input) < 0)
2635 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2636 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2638 xmlCharEncoding enc;
2641 * Note: external parameter entities will not be loaded, it
2642 * is not required for a non-validating parser, unless the
2643 * option of validating, or substituting entities were
2644 * given. Doing so is far more secure as the parser will
2645 * only process data coming from the document entity by
2648 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2649 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2650 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2651 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2652 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2653 (ctxt->replaceEntities == 0) &&
2654 (ctxt->validate == 0))
2658 * handle the extra spaces added before and after
2659 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2660 * this is done independently.
2662 input = xmlNewEntityInputStream(ctxt, entity);
2663 if (xmlPushInput(ctxt, input) < 0)
2667 * Get the 4 first bytes and decode the charset
2668 * if enc != XML_CHAR_ENCODING_NONE
2669 * plug some encoding conversion routines.
2670 * Note that, since we may have some non-UTF8
2671 * encoding (like UTF16, bug 135229), the 'length'
2672 * is not known, but we can calculate based upon
2673 * the amount of data in the buffer.
2676 if (ctxt->instate == XML_PARSER_EOF)
2678 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2683 enc = xmlDetectCharEncoding(start, 4);
2684 if (enc != XML_CHAR_ENCODING_NONE) {
2685 xmlSwitchEncoding(ctxt, enc);
2689 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2690 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2691 (IS_BLANK_CH(NXT(5)))) {
2692 xmlParseTextDecl(ctxt);
2695 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2696 "PEReference: %s is not a parameter entity\n",
2701 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2707 * Macro used to grow the current buffer.
2708 * buffer##_size is expected to be a size_t
2709 * mem_error: is expected to handle memory allocation failures
/*
 * growBuffer(buffer, n): enlarge the named buffer to twice its current
 * size plus n bytes.  The macro pastes the buffer name to reach its size
 * variable (buffer##_size) and jumps to a mem_error label in the calling
 * function when the new size wraps around or xmlRealloc fails.  The
 * reallocation result is received in tmp before anything is overwritten,
 * and the recorded size is updated only after success.
 */
2711 #define growBuffer(buffer, n) { \
2713 size_t new_size = buffer##_size * 2 + n; \
2714 if (new_size < buffer##_size) goto mem_error; \
2715 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2716 if (tmp == NULL) goto mem_error; \
2718 buffer##_size = new_size; \
2722 * xmlStringLenDecodeEntities:
2723 * @ctxt: the parser context
2724 * @str: the input string
2725 * @len: the string length
2726 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2727 * @end: an end marker xmlChar, 0 if none
2728 * @end2: an end marker xmlChar, 0 if none
2729 * @end3: an end marker xmlChar, 0 if none
2731 * Takes an entity string content and processes it to do the adequate substitutions.
2733 * [67] Reference ::= EntityRef | CharRef
2735 * [69] PEReference ::= '%' Name ';'
2737 * Returns A newly allocated string with the substitution done. The caller
2738 * must deallocate it !
/*
 * Copy str into a freshly allocated buffer, expanding character
 * references and, depending on the bits set in what, general entity
 * references (XML_SUBSTITUTE_REF) and parameter-entity references
 * (XML_SUBSTITUTE_PEREF).  Scanning stops on a 0 character or on one of
 * the markers end, end2, end3.  Entity contents are expanded through
 * xmlStringDecodeEntities, which makes the expansion recursive.
 */
2741 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2742 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2743 xmlChar *buffer = NULL;
2744 size_t buffer_size = 0;
2747 xmlChar *current = NULL;
2748 xmlChar *rep = NULL;
2749 const xmlChar *last;
2753 if ((ctxt == NULL) || (str == NULL) || (len < 0))
/* Recursion guard: depth above 40 is an entity loop unless XML_PARSE_HUGE
 * is set, and depth above 1024 is one in every case. */
2757 if (((ctxt->depth > 40) &&
2758 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2759 (ctxt->depth > 1024)) {
2760 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2765 * allocate a translation buffer.
2767 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2768 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2769 if (buffer == NULL) goto mem_error;
2772 * OK loop until we reach one of the ending char or a size limit.
2773 * we are operating on already parsed values.
2776 c = CUR_SCHAR(str, l);
2779 while ((c != 0) && (c != end) && /* non input consuming loop */
2780 (c != end2) && (c != end3)) {
/* Case 1: character reference; it is expanded whatever what contains. */
2783 if ((c == '&') && (str[1] == '#')) {
2784 int val = xmlParseStringCharRef(ctxt, &str);
2786 COPY_BUF(0,buffer,nbchars,val);
/* Throughout the loop the buffer is grown as soon as fewer than
 * XML_PARSER_BUFFER_SIZE bytes of headroom remain. */
2788 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2789 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* Case 2: general entity reference, only with XML_SUBSTITUTE_REF. */
2791 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2792 if (xmlParserDebugEntities)
2793 xmlGenericError(xmlGenericErrorContext,
2794 "String decoding Entity Reference: %.30s\n",
2796 ent = xmlParseStringEntityRef(ctxt, &str);
2797 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2798 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2800 xmlParserEntityCheck(ctxt, 0, ent, 0);
2802 ctxt->nbentities += ent->checked / 2;
/* 2a: predefined entity; only the first byte of its content is copied. */
2803 if ((ent != NULL) &&
2804 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2805 if (ent->content != NULL) {
2806 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2807 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2808 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2811 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2812 "predefined entity has no content\n");
/* 2b: entity with content; the content is decoded recursively into rep,
 * then rep is appended byte by byte. */
2814 } else if ((ent != NULL) && (ent->content != NULL)) {
2816 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2820 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2821 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2826 while (*current != 0) { /* non input consuming loop */
2827 buffer[nbchars++] = *current++;
2828 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2829 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2831 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* 2c: entity without content; the reference is written back in the form
 * '&' name ';'. */
2837 } else if (ent != NULL) {
2838 int i = xmlStrlen(ent->name);
2839 const xmlChar *cur = ent->name;
2841 buffer[nbchars++] = '&';
2842 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2843 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2846 buffer[nbchars++] = *cur++;
2847 buffer[nbchars++] = ';';
/* Case 3: parameter-entity reference, only with XML_SUBSTITUTE_PEREF. */
2849 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2850 if (xmlParserDebugEntities)
2851 xmlGenericError(xmlGenericErrorContext,
2852 "String decoding PE Reference: %.30s\n", str);
2853 ent = xmlParseStringPEReference(ctxt, &str);
2854 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2856 xmlParserEntityCheck(ctxt, 0, ent, 0);
2858 ctxt->nbentities += ent->checked / 2;
/* Content that is not available yet is loaded on demand. */
2860 if (ent->content == NULL) {
2861 xmlLoadEntityContent(ctxt, ent);
2864 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2869 while (*current != 0) { /* non input consuming loop */
2870 buffer[nbchars++] = *current++;
2871 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2872 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2874 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* Case 4: ordinary character, copied using its encoded length l. */
2882 COPY_BUF(l,buffer,nbchars,c);
2884 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2885 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2889 c = CUR_SCHAR(str, l);
/* Terminate the result. */
2893 buffer[nbchars] = 0;
2897 xmlErrMemory(ctxt, NULL);
2907 * xmlStringDecodeEntities:
2908 * @ctxt: the parser context
2909 * @str: the input string
2910 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2911 * @end: an end marker xmlChar, 0 if none
2912 * @end2: an end marker xmlChar, 0 if none
2913 * @end3: an end marker xmlChar, 0 if none
2915 * Takes a entity string content and process to do the adequate substitutions.
2917 * [67] Reference ::= EntityRef | CharRef
2919 * [69] PEReference ::= '%' Name ';'
2921 * Returns A newly allocated string with the substitution done. The caller
2922 * must deallocate it !
2925 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2926 xmlChar end, xmlChar end2, xmlChar end3) {
2927 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2928 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2932 /************************************************************************
2934 * Commodity functions, cleanup needed ? *
2936 ************************************************************************/
2940 * @ctxt: an XML parser context
2942 * @len: the size of @str
2943 * @blank_chars: we know the chars are blanks
2945 * Is this a sequence of blank chars that one can ignore ?
2947 * Returns 1 if ignorable 0 otherwise.
2950 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2953 xmlNodePtr lastChild;
2956 * Don't spend time trying to differentiate them, the same callback is
2959 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2963 * Check for xml:space value.
2965 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2966 (*(ctxt->space) == -2))
2970 * Check that the string is made of blanks
2972 if (blank_chars == 0) {
2973 for (i = 0;i < len;i++)
2974 if (!(IS_BLANK_CH(str[i]))) return(0);
2978 * Look if the element is mixed content in the DTD if available
2980 if (ctxt->node == NULL) return(0);
2981 if (ctxt->myDoc != NULL) {
2982 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2983 if (ret == 0) return(1);
2984 if (ret == 1) return(0);
2988 * Otherwise, heuristic :-\
2990 if ((RAW != '<') && (RAW != 0xD)) return(0);
2991 if ((ctxt->node->children == NULL) &&
2992 (RAW == '<') && (NXT(1) == '/')) return(0);
2994 lastChild = xmlGetLastChild(ctxt->node);
2995 if (lastChild == NULL) {
2996 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2997 (ctxt->node->content != NULL)) return(0);
2998 } else if (xmlNodeIsText(lastChild))
3000 else if ((ctxt->node->children != NULL) &&
3001 (xmlNodeIsText(ctxt->node->children)))
3006 /************************************************************************
3008 * Extra stuff for namespace support *
3009 * Relates to http://www.w3.org/TR/WD-xml-names *
3011 ************************************************************************/
/*
 * xmlSplitQName: split the UTF-8 qualified name @name into a prefix
 * (stored through @prefix) and a local part (the return value).
 *
 * What the visible lines establish:
 *  - NULL is returned when @prefix or @name is NULL.
 *  - Unless XML_XML_NAMESPACE is defined, a name starting with the four
 *    bytes x, m, l, colon is returned whole as an xmlStrdup() copy.
 *  - The part before the colon is collected in a stack buffer of
 *    XML_MAX_NAMELEN + 5 bytes; longer names move to a heap buffer obtained
 *    from xmlMallocAtomic() and grown with xmlRealloc() through a temporary
 *    pointer. Allocation failures are reported with xmlErrMemory().
 *  - When the colon is the last byte of @name the whole name is returned
 *    as a copy.
 *  - An empty local part yields a copy of the empty string.
 *  - When the first byte of the local part is not an ASCII letter,
 *    underscore or colon, it is decoded with CUR_SCHAR() and the error
 *    XML_NS_ERR_QNAME is raised if it is neither IS_LETTER nor underscore.
 *  - The local part is then collected the same way as the prefix.
 *
 * NOTE(review): this listing omits its short lines (return type, several
 * assignments, return statements, braces) and carries a listing number at
 * the start of each line; the code text below is left untouched.
 * NOTE(review): presumably the caller owns both returned strings - confirm
 * against the full source.
 */
3015 * @ctxt: an XML parser context
3016 * @name: an XML parser context
3017 * @prefix: a xmlChar **
3019 * parse an UTF8 encoded XML qualified name string
3021 * [NS 5] QName ::= (Prefix ':')? LocalPart
3023 * [NS 6] Prefix ::= NCName
3025 * [NS 7] LocalPart ::= NCName
3027 * Returns the local part, and prefix is updated
3028 * to get the Prefix if any.
3032 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3033 xmlChar buf[XML_MAX_NAMELEN + 5];
3034 xmlChar *buffer = NULL;
3036 int max = XML_MAX_NAMELEN;
3037 xmlChar *ret = NULL;
3038 const xmlChar *cur = name;
3041 if (prefix == NULL) return(NULL);
3044 if (cur == NULL) return(NULL);
3046 #ifndef XML_XML_NAMESPACE
3047 /* xml: prefix is not really a namespace */
3048 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3049 (cur[2] == 'l') && (cur[3] == ':'))
3050 return(xmlStrdup(name));
3053 /* nasty but well=formed */
3055 return(xmlStrdup(name));
3058 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3064 * Okay someone managed to make a huge name, so he's ready to pay
3065 * for the processing speed.
3069 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3070 if (buffer == NULL) {
3071 xmlErrMemory(ctxt, NULL);
3074 memcpy(buffer, buf, len);
3075 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3076 if (len + 10 > max) {
3080 tmp = (xmlChar *) xmlRealloc(buffer,
3081 max * sizeof(xmlChar));
3084 xmlErrMemory(ctxt, NULL);
3095 if ((c == ':') && (*cur == 0)) {
3099 return(xmlStrdup(name));
3103 ret = xmlStrndup(buf, len);
3107 max = XML_MAX_NAMELEN;
3115 return(xmlStrndup(BAD_CAST "", 0));
3120 * Check that the first character is proper to start
3123 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3124 ((c >= 0x41) && (c <= 0x5A)) ||
3125 (c == '_') || (c == ':'))) {
3127 int first = CUR_SCHAR(cur, l);
3129 if (!IS_LETTER(first) && (first != '_')) {
3130 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3131 "Name %s is not XML Namespace compliant\n",
3137 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3143 * Okay someone managed to make a huge name, so he's ready to pay
3144 * for the processing speed.
3148 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3149 if (buffer == NULL) {
3150 xmlErrMemory(ctxt, NULL);
3153 memcpy(buffer, buf, len);
3154 while (c != 0) { /* tested bigname2.xml */
3155 if (len + 10 > max) {
3159 tmp = (xmlChar *) xmlRealloc(buffer,
3160 max * sizeof(xmlChar));
3162 xmlErrMemory(ctxt, NULL);
3175 ret = xmlStrndup(buf, len);
3184 /************************************************************************
3186 * The parser itself *
3187 * Relates to http://www.w3.org/TR/REC-xml *
3189 ************************************************************************/
3191 /************************************************************************
3193 * Routines to parse Name, NCName and NmToken *
3195 ************************************************************************/
/*
 * File-local call counters, one per Name / NCName / NmToken parsing
 * routine. In this chunk nbParseNameComplex, nbParseNCNameComplex and
 * nbParseStringName are incremented at the entry of the matching function.
 * NOTE(review): no reader of these counters is visible here; presumably
 * they are debugging statistics - confirm before relying on them.
 */
3197 static unsigned long nbParseName = 0;
3198 static unsigned long nbParseNmToken = 0;
3199 static unsigned long nbParseNCName = 0;
3200 static unsigned long nbParseNCNameComplex = 0;
3201 static unsigned long nbParseNameComplex = 0;
3202 static unsigned long nbParseStringName = 0;
3206 * The two following functions are related to the change of accepted
3207 * characters for Name and NmToken in the Revision 5 of XML-1.0
3208 * They correspond to the modified production [4] and the new production [4a]
3209 * changes in that revision. Also note that the macros used for the
3210 * productions Letter, Digit, CombiningChar and Extender are not needed
3212 * We still keep compatibility to pre-revision5 parsing semantic if the
3213 * new XML_PARSE_OLD10 option is given to the parser.
3216 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3217 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3219 * Use the new checks of production [4] [4a] amd [5] of the
3220 * Update 5 of XML-1.0
3222 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3223 (((c >= 'a') && (c <= 'z')) ||
3224 ((c >= 'A') && (c <= 'Z')) ||
3225 (c == '_') || (c == ':') ||
3226 ((c >= 0xC0) && (c <= 0xD6)) ||
3227 ((c >= 0xD8) && (c <= 0xF6)) ||
3228 ((c >= 0xF8) && (c <= 0x2FF)) ||
3229 ((c >= 0x370) && (c <= 0x37D)) ||
3230 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3231 ((c >= 0x200C) && (c <= 0x200D)) ||
3232 ((c >= 0x2070) && (c <= 0x218F)) ||
3233 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3234 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3235 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3236 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3237 ((c >= 0x10000) && (c <= 0xEFFFF))))
3240 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3247 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3248 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3250 * Use the new checks of production [4] [4a] amd [5] of the
3251 * Update 5 of XML-1.0
3253 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3254 (((c >= 'a') && (c <= 'z')) ||
3255 ((c >= 'A') && (c <= 'Z')) ||
3256 ((c >= '0') && (c <= '9')) || /* !start */
3257 (c == '_') || (c == ':') ||
3258 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3259 ((c >= 0xC0) && (c <= 0xD6)) ||
3260 ((c >= 0xD8) && (c <= 0xF6)) ||
3261 ((c >= 0xF8) && (c <= 0x2FF)) ||
3262 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3263 ((c >= 0x370) && (c <= 0x37D)) ||
3264 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3265 ((c >= 0x200C) && (c <= 0x200D)) ||
3266 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3267 ((c >= 0x2070) && (c <= 0x218F)) ||
3268 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3269 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3270 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3271 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3272 ((c >= 0x10000) && (c <= 0xEFFFF))))
3275 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3276 (c == '.') || (c == '-') ||
3277 (c == '_') || (c == ':') ||
3278 (IS_COMBINING(c)) ||
/*
 * Forward declaration: xmlParseAttValueInternal() is called by
 * xmlParseAttValue() later in this file with NULL for @len and @alloc and
 * 0 for @normalize. Its definition is not part of this chunk.
 */
3285 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3286 int *len, int *alloc, int normalize);
/*
 * xmlParseNameComplex: general Name parser, used by xmlParseName() when
 * its ASCII fast path does not apply.
 *
 * What the visible lines establish:
 *  - each call bumps the nbParseNameComplex counter;
 *  - the parser state is tested against XML_PARSER_EOF before the scan,
 *    after every XML_PARSER_CHUNK_SIZE iterations of the scan loops, and
 *    once more afterwards (the statements taken on EOF are elided);
 *  - without XML_PARSE_OLD10 the first character is checked against the
 *    same ranges as xmlIsNameStartChar() and the following ones against
 *    the same ranges as xmlIsNameChar(), written inline;
 *  - with XML_PARSE_OLD10 the IS_LETTER / IS_DIGIT / IS_COMBINING /
 *    IS_EXTENDER macros are used instead;
 *  - a name longer than XML_MAX_NAME_LENGTH is rejected with
 *    XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set;
 *  - the result is interned with xmlDictLookup() from the len bytes that
 *    end at input->cur, or that end one byte earlier when cur points at a
 *    line feed preceded by a carriage return.
 *
 * NOTE(review): cur[-1] and cur - len are read with no visible check
 * against the start of the input buffer; confirm that the buffer cannot
 * have been shrunk or replaced while the name was being scanned.
 * NOTE(review): this listing omits its short lines and carries a listing
 * number at the start of each line; the code text is left untouched.
 */
3288 static const xmlChar *
3289 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3295 nbParseNameComplex++;
3299 * Handler for more complex cases
3302 if (ctxt->instate == XML_PARSER_EOF)
3305 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3307 * Use the new checks of production [4] [4a] amd [5] of the
3308 * Update 5 of XML-1.0
3310 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3311 (!(((c >= 'a') && (c <= 'z')) ||
3312 ((c >= 'A') && (c <= 'Z')) ||
3313 (c == '_') || (c == ':') ||
3314 ((c >= 0xC0) && (c <= 0xD6)) ||
3315 ((c >= 0xD8) && (c <= 0xF6)) ||
3316 ((c >= 0xF8) && (c <= 0x2FF)) ||
3317 ((c >= 0x370) && (c <= 0x37D)) ||
3318 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3319 ((c >= 0x200C) && (c <= 0x200D)) ||
3320 ((c >= 0x2070) && (c <= 0x218F)) ||
3321 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3322 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3323 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3324 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3325 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3331 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3332 (((c >= 'a') && (c <= 'z')) ||
3333 ((c >= 'A') && (c <= 'Z')) ||
3334 ((c >= '0') && (c <= '9')) || /* !start */
3335 (c == '_') || (c == ':') ||
3336 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3337 ((c >= 0xC0) && (c <= 0xD6)) ||
3338 ((c >= 0xD8) && (c <= 0xF6)) ||
3339 ((c >= 0xF8) && (c <= 0x2FF)) ||
3340 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3341 ((c >= 0x370) && (c <= 0x37D)) ||
3342 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3343 ((c >= 0x200C) && (c <= 0x200D)) ||
3344 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3345 ((c >= 0x2070) && (c <= 0x218F)) ||
3346 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3347 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3348 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3349 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3350 ((c >= 0x10000) && (c <= 0xEFFFF))
3352 if (count++ > XML_PARSER_CHUNK_SIZE) {
3355 if (ctxt->instate == XML_PARSER_EOF)
3363 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3364 (!IS_LETTER(c) && (c != '_') &&
3372 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3373 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3374 (c == '.') || (c == '-') ||
3375 (c == '_') || (c == ':') ||
3376 (IS_COMBINING(c)) ||
3377 (IS_EXTENDER(c)))) {
3378 if (count++ > XML_PARSER_CHUNK_SIZE) {
3381 if (ctxt->instate == XML_PARSER_EOF)
3390 if (ctxt->instate == XML_PARSER_EOF)
3396 if ((len > XML_MAX_NAME_LENGTH) &&
3397 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3398 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3401 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3402 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3403 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3408 * @ctxt: an XML parser context
3410 * parse an XML name.
3412 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3413 * CombiningChar | Extender
3415 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3417 * [6] Names ::= Name (#x20 Name)*
3419 * Returns the Name parsed or NULL
3423 xmlParseName(xmlParserCtxtPtr ctxt) {
3435 * Accelerator for simple ASCII names
3437 in = ctxt->input->cur;
3438 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3439 ((*in >= 0x41) && (*in <= 0x5A)) ||
3440 (*in == '_') || (*in == ':')) {
3442 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3443 ((*in >= 0x41) && (*in <= 0x5A)) ||
3444 ((*in >= 0x30) && (*in <= 0x39)) ||
3445 (*in == '_') || (*in == '-') ||
3446 (*in == ':') || (*in == '.'))
3448 if ((*in > 0) && (*in < 0x80)) {
3449 count = in - ctxt->input->cur;
3450 if ((count > XML_MAX_NAME_LENGTH) &&
3451 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3452 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3455 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3456 ctxt->input->cur = in;
3457 ctxt->nbChars += count;
3458 ctxt->input->col += count;
3460 xmlErrMemory(ctxt, NULL);
3464 /* accelerator for special cases */
3465 return(xmlParseNameComplex(ctxt));
/*
 * xmlParseNCNameComplex: general NCName parser, used by xmlParseNCName()
 * when its ASCII fast path does not apply.
 *
 * What the visible lines establish:
 *  - each call bumps the nbParseNCNameComplex counter;
 *  - the first character must satisfy xmlIsNameStartChar() and must not
 *    be a colon, following characters must satisfy xmlIsNameChar() and
 *    must not be a colon;
 *  - every XML_PARSER_CHUNK_SIZE iterations the length is checked against
 *    XML_MAX_NAME_LENGTH (error XML_ERR_NAME_TOO_LONG unless
 *    XML_PARSE_HUGE is set) and the parser state against XML_PARSER_EOF;
 *  - the local pointer end tracks input->cur, because, as the declaration
 *    comment says, CUR_CHAR() can move cur on a CR LF pair;
 *  - input->cur is temporarily moved back by l and forward again around
 *    an elided statement, so that the bytes already parsed are preserved;
 *  - the result is interned with xmlDictLookup() from the len bytes that
 *    end at end.
 *
 * NOTE(review): this listing omits its short lines and carries a listing
 * number at the start of each line; the code text is left untouched.
 */
3468 static const xmlChar *
3469 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3473 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3476 nbParseNCNameComplex++;
3480 * Handler for more complex cases
3483 end = ctxt->input->cur;
3485 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3486 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3490 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3491 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3492 if (count++ > XML_PARSER_CHUNK_SIZE) {
3493 if ((len > XML_MAX_NAME_LENGTH) &&
3494 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3495 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3500 if (ctxt->instate == XML_PARSER_EOF)
3505 end = ctxt->input->cur;
3510 * when shrinking to extend the buffer we really need to preserve
3511 * the part of the name we already parsed. Hence rolling back
3512 * by current lenght.
3514 ctxt->input->cur -= l;
3516 ctxt->input->cur += l;
3517 if (ctxt->instate == XML_PARSER_EOF)
3519 end = ctxt->input->cur;
3523 if ((len > XML_MAX_NAME_LENGTH) &&
3524 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3525 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3528 return(xmlDictLookup(ctxt->dict, end - len, len));
3533 * @ctxt: an XML parser context
3534 * @len: length of the string parsed
3536 * parse an XML name.
3538 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3539 * CombiningChar | Extender
3541 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3543 * Returns the Name parsed or NULL
3546 static const xmlChar *
3547 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3548 const xmlChar *in, *e;
3557 * Accelerator for simple ASCII names
3559 in = ctxt->input->cur;
3560 e = ctxt->input->end;
3561 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3562 ((*in >= 0x41) && (*in <= 0x5A)) ||
3563 (*in == '_')) && (in < e)) {
3565 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3566 ((*in >= 0x41) && (*in <= 0x5A)) ||
3567 ((*in >= 0x30) && (*in <= 0x39)) ||
3568 (*in == '_') || (*in == '-') ||
3569 (*in == '.')) && (in < e))
3573 if ((*in > 0) && (*in < 0x80)) {
3574 count = in - ctxt->input->cur;
3575 if ((count > XML_MAX_NAME_LENGTH) &&
3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3580 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3581 ctxt->input->cur = in;
3582 ctxt->nbChars += count;
3583 ctxt->input->col += count;
3585 xmlErrMemory(ctxt, NULL);
3591 return(xmlParseNCNameComplex(ctxt));
3595 * xmlParseNameAndCompare:
3596 * @ctxt: an XML parser context
3598 * parse an XML name and compares for match
3599 * (specialized for endtag parsing)
3601 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3602 * and the name for mismatch
3605 static const xmlChar *
3606 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3607 register const xmlChar *cmp = other;
3608 register const xmlChar *in;
3612 if (ctxt->instate == XML_PARSER_EOF)
3615 in = ctxt->input->cur;
3616 while (*in != 0 && *in == *cmp) {
3621 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3623 ctxt->input->cur = in;
3624 return (const xmlChar*) 1;
3626 /* failure (or end of input buffer), check with full function */
3627 ret = xmlParseName (ctxt);
3628 /* strings coming from the dictionnary direct compare possible */
3630 return (const xmlChar*) 1;
/*
 * xmlParseStringName: parse an XML Name out of the string *str instead of
 * the parser input.
 *
 * What the visible lines establish:
 *  - each call bumps the nbParseStringName counter;
 *  - characters are decoded with CUR_SCHAR(); the first one must satisfy
 *    xmlIsNameStartChar(), the following ones xmlIsNameChar();
 *  - characters are copied with COPY_BUF() into a stack buffer of
 *    XML_MAX_NAMELEN + 5 bytes; once len reaches XML_MAX_NAMELEN the data
 *    is moved to a heap buffer from xmlMallocAtomic() that is grown with
 *    xmlRealloc() through a temporary pointer, allocation failures being
 *    reported with xmlErrMemory();
 *  - a length above XML_MAX_NAME_LENGTH raises XML_ERR_NAME_TOO_LONG
 *    unless XML_PARSE_HUGE is set;
 *  - a name that fits the stack buffer is returned as an xmlStrndup() copy.
 *
 * NOTE(review): the text handed to xmlFatalErr() reads NCName although
 * this routine parses a Name; it is a runtime string and left unchanged.
 * NOTE(review): this listing omits its short lines and carries a listing
 * number at the start of each line; the code text is left untouched.
 */
3636 * xmlParseStringName:
3637 * @ctxt: an XML parser context
3638 * @str: a pointer to the string pointer (IN/OUT)
3640 * parse an XML name.
3642 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3643 * CombiningChar | Extender
3645 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3647 * [6] Names ::= Name (#x20 Name)*
3649 * Returns the Name parsed or NULL. The @str pointer
3650 * is updated to the current location in the string.
3654 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3655 xmlChar buf[XML_MAX_NAMELEN + 5];
3656 const xmlChar *cur = *str;
3661 nbParseStringName++;
3664 c = CUR_SCHAR(cur, l);
3665 if (!xmlIsNameStartChar(ctxt, c)) {
3669 COPY_BUF(l,buf,len,c);
3671 c = CUR_SCHAR(cur, l);
3672 while (xmlIsNameChar(ctxt, c)) {
3673 COPY_BUF(l,buf,len,c);
3675 c = CUR_SCHAR(cur, l);
3676 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3678 * Okay someone managed to make a huge name, so he's ready to pay
3679 * for the processing speed.
3684 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3685 if (buffer == NULL) {
3686 xmlErrMemory(ctxt, NULL);
3689 memcpy(buffer, buf, len);
3690 while (xmlIsNameChar(ctxt, c)) {
3691 if (len + 10 > max) {
3694 if ((len > XML_MAX_NAME_LENGTH) &&
3695 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3696 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3701 tmp = (xmlChar *) xmlRealloc(buffer,
3702 max * sizeof(xmlChar));
3704 xmlErrMemory(ctxt, NULL);
3710 COPY_BUF(l,buffer,len,c);
3712 c = CUR_SCHAR(cur, l);
3719 if ((len > XML_MAX_NAME_LENGTH) &&
3720 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3721 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3725 return(xmlStrndup(buf, len));
/*
 * xmlParseNmtoken: parse an XML Nmtoken from the parser input.
 *
 * What the visible lines establish:
 *  - the parser state is tested against XML_PARSER_EOF before and during
 *    the scan (the statements taken on EOF are elided);
 *  - every character accepted by xmlIsNameChar() is copied with
 *    COPY_BUF() into a stack buffer of XML_MAX_NAMELEN + 5 bytes;
 *  - a counter triggers some periodic work every XML_PARSER_CHUNK_SIZE
 *    iterations (the work itself is elided);
 *  - once len reaches XML_MAX_NAMELEN the data is moved to a heap buffer
 *    from xmlMallocAtomic() that is grown with xmlRealloc() through a
 *    temporary pointer, allocation failures being reported with
 *    xmlErrMemory();
 *  - XML_ERR_NAME_TOO_LONG is raised unless XML_PARSE_HUGE is set;
 *  - a token that fits the stack buffer is returned as an xmlStrndup()
 *    copy.
 *
 * NOTE(review): inside the heap-buffer loop the limit test compares max,
 * the buffer capacity, with XML_MAX_NAME_LENGTH, while the final test
 * compares len; confirm that the difference is intended.
 * NOTE(review): this listing omits its short lines and carries a listing
 * number at the start of each line; the code text is left untouched.
 */
3730 * @ctxt: an XML parser context
3732 * parse an XML Nmtoken.
3734 * [7] Nmtoken ::= (NameChar)+
3736 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3738 * Returns the Nmtoken parsed or NULL
3742 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3743 xmlChar buf[XML_MAX_NAMELEN + 5];
3753 if (ctxt->instate == XML_PARSER_EOF)
3757 while (xmlIsNameChar(ctxt, c)) {
3758 if (count++ > XML_PARSER_CHUNK_SIZE) {
3762 COPY_BUF(l,buf,len,c);
3768 if (ctxt->instate == XML_PARSER_EOF)
3772 if (len >= XML_MAX_NAMELEN) {
3774 * Okay someone managed to make a huge token, so he's ready to pay
3775 * for the processing speed.
3780 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3781 if (buffer == NULL) {
3782 xmlErrMemory(ctxt, NULL);
3785 memcpy(buffer, buf, len);
3786 while (xmlIsNameChar(ctxt, c)) {
3787 if (count++ > XML_PARSER_CHUNK_SIZE) {
3790 if (ctxt->instate == XML_PARSER_EOF) {
3795 if (len + 10 > max) {
3798 if ((max > XML_MAX_NAME_LENGTH) &&
3799 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3805 tmp = (xmlChar *) xmlRealloc(buffer,
3806 max * sizeof(xmlChar));
3808 xmlErrMemory(ctxt, NULL);
3814 COPY_BUF(l,buffer,len,c);
3824 if ((len > XML_MAX_NAME_LENGTH) &&
3825 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3826 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3829 return(xmlStrndup(buf, len));
/*
 * xmlParseEntityValue: parse the quoted value of an ENTITY declaration.
 *
 * What the visible lines establish:
 *  - the closing quote is chosen from the current character (double or
 *    single quote); anything else raises XML_ERR_ENTITY_NOT_STARTED;
 *  - the raw value is collected with COPY_BUF() in a heap buffer that
 *    starts at XML_PARSER_BUFFER_SIZE and is grown with xmlRealloc()
 *    through a temporary pointer once len + 5 reaches the size;
 *  - the parser state is switched to XML_PARSER_ENTITY_VALUE;
 *  - the collecting loop runs while the character is IS_CHAR, the state is
 *    not XML_PARSER_EOF, and either the character is not the closing quote
 *    or the current input differs from the one recorded on entry, so a
 *    quote coming from an included parameter entity does not end the
 *    literal (see the 4.4.5 note kept below);
 *  - finished inputs are popped while RAW is 0 and more than one input is
 *    stacked;
 *  - the buffer is then scanned: a percent sign, or an ampersand not
 *    followed by a hash sign, must be followed by a name parsed with
 *    xmlParseStringName() and a semicolon, else XML_ERR_ENTITY_CHAR_ERROR
 *    is raised; a percent reference seen with inSubset equal to 1 and a
 *    single stacked input raises XML_ERR_ENTITY_PE_INTERNAL;
 *  - XML_ERR_ENTITY_NOT_FINISHED is raised on a path whose condition is
 *    elided here;
 *  - finally parameter entity references are substituted through
 *    xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF only; general
 *    references are left as is (see the 4.4.7 note kept below).
 *
 * NOTE(review): the use of @orig is not visible in this listing, which
 * omits its short lines and carries a listing number at the start of each
 * line; the code text is left untouched.
 */
3833 * xmlParseEntityValue:
3834 * @ctxt: an XML parser context
3835 * @orig: if non-NULL store a copy of the original entity value
3837 * parse a value for ENTITY declarations
3839 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3840 * "'" ([^%&'] | PEReference | Reference)* "'"
3842 * Returns the EntityValue parsed with reference substituted or NULL
3846 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3847 xmlChar *buf = NULL;
3849 int size = XML_PARSER_BUFFER_SIZE;
3852 xmlChar *ret = NULL;
3853 const xmlChar *cur = NULL;
3854 xmlParserInputPtr input;
3856 if (RAW == '"') stop = '"';
3857 else if (RAW == '\'') stop = '\'';
3859 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3862 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3864 xmlErrMemory(ctxt, NULL);
3869 * The content of the entity definition is copied in a buffer.
3872 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3873 input = ctxt->input;
3875 if (ctxt->instate == XML_PARSER_EOF) {
3882 * NOTE: 4.4.5 Included in Literal
3883 * When a parameter entity reference appears in a literal entity
3884 * value, ... a single or double quote character in the replacement
3885 * text is always treated as a normal data character and will not
3886 * terminate the literal.
3887 * In practice it means we stop the loop only when back at parsing
3888 * the initial entity and the quote is found
3890 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3891 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3892 if (len + 5 >= size) {
3896 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3898 xmlErrMemory(ctxt, NULL);
3904 COPY_BUF(l,buf,len,c);
3907 * Pop-up of finished entities.
3909 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3920 if (ctxt->instate == XML_PARSER_EOF) {
3926 * Raise problem w.r.t. '&' and '%' being used in non-entities
3927 * reference constructs. Note Charref will be handled in
3928 * xmlStringDecodeEntities()
3931 while (*cur != 0) { /* non input consuming */
3932 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3937 name = xmlParseStringName(ctxt, &cur);
3938 if ((name == NULL) || (*cur != ';')) {
3939 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3940 "EntityValue: '%c' forbidden except for entities references\n",
3943 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3944 (ctxt->inputNr == 1)) {
3945 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3956 * Then PEReference entities are substituted.
3959 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3964 * NOTE: 4.4.7 Bypassed
3965 * When a general entity reference appears in the EntityValue in
3966 * an entity declaration, it is bypassed and left as is.
3967 * so XML_SUBSTITUTE_REF is not set here.
3969 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
/*
 * xmlParseAttValueComplex: general attribute value parser, the fallback
 * described in the header text kept below.
 *
 * What the visible lines establish:
 *  - the value must start with a double or a single quote, otherwise
 *    XML_ERR_ATTRIBUTE_NOT_STARTED is raised; the parser state is set to
 *    XML_PARSER_ATTRIBUTE_VALUE;
 *  - the result is built in a heap buffer that starts at
 *    XML_PARSER_BUFFER_SIZE bytes and is enlarged with growBuffer()
 *    whenever fewer than 10 spare bytes remain;
 *  - the loop stops at the closing quote, at a character that is not
 *    IS_CHAR, at a less-than sign, or when the state becomes
 *    XML_PARSER_EOF;
 *  - a value longer than XML_MAX_TEXT_LENGTH is rejected with
 *    XML_ERR_ATTRIBUTE_NOT_FINISHED unless XML_PARSE_HUGE is set;
 *  - character references go through xmlParseCharRef(); the handling
 *    branches on replaceEntities and on the decoded value, and one branch
 *    appends the character with xmlCopyChar();
 *  - entity references go through xmlParseEntityRef(): predefined
 *    entities contribute the first byte of their content; with
 *    replaceEntities set other entities are expanded through
 *    xmlStringDecodeEntities() and the expansion is copied with special
 *    treatment of CR, LF and TAB (the treatment itself is elided);
 *    without replaceEntities the content of a not yet checked entity is
 *    still expanded once, ent->checked is updated from the nbentities
 *    delta and the expansion is searched for a less-than sign, then the
 *    reference itself is copied to the output;
 *  - the four white space characters are written as 0x20, and with
 *    @normalize set leading, repeated and trailing spaces are dropped;
 *  - after the loop XML_ERR_LT_IN_ATTRIBUTE, XML_ERR_INVALID_CHAR or
 *    XML_ERR_ATTRIBUTE_NOT_FINISHED is raised depending on what stopped
 *    the scan;
 *  - the length is stored through @attlen when that pointer is not NULL;
 *  - allocation failures are reported with xmlErrMemory().
 *
 * NOTE(review): ent->owner is read on the line just before the visible
 * test of ent against NULL; a guarding line may simply be elided from this
 * listing - confirm against the full source.
 * NOTE(review): len is compared with INT_MAX only after the loop has
 * finished - confirm that the in-loop XML_MAX_TEXT_LENGTH limit is enough
 * when XML_PARSE_HUGE is set.
 * NOTE(review): this listing omits its short lines and carries a listing
 * number at the start of each line; the code text is left untouched.
 */
3981 * xmlParseAttValueComplex:
3982 * @ctxt: an XML parser context
3983 * @len: the resulting attribute len
3984 * @normalize: wether to apply the inner normalization
3986 * parse a value for an attribute, this is the fallback function
3987 * of xmlParseAttValue() when the attribute parsing requires handling
3988 * of non-ASCII characters, or normalization compaction.
3990 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3993 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3995 xmlChar *buf = NULL;
3996 xmlChar *rep = NULL;
3998 size_t buf_size = 0;
3999 int c, l, in_space = 0;
4000 xmlChar *current = NULL;
4003 if (NXT(0) == '"') {
4004 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4007 } else if (NXT(0) == '\'') {
4009 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4012 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4017 * allocate a translation buffer.
4019 buf_size = XML_PARSER_BUFFER_SIZE;
4020 buf = (xmlChar *) xmlMallocAtomic(buf_size);
4021 if (buf == NULL) goto mem_error;
4024 * OK loop until we reach one of the ending char or a size limit.
4027 while (((NXT(0) != limit) && /* checked */
4028 (IS_CHAR(c)) && (c != '<')) &&
4029 (ctxt->instate != XML_PARSER_EOF)) {
4031 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4032 * special option is given
4034 if ((len > XML_MAX_TEXT_LENGTH) &&
4035 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4036 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4037 "AttValue length too long\n");
4043 if (NXT(1) == '#') {
4044 int val = xmlParseCharRef(ctxt);
4047 if (ctxt->replaceEntities) {
4048 if (len + 10 > buf_size) {
4049 growBuffer(buf, 10);
4054 * The reparsing will be done in xmlStringGetNodeList()
4055 * called by the attribute() function in SAX.c
4057 if (len + 10 > buf_size) {
4058 growBuffer(buf, 10);
4066 } else if (val != 0) {
4067 if (len + 10 > buf_size) {
4068 growBuffer(buf, 10);
4070 len += xmlCopyChar(0, &buf[len], val);
4073 ent = xmlParseEntityRef(ctxt);
4076 ctxt->nbentities += ent->owner;
4077 if ((ent != NULL) &&
4078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4079 if (len + 10 > buf_size) {
4080 growBuffer(buf, 10);
4082 if ((ctxt->replaceEntities == 0) &&
4083 (ent->content[0] == '&')) {
4090 buf[len++] = ent->content[0];
4092 } else if ((ent != NULL) &&
4093 (ctxt->replaceEntities != 0)) {
4094 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4095 rep = xmlStringDecodeEntities(ctxt, ent->content,
4100 while (*current != 0) { /* non input consuming */
4101 if ((*current == 0xD) || (*current == 0xA) ||
4102 (*current == 0x9)) {
4106 buf[len++] = *current++;
4107 if (len + 10 > buf_size) {
4108 growBuffer(buf, 10);
4115 if (len + 10 > buf_size) {
4116 growBuffer(buf, 10);
4118 if (ent->content != NULL)
4119 buf[len++] = ent->content[0];
4121 } else if (ent != NULL) {
4122 int i = xmlStrlen(ent->name);
4123 const xmlChar *cur = ent->name;
4126 * This may look absurd but is needed to detect
4129 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4130 (ent->content != NULL) && (ent->checked == 0)) {
4131 unsigned long oldnbent = ctxt->nbentities;
4133 rep = xmlStringDecodeEntities(ctxt, ent->content,
4134 XML_SUBSTITUTE_REF, 0, 0, 0);
4136 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4138 if (xmlStrchr(rep, '<'))
4146 * Just output the reference
4149 while (len + i + 10 > buf_size) {
4150 growBuffer(buf, i + 10);
4153 buf[len++] = *cur++;
4158 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4159 if ((len != 0) || (!normalize)) {
4160 if ((!normalize) || (!in_space)) {
4161 COPY_BUF(l,buf,len,0x20);
4162 while (len + 10 > buf_size) {
4163 growBuffer(buf, 10);
4170 COPY_BUF(l,buf,len,c);
4171 if (len + 10 > buf_size) {
4172 growBuffer(buf, 10);
4180 if (ctxt->instate == XML_PARSER_EOF)
4183 if ((in_space) && (normalize)) {
4184 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4188 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4189 } else if (RAW != limit) {
4190 if ((c != 0) && (!IS_CHAR(c))) {
4191 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4192 "invalid character in attribute value\n");
4194 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4195 "AttValue: ' expected\n");
4201 * There we potentially risk an overflow, don't allow attribute value of
4202 * length more than INT_MAX it is a very reasonnable assumption !
4204 if (len >= INT_MAX) {
4205 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4206 "AttValue length too long\n");
4210 if (attlen != NULL) *attlen = (int) len;
4214 xmlErrMemory(ctxt, NULL);
4225 * @ctxt: an XML parser context
4227 * parse a value for an attribute
4228 * Note: the parser won't do substitution of entities here, this
4229 * will be handled later in xmlStringGetNodeList
4231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4232 * "'" ([^<&'] | Reference)* "'"
4234 * 3.3.3 Attribute-Value Normalization:
4235 * Before the value of an attribute is passed to the application or
4236 * checked for validity, the XML processor must normalize it as follows:
4237 * - a character reference is processed by appending the referenced
4238 * character to the attribute value
4239 * - an entity reference is processed by recursively processing the
4240 * replacement text of the entity
4241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4242 * appending #x20 to the normalized value, except that only a single
4243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4244 * parsed entity or the literal entity value of an internal parsed entity
4245 * - other characters are processed by appending them to the normalized value
4246 * If the declared value is not CDATA, then the XML processor must further
4247 * process the normalized attribute value by discarding any leading and
4248 * trailing space (#x20) characters, and by replacing sequences of space
4249 * (#x20) characters by a single space (#x20) character.
4250 * All attributes for which no declaration has been read should be treated
4251 * by a non-validating parser as if declared CDATA.
4253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4258 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
/*
 * xmlParseSystemLiteral: parse a quoted SystemLiteral from the parser
 * input.
 *
 * What the visible lines establish:
 *  - the parser state found on entry is saved and put back on the error
 *    paths and after the scan; during the scan the state is
 *    XML_PARSER_SYSTEM_LITERAL;
 *  - a literal that does not start with a quote raises
 *    XML_ERR_LITERAL_NOT_STARTED;
 *  - characters are copied with COPY_BUF() while they are IS_CHAR and
 *    differ from the closing quote; the heap buffer starts at
 *    XML_PARSER_BUFFER_SIZE and is grown with xmlRealloc() through a
 *    temporary pointer once len + 5 reaches the size;
 *  - when the buffer size exceeds XML_MAX_NAME_LENGTH and XML_PARSE_HUGE
 *    is not set, XML_ERR_NAME_TOO_LONG is raised;
 *  - the state is tested against XML_PARSER_EOF inside the loop;
 *  - a scan that ends on a character that is not IS_CHAR raises
 *    XML_ERR_LITERAL_NOT_FINISHED;
 *  - allocation failures are reported with xmlErrMemory().
 *
 * NOTE(review): this listing omits its short lines and carries a listing
 * number at the start of each line; the code text is left untouched.
 */
4264 * xmlParseSystemLiteral:
4265 * @ctxt: an XML parser context
4267 * parse an XML Literal
4269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4271 * Returns the SystemLiteral parsed or NULL
4275 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4276 xmlChar *buf = NULL;
4278 int size = XML_PARSER_BUFFER_SIZE;
4281 int state = ctxt->instate;
4288 } else if (RAW == '\'') {
4292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4298 xmlErrMemory(ctxt, NULL);
4301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4304 if (len + 5 >= size) {
4307 if ((size > XML_MAX_NAME_LENGTH) &&
4308 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4309 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4311 ctxt->instate = (xmlParserInputState) state;
4315 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4318 xmlErrMemory(ctxt, NULL);
4319 ctxt->instate = (xmlParserInputState) state;
4328 if (ctxt->instate == XML_PARSER_EOF) {
4333 COPY_BUF(l,buf,len,cur);
4343 ctxt->instate = (xmlParserInputState) state;
4344 if (!IS_CHAR(cur)) {
4345 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
/*
 * xmlParsePubidLiteral: parse a quoted PubidLiteral from the parser input.
 *
 * What the visible lines establish:
 *  - the parser state found on entry is saved in oldstate and put back
 *    after the scan; during the scan the state is
 *    XML_PARSER_PUBLIC_LITERAL;
 *  - a literal that does not start with a quote raises
 *    XML_ERR_LITERAL_NOT_STARTED;
 *  - the scan continues while the byte satisfies IS_PUBIDCHAR_CH and
 *    differs from the closing quote; the heap buffer starts at
 *    XML_PARSER_BUFFER_SIZE and is grown with xmlRealloc() through a
 *    temporary pointer once len + 1 reaches the size;
 *  - when the buffer size exceeds XML_MAX_NAME_LENGTH and XML_PARSE_HUGE
 *    is not set, XML_ERR_NAME_TOO_LONG is raised;
 *  - the state is tested against XML_PARSER_EOF inside the loop;
 *  - XML_ERR_LITERAL_NOT_FINISHED is raised on a path whose condition is
 *    elided here;
 *  - allocation failures are reported with xmlErrMemory().
 *
 * NOTE(review): whether oldstate is restored on the early error returns
 * cannot be told from this listing, which omits its short lines and
 * carries a listing number at the start of each line; the code text is
 * left untouched.
 */
4353 * xmlParsePubidLiteral:
4354 * @ctxt: an XML parser context
4356 * parse an XML public literal
4358 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4360 * Returns the PubidLiteral parsed or NULL.
4364 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4365 xmlChar *buf = NULL;
4367 int size = XML_PARSER_BUFFER_SIZE;
4371 xmlParserInputState oldstate = ctxt->instate;
4377 } else if (RAW == '\'') {
4381 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4384 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4386 xmlErrMemory(ctxt, NULL);
4389 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4391 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4392 if (len + 1 >= size) {
4395 if ((size > XML_MAX_NAME_LENGTH) &&
4396 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4397 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4402 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4404 xmlErrMemory(ctxt, NULL);
4415 if (ctxt->instate == XML_PARSER_EOF) {
4430 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4434 ctxt->instate = oldstate;
/*
 * Forward declaration of the slow-path character data parser, followed by
 * the 256-entry lookup table driving the fast loop in xmlParseCharData
 * (`while (test_char_data[*in])`).
 *
 * An entry is non-zero (equal to the byte value itself) when the byte can
 * be consumed by the fast loop, and 0x00 when the loop must stop.
 * Zero entries are: every control byte below 0x20 except 0x09, the
 * characters '&' (0x26), '<' (0x3C) and ']' (0x5D), and every byte from
 * 0x80 upwards (non-ASCII input is left to the slow path).
 */
4438 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4441 * used for the test in the inner loop of the char data testing
4443 static const unsigned char test_char_data[256] = {
4444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4445 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4448 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4449 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4450 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4451 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4452 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4453 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4454 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4455 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4456 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4457 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4458 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4459 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4460 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4461 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4462 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4463 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4464 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4465 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4466 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4467 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4468 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4469 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4471 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4472 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4473 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4474 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4475 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4480 * @ctxt: an XML parser context
4481 * @cdata: int indicating whether we are within a CDATA section
4483 * parse a CharData section.
4484 * if we are within a CDATA section ']]>' marks an end of section.
4486 * The right angle bracket (>) may be represented using the string "&gt;",
4487 * and must, for compatibility, be escaped using "&gt;" or a character
4488 * reference when it appears in the string "]]>" in content, when that
4489 * string is not marking the end of a CDATA section.
4491 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
/*
 * xmlParseCharData: fast path for character data (production [14]).
 *
 * @ctxt:  the parser context
 * @cdata: forwarded unchanged to xmlParseCharDataComplex
 *
 * The input line/column are snapshotted in `line`/`col` on entry.  The
 * function then walks the raw input bytes through the pointer `in`:
 *   - runs of 0x20 and 0xA are skipped while keeping ctxt->input->col and
 *     ctxt->input->line up to date; a whitespace-only run is classified by
 *     areBlanks(..., 1) and sent to either sax->ignorableWhitespace or
 *     sax->characters when those two handlers differ;
 *   - other bytes are consumed while test_char_data[*in] is non-zero;
 *   - "]]>" (checked through in[1] and in[2]) raises
 *     XML_ERR_MISPLACED_CDATA_END;
 *   - each consumed run of nbchar bytes is delivered to the SAX handlers,
 *     after which `line`/`col` are refreshed from the input.
 * After the SAX callbacks the parser state is re-checked against
 * XML_PARSER_CONTENT, and XML_PARSER_EOF is tested before the outer loop
 * continues.  The outer loop repeats while the next byte is in
 * 0x20..0x7F or is 0x09.  Otherwise the saved `line`/`col` are written
 * back and parsing is handed over to xmlParseCharDataComplex.
 */
4495 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4498 int line = ctxt->input->line;
4499 int col = ctxt->input->col;
4505 * Accelerated common case where input don't need to be
4506 * modified before passing it to the handler.
4509 in = ctxt->input->cur;
4512 while (*in == 0x20) { in++; ctxt->input->col++; }
4515 ctxt->input->line++; ctxt->input->col = 1;
4517 } while (*in == 0xA);
4518 goto get_more_space;
4521 nbchar = in - ctxt->input->cur;
4523 const xmlChar *tmp = ctxt->input->cur;
4524 ctxt->input->cur = in;
4526 if ((ctxt->sax != NULL) &&
4527 (ctxt->sax->ignorableWhitespace !=
4528 ctxt->sax->characters)) {
4529 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4530 if (ctxt->sax->ignorableWhitespace != NULL)
4531 ctxt->sax->ignorableWhitespace(ctxt->userData,
4534 if (ctxt->sax->characters != NULL)
4535 ctxt->sax->characters(ctxt->userData,
4537 if (*ctxt->space == -1)
4540 } else if ((ctxt->sax != NULL) &&
4541 (ctxt->sax->characters != NULL)) {
4542 ctxt->sax->characters(ctxt->userData,
4550 ccol = ctxt->input->col;
4551 while (test_char_data[*in]) {
4555 ctxt->input->col = ccol;
4558 ctxt->input->line++; ctxt->input->col = 1;
4560 } while (*in == 0xA);
4564 if ((in[1] == ']') && (in[2] == '>')) {
4565 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4566 ctxt->input->cur = in;
4573 nbchar = in - ctxt->input->cur;
4575 if ((ctxt->sax != NULL) &&
4576 (ctxt->sax->ignorableWhitespace !=
4577 ctxt->sax->characters) &&
4578 (IS_BLANK_CH(*ctxt->input->cur))) {
4579 const xmlChar *tmp = ctxt->input->cur;
4580 ctxt->input->cur = in;
4582 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4583 if (ctxt->sax->ignorableWhitespace != NULL)
4584 ctxt->sax->ignorableWhitespace(ctxt->userData,
4587 if (ctxt->sax->characters != NULL)
4588 ctxt->sax->characters(ctxt->userData,
4590 if (*ctxt->space == -1)
4593 line = ctxt->input->line;
4594 col = ctxt->input->col;
4595 } else if (ctxt->sax != NULL) {
4596 if (ctxt->sax->characters != NULL)
4597 ctxt->sax->characters(ctxt->userData,
4598 ctxt->input->cur, nbchar);
4599 line = ctxt->input->line;
4600 col = ctxt->input->col;
4602 /* something really bad happened in the SAX callback */
4603 if (ctxt->instate != XML_PARSER_CONTENT)
4606 ctxt->input->cur = in;
4610 ctxt->input->cur = in;
4612 ctxt->input->line++; ctxt->input->col = 1;
4613 continue; /* while */
4625 if (ctxt->instate == XML_PARSER_EOF)
4627 in = ctxt->input->cur;
4628 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4631 ctxt->input->line = line;
4632 ctxt->input->col = col;
4633 xmlParseCharDataComplex(ctxt, cdata);
4637 * xmlParseCharDataComplex:
4638 * @ctxt: an XML parser context
4639 * @cdata: int indicating whether we are within a CDATA section
4641 * parse a CharData section. This is the fallback function
4642 * of xmlParseCharData() when the parsing requires handling
4643 * of non-ASCII characters.
/*
 * xmlParseCharDataComplex: slow path for character data, called from
 * xmlParseCharData.
 *
 * Characters are decoded one at a time and appended with COPY_BUF to a
 * local buffer of XML_PARSER_BIG_BUFFER_SIZE + 5 bytes.  The loop runs
 * while the current character is not '<' and satisfies IS_CHAR; a
 * "]]"-prefixed sequence triggers XML_ERR_MISPLACED_CDATA_END.
 * Whenever nbchar reaches XML_PARSER_BIG_BUFFER_SIZE the buffer is
 * flushed to SAX: areBlanks(..., 0) selects sax->ignorableWhitespace,
 * otherwise sax->characters is used.  Delivery only happens when
 * ctxt->sax is set and ctxt->disableSAX is clear.  The same flush is
 * repeated for the remainder after the loop.  A non-zero character that
 * fails IS_CHAR is reported as XML_ERR_INVALID_CHAR.
 */
4646 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4647 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4655 while ((cur != '<') && /* checked */
4657 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4658 if ((cur == ']') && (NXT(1) == ']') &&
4662 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4665 COPY_BUF(l,buf,nbchar,cur);
4666 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4670 * OK the segment is to be consumed as chars.
4672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4673 if (areBlanks(ctxt, buf, nbchar, 0)) {
4674 if (ctxt->sax->ignorableWhitespace != NULL)
4675 ctxt->sax->ignorableWhitespace(ctxt->userData,
4678 if (ctxt->sax->characters != NULL)
4679 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4680 if ((ctxt->sax->characters !=
4681 ctxt->sax->ignorableWhitespace) &&
4682 (*ctxt->space == -1))
4687 /* something really bad happened in the SAX callback */
4688 if (ctxt->instate != XML_PARSER_CONTENT)
4695 if (ctxt->instate == XML_PARSER_EOF)
4704 * OK the segment is to be consumed as chars.
4706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4707 if (areBlanks(ctxt, buf, nbchar, 0)) {
4708 if (ctxt->sax->ignorableWhitespace != NULL)
4709 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4711 if (ctxt->sax->characters != NULL)
4712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4713 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4714 (*ctxt->space == -1))
4719 if ((cur != 0) && (!IS_CHAR(cur))) {
4720 /* Generate the error and skip the offending character */
4721 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4722 "PCDATA invalid Char value %d\n",
4729 * xmlParseExternalID:
4730 * @ctxt: an XML parser context
4731 * @publicID: a xmlChar** receiving PubidLiteral
4732 * @strict: indicate whether we should restrict parsing to only
4733 * production [75], see NOTE below
4735 * Parse an External ID or a Public ID
4737 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4738 * 'PUBLIC' S PubidLiteral S SystemLiteral
4740 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4743 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4745 * Returns the SystemLiteral; in the second case publicID also
4746 * receives the PubidLiteral. If strict is off it is possible to
4747 * return NULL and have publicID set.
/*
 * xmlParseExternalID: parse an ExternalID [75] or, when @strict is 0,
 * a bare PublicID [83].
 *
 * @ctxt:     the parser context
 * @publicID: out parameter, set to the result of xmlParsePubidLiteral in
 *            the PUBLIC branch
 * @strict:   non-zero requires "S SystemLiteral" after the public literal
 *
 * SYSTEM branch: a blank is required after the keyword, then the system
 * literal is parsed; XML_ERR_URI_REQUIRED is raised in the failure path.
 * PUBLIC branch: a blank is required after the keyword, the public
 * literal is parsed (XML_ERR_PUBID_REQUIRED if it is NULL).  In strict
 * mode a following blank is mandatory.  In non-strict mode the function
 * looks ahead through `ptr` and returns NULL, leaving *publicID set, when
 * no blank follows or when the next non-blank byte is not a quote.
 * Otherwise the system literal is parsed as in the SYSTEM branch.
 * The look-ahead loop carries an existing TODO marking it as dangerous.
 */
4751 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4752 xmlChar *URI = NULL;
4757 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4759 if (!IS_BLANK_CH(CUR)) {
4760 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4761 "Space required after 'SYSTEM'\n");
4764 URI = xmlParseSystemLiteral(ctxt);
4766 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4768 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4770 if (!IS_BLANK_CH(CUR)) {
4771 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4772 "Space required after 'PUBLIC'\n");
4775 *publicID = xmlParsePubidLiteral(ctxt);
4776 if (*publicID == NULL) {
4777 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4781 * We don't handle [83] so "S SystemLiteral" is required.
4783 if (!IS_BLANK_CH(CUR)) {
4784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785 "Space required after the Public Identifier\n");
4789 * We handle [83] so we return immediately, if
4790 * "S SystemLiteral" is not detected. From a purely parsing
4791 * point of view that's a nice mess.
4797 if (!IS_BLANK_CH(*ptr)) return(NULL);
4799 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4800 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4803 URI = xmlParseSystemLiteral(ctxt);
4805 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4812 * xmlParseCommentComplex:
4813 * @ctxt: an XML parser context
4814 * @buf: the already parsed part of the buffer
4815 * @len: number of bytes filled in the buffer
4816 * @size: allocated size of the buffer
4818 * Skip an XML (SGML) comment <!-- .... -->
4819 * The spec says that "For compatibility, the string "--" (double-hyphen)
4820 * must not occur within comments. "
4821 * This is the slow routine in case the accelerator for ascii didn't work
4823 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * xmlParseCommentComplex: slow path for comment parsing (production [15]),
 * entered from xmlParseComment with whatever has been buffered so far.
 *
 * @ctxt: the parser context
 * @buf:  buffer holding the part of the comment already parsed
 * @len:  number of bytes already stored in @buf
 * @size: allocated size of @buf
 *
 * The input id is recorded in `inputid` so that a comment ending in a
 * different entity can be reported (XML_ERR_ENTITY_BOUNDARY).  A fresh
 * buffer of XML_PARSER_BUFFER_SIZE bytes is allocated in the path shown
 * at the top of the function (presumably when @buf is NULL; the guard is
 * not visible here).  The main loop keeps three characters (q, r, cur) in
 * flight and stops on "-->" or on an invalid character.  Inside the loop:
 *   - "--" not followed by '>' raises XML_ERR_HYPHEN_IN_COMMENT;
 *   - len above XML_MAX_TEXT_LENGTH without XML_PARSE_HUGE aborts with
 *     "Comment too big found";
 *   - the buffer is doubled with xmlRealloc once len + 5 reaches size.
 * On success the text is passed to sax->comment unless SAX is disabled.
 */
4826 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4827 size_t len, size_t size) {
4834 inputid = ctxt->input->id;
4838 size = XML_PARSER_BUFFER_SIZE;
4839 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4841 xmlErrMemory(ctxt, NULL);
4845 GROW; /* Assure there's enough input data */
4848 goto not_terminated;
4850 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4851 "xmlParseComment: invalid xmlChar value %d\n",
4859 goto not_terminated;
4861 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4862 "xmlParseComment: invalid xmlChar value %d\n",
4870 goto not_terminated;
4871 while (IS_CHAR(cur) && /* checked */
4873 (r != '-') || (q != '-'))) {
4874 if ((r == '-') && (q == '-')) {
4875 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4877 if ((len > XML_MAX_TEXT_LENGTH) &&
4878 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4879 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4880 "Comment too big found", NULL);
4884 if (len + 5 >= size) {
4888 new_size = size * 2;
4889 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4890 if (new_buf == NULL) {
4892 xmlErrMemory(ctxt, NULL);
4898 COPY_BUF(ql,buf,len,q);
4908 if (ctxt->instate == XML_PARSER_EOF) {
4923 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4924 "Comment not terminated \n<!--%.50s\n", buf);
4925 } else if (!IS_CHAR(cur)) {
4926 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4927 "xmlParseComment: invalid xmlChar value %d\n",
4930 if (inputid != ctxt->input->id) {
4931 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4932 "Comment doesn't start and stop in the same entity\n");
4935 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4936 (!ctxt->disableSAX))
4937 ctxt->sax->comment(ctxt->userData, buf);
4942 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4943 "Comment not terminated\n", NULL);
4950 * @ctxt: an XML parser context
4952 * Skip an XML (SGML) comment <!-- .... -->
4953 * The spec says that "For compatibility, the string "--" (double-hyphen)
4954 * must not occur within comments. "
4956 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * xmlParseComment: parse an XML comment (production [15]), fast path.
 *
 * Returns immediately unless the input starts with "<!--".  The previous
 * parser state is kept in `state` and XML_PARSER_COMMENT is installed;
 * the input id is kept in `inputid` for the entity-boundary check.
 *
 * The fast loop walks raw bytes through `in`, tracking line/column, and
 * copies each ASCII run of nbchar bytes into `buf` with memcpy.  The
 * buffer is first allocated as XML_PARSER_BUFFER_SIZE + nbchar bytes and
 * later regrown with xmlRealloc once len + nbchar + 1 reaches size.
 * This buffering is only done when a sax->comment handler exists.
 * len above XML_MAX_TEXT_LENGTH without XML_PARSE_HUGE aborts with
 * "Comment too big found".
 *
 * When the terminator is found, sax->comment receives either `buf` or an
 * empty string, and the saved state is restored unless the parser reached
 * XML_PARSER_EOF.  A double hyphen inside the comment raises
 * XML_ERR_HYPHEN_IN_COMMENT.  When the fast loop cannot continue (next
 * byte outside 0x20..0x7F and not 0x09) the work done so far is handed
 * to xmlParseCommentComplex and the saved state is restored afterwards.
 */
4959 xmlParseComment(xmlParserCtxtPtr ctxt) {
4960 xmlChar *buf = NULL;
4961 size_t size = XML_PARSER_BUFFER_SIZE;
4963 xmlParserInputState state;
4970 * Check that there is a comment right here.
4972 if ((RAW != '<') || (NXT(1) != '!') ||
4973 (NXT(2) != '-') || (NXT(3) != '-')) return;
4974 state = ctxt->instate;
4975 ctxt->instate = XML_PARSER_COMMENT;
4976 inputid = ctxt->input->id;
4982 * Accelerated common case where input don't need to be
4983 * modified before passing it to the handler.
4985 in = ctxt->input->cur;
4989 ctxt->input->line++; ctxt->input->col = 1;
4991 } while (*in == 0xA);
4994 ccol = ctxt->input->col;
4995 while (((*in > '-') && (*in <= 0x7F)) ||
4996 ((*in >= 0x20) && (*in < '-')) ||
5001 ctxt->input->col = ccol;
5004 ctxt->input->line++; ctxt->input->col = 1;
5006 } while (*in == 0xA);
5009 nbchar = in - ctxt->input->cur;
5011 * save current set of data
5014 if ((ctxt->sax != NULL) &&
5015 (ctxt->sax->comment != NULL)) {
5017 if ((*in == '-') && (in[1] == '-'))
5020 size = XML_PARSER_BUFFER_SIZE + nbchar;
5021 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5023 xmlErrMemory(ctxt, NULL);
5024 ctxt->instate = state;
5028 } else if (len + nbchar + 1 >= size) {
5030 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5031 new_buf = (xmlChar *) xmlRealloc(buf,
5032 size * sizeof(xmlChar));
5033 if (new_buf == NULL) {
5035 xmlErrMemory(ctxt, NULL);
5036 ctxt->instate = state;
5041 memcpy(&buf[len], ctxt->input->cur, nbchar);
5046 if ((len > XML_MAX_TEXT_LENGTH) &&
5047 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5048 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5049 "Comment too big found", NULL);
5053 ctxt->input->cur = in;
5056 ctxt->input->line++; ctxt->input->col = 1;
5061 ctxt->input->cur = in;
5063 ctxt->input->line++; ctxt->input->col = 1;
5064 continue; /* while */
5070 if (ctxt->instate == XML_PARSER_EOF) {
5074 in = ctxt->input->cur;
5078 if (ctxt->input->id != inputid) {
5079 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5080 "comment doesn't start and stop in the same entity\n");
5083 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5084 (!ctxt->disableSAX)) {
5086 ctxt->sax->comment(ctxt->userData, buf);
5088 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5092 if (ctxt->instate != XML_PARSER_EOF)
5093 ctxt->instate = state;
5097 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5098 "Double hyphen within comment: "
5102 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5103 "Double hyphen within comment\n", NULL);
5111 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5112 xmlParseCommentComplex(ctxt, buf, len, size);
5113 ctxt->instate = state;
5120 * @ctxt: an XML parser context
5122 * parse the name of a PI
5124 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5126 * Returns the PITarget name or NULL
/*
 * xmlParsePITarget: parse the target name of a processing instruction
 * (production [17]).
 *
 * The name is obtained from xmlParseName.  When its first three
 * characters spell "xml" in any letter case:
 *   - the exact lowercase name "xml" raises XML_ERR_RESERVED_XML_NAME with
 *     the "XML declaration allowed only at the start" message;
 *   - any other three-letter case variant raises XML_ERR_RESERVED_XML_NAME;
 *   - longer names are compared against the xmlW3CPIs list (NULL
 *     terminated) and a warning about the reserved prefix is emitted in
 *     the path that follows the lookup.
 * Independently, a name containing ':' is reported through xmlNsErr with
 * XML_NS_ERR_COLON.
 */
5130 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5131 const xmlChar *name;
5133 name = xmlParseName(ctxt);
5134 if ((name != NULL) &&
5135 ((name[0] == 'x') || (name[0] == 'X')) &&
5136 ((name[1] == 'm') || (name[1] == 'M')) &&
5137 ((name[2] == 'l') || (name[2] == 'L'))) {
5139 if ((name[0] == 'x') && (name[1] == 'm') &&
5140 (name[2] == 'l') && (name[3] == 0)) {
5141 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5142 "XML declaration allowed only at the start of the document\n");
5144 } else if (name[3] == 0) {
5145 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5149 if (xmlW3CPIs[i] == NULL) break;
5150 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5153 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5154 "xmlParsePITarget: invalid name prefix 'xml'\n",
5157 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5158 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5159 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5164 #ifdef LIBXML_CATALOG_ENABLED
5166 * xmlParseCatalogPI:
5167 * @ctxt: an XML parser context
5168 * @catalog: the PI value string
5170 * parse an XML Catalog Processing Instruction.
5172 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5174 * Occurs only if allowed by the user and if happening in the Misc
5175 * part of the document before any doctype information
5176 * This will add the given catalog to the parsing context in order
5177 * to be used if there is a resolution need further down in the document
/*
 * xmlParseCatalogPI: extract the catalog URL from the content of an
 * oasis-xml-catalog processing instruction and register it.
 *
 * @ctxt:    the parser context
 * @catalog: the PI value string
 *
 * The string is scanned through `tmp`: leading blanks are skipped, the
 * literal "catalog" (7 bytes, compared with xmlStrncmp) is expected,
 * blanks are skipped again around the following token, and a quote
 * character (single or double) is taken as `marker`.  The URL is the
 * text up to the matching marker, duplicated with xmlStrndup, and added
 * to ctxt->catalogs through xmlCatalogAddLocal.  Malformed content
 * results in an XML_WAR_CATALOG_PI warning.
 */
5181 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5182 xmlChar *URL = NULL;
5183 const xmlChar *tmp, *base;
5187 while (IS_BLANK_CH(*tmp)) tmp++;
5188 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5191 while (IS_BLANK_CH(*tmp)) tmp++;
5196 while (IS_BLANK_CH(*tmp)) tmp++;
5198 if ((marker != '\'') && (marker != '"'))
5202 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5205 URL = xmlStrndup(base, tmp - base);
5207 while (IS_BLANK_CH(*tmp)) tmp++;
5212 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5218 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5219 "Catalog PI syntax error: %s\n",
5228 * @ctxt: an XML parser context
5230 * parse an XML Processing Instruction.
5232 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5234 * The processing is transferred to SAX once parsed.
/*
 * xmlParsePI: parse a processing instruction (production [16]) and hand
 * it to sax->processingInstruction.
 *
 * Only acts when the input starts with "<?".  The current input pointer
 * is remembered in `input` and the parser state in `state`; the state is
 * set to XML_PARSER_PI while parsing and restored on the visible exit
 * paths unless the parser reached XML_PARSER_EOF.
 *
 * After xmlParsePITarget:
 *   - an immediate "?>" yields a PI without data;
 *   - otherwise a buffer of XML_PARSER_BUFFER_SIZE bytes is allocated, a
 *     blank is required after the target (XML_ERR_SPACE_REQUIRED), and
 *     characters are copied with COPY_BUF until "?>" or an invalid
 *     character.  The buffer doubles via xmlRealloc once len + 5 reaches
 *     size.  len above XML_MAX_TEXT_LENGTH without XML_PARSE_HUGE raises
 *     XML_ERR_PI_NOT_FINISHED ("too big").
 * With LIBXML_CATALOG_ENABLED, a PI whose target equals XML_CATALOG_PI,
 * seen while the saved state is XML_PARSER_MISC or XML_PARSER_START, is
 * forwarded to xmlParseCatalogPI when xmlCatalogGetDefaults() allows
 * document catalogs.  A missing target raises XML_ERR_PI_NOT_STARTED.
 */
5238 xmlParsePI(xmlParserCtxtPtr ctxt) {
5239 xmlChar *buf = NULL;
5241 size_t size = XML_PARSER_BUFFER_SIZE;
5243 const xmlChar *target;
5244 xmlParserInputState state;
5247 if ((RAW == '<') && (NXT(1) == '?')) {
5248 xmlParserInputPtr input = ctxt->input;
5249 state = ctxt->instate;
5250 ctxt->instate = XML_PARSER_PI;
5252 * this is a Processing Instruction.
5258 * Parse the target name and check for special support like
5261 target = xmlParsePITarget(ctxt);
5262 if (target != NULL) {
5263 if ((RAW == '?') && (NXT(1) == '>')) {
5264 if (input != ctxt->input) {
5265 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5266 "PI declaration doesn't start and stop in the same entity\n");
5273 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5274 (ctxt->sax->processingInstruction != NULL))
5275 ctxt->sax->processingInstruction(ctxt->userData,
5277 if (ctxt->instate != XML_PARSER_EOF)
5278 ctxt->instate = state;
5281 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5283 xmlErrMemory(ctxt, NULL);
5284 ctxt->instate = state;
5288 if (!IS_BLANK(cur)) {
5289 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5290 "ParsePI: PI %s space expected\n", target);
5294 while (IS_CHAR(cur) && /* checked */
5295 ((cur != '?') || (NXT(1) != '>'))) {
5296 if (len + 5 >= size) {
5298 size_t new_size = size * 2;
5299 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5301 xmlErrMemory(ctxt, NULL);
5303 ctxt->instate = state;
5312 if (ctxt->instate == XML_PARSER_EOF) {
5317 if ((len > XML_MAX_TEXT_LENGTH) &&
5318 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5319 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320 "PI %s too big found", target);
5322 ctxt->instate = state;
5326 COPY_BUF(l,buf,len,cur);
5335 if ((len > XML_MAX_TEXT_LENGTH) &&
5336 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5337 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5338 "PI %s too big found", target);
5340 ctxt->instate = state;
5345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5346 "ParsePI: PI %s never end ...\n", target);
/* NOTE(review): the identical entity-boundary message earlier in this
 * function is raised with XML_ERR_ENTITY_BOUNDARY; the
 * XML_ERR_SPACE_REQUIRED code used below looks like a copy/paste slip.
 * Confirm and align the error code. */
5348 if (input != ctxt->input) {
5349 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5350 "PI declaration doesn't start and stop in the same entity\n");
5354 #ifdef LIBXML_CATALOG_ENABLED
5355 if (((state == XML_PARSER_MISC) ||
5356 (state == XML_PARSER_START)) &&
5357 (xmlStrEqual(target, XML_CATALOG_PI))) {
5358 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5359 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5360 (allow == XML_CATA_ALLOW_ALL))
5361 xmlParseCatalogPI(ctxt, buf);
5369 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5370 (ctxt->sax->processingInstruction != NULL))
5371 ctxt->sax->processingInstruction(ctxt->userData,
5376 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5378 if (ctxt->instate != XML_PARSER_EOF)
5379 ctxt->instate = state;
5384 * xmlParseNotationDecl:
5385 * @ctxt: an XML parser context
5387 * parse a notation declaration
5389 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5391 * Hence there are actually 3 choices:
5392 * 'PUBLIC' S PubidLiteral
5393 * 'PUBLIC' S PubidLiteral S SystemLiteral
5394 * and 'SYSTEM' S SystemLiteral
5396 * See the NOTE on xmlParseExternalID().
/*
 * xmlParseNotationDecl: parse a <!NOTATION ...> declaration
 * (production [82]) and report it through sax->notationDecl.
 *
 * Only acts when the input starts with "<!NOTATION".  The input pointer
 * is remembered in `input` for the entity-boundary check.  A blank is
 * required after the keyword and after the notation name (both reported
 * as XML_ERR_SPACE_REQUIRED).  The name comes from xmlParseName; a
 * missing name raises XML_ERR_NOTATION_NOT_STARTED and a name containing
 * ':' is reported with XML_NS_ERR_COLON.  The identifiers are parsed by
 * xmlParseExternalID in non-strict mode (third argument 0), so a bare
 * PublicID is accepted.  The failure path raises
 * XML_ERR_NOTATION_NOT_FINISHED.  Systemid and Pubid are released with
 * xmlFree at the end.
 */
5400 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5401 const xmlChar *name;
5405 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5406 xmlParserInputPtr input = ctxt->input;
5409 if (!IS_BLANK_CH(CUR)) {
5410 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5411 "Space required after '<!NOTATION'\n");
5416 name = xmlParseName(ctxt);
5418 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5421 if (!IS_BLANK_CH(CUR)) {
5422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423 "Space required after the NOTATION name'\n");
5426 if (xmlStrchr(name, ':') != NULL) {
5427 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5428 "colons are forbidden from notation names '%s'\n",
5436 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
/* NOTE(review): this is an entity-boundary diagnostic, yet it is raised
 * with XML_ERR_SPACE_REQUIRED; the equivalent checks for comments and
 * entity declarations in this file use XML_ERR_ENTITY_BOUNDARY.
 * Confirm and align the error code. */
5440 if (input != ctxt->input) {
5441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442 "Notation declaration doesn't start and stop in the same entity\n");
5445 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5446 (ctxt->sax->notationDecl != NULL))
5447 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5449 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5451 if (Systemid != NULL) xmlFree(Systemid);
5452 if (Pubid != NULL) xmlFree(Pubid);
5457 * xmlParseEntityDecl:
5458 * @ctxt: an XML parser context
5460 * parse <!ENTITY declarations
5462 * [70] EntityDecl ::= GEDecl | PEDecl
5464 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5466 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5468 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5470 * [74] PEDef ::= EntityValue | ExternalID
5472 * [76] NDataDecl ::= S 'NDATA' S Name
5474 * [ VC: Notation Declared ]
5475 * The Name must match the declared name of a notation.
/*
 * xmlParseEntityDecl: parse an <!ENTITY ...> declaration (productions
 * [70]-[76]) and report it through the SAX entity callbacks.
 *
 * Only acts when the input starts with "<!ENTITY".  The input pointer is
 * remembered in `input` for the entity-boundary check performed near the
 * end (XML_ERR_ENTITY_BOUNDARY).  Blanks are required after the keyword,
 * after '%' for parameter entities, and after the entity name.  The name
 * comes from xmlParseName; a name containing ':' is reported with
 * XML_NS_ERR_COLON.  The state is set to XML_PARSER_ENTITY_DECL while
 * the definition is parsed.
 *
 * Parameter entities (isParameter set):
 *   - a quoted value is parsed by xmlParseEntityValue and declared as
 *     XML_INTERNAL_PARAMETER_ENTITY;
 *   - otherwise xmlParseExternalID (strict) supplies URI/literal; the URI
 *     is checked with xmlParseURI (invalid URI -> XML_ERR_INVALID_URI,
 *     fragment present -> XML_ERR_URI_FRAGMENT) and the entity is declared
 *     as XML_EXTERNAL_PARAMETER_ENTITY.
 * General entities:
 *   - a quoted value is declared as XML_INTERNAL_GENERAL_ENTITY;
 *   - an external ID goes through the same URI checks; an optional NDATA
 *     clause leads to sax->unparsedEntityDecl, otherwise the entity is
 *     declared as XML_EXTERNAL_GENERAL_PARSED_ENTITY.
 * In both general cases, when ctxt->myDoc is NULL or carries the
 * SAX_COMPAT_MODE version, a placeholder document and a "fake" internal
 * subset are created and the entity is also registered through
 * xmlSAX2EntityDecl (for the external case only when
 * ctxt->replaceEntities is non-zero).
 *
 * A declaration that is not terminated raises XML_ERR_ENTITY_NOT_FINISHED
 * and halts the parser.  In the closing section the declared entity is
 * looked up again (getParameterEntity / getEntity, falling back to
 * xmlSAX2GetEntity when userData is the context itself) and its `orig`
 * field is inspected, per the existing "save the raw entity value" note.
 * value, URI and literal are released with xmlFree at the end.
 */
5479 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5480 const xmlChar *name = NULL;
5481 xmlChar *value = NULL;
5482 xmlChar *URI = NULL, *literal = NULL;
5483 const xmlChar *ndata = NULL;
5484 int isParameter = 0;
5485 xmlChar *orig = NULL;
5488 /* GROW; done in the caller */
5489 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490 xmlParserInputPtr input = ctxt->input;
5493 skipped = SKIP_BLANKS;
5495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5496 "Space required after '<!ENTITY'\n");
5501 skipped = SKIP_BLANKS;
5503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5504 "Space required after '%'\n");
5509 name = xmlParseName(ctxt);
5511 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5512 "xmlParseEntityDecl: no name\n");
5515 if (xmlStrchr(name, ':') != NULL) {
5516 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5517 "colons are forbidden from entities names '%s'\n",
5520 skipped = SKIP_BLANKS;
5522 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5523 "Space required after the entity name\n");
5526 ctxt->instate = XML_PARSER_ENTITY_DECL;
5528 * handle the various case of definitions...
5531 if ((RAW == '"') || (RAW == '\'')) {
5532 value = xmlParseEntityValue(ctxt, &orig);
5534 if ((ctxt->sax != NULL) &&
5535 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536 ctxt->sax->entityDecl(ctxt->userData, name,
5537 XML_INTERNAL_PARAMETER_ENTITY,
5541 URI = xmlParseExternalID(ctxt, &literal, 1);
5542 if ((URI == NULL) && (literal == NULL)) {
5543 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5548 uri = xmlParseURI((const char *) URI);
5550 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5551 "Invalid URI: %s\n", URI);
5553 * This really ought to be a well formedness error
5554 * but the XML Core WG decided otherwise c.f. issue
5555 * E26 of the XML erratas.
5558 if (uri->fragment != NULL) {
5560 * Okay this is foolish to block those but not
5563 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5565 if ((ctxt->sax != NULL) &&
5566 (!ctxt->disableSAX) &&
5567 (ctxt->sax->entityDecl != NULL))
5568 ctxt->sax->entityDecl(ctxt->userData, name,
5569 XML_EXTERNAL_PARAMETER_ENTITY,
5570 literal, URI, NULL);
5577 if ((RAW == '"') || (RAW == '\'')) {
5578 value = xmlParseEntityValue(ctxt, &orig);
5579 if ((ctxt->sax != NULL) &&
5580 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5581 ctxt->sax->entityDecl(ctxt->userData, name,
5582 XML_INTERNAL_GENERAL_ENTITY,
5585 * For expat compatibility in SAX mode.
5587 if ((ctxt->myDoc == NULL) ||
5588 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5589 if (ctxt->myDoc == NULL) {
5590 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5591 if (ctxt->myDoc == NULL) {
5592 xmlErrMemory(ctxt, "New Doc failed");
5595 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5597 if (ctxt->myDoc->intSubset == NULL)
5598 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5599 BAD_CAST "fake", NULL, NULL);
5601 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5605 URI = xmlParseExternalID(ctxt, &literal, 1);
5606 if ((URI == NULL) && (literal == NULL)) {
5607 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5612 uri = xmlParseURI((const char *)URI);
5614 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5615 "Invalid URI: %s\n", URI);
5617 * This really ought to be a well formedness error
5618 * but the XML Core WG decided otherwise c.f. issue
5619 * E26 of the XML erratas.
5622 if (uri->fragment != NULL) {
5624 * Okay this is foolish to block those but not
5627 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5632 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5633 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634 "Space required before 'NDATA'\n");
5637 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5639 if (!IS_BLANK_CH(CUR)) {
5640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5641 "Space required after 'NDATA'\n");
5644 ndata = xmlParseName(ctxt);
5645 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5646 (ctxt->sax->unparsedEntityDecl != NULL))
5647 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5648 literal, URI, ndata);
5650 if ((ctxt->sax != NULL) &&
5651 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5652 ctxt->sax->entityDecl(ctxt->userData, name,
5653 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5654 literal, URI, NULL);
5656 * For expat compatibility in SAX mode.
5657 * assuming the entity repalcement was asked for
5659 if ((ctxt->replaceEntities != 0) &&
5660 ((ctxt->myDoc == NULL) ||
5661 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5662 if (ctxt->myDoc == NULL) {
5663 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5664 if (ctxt->myDoc == NULL) {
5665 xmlErrMemory(ctxt, "New Doc failed");
5668 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5671 if (ctxt->myDoc->intSubset == NULL)
5672 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5673 BAD_CAST "fake", NULL, NULL);
5674 xmlSAX2EntityDecl(ctxt, name,
5675 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5676 literal, URI, NULL);
5681 if (ctxt->instate == XML_PARSER_EOF)
5685 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5686 "xmlParseEntityDecl: entity %s not terminated\n", name);
5687 xmlHaltParser(ctxt);
5689 if (input != ctxt->input) {
5690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5691 "Entity declaration doesn't start and stop in the same entity\n");
5697 * Ugly mechanism to save the raw entity value.
5699 xmlEntityPtr cur = NULL;
5702 if ((ctxt->sax != NULL) &&
5703 (ctxt->sax->getParameterEntity != NULL))
5704 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5706 if ((ctxt->sax != NULL) &&
5707 (ctxt->sax->getEntity != NULL))
5708 cur = ctxt->sax->getEntity(ctxt->userData, name);
5709 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5710 cur = xmlSAX2GetEntity(ctxt, name);
5714 if (cur->orig != NULL)
5721 if (value != NULL) xmlFree(value);
5722 if (URI != NULL) xmlFree(URI);
5723 if (literal != NULL) xmlFree(literal);
5728 * xmlParseDefaultDecl:
5729 * @ctxt: an XML parser context
5730 * @value: Receive a possible fixed default value for the attribute
5732 * Parse an attribute default declaration
5734 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5736 * [ VC: Required Attribute ]
5737 * if the default declaration is the keyword #REQUIRED, then the
5738 * attribute must be specified for all elements of the type in the
5739 * attribute-list declaration.
5741 * [ VC: Attribute Default Legal ]
5742 * The declared default value must meet the lexical constraints of
5743 * the declared attribute type c.f. xmlValidateAttributeDecl()
5745 * [ VC: Fixed Attribute Default ]
5746 * if an attribute has a default value declared with the #FIXED
5747 * keyword, instances of that attribute must match the default value.
5749 * [ WFC: No < in Attribute Values ]
5750 * handled in xmlParseAttValue()
5752 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5753 * or XML_ATTRIBUTE_FIXED.
/*
 * xmlParseDefaultDecl: parse an attribute default declaration
 * (production [60]).
 *
 * @ctxt:  the parser context
 * @value: receives a possible default value for the attribute
 *
 * "#REQUIRED" and "#IMPLIED" return XML_ATTRIBUTE_REQUIRED and
 * XML_ATTRIBUTE_IMPLIED directly.  Otherwise the result starts as
 * XML_ATTRIBUTE_NONE and becomes XML_ATTRIBUTE_FIXED when "#FIXED" is
 * present (a blank is then required, else XML_ERR_SPACE_REQUIRED).  The
 * default value itself is parsed by xmlParseAttValue, after which the
 * parser state is set to XML_PARSER_DTD.  The failure path re-reports
 * the pending ctxt->errNo with an "Attribute default value declaration
 * error" message.
 */
5757 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5762 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5764 return(XML_ATTRIBUTE_REQUIRED);
5766 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5768 return(XML_ATTRIBUTE_IMPLIED);
5770 val = XML_ATTRIBUTE_NONE;
5771 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5773 val = XML_ATTRIBUTE_FIXED;
5774 if (!IS_BLANK_CH(CUR)) {
5775 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5776 "Space required after '#FIXED'\n");
5780 ret = xmlParseAttValue(ctxt);
5781 ctxt->instate = XML_PARSER_DTD;
5783 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5784 "Attribute default value declaration error\n");
5791 * xmlParseNotationType:
5792 * @ctxt: an XML parser context
5794 * parse a Notation attribute type.
5796 * Note: the leading 'NOTATION' S part has already been parsed...
5798 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5800 * [ VC: Notation Attributes ]
5801 * Values of this type must match one of the notation names included
5802 * in the declaration; all notation names in the declaration must be declared.
5804 * Returns: the notation attribute tree built while parsing
/*
 * xmlParseNotationType: parse the parenthesised name list of a NOTATION
 * attribute type (production [58]) into an xmlEnumeration list.
 *
 * A missing opening delimiter raises XML_ERR_NOTATION_NOT_STARTED.  Each
 * name is read with xmlParseName; a missing name raises
 * XML_ERR_NAME_REQUIRED and frees the list built so far.  Every new name
 * is compared with the names already in the list (walked through `tmp`);
 * a duplicate is reported as the validity error XML_DTD_DUP_TOKEN.
 * Non-duplicate names are wrapped with xmlCreateEnumeration and appended
 * after `last`.  The loop continues while the next token is '|'.
 * A missing closing delimiter raises XML_ERR_NOTATION_NOT_FINISHED and
 * frees the list.
 */
5808 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5809 const xmlChar *name;
5810 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5813 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5820 name = xmlParseName(ctxt);
5822 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5823 "Name expected in NOTATION declaration\n");
5824 xmlFreeEnumeration(ret);
5828 while (tmp != NULL) {
5829 if (xmlStrEqual(name, tmp->name)) {
5830 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5831 "standalone: attribute notation value token %s duplicated\n",
/* the name is only freed here when the parser dictionary does not own it */
5833 if (!xmlDictOwns(ctxt->dict, name))
5834 xmlFree((xmlChar *) name);
5840 cur = xmlCreateEnumeration(name);
5842 xmlFreeEnumeration(ret);
5845 if (last == NULL) ret = last = cur;
5852 } while (RAW == '|');
5854 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5855 xmlFreeEnumeration(ret);
5863 * xmlParseEnumerationType:
5864 * @ctxt: an XML parser context
5866 * parse an Enumeration attribute type.
5868 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870 * [ VC: Enumeration ]
5871 * Values of this type must match one of the Nmtoken tokens in
5874 * Returns: the enumeration attribute tree built while parsing
/*
 * xmlParseEnumerationType: parse the parenthesised Nmtoken list of an
 * enumerated attribute type (production [59]) into an xmlEnumeration
 * list.
 *
 * A missing opening delimiter raises XML_ERR_ATTLIST_NOT_STARTED.  Each
 * token is read with xmlParseNmtoken; a missing token raises
 * XML_ERR_NMTOKEN_REQUIRED.  Every new token is compared with the tokens
 * already in the list (walked through `tmp`); a duplicate is reported as
 * the validity error XML_DTD_DUP_TOKEN.  Non-duplicate tokens are wrapped
 * with xmlCreateEnumeration and appended after `last`.  In both the
 * duplicate and the append path the token is checked with xmlDictOwns
 * (presumably to free it when the dictionary does not own it; the freeing
 * statement is not visible here).  The loop continues while the next
 * token is '|'.  A missing closing delimiter raises
 * XML_ERR_ATTLIST_NOT_FINISHED.
 */
5878 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5883 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5890 name = xmlParseNmtoken(ctxt);
5892 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5896 while (tmp != NULL) {
5897 if (xmlStrEqual(name, tmp->name)) {
5898 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5899 "standalone: attribute enumeration value token %s duplicated\n",
5901 if (!xmlDictOwns(ctxt->dict, name))
5908 cur = xmlCreateEnumeration(name);
5909 if (!xmlDictOwns(ctxt->dict, name))
5912 xmlFreeEnumeration(ret);
5915 if (last == NULL) ret = last = cur;
5922 } while (RAW == '|');
5924 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5932 * xmlParseEnumeratedType:
5933 * @ctxt: an XML parser context
5934 * @tree: the enumeration tree built while parsing
5936 * parse an Enumerated attribute type.
5938 * [57] EnumeratedType ::= NotationType | Enumeration
5940 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5943 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
/*
 * In addition to the two values listed above, 0 is returned when the
 * selected sub-parser stores NULL in *tree.
 */
5947 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/* The 'NOTATION' keyword selects the NotationType production. */
5948 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5950 if (!IS_BLANK_CH(CUR)) {
5951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5952 "Space required after 'NOTATION'\n");
5956 *tree = xmlParseNotationType(ctxt);
5957 if (*tree == NULL) return(0);
5958 return(XML_ATTRIBUTE_NOTATION);
/* Anything else is parsed as a plain Enumeration. */
5960 *tree = xmlParseEnumerationType(ctxt);
5961 if (*tree == NULL) return(0);
5962 return(XML_ATTRIBUTE_ENUMERATION);
5966 * xmlParseAttributeType:
5967 * @ctxt: an XML parser context
5968 * @tree: the enumeration tree built while parsing
5970 * parse the Attribute list def for an element
5972 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5974 * [55] StringType ::= 'CDATA'
5976 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5977 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5979 * Validity constraints for attribute values syntax are checked in
5980 * xmlValidateAttributeValue()
5983 * Values of type ID must match the Name production. A name must not
5984 * appear more than once in an XML document as a value of this type;
5985 * i.e., ID values must uniquely identify the elements which bear them.
5987 * [ VC: One ID per Element Type ]
5988 * No element type may have more than one ID attribute specified.
5990 * [ VC: ID Attribute Default ]
5991 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5994 * Values of type IDREF must match the Name production, and values
5995 * of type IDREFS must match Names; each IDREF Name must match the value
5996 * of an ID attribute on some element in the XML document; i.e. IDREF
5997 * values must match the value of some ID attribute.
5999 * [ VC: Entity Name ]
6000 * Values of type ENTITY must match the Name production, values
6001 * of type ENTITIES must match Names; each Entity Name must match the
6002 * name of an unparsed entity declared in the DTD.
6004 * [ VC: Name Token ]
6005 * Values of type NMTOKEN must match the Nmtoken production; values
6006 * of type NMTOKENS must match Nmtokens.
6008 * Returns the attribute type
/*
 * The keyword is recognised by a chain of prefix comparisons on the
 * current input. Order matters: a keyword that has another keyword as a
 * prefix is tested first (IDREFS before IDREF before ID, NMTOKENS before
 * NMTOKEN). When no keyword matches, the input is handed to
 * xmlParseEnumeratedType(), the only path that uses @tree.
 */
6011 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6013 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6015 return(XML_ATTRIBUTE_CDATA);
6016 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6018 return(XML_ATTRIBUTE_IDREFS);
6019 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6021 return(XML_ATTRIBUTE_IDREF);
/* Plain 'ID' is tested only after both IDREF forms have been ruled out. */
6022 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6024 return(XML_ATTRIBUTE_ID);
6025 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6027 return(XML_ATTRIBUTE_ENTITY);
6028 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6030 return(XML_ATTRIBUTE_ENTITIES);
6031 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6033 return(XML_ATTRIBUTE_NMTOKENS);
6034 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6036 return(XML_ATTRIBUTE_NMTOKEN);
/* No keyword matched: treat the input as an EnumeratedType. */
6038 return(xmlParseEnumeratedType(ctxt, tree));
6042 * xmlParseAttributeListDecl:
6043 * @ctxt: an XML parser context
6045 * : parse the Attribute list def for an element
6047 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6049 * [53] AttDef ::= S Name S AttType S DefaultDecl
/*
 * Overview: after the '<!ATTLIST' keyword and the element name, one
 * attribute definition (name, type, default declaration) is parsed per
 * loop iteration until '>' is reached or the parser enters the EOF state.
 * Each definition is passed to the SAX attributeDecl callback when one is
 * available, and recorded through xmlAddDefAttrs()/xmlAddSpecialAttr().
 * NOTE(review): several short lines (declarations, breaks, braces) are
 * not visible in this listing.
 */
6053 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6054 const xmlChar *elemName;
6055 const xmlChar *attrName;
6056 xmlEnumerationPtr tree;
6058 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Remember the input the declaration started in for the boundary check at the end. */
6059 xmlParserInputPtr input = ctxt->input;
6062 if (!IS_BLANK_CH(CUR)) {
6063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6064 "Space required after '<!ATTLIST'\n");
6067 elemName = xmlParseName(ctxt);
6068 if (elemName == NULL) {
6069 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6070 "ATTLIST: no name for Element\n");
6075 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
/* Position at the start of this iteration, used below to detect lack of progress. */
6076 const xmlChar *check = CUR_PTR;
6079 xmlChar *defaultValue = NULL;
6083 attrName = xmlParseName(ctxt);
6084 if (attrName == NULL) {
6085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6086 "ATTLIST: no name for Attribute\n");
6090 if (!IS_BLANK_CH(CUR)) {
6091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6092 "Space required after the attribute name\n");
/* tree receives the enumeration list when the type is an enumerated one. */
6097 type = xmlParseAttributeType(ctxt, &tree);
6103 if (!IS_BLANK_CH(CUR)) {
6104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6105 "Space required after the attribute type\n");
6107 xmlFreeEnumeration(tree);
6112 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6114 if (defaultValue != NULL)
6115 xmlFree(defaultValue);
6117 xmlFreeEnumeration(tree);
/* Defaults of non-CDATA attributes are whitespace-normalized in place. */
6120 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6121 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6125 if (!IS_BLANK_CH(CUR)) {
6126 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6127 "Space required after the attribute default value\n");
6128 if (defaultValue != NULL)
6129 xmlFree(defaultValue);
6131 xmlFreeEnumeration(tree);
/* Nothing was consumed during this iteration: report an internal error. */
6136 if (check == CUR_PTR) {
6137 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6138 "in xmlParseAttributeListDecl\n");
6139 if (defaultValue != NULL)
6140 xmlFree(defaultValue);
6142 xmlFreeEnumeration(tree);
/*
 * Deliver the definition to the SAX attributeDecl callback when SAX is
 * enabled and the callback is set; otherwise the enumeration tree is
 * released here.
 */
6145 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6146 (ctxt->sax->attributeDecl != NULL))
6147 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6148 type, def, defaultValue, tree);
6149 else if (tree != NULL)
6150 xmlFreeEnumeration(tree);
/* With SAX2, a default that is neither #IMPLIED nor #REQUIRED is recorded. */
6152 if ((ctxt->sax2) && (defaultValue != NULL) &&
6153 (def != XML_ATTRIBUTE_IMPLIED) &&
6154 (def != XML_ATTRIBUTE_REQUIRED)) {
6155 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6158 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
/* The default value buffer is released once the calls above are done. */
6160 if (defaultValue != NULL)
6161 xmlFree(defaultValue);
/* The declaration must end in the same input it started in. */
6165 if (input != ctxt->input) {
6166 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6167 "Attribute list declaration doesn't start and stop in the same entity\n",
6176 * xmlParseElementMixedContentDecl:
6177 * @ctxt: an XML parser context
6178 * @inputchk: the input used for the current entity, needed for boundary checks
6180 * parse the declaration for a Mixed Element content
6181 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6183 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6184 * '(' S? '#PCDATA' S? ')'
6186 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6188 * [ VC: No Duplicate Types ]
6189 * The same name must not appear more than once in a single
6190 * mixed-content declaration.
6192 * returns: the list of the xmlElementContentPtr describing the element choices
/*
 * Overview: after '#PCDATA', either the declaration closes immediately and
 * a single PCDATA node is returned, or a chain of OR nodes is built, one
 * per '|' Name alternative. ret is the root of the tree and cur the node
 * the next alternative is attached to.
 * Ownership: every failure exit that returns NULL releases the whole tree
 * built so far, so no content node is leaked.
 * NOTE(review): several short lines (returns, braces, pointer links) are
 * not visible in this listing.
 */
6194 xmlElementContentPtr
6195 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6196 xmlElementContentPtr ret = NULL, cur = NULL, n;
6197 const xmlChar *elem = NULL;
6200 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
/* When validating, the declaration must close in the input it was opened in. */
6205 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6206 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6207 "Element content declaration doesn't start and stop in the same entity\n",
6211 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6215 ret->ocur = XML_ELEMENT_CONTENT_MULT;
/* Alternatives follow: start the tree with the PCDATA node. */
6220 if ((RAW == '(') || (RAW == '|')) {
6221 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6222 if (ret == NULL) return(NULL);
6224 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6227 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
/* ret was just overwritten with NULL; cur still holds the tree built so far. */
6228 if (ret == NULL) { xmlFreeDocElementContent(ctxt->myDoc, cur); return(NULL); }
6234 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
/* n is not linked into the tree yet; release the tree from its root. */
6235 if (n == NULL) { xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); }
6236 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6245 elem = xmlParseName(ctxt);
6247 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6248 "xmlParseElementMixedContentDecl : Name expected\n");
/*
 * Free from the root: cur may be a node below ret, and freeing only cur
 * would leave its ancestors allocated.
 */
6249 xmlFreeDocElementContent(ctxt->myDoc, ret);
6255 if ((RAW == ')') && (NXT(1) == '*')) {
/* Attach the last parsed name as the second child of the current node. */
6257 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6258 XML_ELEMENT_CONTENT_ELEMENT);
6259 if (cur->c2 != NULL)
6260 cur->c2->parent = cur;
6263 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6264 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6265 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6266 "Element content declaration doesn't start and stop in the same entity\n",
/* The declaration did not end with ")*": discard the tree and report it. */
6271 xmlFreeDocElementContent(ctxt->myDoc, ret);
6272 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6283 * xmlParseElementChildrenContentDeclPriv:
6284 * @ctxt: an XML parser context
6285 * @inputchk: the input used for the current entity, needed for boundary checks
6286 * @depth: the level of recursion
6288 * parse the declaration for a Mixed Element content
6289 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6292 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6294 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6296 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6298 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6300 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6301 * TODO Parameter-entity replacement text must be properly nested
6302 * with parenthesized groups. That is to say, if either of the
6303 * opening or closing parentheses in a choice, seq, or Mixed
6304 * construct is contained in the replacement text for a parameter
6305 * entity, both must be contained in the same replacement text. For
6306 * interoperability, if a parameter-entity reference appears in a
6307 * choice, seq, or Mixed construct, its replacement text should not
6308 * be empty, and neither the first nor last non-blank character of
6309 * the replacement text should be a connector (| or ,).
6311 * Returns the tree of xmlElementContentPtr describing the element
/*
 * NOTE(review): despite the summary line above, the productions listed
 * ([47]-[50]) are those of element-children content, not Mixed content.
 *
 * Overview: the first content particle is parsed (recursively for a
 * nested group, otherwise as a Name), then the loop consumes
 * "separator, particle" pairs until ')'. The first separator seen is
 * remembered in type and every later separator must match it. ret is the
 * root of the tree, cur the current operator/leaf node and last the most
 * recently parsed particle. After ')', an optional '?', '*' or '+' sets
 * the occurrence of the whole group.
 * NOTE(review): many short lines (declarations, returns, braces, pointer
 * links) are not visible in this listing.
 */
6314 static xmlElementContentPtr
6315 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6317 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6318 const xmlChar *elem;
/* Nesting deeper than 128 is rejected unless XML_PARSE_HUGE is set. */
6321 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6323 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6324 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
/* The nested group is checked against the input it is opened in. */
6331 int inputid = ctxt->input->id;
6333 /* Recurse on first child */
6336 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* Not a nested group: the first particle is a plain element name. */
6341 elem = xmlParseName(ctxt);
6343 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6346 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6348 xmlErrMemory(ctxt, NULL);
/* Occurrence indicator of the first particle. */
6353 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6355 } else if (RAW == '*') {
6356 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6358 } else if (RAW == '+') {
6359 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6362 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6368 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6370 * Each loop we parse one separator and one element.
/* The first separator encountered decides the kind of the whole group. */
6373 if (type == 0) type = CUR;
6376 * Detect "Name | Name , Name" error
6378 else if (type != CUR) {
6379 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380 "xmlParseElementChildrenContentDecl : '%c' expected\n",
/* last is released separately only when it is a node distinct from ret. */
6382 if ((last != NULL) && (last != ret))
6383 xmlFreeDocElementContent(ctxt->myDoc, last);
6385 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Operator node for a sequence. */
6390 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6392 if ((last != NULL) && (last != ret))
6393 xmlFreeDocElementContent(ctxt->myDoc, last);
6394 xmlFreeDocElementContent(ctxt->myDoc, ret);
6412 } else if (RAW == '|') {
6413 if (type == 0) type = CUR;
6416 * Detect "Name , Name | Name" error
6418 else if (type != CUR) {
6419 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6420 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6422 if ((last != NULL) && (last != ret))
6423 xmlFreeDocElementContent(ctxt->myDoc, last);
6425 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Operator node for a choice. */
6430 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6432 if ((last != NULL) && (last != ret))
6433 xmlFreeDocElementContent(ctxt->myDoc, last);
6435 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Neither a matching separator nor ')': the group is not finished properly. */
6454 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6455 if ((last != NULL) && (last != ret))
6456 xmlFreeDocElementContent(ctxt->myDoc, last);
6458 xmlFreeDocElementContent(ctxt->myDoc, ret);
6465 int inputid = ctxt->input->id;
6466 /* Recurse on second child */
6469 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6473 elem = xmlParseName(ctxt);
6475 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6477 xmlFreeDocElementContent(ctxt->myDoc, ret);
6480 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6483 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Occurrence indicator of the particle just parsed. */
6487 last->ocur = XML_ELEMENT_CONTENT_OPT;
6489 } else if (RAW == '*') {
6490 last->ocur = XML_ELEMENT_CONTENT_MULT;
6492 } else if (RAW == '+') {
6493 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6496 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6502 if ((cur != NULL) && (last != NULL)) {
/* When validating, the group must close in the input it was opened in. */
6507 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6508 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6509 "Element content declaration doesn't start and stop in the same entity\n",
/*
 * Group-level occurrence (presumably the '?' case; its test is not
 * visible here): a group already marked PLUS or MULT becomes MULT,
 * otherwise OPT.
 */
6515 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6516 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6517 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6519 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6522 } else if (RAW == '*') {
6524 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6527 * Some normalization:
6528 * (a | b* | c?)* == (a | b | c)*
/* Walk the chain of OR nodes and reset OPT/MULT children to ONCE. */
6530 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6531 if ((cur->c1 != NULL) &&
6532 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6533 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6534 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6535 if ((cur->c2 != NULL) &&
6536 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6537 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6538 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6543 } else if (RAW == '+') {
/* A group already marked OPT or MULT becomes MULT, otherwise PLUS. */
6547 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6549 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6551 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6553 * Some normalization:
6554 * (a | b*)+ == (a | b)*
6555 * (a | b?)+ == (a | b)*
6557 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6558 if ((cur->c1 != NULL) &&
6559 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6560 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6561 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6564 if ((cur->c2 != NULL) &&
6565 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6566 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6567 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
/*
 * NOTE(review): per the normalization comment above, the group becomes
 * MULT when an optional child was found; the guard for this assignment
 * is not visible in this listing.
 */
6573 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6581 * xmlParseElementChildrenContentDecl:
6582 * @ctxt: an XML parser context
6583 * @inputchk: the input used for the current entity, needed for boundary checks
6585 * parse the declaration for a Mixed Element content
6586 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6588 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6590 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6592 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6594 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6596 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6597 * TODO Parameter-entity replacement text must be properly nested
6598 * with parenthesized groups. That is to say, if either of the
6599 * opening or closing parentheses in a choice, seq, or Mixed
6600 * construct is contained in the replacement text for a parameter
6601 * entity, both must be contained in the same replacement text. For
6602 * interoperability, if a parameter-entity reference appears in a
6603 * choice, seq, or Mixed construct, its replacement text should not
6604 * be empty, and neither the first nor last non-blank character of
6605 * the replacement text should be a connector (| or ,).
6607 * Returns the tree of xmlElementContentPtr describing the element
/*
 * NOTE(review): despite the summary line above, the productions listed
 * ([47]-[50]) are those of element-children content, not Mixed content.
 * This public entry point only forwards to the depth-tracking private
 * implementation, starting at depth 1.
 */
6610 xmlElementContentPtr
6611 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6612 /* stub left for API/ABI compat */
6613 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6617 * xmlParseElementContentDecl:
6618 * @ctxt: an XML parser context
6619 * @name: the name of the element being defined.
6620 * @result: the Element Content pointer will be stored here if any
6622 * parse the declaration for an Element content either Mixed or Children,
6623 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6625 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6627 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/*
 * The choice between the two sub-parsers is made on the '#PCDATA' keyword:
 * when present the content is Mixed (XML_ELEMENT_TYPE_MIXED), otherwise it
 * is parsed as children content (XML_ELEMENT_TYPE_ELEMENT).
 * NOTE(review): the lines storing into @result and returning are not
 * visible in this listing.
 */
6631 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6632 xmlElementContentPtr *result) {
6634 xmlElementContentPtr tree = NULL;
/* Id of the input holding the opening '(', passed down for boundary checks. */
6635 int inputid = ctxt->input->id;
6641 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642 "xmlParseElementContentDecl : %s '(' expected\n", name);
6647 if (ctxt->instate == XML_PARSER_EOF)
6650 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6651 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6652 res = XML_ELEMENT_TYPE_MIXED;
/* Children content: the private parser is entered at depth 1. */
6654 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6655 res = XML_ELEMENT_TYPE_ELEMENT;
6663 * xmlParseElementDecl:
6664 * @ctxt: an XML parser context
6666 * parse an Element declaration.
6668 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6670 * [ VC: Unique Element Type Declaration ]
6671 * No element type may be declared more than once
6673 * Returns the type of the element, or -1 in case of error
/*
 * Overview: after '<!ELEMENT' and the element name, the content
 * specification is recognised as EMPTY, ANY or a parenthesised content
 * model. The result is reported through the SAX elementDecl callback when
 * one is available; a content tree that nobody took ownership of is
 * released before returning.
 * NOTE(review): several short lines (returns, braces, skips) are not
 * visible in this listing.
 */
6676 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6677 const xmlChar *name;
6679 xmlElementContentPtr content = NULL;
6681 /* GROW; done in the caller */
6682 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Remember the input the declaration started in for the boundary check below. */
6683 xmlParserInputPtr input = ctxt->input;
6686 if (!IS_BLANK_CH(CUR)) {
6687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 "Space required after 'ELEMENT'\n");
6691 name = xmlParseName(ctxt);
6693 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6694 "xmlParseElementDecl: no name for Element\n");
/* Runs while the current input is exhausted and more than one input is stacked. */
6697 while ((RAW == 0) && (ctxt->inputNr > 1))
6699 if (!IS_BLANK_CH(CUR)) {
6700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6701 "Space required after the element name\n");
6704 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6707 * Element must always be empty.
6709 ret = XML_ELEMENT_TYPE_EMPTY;
6710 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6714 * Element is a generic container.
6716 ret = XML_ELEMENT_TYPE_ANY;
6717 } else if (RAW == '(') {
/* Mixed or children content; content receives the parsed tree. */
6718 ret = xmlParseElementContentDecl(ctxt, name, &content);
6721 * [ WFC: PEs in Internal Subset ] error handling.
/* A '%' here, in the internal subset with a single input, gets a dedicated error. */
6723 if ((RAW == '%') && (ctxt->external == 0) &&
6724 (ctxt->inputNr == 1)) {
6725 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6726 "PEReference: forbidden within markup decl in internal subset\n");
6728 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6729 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6736 * Pop-up of finished entities.
6738 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Missing closing '>': report it and drop any content tree that was built. */
6743 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6744 if (content != NULL) {
6745 xmlFreeDocElementContent(ctxt->myDoc, content);
6748 if (input != ctxt->input) {
6749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 "Element declaration doesn't start and stop in the same entity\n");
6754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6755 (ctxt->sax->elementDecl != NULL)) {
/* parent is cleared first so that the check after the callback can tell whether the tree was adopted. */
6756 if (content != NULL)
6757 content->parent = NULL;
6758 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6760 if ((content != NULL) && (content->parent == NULL)) {
6762 * this is a trick: if xmlAddElementDecl is called,
6763 * instead of copying the full tree it is plugged directly
6764 * if called from the parser. Avoid duplicating the
6765 * interfaces or change the API/ABI
6767 xmlFreeDocElementContent(ctxt->myDoc, content);
/* No callback was invoked: the tree is released here. */
6769 } else if (content != NULL) {
6770 xmlFreeDocElementContent(ctxt->myDoc, content);
6778 * xmlParseConditionalSections
6779 * @ctxt: an XML parser context
6781 * [61] conditionalSect ::= includeSect | ignoreSect
6782 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6783 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6784 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6785 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * Overview: an INCLUDE section is parsed like external subset content
 * (nested conditional sections, blanks, PE references, markup
 * declarations) until "]]>". An IGNORE section is skipped while tracking
 * the nesting depth of "<![" ... "]]>" pairs, with the parser state set
 * to XML_PARSER_IGNORE and, unless in recovery mode, SAX disabled. Any
 * other keyword, or a missing '[', halts the parser.
 * NOTE(review): nested INCLUDE sections recurse into this function with
 * no visible depth limit.
 * NOTE(review): several short lines (skips, returns, braces) are not
 * visible in this listing.
 */
6789 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/* Id of the input the section starts in, for the boundary checks below. */
6790 int id = ctxt->input->id;
6794 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6798 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6799 xmlHaltParser(ctxt);
6802 if (ctxt->input->id != id) {
6803 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804 "All markup of the conditional section is not in the same entity\n",
6809 if (xmlParserDebugEntities) {
6810 if ((ctxt->input != NULL) && (ctxt->input->filename))
6811 xmlGenericError(xmlGenericErrorContext,
6812 "%s(%d): ", ctxt->input->filename,
6814 xmlGenericError(xmlGenericErrorContext,
6815 "Entering INCLUDE Conditional Section\n");
6818 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6819 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot of position and consumed count, to detect an iteration that makes no progress. */
6820 const xmlChar *check = CUR_PTR;
/* Kept as unsigned long so the value is not truncated before the comparison below. */
6821 unsigned long cons = ctxt->input->consumed;
6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 xmlParseConditionalSections(ctxt);
6825 } else if (IS_BLANK_CH(CUR)) {
6827 } else if (RAW == '%') {
6828 xmlParsePEReference(ctxt);
6830 xmlParseMarkupDecl(ctxt);
6833 * Pop-up of finished entities.
6835 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Neither the cursor nor the consumed count moved: report it instead of looping. */
6838 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6839 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6843 if (xmlParserDebugEntities) {
6844 if ((ctxt->input != NULL) && (ctxt->input->filename))
6845 xmlGenericError(xmlGenericErrorContext,
6846 "%s(%d): ", ctxt->input->filename,
6848 xmlGenericError(xmlGenericErrorContext,
6849 "Leaving INCLUDE Conditional Section\n");
6852 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6854 xmlParserInputState instate;
6860 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6861 xmlHaltParser(ctxt);
6864 if (ctxt->input->id != id) {
6865 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6866 "All markup of the conditional section is not in the same entity\n",
6871 if (xmlParserDebugEntities) {
6872 if ((ctxt->input != NULL) && (ctxt->input->filename))
6873 xmlGenericError(xmlGenericErrorContext,
6874 "%s(%d): ", ctxt->input->filename,
6876 xmlGenericError(xmlGenericErrorContext,
6877 "Entering IGNORE Conditional Section\n");
6881 * Parse up to the end of the conditional section
6882 * But disable SAX event generating DTD building in the meantime
/* Both saved values are restored after the skip loop. */
6884 state = ctxt->disableSAX;
6885 instate = ctxt->instate;
6886 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6887 ctxt->instate = XML_PARSER_IGNORE;
/* Skip until the "]]>" matching the opening of this section drives depth below zero. */
6889 while (((depth >= 0) && (RAW != 0)) &&
6890 (ctxt->instate != XML_PARSER_EOF)) {
6891 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6896 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
/* The final "]]>" is left in the input; only inner ones are skipped here. */
6897 if (--depth >= 0) SKIP(3);
6904 ctxt->disableSAX = state;
6905 ctxt->instate = instate;
6907 if (xmlParserDebugEntities) {
6908 if ((ctxt->input != NULL) && (ctxt->input->filename))
6909 xmlGenericError(xmlGenericErrorContext,
6910 "%s(%d): ", ctxt->input->filename,
6912 xmlGenericError(xmlGenericErrorContext,
6913 "Leaving IGNORE Conditional Section\n");
6917 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6918 xmlHaltParser(ctxt);
6926 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6928 if (ctxt->input->id != id) {
6929 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6930 "All markup of the conditional section is not in the same entity\n",
/* Guard for the statement that follows: the parser is not at EOF and at least three bytes remain. */
6933 if ((ctxt-> instate != XML_PARSER_EOF) &&
6934 ((ctxt->input->cur + 3) <= ctxt->input->end))
6940 * xmlParseMarkupDecl:
6941 * @ctxt: an XML parser context
6943 * parse Markup declarations
6945 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6946 * NotationDecl | PI | Comment
6948 * [ VC: Proper Declaration/PE Nesting ]
6949 * Parameter-entity replacement text must be properly nested with
6950 * markup declarations. That is to say, if either the first character
6951 * or the last character of a markup declaration (markupdecl above) is
6952 * contained in the replacement text for a parameter-entity reference,
6953 * both must be contained in the same replacement text.
6955 * [ WFC: PEs in Internal Subset ]
6956 * In the internal DTD subset, parameter-entity references can occur
6957 * only where markup declarations can occur, not within markup declarations.
6958 * (This does not apply to references that occur in external parameter
6959 * entities or to the external subset.)
/*
 * Overview: the declaration kind is selected by looking ahead a few
 * characters ("<!" followed by a discriminating letter, or "<?") and the
 * matching parser is called. Afterwards PE references and conditional
 * sections are handled for the internal subset, and the parser state is
 * set back to XML_PARSER_DTD.
 * NOTE(review): most of the dispatch tests are short lines that are not
 * visible in this listing.
 */
6962 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6965 if (NXT(1) == '!') {
6969 xmlParseElementDecl(ctxt);
6970 else if (NXT(3) == 'N')
6971 xmlParseEntityDecl(ctxt);
6974 xmlParseAttributeListDecl(ctxt);
6977 xmlParseNotationDecl(ctxt);
6980 xmlParseComment(ctxt);
6983 /* there is an error but it will be detected later */
6986 } else if (NXT(1) == '?') {
6992 * detect requirement to exit there and act accordingly
6993 * and avoid having instate overriden later on
6995 if (ctxt->instate == XML_PARSER_EOF)
6999 * This is only for internal subset. On external entities,
7000 * the replacement is done before parsing stage
/* Internal subset with a single input on the stack. */
7002 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7003 xmlParsePEReference(ctxt);
7006 * Conditional sections are allowed from entities included
7007 * by PE References in the internal subset.
/* More than one input on the stack: content is coming from an included entity. */
7009 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7010 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7011 xmlParseConditionalSections(ctxt);
7015 ctxt->instate = XML_PARSER_DTD;
7020 * @ctxt: an XML parser context
7022 * parse an XML declaration header for external entities
7024 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
/*
 * Overview: expects '<?xml' followed by a blank, then an optional version
 * (XML_DEFAULT_VERSION is substituted when none is given), a mandatory
 * encoding declaration and the closing '?>'. The version string is stored
 * in ctxt->input->version.
 * NOTE(review): several short lines (skips, returns, braces) are not
 * visible in this listing.
 */
7028 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7030 const xmlChar *encoding;
7033 * We know that '<?xml' is here.
7035 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7038 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7042 if (!IS_BLANK_CH(CUR)) {
7043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7044 "Space needed after '<?xml'\n");
7049 * We may have the VersionInfo here.
7051 version = xmlParseVersionInfo(ctxt);
/* No version given: fall back to a copy of the default version string. */
7052 if (version == NULL)
7053 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7055 if (!IS_BLANK_CH(CUR)) {
7056 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7057 "Space needed here\n");
/* The version string is attached to the current input. */
7060 ctxt->input->version = version;
7063 * We must have the encoding declaration
7065 encoding = xmlParseEncodingDecl(ctxt);
7066 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7068 * The XML REC instructs us to stop parsing right here
/* A missing encoding is reported only when no other error has been recorded. */
7072 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7073 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7074 "Missing encoding in text declaration\n");
7078 if ((RAW == '?') && (NXT(1) == '>')) {
7080 } else if (RAW == '>') {
7081 /* Deprecated old WD ... */
7082 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
/* Neither '?>' nor '>': report the error and move on to the end of the tag. */
7085 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7086 MOVETO_ENDTAG(CUR_PTR);
7092 * xmlParseExternalSubset:
7093 * @ctxt: an XML parser context
7094 * @ExternalID: the external identifier
7095 * @SystemID: the system identifier (or URL)
7097 * parse Markup declarations from an external subset
7099 * [30] extSubset ::= textDecl? extSubsetDecl
7101 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/*
 * Overview: when no encoding is known yet and at least four bytes are
 * available, the encoding is detected from them. An optional text
 * declaration is parsed, a document and its internal subset are created
 * if missing, and then declarations, conditional sections, PE references
 * and blanks are consumed in a loop. The loop stops with
 * XML_ERR_EXT_SUBSET_NOT_FINISHED when an iteration makes no progress.
 * NOTE(review): several short lines (returns, braces, skips) are not
 * visible in this listing.
 */
7104 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7105 const xmlChar *SystemID) {
7106 xmlDetectSAX2(ctxt);
7109 if ((ctxt->encoding == NULL) &&
7110 (ctxt->input->end - ctxt->input->cur >= 4)) {
7112 xmlCharEncoding enc;
7118 enc = xmlDetectCharEncoding(start, 4);
7119 if (enc != XML_CHAR_ENCODING_NONE)
7120 xmlSwitchEncoding(ctxt, enc);
7123 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7124 xmlParseTextDecl(ctxt);
7125 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7127 * The XML REC instructs us to stop parsing right here
7129 xmlHaltParser(ctxt);
/* Make sure a document exists to attach the subset to. */
7133 if (ctxt->myDoc == NULL) {
7134 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7135 if (ctxt->myDoc == NULL) {
7136 xmlErrMemory(ctxt, "New Doc failed");
7139 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7141 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7142 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7144 ctxt->instate = XML_PARSER_DTD;
7146 while (((RAW == '<') && (NXT(1) == '?')) ||
7147 ((RAW == '<') && (NXT(1) == '!')) ||
7148 (RAW == '%') || IS_BLANK_CH(CUR)) {
/* Snapshot of position and consumed count, to detect an iteration that makes no progress. */
7149 const xmlChar *check = CUR_PTR;
/* Kept as unsigned long so the value is not truncated before the comparison below. */
7150 unsigned long cons = ctxt->input->consumed;
7153 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7154 xmlParseConditionalSections(ctxt);
7155 } else if (IS_BLANK_CH(CUR)) {
7157 } else if (RAW == '%') {
7158 xmlParsePEReference(ctxt);
7160 xmlParseMarkupDecl(ctxt);
7163 * Pop-up of finished entities.
7165 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Neither the cursor nor the consumed count moved: report it instead of looping. */
7168 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7169 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
/* Reported after the loop (the guarding condition is not visible in this listing). */
7175 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7181 * xmlParseReference:
7182 * @ctxt: an XML parser context
7184 * parse and handle entity references in content, depending on the SAX
7185 * interface, this may end-up in a call to character() if this is a
7186 * CharRef, a predefined entity, if there is no reference() callback.
7187 * or if the parser was asked to switch to that mode.
7189 * [67] Reference ::= EntityRef | CharRef
7192 xmlParseReference(xmlParserCtxtPtr ctxt) {
7196 xmlNodePtr list = NULL;
7197 xmlParserErrors ret = XML_ERR_OK;
7204 * Simple case of a CharRef
7206 if (NXT(1) == '#') {
7210 int value = xmlParseCharRef(ctxt);
7214 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7216 * So we are using non-UTF-8 buffers
7217 * Check that the char fit on 8bits, if not
7218 * generate a CharRef.
7220 if (value <= 0xFF) {
7223 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7224 (!ctxt->disableSAX))
7225 ctxt->sax->characters(ctxt->userData, out, 1);
7227 if ((hex == 'x') || (hex == 'X'))
7228 snprintf((char *)out, sizeof(out), "#x%X", value);
7230 snprintf((char *)out, sizeof(out), "#%d", value);
7231 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7232 (!ctxt->disableSAX))
7233 ctxt->sax->reference(ctxt->userData, out);
7237 * Just encode the value in UTF-8
7239 COPY_BUF(0 ,out, i, value);
7241 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7242 (!ctxt->disableSAX))
7243 ctxt->sax->characters(ctxt->userData, out, i);
7249 * We are seeing an entity reference
7251 ent = xmlParseEntityRef(ctxt);
7252 if (ent == NULL) return;
7253 if (!ctxt->wellFormed)
7255 was_checked = ent->checked;
7257 /* special case of predefined entities */
7258 if ((ent->name == NULL) ||
7259 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7261 if (val == NULL) return;
7263 * inline the entity.
7265 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7266 (!ctxt->disableSAX))
7267 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7272 * The first reference to the entity trigger a parsing phase
7273 * where the ent->children is filled with the result from
7275 * Note: external parsed entities will not be loaded, it is not
7276 * required for a non-validating parser, unless the parsing option
7277 * of validating, or substituting entities were given. Doing so is
7278 * far more secure as the parser will only process data coming from
7279 * the document entity by default.
7281 if (((ent->checked == 0) ||
7282 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7283 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7284 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7285 unsigned long oldnbent = ctxt->nbentities;
7288 * This is a bit hackish but this seems the best
7289 * way to make sure both SAX and DOM entity support
7293 if (ctxt->userData == ctxt)
7296 user_data = ctxt->userData;
7299 * Check that this entity is well formed
7300 * 4.3.2: An internal general parsed entity is well-formed
7301 * if its replacement text matches the production labeled
7304 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7306 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7310 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7312 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7313 user_data, ctxt->depth, ent->URI,
7314 ent->ExternalID, &list);
7317 ret = XML_ERR_ENTITY_PE_INTERNAL;
7318 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7319 "invalid entity type found\n", NULL);
7323 * Store the number of entities needing parsing for this entity
7324 * content and do checkings
7326 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7327 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7329 if (ret == XML_ERR_ENTITY_LOOP) {
7330 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7331 xmlFreeNodeList(list);
7334 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7335 xmlFreeNodeList(list);
7339 if ((ret == XML_ERR_OK) && (list != NULL)) {
7340 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7341 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7342 (ent->children == NULL)) {
7343 ent->children = list;
7344 if (ctxt->replaceEntities) {
7346 * Prune it directly in the generated document
7347 * except for single text nodes.
7349 if (((list->type == XML_TEXT_NODE) &&
7350 (list->next == NULL)) ||
7351 (ctxt->parseMode == XML_PARSE_READER)) {
7352 list->parent = (xmlNodePtr) ent;
7357 while (list != NULL) {
7358 list->parent = (xmlNodePtr) ctxt->node;
7359 list->doc = ctxt->myDoc;
7360 if (list->next == NULL)
7364 list = ent->children;
7365 #ifdef LIBXML_LEGACY_ENABLED
7366 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7367 xmlAddEntityReference(ent, list, NULL);
7368 #endif /* LIBXML_LEGACY_ENABLED */
7372 while (list != NULL) {
7373 list->parent = (xmlNodePtr) ent;
7374 xmlSetTreeDoc(list, ent->doc);
7375 if (list->next == NULL)
7381 xmlFreeNodeList(list);
7384 } else if ((ret != XML_ERR_OK) &&
7385 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7386 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7387 "Entity '%s' failed to parse\n", ent->name);
7388 xmlParserEntityCheck(ctxt, 0, ent, 0);
7389 } else if (list != NULL) {
7390 xmlFreeNodeList(list);
7393 if (ent->checked == 0)
7395 } else if (ent->checked != 1) {
7396 ctxt->nbentities += ent->checked / 2;
7400 * Now that the entity content has been gathered
7401 * provide it to the application, this can take different forms based
7402 * on the parsing modes.
7404 if (ent->children == NULL) {
7406 * Probably running in SAX mode and the callbacks don't
7407 * build the entity content. So unless we already went
7408 * though parsing for first checking go though the entity
7409 * content to generate callbacks associated to the entity
7411 if (was_checked != 0) {
7414 * This is a bit hackish but this seems the best
7415 * way to make sure both SAX and DOM entity support
7418 if (ctxt->userData == ctxt)
7421 user_data = ctxt->userData;
7423 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7425 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7426 ent->content, user_data, NULL);
7428 } else if (ent->etype ==
7429 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7431 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7432 ctxt->sax, user_data, ctxt->depth,
7433 ent->URI, ent->ExternalID, NULL);
7436 ret = XML_ERR_ENTITY_PE_INTERNAL;
7437 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7438 "invalid entity type found\n", NULL);
7440 if (ret == XML_ERR_ENTITY_LOOP) {
7441 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7445 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7446 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7448 * Entity reference callback comes second, it's somewhat
7449 * superfluous but a compatibility to historical behaviour
7451 ctxt->sax->reference(ctxt->userData, ent->name);
7457 * If we didn't get any children for the entity being built
7459 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7460 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7464 ctxt->sax->reference(ctxt->userData, ent->name);
7468 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7470 * There is a problem on the handling of _private for entities
7471 * (bug 155816): Should we copy the content of the field from
7472 * the entity (possibly overwriting some value set by the user
7473 * when a copy is created), should we leave it alone, or should
7474 * we try to take care of different situations? The problem
7475 * is exacerbated by the usage of this field by the xmlReader.
7476 * To fix this bug, we look at _private on the created node
7477 * and, if it's NULL, we copy in whatever was in the entity.
7478 * If it's not NULL we leave it alone. This is somewhat of a
7479 * hack - maybe we should have further tests to determine
7482 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7484 * Seems we are generating the DOM content, do
7485 * a simple tree copy for all references except the first
7486 * In the first occurrence list contains the replacement.
7488 if (((list == NULL) && (ent->owner == 0)) ||
7489 (ctxt->parseMode == XML_PARSE_READER)) {
7490 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7493 * We are copying here, make sure there is no abuse
7495 ctxt->sizeentcopy += ent->length + 5;
7496 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7500 * when operating on a reader, the entities definitions
7501 * are always owning the entities subtree.
7502 if (ctxt->parseMode == XML_PARSE_READER)
7506 cur = ent->children;
7507 while (cur != NULL) {
7508 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7510 if (nw->_private == NULL)
7511 nw->_private = cur->_private;
7512 if (firstChild == NULL){
7515 nw = xmlAddChild(ctxt->node, nw);
7517 if (cur == ent->last) {
7519 * needed to detect some strange empty
7520 * node cases in the reader tests
7522 if ((ctxt->parseMode == XML_PARSE_READER) &&
7524 (nw->type == XML_ELEMENT_NODE) &&
7525 (nw->children == NULL))
7532 #ifdef LIBXML_LEGACY_ENABLED
7533 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7534 xmlAddEntityReference(ent, firstChild, nw);
7535 #endif /* LIBXML_LEGACY_ENABLED */
7536 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7537 xmlNodePtr nw = NULL, cur, next, last,
7541 * We are copying here, make sure there is no abuse
7543 ctxt->sizeentcopy += ent->length + 5;
7544 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7548 * Copy the entity child list and make it the new
7549 * entity child list. The goal is to make sure any
7550 * ID or REF referenced will be the one from the
7551 * document content and not the entity copy.
7553 cur = ent->children;
7554 ent->children = NULL;
7557 while (cur != NULL) {
7561 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7563 if (nw->_private == NULL)
7564 nw->_private = cur->_private;
7565 if (firstChild == NULL){
7568 xmlAddChild((xmlNodePtr) ent, nw);
7569 xmlAddChild(ctxt->node, cur);
7575 if (ent->owner == 0)
7577 #ifdef LIBXML_LEGACY_ENABLED
7578 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7579 xmlAddEntityReference(ent, firstChild, nw);
7580 #endif /* LIBXML_LEGACY_ENABLED */
7582 const xmlChar *nbktext;
7585 * the name change is to avoid coalescing of the
7586 * node with a possible previous text one which
7587 * would make ent->children a dangling pointer
7589 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7591 if (ent->children->type == XML_TEXT_NODE)
7592 ent->children->name = nbktext;
7593 if ((ent->last != ent->children) &&
7594 (ent->last->type == XML_TEXT_NODE))
7595 ent->last->name = nbktext;
7596 xmlAddChildList(ctxt->node, ent->children);
7600 * This is to avoid a nasty side effect, see
7601 * characters() in SAX.c
/*
 * Summary: reads the Name of a general entity reference from the parser
 * input with xmlParseName() and resolves it to an xmlEntityPtr, reporting
 * the well-formedness violations quoted in the header that follows.
 */
7611 * xmlParseEntityRef:
7612 * @ctxt: an XML parser context
7614 * parse ENTITY references declarations
7616 * [68] EntityRef ::= '&' Name ';'
7618 * [ WFC: Entity Declared ]
7619 * In a document without any DTD, a document with only an internal DTD
7620 * subset which contains no parameter entity references, or a document
7621 * with "standalone='yes'", the Name given in the entity reference
7622 * must match that in an entity declaration, except that well-formed
7623 * documents need not declare any of the following entities: amp, lt,
7624 * gt, apos, quot. The declaration of a parameter entity must precede
7625 * any reference to it. Similarly, the declaration of a general entity
7626 * must precede any reference to it which appears in a default value in an
7627 * attribute-list declaration. Note that if entities are declared in the
7628 * external subset or in external parameter entities, a non-validating
7629 * processor is not obligated to read and process their declarations;
7630 * for such documents, the rule that an entity must be declared is a
7631 * well-formedness constraint only if standalone='yes'.
7633 * [ WFC: Parsed Entity ]
7634 * An entity reference must not contain the name of an unparsed entity
7636 * Returns the xmlEntityPtr if found, or NULL otherwise.
7639 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7640 const xmlChar *name;
7641 xmlEntityPtr ent = NULL;
/* Guard on a parser that is already in the XML_PARSER_EOF state. */
7644 if (ctxt->instate == XML_PARSER_EOF)
/* Read the Name part of the reference from the current input. */
7650 name = xmlParseName(ctxt);
/* XML_ERR_NAME_REQUIRED is the error used for a reference without a name. */
7652 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7653 "xmlParseEntityRef: no name\n");
/* XML_ERR_ENTITYREF_SEMICOL_MISSING is the error used when the ';' is absent. */
7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7663 * Predefined entities override any extra definition
/* Outside XML_PARSE_OLDSAX mode the predefined entities are looked up first. */
7665 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7666 ent = xmlGetPredefinedEntity(name);
7672 * Increase the number of entity references parsed
7677 * Ask first SAX for entity resolution, otherwise try the
7678 * entities which may have stored in the parser context.
/* Lookup order with a SAX handler installed: the getEntity callback first. */
7680 if (ctxt->sax != NULL) {
7681 if (ctxt->sax->getEntity != NULL)
7682 ent = ctxt->sax->getEntity(ctxt->userData, name);
/*
 * XML_PARSE_OLDSAX only: the predefined entities are a fallback, tried when
 * nothing was found and the document is still flagged well-formed.
 */
7683 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7684 (ctxt->options & XML_PARSE_OLDSAX))
7685 ent = xmlGetPredefinedEntity(name);
/*
 * Last fallback, only when userData is the parser context itself and the
 * document is still flagged well-formed: ask xmlSAX2GetEntity() directly.
 */
7686 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7687 (ctxt->userData==ctxt)) {
7688 ent = xmlSAX2GetEntity(ctxt, name);
/* The parser state is tested for XML_PARSER_EOF again after the lookups. */
7691 if (ctxt->instate == XML_PARSER_EOF)
7694 * [ WFC: Entity Declared ]
7695 * In a document without any DTD, a document with only an
7696 * internal DTD subset which contains no parameter entity
7697 * references, or a document with "standalone='yes'", the
7698 * Name given in the entity reference must match that in an
7699 * entity declaration, except that well-formed documents
7700 * need not declare any of the following entities: amp, lt,
7702 * The declaration of a parameter entity must precede any
7704 * Similarly, the declaration of a general entity must
7705 * precede any reference to it which appears in a default
7706 * value in an attribute-list declaration. Note that if
7707 * entities are declared in the external subset or in
7708 * external parameter entities, a non-validating processor
7709 * is not obligated to read and process their declarations;
7710 * for such documents, the rule that an entity must be
7711 * declared is a well-formedness constraint only if
/*
 * Strict case (standalone document, or no external subset and no PE
 * reference recorded so far): the undeclared entity is reported through
 * the fatal-error helper.
 */
7715 if ((ctxt->standalone == 1) ||
7716 ((ctxt->hasExternalSubset == 0) &&
7717 (ctxt->hasPErefs == 0))) {
7718 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7719 "Entity '%s' not defined\n", name);
/*
 * Non-fatal report (presumably the alternative branch of the test above --
 * confirm): same message, but through xmlErrMsgStr() with the
 * XML_WAR_UNDECLARED_ENTITY code.  Outside of a DTD subset the name is
 * still forwarded to the SAX reference() callback.
 * NOTE(review): disableSAX is not tested before this reference() call,
 * unlike the reference() call sites in xmlParseReference -- confirm this
 * is intentional.
 */
7721 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7722 "Entity '%s' not defined\n", name);
7723 if ((ctxt->inSubset == 0) &&
7724 (ctxt->sax != NULL) &&
7725 (ctxt->sax->reference != NULL)) {
7726 ctxt->sax->reference(ctxt->userData, name);
7729 xmlParserEntityCheck(ctxt, 0, ent, 0);
7734 * [ WFC: Parsed Entity ]
7735 * An entity reference must not contain the name of an
/* An external unparsed entity cannot be the target of a general reference. */
7738 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7739 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7740 "Entity reference to unparsed entity %s\n", name);
7744 * [ WFC: No External Entity References ]
7745 * Attribute values cannot contain direct or indirect
7746 * entity references to external entities.
/* While parsing an attribute value, an external parsed entity is rejected. */
7748 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7749 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7750 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7751 "Attribute references external entity '%s'\n", name);
7754 * [ WFC: No < in Attribute Values ]
7755 * The replacement text of any entity referred to directly or
7756 * indirectly in an attribute value (other than "<") must
/*
 * While parsing an attribute value, a non-predefined entity whose content
 * contains '<' is rejected.  The content is only scanned when ent->checked
 * has its low bit set or is still 0.
 * NOTE(review): presumably the low bit of ent->checked caches "content
 * contains '<'" -- confirm against the code that sets it.
 */
7759 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7761 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7762 if (((ent->checked & 1) || (ent->checked == 0)) &&
7763 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7764 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7765 "'<' in entity '%s' is not allowed in attributes values\n", name);
7770 * Internal check, no parameter entities here ...
/* Both kinds of parameter entity are refused as targets of '&name;'. */
7773 switch (ent->etype) {
7774 case XML_INTERNAL_PARAMETER_ENTITY:
7775 case XML_EXTERNAL_PARAMETER_ENTITY:
7776 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7777 "Attempt to reference the parameter entity '%s'\n",
7786 * [ WFC: No Recursion ]
7787 * A parsed entity must not contain a recursive reference
7788 * to itself, either directly or indirectly.
7789 * Done somewhere else
/*
 * Summary: string-based counterpart of xmlParseEntityRef().  The reference
 * Name is read with xmlParseStringName() through a local cursor instead of
 * the parser input, then resolved to an xmlEntityPtr with the same family
 * of well-formedness checks.
 */
7795 * xmlParseStringEntityRef:
7796 * @ctxt: an XML parser context
7797 * @str: a pointer to an index in the string
7799 * parse ENTITY references declarations, but this version parses it from
7802 * [68] EntityRef ::= '&' Name ';'
7804 * [ WFC: Entity Declared ]
7805 * In a document without any DTD, a document with only an internal DTD
7806 * subset which contains no parameter entity references, or a document
7807 * with "standalone='yes'", the Name given in the entity reference
7808 * must match that in an entity declaration, except that well-formed
7809 * documents need not declare any of the following entities: amp, lt,
7810 * gt, apos, quot. The declaration of a parameter entity must precede
7811 * any reference to it. Similarly, the declaration of a general entity
7812 * must precede any reference to it which appears in a default value in an
7813 * attribute-list declaration. Note that if entities are declared in the
7814 * external subset or in external parameter entities, a non-validating
7815 * processor is not obligated to read and process their declarations;
7816 * for such documents, the rule that an entity must be declared is a
7817 * well-formedness constraint only if standalone='yes'.
7819 * [ WFC: Parsed Entity ]
7820 * An entity reference must not contain the name of an unparsed entity
7822 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7823 * is updated to the current location in the string.
7826 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7830 xmlEntityPtr ent = NULL;
/* Both the cursor holder and the string it designates must be non-NULL. */
7832 if ((str == NULL) || (*str == NULL))
/* Read the Name through the local cursor ptr. */
7840 name = xmlParseStringName(ctxt, &ptr);
/* XML_ERR_NAME_REQUIRED is the error used for a reference without a name. */
7842 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7843 "xmlParseStringEntityRef: no name\n");
/* XML_ERR_ENTITYREF_SEMICOL_MISSING is the error used when the ';' is absent. */
7848 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7857 * Predefined entities override any extra definition
/* Outside XML_PARSE_OLDSAX mode the predefined entities are looked up first. */
7859 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7860 ent = xmlGetPredefinedEntity(name);
7869 * Increate the number of entity references parsed
7874 * Ask first SAX for entity resolution, otherwise try the
7875 * entities which may have stored in the parser context.
/*
 * Lookup order with a SAX handler installed: getEntity callback, then (in
 * XML_PARSE_OLDSAX mode) the predefined entities, then xmlSAX2GetEntity()
 * when userData is the parser context itself.  Unlike xmlParseEntityRef(),
 * the fallbacks here do not test ctxt->wellFormed.
 */
7877 if (ctxt->sax != NULL) {
7878 if (ctxt->sax->getEntity != NULL)
7879 ent = ctxt->sax->getEntity(ctxt->userData, name);
7880 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7881 ent = xmlGetPredefinedEntity(name);
7882 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7883 ent = xmlSAX2GetEntity(ctxt, name);
/* The parser state is tested for XML_PARSER_EOF after the lookups. */
7886 if (ctxt->instate == XML_PARSER_EOF) {
7892 * [ WFC: Entity Declared ]
7893 * In a document without any DTD, a document with only an
7894 * internal DTD subset which contains no parameter entity
7895 * references, or a document with "standalone='yes'", the
7896 * Name given in the entity reference must match that in an
7897 * entity declaration, except that well-formed documents
7898 * need not declare any of the following entities: amp, lt,
7900 * The declaration of a parameter entity must precede any
7902 * Similarly, the declaration of a general entity must
7903 * precede any reference to it which appears in a default
7904 * value in an attribute-list declaration. Note that if
7905 * entities are declared in the external subset or in
7906 * external parameter entities, a non-validating processor
7907 * is not obligated to read and process their declarations;
7908 * for such documents, the rule that an entity must be
7909 * declared is a well-formedness constraint only if
/*
 * Strict case (standalone document, or no external subset and no PE
 * reference recorded so far): the undeclared entity is reported through
 * the fatal-error helper.
 */
7913 if ((ctxt->standalone == 1) ||
7914 ((ctxt->hasExternalSubset == 0) &&
7915 (ctxt->hasPErefs == 0))) {
7916 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7917 "Entity '%s' not defined\n", name);
/*
 * Non-fatal report (presumably the alternative branch of the test above --
 * confirm): same message through xmlErrMsgStr() with the
 * XML_WAR_UNDECLARED_ENTITY code.
 */
7919 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920 "Entity '%s' not defined\n",
7923 xmlParserEntityCheck(ctxt, 0, ent, 0);
7924 /* TODO ? check regressions ctxt->valid = 0; */
7928 * [ WFC: Parsed Entity ]
7929 * An entity reference must not contain the name of an
/* An external unparsed entity cannot be the target of a general reference. */
7932 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7933 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7934 "Entity reference to unparsed entity %s\n", name);
7938 * [ WFC: No External Entity References ]
7939 * Attribute values cannot contain direct or indirect
7940 * entity references to external entities.
/* While parsing an attribute value, an external parsed entity is rejected. */
7942 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7943 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7944 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7945 "Attribute references external entity '%s'\n", name);
7948 * [ WFC: No < in Attribute Values ]
7949 * The replacement text of any entity referred to directly or
7950 * indirectly in an attribute value (other than "<") must
/*
 * While parsing an attribute value, a non-predefined entity whose content
 * contains '<' is rejected.  The content is always scanned here; ent->checked
 * is not consulted, unlike the matching test in xmlParseEntityRef().
 * NOTE(review): the (ent != NULL) test looks redundant, since the earlier
 * branches of this chain already dereference ent -- confirm.
 */
7953 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7954 (ent != NULL) && (ent->content != NULL) &&
7955 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7956 (xmlStrchr(ent->content, '<'))) {
7957 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7958 "'<' in entity '%s' is not allowed in attributes values\n",
7963 * Internal check, no parameter entities here ...
/* Both kinds of parameter entity are refused as targets of '&name;'. */
7966 switch (ent->etype) {
7967 case XML_INTERNAL_PARAMETER_ENTITY:
7968 case XML_EXTERNAL_PARAMETER_ENTITY:
7969 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7970 "Attempt to reference the parameter entity '%s'\n",
7979 * [ WFC: No Recursion ]
7980 * A parsed entity must not contain a recursive reference
7981 * to itself, either directly or indirectly.
7982 * Done somewhere else
/*
 * Summary: reads the Name of a parameter-entity reference from the parser
 * input, asks the SAX getParameterEntity callback for the entity, and
 * pushes the entity as a new parser input.  Sets ctxt->hasPErefs.
 */
7991 * xmlParsePEReference:
7992 * @ctxt: an XML parser context
7994 * parse PEReference declarations
7995 * The entity content is handled directly by pushing it's content as
7996 * a new input stream.
7998 * [69] PEReference ::= '%' Name ';'
8000 * [ WFC: No Recursion ]
8001 * A parsed entity must not contain a recursive
8002 * reference to itself, either directly or indirectly.
8004 * [ WFC: Entity Declared ]
8005 * In a document without any DTD, a document with only an internal DTD
8006 * subset which contains no parameter entity references, or a document
8007 * with "standalone='yes'", ... ... The declaration of a parameter
8008 * entity must precede any reference to it...
8010 * [ VC: Entity Declared ]
8011 * In a document with an external subset or external parameter entities
8012 * with "standalone='no'", ... ... The declaration of a parameter entity
8013 * must precede any reference to it...
8016 * Parameter-entity references may only appear in the DTD.
8017 * NOTE: misleading but this is handled.
8020 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8022 const xmlChar *name;
8023 xmlEntityPtr entity = NULL;
8024 xmlParserInputPtr input;
/* Read the Name part of the reference from the current input. */
8029 name = xmlParseName(ctxt);
/* XML_ERR_NAME_REQUIRED is the error used for a reference without a name. */
8031 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8032 "xmlParsePEReference: no name\n");
/* XML_ERR_ENTITYREF_SEMICOL_MISSING is the error used when the ';' is absent. */
8036 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8043 * Increate the number of entity references parsed
8048 * Request the entity from SAX
/* Without a getParameterEntity callback, entity keeps its initial NULL. */
8050 if ((ctxt->sax != NULL) &&
8051 (ctxt->sax->getParameterEntity != NULL))
8052 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
/* The parser state is tested for XML_PARSER_EOF after the callback. */
8053 if (ctxt->instate == XML_PARSER_EOF)
8055 if (entity == NULL) {
8057 * [ WFC: Entity Declared ]
8058 * In a document without any DTD, a document with only an
8059 * internal DTD subset which contains no parameter entity
8060 * references, or a document with "standalone='yes'", ...
8061 * ... The declaration of a parameter entity must precede
8062 * any reference to it...
/*
 * Strict case (standalone document, or no external subset and no PE
 * reference recorded so far): reported through the fatal-error helper.
 */
8064 if ((ctxt->standalone == 1) ||
8065 ((ctxt->hasExternalSubset == 0) &&
8066 (ctxt->hasPErefs == 0))) {
8067 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8068 "PEReference: %%%s; not found\n",
8072 * [ VC: Entity Declared ]
8073 * In a document with an external subset or external
8074 * parameter entities with "standalone='no'", ...
8075 * ... The declaration of a parameter entity must
8076 * precede any reference to it...
8078 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8079 "PEReference: %%%s; not found\n",
8083 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8086 * Internal checking in case the entity quest barfed
/*
 * Only internal and external parameter entities are accepted; any other
 * entity type yields a warning instead of being pushed.
 */
8088 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8089 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8090 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8091 "Internal: %%%s; is not a parameter entity\n",
/*
 * When the free hook of the current input is not deallocblankswrapper, a
 * stream built by xmlNewBlanksWrapperInputStream() is pushed.
 * NOTE(review): the new stream goes to xmlPushInput() without a NULL test
 * in between, whereas xmlLoadEntityContent() checks the result of
 * xmlNewEntityInputStream() -- confirm xmlPushInput() copes with NULL.
 */
8093 } else if (ctxt->input->free != deallocblankswrapper) {
8094 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8095 if (xmlPushInput(ctxt, input) < 0)
8100 * handle the extra spaces added before and after
8101 * c.f. http://www.w3.org/TR/REC-xml#as-PE
/*
 * Remaining case (presumably the final alternative of the chain above --
 * confirm): the entity itself becomes the new input.
 */
8103 input = xmlNewEntityInputStream(ctxt, entity);
8104 if (xmlPushInput(ctxt, input) < 0)
/*
 * An external parameter entity whose input starts with "<?xml" followed by
 * a blank has its text declaration parsed immediately.
 */
8106 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8107 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8108 (IS_BLANK_CH(NXT(5)))) {
8109 xmlParseTextDecl(ctxt);
8111 XML_ERR_UNSUPPORTED_ENCODING) {
8113 * The XML REC instructs us to stop parsing
/* Unsupported encoding: the parser is halted. */
8116 xmlHaltParser(ctxt);
/*
 * Record that a PE reference was seen; the "Entity Declared" tests read
 * this flag to choose between the fatal and the warning report.
 */
8122 ctxt->hasPErefs = 1;
/*
 * Summary: pushes an external entity as the current parser input, copies
 * what is read from it into an xmlBuffer, and hands the buffer storage over
 * to entity->content.
 */
8126 * xmlLoadEntityContent:
8127 * @ctxt: an XML parser context
8128 * @entity: an unloaded system entity
8130 * Load the original content of the given system entity from the
8131 * ExternalID/SystemID given. This is to be used for Included in Literal
8132 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8134 * Returns 0 in case of success and -1 in case of failure
8137 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8138 xmlParserInputPtr input;
/*
 * Argument validation: both pointers must be set, the entity must be an
 * external parameter entity or an external general parsed entity, and its
 * content must not be loaded yet.
 * NOTE(review): this branch is also taken when ctxt is NULL, and ctxt is
 * then passed to xmlFatalErr() -- confirm that helper tolerates NULL.
 */
8143 if ((ctxt == NULL) || (entity == NULL) ||
8144 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8145 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8146 (entity->content != NULL)) {
8147 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8148 "xmlLoadEntityContent parameter error");
/* Optional trace, emitted only when xmlParserDebugEntities is set. */
8152 if (xmlParserDebugEntities)
8153 xmlGenericError(xmlGenericErrorContext,
8154 "Reading %s entity content input\n", entity->name);
/* Buffer that accumulates the entity text. */
8156 buf = xmlBufferCreate();
/*
 * NOTE(review): this report follows the buffer creation yet reuses the
 * "parameter error" text of the argument check -- presumably it signals a
 * failed xmlBufferCreate(); confirm and consider a distinct message.
 */
8158 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8159 "xmlLoadEntityContent parameter error");
/* Build the input stream for the entity; a NULL stream is an internal error. */
8163 input = xmlNewEntityInputStream(ctxt, entity);
8164 if (input == NULL) {
8165 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8166 "xmlLoadEntityContent input error");
8172 * Push the entity as the current input, read char by char
8173 * saving to the buffer until the end of the entity or an error
8175 if (xmlPushInput(ctxt, input) < 0) {
/*
 * Copy loop: runs while the pushed stream is still the current input and
 * has unread bytes between cur and end.
 */
8182 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
/* Append l bytes starting at the input cursor to the buffer. */
8184 xmlBufferAdd(buf, ctxt->input->cur, l);
/*
 * Periodic step, taken once count exceeds XML_PARSER_CHUNK_SIZE; the
 * parser state is tested for XML_PARSER_EOF around it.
 */
8185 if (count++ > XML_PARSER_CHUNK_SIZE) {
8188 if (ctxt->instate == XML_PARSER_EOF) {
8198 if (ctxt->instate == XML_PARSER_EOF) {
/*
 * After the loop: either the pushed stream was consumed up to its end, or
 * the last value c failed IS_CHAR() and is reported as an invalid char.
 */
8206 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8208 } else if (!IS_CHAR(c)) {
8209 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8210 "xmlLoadEntityContent: invalid char value %d\n",
/*
 * Hand the accumulated storage to the entity and detach it from the buffer
 * (presumably so a later release of buf leaves the content alone -- confirm).
 */
8215 entity->content = buf->content;
8216 buf->content = NULL;
/*
 * Summary: string-based counterpart of xmlParsePEReference().  The Name is
 * read with xmlParseStringName() through a local cursor and the entity is
 * requested from the SAX getParameterEntity callback.  No input is pushed
 * in the statements shown here.  Sets ctxt->hasPErefs.
 * NOTE(review): the header below says a string is returned, yet the only
 * result-like local is the xmlEntityPtr `entity` and the early exit returns
 * NULL -- confirm the documented return value.
 */
8223 * xmlParseStringPEReference:
8224 * @ctxt: an XML parser context
8225 * @str: a pointer to an index in the string
8227 * parse PEReference declarations
8229 * [69] PEReference ::= '%' Name ';'
8231 * [ WFC: No Recursion ]
8232 * A parsed entity must not contain a recursive
8233 * reference to itself, either directly or indirectly.
8235 * [ WFC: Entity Declared ]
8236 * In a document without any DTD, a document with only an internal DTD
8237 * subset which contains no parameter entity references, or a document
8238 * with "standalone='yes'", ... ... The declaration of a parameter
8239 * entity must precede any reference to it...
8241 * [ VC: Entity Declared ]
8242 * In a document with an external subset or external parameter entities
8243 * with "standalone='no'", ... ... The declaration of a parameter entity
8244 * must precede any reference to it...
8247 * Parameter-entity references may only appear in the DTD.
8248 * NOTE: misleading but this is handled.
8250 * Returns the string of the entity content.
8251 * str is updated to the current value of the index
8254 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8258 xmlEntityPtr entity = NULL;
/* Both the cursor holder and the string it designates must be non-NULL. */
8260 if ((str == NULL) || (*str == NULL)) return(NULL);
/* Read the Name through the local cursor ptr. */
8266 name = xmlParseStringName(ctxt, &ptr);
/* XML_ERR_NAME_REQUIRED is the error used for a reference without a name. */
8268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8269 "xmlParseStringPEReference: no name\n");
/* XML_ERR_ENTITYREF_SEMICOL_MISSING is the error used when the ';' is absent. */
8275 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8283 * Increate the number of entity references parsed
8288 * Request the entity from SAX
/* Without a getParameterEntity callback, entity keeps its initial NULL. */
8290 if ((ctxt->sax != NULL) &&
8291 (ctxt->sax->getParameterEntity != NULL))
8292 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
/* The parser state is tested for XML_PARSER_EOF after the callback. */
8293 if (ctxt->instate == XML_PARSER_EOF) {
8297 if (entity == NULL) {
8299 * [ WFC: Entity Declared ]
8300 * In a document without any DTD, a document with only an
8301 * internal DTD subset which contains no parameter entity
8302 * references, or a document with "standalone='yes'", ...
8303 * ... The declaration of a parameter entity must precede
8304 * any reference to it...
/*
 * Strict case (standalone document, or no external subset and no PE
 * reference recorded so far): reported through the fatal-error helper.
 */
8306 if ((ctxt->standalone == 1) ||
8307 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8308 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8309 "PEReference: %%%s; not found\n", name);
8312 * [ VC: Entity Declared ]
8313 * In a document with an external subset or external
8314 * parameter entities with "standalone='no'", ...
8315 * ... The declaration of a parameter entity must
8316 * precede any reference to it...
8318 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8319 "PEReference: %%%s; not found\n",
8323 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8326 * Internal checking in case the entity quest barfed
/*
 * Only internal and external parameter entities are expected; any other
 * entity type yields a warning.
 */
8328 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8329 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8330 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8331 "%%%s; is not a parameter entity\n",
/*
 * Record that a PE reference was seen; the "Entity Declared" tests read
 * this flag to choose between the fatal and the warning report.
 */
8335 ctxt->hasPErefs = 1;
/*
 * Summary: parses the DOCTYPE name and the optional external identifier,
 * records them in the parser context, and notifies the SAX internalSubset
 * callback.  The internal subset itself is left to
 * xmlParseInternalSubset().
 */
8342 * xmlParseDocTypeDecl:
8343 * @ctxt: an XML parser context
8345 * parse a DOCTYPE declaration
8347 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8348 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8350 * [ VC: Root Element Type ]
8351 * The Name in the document type declaration must match the element
8352 * type of the root element.
8356 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8357 const xmlChar *name = NULL;
8358 xmlChar *ExternalID = NULL;
8359 xmlChar *URI = NULL;
8362 * We know that '<!DOCTYPE' has been detected.
8369 * Parse the DOCTYPE name.
8371 name = xmlParseName(ctxt);
/* XML_ERR_NAME_REQUIRED is the error used when the DOCTYPE has no name. */
8373 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8374 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
/* The parsed name is recorded in the context as intSubName. */
8376 ctxt->intSubName = name;
8381 * Check for SystemID and ExternalID
/* URI is the return value; ExternalID is filled through the out-parameter. */
8383 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
/* Either identifier being present marks the document as having an external subset. */
8385 if ((URI != NULL) || (ExternalID != NULL)) {
8386 ctxt->hasExternalSubset = 1;
/*
 * The context keeps both pointers: URI in extSubURI and ExternalID in
 * extSubSystem.
 */
8388 ctxt->extSubURI = URI;
8389 ctxt->extSubSystem = ExternalID;
8394 * Create and update the internal subset.
/* The callback is skipped when SAX is disabled or no handler is set. */
8396 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8397 (!ctxt->disableSAX))
8398 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
/* The parser state is tested for XML_PARSER_EOF after the callback. */
8399 if (ctxt->instate == XML_PARSER_EOF)
8403 * Is there any internal subset declarations ?
8404 * they are handled separately in xmlParseInternalSubset()
8410 * We should be at the end of the DOCTYPE declaration.
/* XML_ERR_DOCTYPE_NOT_FINISHED is the error used for an unterminated DOCTYPE. */
8413 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
/*
 * Summary: switches the parser to the XML_PARSER_DTD state and parses
 * markup declarations and PE references in a loop that ends at ']' or when
 * the parser reaches the XML_PARSER_EOF state, then checks that the DOCTYPE
 * is properly terminated.
 */
8419 * xmlParseInternalSubset:
8420 * @ctxt: an XML parser context
8422 * parse the internal subset declaration
8424 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8428 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8430 * Is there any DTD definition ?
8433 ctxt->instate = XML_PARSER_DTD;
8436 * Parse the succession of Markup declarations and
8438 * Subsequence (markupdecl | PEReference | S)*
8440 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * Snapshot of the read position (cursor and consumed counter), compared
 * again at the bottom of the iteration.
 */
8441 const xmlChar *check = CUR_PTR;
8442 unsigned int cons = ctxt->input->consumed;
8445 xmlParseMarkupDecl(ctxt);
8446 xmlParsePEReference(ctxt);
8449 * Pop-up of finished entities.
/* Applies only while more than one input is stacked and the current one reads 0. */
8451 while ((RAW == 0) && (ctxt->inputNr > 1))
/*
 * No progress: neither the cursor nor the consumed counter moved during
 * this iteration, which is reported as an internal error (presumably the
 * loop is then left to avoid spinning -- confirm).
 */
8454 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8455 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8456 "xmlParseInternalSubset: error detected in Markup declaration\n");
8467 * We should be at the end of the DOCTYPE declaration.
/* XML_ERR_DOCTYPE_NOT_FINISHED is the error used for an unterminated DOCTYPE. */
8470 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8475 #ifdef LIBXML_SAX1_ENABLED
/*
 * Summary: parses one attribute -- the name with xmlParseName(), the value
 * with xmlParseAttValue() -- and then validates the values of the special
 * attributes xml:lang (pedantic mode only) and xml:space.
 */
8477 * xmlParseAttribute:
8478 * @ctxt: an XML parser context
8479 * @value: a xmlChar ** used to store the value of the attribute
8481 * parse an attribute
8483 * [41] Attribute ::= Name Eq AttValue
8485 * [ WFC: No External Entity References ]
8486 * Attribute values cannot contain direct or indirect entity references
8487 * to external entities.
8489 * [ WFC: No < in Attribute Values ]
8490 * The replacement text of any entity referred to directly or indirectly in
8491 * an attribute value (other than "<") must not contain a <.
8493 * [ VC: Attribute Value Type ]
8494 * The attribute must have been declared; the value must be of the type
8497 * [25] Eq ::= S? '=' S?
8501 * [NS 11] Attribute ::= QName Eq AttValue
8503 * Also the case QName == xmlns:??? is handled independently as a namespace
8506 * Returns the attribute name, and the value in *value.
8510 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8511 const xmlChar *name;
/* Read the attribute name from the current input. */
8516 name = xmlParseName(ctxt);
/* XML_ERR_NAME_REQUIRED is the error used when no attribute name is found. */
8518 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8519 "error parsing attribute name\n");
/*
 * Parse the value, then put the parser state back to XML_PARSER_CONTENT.
 * NOTE(review): presumably xmlParseAttValue() switches instate to
 * XML_PARSER_ATTRIBUTE_VALUE while it runs, which is what the entity
 * reference checks test for -- confirm.
 */
8530 val = xmlParseAttValue(ctxt);
8531 ctxt->instate = XML_PARSER_CONTENT;
/* XML_ERR_ATTRIBUTE_WITHOUT_VALUE is the error used for a name with no value. */
8533 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8534 "Specification mandate value for attribute %s\n", name);
8539 * Check that xml:lang conforms to the specification
8540 * No more registered as an error, just generate a warning now
8541 * since this was deprecated in XML second edition
/* Only in pedantic mode: a value refused by xmlCheckLanguageID() yields a warning. */
8543 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8544 if (!xmlCheckLanguageID(val)) {
8545 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8546 "Malformed value for xml:lang : %s\n",
8552 * Check that xml:space conforms to the specification
/*
 * "default" and "preserve" are the two recognised values; the remaining
 * case is reported with the XML_WAR_SPACE_VALUE warning.
 */
8554 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8555 if (xmlStrEqual(val, BAD_CAST "default"))
8557 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8560 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8561 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8572 * @ctxt: an XML parser context
8574 * parse a start of tag either for rule element or
8575 * EmptyElement. In both cases we don't parse the tag closing chars.
8577 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8579 * [ WFC: Unique Att Spec ]
8580 * No attribute name may appear more than once in the same start-tag or
8581 * empty-element tag.
8583 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8585 * [ WFC: Unique Att Spec ]
8586 * No attribute name may appear more than once in the same start-tag or
8587 * empty-element tag.
8591 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8593 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8595 * Returns the element name parsed
8599 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
/* SAX1 start-tag parser: reads the element name with xmlParseName(),
 * collects attributes as name/value pairs in the atts array cached on
 * the context (ctxt->atts / ctxt->maxatts) and reports the element
 * through sax->startElement(). */
8600 const xmlChar *name;
8601 const xmlChar *attname;
8603 const xmlChar **atts = ctxt->atts;
8605 int maxatts = ctxt->maxatts;
/* Nothing to do unless the current character opens a tag. */
8608 if (RAW != '<') return(NULL);
8611 name = xmlParseName(ctxt);
/* Fatal XML_ERR_NAME_REQUIRED: the element name could not be parsed. */
8613 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8614 "xmlParseStartTag: invalid element name\n");
8619 * Now parse the attributes, it ends up with the ending
8626 while (((RAW != '>') &&
8627 ((RAW != '/') || (NXT(1) != '>')) &&
8628 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot the read position; it is compared again at the end of the
 * iteration to detect a pass that consumed no input. */
8629 const xmlChar *q = CUR_PTR;
8630 unsigned int cons = ctxt->input->consumed;
8632 attname = xmlParseAttribute(ctxt, &attvalue);
8633 if ((attname != NULL) && (attvalue != NULL)) {
8635 * [ WFC: Unique Att Spec ]
8636 * No attribute name may appear more than once in the same
8637 * start-tag or empty-element tag.
8639 for (i = 0; i < nbatts;i += 2) {
/* Names sit at even indexes of atts (values at odd ones). */
8640 if (xmlStrEqual(atts[i], attname)) {
8641 xmlErrAttributeDup(ctxt, NULL, attname);
8647 * Add the pair to atts
8650 maxatts = 22; /* allow for 10 attrs by default */
8651 atts = (const xmlChar **)
8652 xmlMalloc(maxatts * sizeof(xmlChar *));
8654 xmlErrMemory(ctxt, NULL);
8655 if (attvalue != NULL)
8660 ctxt->maxatts = maxatts;
8661 } else if (nbatts + 4 > maxatts) {
/* Not enough room for the new pair plus the two trailing NULL slots
 * written below: the array is reallocated. */
8665 n = (const xmlChar **) xmlRealloc((void *) atts,
8666 maxatts * sizeof(const xmlChar *));
8668 xmlErrMemory(ctxt, NULL);
8669 if (attvalue != NULL)
8675 ctxt->maxatts = maxatts;
/* Append the pair and keep the array terminated by two NULL entries. */
8677 atts[nbatts++] = attname;
8678 atts[nbatts++] = attvalue;
8679 atts[nbatts] = NULL;
8680 atts[nbatts + 1] = NULL;
8682 if (attvalue != NULL)
8689 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8691 if (!IS_BLANK_CH(RAW)) {
8692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8693 "attributes construct error\n");
/* No input was consumed and no attribute was produced by this pass. */
8696 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8697 (attname == NULL) && (attvalue == NULL)) {
8698 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8699 "xmlParseStartTag: problem parsing attributes\n");
8707 * SAX: Start of Element !
8709 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8710 (!ctxt->disableSAX)) {
/* Two call forms: with the collected attribute array, or with NULL in
 * its place. */
8712 ctxt->sax->startElement(ctxt->userData, name, atts);
8714 ctxt->sax->startElement(ctxt->userData, name, NULL);
8718 /* Free only the content strings */
8719 for (i = 1;i < nbatts;i+=2)
8720 if (atts[i] != NULL)
8721 xmlFree((xmlChar *) atts[i]);
8728 * @ctxt: an XML parser context
8729 * @line: line of the start tag
8730 * @nsNr: number of namespaces on the start tag
8732 * parse an end of tag
8734 * [42] ETag ::= '</' Name S? '>'
8738 * [NS 9] ETag ::= '</' QName S? '>'
8742 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
/* SAX1 end-tag parser: compares the closing name with ctxt->name,
 * reports XML_ERR_TAG_NAME_MISMATCH (using the line argument in the
 * message) when they differ, and fires sax->endElement(). */
8743 const xmlChar *name;
/* An end tag has to start with < followed by /. */
8746 if ((RAW != '<') || (NXT(1) != '/')) {
8747 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8748 "xmlParseEndTag: '</' not found\n");
/* The helper result is tested further down against the (xmlChar*)1
 * sentinel; any other value is treated as a mismatch. */
8753 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8756 * We should definitely be at the ending "S? '>'" part
8760 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
/* Anything other than the closing > at this point is a fatal error. */
8761 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766 * [ WFC: Element Type Match ]
8767 * The Name in an element's end-tag must match the element type in the
8771 if (name != (xmlChar*)1) {
8772 if (name == NULL) name = BAD_CAST "unparseable";
8773 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8774 "Opening and ending tag mismatch: %s line %d and %s\n",
8775 ctxt->name, line, name);
8781 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8782 (!ctxt->disableSAX))
8783 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8792 * @ctxt: an XML parser context
8794 * parse an end of tag
8796 * [42] ETag ::= '</' Name S? '>'
8800 * [NS 9] ETag ::= '</' QName S? '>'
8804 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
/* Thin wrapper: delegates to xmlParseEndTag1() with 0 as the line value. */
8805 xmlParseEndTag1(ctxt, 0);
8807 #endif /* LIBXML_SAX1_ENABLED */
8809 /************************************************************************
8811 * SAX 2 specific operations *
8813 ************************************************************************/
8817 * @ctxt: an XML parser context
8818 * @prefix: the prefix to lookup
8820 * Lookup the namespace name for the @prefix (which can be NULL)
8821 * The prefix must come from the @ctxt->dict dictionary
8823 * Returns the namespace name or NULL if not bound
8825 static const xmlChar *
8826 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
/* The xml prefix resolves directly to ctxt->str_xml_ns without
 * consulting nsTab. */
8829 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
/* nsTab holds prefix/URI pairs. Scan from the last pair backwards and
 * compare prefixes by pointer, not by content. */
8830 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8831 if (ctxt->nsTab[i] == prefix) {
/* Special case: NULL prefix (default namespace) bound to an empty URI. */
8832 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8834 return(ctxt->nsTab[i + 1]);
8841 * @ctxt: an XML parser context
8842 * @prefix: pointer to store the prefix part
8844 * parse an XML Namespace QName
8846 * [6] QName ::= (Prefix ':')? LocalPart
8847 * [7] Prefix ::= NCName
8848 * [8] LocalPart ::= NCName
8850 * Returns the Name parsed or NULL
8853 static const xmlChar *
8854 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
/* Parses a QName with xmlParseNCName(). Malformed names are reported
 * with XML_NS_ERR_QNAME and recovered from by re-reading the text with
 * the more permissive xmlParseName() / xmlParseNmtoken() helpers. */
8855 const xmlChar *l, *p;
8859 l = xmlParseNCName(ctxt);
/* Recovery parse with xmlParseName(); the result is reported as a
 * malformed QName just below. */
8862 l = xmlParseName(ctxt);
8864 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8865 "Failed to parse QName '%s'\n", l, NULL, NULL);
8875 l = xmlParseNCName(ctxt);
/* Error path for the part following p: report it, then read an Nmtoken
 * in its place. */
8879 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8880 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8881 l = xmlParseNmtoken(ctxt);
8883 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8885 tmp = xmlBuildQName(l, p, NULL, 0);
/* Intern the rebuilt name in the parser dictionary, then release the
 * temporary string. */
8888 p = xmlDictLookup(ctxt->dict, tmp, -1);
8889 if (tmp != NULL) xmlFree(tmp);
/* Error path reported with both p and l: another name is parsed and
 * combined with l, and the interned result replaces l. */
8896 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8897 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8899 tmp = (xmlChar *) xmlParseName(ctxt);
8901 tmp = xmlBuildQName(tmp, l, NULL, 0);
8902 l = xmlDictLookup(ctxt->dict, tmp, -1);
8903 if (tmp != NULL) xmlFree(tmp);
8907 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8908 l = xmlDictLookup(ctxt->dict, tmp, -1);
8909 if (tmp != NULL) xmlFree(tmp);
8920 * xmlParseQNameAndCompare:
8921 * @ctxt: an XML parser context
8922 * @name: the localname
8923 * @prefix: the prefix, if any.
8925 * parse an XML name and compares for match
8926 * (specialized for endtag parsing)
8928 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8929 * and the name for mismatch
8932 static const xmlChar *
8933 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8934 xmlChar const *prefix) {
/* End-tag helper. Fast path: match the raw input byte by byte against an
 * expected string, a ':' and a second expected string. If that fails,
 * fall back to xmlParseQName() and compare the interned pointers. */
8938 const xmlChar *prefix2;
/* Without a prefix this is a plain name comparison. */
8940 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8943 in = ctxt->input->cur;
8946 while (*in != 0 && *in == *cmp) {
8950 if ((*cmp == 0) && (*in == ':')) {
8953 while (*in != 0 && *in == *cmp) {
8957 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
/* Whole string matched and is followed by > or a blank: consume it and
 * signal success with the (xmlChar*)1 sentinel. */
8959 ctxt->input->cur = in;
8960 return((const xmlChar*) 1);
8964 * all strings coms from the dictionary, equality can be done directly
8966 ret = xmlParseQName (ctxt, &prefix2);
/* Slow path: both the name and the prefix are compared by pointer. */
8967 if ((ret == name) && (prefix == prefix2))
8968 return((const xmlChar*) 1);
8973 * xmlParseAttValueInternal:
8974 * @ctxt: an XML parser context
8975 * @len: attribute len result
8976 * @alloc: whether the attribute was reallocated as a new string
8977 * @normalize: if 1 then further non-CDATA normalization must be done
8979 * parse a value for an attribute.
8980 * NOTE: if no normalization is needed, the routine will return pointers
8981 * directly from the data buffer.
8983 * 3.3.3 Attribute-Value Normalization:
8984 * Before the value of an attribute is passed to the application or
8985 * checked for validity, the XML processor must normalize it as follows:
8986 * - a character reference is processed by appending the referenced
8987 * character to the attribute value
8988 * - an entity reference is processed by recursively processing the
8989 * replacement text of the entity
8990 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8991 * appending #x20 to the normalized value, except that only a single
8992 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8993 * parsed entity or the literal entity value of an internal parsed entity
8994 * - other characters are processed by appending them to the normalized value
8995 * If the declared value is not CDATA, then the XML processor must further
8996 * process the normalized attribute value by discarding any leading and
8997 * trailing space (#x20) characters, and by replacing sequences of space
8998 * (#x20) characters by a single space (#x20) character.
8999 * All attributes for which no declaration has been read should be treated
9000 * by a non-validating parser as if declared CDATA.
9002 * Returns the AttValue parsed or NULL. The value has to be freed by the
9003 * caller if it was copied, this can be detected by val[*len] == 0.
9007 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9011 const xmlChar *in = NULL, *start, *end, *last;
9012 xmlChar *ret = NULL;
/* Fast path for attribute values: the quoted value is scanned in place
 * in the input buffer. Input this scan cannot handle jumps to
 * need_complex and is delegated to xmlParseAttValueComplex().
 * *alloc is set to 1 on the paths that hand back a copy (xmlStrndup(),
 * xmlParseAttValueComplex()) and to 0 on the remaining visible path.
 * Values longer than XML_MAX_TEXT_LENGTH are rejected unless
 * XML_PARSE_HUGE is set. */
9016 in = (xmlChar *) CUR_PTR;
9017 line = ctxt->input->line;
9018 col = ctxt->input->col;
/* The value has to open with a single or double quote. */
9019 if (*in != '"' && *in != '\'') {
9020 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9026 * try to handle in this routine the most common case where no
9027 * allocation of a new string is required and where content is
9032 end = ctxt->input->end;
9035 const xmlChar *oldbase = ctxt->input->base;
/* The input buffer may have moved: shift the saved pointers by the
 * same distance so they keep addressing the same characters. */
9037 if (oldbase != ctxt->input->base) {
9038 long delta = ctxt->input->base - oldbase;
9039 start = start + delta;
9042 end = ctxt->input->end;
9046 * Skip any leading spaces
9048 while ((in < end) && (*in != limit) &&
9049 ((*in == 0x20) || (*in == 0x9) ||
9050 (*in == 0xA) || (*in == 0xD))) {
9059 const xmlChar *oldbase = ctxt->input->base;
9061 if (ctxt->instate == XML_PARSER_EOF)
9063 if (oldbase != ctxt->input->base) {
9064 long delta = ctxt->input->base - oldbase;
9065 start = start + delta;
9068 end = ctxt->input->end;
9069 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9070 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9071 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9072 "AttValue length too long\n");
/* In-place scan: stop at the closing quote, at a control or non-ASCII
 * byte, and at the characters & and <. */
9077 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9078 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
/* Two consecutive spaces end this scan. */
9080 if ((*in++ == 0x20) && (*in == 0x20)) break;
9082 const xmlChar *oldbase = ctxt->input->base;
9084 if (ctxt->instate == XML_PARSER_EOF)
9086 if (oldbase != ctxt->input->base) {
9087 long delta = ctxt->input->base - oldbase;
9088 start = start + delta;
9091 end = ctxt->input->end;
9092 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9093 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9094 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9095 "AttValue length too long\n");
9102 * skip the trailing blanks
9104 while ((last > start) && (last[-1] == 0x20)) last--;
9105 while ((in < end) && (*in != limit) &&
9106 ((*in == 0x20) || (*in == 0x9) ||
9107 (*in == 0xA) || (*in == 0xD))) {
9115 const xmlChar *oldbase = ctxt->input->base;
9117 if (ctxt->instate == XML_PARSER_EOF)
9119 if (oldbase != ctxt->input->base) {
9120 long delta = ctxt->input->base - oldbase;
9121 start = start + delta;
9123 last = last + delta;
9125 end = ctxt->input->end;
9126 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9127 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9128 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129 "AttValue length too long\n");
9134 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9135 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9136 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9137 "AttValue length too long\n");
/* The scan stopped before the closing quote: use the general routine. */
9140 if (*in != limit) goto need_complex;
9142 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9143 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9147 const xmlChar *oldbase = ctxt->input->base;
9149 if (ctxt->instate == XML_PARSER_EOF)
9151 if (oldbase != ctxt->input->base) {
9152 long delta = ctxt->input->base - oldbase;
9153 start = start + delta;
9156 end = ctxt->input->end;
9157 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9158 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9159 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160 "AttValue length too long\n");
9166 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9167 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9168 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9169 "AttValue length too long\n");
9172 if (*in != limit) goto need_complex;
/* Two ways of returning the fast-path result: a pointer into the input
 * buffer together with its length, or a heap copy flagged via *alloc. */
9177 *len = last - start;
9178 ret = (xmlChar *) start;
9180 if (alloc) *alloc = 1;
9181 ret = xmlStrndup(start, last - start);
9184 ctxt->input->line = line;
9185 ctxt->input->col = col;
9186 if (alloc) *alloc = 0;
9189 if (alloc) *alloc = 1;
9190 return xmlParseAttValueComplex(ctxt, len, normalize);
9194 * xmlParseAttribute2:
9195 * @ctxt: an XML parser context
9196 * @pref: the element prefix
9197 * @elem: the element name
9198 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9199 * @value: a xmlChar ** used to store the value of the attribute
9200 * @len: an int * to save the length of the attribute
9201 * @alloc: an int * to indicate if the attribute was allocated
9203 * parse an attribute in the new SAX2 framework.
9205 * Returns the attribute name, and the value in *value.
9208 static const xmlChar *
9209 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9210 const xmlChar * pref, const xmlChar * elem,
9211 const xmlChar ** prefix, xmlChar ** value,
9212 int *len, int *alloc)
/* SAX2 attribute parser: the QName comes from xmlParseQName() and the
 * value from xmlParseAttValueInternal(). The xml:lang and xml:space
 * checks run only when the attribute prefix is the xml prefix. */
9214 const xmlChar *name;
9215 xmlChar *val, *internal_val = NULL;
9220 name = xmlParseQName(ctxt, prefix);
/* Fatal XML_ERR_NAME_REQUIRED: the attribute QName could not be parsed. */
9222 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9223 "error parsing attribute name\n");
9228 * get the type if needed
9230 if (ctxt->attsSpecial != NULL) {
/* attsSpecial is looked up by element (pref, elem) and attribute
 * (*prefix, name); the stored pointer value is used as an int code. */
9233 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9234 pref, elem, *prefix, name);
9246 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9249 * Sometimes a second normalisation pass for spaces is needed
9250 * but that only happens if charrefs or entities refernces
9251 * have been used in the attribute value, i.e. the attribute
9252 * value have been extracted in an allocated string already.
9255 const xmlChar *val2;
9257 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
/* Adopt the result only when the helper returned a different buffer. */
9258 if ((val2 != NULL) && (val2 != val)) {
9260 val = (xmlChar *) val2;
9264 ctxt->instate = XML_PARSER_CONTENT;
9266 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9267 "Specification mandate value for attribute %s\n",
9272 if (*prefix == ctxt->str_xml) {
9274 * Check that xml:lang conforms to the specification
9275 * No more registered as an error, just generate a warning now
9276 * since this was deprecated in XML second edition
9278 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
/* The check runs on a copy bounded by *len rather than on val itself. */
9279 internal_val = xmlStrndup(val, *len);
9280 if (!xmlCheckLanguageID(internal_val)) {
9281 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9282 "Malformed value for xml:lang : %s\n",
9283 internal_val, NULL);
9288 * Check that xml:space conforms to the specification
9290 if (xmlStrEqual(name, BAD_CAST "space")) {
9291 internal_val = xmlStrndup(val, *len);
9292 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9294 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9297 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9298 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9299 internal_val, NULL);
/* Release the temporary copy made for the checks above. */
9303 xmlFree(internal_val);
9311 * xmlParseStartTag2:
9312 * @ctxt: an XML parser context
9314 * parse a start of tag either for rule element or
9315 * EmptyElement. In both cases we don't parse the tag closing chars.
9316 * This routine is called when running SAX2 parsing
9318 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9320 * [ WFC: Unique Att Spec ]
9321 * No attribute name may appear more than once in the same start-tag or
9322 * empty-element tag.
9324 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9326 * [ WFC: Unique Att Spec ]
9327 * No attribute name may appear more than once in the same start-tag or
9328 * empty-element tag.
9332 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9334 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9336 * Returns the element name parsed
9339 static const xmlChar *
9340 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9341 const xmlChar **URI, int *tlen) {
/* SAX2 start-tag parser. Parses the element QName and its attributes,
 * records xmlns declarations with nsPush(), merges the defaults found
 * in ctxt->attsDefault, resolves attribute prefixes to namespace names
 * and reports the element through sax->startElementNs().
 * atts uses five slots per attribute: name, prefix, URI and two value
 * pointers. *tlen receives the length of the element name as written
 * in the input. */
9342 const xmlChar *localname;
9343 const xmlChar *prefix;
9344 const xmlChar *attname;
9345 const xmlChar *aprefix;
9346 const xmlChar *nsname;
9348 const xmlChar **atts = ctxt->atts;
9349 int maxatts = ctxt->maxatts;
9350 int nratts, nbatts, nbdef;
9351 int i, j, nbNs, attval, oldline, oldcol, inputNr;
9352 const xmlChar *base;
9354 int nsNr = ctxt->nsNr;
9356 if (RAW != '<') return(NULL);
9360 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9361 * point since the attribute values may be stored as pointers to
9362 * the buffer and calling SHRINK would destroy them !
9363 * The Shrinking is only possible once the full set of attribute
9364 * callbacks have been done.
9368 base = ctxt->input->base;
/* Snapshot of the input state: cur is kept as an offset from the buffer
 * base, and is used with oldline / oldcol at the end of the function to
 * rewind the input. */
9369 cur = ctxt->input->cur - ctxt->input->base;
9370 inputNr = ctxt->inputNr;
9371 oldline = ctxt->input->line;
9372 oldcol = ctxt->input->col;
9378 /* Forget any namespaces added during an earlier parse of this element. */
9381 localname = xmlParseQName(ctxt, &prefix);
9382 if (localname == NULL) {
9383 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9384 "StartTag: invalid element name\n");
/* Number of input bytes taken by the element name. */
9387 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9390 * Now parse the attributes, it ends up with the ending
9396 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9399 while (((RAW != '>') &&
9400 ((RAW != '/') || (NXT(1) != '>')) &&
9401 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9402 const xmlChar *q = CUR_PTR;
9403 unsigned int cons = ctxt->input->consumed;
9404 int len = -1, alloc = 0;
9406 attname = xmlParseAttribute2(ctxt, prefix, localname,
9407 &aprefix, &attvalue, &len, &alloc);
/* The buffer base or the input stack changed while parsing the
 * attribute: an allocated value is released on this path. */
9408 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
9409 if ((attvalue != NULL) && (alloc != 0))
9414 if ((attname != NULL) && (attvalue != NULL)) {
9415 if (len < 0) len = xmlStrlen(attvalue);
/* Unprefixed attribute named xmlns: a default namespace declaration.
 * The URI is interned in the dictionary before being checked. */
9416 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9417 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9421 xmlErrMemory(ctxt, "dictionary allocation failure");
9422 if ((attvalue != NULL) && (alloc != 0))
9427 uri = xmlParseURI((const char *) URL);
9429 xmlNsErr(ctxt, XML_WAR_NS_URI,
9430 "xmlns: '%s' is not a valid URI\n",
9433 if (uri->scheme == NULL) {
9434 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9435 "xmlns: URI %s is not absolute\n",
9440 if (URL == ctxt->str_xml_ns) {
9441 if (attname != ctxt->str_xml) {
9442 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9443 "xml namespace URI cannot be the default namespace\n",
9446 goto skip_default_ns;
9450 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9451 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9452 "reuse of the xmlns namespace name is forbidden\n",
9454 goto skip_default_ns;
9458 * check that it's not a defined namespace
9460 for (j = 1;j <= nbNs;j++)
9461 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9464 xmlErrAttributeDup(ctxt, NULL, attname);
9466 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9468 if (alloc != 0) xmlFree(attvalue);
9469 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9471 if (!IS_BLANK_CH(RAW)) {
9472 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9473 "attributes construct error\n");
/* Attribute carrying the xmlns prefix: a prefixed namespace declaration
 * whose prefix is the attribute local name. */
9479 if (aprefix == ctxt->str_xmlns) {
9480 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9483 if (attname == ctxt->str_xml) {
9484 if (URL != ctxt->str_xml_ns) {
9485 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9486 "xml namespace prefix mapped to wrong URI\n",
9490 * Do not keep a namespace definition node
9494 if (URL == ctxt->str_xml_ns) {
9495 if (attname != ctxt->str_xml) {
9496 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9497 "xml namespace URI mapped to wrong prefix\n",
9502 if (attname == ctxt->str_xmlns) {
9503 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9504 "redefinition of the xmlns prefix is forbidden\n",
9510 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9511 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9512 "reuse of the xmlns namespace name is forbidden\n",
9516 if ((URL == NULL) || (URL[0] == 0)) {
9517 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9518 "xmlns:%s: Empty XML namespace is not allowed\n",
9519 attname, NULL, NULL);
9522 uri = xmlParseURI((const char *) URL);
9524 xmlNsErr(ctxt, XML_WAR_NS_URI,
9525 "xmlns:%s: '%s' is not a valid URI\n",
9526 attname, URL, NULL);
9528 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9529 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9530 "xmlns:%s: URI %s is not absolute\n",
9531 attname, URL, NULL);
9538 * check that it's not a defined namespace
9540 for (j = 1;j <= nbNs;j++)
9541 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9544 xmlErrAttributeDup(ctxt, aprefix, attname);
9546 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9548 if (alloc != 0) xmlFree(attvalue);
9549 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9551 if (!IS_BLANK_CH(RAW)) {
9552 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9553 "attributes construct error\n");
9557 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9563 * Add the pair to atts
9565 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9566 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9567 if (attvalue[len] == 0)
9571 maxatts = ctxt->maxatts;
/* attallocs records, per attribute, whether the value was allocated; it
 * drives the xmlFree() loops after the SAX callback. */
9574 ctxt->attallocs[nratts++] = alloc;
9575 atts[nbatts++] = attname;
9576 atts[nbatts++] = aprefix;
9577 atts[nbatts++] = NULL; /* the URI will be fetched later */
9578 atts[nbatts++] = attvalue;
9580 atts[nbatts++] = attvalue;
9582 * tag if some deallocation is needed
9584 if (alloc != 0) attval = 1;
9586 if ((attvalue != NULL) && (attvalue[len] == 0))
9593 if (ctxt->instate == XML_PARSER_EOF)
9595 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9597 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9599 if (!IS_BLANK_CH(RAW)) {
9600 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9601 "attributes construct error\n");
/* No input was consumed and no attribute was produced by this pass. */
9605 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9606 (attname == NULL) && (attvalue == NULL)) {
9607 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9608 "xmlParseStartTag: problem parsing attributes\n");
9612 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9617 * The attributes defaulting
9619 if (ctxt->attsDefault != NULL) {
9620 xmlDefAttrsPtr defaults;
9622 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9623 if (defaults != NULL) {
9624 for (i = 0;i < defaults->nbAttrs;i++) {
/* Each default occupies five consecutive entries of defaults->values. */
9625 attname = defaults->values[5 * i];
9626 aprefix = defaults->values[5 * i + 1];
9629 * special work for namespaces defaulted defs
9631 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9633 * check that it's not a defined namespace
9635 for (j = 1;j <= nbNs;j++)
9636 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9638 if (j <= nbNs) continue;
9640 nsname = xmlGetNamespace(ctxt, NULL);
9641 if (nsname != defaults->values[5 * i + 2]) {
9642 if (nsPush(ctxt, NULL,
9643 defaults->values[5 * i + 2]) > 0)
9646 } else if (aprefix == ctxt->str_xmlns) {
9648 * check that it's not a defined namespace
9650 for (j = 1;j <= nbNs;j++)
9651 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9653 if (j <= nbNs) continue;
9655 nsname = xmlGetNamespace(ctxt, attname);
/* Compare with the value of this default (entry 5 * i + 2), the same
 * entry that is pushed below, not with the first default in the table. */
9656 if (nsname != defaults->values[5 * i + 2]) {
9657 if (nsPush(ctxt, attname,
9658 defaults->values[5 * i + 2]) > 0)
9663 * check that it's not a defined attribute
9665 for (j = 0;j < nbatts;j+=5) {
9666 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669 if (j < nbatts) continue;
9671 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9672 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675 maxatts = ctxt->maxatts;
9678 atts[nbatts++] = attname;
9679 atts[nbatts++] = aprefix;
9680 if (aprefix == NULL)
9681 atts[nbatts++] = NULL;
9683 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9684 atts[nbatts++] = defaults->values[5 * i + 2];
9685 atts[nbatts++] = defaults->values[5 * i + 3];
9686 if ((ctxt->standalone == 1) &&
9687 (defaults->values[5 * i + 4] != NULL)) {
9688 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9689 "standalone: attribute %s on %s defaulted from external subset\n",
9690 attname, localname);
9699 * The attributes checkings
9701 for (i = 0; i < nbatts;i += 5) {
9703 * The default namespace does not apply to attribute names.
9705 if (atts[i + 1] != NULL) {
9706 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9707 if (nsname == NULL) {
9708 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9709 "Namespace prefix %s for %s on %s is not defined\n",
9710 atts[i + 1], atts[i], localname);
9712 atts[i + 2] = nsname;
9716 * [ WFC: Unique Att Spec ]
9717 * No attribute name may appear more than once in the same
9718 * start-tag or empty-element tag.
9719 * As extended by the Namespace in XML REC.
9721 for (j = 0; j < i;j += 5) {
9722 if (atts[i] == atts[j]) {
9723 if (atts[i+1] == atts[j+1]) {
9724 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9727 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9728 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9729 "Namespaced Attribute %s in '%s' redefined\n",
9730 atts[i], nsname, NULL);
9737 nsname = xmlGetNamespace(ctxt, prefix);
9738 if ((prefix != NULL) && (nsname == NULL)) {
9739 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9740 "Namespace prefix %s on %s is not defined\n",
9741 prefix, localname, NULL);
9747 * SAX: Start of Element !
9749 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9750 (!ctxt->disableSAX)) {
/* Two call forms: one passes the nbNs pairs pushed by this tag (the tail
 * of nsTab), the other passes no namespace array at all. */
9752 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9753 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9754 nbatts / 5, nbdef, atts);
9756 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9757 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761 * Free up attribute allocated strings if needed
9764 for (i = 3,j = 0; j < nratts;i += 5,j++)
9765 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9766 xmlFree((xmlChar *) atts[i]);
9773 * the attribute strings are valid iff the base did not change
9776 for (i = 3,j = 0; j < nratts;i += 5,j++)
9777 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9778 xmlFree((xmlChar *) atts[i]);
9782 * We can't switch from one entity to another in the middle
9785 if (inputNr != ctxt->inputNr) {
9786 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9787 "Start tag doesn't start and stop in the same entity\n");
/* Rewind the input to the position and line/column saved on entry. */
9791 ctxt->input->cur = ctxt->input->base + cur;
9792 ctxt->input->line = oldline;
9793 ctxt->input->col = oldcol;
9794 if (ctxt->wellFormed == 1) {
9802 * @ctxt: an XML parser context
9803 * @line: line of the start tag
9804 * @nsNr: number of namespaces on the start tag
9806 * parse an end of tag
9808 * [42] ETag ::= '</' Name S? '>'
9812 * [NS 9] ETag ::= '</' QName S? '>'
9816 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9817 const xmlChar *URI, int line, int nsNr, int tlen) {
/* SAX2 end-tag parser: matches the closing name against ctxt->name (and
 * prefix), reports XML_ERR_TAG_NAME_MISMATCH on a difference and fires
 * sax->endElementNs() with prefix and URI. When line is 0 and a current
 * node exists, the mismatch message uses ctxt->node->line instead. */
9818 const xmlChar *name;
9821 if ((RAW != '<') || (NXT(1) != '/')) {
9822 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
/* Fast path: the next tlen input bytes equal ctxt->name, so they are
 * skipped without running the name parser. */
9827 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9828 if (ctxt->input->cur[tlen] == '>') {
/* The closing > follows immediately: consume it together with the name. */
9829 ctxt->input->cur += tlen + 1;
9830 ctxt->input->col += tlen + 1;
/* Otherwise only the name itself is consumed here. */
9833 ctxt->input->cur += tlen;
9834 ctxt->input->col += tlen;
/* Slow path: parse the name and compare it; the QName variant also
 * takes the expected prefix. */
9838 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9840 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9844 * We should definitely be at the ending "S? '>'" part
9847 if (ctxt->instate == XML_PARSER_EOF)
9850 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9851 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9856 * [ WFC: Element Type Match ]
9857 * The Name in an element's end-tag must match the element type in the
9861 if (name != (xmlChar*)1) {
9862 if (name == NULL) name = BAD_CAST "unparseable";
9863 if ((line == 0) && (ctxt->node != NULL))
9864 line = ctxt->node->line;
9865 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9866 "Opening and ending tag mismatch: %s line %d and %s\n",
9867 ctxt->name, line, name);
9874 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9875 (!ctxt->disableSAX))
9876 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9886 * @ctxt: an XML parser context
9888 * Parse escaped pure raw content.
9890 * [18] CDSect ::= CDStart CData CDEnd
9892 * [19] CDStart ::= '<![CDATA['
9894 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9896 * [21] CDEnd ::= ']]>'
9899 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
/* Parses one CDATA section into a growable heap buffer and delivers it
 * through sax->cdataBlock(), or through sax->characters() when no
 * cdataBlock handler is installed. */
9900 xmlChar *buf = NULL;
9902 int size = XML_PARSER_BUFFER_SIZE;
9908 /* Check 2.6.0 was NXT(0) not RAW */
9909 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
/* The CDATA opener was matched: switch to the CDATA parser state. */
9914 ctxt->instate = XML_PARSER_CDATA_SECTION;
9917 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9918 ctxt->instate = XML_PARSER_CONTENT;
9924 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9925 ctxt->instate = XML_PARSER_CONTENT;
/* Working buffer for the section content, XML_PARSER_BUFFER_SIZE bytes
 * to begin with. */
9930 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9932 xmlErrMemory(ctxt, NULL);
9935 while (IS_CHAR(cur) &&
9936 ((r != ']') || (s != ']') || (cur != '>'))) {
/* r, s and cur are tested together as a three-character window; the loop
 * ends once they read ]]> or cur is not a valid character. */
9937 if (len + 5 >= size) {
/* Buffer nearly full: enforce the XML_MAX_TEXT_LENGTH cap (unless
 * XML_PARSE_HUGE is set) before asking for twice the size. */
9940 if ((size > XML_MAX_TEXT_LENGTH) &&
9941 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9942 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9943 "CData section too big found", NULL);
9947 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9950 xmlErrMemory(ctxt, NULL);
/* Append r, the first character of the window, to the buffer. */
9956 COPY_BUF(rl,buf,len,r);
9964 if (ctxt->instate == XML_PARSER_EOF) {
9974 ctxt->instate = XML_PARSER_CONTENT;
9976 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9977 "CData section not finished\n%.50s\n", buf);
9984 * OK the buffer is to be consumed as cdata.
9986 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9987 if (ctxt->sax->cdataBlock != NULL)
9988 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9989 else if (ctxt->sax->characters != NULL)
9990 ctxt->sax->characters(ctxt->userData, buf, len);
9997 * @ctxt: an XML parser context
10001 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10005 xmlParseContent(xmlParserCtxtPtr ctxt) {
/* Content loop: looks at the next characters and dispatches to the
 * matching parser (xmlParseCDSect, xmlParseComment, xmlParseElement,
 * xmlParseReference, xmlParseCharData; a branch for processing
 * instructions comes first). It stops at an end tag, at a zero byte or
 * when the parser enters XML_PARSER_EOF. An iteration that consumes no
 * input raises XML_ERR_INTERNAL_ERROR and halts the parser. */
10007 while ((RAW != 0) &&
10008 ((RAW != '<') || (NXT(1) != '/')) &&
10009 (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot the position for the progress check at the end of the
 * iteration. */
10010 const xmlChar *test = CUR_PTR;
10011 unsigned int cons = ctxt->input->consumed;
10012 const xmlChar *cur = ctxt->input->cur;
10015 * First case : a Processing Instruction.
10017 if ((*cur == '<') && (cur[1] == '?')) {
10022 * Second case : a CDSection
10024 /* 2.6.0 test was *cur not RAW */
10025 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10026 xmlParseCDSect(ctxt);
10030 * Third case : a comment
10032 else if ((*cur == '<') && (NXT(1) == '!') &&
10033 (NXT(2) == '-') && (NXT(3) == '-')) {
10034 xmlParseComment(ctxt);
10035 ctxt->instate = XML_PARSER_CONTENT;
10039 * Fourth case : a sub-element.
10041 else if (*cur == '<') {
10042 xmlParseElement(ctxt);
10046 * Fifth case : a reference. If if has not been resolved,
10047 * parsing returns it's Name, create the node
10050 else if (*cur == '&') {
10051 xmlParseReference(ctxt);
10055 * Last case, text. Note that References are handled directly.
10058 xmlParseCharData(ctxt, 0);
10063 * Pop-up of finished entities.
10065 while ((RAW == 0) && (ctxt->inputNr > 1))
10069 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10071 "detected an error in element content\n");
10072 xmlHaltParser(ctxt);
10080 * @ctxt: an XML parser context
10082 * parse an XML element, this is highly recursive
10084 * [39] element ::= EmptyElemTag | STag content ETag
10086 * [ WFC: Element Type Match ]
10087 * The Name in an element's end-tag must match the element type in the
/*
 * xmlParseElement: parse one element (production [39]): a start tag,
 * then either the empty-element form or content followed by an end tag.
 *
 * - Depth guard: when ctxt->nameNr exceeds xmlParserMaxDepth and
 *   XML_PARSE_HUGE is not set, a fatal error is raised and the parser
 *   is halted.
 * - A value is pushed on the space stack before the start tag is parsed:
 *   -1 when the stack is empty or its top is -2, otherwise the current top.
 * - ctxt->nsNr is sampled on entry; on the empty-element, missing-GT and
 *   premature-end paths the namespaces added since then are removed with
 *   nsPop. On the normal path the count is passed to xmlParseEndTag2.
 * - When ctxt->record_info is set, begin position/line are captured up
 *   front and end position/line are stored through xmlParserAddNodeInfo,
 *   but only if ret is non-NULL.
 *
 * NOTE(review): lines are elided in this listing; the declaration and
 * assignment of ret, the conditions selecting between the SAX2 and SAX1
 * code paths, and the return statements are not visible here.
 */
10093 xmlParseElement(xmlParserCtxtPtr ctxt) {
10094 const xmlChar *name;
10095 const xmlChar *prefix = NULL;
10096 const xmlChar *URI = NULL;
10097 xmlParserNodeInfo node_info;
10098 int line, tlen = 0;
/* Namespace stack height on entry; used below to pop what this element adds. */
10100 int nsNr = ctxt->nsNr;
10102 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10103 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10104 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10105 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10106 xmlParserMaxDepth);
10107 xmlHaltParser(ctxt);
10111 /* Capture start position */
10112 if (ctxt->record_info) {
10113 node_info.begin_pos = ctxt->input->consumed +
10114 (CUR_PTR - ctxt->input->base);
10115 node_info.begin_line = ctxt->input->line;
10118 if (ctxt->spaceNr == 0)
10119 spacePush(ctxt, -1);
10120 else if (*ctxt->space == -2)
10121 spacePush(ctxt, -1);
10123 spacePush(ctxt, *ctxt->space);
/* Line of the start tag, kept for the error messages and end-tag parsing below. */
10125 line = ctxt->input->line;
10126 #ifdef LIBXML_SAX1_ENABLED
10128 #endif /* LIBXML_SAX1_ENABLED */
10129 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10130 #ifdef LIBXML_SAX1_ENABLED
10132 name = xmlParseStartTag(ctxt);
10133 #endif /* LIBXML_SAX1_ENABLED */
10134 if (ctxt->instate == XML_PARSER_EOF)
10136 if (name == NULL) {
10140 namePush(ctxt, name);
10143 #ifdef LIBXML_VALID_ENABLED
10145 * [ VC: Root Element Type ]
10146 * The Name in the document type declaration must match the element
10147 * type of the root element.
10149 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10150 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10151 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10152 #endif /* LIBXML_VALID_ENABLED */
10155 * Check for an Empty Element.
10157 if ((RAW == '/') && (NXT(1) == '>')) {
10160 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10161 (!ctxt->disableSAX))
10162 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10163 #ifdef LIBXML_SAX1_ENABLED
10165 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10166 (!ctxt->disableSAX))
10167 ctxt->sax->endElement(ctxt->userData, name);
10168 #endif /* LIBXML_SAX1_ENABLED */
10172 if (nsNr != ctxt->nsNr)
10173 nsPop(ctxt, ctxt->nsNr - nsNr);
10174 if ( ret != NULL && ctxt->record_info ) {
10175 node_info.end_pos = ctxt->input->consumed +
10176 (CUR_PTR - ctxt->input->base);
10177 node_info.end_line = ctxt->input->line;
10178 node_info.node = ret;
10179 xmlParserAddNodeInfo(ctxt, &node_info);
10186 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10187 "Couldn't find end of Start Tag %s line %d\n",
10191 * end of parsing of this node.
10196 if (nsNr != ctxt->nsNr)
10197 nsPop(ctxt, ctxt->nsNr - nsNr);
10200 * Capture end position and add node
10202 if ( ret != NULL && ctxt->record_info ) {
10203 node_info.end_pos = ctxt->input->consumed +
10204 (CUR_PTR - ctxt->input->base);
10205 node_info.end_line = ctxt->input->line;
10206 node_info.node = ret;
10207 xmlParserAddNodeInfo(ctxt, &node_info);
10213 * Parse the content of the element:
10215 xmlParseContent(ctxt);
10216 if (ctxt->instate == XML_PARSER_EOF)
10218 if (!IS_BYTE_CHAR(RAW)) {
10219 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10220 "Premature end of data in tag %s line %d\n",
10224 * end of parsing of this node.
10229 if (nsNr != ctxt->nsNr)
10230 nsPop(ctxt, ctxt->nsNr - nsNr);
10235 * parse the end of tag: '</' should be here.
10238 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10241 #ifdef LIBXML_SAX1_ENABLED
10243 xmlParseEndTag1(ctxt, line);
10244 #endif /* LIBXML_SAX1_ENABLED */
10247 * Capture end position and add node
10249 if ( ret != NULL && ctxt->record_info ) {
10250 node_info.end_pos = ctxt->input->consumed +
10251 (CUR_PTR - ctxt->input->base);
10252 node_info.end_line = ctxt->input->line;
10253 node_info.node = ret;
10254 xmlParserAddNodeInfo(ctxt, &node_info);
10259 * xmlParseVersionNum:
10260 * @ctxt: an XML parser context
10262 * parse the XML version value.
10264 * [26] VersionNum ::= '1.' [0-9]+
10266 * In practice allow [0-9].[0-9]+ at that level
10268 * Returns the string giving the XML version number, or NULL
/*
 * xmlParseVersionNum: accumulate the version number in a heap buffer.
 * The buffer is obtained with xmlMallocAtomic and enlarged with
 * xmlRealloc once len + 1 reaches size; both allocation failures are
 * reported through xmlErrMemory. The first character is tested for being
 * an ASCII digit and the visible loop keeps going while digits follow.
 * NOTE(review): most of the body is elided in this listing (declarations
 * of size, len, cur and tmp, the handling of the dot, buffer stores and
 * all return statements), so ownership of buf on the error paths cannot
 * be confirmed from here.
 */
10271 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10272 xmlChar *buf = NULL;
10277 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10279 xmlErrMemory(ctxt, NULL);
10283 if (!((cur >= '0') && (cur <= '9'))) {
10297 while ((cur >= '0') && (cur <= '9')) {
10298 if (len + 1 >= size) {
10302 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10305 xmlErrMemory(ctxt, NULL);
10319 * xmlParseVersionInfo:
10320 * @ctxt: an XML parser context
10322 * parse the XML version.
10324 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10326 * [25] Eq ::= S? '=' S?
10328 * Returns the version string, e.g. "1.0"
/*
 * xmlParseVersionInfo: when the input is positioned on the version
 * keyword, parse the quoted version value with xmlParseVersionNum and
 * hand the result back in version (NULL when nothing was parsed).
 * Errors raised on malformed input: XML_ERR_EQUAL_REQUIRED,
 * XML_ERR_STRING_NOT_CLOSED and XML_ERR_STRING_NOT_STARTED.
 * NOTE(review): only the single-quote branch header is visible; the first
 * xmlParseVersionNum call presumably belongs to a double-quote branch whose
 * header is elided in this listing - confirm against the full file.
 */
10332 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10333 xmlChar *version = NULL;
10335 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10339 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10346 version = xmlParseVersionNum(ctxt);
10348 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10351 } else if (RAW == '\''){
10353 version = xmlParseVersionNum(ctxt);
10355 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10359 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10367 * @ctxt: an XML parser context
10369 * parse the XML encoding name
10371 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10373 * Returns the encoding name value or NULL
/*
 * xmlParseEncName: parse an encoding name (production [81]).
 * The name must start with an ASCII letter; in that case a buffer is
 * allocated with xmlMallocAtomic and the loop accepts ASCII letters,
 * digits, dot and underscore, growing the buffer with xmlRealloc once
 * len + 1 reaches size. Allocation failures go through xmlErrMemory.
 * When the first character is not a letter, XML_ERR_ENCODING_NAME is
 * raised.
 * NOTE(review): the last alternative of the loop condition (presumably
 * the dash allowed by [81]), the buffer stores and the return statements
 * are elided in this listing.
 */
10376 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10377 xmlChar *buf = NULL;
10383 if (((cur >= 'a') && (cur <= 'z')) ||
10384 ((cur >= 'A') && (cur <= 'Z'))) {
10385 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10387 xmlErrMemory(ctxt, NULL);
10394 while (((cur >= 'a') && (cur <= 'z')) ||
10395 ((cur >= 'A') && (cur <= 'Z')) ||
10396 ((cur >= '0') && (cur <= '9')) ||
10397 (cur == '.') || (cur == '_') ||
10399 if (len + 1 >= size) {
10403 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10405 xmlErrMemory(ctxt, NULL);
10422 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10428 * xmlParseEncodingDecl:
10429 * @ctxt: an XML parser context
10431 * parse the XML encoding declaration
10433 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10435 * This sets up the conversion filters.
10437 * Returns the encoding value or NULL
/*
 * xmlParseEncodingDecl: parse the encoding pseudo-attribute of an XML or
 * text declaration and act on the declared name.
 *
 * After the encoding keyword the quoted name is read with
 * xmlParseEncName; an unterminated value raises XML_ERR_STRING_NOT_CLOSED
 * and frees the name, a missing quote raises XML_ERR_STRING_NOT_STARTED.
 *
 * Handling of the parsed name:
 * - XML_PARSE_IGNORE_ENC set: the name is freed (the rest of that branch
 *   is elided in this listing).
 * - UTF-16 / UTF16: the name replaces ctxt->encoding (old value freed).
 *   If no encoding was known and the input buffer has no encoder, the
 *   document cannot really be UTF-16 and XML_ERR_INVALID_ENCODING is
 *   raised.
 * - UTF-8 / UTF8: the name replaces ctxt->encoding (old value freed).
 * - anything else: the name replaces ctxt->input->encoding (old value
 *   freed), a handler is looked up with xmlFindCharEncodingHandler and
 *   installed with xmlSwitchToEncoding; a failed switch sets
 *   ctxt->errNo to XML_ERR_UNSUPPORTED_ENCODING. The "Unsupported
 *   encoding" error is presumably the no-handler case (its else line is
 *   elided) - TODO confirm.
 */
10441 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10442 xmlChar *encoding = NULL;
10445 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10449 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10456 encoding = xmlParseEncName(ctxt);
10458 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10459 xmlFree((xmlChar *) encoding);
10463 } else if (RAW == '\''){
10465 encoding = xmlParseEncName(ctxt);
10467 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10468 xmlFree((xmlChar *) encoding);
10473 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10477 * Non standard parsing, allowing the user to ignore encoding
10479 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10480 xmlFree((xmlChar *) encoding);
10485 * UTF-16 encoding stwich has already taken place at this stage,
10486 * more over the little-endian/big-endian selection is already done
10488 if ((encoding != NULL) &&
10489 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10490 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492 * If no encoding was passed to the parser, that we are
10493 * using UTF-16 and no decoder is present i.e. the
10494 * document is apparently UTF-8 compatible, then raise an
10495 * encoding mismatch fatal error
10497 if ((ctxt->encoding == NULL) &&
10498 (ctxt->input->buf != NULL) &&
10499 (ctxt->input->buf->encoder == NULL)) {
10500 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10501 "Document labelled UTF-16 but has UTF-8 content\n");
10503 if (ctxt->encoding != NULL)
10504 xmlFree((xmlChar *) ctxt->encoding);
10505 ctxt->encoding = encoding;
10508 * UTF-8 encoding is handled natively
10510 else if ((encoding != NULL) &&
10511 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10512 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10513 if (ctxt->encoding != NULL)
10514 xmlFree((xmlChar *) ctxt->encoding);
10515 ctxt->encoding = encoding;
10517 else if (encoding != NULL) {
10518 xmlCharEncodingHandlerPtr handler;
/* The parsed name is stored on the input, replacing any previous value. */
10520 if (ctxt->input->encoding != NULL)
10521 xmlFree((xmlChar *) ctxt->input->encoding);
10522 ctxt->input->encoding = encoding;
10524 handler = xmlFindCharEncodingHandler((const char *) encoding);
10525 if (handler != NULL) {
10526 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10527 /* failed to convert */
10528 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10532 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10533 "Unsupported encoding %s\n", encoding);
10543 * @ctxt: an XML parser context
10545 * parse the XML standalone declaration
10547 * [32] SDDecl ::= S 'standalone' Eq
10548 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10550 * [ VC: Standalone Document Declaration ]
10551 * TODO The standalone document declaration must have the value "no"
10552 * if any external markup declarations contain declarations of:
10553 * - attributes with default values, if elements to which these
10554 * attributes apply appear in the document without specifications
10555 * of values for these attributes, or
10556 * - entities (other than amp, lt, gt, apos, quot), if references
10557 * to those entities appear in the document, or
10558 * - attributes with values subject to normalization, where the
10559 * attribute appears in the document with a value which will change
10560 * as a result of normalization, or
10561 * - element types with element content, if white space occurs directly
10562 * within any instance of those types.
10565 * 1 if standalone="yes"
10566 * 0 if standalone="no"
10567 * -2 if standalone attribute is missing or invalid
10568 * (A standalone value of -2 means that the XML declaration was found,
10569 * but no value was specified for the standalone attribute).
/*
 * xmlParseSDDecl: parse the standalone pseudo-attribute (production [32]).
 * The result starts at -2 and is returned unchanged when the equal sign
 * is missing (XML_ERR_EQUAL_REQUIRED). Inside either kind of quote the
 * value must be no or yes, otherwise XML_ERR_STANDALONE_VALUE is raised;
 * a missing closing quote raises XML_ERR_STRING_NOT_CLOSED and a missing
 * opening quote XML_ERR_STRING_NOT_STARTED.
 * NOTE(review): the assignments that set the result for no/yes and the
 * single-quote branch header are elided in this listing.
 */
10573 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10574 int standalone = -2;
10577 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10582 return(standalone);
10588 if ((RAW == 'n') && (NXT(1) == 'o')) {
10591 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10596 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10599 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10602 } else if (RAW == '"'){
10604 if ((RAW == 'n') && (NXT(1) == 'o')) {
10607 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10612 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10615 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10622 return(standalone);
10627 * @ctxt: an XML parser context
10629 * parse an XML declaration header
10631 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
/*
 * xmlParseXMLDecl: parse the XML declaration (production [23]).
 *
 * Steps visible in this listing:
 * 1. ctxt->input->standalone is preset to -2 (declaration present, no
 *    standalone attribute seen yet).
 * 2. A blank is required after the opening keyword
 *    (XML_ERR_SPACE_REQUIRED otherwise).
 * 3. VersionInfo is mandatory: a NULL result raises
 *    XML_ERR_VERSION_MISSING. A version different from
 *    XML_DEFAULT_VERSION is fatal under XML_PARSE_OLD10; otherwise a
 *    version starting with "1." only triggers XML_WAR_UNKNOWN_VERSION and
 *    anything else is presumably fatal (the else lines are elided).
 *    The parsed string replaces ctxt->version, freeing the old value.
 * 4. The encoding declaration is parsed; parsing stops when the encoding
 *    is unsupported or the parser reached XML_PARSER_EOF.
 * 5. The standalone declaration is parsed into ctxt->input->standalone.
 * 6. The declaration must end with the PI terminator; otherwise
 *    XML_ERR_XMLDECL_NOT_FINISHED is raised and, in the last branch, the
 *    input is advanced with MOVETO_ENDTAG.
 */
10635 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10639 * This value for standalone indicates that the document has an
10640 * XML declaration but it does not have a standalone attribute.
10641 * It will be overwritten later if a standalone attribute is found.
10643 ctxt->input->standalone = -2;
10646 * We know that '<?xml' is here.
10650 if (!IS_BLANK_CH(RAW)) {
10651 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10652 "Blank needed after '<?xml'\n");
10657 * We must have the VersionInfo here.
10659 version = xmlParseVersionInfo(ctxt);
10660 if (version == NULL) {
10661 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10663 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10665 * Changed here for XML-1.0 5th edition
10667 if (ctxt->options & XML_PARSE_OLD10) {
10668 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10669 "Unsupported version '%s'\n",
10672 if ((version[0] == '1') && ((version[1] == '.'))) {
10673 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10674 "Unsupported version '%s'\n",
10677 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10678 "Unsupported version '%s'\n",
10683 if (ctxt->version != NULL)
10684 xmlFree((void *) ctxt->version);
10685 ctxt->version = version;
10689 * We may have the encoding declaration
10691 if (!IS_BLANK_CH(RAW)) {
10692 if ((RAW == '?') && (NXT(1) == '>')) {
10696 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10698 xmlParseEncodingDecl(ctxt);
10699 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10700 (ctxt->instate == XML_PARSER_EOF)) {
10702 * The XML REC instructs us to stop parsing right here
10708 * We may have the standalone status.
10710 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10711 if ((RAW == '?') && (NXT(1) == '>')) {
10715 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10719 * We can grow the input buffer freely at that point
10724 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10727 if ((RAW == '?') && (NXT(1) == '>')) {
10729 } else if (RAW == '>') {
10730 /* Deprecated old WD ... */
10731 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10734 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10735 MOVETO_ENDTAG(CUR_PTR);
10742 * @ctxt: an XML parser context
10744 * parse an XML Misc* optional field.
10746 * [27] Misc ::= Comment | PI | S
/*
 * xmlParseMisc: consume a run of Misc items (production [27]).
 * Loops while the parser is not in XML_PARSER_EOF and the input is at a
 * processing instruction opener, a comment opener or a blank character.
 * Comments are handed to xmlParseComment.
 * NOTE(review): the statements executed for the processing-instruction
 * and blank branches are elided in this listing.
 */
10750 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10751 while ((ctxt->instate != XML_PARSER_EOF) &&
10752 (((RAW == '<') && (NXT(1) == '?')) ||
10753 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10754 IS_BLANK_CH(CUR))) {
10755 if ((RAW == '<') && (NXT(1) == '?')) {
10757 } else if (IS_BLANK_CH(CUR)) {
10760 xmlParseComment(ctxt);
10765 * xmlParseDocument:
10766 * @ctxt: an XML parser context
10768 * parse an XML document (and build a tree if using the standard SAX
10771 * [1] document ::= prolog element Misc*
10773 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10775 * Returns 0, or -1 in case of error. The parser context is augmented
10776 * as a result of the parsing.
/*
 * xmlParseDocument: parse a complete document (productions [1] and [22]).
 *
 * Sequence visible in this listing:
 * 1. Reject a NULL context or a context without input.
 * 2. xmlDetectSAX2, then the setDocumentLocator SAX callback.
 * 3. If no encoding is set and at least 4 bytes are available, guess the
 *    encoding with xmlDetectCharEncoding and switch to it when it is not
 *    XML_CHAR_ENCODING_NONE.
 * 4. Parse the XML declaration when the input starts with the xml PI
 *    opener followed by a blank; stop on unsupported encoding or EOF
 *    state. ctxt->standalone is copied from the input. Without a
 *    declaration ctxt->version is set to a copy of XML_DEFAULT_VERSION.
 * 5. startDocument SAX callback; the compression level of the input
 *    buffer is copied to ctxt->myDoc when it is non-negative.
 * 6. Misc, then an optional DOCTYPE: internal subset (inSubset 1),
 *    externalSubset SAX callback (inSubset 2), xmlCleanSpecialAttr, and
 *    more Misc in the XML_PARSER_PROLOG state.
 * 7. The root element is parsed in XML_PARSER_CONTENT, followed by Misc in
 *    XML_PARSER_EPILOG; XML_ERR_DOCUMENT_END is reported under a condition
 *    that is elided here, and the state becomes XML_PARSER_EOF.
 * 8. endDocument SAX callback; ctxt->myDoc is freed and cleared when its
 *    version equals SAX_COMPAT_MODE; for well-formed input the document
 *    property flags are set.
 *
 * NOTE(review): return statements and several conditions are elided in
 * this listing; the doc comment above the function states 0 on success
 * and -1 on error.
 */
10780 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10782 xmlCharEncoding enc;
10786 if ((ctxt == NULL) || (ctxt->input == NULL))
10792 * SAX: detecting the level.
10794 xmlDetectSAX2(ctxt);
10797 * SAX: beginning of the document processing.
10799 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10800 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10801 if (ctxt->instate == XML_PARSER_EOF)
10804 if ((ctxt->encoding == NULL) &&
10805 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10807 * Get the 4 first bytes and decode the charset
10808 * if enc != XML_CHAR_ENCODING_NONE
10809 * plug some encoding conversion routines.
10815 enc = xmlDetectCharEncoding(&start[0], 4);
10816 if (enc != XML_CHAR_ENCODING_NONE) {
10817 xmlSwitchEncoding(ctxt, enc);
10823 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10828 * Check for the XMLDecl in the Prolog.
10829 * do not GROW here to avoid the detected encoder to decode more
10830 * than just the first line, unless the amount of data is really
10831 * too small to hold "<?xml version="1.0" encoding="foo"
10833 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10836 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10839 * Note that we will switch encoding on the fly.
10841 xmlParseXMLDecl(ctxt);
10842 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10843 (ctxt->instate == XML_PARSER_EOF)) {
10845 * The XML REC instructs us to stop parsing right here
10849 ctxt->standalone = ctxt->input->standalone;
10852 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10854 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10855 ctxt->sax->startDocument(ctxt->userData);
10856 if (ctxt->instate == XML_PARSER_EOF)
10858 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10859 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10860 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10864 * The Misc part of the Prolog
10867 xmlParseMisc(ctxt);
10870 * Then possibly doc type declaration(s) and more Misc
10871 * (doctypedecl Misc*)?
10874 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10876 ctxt->inSubset = 1;
10877 xmlParseDocTypeDecl(ctxt);
10879 ctxt->instate = XML_PARSER_DTD;
10880 xmlParseInternalSubset(ctxt);
10881 if (ctxt->instate == XML_PARSER_EOF)
10886 * Create and update the external subset.
10888 ctxt->inSubset = 2;
10889 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10890 (!ctxt->disableSAX))
10891 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10892 ctxt->extSubSystem, ctxt->extSubURI);
10893 if (ctxt->instate == XML_PARSER_EOF)
10895 ctxt->inSubset = 0;
10897 xmlCleanSpecialAttr(ctxt);
10899 ctxt->instate = XML_PARSER_PROLOG;
10900 xmlParseMisc(ctxt);
10904 * Time to start parsing the tree itself
10908 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10909 "Start tag expected, '<' not found\n");
10911 ctxt->instate = XML_PARSER_CONTENT;
10912 xmlParseElement(ctxt);
10913 ctxt->instate = XML_PARSER_EPILOG;
10917 * The Misc part at the end
10919 xmlParseMisc(ctxt);
10922 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10924 ctxt->instate = XML_PARSER_EOF;
10928 * SAX: end of the document processing.
10930 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10931 ctxt->sax->endDocument(ctxt->userData);
10934 * Remove locally kept entity definitions if the tree was not built
10936 if ((ctxt->myDoc != NULL) &&
10937 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10938 xmlFreeDoc(ctxt->myDoc);
10939 ctxt->myDoc = NULL;
10942 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10943 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10945 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10946 if (ctxt->nsWellFormed)
10947 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10948 if (ctxt->options & XML_PARSE_OLD10)
10949 ctxt->myDoc->properties |= XML_DOC_OLD10;
10951 if (! ctxt->wellFormed) {
10959 * xmlParseExtParsedEnt:
10960 * @ctxt: an XML parser context
10962 * parse a general parsed entity
10963 * An external general parsed entity is well-formed if it matches the
10964 * production labeled extParsedEnt.
10966 * [78] extParsedEnt ::= TextDecl? content
10968 * Returns 0, or -1 in case of error. The parser context is augmented
10969 * as a result of the parsing.
/*
 * xmlParseExtParsedEnt: parse an external general parsed entity
 * (production [78]: optional text declaration followed by content).
 *
 * Sequence visible in this listing:
 * 1. Reject a NULL context or a context without input.
 * 2. xmlDefaultSAXHandlerInit, xmlDetectSAX2 and the setDocumentLocator
 *    SAX callback.
 * 3. With at least 4 bytes available, guess the encoding from the first
 *    4 bytes and switch to it when it is not XML_CHAR_ENCODING_NONE.
 * 4. Parse a leading XML declaration if present; otherwise set
 *    ctxt->version to a copy of XML_DEFAULT_VERSION.
 * 5. startDocument SAX callback, then validation and subset loading are
 *    switched off and the content is parsed in XML_PARSER_CONTENT.
 * 6. Leftover input is an error: an end-tag opener gives
 *    XML_ERR_NOT_WELL_BALANCED, any other remaining byte gives
 *    XML_ERR_EXTRA_CONTENT.
 * 7. endDocument SAX callback; returns -1 when the input was not
 *    well-formed.
 *
 * NOTE(review): after xmlParseXMLDecl only XML_ERR_UNSUPPORTED_ENCODING is
 * tested here, whereas xmlParseDocument also tests for XML_PARSER_EOF at
 * the same point - confirm whether the difference is intentional.
 */
10973 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10975 xmlCharEncoding enc;
10977 if ((ctxt == NULL) || (ctxt->input == NULL))
10980 xmlDefaultSAXHandlerInit();
10982 xmlDetectSAX2(ctxt);
10987 * SAX: beginning of the document processing.
10989 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10990 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10993 * Get the 4 first bytes and decode the charset
10994 * if enc != XML_CHAR_ENCODING_NONE
10995 * plug some encoding conversion routines.
10997 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11002 enc = xmlDetectCharEncoding(start, 4);
11003 if (enc != XML_CHAR_ENCODING_NONE) {
11004 xmlSwitchEncoding(ctxt, enc);
11010 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11014 * Check for the XMLDecl in the Prolog.
11017 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11020 * Note that we will switch encoding on the fly.
11022 xmlParseXMLDecl(ctxt);
11023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11025 * The XML REC instructs us to stop parsing right here
11031 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11033 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11034 ctxt->sax->startDocument(ctxt->userData);
11035 if (ctxt->instate == XML_PARSER_EOF)
11039 * Doing validity checking on chunk doesn't make sense
11041 ctxt->instate = XML_PARSER_CONTENT;
11042 ctxt->validate = 0;
11043 ctxt->loadsubset = 0;
11046 xmlParseContent(ctxt);
11047 if (ctxt->instate == XML_PARSER_EOF)
11050 if ((RAW == '<') && (NXT(1) == '/')) {
11051 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11052 } else if (RAW != 0) {
11053 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11057 * SAX: end of the document processing.
11059 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11060 ctxt->sax->endDocument(ctxt->userData);
11062 if (! ctxt->wellFormed) return(-1);
11066 #ifdef LIBXML_PUSH_ENABLED
11067 /************************************************************************
11069 * Progressive parsing interfaces *
11071 ************************************************************************/
11074 * xmlParseLookupSequence:
11075 * @ctxt: an XML parser context
11076 * @first: the first char to lookup
11077 * @next: the next char to lookup or zero
11078 * @third: the next char to lookup or zero
11080 * Try to find if a sequence (first, next, third) or just (first next) or
11081 * (first) is available in the input stream.
11082 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11083 * to avoid rescanning sequences of bytes, it DOES change the state of the
11084 * parser, do not use liberally.
11086 * Returns the index to the current parsing point if the full sequence
11087 * is available, -1 otherwise.
/*
 * xmlParseLookupSequence: search the buffered input for the byte sequence
 * (first), (first next) or (first next third); a zero next/third shortens
 * the sequence.
 *
 * The scan starts at the current offset (in->cur - in->base) or at
 * ctxt->checkIndex when that is further along, so bytes examined by a
 * previous failed call are not rescanned. The usable length is reduced by
 * the sequence length so that base + 1 and base + 2 stay inside the data.
 *
 * On a match ctxt->checkIndex is reset to 0 and the offset of the match
 * relative to the current parsing position is returned. On failure the
 * index reached is saved in ctxt->checkIndex. Returns -1 when the input is
 * NULL or the computed offset is negative.
 *
 * NOTE(review): the branch taken when in->buf is NULL, the preprocessor
 * guards around the xmlGenericError traces and the final return are
 * elided in this listing.
 */
11090 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11091 xmlChar next, xmlChar third) {
11093 xmlParserInputPtr in;
11094 const xmlChar *buf;
11097 if (in == NULL) return(-1);
11098 base = in->cur - in->base;
11099 if (base < 0) return(-1);
/* Resume after the region already scanned by an earlier unsuccessful call. */
11100 if (ctxt->checkIndex > base)
11101 base = ctxt->checkIndex;
11102 if (in->buf == NULL) {
11106 buf = xmlBufContent(in->buf->buffer);
11107 len = xmlBufUse(in->buf->buffer);
11109 /* take into account the sequence length */
11110 if (third) len -= 2;
11111 else if (next) len --;
11112 for (;base < len;base++) {
11113 if (buf[base] == first) {
11115 if ((buf[base + 1] != next) ||
11116 (buf[base + 2] != third)) continue;
11117 } else if (next != 0) {
11118 if (buf[base + 1] != next) continue;
11120 ctxt->checkIndex = 0;
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: lookup '%c' found at %d\n",
11126 else if (third == 0)
11127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: lookup '%c%c' found at %d\n",
11129 first, next, base);
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: lookup '%c%c%c' found at %d\n",
11133 first, next, third, base);
11135 return(base - (in->cur - in->base));
11138 ctxt->checkIndex = base;
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: lookup '%c' failed\n", first);
11143 else if (third == 0)
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: lookup '%c%c' failed\n", first, next);
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: lookup '%c%c%c' failed\n", first, next, third);
11154 * xmlParseGetLasts:
11155 * @ctxt: an XML parser context
11156 * @lastlt: pointer to store the last '<' from the input
11157 * @lastgt: pointer to store the last '>' from the input
11159 * Lookup the last < and > in the current chunk
/*
 * xmlParseGetLasts: locate the last opening and closing angle brackets
 * in the current input chunk for the push parser.
 *
 * A NULL ctxt, lastlt or lastgt is reported as an internal error through
 * xmlGenericError. The search only runs in progressive mode with a single
 * input on the stack. It walks backwards from input->end to the last
 * opening bracket, then forwards looking for a closing bracket while
 * stepping over single- and double-quoted runs; a further backward scan
 * looks for a closing bracket down to input->base.
 *
 * NOTE(review): the assignments to *lastlt and *lastgt, the else branches
 * and the pointer adjustments between the scans are elided in this
 * listing, so the exact values stored cannot be confirmed from here.
 */
11162 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11163 const xmlChar **lastgt) {
11164 const xmlChar *tmp;
11166 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11167 xmlGenericError(xmlGenericErrorContext,
11168 "Internal error: xmlParseGetLasts\n");
11171 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11172 tmp = ctxt->input->end;
11174 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11175 if (tmp < ctxt->input->base) {
11181 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11182 if (*tmp == '\'') {
11184 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11185 if (tmp < ctxt->input->end) tmp++;
11186 } else if (*tmp == '"') {
11188 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11189 if (tmp < ctxt->input->end) tmp++;
11193 if (tmp < ctxt->input->end)
11198 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11199 if (tmp >= ctxt->input->base)
11211 * xmlCheckCdataPush:
11212 * @utf: pointer to the block of characters
11213 * @len: length of the block in bytes
11215 * Check that the block of characters is okay as CData content [20]
11217 * Returns the number of bytes to pass if okay, or the negative index at
11218 * which a UTF-8 error occurred otherwise
/*
 * xmlCheckCdataPush: validate a block of UTF-8 bytes before it is passed
 * on as CDATA content.
 *
 * The block is walked sequence by sequence, classified by the lead byte
 * as a 1-, 2-, 3- or 4-byte form. For multi-byte forms:
 * - if the sequence would run past len, -ix is returned (the offset of
 *   the incomplete sequence, negated);
 * - every continuation byte must match 10xxxxxx;
 * - the decoded code point must satisfy xmlIsCharQ.
 * In the 1-byte form, tab, line feed and carriage return are singled
 * out explicitly.
 *
 * NOTE(review): the return statements for the error and success cases,
 * the index increments and the handling of other 1-byte values are elided
 * in this listing; the doc comment above the function describes the
 * result as the number of bytes to pass, or a negative index on error.
 */
11221 xmlCheckCdataPush(const xmlChar *utf, int len) {
11226 if ((utf == NULL) || (len <= 0))
11229 for (ix = 0; ix < len;) { /* string is 0-terminated */
11231 if ((c & 0x80) == 0x00) { /* 1-byte code, high bit clear (0xxxxxxx) */
11234 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11238 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11239 if (ix + 2 > len) return(-ix);
11240 if ((utf[ix+1] & 0xc0 ) != 0x80)
11242 codepoint = (utf[ix] & 0x1f) << 6;
11243 codepoint |= utf[ix+1] & 0x3f;
11244 if (!xmlIsCharQ(codepoint))
11247 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11248 if (ix + 3 > len) return(-ix);
11249 if (((utf[ix+1] & 0xc0) != 0x80) ||
11250 ((utf[ix+2] & 0xc0) != 0x80))
11252 codepoint = (utf[ix] & 0xf) << 12;
11253 codepoint |= (utf[ix+1] & 0x3f) << 6;
11254 codepoint |= utf[ix+2] & 0x3f;
11255 if (!xmlIsCharQ(codepoint))
11258 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11259 if (ix + 4 > len) return(-ix);
11260 if (((utf[ix+1] & 0xc0) != 0x80) ||
11261 ((utf[ix+2] & 0xc0) != 0x80) ||
11262 ((utf[ix+3] & 0xc0) != 0x80))
11264 codepoint = (utf[ix] & 0x7) << 18;
11265 codepoint |= (utf[ix+1] & 0x3f) << 12;
11266 codepoint |= (utf[ix+2] & 0x3f) << 6;
11267 codepoint |= utf[ix+3] & 0x3f;
11268 if (!xmlIsCharQ(codepoint))
11271 } else /* unknown encoding */
11278 * xmlParseTryOrFinish:
11279 * @ctxt: an XML parser context
11280 * @terminate: last chunk indicator
11282 * Try to progress on parsing
11284 * Returns zero if no parsing was possible
11287 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11291 const xmlChar *lastlt, *lastgt;
11293 if (ctxt->input == NULL)
11297 switch (ctxt->instate) {
11298 case XML_PARSER_EOF:
11299 xmlGenericError(xmlGenericErrorContext,
11300 "PP: try EOF\n"); break;
11301 case XML_PARSER_START:
11302 xmlGenericError(xmlGenericErrorContext,
11303 "PP: try START\n"); break;
11304 case XML_PARSER_MISC:
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: try MISC\n");break;
11307 case XML_PARSER_COMMENT:
11308 xmlGenericError(xmlGenericErrorContext,
11309 "PP: try COMMENT\n");break;
11310 case XML_PARSER_PROLOG:
11311 xmlGenericError(xmlGenericErrorContext,
11312 "PP: try PROLOG\n");break;
11313 case XML_PARSER_START_TAG:
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: try START_TAG\n");break;
11316 case XML_PARSER_CONTENT:
11317 xmlGenericError(xmlGenericErrorContext,
11318 "PP: try CONTENT\n");break;
11319 case XML_PARSER_CDATA_SECTION:
11320 xmlGenericError(xmlGenericErrorContext,
11321 "PP: try CDATA_SECTION\n");break;
11322 case XML_PARSER_END_TAG:
11323 xmlGenericError(xmlGenericErrorContext,
11324 "PP: try END_TAG\n");break;
11325 case XML_PARSER_ENTITY_DECL:
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: try ENTITY_DECL\n");break;
11328 case XML_PARSER_ENTITY_VALUE:
11329 xmlGenericError(xmlGenericErrorContext,
11330 "PP: try ENTITY_VALUE\n");break;
11331 case XML_PARSER_ATTRIBUTE_VALUE:
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: try ATTRIBUTE_VALUE\n");break;
11334 case XML_PARSER_DTD:
11335 xmlGenericError(xmlGenericErrorContext,
11336 "PP: try DTD\n");break;
11337 case XML_PARSER_EPILOG:
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: try EPILOG\n");break;
11340 case XML_PARSER_PI:
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: try PI\n");break;
11343 case XML_PARSER_IGNORE:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: try IGNORE\n");break;
11349 if ((ctxt->input != NULL) &&
11350 (ctxt->input->cur - ctxt->input->base > 4096)) {
11352 ctxt->checkIndex = 0;
11354 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11356 while (ctxt->instate != XML_PARSER_EOF) {
11357 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11362 * Pop-up of finished entities.
11364 while ((RAW == 0) && (ctxt->inputNr > 1))
11367 if (ctxt->input == NULL) break;
11368 if (ctxt->input->buf == NULL)
11369 avail = ctxt->input->length -
11370 (ctxt->input->cur - ctxt->input->base);
11373 * If we are operating on converted input, try to flush
11374 * remaining chars to avoid them stalling in the non-converted
11375 * buffer. But do not do this in document start where
11376 * encoding="..." may not have been read and we work on a
11377 * guessed encoding.
11379 if ((ctxt->instate != XML_PARSER_START) &&
11380 (ctxt->input->buf->raw != NULL) &&
11381 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11382 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11384 size_t current = ctxt->input->cur - ctxt->input->base;
11386 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11387 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11390 avail = xmlBufUse(ctxt->input->buf->buffer) -
11391 (ctxt->input->cur - ctxt->input->base);
11395 switch (ctxt->instate) {
11396 case XML_PARSER_EOF:
11398 * Document parsing is done !
11401 case XML_PARSER_START:
11402 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11404 xmlCharEncoding enc;
11407 * Very first chars read from the document flow.
11413 * Get the 4 first bytes and decode the charset
11414 * if enc != XML_CHAR_ENCODING_NONE
11415 * plug some encoding conversion routines,
11416 * else xmlSwitchEncoding will set to (default)
11423 enc = xmlDetectCharEncoding(start, 4);
11424 xmlSwitchEncoding(ctxt, enc);
11430 cur = ctxt->input->cur[0];
11431 next = ctxt->input->cur[1];
11433 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11434 ctxt->sax->setDocumentLocator(ctxt->userData,
11435 &xmlDefaultSAXLocator);
11436 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11437 xmlHaltParser(ctxt);
11439 xmlGenericError(xmlGenericErrorContext,
11440 "PP: entering EOF\n");
11442 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11443 ctxt->sax->endDocument(ctxt->userData);
11446 if ((cur == '<') && (next == '?')) {
11447 /* PI or XML decl */
11448 if (avail < 5) return(ret);
11449 if ((!terminate) &&
11450 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11452 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11453 ctxt->sax->setDocumentLocator(ctxt->userData,
11454 &xmlDefaultSAXLocator);
11455 if ((ctxt->input->cur[2] == 'x') &&
11456 (ctxt->input->cur[3] == 'm') &&
11457 (ctxt->input->cur[4] == 'l') &&
11458 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11461 xmlGenericError(xmlGenericErrorContext,
11462 "PP: Parsing XML Decl\n");
11464 xmlParseXMLDecl(ctxt);
11465 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11467 * The XML REC instructs us to stop parsing right
11470 xmlHaltParser(ctxt);
11473 ctxt->standalone = ctxt->input->standalone;
11474 if ((ctxt->encoding == NULL) &&
11475 (ctxt->input->encoding != NULL))
11476 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11477 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11478 (!ctxt->disableSAX))
11479 ctxt->sax->startDocument(ctxt->userData);
11480 ctxt->instate = XML_PARSER_MISC;
11482 xmlGenericError(xmlGenericErrorContext,
11483 "PP: entering MISC\n");
11486 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11487 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11488 (!ctxt->disableSAX))
11489 ctxt->sax->startDocument(ctxt->userData);
11490 ctxt->instate = XML_PARSER_MISC;
11492 xmlGenericError(xmlGenericErrorContext,
11493 "PP: entering MISC\n");
11497 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11498 ctxt->sax->setDocumentLocator(ctxt->userData,
11499 &xmlDefaultSAXLocator);
11500 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11501 if (ctxt->version == NULL) {
11502 xmlErrMemory(ctxt, NULL);
11505 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11506 (!ctxt->disableSAX))
11507 ctxt->sax->startDocument(ctxt->userData);
11508 ctxt->instate = XML_PARSER_MISC;
11510 xmlGenericError(xmlGenericErrorContext,
11511 "PP: entering MISC\n");
11515 case XML_PARSER_START_TAG: {
11516 const xmlChar *name;
11517 const xmlChar *prefix = NULL;
11518 const xmlChar *URI = NULL;
11519 int nsNr = ctxt->nsNr;
11521 if ((avail < 2) && (ctxt->inputNr == 1))
11523 cur = ctxt->input->cur[0];
11525 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11526 xmlHaltParser(ctxt);
11527 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11528 ctxt->sax->endDocument(ctxt->userData);
11532 if (ctxt->progressive) {
11533 /* > can be found unescaped in attribute values */
11534 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11536 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11540 if (ctxt->spaceNr == 0)
11541 spacePush(ctxt, -1);
11542 else if (*ctxt->space == -2)
11543 spacePush(ctxt, -1);
11545 spacePush(ctxt, *ctxt->space);
11546 #ifdef LIBXML_SAX1_ENABLED
11548 #endif /* LIBXML_SAX1_ENABLED */
11549 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11550 #ifdef LIBXML_SAX1_ENABLED
11552 name = xmlParseStartTag(ctxt);
11553 #endif /* LIBXML_SAX1_ENABLED */
11554 if (ctxt->instate == XML_PARSER_EOF)
11556 if (name == NULL) {
11558 xmlHaltParser(ctxt);
11559 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11560 ctxt->sax->endDocument(ctxt->userData);
11563 #ifdef LIBXML_VALID_ENABLED
11565 * [ VC: Root Element Type ]
11566 * The Name in the document type declaration must match
11567 * the element type of the root element.
11569 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11570 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11571 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11572 #endif /* LIBXML_VALID_ENABLED */
11575 * Check for an Empty Element.
11577 if ((RAW == '/') && (NXT(1) == '>')) {
11581 if ((ctxt->sax != NULL) &&
11582 (ctxt->sax->endElementNs != NULL) &&
11583 (!ctxt->disableSAX))
11584 ctxt->sax->endElementNs(ctxt->userData, name,
11586 if (ctxt->nsNr - nsNr > 0)
11587 nsPop(ctxt, ctxt->nsNr - nsNr);
11588 #ifdef LIBXML_SAX1_ENABLED
11590 if ((ctxt->sax != NULL) &&
11591 (ctxt->sax->endElement != NULL) &&
11592 (!ctxt->disableSAX))
11593 ctxt->sax->endElement(ctxt->userData, name);
11594 #endif /* LIBXML_SAX1_ENABLED */
11596 if (ctxt->instate == XML_PARSER_EOF)
11599 if (ctxt->nameNr == 0) {
11600 ctxt->instate = XML_PARSER_EPILOG;
11602 ctxt->instate = XML_PARSER_CONTENT;
11604 ctxt->progressive = 1;
11610 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11611 "Couldn't find end of Start Tag %s\n",
11617 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11618 #ifdef LIBXML_SAX1_ENABLED
11620 namePush(ctxt, name);
11621 #endif /* LIBXML_SAX1_ENABLED */
11623 ctxt->instate = XML_PARSER_CONTENT;
11624 ctxt->progressive = 1;
11627 case XML_PARSER_CONTENT: {
11628 const xmlChar *test;
11630 if ((avail < 2) && (ctxt->inputNr == 1))
11632 cur = ctxt->input->cur[0];
11633 next = ctxt->input->cur[1];
11636 cons = ctxt->input->consumed;
11637 if ((cur == '<') && (next == '/')) {
11638 ctxt->instate = XML_PARSER_END_TAG;
11640 } else if ((cur == '<') && (next == '?')) {
11641 if ((!terminate) &&
11642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11643 ctxt->progressive = XML_PARSER_PI;
11647 ctxt->instate = XML_PARSER_CONTENT;
11648 ctxt->progressive = 1;
11649 } else if ((cur == '<') && (next != '!')) {
11650 ctxt->instate = XML_PARSER_START_TAG;
11652 } else if ((cur == '<') && (next == '!') &&
11653 (ctxt->input->cur[2] == '-') &&
11654 (ctxt->input->cur[3] == '-')) {
11659 ctxt->input->cur += 4;
11660 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11661 ctxt->input->cur -= 4;
11662 if ((!terminate) && (term < 0)) {
11663 ctxt->progressive = XML_PARSER_COMMENT;
11666 xmlParseComment(ctxt);
11667 ctxt->instate = XML_PARSER_CONTENT;
11668 ctxt->progressive = 1;
11669 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11670 (ctxt->input->cur[2] == '[') &&
11671 (ctxt->input->cur[3] == 'C') &&
11672 (ctxt->input->cur[4] == 'D') &&
11673 (ctxt->input->cur[5] == 'A') &&
11674 (ctxt->input->cur[6] == 'T') &&
11675 (ctxt->input->cur[7] == 'A') &&
11676 (ctxt->input->cur[8] == '[')) {
11678 ctxt->instate = XML_PARSER_CDATA_SECTION;
11680 } else if ((cur == '<') && (next == '!') &&
11683 } else if (cur == '&') {
11684 if ((!terminate) &&
11685 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11687 xmlParseReference(ctxt);
11689 /* TODO Avoid the extra copy, handle directly !!! */
11691 * Goal of the following test is:
11692 * - minimize calls to the SAX 'character' callback
11693 * when they are mergeable
11694 * - handle a problem for isBlank when we only parse
11695 * a sequence of blank chars and the next one is
11696 * not available to check against '<' presence.
11697 * - tries to homogenize the differences in SAX
11698 * callbacks between the push and pull versions
11701 if ((ctxt->inputNr == 1) &&
11702 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11704 if (ctxt->progressive) {
11705 if ((lastlt == NULL) ||
11706 (ctxt->input->cur > lastlt))
11708 } else if (xmlParseLookupSequence(ctxt,
11714 ctxt->checkIndex = 0;
11715 xmlParseCharData(ctxt, 0);
11718 * Pop-up of finished entities.
11720 while ((RAW == 0) && (ctxt->inputNr > 1))
11722 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11723 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11724 "detected an error in element content\n");
11725 xmlHaltParser(ctxt);
11730 case XML_PARSER_END_TAG:
11734 if (ctxt->progressive) {
11735 /* > can be found unescaped in attribute values */
11736 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11738 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11743 xmlParseEndTag2(ctxt,
11744 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11745 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11746 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11749 #ifdef LIBXML_SAX1_ENABLED
11751 xmlParseEndTag1(ctxt, 0);
11752 #endif /* LIBXML_SAX1_ENABLED */
11753 if (ctxt->instate == XML_PARSER_EOF) {
11755 } else if (ctxt->nameNr == 0) {
11756 ctxt->instate = XML_PARSER_EPILOG;
11758 ctxt->instate = XML_PARSER_CONTENT;
11761 case XML_PARSER_CDATA_SECTION: {
11763 * The Push mode need to have the SAX callback for
11764 * cdataBlock merge back contiguous callbacks.
11768 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11770 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11773 tmp = xmlCheckCdataPush(ctxt->input->cur,
11774 XML_PARSER_BIG_BUFFER_SIZE);
11777 ctxt->input->cur += tmp;
11778 goto encoding_error;
11780 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11781 if (ctxt->sax->cdataBlock != NULL)
11782 ctxt->sax->cdataBlock(ctxt->userData,
11783 ctxt->input->cur, tmp);
11784 else if (ctxt->sax->characters != NULL)
11785 ctxt->sax->characters(ctxt->userData,
11786 ctxt->input->cur, tmp);
11788 if (ctxt->instate == XML_PARSER_EOF)
11791 ctxt->checkIndex = 0;
11797 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11798 if ((tmp < 0) || (tmp != base)) {
11800 ctxt->input->cur += tmp;
11801 goto encoding_error;
11803 if ((ctxt->sax != NULL) && (base == 0) &&
11804 (ctxt->sax->cdataBlock != NULL) &&
11805 (!ctxt->disableSAX)) {
11807 * Special case to provide identical behaviour
11808 * between pull and push parsers on empty CDATA
11811 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11812 (!strncmp((const char *)&ctxt->input->cur[-9],
11814 ctxt->sax->cdataBlock(ctxt->userData,
11816 } else if ((ctxt->sax != NULL) && (base > 0) &&
11817 (!ctxt->disableSAX)) {
11818 if (ctxt->sax->cdataBlock != NULL)
11819 ctxt->sax->cdataBlock(ctxt->userData,
11820 ctxt->input->cur, base);
11821 else if (ctxt->sax->characters != NULL)
11822 ctxt->sax->characters(ctxt->userData,
11823 ctxt->input->cur, base);
11825 if (ctxt->instate == XML_PARSER_EOF)
11828 ctxt->checkIndex = 0;
11829 ctxt->instate = XML_PARSER_CONTENT;
11831 xmlGenericError(xmlGenericErrorContext,
11832 "PP: entering CONTENT\n");
11837 case XML_PARSER_MISC:
11839 if (ctxt->input->buf == NULL)
11840 avail = ctxt->input->length -
11841 (ctxt->input->cur - ctxt->input->base);
11843 avail = xmlBufUse(ctxt->input->buf->buffer) -
11844 (ctxt->input->cur - ctxt->input->base);
11847 cur = ctxt->input->cur[0];
11848 next = ctxt->input->cur[1];
11849 if ((cur == '<') && (next == '?')) {
11850 if ((!terminate) &&
11851 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11852 ctxt->progressive = XML_PARSER_PI;
11856 xmlGenericError(xmlGenericErrorContext,
11857 "PP: Parsing PI\n");
11860 if (ctxt->instate == XML_PARSER_EOF)
11862 ctxt->instate = XML_PARSER_MISC;
11863 ctxt->progressive = 1;
11864 ctxt->checkIndex = 0;
11865 } else if ((cur == '<') && (next == '!') &&
11866 (ctxt->input->cur[2] == '-') &&
11867 (ctxt->input->cur[3] == '-')) {
11868 if ((!terminate) &&
11869 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11870 ctxt->progressive = XML_PARSER_COMMENT;
11874 xmlGenericError(xmlGenericErrorContext,
11875 "PP: Parsing Comment\n");
11877 xmlParseComment(ctxt);
11878 if (ctxt->instate == XML_PARSER_EOF)
11880 ctxt->instate = XML_PARSER_MISC;
11881 ctxt->progressive = 1;
11882 ctxt->checkIndex = 0;
11883 } else if ((cur == '<') && (next == '!') &&
11884 (ctxt->input->cur[2] == 'D') &&
11885 (ctxt->input->cur[3] == 'O') &&
11886 (ctxt->input->cur[4] == 'C') &&
11887 (ctxt->input->cur[5] == 'T') &&
11888 (ctxt->input->cur[6] == 'Y') &&
11889 (ctxt->input->cur[7] == 'P') &&
11890 (ctxt->input->cur[8] == 'E')) {
11891 if ((!terminate) &&
11892 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11893 ctxt->progressive = XML_PARSER_DTD;
11897 xmlGenericError(xmlGenericErrorContext,
11898 "PP: Parsing internal subset\n");
11900 ctxt->inSubset = 1;
11901 ctxt->progressive = 0;
11902 ctxt->checkIndex = 0;
11903 xmlParseDocTypeDecl(ctxt);
11904 if (ctxt->instate == XML_PARSER_EOF)
11907 ctxt->instate = XML_PARSER_DTD;
11909 xmlGenericError(xmlGenericErrorContext,
11910 "PP: entering DTD\n");
11914 * Create and update the external subset.
11916 ctxt->inSubset = 2;
11917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11918 (ctxt->sax->externalSubset != NULL))
11919 ctxt->sax->externalSubset(ctxt->userData,
11920 ctxt->intSubName, ctxt->extSubSystem,
11922 ctxt->inSubset = 0;
11923 xmlCleanSpecialAttr(ctxt);
11924 ctxt->instate = XML_PARSER_PROLOG;
11926 xmlGenericError(xmlGenericErrorContext,
11927 "PP: entering PROLOG\n");
11930 } else if ((cur == '<') && (next == '!') &&
11934 ctxt->instate = XML_PARSER_START_TAG;
11935 ctxt->progressive = XML_PARSER_START_TAG;
11936 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11938 xmlGenericError(xmlGenericErrorContext,
11939 "PP: entering START_TAG\n");
11943 case XML_PARSER_PROLOG:
11945 if (ctxt->input->buf == NULL)
11946 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11948 avail = xmlBufUse(ctxt->input->buf->buffer) -
11949 (ctxt->input->cur - ctxt->input->base);
11952 cur = ctxt->input->cur[0];
11953 next = ctxt->input->cur[1];
11954 if ((cur == '<') && (next == '?')) {
11955 if ((!terminate) &&
11956 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11957 ctxt->progressive = XML_PARSER_PI;
11961 xmlGenericError(xmlGenericErrorContext,
11962 "PP: Parsing PI\n");
11965 if (ctxt->instate == XML_PARSER_EOF)
11967 ctxt->instate = XML_PARSER_PROLOG;
11968 ctxt->progressive = 1;
11969 } else if ((cur == '<') && (next == '!') &&
11970 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11971 if ((!terminate) &&
11972 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11973 ctxt->progressive = XML_PARSER_COMMENT;
11977 xmlGenericError(xmlGenericErrorContext,
11978 "PP: Parsing Comment\n");
11980 xmlParseComment(ctxt);
11981 if (ctxt->instate == XML_PARSER_EOF)
11983 ctxt->instate = XML_PARSER_PROLOG;
11984 ctxt->progressive = 1;
11985 } else if ((cur == '<') && (next == '!') &&
11989 ctxt->instate = XML_PARSER_START_TAG;
11990 if (ctxt->progressive == 0)
11991 ctxt->progressive = XML_PARSER_START_TAG;
11992 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11994 xmlGenericError(xmlGenericErrorContext,
11995 "PP: entering START_TAG\n");
11999 case XML_PARSER_EPILOG:
12001 if (ctxt->input->buf == NULL)
12002 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12004 avail = xmlBufUse(ctxt->input->buf->buffer) -
12005 (ctxt->input->cur - ctxt->input->base);
12008 cur = ctxt->input->cur[0];
12009 next = ctxt->input->cur[1];
12010 if ((cur == '<') && (next == '?')) {
12011 if ((!terminate) &&
12012 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12013 ctxt->progressive = XML_PARSER_PI;
12017 xmlGenericError(xmlGenericErrorContext,
12018 "PP: Parsing PI\n");
12021 if (ctxt->instate == XML_PARSER_EOF)
12023 ctxt->instate = XML_PARSER_EPILOG;
12024 ctxt->progressive = 1;
12025 } else if ((cur == '<') && (next == '!') &&
12026 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12027 if ((!terminate) &&
12028 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12029 ctxt->progressive = XML_PARSER_COMMENT;
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: Parsing Comment\n");
12036 xmlParseComment(ctxt);
12037 if (ctxt->instate == XML_PARSER_EOF)
12039 ctxt->instate = XML_PARSER_EPILOG;
12040 ctxt->progressive = 1;
12041 } else if ((cur == '<') && (next == '!') &&
12045 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12046 xmlHaltParser(ctxt);
12048 xmlGenericError(xmlGenericErrorContext,
12049 "PP: entering EOF\n");
12051 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12052 ctxt->sax->endDocument(ctxt->userData);
12056 case XML_PARSER_DTD: {
12058 * Sorry but progressive parsing of the internal subset
12059 * is not expected to be supported. We first check that
12060 * the full content of the internal subset is available and
12061 * the parsing is launched only at that point.
12062 * Internal subset ends up with "']' S? '>'" in an unescaped
12063 * section and not in a ']]>' sequence which are conditional
12064 * sections (whoever argued to keep that crap in XML deserve
12065 * a place in hell !).
12072 base = ctxt->input->cur - ctxt->input->base;
12073 if (base < 0) return(0);
12074 if (ctxt->checkIndex > base)
12075 base = ctxt->checkIndex;
12076 buf = xmlBufContent(ctxt->input->buf->buffer);
12077 use = xmlBufUse(ctxt->input->buf->buffer);
12078 for (;(unsigned int) base < use; base++) {
12080 if (buf[base] == quote)
12084 if ((quote == 0) && (buf[base] == '<')) {
12086 /* special handling of comments */
12087 if (((unsigned int) base + 4 < use) &&
12088 (buf[base + 1] == '!') &&
12089 (buf[base + 2] == '-') &&
12090 (buf[base + 3] == '-')) {
12091 for (;(unsigned int) base + 3 < use; base++) {
12092 if ((buf[base] == '-') &&
12093 (buf[base + 1] == '-') &&
12094 (buf[base + 2] == '>')) {
12102 fprintf(stderr, "unfinished comment\n");
12109 if (buf[base] == '"') {
12113 if (buf[base] == '\'') {
12117 if (buf[base] == ']') {
12119 fprintf(stderr, "%c%c%c%c: ", buf[base],
12120 buf[base + 1], buf[base + 2], buf[base + 3]);
12122 if ((unsigned int) base +1 >= use)
12124 if (buf[base + 1] == ']') {
12125 /* conditional crap, skip both ']' ! */
12129 for (i = 1; (unsigned int) base + i < use; i++) {
12130 if (buf[base + i] == '>') {
12132 fprintf(stderr, "found\n");
12134 goto found_end_int_subset;
12136 if (!IS_BLANK_CH(buf[base + i])) {
12138 fprintf(stderr, "not found\n");
12140 goto not_end_of_int_subset;
12144 fprintf(stderr, "end of stream\n");
12149 not_end_of_int_subset:
12150 continue; /* for */
12153 * We didn't find the end of the Internal subset
12156 ctxt->checkIndex = base;
12158 ctxt->checkIndex = 0;
12161 xmlGenericError(xmlGenericErrorContext,
12162 "PP: lookup of int subset end filed\n");
12166 found_end_int_subset:
12167 ctxt->checkIndex = 0;
12168 xmlParseInternalSubset(ctxt);
12169 if (ctxt->instate == XML_PARSER_EOF)
12171 ctxt->inSubset = 2;
12172 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12173 (ctxt->sax->externalSubset != NULL))
12174 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12175 ctxt->extSubSystem, ctxt->extSubURI);
12176 ctxt->inSubset = 0;
12177 xmlCleanSpecialAttr(ctxt);
12178 if (ctxt->instate == XML_PARSER_EOF)
12180 ctxt->instate = XML_PARSER_PROLOG;
12181 ctxt->checkIndex = 0;
12183 xmlGenericError(xmlGenericErrorContext,
12184 "PP: entering PROLOG\n");
12188 case XML_PARSER_COMMENT:
12189 xmlGenericError(xmlGenericErrorContext,
12190 "PP: internal error, state == COMMENT\n");
12191 ctxt->instate = XML_PARSER_CONTENT;
12193 xmlGenericError(xmlGenericErrorContext,
12194 "PP: entering CONTENT\n");
12197 case XML_PARSER_IGNORE:
12198 xmlGenericError(xmlGenericErrorContext,
12199 "PP: internal error, state == IGNORE");
12200 ctxt->instate = XML_PARSER_DTD;
12202 xmlGenericError(xmlGenericErrorContext,
12203 "PP: entering DTD\n");
12206 case XML_PARSER_PI:
12207 xmlGenericError(xmlGenericErrorContext,
12208 "PP: internal error, state == PI\n");
12209 ctxt->instate = XML_PARSER_CONTENT;
12211 xmlGenericError(xmlGenericErrorContext,
12212 "PP: entering CONTENT\n");
12215 case XML_PARSER_ENTITY_DECL:
12216 xmlGenericError(xmlGenericErrorContext,
12217 "PP: internal error, state == ENTITY_DECL\n");
12218 ctxt->instate = XML_PARSER_DTD;
12220 xmlGenericError(xmlGenericErrorContext,
12221 "PP: entering DTD\n");
12224 case XML_PARSER_ENTITY_VALUE:
12225 xmlGenericError(xmlGenericErrorContext,
12226 "PP: internal error, state == ENTITY_VALUE\n");
12227 ctxt->instate = XML_PARSER_CONTENT;
12229 xmlGenericError(xmlGenericErrorContext,
12230 "PP: entering DTD\n");
12233 case XML_PARSER_ATTRIBUTE_VALUE:
12234 xmlGenericError(xmlGenericErrorContext,
12235 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12236 ctxt->instate = XML_PARSER_START_TAG;
12238 xmlGenericError(xmlGenericErrorContext,
12239 "PP: entering START_TAG\n");
12242 case XML_PARSER_SYSTEM_LITERAL:
12243 xmlGenericError(xmlGenericErrorContext,
12244 "PP: internal error, state == SYSTEM_LITERAL\n");
12245 ctxt->instate = XML_PARSER_START_TAG;
12247 xmlGenericError(xmlGenericErrorContext,
12248 "PP: entering START_TAG\n");
12251 case XML_PARSER_PUBLIC_LITERAL:
12252 xmlGenericError(xmlGenericErrorContext,
12253 "PP: internal error, state == PUBLIC_LITERAL\n");
12254 ctxt->instate = XML_PARSER_START_TAG;
12256 xmlGenericError(xmlGenericErrorContext,
12257 "PP: entering START_TAG\n");
12264 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12271 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12272 ctxt->input->cur[0], ctxt->input->cur[1],
12273 ctxt->input->cur[2], ctxt->input->cur[3]);
12274 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12275 "Input is not proper UTF-8, indicate encoding !\n%s",
12276 BAD_CAST buffer, NULL);
12282 * xmlParseCheckTransition:
12283 * @ctxt: an XML parser context
12284 * @chunk: a char array
12285 * @size: the size in byte of the chunk
12287 * Check depending on the current parser state if the chunk given must be
12288 * processed immediately or one need more data to advance on parsing.
12290 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12293 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12294 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12296 if (ctxt->instate == XML_PARSER_START_TAG) {
12297 if (memchr(chunk, '>', size) != NULL)
12301 if (ctxt->progressive == XML_PARSER_COMMENT) {
12302 if (memchr(chunk, '>', size) != NULL)
12306 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12307 if (memchr(chunk, '>', size) != NULL)
12311 if (ctxt->progressive == XML_PARSER_PI) {
12312 if (memchr(chunk, '>', size) != NULL)
12316 if (ctxt->instate == XML_PARSER_END_TAG) {
12317 if (memchr(chunk, '>', size) != NULL)
12321 if ((ctxt->progressive == XML_PARSER_DTD) ||
12322 (ctxt->instate == XML_PARSER_DTD)) {
12323 if (memchr(chunk, '>', size) != NULL)
12332 * @ctxt: an XML parser context
12333 * @chunk: an char array
12334 * @size: the size in byte of the chunk
12335 * @terminate: last chunk indicator
12337 * Parse a Chunk of memory
12339 * Returns zero if no error, the xmlParserErrors otherwise.
12342 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12346 size_t old_avail = 0;
12350 return(XML_ERR_INTERNAL_ERROR);
12351 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12352 return(ctxt->errNo);
12353 if (ctxt->instate == XML_PARSER_EOF)
12355 if (ctxt->instate == XML_PARSER_START)
12356 xmlDetectSAX2(ctxt);
12357 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12358 (chunk[size - 1] == '\r')) {
12365 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12366 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12367 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12368 size_t cur = ctxt->input->cur - ctxt->input->base;
12371 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12373 * Specific handling if we autodetected an encoding, we should not
12374 * push more than the first line ... which depend on the encoding
12375 * And only push the rest once the final encoding was detected
12377 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12378 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12379 unsigned int len = 45;
12381 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12382 BAD_CAST "UTF-16")) ||
12383 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12384 BAD_CAST "UTF16")))
12386 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12387 BAD_CAST "UCS-4")) ||
12388 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12392 if (ctxt->input->buf->rawconsumed < len)
12393 len -= ctxt->input->buf->rawconsumed;
12396 * Change size for reading the initial declaration only
12397 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12398 * will blindly copy extra bytes from memory.
12400 if ((unsigned int) size > len) {
12401 remain = size - len;
12407 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12409 ctxt->errNo = XML_PARSER_EOF;
12410 xmlHaltParser(ctxt);
12411 return (XML_PARSER_EOF);
12413 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12415 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12418 } else if (ctxt->instate != XML_PARSER_EOF) {
12419 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12420 xmlParserInputBufferPtr in = ctxt->input->buf;
12421 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12422 (in->raw != NULL)) {
12424 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12425 size_t current = ctxt->input->cur - ctxt->input->base;
12427 nbchars = xmlCharEncInput(in, terminate);
12430 xmlGenericError(xmlGenericErrorContext,
12431 "xmlParseChunk: encoder error\n");
12432 return(XML_ERR_INVALID_ENCODING);
12434 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12439 xmlParseTryOrFinish(ctxt, 0);
12441 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12442 avail = xmlBufUse(ctxt->input->buf->buffer);
12444 * Depending on the current state it may not be such
12445 * a good idea to try parsing if there is nothing in the chunk
12446 * which would be worth doing a parser state transition and we
12447 * need to wait for more data
12449 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12450 (old_avail == 0) || (avail == 0) ||
12451 (xmlParseCheckTransition(ctxt,
12452 (const char *)&ctxt->input->base[old_avail],
12453 avail - old_avail)))
12454 xmlParseTryOrFinish(ctxt, terminate);
12456 if (ctxt->instate == XML_PARSER_EOF)
12457 return(ctxt->errNo);
12459 if ((ctxt->input != NULL) &&
12460 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12461 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12462 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12463 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12464 xmlHaltParser(ctxt);
12466 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12467 return(ctxt->errNo);
12475 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12476 (ctxt->input->buf != NULL)) {
12477 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12479 size_t current = ctxt->input->cur - ctxt->input->base;
12481 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12483 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12488 * Check for termination
12492 if (ctxt->input != NULL) {
12493 if (ctxt->input->buf == NULL)
12494 cur_avail = ctxt->input->length -
12495 (ctxt->input->cur - ctxt->input->base);
12497 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12498 (ctxt->input->cur - ctxt->input->base);
12501 if ((ctxt->instate != XML_PARSER_EOF) &&
12502 (ctxt->instate != XML_PARSER_EPILOG)) {
12503 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12505 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12506 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12508 if (ctxt->instate != XML_PARSER_EOF) {
12509 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12510 ctxt->sax->endDocument(ctxt->userData);
12512 ctxt->instate = XML_PARSER_EOF;
12514 if (ctxt->wellFormed == 0)
12515 return((xmlParserErrors) ctxt->errNo);
12520 /************************************************************************
12522 * I/O front end functions to the parser *
12524 ************************************************************************/
12527 * xmlCreatePushParserCtxt:
12528 * @sax: a SAX handler
12529 * @user_data: The user data returned on SAX callbacks
12530 * @chunk: a pointer to an array of chars
12531 * @size: number of chars in the array
12532 * @filename: an optional file name or URI
12534 * Create a parser context for using the XML parser in push mode.
12535 * If @buffer and @size are non-NULL, the data is used to detect
12536 * the encoding. The remaining characters will be parsed so they
12537 * don't need to be fed in again through xmlParseChunk.
12538 * To allow content encoding detection, @size should be >= 4
12539 * The value of @filename is used for fetching external entities
12540 * and error/warning reports.
12542 * Returns the new parser context or NULL
12546 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12547 const char *chunk, int size, const char *filename) {
12548 xmlParserCtxtPtr ctxt;
12549 xmlParserInputPtr inputStream;
12550 xmlParserInputBufferPtr buf;
12551 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12554 * plug some encoding conversion routines
12556 if ((chunk != NULL) && (size >= 4))
12557 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12559 buf = xmlAllocParserInputBuffer(enc);
12560 if (buf == NULL) return(NULL);
12562 ctxt = xmlNewParserCtxt();
12563 if (ctxt == NULL) {
12564 xmlErrMemory(NULL, "creating parser: out of memory\n");
12565 xmlFreeParserInputBuffer(buf);
12568 ctxt->dictNames = 1;
12569 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12570 if (ctxt->pushTab == NULL) {
12571 xmlErrMemory(ctxt, NULL);
12572 xmlFreeParserInputBuffer(buf);
12573 xmlFreeParserCtxt(ctxt);
12577 #ifdef LIBXML_SAX1_ENABLED
12578 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12579 #endif /* LIBXML_SAX1_ENABLED */
12580 xmlFree(ctxt->sax);
12581 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12582 if (ctxt->sax == NULL) {
12583 xmlErrMemory(ctxt, NULL);
12584 xmlFreeParserInputBuffer(buf);
12585 xmlFreeParserCtxt(ctxt);
12588 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12589 if (sax->initialized == XML_SAX2_MAGIC)
12590 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12592 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12593 if (user_data != NULL)
12594 ctxt->userData = user_data;
12596 if (filename == NULL) {
12597 ctxt->directory = NULL;
12599 ctxt->directory = xmlParserGetDirectory(filename);
12602 inputStream = xmlNewInputStream(ctxt);
12603 if (inputStream == NULL) {
12604 xmlFreeParserCtxt(ctxt);
12605 xmlFreeParserInputBuffer(buf);
12609 if (filename == NULL)
12610 inputStream->filename = NULL;
12612 inputStream->filename = (char *)
12613 xmlCanonicPath((const xmlChar *) filename);
12614 if (inputStream->filename == NULL) {
12615 xmlFreeParserCtxt(ctxt);
12616 xmlFreeParserInputBuffer(buf);
12620 inputStream->buf = buf;
12621 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12622 inputPush(ctxt, inputStream);
12625 * If the caller didn't provide an initial 'chunk' for determining
12626 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12627 * that it can be automatically determined later
12629 if ((size == 0) || (chunk == NULL)) {
12630 ctxt->charset = XML_CHAR_ENCODING_NONE;
12631 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12632 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12633 size_t cur = ctxt->input->cur - ctxt->input->base;
12635 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12637 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12639 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12643 if (enc != XML_CHAR_ENCODING_NONE) {
12644 xmlSwitchEncoding(ctxt, enc);
12649 #endif /* LIBXML_PUSH_ENABLED */
12653 * @ctxt: an XML parser context
12655 * Blocks further parser processing don't override error
12659 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12662 ctxt->instate = XML_PARSER_EOF;
12663 ctxt->disableSAX = 1;
12664 if (ctxt->input != NULL) {
12666 * in case there was a specific allocation deallocate before
12669 if (ctxt->input->free != NULL) {
12670 ctxt->input->free((xmlChar *) ctxt->input->base);
12671 ctxt->input->free = NULL;
12673 ctxt->input->cur = BAD_CAST"";
12674 ctxt->input->base = ctxt->input->cur;
12680 * @ctxt: an XML parser context
12682 * Blocks further parser processing
12685 xmlStopParser(xmlParserCtxtPtr ctxt) {
12688 xmlHaltParser(ctxt);
12689 ctxt->errNo = XML_ERR_USER_STOP;
12693 * xmlCreateIOParserCtxt:
12694 * @sax: a SAX handler
12695 * @user_data: The user data returned on SAX callbacks
12696 * @ioread: an I/O read function
12697 * @ioclose: an I/O close function
12698 * @ioctx: an I/O handler
12699 * @enc: the charset encoding if known
12701 * Create a parser context for using the XML parser with an existing
12704 * Returns the new parser context or NULL
12707 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12708 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12709 void *ioctx, xmlCharEncoding enc) {
12710 xmlParserCtxtPtr ctxt;
12711 xmlParserInputPtr inputStream;
12712 xmlParserInputBufferPtr buf;
12714 if (ioread == NULL) return(NULL);
12716 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12718 if (ioclose != NULL)
12723 ctxt = xmlNewParserCtxt();
12724 if (ctxt == NULL) {
12725 xmlFreeParserInputBuffer(buf);
12729 #ifdef LIBXML_SAX1_ENABLED
12730 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12731 #endif /* LIBXML_SAX1_ENABLED */
12732 xmlFree(ctxt->sax);
12733 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12734 if (ctxt->sax == NULL) {
12735 xmlErrMemory(ctxt, NULL);
12736 xmlFreeParserCtxt(ctxt);
12739 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12740 if (sax->initialized == XML_SAX2_MAGIC)
12741 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12743 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12744 if (user_data != NULL)
12745 ctxt->userData = user_data;
12748 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12749 if (inputStream == NULL) {
12750 xmlFreeParserCtxt(ctxt);
12753 inputPush(ctxt, inputStream);
12758 #ifdef LIBXML_VALID_ENABLED
12759 /************************************************************************
12761 * Front ends when parsing a DTD *
12763 ************************************************************************/
12767 * @sax: the SAX handler block or NULL
12768 * @input: an Input Buffer
12769 * @enc: the charset encoding if known
12771 * Load and parse a DTD
12773 * Returns the resulting xmlDtdPtr or NULL in case of error.
12774 * @input will be freed by the function in any case.
12778 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12779 xmlCharEncoding enc) {
12780 xmlDtdPtr ret = NULL;
12781 xmlParserCtxtPtr ctxt;
12782 xmlParserInputPtr pinput = NULL;
12788 ctxt = xmlNewParserCtxt();
12789 if (ctxt == NULL) {
12790 xmlFreeParserInputBuffer(input);
12794 /* We are loading a DTD */
12795 ctxt->options |= XML_PARSE_DTDLOAD;
12798 * Set-up the SAX context
12801 if (ctxt->sax != NULL)
12802 xmlFree(ctxt->sax);
12804 ctxt->userData = ctxt;
12806 xmlDetectSAX2(ctxt);
12809 * generate a parser input from the I/O handler
12812 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12813 if (pinput == NULL) {
12814 if (sax != NULL) ctxt->sax = NULL;
12815 xmlFreeParserInputBuffer(input);
12816 xmlFreeParserCtxt(ctxt);
12821 * plug some encoding conversion routines here.
12823 if (xmlPushInput(ctxt, pinput) < 0) {
12824 if (sax != NULL) ctxt->sax = NULL;
12825 xmlFreeParserCtxt(ctxt);
12828 if (enc != XML_CHAR_ENCODING_NONE) {
12829 xmlSwitchEncoding(ctxt, enc);
12832 pinput->filename = NULL;
12835 pinput->base = ctxt->input->cur;
12836 pinput->cur = ctxt->input->cur;
12837 pinput->free = NULL;
12840 * let's parse that entity knowing it's an external subset.
12842 ctxt->inSubset = 2;
12843 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12844 if (ctxt->myDoc == NULL) {
12845 xmlErrMemory(ctxt, "New Doc failed");
12848 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12849 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12850 BAD_CAST "none", BAD_CAST "none");
12852 if ((enc == XML_CHAR_ENCODING_NONE) &&
12853 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12855 * Get the 4 first bytes and decode the charset
12856 * if enc != XML_CHAR_ENCODING_NONE
12857 * plug some encoding conversion routines.
12863 enc = xmlDetectCharEncoding(start, 4);
12864 if (enc != XML_CHAR_ENCODING_NONE) {
12865 xmlSwitchEncoding(ctxt, enc);
12869 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12871 if (ctxt->myDoc != NULL) {
12872 if (ctxt->wellFormed) {
12873 ret = ctxt->myDoc->extSubset;
12874 ctxt->myDoc->extSubset = NULL;
12879 tmp = ret->children;
12880 while (tmp != NULL) {
12888 xmlFreeDoc(ctxt->myDoc);
12889 ctxt->myDoc = NULL;
12891 if (sax != NULL) ctxt->sax = NULL;
12892 xmlFreeParserCtxt(ctxt);
12899 * @sax: the SAX handler block
12900 * @ExternalID: a NAME* containing the External ID of the DTD
12901 * @SystemID: a NAME* containing the URL to the DTD
12903 * Load and parse an external subset.
12905 * Returns the resulting xmlDtdPtr or NULL in case of error.
12909 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12910 const xmlChar *SystemID) {
12911 xmlDtdPtr ret = NULL;
12912 xmlParserCtxtPtr ctxt;
12913 xmlParserInputPtr input = NULL;
12914 xmlCharEncoding enc;
12915 xmlChar* systemIdCanonic;
12917 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12919 ctxt = xmlNewParserCtxt();
12920 if (ctxt == NULL) {
12924 /* We are loading a DTD */
12925 ctxt->options |= XML_PARSE_DTDLOAD;
12928 * Set-up the SAX context
12931 if (ctxt->sax != NULL)
12932 xmlFree(ctxt->sax);
12934 ctxt->userData = ctxt;
12938 * Canonicalise the system ID
12940 systemIdCanonic = xmlCanonicPath(SystemID);
12941 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12942 xmlFreeParserCtxt(ctxt);
12947 * Ask the Entity resolver to load the damn thing
12950 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12951 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12953 if (input == NULL) {
12954 if (sax != NULL) ctxt->sax = NULL;
12955 xmlFreeParserCtxt(ctxt);
12956 if (systemIdCanonic != NULL)
12957 xmlFree(systemIdCanonic);
12962 * plug some encoding conversion routines here.
12964 if (xmlPushInput(ctxt, input) < 0) {
12965 if (sax != NULL) ctxt->sax = NULL;
12966 xmlFreeParserCtxt(ctxt);
12967 if (systemIdCanonic != NULL)
12968 xmlFree(systemIdCanonic);
12971 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12972 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12973 xmlSwitchEncoding(ctxt, enc);
12976 if (input->filename == NULL)
12977 input->filename = (char *) systemIdCanonic;
12979 xmlFree(systemIdCanonic);
12982 input->base = ctxt->input->cur;
12983 input->cur = ctxt->input->cur;
12984 input->free = NULL;
12987 * let's parse that entity knowing it's an external subset.
12989 ctxt->inSubset = 2;
12990 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12991 if (ctxt->myDoc == NULL) {
12992 xmlErrMemory(ctxt, "New Doc failed");
12993 if (sax != NULL) ctxt->sax = NULL;
12994 xmlFreeParserCtxt(ctxt);
12997 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12998 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12999 ExternalID, SystemID);
13000 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13002 if (ctxt->myDoc != NULL) {
13003 if (ctxt->wellFormed) {
13004 ret = ctxt->myDoc->extSubset;
13005 ctxt->myDoc->extSubset = NULL;
13010 tmp = ret->children;
13011 while (tmp != NULL) {
13019 xmlFreeDoc(ctxt->myDoc);
13020 ctxt->myDoc = NULL;
13022 if (sax != NULL) ctxt->sax = NULL;
13023 xmlFreeParserCtxt(ctxt);
13031 * @ExternalID: a NAME* containing the External ID of the DTD
13032 * @SystemID: a NAME* containing the URL to the DTD
13034 * Load and parse an external subset.
13036 * Returns the resulting xmlDtdPtr or NULL in case of error.
/* Convenience wrapper: delegates to xmlSAXParseDTD() with a NULL SAX handler block, */
/* passing ExternalID and SystemID through unchanged and returning its result. */
13040 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13041 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13043 #endif /* LIBXML_VALID_ENABLED */
13045 /************************************************************************
13047 * Front ends when parsing an Entity *
13049 ************************************************************************/
13052 * xmlParseCtxtExternalEntity:
13053 * @ctx: the existing parsing context
13054 * @URL: the URL for the entity to load
13055 * @ID: the System ID for the entity to load
13056 * @lst: the return value for the set of parsed nodes
13058 * Parse an external general entity within an existing parsing context
13059 * An external general parsed entity is well-formed if it matches the
13060 * production labeled extParsedEnt.
13062 * [78] extParsedEnt ::= TextDecl? content
13064 * Returns 0 if the entity is well formed, -1 in case of args problem and
13065 * the parser error code otherwise
13069 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13070 const xmlChar *ID, xmlNodePtr *lst) {
13071 xmlParserCtxtPtr ctxt;
13073 xmlNodePtr newRoot;
13074 xmlSAXHandlerPtr oldsax = NULL;
13077 xmlCharEncoding enc;
13079 if (ctx == NULL) return(-1);
13081 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13082 (ctx->depth > 1024)) {
13083 return(XML_ERR_ENTITY_LOOP);
13088 if ((URL == NULL) && (ID == NULL))
13090 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13093 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13094 if (ctxt == NULL) {
13098 oldsax = ctxt->sax;
13099 ctxt->sax = ctx->sax;
13100 xmlDetectSAX2(ctxt);
13101 newDoc = xmlNewDoc(BAD_CAST "1.0");
13102 if (newDoc == NULL) {
13103 xmlFreeParserCtxt(ctxt);
13106 newDoc->properties = XML_DOC_INTERNAL;
13107 if (ctx->myDoc->dict) {
13108 newDoc->dict = ctx->myDoc->dict;
13109 xmlDictReference(newDoc->dict);
13111 if (ctx->myDoc != NULL) {
13112 newDoc->intSubset = ctx->myDoc->intSubset;
13113 newDoc->extSubset = ctx->myDoc->extSubset;
13115 if (ctx->myDoc->URL != NULL) {
13116 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13118 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13119 if (newRoot == NULL) {
13120 ctxt->sax = oldsax;
13121 xmlFreeParserCtxt(ctxt);
13122 newDoc->intSubset = NULL;
13123 newDoc->extSubset = NULL;
13124 xmlFreeDoc(newDoc);
13127 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13128 nodePush(ctxt, newDoc->children);
13129 if (ctx->myDoc == NULL) {
13130 ctxt->myDoc = newDoc;
13132 ctxt->myDoc = ctx->myDoc;
13133 newDoc->children->doc = ctx->myDoc;
13137 * Get the 4 first bytes and decode the charset
13138 * if enc != XML_CHAR_ENCODING_NONE
13139 * plug some encoding conversion routines.
13142 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13147 enc = xmlDetectCharEncoding(start, 4);
13148 if (enc != XML_CHAR_ENCODING_NONE) {
13149 xmlSwitchEncoding(ctxt, enc);
13154 * Parse a possible text declaration first
13156 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13157 xmlParseTextDecl(ctxt);
13159 * An XML-1.0 document can't reference an entity not XML-1.0
13161 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13162 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13163 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13164 "Version mismatch between document and entity\n");
13169 * If the user provided its own SAX callbacks then reuse the
13170 * useData callback field, otherwise the expected setup in a
13171 * DOM builder is to have userData == ctxt
13173 if (ctx->userData == ctx)
13174 ctxt->userData = ctxt;
13176 ctxt->userData = ctx->userData;
13179 * Doing validity checking on chunk doesn't make sense
13181 ctxt->instate = XML_PARSER_CONTENT;
13182 ctxt->validate = ctx->validate;
13183 ctxt->valid = ctx->valid;
13184 ctxt->loadsubset = ctx->loadsubset;
13185 ctxt->depth = ctx->depth + 1;
13186 ctxt->replaceEntities = ctx->replaceEntities;
13187 if (ctxt->validate) {
13188 ctxt->vctxt.error = ctx->vctxt.error;
13189 ctxt->vctxt.warning = ctx->vctxt.warning;
13191 ctxt->vctxt.error = NULL;
13192 ctxt->vctxt.warning = NULL;
13194 ctxt->vctxt.nodeTab = NULL;
13195 ctxt->vctxt.nodeNr = 0;
13196 ctxt->vctxt.nodeMax = 0;
13197 ctxt->vctxt.node = NULL;
13198 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13199 ctxt->dict = ctx->dict;
13200 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13201 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13202 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13203 ctxt->dictNames = ctx->dictNames;
13204 ctxt->attsDefault = ctx->attsDefault;
13205 ctxt->attsSpecial = ctx->attsSpecial;
13206 ctxt->linenumbers = ctx->linenumbers;
13208 xmlParseContent(ctxt);
13210 ctx->validate = ctxt->validate;
13211 ctx->valid = ctxt->valid;
13212 if ((RAW == '<') && (NXT(1) == '/')) {
13213 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13214 } else if (RAW != 0) {
13215 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13217 if (ctxt->node != newDoc->children) {
13218 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13221 if (!ctxt->wellFormed) {
13222 if (ctxt->errNo == 0)
13231 * Return the newly created nodeset after unlinking it from
13232 * they pseudo parent.
13234 cur = newDoc->children->children;
13236 while (cur != NULL) {
13237 cur->parent = NULL;
13240 newDoc->children->children = NULL;
13244 ctxt->sax = oldsax;
13246 ctxt->attsDefault = NULL;
13247 ctxt->attsSpecial = NULL;
13248 xmlFreeParserCtxt(ctxt);
13249 newDoc->intSubset = NULL;
13250 newDoc->extSubset = NULL;
13251 xmlFreeDoc(newDoc);
13257 * xmlParseExternalEntityPrivate:
13258 * @doc: the document the chunk pertains to
13259 * @oldctxt: the previous parser context if available
13260 * @sax: the SAX handler bloc (possibly NULL)
13261 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13262 * @depth: Used for loop detection, use 0
13263 * @URL: the URL for the entity to load
13264 * @ID: the System ID for the entity to load
13265 * @list: the return value for the set of parsed nodes
13267 * Private version of xmlParseExternalEntity()
13269 * Returns 0 if the entity is well formed, -1 in case of args problem and
13270 * the parser error code otherwise
13273 static xmlParserErrors
13274 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13275 xmlSAXHandlerPtr sax,
13276 void *user_data, int depth, const xmlChar *URL,
13277 const xmlChar *ID, xmlNodePtr *list) {
13278 xmlParserCtxtPtr ctxt;
13280 xmlNodePtr newRoot;
13281 xmlSAXHandlerPtr oldsax = NULL;
13282 xmlParserErrors ret = XML_ERR_OK;
13284 xmlCharEncoding enc;
13286 if (((depth > 40) &&
13287 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13289 return(XML_ERR_ENTITY_LOOP);
13294 if ((URL == NULL) && (ID == NULL))
13295 return(XML_ERR_INTERNAL_ERROR);
13297 return(XML_ERR_INTERNAL_ERROR);
13300 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13301 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13302 ctxt->userData = ctxt;
13303 if (oldctxt != NULL) {
13304 ctxt->_private = oldctxt->_private;
13305 ctxt->loadsubset = oldctxt->loadsubset;
13306 ctxt->validate = oldctxt->validate;
13307 ctxt->external = oldctxt->external;
13308 ctxt->record_info = oldctxt->record_info;
13309 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13310 ctxt->node_seq.length = oldctxt->node_seq.length;
13311 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13314 * Doing validity checking on chunk without context
13315 * doesn't make sense
13317 ctxt->_private = NULL;
13318 ctxt->validate = 0;
13319 ctxt->external = 2;
13320 ctxt->loadsubset = 0;
13323 oldsax = ctxt->sax;
13325 if (user_data != NULL)
13326 ctxt->userData = user_data;
13328 xmlDetectSAX2(ctxt);
13329 newDoc = xmlNewDoc(BAD_CAST "1.0");
13330 if (newDoc == NULL) {
13331 ctxt->node_seq.maximum = 0;
13332 ctxt->node_seq.length = 0;
13333 ctxt->node_seq.buffer = NULL;
13334 xmlFreeParserCtxt(ctxt);
13335 return(XML_ERR_INTERNAL_ERROR);
13337 newDoc->properties = XML_DOC_INTERNAL;
13338 newDoc->intSubset = doc->intSubset;
13339 newDoc->extSubset = doc->extSubset;
13340 newDoc->dict = doc->dict;
13341 xmlDictReference(newDoc->dict);
13343 if (doc->URL != NULL) {
13344 newDoc->URL = xmlStrdup(doc->URL);
13346 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13347 if (newRoot == NULL) {
13349 ctxt->sax = oldsax;
13350 ctxt->node_seq.maximum = 0;
13351 ctxt->node_seq.length = 0;
13352 ctxt->node_seq.buffer = NULL;
13353 xmlFreeParserCtxt(ctxt);
13354 newDoc->intSubset = NULL;
13355 newDoc->extSubset = NULL;
13356 xmlFreeDoc(newDoc);
13357 return(XML_ERR_INTERNAL_ERROR);
13359 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13360 nodePush(ctxt, newDoc->children);
13362 newRoot->doc = doc;
13365 * Get the 4 first bytes and decode the charset
13366 * if enc != XML_CHAR_ENCODING_NONE
13367 * plug some encoding conversion routines.
13370 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13375 enc = xmlDetectCharEncoding(start, 4);
13376 if (enc != XML_CHAR_ENCODING_NONE) {
13377 xmlSwitchEncoding(ctxt, enc);
13382 * Parse a possible text declaration first
13384 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13385 xmlParseTextDecl(ctxt);
13388 ctxt->instate = XML_PARSER_CONTENT;
13389 ctxt->depth = depth;
13391 xmlParseContent(ctxt);
13393 if ((RAW == '<') && (NXT(1) == '/')) {
13394 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13395 } else if (RAW != 0) {
13396 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13398 if (ctxt->node != newDoc->children) {
13399 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13402 if (!ctxt->wellFormed) {
13403 if (ctxt->errNo == 0)
13404 ret = XML_ERR_INTERNAL_ERROR;
13406 ret = (xmlParserErrors)ctxt->errNo;
13408 if (list != NULL) {
13412 * Return the newly created nodeset after unlinking it from
13413 * they pseudo parent.
13415 cur = newDoc->children->children;
13417 while (cur != NULL) {
13418 cur->parent = NULL;
13421 newDoc->children->children = NULL;
13427 * Record in the parent context the number of entities replacement
13428 * done when parsing that reference.
13430 if (oldctxt != NULL)
13431 oldctxt->nbentities += ctxt->nbentities;
13434 * Also record the size of the entity parsed
13436 if (ctxt->input != NULL && oldctxt != NULL) {
13437 oldctxt->sizeentities += ctxt->input->consumed;
13438 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13441 * And record the last error if any
13443 if (ctxt->lastError.code != XML_ERR_OK)
13444 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13447 ctxt->sax = oldsax;
13448 if (oldctxt != NULL) {
13449 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13450 oldctxt->node_seq.length = ctxt->node_seq.length;
13451 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13453 ctxt->node_seq.maximum = 0;
13454 ctxt->node_seq.length = 0;
13455 ctxt->node_seq.buffer = NULL;
13456 xmlFreeParserCtxt(ctxt);
13457 newDoc->intSubset = NULL;
13458 newDoc->extSubset = NULL;
13459 xmlFreeDoc(newDoc);
13464 #ifdef LIBXML_SAX1_ENABLED
13466 * xmlParseExternalEntity:
13467 * @doc: the document the chunk pertains to
13468 * @sax: the SAX handler bloc (possibly NULL)
13469 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13470 * @depth: Used for loop detection, use 0
13471 * @URL: the URL for the entity to load
13472 * @ID: the System ID for the entity to load
13473 * @lst: the return value for the set of parsed nodes
13475 * Parse an external general entity
13476 * An external general parsed entity is well-formed if it matches the
13477 * production labeled extParsedEnt.
13479 * [78] extParsedEnt ::= TextDecl? content
13481 * Returns 0 if the entity is well formed, -1 in case of args problem and
13482 * the parser error code otherwise
/* Public entry point: forwards to xmlParseExternalEntityPrivate(), passing NULL */
/* as the parent parser context (second argument). */
13486 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13487 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13488 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13493 * xmlParseBalancedChunkMemory:
13494 * @doc: the document the chunk pertains to
13495 * @sax: the SAX handler bloc (possibly NULL)
13496 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13497 * @depth: Used for loop detection, use 0
13498 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13499 * @lst: the return value for the set of parsed nodes
13501 * Parse a well-balanced chunk of an XML document
13502 * called by the parser
13503 * The allowed sequence for the Well Balanced Chunk is the one defined by
13504 * the content production in the XML grammar:
13506 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13508 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13509 * the parser error code otherwise
/* Strict variant: same as xmlParseBalancedChunkMemoryRecover() with the final */
/* (recover) argument fixed to 0. */
13513 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13514 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13515 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13516 depth, string, lst, 0 );
13518 #endif /* LIBXML_SAX1_ENABLED */
13521 * xmlParseBalancedChunkMemoryInternal:
13522 * @oldctxt: the existing parsing context
13523 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13524 * @user_data: the user data field for the parser context
13525 * @lst: the return value for the set of parsed nodes
13528 * Parse a well-balanced chunk of an XML document
13529 * called by the parser
13530 * The allowed sequence for the Well Balanced Chunk is the one defined by
13531 * the content production in the XML grammar:
13533 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13535 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13536 * error code otherwise
13538 * In case recover is set to 1, the nodelist will not be empty even if
13539 * the parsed chunk is not well balanced.
/* Parses @string as XML content in a temporary memory parser context that borrows */
/* the dictionary, SAX handler, options and attribute tables of @oldctxt. */
/* Returns XML_ERR_OK or a parser error code; on success the parsed nodes are */
/* handed back through @lst when it is not NULL. */
13541 static xmlParserErrors
13542 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13543 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13544 xmlParserCtxtPtr ctxt;
13545 xmlDocPtr newDoc = NULL;
13546 xmlNodePtr newRoot;
13547 xmlSAXHandlerPtr oldsax = NULL;
13548 xmlNodePtr content = NULL;
13549 xmlNodePtr last = NULL;
13551 xmlParserErrors ret = XML_ERR_OK;
/* Nesting guard: a depth above 40 is refused unless XML_PARSE_HUGE is set, */
/* and a depth above 1024 is refused unconditionally. */
13556 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13557 (oldctxt->depth > 1024)) {
13558 return(XML_ERR_ENTITY_LOOP);
13564 if (string == NULL)
13565 return(XML_ERR_INTERNAL_ERROR);
13567 size = xmlStrlen(string);
13569 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13570 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13571 if (user_data != NULL)
13572 ctxt->userData = user_data;
13574 ctxt->userData = ctxt;
/* Drop the context's own dictionary and use the parent's one instead; the */
/* cached "xml"/"xmlns"/namespace strings are looked up again in that dictionary. */
13575 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13576 ctxt->dict = oldctxt->dict;
13577 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13578 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13579 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13582 /* propagate namespaces down the entity */
13583 for (i = 0;i < oldctxt->nsNr;i += 2) {
13584 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
/* Borrow the parent's SAX handler; the context's own handler is kept in oldsax */
/* and reinstalled before every xmlFreeParserCtxt() call below. */
13588 oldsax = ctxt->sax;
13589 ctxt->sax = oldctxt->sax;
13590 xmlDetectSAX2(ctxt);
13591 ctxt->replaceEntities = oldctxt->replaceEntities;
13592 ctxt->options = oldctxt->options;
13594 ctxt->_private = oldctxt->_private;
/* Without a parent document a scratch document sharing the dictionary is created. */
13595 if (oldctxt->myDoc == NULL) {
13596 newDoc = xmlNewDoc(BAD_CAST "1.0");
13597 if (newDoc == NULL) {
13598 ctxt->sax = oldsax;
13600 xmlFreeParserCtxt(ctxt);
13601 return(XML_ERR_INTERNAL_ERROR);
13603 newDoc->properties = XML_DOC_INTERNAL;
13604 newDoc->dict = ctxt->dict;
13605 xmlDictReference(newDoc->dict);
13606 ctxt->myDoc = newDoc;
13608 ctxt->myDoc = oldctxt->myDoc;
/* Remember the document's current child list; it is put back after parsing. */
13609 content = ctxt->myDoc->children;
13610 last = ctxt->myDoc->last;
13612 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13613 if (newRoot == NULL) {
13614 ctxt->sax = oldsax;
13616 xmlFreeParserCtxt(ctxt);
13617 if (newDoc != NULL) {
13618 xmlFreeDoc(newDoc);
13620 return(XML_ERR_INTERNAL_ERROR);
/* Make the pseudo root the only child of the document while the chunk is parsed. */
13622 ctxt->myDoc->children = NULL;
13623 ctxt->myDoc->last = NULL;
13624 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13625 nodePush(ctxt, ctxt->myDoc->children);
13626 ctxt->instate = XML_PARSER_CONTENT;
13627 ctxt->depth = oldctxt->depth + 1;
13629 ctxt->validate = 0;
13630 ctxt->loadsubset = oldctxt->loadsubset;
13631 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13633 * ID/IDREF registration will be done in xmlValidateElement below
13635 ctxt->loadsubset |= XML_SKIP_IDS;
13637 ctxt->dictNames = oldctxt->dictNames;
13638 ctxt->attsDefault = oldctxt->attsDefault;
13639 ctxt->attsSpecial = oldctxt->attsSpecial;
13641 xmlParseContent(ctxt);
/* Anything left in the input after the content is an error: a stray end tag */
/* or other trailing data. */
13642 if ((RAW == '<') && (NXT(1) == '/')) {
13643 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13644 } else if (RAW != 0) {
13645 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13647 if (ctxt->node != ctxt->myDoc->children) {
13648 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13651 if (!ctxt->wellFormed) {
13652 if (ctxt->errNo == 0)
13653 ret = XML_ERR_INTERNAL_ERROR;
13655 ret = (xmlParserErrors)ctxt->errNo;
13660 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13664 * Return the newly created nodeset after unlinking it from
13665 * they pseudo parent.
13667 cur = ctxt->myDoc->children->children;
13669 while (cur != NULL) {
13670 #ifdef LIBXML_VALID_ENABLED
13671 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13672 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13673 (cur->type == XML_ELEMENT_NODE)) {
13674 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13675 oldctxt->myDoc, cur);
13677 #endif /* LIBXML_VALID_ENABLED */
13678 cur->parent = NULL;
13681 ctxt->myDoc->children->children = NULL;
/* Free the pseudo root and restore the child list saved before parsing. */
13683 if (ctxt->myDoc != NULL) {
13684 xmlFreeNode(ctxt->myDoc->children);
13685 ctxt->myDoc->children = content;
13686 ctxt->myDoc->last = last;
13690 * Record in the parent context the number of entities replacement
13691 * done when parsing that reference.
/* NOTE(review): oldctxt has already been dereferenced unconditionally above, */
/* so this NULL test looks redundant - confirm before relying on it. */
13693 if (oldctxt != NULL)
13694 oldctxt->nbentities += ctxt->nbentities;
13697 * Also record the last error if any
13699 if (ctxt->lastError.code != XML_ERR_OK)
13700 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
/* Undo the borrowing (SAX handler, attribute tables) before freeing the context. */
13702 ctxt->sax = oldsax;
13704 ctxt->attsDefault = NULL;
13705 ctxt->attsSpecial = NULL;
13706 xmlFreeParserCtxt(ctxt);
13707 if (newDoc != NULL) {
13708 xmlFreeDoc(newDoc);
13715 * xmlParseInNodeContext:
13716 * @node: the context node
13717 * @data: the input string
13718 * @datalen: the input string length in bytes
13719 * @options: a combination of xmlParserOption
13720 * @lst: the return value for the set of parsed nodes
13722 * Parse a well-balanced chunk of an XML document
13723 * within the context (DTD, namespaces, etc ...) of the given node.
13725 * The allowed sequence for the data is a Well Balanced Chunk defined by
13726 * the content production in the XML grammar:
13728 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13730 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13731 * error code otherwise
13734 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13735 int options, xmlNodePtr *lst) {
13737 xmlParserCtxtPtr ctxt;
13738 xmlDocPtr doc = NULL;
13739 xmlNodePtr fake, cur;
13742 xmlParserErrors ret = XML_ERR_OK;
13745 * check all input parameters, grab the document
13747 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13748 return(XML_ERR_INTERNAL_ERROR);
13749 switch (node->type) {
13750 case XML_ELEMENT_NODE:
13751 case XML_ATTRIBUTE_NODE:
13752 case XML_TEXT_NODE:
13753 case XML_CDATA_SECTION_NODE:
13754 case XML_ENTITY_REF_NODE:
13756 case XML_COMMENT_NODE:
13757 case XML_DOCUMENT_NODE:
13758 case XML_HTML_DOCUMENT_NODE:
13761 return(XML_ERR_INTERNAL_ERROR);
13764 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13765 (node->type != XML_DOCUMENT_NODE) &&
13766 (node->type != XML_HTML_DOCUMENT_NODE))
13767 node = node->parent;
13769 return(XML_ERR_INTERNAL_ERROR);
13770 if (node->type == XML_ELEMENT_NODE)
13773 doc = (xmlDocPtr) node;
13775 return(XML_ERR_INTERNAL_ERROR);
13778 * allocate a context and set-up everything not related to the
13779 * node position in the tree
13781 if (doc->type == XML_DOCUMENT_NODE)
13782 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13783 #ifdef LIBXML_HTML_ENABLED
13784 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13785 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13787 * When parsing in context, it makes no sense to add implied
13788 * elements like html/body/etc...
13790 options |= HTML_PARSE_NOIMPLIED;
13794 return(XML_ERR_INTERNAL_ERROR);
13797 return(XML_ERR_NO_MEMORY);
13800 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13801 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13802 * we must wait until the last moment to free the original one.
13804 if (doc->dict != NULL) {
13805 if (ctxt->dict != NULL)
13806 xmlDictFree(ctxt->dict);
13807 ctxt->dict = doc->dict;
13809 options |= XML_PARSE_NODICT;
13811 if (doc->encoding != NULL) {
13812 xmlCharEncodingHandlerPtr hdlr;
13814 if (ctxt->encoding != NULL)
13815 xmlFree((xmlChar *) ctxt->encoding);
13816 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13818 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13819 if (hdlr != NULL) {
13820 xmlSwitchToEncoding(ctxt, hdlr);
13822 return(XML_ERR_UNSUPPORTED_ENCODING);
13826 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13827 xmlDetectSAX2(ctxt);
13829 /* parsing in context, i.e. as within existing content */
13830 ctxt->instate = XML_PARSER_CONTENT;
13832 fake = xmlNewComment(NULL);
13833 if (fake == NULL) {
13834 xmlFreeParserCtxt(ctxt);
13835 return(XML_ERR_NO_MEMORY);
13837 xmlAddChild(node, fake);
13839 if (node->type == XML_ELEMENT_NODE) {
13840 nodePush(ctxt, node);
13842 * initialize the SAX2 namespaces stack
13845 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13846 xmlNsPtr ns = cur->nsDef;
13847 const xmlChar *iprefix, *ihref;
13849 while (ns != NULL) {
13851 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13852 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13854 iprefix = ns->prefix;
13858 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13859 nsPush(ctxt, iprefix, ihref);
13868 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13870 * ID/IDREF registration will be done in xmlValidateElement below
13872 ctxt->loadsubset |= XML_SKIP_IDS;
13875 #ifdef LIBXML_HTML_ENABLED
13876 if (doc->type == XML_HTML_DOCUMENT_NODE)
13877 __htmlParseContent(ctxt);
13880 xmlParseContent(ctxt);
13883 if ((RAW == '<') && (NXT(1) == '/')) {
13884 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13885 } else if (RAW != 0) {
13886 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13888 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13889 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13890 ctxt->wellFormed = 0;
13893 if (!ctxt->wellFormed) {
13894 if (ctxt->errNo == 0)
13895 ret = XML_ERR_INTERNAL_ERROR;
13897 ret = (xmlParserErrors)ctxt->errNo;
13903 * Return the newly created nodeset after unlinking it from
13904 * the pseudo sibling.
13917 while (cur != NULL) {
13918 cur->parent = NULL;
13922 xmlUnlinkNode(fake);
13926 if (ret != XML_ERR_OK) {
13927 xmlFreeNodeList(*lst);
13931 if (doc->dict != NULL)
13933 xmlFreeParserCtxt(ctxt);
13937 return(XML_ERR_INTERNAL_ERROR);
13941 #ifdef LIBXML_SAX1_ENABLED
13943 * xmlParseBalancedChunkMemoryRecover:
13944 * @doc: the document the chunk pertains to
13945 * @sax: the SAX handler bloc (possibly NULL)
13946 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13947 * @depth: Used for loop detection, use 0
13948 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13949 * @lst: the return value for the set of parsed nodes
13950 * @recover: return nodes even if the data is broken (use 0)
13953 * Parse a well-balanced chunk of an XML document
13954 * called by the parser
13955 * The allowed sequence for the Well Balanced Chunk is the one defined by
13956 * the content production in the XML grammar:
13958 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13960 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13961 * the parser error code otherwise
13963 * In case recover is set to 1, the nodelist will not be empty even if
13964 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13968 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13969 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13971 xmlParserCtxtPtr ctxt;
13973 xmlSAXHandlerPtr oldsax = NULL;
13974 xmlNodePtr content, newRoot;
13979 return(XML_ERR_ENTITY_LOOP);
13985 if (string == NULL)
13988 size = xmlStrlen(string);
13990 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13991 if (ctxt == NULL) return(-1);
13992 ctxt->userData = ctxt;
13994 oldsax = ctxt->sax;
13996 if (user_data != NULL)
13997 ctxt->userData = user_data;
13999 newDoc = xmlNewDoc(BAD_CAST "1.0");
14000 if (newDoc == NULL) {
14001 xmlFreeParserCtxt(ctxt);
14004 newDoc->properties = XML_DOC_INTERNAL;
14005 if ((doc != NULL) && (doc->dict != NULL)) {
14006 xmlDictFree(ctxt->dict);
14007 ctxt->dict = doc->dict;
14008 xmlDictReference(ctxt->dict);
14009 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14010 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14011 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14012 ctxt->dictNames = 1;
14014 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
14017 newDoc->intSubset = doc->intSubset;
14018 newDoc->extSubset = doc->extSubset;
14020 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14021 if (newRoot == NULL) {
14023 ctxt->sax = oldsax;
14024 xmlFreeParserCtxt(ctxt);
14025 newDoc->intSubset = NULL;
14026 newDoc->extSubset = NULL;
14027 xmlFreeDoc(newDoc);
14030 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14031 nodePush(ctxt, newRoot);
14033 ctxt->myDoc = newDoc;
14035 ctxt->myDoc = newDoc;
14036 newDoc->children->doc = doc;
14037 /* Ensure that doc has XML spec namespace */
14038 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14039 newDoc->oldNs = doc->oldNs;
14041 ctxt->instate = XML_PARSER_CONTENT;
14042 ctxt->depth = depth;
14045 * Doing validity checking on chunk doesn't make sense
14047 ctxt->validate = 0;
14048 ctxt->loadsubset = 0;
14049 xmlDetectSAX2(ctxt);
14051 if ( doc != NULL ){
14052 content = doc->children;
14053 doc->children = NULL;
14054 xmlParseContent(ctxt);
14055 doc->children = content;
14058 xmlParseContent(ctxt);
14060 if ((RAW == '<') && (NXT(1) == '/')) {
14061 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14062 } else if (RAW != 0) {
14063 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
14065 if (ctxt->node != newDoc->children) {
14066 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14069 if (!ctxt->wellFormed) {
14070 if (ctxt->errNo == 0)
14078 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14082 * Return the newly created nodeset after unlinking it from
14083 * the pseudo parent.
14085 cur = newDoc->children->children;
14087 while (cur != NULL) {
14088 xmlSetTreeDoc(cur, doc);
14089 cur->parent = NULL;
14092 newDoc->children->children = NULL;
14096 ctxt->sax = oldsax;
14097 xmlFreeParserCtxt(ctxt);
14098 newDoc->intSubset = NULL;
14099 newDoc->extSubset = NULL;
14100 newDoc->oldNs = NULL;
14101 xmlFreeDoc(newDoc);
14107 * xmlSAXParseEntity:
14108 * @sax: the SAX handler block
14109 * @filename: the filename
14111 * parse an XML external entity out of context and build a tree.
14112 * It use the given SAX function block to handle the parsing callback.
14113 * If sax is NULL, fallback to the default DOM tree building routines.
14115 * [78] extParsedEnt ::= TextDecl? content
14117 * This correspond to a "Well Balanced" chunk
14119 * Returns the resulting document tree
14123 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14125 xmlParserCtxtPtr ctxt;
14127 ctxt = xmlCreateFileParserCtxt(filename);
14128 if (ctxt == NULL) {
14132 if (ctxt->sax != NULL)
14133 xmlFree(ctxt->sax);
14135 ctxt->userData = NULL;
14138 xmlParseExtParsedEnt(ctxt);
14140 if (ctxt->wellFormed)
14144 xmlFreeDoc(ctxt->myDoc);
14145 ctxt->myDoc = NULL;
14149 xmlFreeParserCtxt(ctxt);
14156 * @filename: the filename
14158 * parse an XML external entity out of context and build a tree.
14160 * [78] extParsedEnt ::= TextDecl? content
14162 * This correspond to a "Well Balanced" chunk
14164 * Returns the resulting document tree
14168 xmlParseEntity(const char *filename) {
14169 return(xmlSAXParseEntity(NULL, filename));
14171 #endif /* LIBXML_SAX1_ENABLED */
14174 * xmlCreateEntityParserCtxtInternal:
14175 * @URL: the entity URL
14176 * @ID: the entity PUBLIC ID
14177 * @base: a possible base for the target URI
14178 * @pctx: parser context used to set options on new context
14180 * Create a parser context for an external entity
14181 * Automatic support for ZLIB/Compress compressed document is provided
14182 * by default if found at compile-time.
14184 * Returns the new parser context or NULL
14186 static xmlParserCtxtPtr
14187 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14188 const xmlChar *base, xmlParserCtxtPtr pctx) {
14189 xmlParserCtxtPtr ctxt;
14190 xmlParserInputPtr inputStream;
14191 char *directory = NULL;
14194 ctxt = xmlNewParserCtxt();
14195 if (ctxt == NULL) {
14199 if (pctx != NULL) {
14200 ctxt->options = pctx->options;
14201 ctxt->_private = pctx->_private;
14204 uri = xmlBuildURI(URL, base);
14207 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14208 if (inputStream == NULL) {
14209 xmlFreeParserCtxt(ctxt);
14213 inputPush(ctxt, inputStream);
14215 if ((ctxt->directory == NULL) && (directory == NULL))
14216 directory = xmlParserGetDirectory((char *)URL);
14217 if ((ctxt->directory == NULL) && (directory != NULL))
14218 ctxt->directory = directory;
14220 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14221 if (inputStream == NULL) {
14223 xmlFreeParserCtxt(ctxt);
14227 inputPush(ctxt, inputStream);
14229 if ((ctxt->directory == NULL) && (directory == NULL))
14230 directory = xmlParserGetDirectory((char *)uri);
14231 if ((ctxt->directory == NULL) && (directory != NULL))
14232 ctxt->directory = directory;
14239 * xmlCreateEntityParserCtxt:
14240 * @URL: the entity URL
14241 * @ID: the entity PUBLIC ID
14242 * @base: a possible base for the target URI
14244 * Create a parser context for an external entity
14245 * Automatic support for ZLIB/Compress compressed document is provided
14246 * by default if found at compile-time.
14248 * Returns the new parser context or NULL
14251 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14252 const xmlChar *base) {
14253 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14257 /************************************************************************
14259 * Front ends when parsing from a file *
14261 ************************************************************************/
14264 * xmlCreateURLParserCtxt:
14265 * @filename: the filename or URL
14266 * @options: a combination of xmlParserOption
14268 * Create a parser context for a file or URL content.
14269 * Automatic support for ZLIB/Compress compressed document is provided
14270 * by default if found at compile-time and for file accesses
14272 * Returns the new parser context or NULL
14275 xmlCreateURLParserCtxt(const char *filename, int options)
14277 xmlParserCtxtPtr ctxt;
14278 xmlParserInputPtr inputStream;
14279 char *directory = NULL;
14281 ctxt = xmlNewParserCtxt();
14282 if (ctxt == NULL) {
14283 xmlErrMemory(NULL, "cannot allocate parser context");
14288 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14289 ctxt->linenumbers = 1;
14291 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14292 if (inputStream == NULL) {
14293 xmlFreeParserCtxt(ctxt);
14297 inputPush(ctxt, inputStream);
14298 if ((ctxt->directory == NULL) && (directory == NULL))
14299 directory = xmlParserGetDirectory(filename);
14300 if ((ctxt->directory == NULL) && (directory != NULL))
14301 ctxt->directory = directory;
14307 * xmlCreateFileParserCtxt:
14308 * @filename: the filename
14310 * Create a parser context for a file content.
14311 * Automatic support for ZLIB/Compress compressed document is provided
14312 * by default if found at compile-time.
14314 * Returns the new parser context or NULL
14317 xmlCreateFileParserCtxt(const char *filename)
14319 return(xmlCreateURLParserCtxt(filename, 0));
14322 #ifdef LIBXML_SAX1_ENABLED
14324 * xmlSAXParseFileWithData:
14325 * @sax: the SAX handler block
14326 * @filename: the filename
14327 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14329 * @data: the userdata
14331 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14332 * compressed document is provided by default if found at compile-time.
14333 * It use the given SAX function block to handle the parsing callback.
14334 * If sax is NULL, fallback to the default DOM tree building routines.
14336 * User data (void *) is stored within the parser context in the
14337 * context's _private member, so it is available nearly everywhere in libxml
14339 * Returns the resulting document tree
14343 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14344 int recovery, void *data) {
14346 xmlParserCtxtPtr ctxt;
14350 ctxt = xmlCreateFileParserCtxt(filename);
14351 if (ctxt == NULL) {
14355 if (ctxt->sax != NULL)
14356 xmlFree(ctxt->sax);
14359 xmlDetectSAX2(ctxt);
14361 ctxt->_private = data;
14364 if (ctxt->directory == NULL)
14365 ctxt->directory = xmlParserGetDirectory(filename);
14367 ctxt->recovery = recovery;
14369 xmlParseDocument(ctxt);
14371 if ((ctxt->wellFormed) || recovery) {
14374 if (ctxt->input->buf->compressed > 0)
14375 ret->compression = 9;
14377 ret->compression = ctxt->input->buf->compressed;
14382 xmlFreeDoc(ctxt->myDoc);
14383 ctxt->myDoc = NULL;
14387 xmlFreeParserCtxt(ctxt);
14394 * @sax: the SAX handler block
14395 * @filename: the filename
14396 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14399 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14400 * compressed document is provided by default if found at compile-time.
14401 * It use the given SAX function block to handle the parsing callback.
14402 * If sax is NULL, fallback to the default DOM tree building routines.
14404 * Returns the resulting document tree
14408 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14410 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14415 * @cur: a pointer to an array of xmlChar
14417 * parse an XML in-memory document and build a tree.
14418 * In the case the document is not Well Formed, a attempt to build a
14419 * tree is tried anyway
14421 * Returns the resulting document tree or NULL in case of failure
14425 xmlRecoverDoc(const xmlChar *cur) {
14426 return(xmlSAXParseDoc(NULL, cur, 1));
14431 * @filename: the filename
14433 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14434 * compressed document is provided by default if found at compile-time.
14436 * Returns the resulting document tree if the file was wellformed,
14441 xmlParseFile(const char *filename) {
14442 return(xmlSAXParseFile(NULL, filename, 0));
14447 * @filename: the filename
14449 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14450 * compressed document is provided by default if found at compile-time.
14451 * In the case the document is not Well Formed, it attempts to build
14454 * Returns the resulting document tree or NULL in case of failure
14458 xmlRecoverFile(const char *filename) {
14459 return(xmlSAXParseFile(NULL, filename, 1));
14464 * xmlSetupParserForBuffer:
14465 * @ctxt: an XML parser context
14466 * @buffer: a xmlChar * buffer
14467 * @filename: a file name
14469 * Setup the parser context to parse a new buffer; Clears any prior
14470 * contents from the parser context. The buffer parameter must not be
14471 * NULL, but the filename parameter can be
14474 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14475 const char* filename)
14477 xmlParserInputPtr input;
14479 if ((ctxt == NULL) || (buffer == NULL))
14482 input = xmlNewInputStream(ctxt);
14483 if (input == NULL) {
14484 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14485 xmlClearParserCtxt(ctxt);
14489 xmlClearParserCtxt(ctxt);
14490 if (filename != NULL)
14491 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14492 input->base = buffer;
14493 input->cur = buffer;
14494 input->end = &buffer[xmlStrlen(buffer)];
14495 inputPush(ctxt, input);
14499 * xmlSAXUserParseFile:
14500 * @sax: a SAX handler
14501 * @user_data: The user data returned on SAX callbacks
14502 * @filename: a file name
14504 * parse an XML file and call the given SAX handler routines.
14505 * Automatic support for ZLIB/Compress compressed document is provided
14507 * Returns 0 in case of success or a error number otherwise
14510 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14511 const char *filename) {
14513 xmlParserCtxtPtr ctxt;
14515 ctxt = xmlCreateFileParserCtxt(filename);
14516 if (ctxt == NULL) return -1;
14517 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14518 xmlFree(ctxt->sax);
14520 xmlDetectSAX2(ctxt);
14522 if (user_data != NULL)
14523 ctxt->userData = user_data;
14525 xmlParseDocument(ctxt);
14527 if (ctxt->wellFormed)
14530 if (ctxt->errNo != 0)
14537 if (ctxt->myDoc != NULL) {
14538 xmlFreeDoc(ctxt->myDoc);
14539 ctxt->myDoc = NULL;
14541 xmlFreeParserCtxt(ctxt);
14545 #endif /* LIBXML_SAX1_ENABLED */
14547 /************************************************************************
14549 * Front ends when parsing from memory *
14551 ************************************************************************/
14554 * xmlCreateMemoryParserCtxt:
14555 * @buffer: a pointer to a char array
14556 * @size: the size of the array
14558 * Create a parser context for an XML in-memory document.
14560 * Returns the new parser context or NULL
14563 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14564 xmlParserCtxtPtr ctxt;
14565 xmlParserInputPtr input;
14566 xmlParserInputBufferPtr buf;
14568 if (buffer == NULL)
14573 ctxt = xmlNewParserCtxt();
14577 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14578 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14580 xmlFreeParserCtxt(ctxt);
14584 input = xmlNewInputStream(ctxt);
14585 if (input == NULL) {
14586 xmlFreeParserInputBuffer(buf);
14587 xmlFreeParserCtxt(ctxt);
14591 input->filename = NULL;
14593 xmlBufResetInput(input->buf->buffer, input);
14595 inputPush(ctxt, input);
14599 #ifdef LIBXML_SAX1_ENABLED
14601 * xmlSAXParseMemoryWithData:
14602 * @sax: the SAX handler block
14603 * @buffer: an pointer to a char array
14604 * @size: the size of the array
14605 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14607 * @data: the userdata
14609 * parse an XML in-memory block and use the given SAX function block
14610 * to handle the parsing callback. If sax is NULL, fallback to the default
14611 * DOM tree building routines.
14613 * User data (void *) is stored within the parser context in the
14614 * context's _private member, so it is available nearly everywhere in libxml
14616 * Returns the resulting document tree
14620 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14621 int size, int recovery, void *data) {
14623 xmlParserCtxtPtr ctxt;
14627 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14628 if (ctxt == NULL) return(NULL);
14630 if (ctxt->sax != NULL)
14631 xmlFree(ctxt->sax);
14634 xmlDetectSAX2(ctxt);
14636 ctxt->_private=data;
14639 ctxt->recovery = recovery;
14641 xmlParseDocument(ctxt);
14643 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14646 xmlFreeDoc(ctxt->myDoc);
14647 ctxt->myDoc = NULL;
14651 xmlFreeParserCtxt(ctxt);
14657 * xmlSAXParseMemory:
14658 * @sax: the SAX handler block
14659 * @buffer: an pointer to a char array
14660 * @size: the size of the array
14661 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14664 * parse an XML in-memory block and use the given SAX function block
14665 * to handle the parsing callback. If sax is NULL, fallback to the default
14666 * DOM tree building routines.
14668 * Returns the resulting document tree
14671 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14672 int size, int recovery) {
14673 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14678 * @buffer: an pointer to a char array
14679 * @size: the size of the array
14681 * parse an XML in-memory block and build a tree.
14683 * Returns the resulting document tree
14686 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14687 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14691 * xmlRecoverMemory:
14692 * @buffer: an pointer to a char array
14693 * @size: the size of the array
14695 * parse an XML in-memory block and build a tree.
14696 * In the case the document is not Well Formed, an attempt to
14697 * build a tree is tried anyway
14699 * Returns the resulting document tree or NULL in case of error
14702 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14703 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14707 * xmlSAXUserParseMemory:
14708 * @sax: a SAX handler
14709 * @user_data: The user data returned on SAX callbacks
14710 * @buffer: an in-memory XML document input
14711 * @size: the length of the XML document in bytes
14713 * A better SAX parsing routine.
14714 * parse an XML in-memory buffer and call the given SAX handler routines.
14716 * Returns 0 in case of success or a error number otherwise
14718 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14719 const char *buffer, int size) {
14721 xmlParserCtxtPtr ctxt;
14725 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14726 if (ctxt == NULL) return -1;
14727 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14728 xmlFree(ctxt->sax);
14730 xmlDetectSAX2(ctxt);
14732 if (user_data != NULL)
14733 ctxt->userData = user_data;
14735 xmlParseDocument(ctxt);
14737 if (ctxt->wellFormed)
14740 if (ctxt->errNo != 0)
14747 if (ctxt->myDoc != NULL) {
14748 xmlFreeDoc(ctxt->myDoc);
14749 ctxt->myDoc = NULL;
14751 xmlFreeParserCtxt(ctxt);
14755 #endif /* LIBXML_SAX1_ENABLED */
14758 * xmlCreateDocParserCtxt:
14759 * @cur: a pointer to an array of xmlChar
14761 * Creates a parser context for an XML in-memory document.
14763 * Returns the new parser context or NULL
14766 xmlCreateDocParserCtxt(const xmlChar *cur) {
14771 len = xmlStrlen(cur);
14772 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14775 #ifdef LIBXML_SAX1_ENABLED
14778 * @sax: the SAX handler block
14779 * @cur: a pointer to an array of xmlChar
14780 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14783 * parse an XML in-memory document and build a tree.
14784 * It use the given SAX function block to handle the parsing callback.
14785 * If sax is NULL, fallback to the default DOM tree building routines.
14787 * Returns the resulting document tree
14791 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14793 xmlParserCtxtPtr ctxt;
14794 xmlSAXHandlerPtr oldsax = NULL;
14796 if (cur == NULL) return(NULL);
14799 ctxt = xmlCreateDocParserCtxt(cur);
14800 if (ctxt == NULL) return(NULL);
14802 oldsax = ctxt->sax;
14804 ctxt->userData = NULL;
14806 xmlDetectSAX2(ctxt);
14808 xmlParseDocument(ctxt);
14809 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14812 xmlFreeDoc(ctxt->myDoc);
14813 ctxt->myDoc = NULL;
14816 ctxt->sax = oldsax;
14817 xmlFreeParserCtxt(ctxt);
14824 * @cur: a pointer to an array of xmlChar
14826 * parse an XML in-memory document and build a tree.
14828 * Returns the resulting document tree
14832 xmlParseDoc(const xmlChar *cur) {
14833 return(xmlSAXParseDoc(NULL, cur, 0));
14835 #endif /* LIBXML_SAX1_ENABLED */
14837 #ifdef LIBXML_LEGACY_ENABLED
14838 /************************************************************************
14840 * Specific function to keep track of entities references *
14841 * and used by the XSLT debugger *
14843 ************************************************************************/
14845 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14848 * xmlAddEntityReference:
14849 * @ent : A valid entity
14850 * @firstNode : A valid first node for children of entity
14851 * @lastNode : A valid last node of children entity
14853 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14856 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14857 xmlNodePtr lastNode)
14859 if (xmlEntityRefFunc != NULL) {
14860 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14866 * xmlSetEntityReferenceFunc:
14867 * @func: A valid function
14869 * Set the function to call call back when a xml reference has been made
14872 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14874 xmlEntityRefFunc = func;
14876 #endif /* LIBXML_LEGACY_ENABLED */
14878 /************************************************************************
14882 ************************************************************************/
14884 #ifdef LIBXML_XPATH_ENABLED
14885 #include <libxml/xpath.h>
14888 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14889 static int xmlParserInitialized = 0;
14894 * Initialization function for the XML parser.
14895 * This is not reentrant. Call once before processing in case of
14896 * use in multithreaded programs.
14900 xmlInitParser(void) {
14901 if (xmlParserInitialized != 0)
14904 #ifdef LIBXML_THREAD_ENABLED
14905 __xmlGlobalInitMutexLock();
14906 if (xmlParserInitialized == 0) {
14910 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14911 (xmlGenericError == NULL))
14912 initGenericErrorDefaultFunc(NULL);
14914 xmlInitializeDict();
14915 xmlInitCharEncodingHandlers();
14916 xmlDefaultSAXHandlerInit();
14917 xmlRegisterDefaultInputCallbacks();
14918 #ifdef LIBXML_OUTPUT_ENABLED
14919 xmlRegisterDefaultOutputCallbacks();
14920 #endif /* LIBXML_OUTPUT_ENABLED */
14921 #ifdef LIBXML_HTML_ENABLED
14922 htmlInitAutoClose();
14923 htmlDefaultSAXHandlerInit();
14925 #ifdef LIBXML_XPATH_ENABLED
14928 xmlParserInitialized = 1;
14929 #ifdef LIBXML_THREAD_ENABLED
14931 __xmlGlobalInitMutexUnlock();
14936 * xmlCleanupParser:
14938 * This function name is somewhat misleading. It does not clean up
14939 * parser state, it cleans up memory allocated by the library itself.
14940 * It is a cleanup function for the XML library. It tries to reclaim all
14941 * related global memory allocated for the library processing.
14942 * It doesn't deallocate any document related memory. One should
14943 * call xmlCleanupParser() only when the process has finished using
14944 * the library and all XML/HTML documents built with it.
14945 * See also xmlInitParser() which has the opposite function of preparing
14946 * the library for operations.
14948 * WARNING: if your application is multithreaded or has plugin support
14949 * calling this may crash the application if another thread or
14950 * a plugin is still using libxml2. It's sometimes very hard to
14951 * guess if libxml2 is in use in the application, some libraries
14952 * or plugins may use it without notice. In case of doubt abstain
14953 * from calling this function or do it just before calling exit()
14954 * to avoid leak reports from valgrind !
14958 xmlCleanupParser(void) {
14959 if (!xmlParserInitialized)
14962 xmlCleanupCharEncodingHandlers();
14963 #ifdef LIBXML_CATALOG_ENABLED
14964 xmlCatalogCleanup();
14967 xmlCleanupInputCallbacks();
14968 #ifdef LIBXML_OUTPUT_ENABLED
14969 xmlCleanupOutputCallbacks();
14971 #ifdef LIBXML_SCHEMAS_ENABLED
14972 xmlSchemaCleanupTypes();
14973 xmlRelaxNGCleanupTypes();
14975 xmlResetLastError();
14976 xmlCleanupGlobals();
14977 xmlCleanupThreads(); /* must be last if called not from the main thread */
14978 xmlCleanupMemory();
14979 xmlParserInitialized = 0;
14982 /************************************************************************
14984 * New set (2.6.0) of simpler and more flexible APIs *
14986 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; @str is evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement, including as the unbraced body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
15002 * @ctxt: an XML parser context
15004 * Reset a parser context
15007 xmlCtxtReset(xmlParserCtxtPtr ctxt)
15009 xmlParserInputPtr input;
15017 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15018 xmlFreeInputStream(input);
15021 ctxt->input = NULL;
15024 if (ctxt->spaceTab != NULL) {
15025 ctxt->spaceTab[0] = -1;
15026 ctxt->space = &ctxt->spaceTab[0];
15028 ctxt->space = NULL;
15038 DICT_FREE(ctxt->version);
15039 ctxt->version = NULL;
15040 DICT_FREE(ctxt->encoding);
15041 ctxt->encoding = NULL;
15042 DICT_FREE(ctxt->directory);
15043 ctxt->directory = NULL;
15044 DICT_FREE(ctxt->extSubURI);
15045 ctxt->extSubURI = NULL;
15046 DICT_FREE(ctxt->extSubSystem);
15047 ctxt->extSubSystem = NULL;
15048 if (ctxt->myDoc != NULL)
15049 xmlFreeDoc(ctxt->myDoc);
15050 ctxt->myDoc = NULL;
15052 ctxt->standalone = -1;
15053 ctxt->hasExternalSubset = 0;
15054 ctxt->hasPErefs = 0;
15056 ctxt->external = 0;
15057 ctxt->instate = XML_PARSER_START;
15060 ctxt->wellFormed = 1;
15061 ctxt->nsWellFormed = 1;
15062 ctxt->disableSAX = 0;
15065 ctxt->vctxt.userData = ctxt;
15066 ctxt->vctxt.error = xmlParserValidityError;
15067 ctxt->vctxt.warning = xmlParserValidityWarning;
15069 ctxt->record_info = 0;
15071 ctxt->checkIndex = 0;
15072 ctxt->inSubset = 0;
15073 ctxt->errNo = XML_ERR_OK;
15075 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15076 ctxt->catalogs = NULL;
15077 ctxt->nbentities = 0;
15078 ctxt->sizeentities = 0;
15079 ctxt->sizeentcopy = 0;
15080 xmlInitNodeInfoSeq(&ctxt->node_seq);
15082 if (ctxt->attsDefault != NULL) {
15083 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15084 ctxt->attsDefault = NULL;
15086 if (ctxt->attsSpecial != NULL) {
15087 xmlHashFree(ctxt->attsSpecial, NULL);
15088 ctxt->attsSpecial = NULL;
15091 #ifdef LIBXML_CATALOG_ENABLED
15092 if (ctxt->catalogs != NULL)
15093 xmlCatalogFreeLocal(ctxt->catalogs);
15095 if (ctxt->lastError.code != XML_ERR_OK)
15096 xmlResetError(&ctxt->lastError);
15100 * xmlCtxtResetPush:
15101 * @ctxt: an XML parser context
15102 * @chunk: a pointer to an array of chars
15103 * @size: number of chars in the array
15104 * @filename: an optional file name or URI
15105 * @encoding: the document encoding, or NULL
15107 * Reset a push parser context
15109 * Returns 0 in case of success and 1 in case of error
15112 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15113 int size, const char *filename, const char *encoding)
15115 xmlParserInputPtr inputStream;
15116 xmlParserInputBufferPtr buf;
15117 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15122 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15123 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15125 buf = xmlAllocParserInputBuffer(enc);
15129 if (ctxt == NULL) {
15130 xmlFreeParserInputBuffer(buf);
15134 xmlCtxtReset(ctxt);
15136 if (ctxt->pushTab == NULL) {
15137 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15138 sizeof(xmlChar *));
15139 if (ctxt->pushTab == NULL) {
15140 xmlErrMemory(ctxt, NULL);
15141 xmlFreeParserInputBuffer(buf);
15146 if (filename == NULL) {
15147 ctxt->directory = NULL;
15149 ctxt->directory = xmlParserGetDirectory(filename);
15152 inputStream = xmlNewInputStream(ctxt);
15153 if (inputStream == NULL) {
15154 xmlFreeParserInputBuffer(buf);
15158 if (filename == NULL)
15159 inputStream->filename = NULL;
15161 inputStream->filename = (char *)
15162 xmlCanonicPath((const xmlChar *) filename);
15163 inputStream->buf = buf;
15164 xmlBufResetInput(buf->buffer, inputStream);
15166 inputPush(ctxt, inputStream);
15168 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15169 (ctxt->input->buf != NULL)) {
15170 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15171 size_t cur = ctxt->input->cur - ctxt->input->base;
15173 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15175 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15177 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15181 if (encoding != NULL) {
15182 xmlCharEncodingHandlerPtr hdlr;
15184 if (ctxt->encoding != NULL)
15185 xmlFree((xmlChar *) ctxt->encoding);
15186 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15188 hdlr = xmlFindCharEncodingHandler(encoding);
15189 if (hdlr != NULL) {
15190 xmlSwitchToEncoding(ctxt, hdlr);
15192 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15193 "Unsupported encoding %s\n", BAD_CAST encoding);
15195 } else if (enc != XML_CHAR_ENCODING_NONE) {
15196 xmlSwitchEncoding(ctxt, enc);
15204 * xmlCtxtUseOptionsInternal:
15205 * @ctxt: an XML parser context
15206 * @options: a combination of xmlParserOption
15207 * @encoding: the user provided encoding to use
15209 * Applies the options to the parser context
15211 * Returns 0 in case of success, the set of unknown or unimplemented options
15212 * in case of error.
15215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15219 if (encoding != NULL) {
15220 if (ctxt->encoding != NULL)
15221 xmlFree((xmlChar *) ctxt->encoding);
15222 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15224 if (options & XML_PARSE_RECOVER) {
15225 ctxt->recovery = 1;
15226 options -= XML_PARSE_RECOVER;
15227 ctxt->options |= XML_PARSE_RECOVER;
15229 ctxt->recovery = 0;
15230 if (options & XML_PARSE_DTDLOAD) {
15231 ctxt->loadsubset = XML_DETECT_IDS;
15232 options -= XML_PARSE_DTDLOAD;
15233 ctxt->options |= XML_PARSE_DTDLOAD;
15235 ctxt->loadsubset = 0;
15236 if (options & XML_PARSE_DTDATTR) {
15237 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15238 options -= XML_PARSE_DTDATTR;
15239 ctxt->options |= XML_PARSE_DTDATTR;
15241 if (options & XML_PARSE_NOENT) {
15242 ctxt->replaceEntities = 1;
15243 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15244 options -= XML_PARSE_NOENT;
15245 ctxt->options |= XML_PARSE_NOENT;
15247 ctxt->replaceEntities = 0;
15248 if (options & XML_PARSE_PEDANTIC) {
15249 ctxt->pedantic = 1;
15250 options -= XML_PARSE_PEDANTIC;
15251 ctxt->options |= XML_PARSE_PEDANTIC;
15253 ctxt->pedantic = 0;
15254 if (options & XML_PARSE_NOBLANKS) {
15255 ctxt->keepBlanks = 0;
15256 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15257 options -= XML_PARSE_NOBLANKS;
15258 ctxt->options |= XML_PARSE_NOBLANKS;
15260 ctxt->keepBlanks = 1;
15261 if (options & XML_PARSE_DTDVALID) {
15262 ctxt->validate = 1;
15263 if (options & XML_PARSE_NOWARNING)
15264 ctxt->vctxt.warning = NULL;
15265 if (options & XML_PARSE_NOERROR)
15266 ctxt->vctxt.error = NULL;
15267 options -= XML_PARSE_DTDVALID;
15268 ctxt->options |= XML_PARSE_DTDVALID;
15270 ctxt->validate = 0;
15271 if (options & XML_PARSE_NOWARNING) {
15272 ctxt->sax->warning = NULL;
15273 options -= XML_PARSE_NOWARNING;
15275 if (options & XML_PARSE_NOERROR) {
15276 ctxt->sax->error = NULL;
15277 ctxt->sax->fatalError = NULL;
15278 options -= XML_PARSE_NOERROR;
15280 #ifdef LIBXML_SAX1_ENABLED
15281 if (options & XML_PARSE_SAX1) {
15282 ctxt->sax->startElement = xmlSAX2StartElement;
15283 ctxt->sax->endElement = xmlSAX2EndElement;
15284 ctxt->sax->startElementNs = NULL;
15285 ctxt->sax->endElementNs = NULL;
15286 ctxt->sax->initialized = 1;
15287 options -= XML_PARSE_SAX1;
15288 ctxt->options |= XML_PARSE_SAX1;
15290 #endif /* LIBXML_SAX1_ENABLED */
15291 if (options & XML_PARSE_NODICT) {
15292 ctxt->dictNames = 0;
15293 options -= XML_PARSE_NODICT;
15294 ctxt->options |= XML_PARSE_NODICT;
15296 ctxt->dictNames = 1;
15298 if (options & XML_PARSE_NOCDATA) {
15299 ctxt->sax->cdataBlock = NULL;
15300 options -= XML_PARSE_NOCDATA;
15301 ctxt->options |= XML_PARSE_NOCDATA;
15303 if (options & XML_PARSE_NSCLEAN) {
15304 ctxt->options |= XML_PARSE_NSCLEAN;
15305 options -= XML_PARSE_NSCLEAN;
15307 if (options & XML_PARSE_NONET) {
15308 ctxt->options |= XML_PARSE_NONET;
15309 options -= XML_PARSE_NONET;
15311 if (options & XML_PARSE_COMPACT) {
15312 ctxt->options |= XML_PARSE_COMPACT;
15313 options -= XML_PARSE_COMPACT;
15315 if (options & XML_PARSE_OLD10) {
15316 ctxt->options |= XML_PARSE_OLD10;
15317 options -= XML_PARSE_OLD10;
15319 if (options & XML_PARSE_NOBASEFIX) {
15320 ctxt->options |= XML_PARSE_NOBASEFIX;
15321 options -= XML_PARSE_NOBASEFIX;
15323 if (options & XML_PARSE_HUGE) {
15324 ctxt->options |= XML_PARSE_HUGE;
15325 options -= XML_PARSE_HUGE;
15326 if (ctxt->dict != NULL)
15327 xmlDictSetLimit(ctxt->dict, 0);
15329 if (options & XML_PARSE_OLDSAX) {
15330 ctxt->options |= XML_PARSE_OLDSAX;
15331 options -= XML_PARSE_OLDSAX;
15333 if (options & XML_PARSE_IGNORE_ENC) {
15334 ctxt->options |= XML_PARSE_IGNORE_ENC;
15335 options -= XML_PARSE_IGNORE_ENC;
15337 if (options & XML_PARSE_BIG_LINES) {
15338 ctxt->options |= XML_PARSE_BIG_LINES;
15339 options -= XML_PARSE_BIG_LINES;
15341 ctxt->linenumbers = 1;
15346 * xmlCtxtUseOptions:
15347 * @ctxt: an XML parser context
15348 * @options: a combination of xmlParserOption
15350 * Applies the options to the parser context
15352 * Returns 0 in case of success, the set of unknown or unimplemented options
15353 * in case of error.
15356 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15358 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15363 * @ctxt: an XML parser context
15364 * @URL: the base URL to use for the document
15365 * @encoding: the document encoding, or NULL
15366 * @options: a combination of xmlParserOption
15367 * @reuse: keep the context for reuse
15369 * Common front-end for the xmlRead functions
15371 * Returns the resulting document tree or NULL
/* Shared back end of the xmlRead and xmlCtxtRead entry points: configures
 * ctxt, runs xmlParseDocument() on it and then deals with ctxt->myDoc.
 * NOTE(review): this listing has gaps in its embedded line numbering, so
 * some statements of the function (for example the ones that pick the
 * return value) are not visible here and are not described below. */
15374 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15375 int options, int reuse)
/* Apply the caller's options; the encoding name is handed over as well. */
15379 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15380 if (encoding != NULL) {
15381 xmlCharEncodingHandlerPtr hdlr;
/* Look the handler up by name and switch the context to it.
 * NOTE(review): a line is missing between these two statements in this
 * listing - presumably a NULL check on hdlr; confirm against the file. */
15383 hdlr = xmlFindCharEncodingHandler(encoding);
15385 xmlSwitchToEncoding(ctxt, hdlr);
/* Record URL as the input's filename, but only when a URL was given, an
 * input is present and that input has no filename yet. The stored value is
 * a copy made by xmlStrdup(). */
15387 if ((URL != NULL) && (ctxt->input != NULL) &&
15388 (ctxt->input->filename == NULL))
15389 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15390 xmlParseDocument(ctxt);
/* Branch on "well-formed, or recovery mode enabled"; the statements
 * selected by this test are not visible in this listing. */
15391 if ((ctxt->wellFormed) || ctxt->recovery)
/* Release a document that is still attached to the context.
 * NOTE(review): presumably the failure path of the test above - confirm. */
15395 if (ctxt->myDoc != NULL) {
15396 xmlFreeDoc(ctxt->myDoc);
/* Detach the document pointer from the context. */
15399 ctxt->myDoc = NULL;
/* Destroy the context.
 * NOTE(review): presumably guarded by the reuse flag (the guard is not
 * visible here); the xmlRead callers pass reuse 0 and the xmlCtxtRead
 * callers pass reuse 1. */
15401 xmlFreeParserCtxt(ctxt);
15409 * @cur: a pointer to a zero terminated string
15410 * @URL: the base URL to use for the document
15411 * @encoding: the document encoding, or NULL
15412 * @options: a combination of xmlParserOption
15414 * parse an XML in-memory document and build a tree.
15416 * Returns the resulting document tree
/* Parses the zero terminated string cur: builds a parser context for it
 * with xmlCreateDocParserCtxt() and passes that context to xmlDoRead()
 * with reuse set to 0.
 * NOTE(review): lines between the declaration and the calls below are
 * missing from this listing (gaps in the embedded numbering). */
15419 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15421 xmlParserCtxtPtr ctxt;
15427 ctxt = xmlCreateDocParserCtxt(cur);
/* URL, encoding and options are forwarded untouched. */
15430 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15435 * @filename: a file or URL
15436 * @encoding: the document encoding, or NULL
15437 * @options: a combination of xmlParserOption
15439 * parse an XML file from the filesystem or the network.
15441 * Returns the resulting document tree
/* Parses the resource named by filename: creates a parser context with
 * xmlCreateURLParserCtxt(filename, options) and passes it to xmlDoRead()
 * with reuse set to 0.
 * NOTE(review): some lines of this function are missing from this listing
 * (gaps in the embedded numbering). */
15444 xmlReadFile(const char *filename, const char *encoding, int options)
15446 xmlParserCtxtPtr ctxt;
/* The options are already handed to the context constructor here and are
 * passed again to xmlDoRead() below. */
15449 ctxt = xmlCreateURLParserCtxt(filename, options);
/* No separate base URL is supplied: the URL argument is NULL. */
15452 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15457 * @buffer: a pointer to a char array
15458 * @size: the size of the array
15459 * @URL: the base URL to use for the document
15460 * @encoding: the document encoding, or NULL
15461 * @options: a combination of xmlParserOption
15463 * parse an XML in-memory document and build a tree.
15465 * Returns the resulting document tree
/* Parses a memory buffer of the given size: creates a parser context with
 * xmlCreateMemoryParserCtxt(buffer, size) and passes it to xmlDoRead()
 * with reuse set to 0.
 * NOTE(review): some lines of this function are missing from this listing
 * (gaps in the embedded numbering). */
15468 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15470 xmlParserCtxtPtr ctxt;
15473 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
/* URL, encoding and options are forwarded untouched. */
15476 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15481 * @fd: an open file descriptor
15482 * @URL: the base URL to use for the document
15483 * @encoding: the document encoding, or NULL
15484 * @options: a combination of xmlParserOption
15486 * parse an XML from a file descriptor and build a tree.
15487 * NOTE that the file descriptor will not be closed when the
15488 * parser context is freed or reset.
15490 * Returns the resulting document tree
/* Parses a document read from the file descriptor fd: wraps fd in a parser
 * input buffer, creates a fresh parser context and an input stream on top
 * of the buffer, pushes the stream and passes the context to xmlDoRead()
 * with reuse set to 0.
 * NOTE(review): some lines of this function (gaps in the embedded
 * numbering, e.g. the early exits) are missing from this listing. */
15493 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15495 xmlParserCtxtPtr ctxt;
15496 xmlParserInputBufferPtr input;
15497 xmlParserInputPtr stream;
15503 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* Clear the buffer's close callback; the header note of this function
 * says the descriptor is not closed on behalf of the caller. */
15506 input->closecallback = NULL;
15507 ctxt = xmlNewParserCtxt();
/* Context creation failed: the input buffer is released here. */
15508 if (ctxt == NULL) {
15509 xmlFreeParserInputBuffer(input);
15512 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* Stream creation failed: both the buffer and the context are released. */
15513 if (stream == NULL) {
15514 xmlFreeParserInputBuffer(input);
15515 xmlFreeParserCtxt(ctxt);
/* Push the stream onto the context's input stack, then parse. */
15518 inputPush(ctxt, stream);
15519 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15524 * @ioread: an I/O read function
15525 * @ioclose: an I/O close function
15526 * @ioctx: an I/O handler
15527 * @URL: the base URL to use for the document
15528 * @encoding: the document encoding, or NULL
15529 * @options: a combination of xmlParserOption
15531 * parse an XML document from I/O functions and source and build a tree.
15533 * Returns the resulting document tree
/* Parses a document obtained through caller supplied I/O callbacks: builds
 * a parser input buffer from ioread/ioclose/ioctx, creates a fresh parser
 * context and an input stream over the buffer, pushes the stream and
 * passes the context to xmlDoRead() with reuse set to 0.
 * NOTE(review): some lines of this function (gaps in the embedded
 * numbering) are missing from this listing. */
15536 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15537 void *ioctx, const char *URL, const char *encoding, int options)
15539 xmlParserCtxtPtr ctxt;
15540 xmlParserInputBufferPtr input;
15541 xmlParserInputPtr stream;
/* A read callback is mandatory; the statement taken when it is NULL is
 * not visible in this listing. */
15543 if (ioread == NULL)
15547 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15548 XML_CHAR_ENCODING_NONE);
/* Buffer creation failed. NOTE(review): the statement guarded by the
 * ioclose test is not visible; presumably it invokes ioclose on ioctx so
 * the caller's resource is not leaked - confirm against the file. */
15549 if (input == NULL) {
15550 if (ioclose != NULL)
15554 ctxt = xmlNewParserCtxt();
/* Context creation failed: the input buffer is released here. */
15555 if (ctxt == NULL) {
15556 xmlFreeParserInputBuffer(input);
15559 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* Stream creation failed: both the buffer and the context are released. */
15560 if (stream == NULL) {
15561 xmlFreeParserInputBuffer(input);
15562 xmlFreeParserCtxt(ctxt);
/* Push the stream onto the context's input stack, then parse. */
15565 inputPush(ctxt, stream);
15566 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15571 * @ctxt: an XML parser context
15572 * @cur: a pointer to a zero terminated string
15573 * @URL: the base URL to use for the document
15574 * @encoding: the document encoding, or NULL
15575 * @options: a combination of xmlParserOption
15577 * parse an XML in-memory document and build a tree.
15578 * This reuses the existing @ctxt parser context
15580 * Returns the resulting document tree
/* Parses the zero terminated string cur with an existing context: resets
 * ctxt, creates a string input stream for cur, pushes it and passes the
 * context to xmlDoRead() with reuse set to 1.
 * NOTE(review): some lines of this function (gaps in the embedded
 * numbering, e.g. argument checks) are missing from this listing. */
15583 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15584 const char *URL, const char *encoding, int options)
15586 xmlParserInputPtr stream;
/* Reset the caller's context before it is given a new input. */
15594 xmlCtxtReset(ctxt);
15596 stream = xmlNewStringInputStream(ctxt, cur);
/* Stream creation failed; the statement taken here is not visible. */
15597 if (stream == NULL) {
15600 inputPush(ctxt, stream);
15601 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15606 * @ctxt: an XML parser context
15607 * @filename: a file or URL
15608 * @encoding: the document encoding, or NULL
15609 * @options: a combination of xmlParserOption
15611 * parse an XML file from the filesystem or the network.
15612 * This reuses the existing @ctxt parser context
15614 * Returns the resulting document tree
/* Parses the resource named by filename with an existing context: resets
 * ctxt, obtains an input stream from xmlLoadExternalEntity(), pushes it
 * and passes the context to xmlDoRead() with reuse set to 1.
 * NOTE(review): some lines of this function (gaps in the embedded
 * numbering) are missing from this listing. */
15617 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15618 const char *encoding, int options)
15620 xmlParserInputPtr stream;
/* A filename is mandatory; the statement taken when it is NULL is not
 * visible in this listing. */
15622 if (filename == NULL)
/* Reset the caller's context before it is given a new input. */
15628 xmlCtxtReset(ctxt);
/* filename is passed as the first argument and NULL as the second. */
15630 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
/* Loading failed; the statement taken here is not visible. */
15631 if (stream == NULL) {
15634 inputPush(ctxt, stream);
/* No separate base URL is supplied: the URL argument is NULL. */
15635 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15639 * xmlCtxtReadMemory:
15640 * @ctxt: an XML parser context
15641 * @buffer: a pointer to a char array
15642 * @size: the size of the array
15643 * @URL: the base URL to use for the document
15644 * @encoding: the document encoding, or NULL
15645 * @options: a combination of xmlParserOption
15647 * parse an XML in-memory document and build a tree.
15648 * This reuses the existing @ctxt parser context
15650 * Returns the resulting document tree
/* Parses a memory buffer with an existing context: resets ctxt, wraps the
 * buffer in a parser input buffer, creates an input stream over it, pushes
 * the stream and passes the context to xmlDoRead() with reuse set to 1.
 * NOTE(review): some lines of this function (gaps in the embedded
 * numbering) are missing from this listing. */
15653 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15654 const char *URL, const char *encoding, int options)
15656 xmlParserInputBufferPtr input;
15657 xmlParserInputPtr stream;
/* A buffer is mandatory; the statement taken when it is NULL is not
 * visible in this listing. */
15661 if (buffer == NULL)
/* Reset the caller's context before it is given a new input. */
15665 xmlCtxtReset(ctxt);
15667 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
/* Buffer creation failed; the statement taken here is not visible. */
15668 if (input == NULL) {
15672 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* Stream creation failed: the input buffer is released. The caller's
 * context is not freed on this path in the visible lines. */
15673 if (stream == NULL) {
15674 xmlFreeParserInputBuffer(input);
15678 inputPush(ctxt, stream);
15679 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15684 * @ctxt: an XML parser context
15685 * @fd: an open file descriptor
15686 * @URL: the base URL to use for the document
15687 * @encoding: the document encoding, or NULL
15688 * @options: a combination of xmlParserOption
15690 * parse an XML from a file descriptor and build a tree.
15691 * This reuses the existing @ctxt parser context
15692 * NOTE that the file descriptor will not be closed when the
15693 * parser context is freed or reset.
15695 * Returns the resulting document tree
/* Parses a document read from the file descriptor fd with an existing
 * context: resets ctxt, wraps fd in a parser input buffer, creates an
 * input stream over it, pushes the stream and passes the context to
 * xmlDoRead() with reuse set to 1.
 * NOTE(review): some lines of this function (gaps in the embedded
 * numbering, e.g. argument checks) are missing from this listing. */
15698 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15699 const char *URL, const char *encoding, int options)
15701 xmlParserInputBufferPtr input;
15702 xmlParserInputPtr stream;
/* Reset the caller's context before it is given a new input. */
15710 xmlCtxtReset(ctxt);
15713 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* Clear the buffer's close callback; the header note of this function
 * says the descriptor is not closed on behalf of the caller. */
15716 input->closecallback = NULL;
15717 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* Stream creation failed: the input buffer is released. The caller's
 * context is not freed on this path in the visible lines. */
15718 if (stream == NULL) {
15719 xmlFreeParserInputBuffer(input);
15722 inputPush(ctxt, stream);
15723 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15728 * @ctxt: an XML parser context
15729 * @ioread: an I/O read function
15730 * @ioclose: an I/O close function
15731 * @ioctx: an I/O handler
15732 * @URL: the base URL to use for the document
15733 * @encoding: the document encoding, or NULL
15734 * @options: a combination of xmlParserOption
15736 * parse an XML document from I/O functions and source and build a tree.
15737 * This reuses the existing @ctxt parser context
15739 * Returns the resulting document tree
/* Parses a document obtained through caller supplied I/O callbacks with an
 * existing context: resets ctxt, builds a parser input buffer from
 * ioread/ioclose/ioctx, creates an input stream over it, pushes the stream
 * and passes the context to xmlDoRead() with reuse set to 1.
 * NOTE(review): some lines of this function are missing from this listing
 * (gaps in the embedded numbering). That includes one line of the
 * parameter list: URL is used below but its declaration is not visible. */
15742 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15743 xmlInputCloseCallback ioclose, void *ioctx,
15745 const char *encoding, int options)
15747 xmlParserInputBufferPtr input;
15748 xmlParserInputPtr stream;
/* A read callback is mandatory; the statement taken when it is NULL is
 * not visible in this listing. */
15750 if (ioread == NULL)
/* Reset the caller's context before it is given a new input. */
15756 xmlCtxtReset(ctxt);
15758 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15759 XML_CHAR_ENCODING_NONE);
/* Buffer creation failed. NOTE(review): the statement guarded by the
 * ioclose test is not visible; presumably it invokes ioclose on ioctx so
 * the caller's resource is not leaked - confirm against the file. */
15760 if (input == NULL) {
15761 if (ioclose != NULL)
15765 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* Stream creation failed: the input buffer is released. The caller's
 * context is not freed on this path in the visible lines. */
15766 if (stream == NULL) {
15767 xmlFreeParserInputBuffer(input);
15770 inputPush(ctxt, stream);
15771 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15774 #define bottom_parser
15775 #include "elfgcchack.h"