2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
71 #ifdef HAVE_SYS_STAT_H
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
99 /************************************************************************
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 ************************************************************************/
/* Lower bound used by xmlParserEntityCheck: its size-based branch tests
 * `size < XML_PARSER_BIG_ENTITY` before looking at the consumed input. */
105 #define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): no use of this constant is visible in this excerpt. */
106 #define XML_PARSER_LOT_ENTITY 5000
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in byte of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacements per byte of input.
/* Multiplier applied to the consumed-input byte count in every ratio test of
 * xmlParserEntityCheck (replacement, size and entity-count branches). */
114 #define XML_PARSER_NON_LINEAR 10
117 * xmlParserEntityCheck
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential (non-linear) entity expansion.
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
/*
 * xmlParserEntityCheck: compare the amount of entity expansion against the
 * amount of input consumed and raise XML_ERR_ENTITY_LOOP when it looks abusive.
 *   ctxt        - parser context; supplies options, counters and current input
 *   size        - when non-zero, selects the size-based check
 *   ent         - entity being referenced, may be NULL
 *   replacement - when non-zero, selects the replacement-volume check
 * NOTE(review): the embedded line numbers jump, so the return statements and
 * some declarations of this function are not part of this listing.
 */
126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 ent, size_t replacement)
/* Guard: no context, or XML_PARSE_HUGE is set in ctxt->options. */
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
/* Guard: the last recorded error is already an entity loop. */
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
137 * This may look absurd but is needed to detect
/* Entity not yet costed (checked == 0), not predefined, with content, and no
 * loop recorded in errNo: expand it once so a cost can be stored on it. */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 unsigned long oldnbent = ctxt->nbentities;
149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
152 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
/* Stored value is (entities counted during the trial expansion + 1) * 2; the
 * `ent->checked / 2` further down recovers the count. The low bit is
 * presumably a flag tied to the '<' test below - TODO confirm, the statement
 * guarded by that test is not visible here. */
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
158 if (xmlStrchr(rep, '<'))
/* Three alternative checks, chosen by the first argument that is set:
 * replacement, then size, then ent. */
164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
/* consumed = bytes already consumed from the current input plus the offset of
 * the read cursor in its buffer, then plus ctxt->sizeentities. */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
177 consumed += ctxt->sizeentities;
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
181 } else if (size != 0) {
183 * Do the check based on the replacement size of the entity
185 if (size < XML_PARSER_BIG_ENTITY)
189 * A limit on the amount of text data reasonably used
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
195 consumed += ctxt->sizeentities;
/* Both the size and three times the entity count must stay under the limit. */
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
200 } else if (ent != NULL) {
202 * use the number of parsed entities in the replacement
204 size = ent->checked / 2;
207 * The amount of data parsed counting entities size only once
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
213 consumed += ctxt->sizeentities;
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
223 * strange we got no data for checking
/* No argument to check against: the condition holds unless the last error is
 * an undeclared-entity error/warning and more than 10000 entities were seen. */
225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
/* Report the excessive expansion as an entity loop. */
230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
237 * arbitrary depth limit for the XML documents that we allow to
238 * process. This is not a limitation of the parser but a safety
239 * boundary feature. It can be disabled with the XML_PARSE_HUGE
/* Depth limit, initialised to 256; a plain (non-const) variable, so its value
 * can be changed at run time. */
242 unsigned int xmlParserMaxDepth = 256;
/* NOTE(review): none of the three macros below is used in the visible part of
 * the file; the two sizes are presumably initial buffer lengths in bytes -
 * confirm at the use sites. */
247 #define XML_PARSER_BIG_BUFFER_SIZE 300
248 #define XML_PARSER_BUFFER_SIZE 100
/* String constant cast to xmlChar pointer via BAD_CAST. */
249 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
252 * XML_PARSER_CHUNK_SIZE
254 * When calling GROW that's the minimal amount of data
255 * the parser expected to have received. It is not a hard
256 * limit but an optimization when reading strings like Names
257 * It is not strictly needed as long as the input's available characters
258 * are followed by 0, which should be provided by the I/O level
/* NOTE(review): the GROW macro that consumes this value is not part of the
 * visible excerpt; unit is presumably bytes - confirm at the use site. */
260 #define XML_PARSER_CHUNK_SIZE 100
263 * List of XML prefixed PI allowed by W3C specs
266 static const char *xmlW3CPIs[] = {
273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
274 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
277 static xmlParserErrors
278 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
280 void *user_data, int depth, const xmlChar *URL,
281 const xmlChar *ID, xmlNodePtr *list);
284 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
286 #ifdef LIBXML_LEGACY_ENABLED
288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
290 #endif /* LIBXML_LEGACY_ENABLED */
292 static xmlParserErrors
293 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
297 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
299 /************************************************************************
301 * Some factorized error routines *
303 ************************************************************************/
306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
311 * Handle a redefinition of attribute error
/*
 * xmlErrAttributeDup: raise XML_ERR_ATTRIBUTE_REDEFINED at XML_ERR_FATAL level
 * for the attribute identified by prefix/localname.
 * NOTE(review): lines are missing from this listing (embedded numbering
 * jumps), including the statement guarded by the first test and the condition
 * that chooses between the two report forms.
 */
314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
/* Form without prefix: message carries only the local name. */
324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
/* Form with prefix: message carries "prefix:localname". */
329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
/* Mark the document as not well-formed; outside recovery mode also stop
 * delivering SAX events. */
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErr: translate a parser error code into a fixed English message and
 * raise it at XML_ERR_FATAL level (domain XML_FROM_PARSER).
 *   ctxt  - parser context
 *   error - error code selecting the message
 *   info  - extra text; passed to __xmlRaiseError and, in the second report
 *           form, appended after the message ("%s: %s\n")
 * NOTE(review): the statements between the visible lines (embedded numbering
 * jumps) are not part of this listing.
 * NOTE(review): XML_ERR_ENTITY_NOT_STARTED and XML_ERR_ENTITY_NOT_FINISHED are
 * given the same message text below - confirm this is intended.
 */
350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
358 case XML_ERR_INVALID_HEX_CHARREF:
359 errmsg = "CharRef: invalid hexadecimal value";
361 case XML_ERR_INVALID_DEC_CHARREF:
362 errmsg = "CharRef: invalid decimal value";
364 case XML_ERR_INVALID_CHARREF:
365 errmsg = "CharRef: invalid value";
367 case XML_ERR_INTERNAL_ERROR:
368 errmsg = "internal error";
370 case XML_ERR_PEREF_AT_EOF:
371 errmsg = "PEReference at end of document";
373 case XML_ERR_PEREF_IN_PROLOG:
374 errmsg = "PEReference in prolog";
376 case XML_ERR_PEREF_IN_EPILOG:
377 errmsg = "PEReference in epilog";
379 case XML_ERR_PEREF_NO_NAME:
380 errmsg = "PEReference: no name";
382 case XML_ERR_PEREF_SEMICOL_MISSING:
383 errmsg = "PEReference: expecting ';'";
385 case XML_ERR_ENTITY_LOOP:
386 errmsg = "Detected an entity reference loop";
388 case XML_ERR_ENTITY_NOT_STARTED:
389 errmsg = "EntityValue: \" or ' expected";
391 case XML_ERR_ENTITY_PE_INTERNAL:
392 errmsg = "PEReferences forbidden in internal subset";
394 case XML_ERR_ENTITY_NOT_FINISHED:
395 errmsg = "EntityValue: \" or ' expected";
397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
398 errmsg = "AttValue: \" or ' expected";
400 case XML_ERR_LT_IN_ATTRIBUTE:
401 errmsg = "Unescaped '<' not allowed in attributes values";
403 case XML_ERR_LITERAL_NOT_STARTED:
404 errmsg = "SystemLiteral \" or ' expected";
406 case XML_ERR_LITERAL_NOT_FINISHED:
407 errmsg = "Unfinished System or Public ID \" or ' expected";
409 case XML_ERR_MISPLACED_CDATA_END:
410 errmsg = "Sequence ']]>' not allowed in content";
412 case XML_ERR_URI_REQUIRED:
413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
415 case XML_ERR_PUBID_REQUIRED:
416 errmsg = "PUBLIC, the Public Identifier is missing";
418 case XML_ERR_HYPHEN_IN_COMMENT:
419 errmsg = "Comment must not contain '--' (double-hyphen)";
421 case XML_ERR_PI_NOT_STARTED:
422 errmsg = "xmlParsePI : no target name";
424 case XML_ERR_RESERVED_XML_NAME:
425 errmsg = "Invalid PI name";
427 case XML_ERR_NOTATION_NOT_STARTED:
428 errmsg = "NOTATION: Name expected here";
430 case XML_ERR_NOTATION_NOT_FINISHED:
431 errmsg = "'>' required to close NOTATION declaration";
433 case XML_ERR_VALUE_REQUIRED:
434 errmsg = "Entity value required";
436 case XML_ERR_URI_FRAGMENT:
437 errmsg = "Fragment not allowed";
439 case XML_ERR_ATTLIST_NOT_STARTED:
440 errmsg = "'(' required to start ATTLIST enumeration";
442 case XML_ERR_NMTOKEN_REQUIRED:
443 errmsg = "NmToken expected in ATTLIST enumeration";
445 case XML_ERR_ATTLIST_NOT_FINISHED:
446 errmsg = "')' required to finish ATTLIST enumeration";
448 case XML_ERR_MIXED_NOT_STARTED:
449 errmsg = "MixedContentDecl : '|' or ')*' expected";
451 case XML_ERR_PCDATA_REQUIRED:
452 errmsg = "MixedContentDecl : '#PCDATA' expected";
454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
455 errmsg = "ContentDecl : Name or '(' expected";
457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458 errmsg = "ContentDecl : ',' '|' or ')' expected";
460 case XML_ERR_PEREF_IN_INT_SUBSET:
462 "PEReference: forbidden within markup decl in internal subset";
464 case XML_ERR_GT_REQUIRED:
465 errmsg = "expected '>'";
467 case XML_ERR_CONDSEC_INVALID:
468 errmsg = "XML conditional section '[' expected";
470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471 errmsg = "Content error in the external subset";
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
475 "conditional section INCLUDE or IGNORE keyword expected";
477 case XML_ERR_CONDSEC_NOT_FINISHED:
478 errmsg = "XML conditional section not closed";
480 case XML_ERR_XMLDECL_NOT_STARTED:
481 errmsg = "Text declaration '<?xml' required";
483 case XML_ERR_XMLDECL_NOT_FINISHED:
484 errmsg = "parsing XML declaration: '?>' expected";
486 case XML_ERR_EXT_ENTITY_STANDALONE:
487 errmsg = "external parsed entities cannot be standalone";
489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490 errmsg = "EntityRef: expecting ';'";
492 case XML_ERR_DOCTYPE_NOT_FINISHED:
493 errmsg = "DOCTYPE improperly terminated";
495 case XML_ERR_LTSLASH_REQUIRED:
496 errmsg = "EndTag: '</' not found";
498 case XML_ERR_EQUAL_REQUIRED:
499 errmsg = "expected '='";
501 case XML_ERR_STRING_NOT_CLOSED:
502 errmsg = "String not closed expecting \" or '";
504 case XML_ERR_STRING_NOT_STARTED:
505 errmsg = "String not started expecting ' or \"";
507 case XML_ERR_ENCODING_NAME:
508 errmsg = "Invalid XML encoding name";
510 case XML_ERR_STANDALONE_VALUE:
511 errmsg = "standalone accepts only 'yes' or 'no'";
513 case XML_ERR_DOCUMENT_EMPTY:
514 errmsg = "Document is empty";
516 case XML_ERR_DOCUMENT_END:
517 errmsg = "Extra content at the end of the document";
519 case XML_ERR_NOT_WELL_BALANCED:
520 errmsg = "chunk is not well balanced";
522 case XML_ERR_EXTRA_CONTENT:
523 errmsg = "extra content at the end of well balanced chunk";
525 case XML_ERR_VERSION_MISSING:
526 errmsg = "Malformed declaration expecting version";
528 case XML_ERR_NAME_TOO_LONG:
529 errmsg = "Name too long use XML_PARSE_HUGE option";
/* Text for codes with no dedicated entry (presumably the default case; its
 * label is not visible in this listing). */
537 errmsg = "Unregistered error message";
/* Two report forms: the message alone, or the message followed by info. */
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
/* Mark the document as not well-formed; outside recovery mode also stop
 * delivering SAX events. */
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsg: raise a fatal parser error with a caller-supplied message.
 * The message is handed to __xmlRaiseError as the argument of a literal "%s"
 * format, so it is not itself interpreted as a format string here.
 * NOTE(review): part of the signature and the guarded early exit are on lines
 * missing from this listing.
 */
565 static void LIBXML_ATTR_FORMAT(3,0)
566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
/* Mark the document as not well-formed; outside recovery mode also stop
 * delivering SAX events. */
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
/*
 * xmlWarningMsg: raise a parser warning (XML_ERR_WARNING, XML_FROM_PARSER).
 *   msg        - used directly as the format string of the report
 *   str1, str2 - passed both as the error's string fields and as the format
 *                arguments
 * None of the visible lines modifies the context's well-formedness flags.
 * NOTE(review): the condition selecting between the two report calls is on a
 * line missing from this listing.
 */
593 static void LIBXML_ATTR_FORMAT(3,0)
594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
597 xmlStructuredErrorFunc schannel = NULL;
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
/* The structured-error callback is used only when the SAX handler block
 * carries the SAX2 magic value. */
602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
604 schannel = ctxt->sax->serror;
/* Report routed through the SAX warning callback when a handler block exists. */
606 __xmlRaiseError(schannel,
607 (ctxt->sax) ? ctxt->sax->warning : NULL,
609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
/* Report without any SAX callback or user data. */
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
629 * Handle a validity error.
/*
 * xmlValidityError: raise a validity error (XML_ERR_ERROR, XML_FROM_DTD).
 *   msg        - used directly as the format string of the report
 *   str1, str2 - passed both as the error's string fields and as the format
 *                arguments
 * NOTE(review): the condition selecting between the two report calls, and
 * whatever follows the first one, are on lines missing from this listing.
 */
631 static void LIBXML_ATTR_FORMAT(3,0)
632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, const xmlChar *str2)
635 xmlStructuredErrorFunc schannel = NULL;
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
/* The structured-error callback is used only when the SAX handler block
 * carries the SAX2 magic value. */
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
/* Report routed through the validation context's error callback and data. */
646 __xmlRaiseError(schannel,
647 ctxt->vctxt.error, ctxt->vctxt.userData,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
/* Report without callback or user data. */
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsgInt: raise a fatal parser error whose message takes one
 * integer. `msg` is used as the format string and `val` is passed both as the
 * error's first integer field and as the format argument.
 */
671 static void LIBXML_ATTR_FORMAT(3,0)
672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673 const char *msg, int val)
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
680 __xmlRaiseError(NULL, NULL, NULL,
681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
/* Mark the document as not well-formed; outside recovery mode also stop
 * delivering SAX events. */
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: a string info
696 * @val: an integer value
697 * @str2: a string info
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsgStrIntStr: raise a fatal parser error whose message takes a
 * string, an integer and a second string, in that order. `msg` is used as the
 * format string with (str1, val, str2) as its arguments.
 * NOTE(review): the end of the parameter list (str2) is on a line missing
 * from this listing.
 */
701 static void LIBXML_ATTR_FORMAT(3,0)
702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703 const char *msg, const xmlChar *str1, int val,
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
711 __xmlRaiseError(NULL, NULL, NULL,
712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
/* Mark the document as not well-formed; outside recovery mode also stop
 * delivering SAX events. */
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsgStr: raise a fatal parser error whose message takes one
 * string. `msg` is used as the format string; `val` is also stored as the
 * error's first string field.
 * NOTE(review): the tail of the __xmlRaiseError call is on a line missing
 * from this listing.
 */
731 static void LIBXML_ATTR_FORMAT(3,0)
732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733 const char *msg, const xmlChar * val)
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
/* Mark the document as not well-formed; outside recovery mode also stop
 * delivering SAX events. */
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
758 * Handle a non fatal parser error
/*
 * xmlErrMsgStr: raise a parser error at XML_ERR_ERROR level (not
 * XML_ERR_FATAL) whose message takes one string. None of the visible lines
 * touches wellFormed or disableSAX.
 * NOTE(review): the tail of the __xmlRaiseError call is on a line missing
 * from this listing.
 */
760 static void LIBXML_ATTR_FORMAT(3,0)
761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
777 * @ctxt: an XML parser context
778 * @error: the error number
780 * @info1: extra information string
781 * @info2: extra information string
783 * Handle a namespace error (XML_ERR_ERROR level); clears ctxt->nsWellFormed
/*
 * xmlNsErr: raise a namespace error (domain XML_FROM_NAMESPACE, level
 * XML_ERR_ERROR) and clear the context's nsWellFormed flag.
 * info1..info3 are passed both as the error's three string fields and as the
 * arguments of the `msg` format.
 * NOTE(review): the `msg` parameter line is missing from this listing.
 */
785 static void LIBXML_ATTR_FORMAT(3,0)
786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
/* Only the namespace flag is cleared on the visible lines. */
801 ctxt->nsWellFormed = 0;
806 * @ctxt: an XML parser context
807 * @error: the error number
809 * @info1: extra information string
810 * @info2: extra information string
812 * Handle a namespace warning error
/*
 * xmlNsWarn: raise a namespace warning (domain XML_FROM_NAMESPACE, level
 * XML_ERR_WARNING). info1..info3 are passed both as the error's three string
 * fields and as the arguments of the `msg` format.
 * NOTE(review): the `msg` parameter line is missing from this listing.
 */
814 static void LIBXML_ATTR_FORMAT(3,0)
815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
/* Parser already stopped: SAX disabled and state is XML_PARSER_EOF. */
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
829 /************************************************************************
831 * Library wide options *
833 ************************************************************************/
837 * @feature: the feature to be examined
839 * Examines if the library has been compiled with a given feature.
841 * Returns a non-zero value if the feature exists, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * feature is requested, non-zero otherwise.
/*
 * xmlHasFeature: report whether a feature was compiled in, by pairing each
 * xmlFeature case with a preprocessor test on the matching build macro.
 * NOTE(review): many lines are missing from this listing (embedded numbering
 * jumps): the switch header, several case labels, the values returned in each
 * branch and the #else/#endif lines. Where an #ifdef appears without a case
 * label above it, the label is on one of the missing lines.
 * Most build macros follow the LIBXML_<NAME>_ENABLED pattern; the two debug
 * cases near the end test DEBUG_MEMORY_LOCATION and LIBXML_DEBUG_RUNTIME.
 */
846 xmlHasFeature(xmlFeature feature)
849 case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
856 #ifdef LIBXML_TREE_ENABLED
861 case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
868 #ifdef LIBXML_PUSH_ENABLED
873 case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
879 case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
885 case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
892 #ifdef LIBXML_SAX1_ENABLED
898 #ifdef LIBXML_FTP_ENABLED
904 #ifdef LIBXML_HTTP_ENABLED
910 #ifdef LIBXML_VALID_ENABLED
916 #ifdef LIBXML_HTML_ENABLED
921 case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
928 #ifdef LIBXML_C14N_ENABLED
933 case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
940 #ifdef LIBXML_XPATH_ENABLED
946 #ifdef LIBXML_XPTR_ENABLED
951 case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
958 #ifdef LIBXML_ICONV_ENABLED
963 case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
969 case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
975 case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
981 case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
988 #ifdef LIBXML_EXPR_ENABLED
993 case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
999 case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1005 case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1011 case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1017 case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1023 case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1030 #ifdef LIBXML_ZLIB_ENABLED
1036 #ifdef LIBXML_LZMA_ENABLED
1042 #ifdef LIBXML_ICU_ENABLED
1053 /************************************************************************
1055 * SAX2 defaulted attributes handling *
1057 ************************************************************************/
1061 * @ctxt: an XML parser context
1063 * Do the SAX2 detection and specific initialization
/*
 * xmlDetectSAX2: decide whether the context's SAX handler is SAX2-capable and
 * cache three interned strings on the context.
 * A NULL context is ignored.
 * NOTE(review): lines 1072-1073 of the original (the alternative to the
 * LIBXML_SAX1_ENABLED branch) and the closing lines are missing from this
 * listing.
 */
1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
/* sax2 is set only when the handler block carries the SAX2 magic value and
 * provides at least one of the namespace-aware element callbacks. */
1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
/* Intern "xml", "xmlns" and XML_XML_NAMESPACE (length given as 36) in the
 * context dictionary. */
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
/* Any failed lookup is reported as a memory error. */
1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
1081 xmlErrMemory(ctxt, NULL);
/* Per-element table of defaulted attributes, filled by xmlAddDefAttrs. */
1085 typedef struct _xmlDefAttrs xmlDefAttrs;
1086 typedef xmlDefAttrs *xmlDefAttrsPtr;
1087 struct _xmlDefAttrs {
1088 int nbAttrs; /* number of defaulted attributes on that element */
1089 int maxAttrs; /* the size of the array */
/* xmlAddDefAttrs writes 5 consecutive slots per attribute: local name, prefix,
 * value, pointer to the end of the value, and an "external" marker or NULL.
 * It allocates the structure with extra trailing pointer storage and indexes
 * `values` beyond the 5 entries declared here. */
1090 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1094 * xmlAttrNormalizeSpace:
1095 * @src: the source string
1096 * @dst: the target string
1098 * Normalize the space in non CDATA attribute values:
1099 * If the attribute type is not CDATA, then the XML processor MUST further
1100 * process the normalized attribute value by discarding any leading and
1101 * trailing space (#x20) characters, and by replacing sequences of space
1102 * (#x20) characters by a single space (#x20) character.
1103 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1104 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1105 * passing src as dst is just fine.
1107 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * xmlAttrNormalizeSpace: copy src to dst while normalizing spaces (0x20).
 * NOTE(review): only the NULL check and the two space-skipping loops are
 * visible here; the copy loop, the trailing-space handling and the return
 * statements are on lines missing from this listing.
 */
1111 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1113 if ((src == NULL) || (dst == NULL))
/* Skip a run of leading spaces. */
1116 while (*src == 0x20) src++;
/* Skip a run of spaces again (presumably inside the copy loop, collapsing
 * interior runs - confirm against the full function). */
1119 while (*src == 0x20) src++;
1133 * xmlAttrNormalizeSpace2:
1134 * @src: the source string
1136 * Normalize the space in non CDATA attribute values, a slightly more complex
1137 * front end to avoid allocation problems when running on attribute values
1138 * coming from the input.
1140 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * xmlAttrNormalizeSpace2: normalize the spaces of an attribute value held in
 * src, updating *len to the resulting length.
 *   ctxt - parser context, used for memory error reporting
 *   src  - value to normalize (may be modified in place)
 *   len  - in/out length of the value
 * NOTE(review): the scanning loop bodies, the branch conditions and the
 * return statements are on lines missing from this listing.
 */
1143 static const xmlChar *
1144 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1147 int remove_head = 0;
1148 int need_realloc = 0;
1151 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
/* Scan over leading spaces. */
1158 while (*cur == 0x20) {
/* A space followed by another space or by the terminator. */
1165 if ((*cur == 0x20) || (*cur == 0)) {
/* Copy path (presumably taken when need_realloc is set): duplicate the value
 * without its leading spaces, normalize the copy in place and recompute the
 * length from the result. */
1175 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1177 xmlErrMemory(ctxt, NULL);
1180 xmlAttrNormalizeSpace(ret, ret);
1181 *len = (int) strlen((const char *)ret);
/* In-place path: only leading spaces to drop. The regions overlap, hence
 * memmove; 1 + *len bytes are moved, one more than the new length. */
1183 } else if (remove_head) {
1184 *len -= remove_head;
1185 memmove(src, src + remove_head, 1 + *len);
1193 * @ctxt: an XML parser context
1194 * @fullname: the element fullname
1195 * @fullattr: the attribute fullname
1196 * @value: the attribute value
1198 * Add a defaulted attribute for an element
/*
 * xmlAddDefAttrs: record a defaulted attribute for an element in
 * ctxt->attsDefault, a hash keyed by (element local name, element prefix).
 *   fullname - element qualified name
 *   fullattr - attribute qualified name
 *   value    - default value; interned in ctxt->dict
 * NOTE(review): the branch conditions after each xmlSplitQName3 call, the
 * error exits and the condition choosing the "external" marker are on lines
 * missing from this listing.
 */
1201 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1202 const xmlChar *fullname,
1203 const xmlChar *fullattr,
1204 const xmlChar *value) {
1205 xmlDefAttrsPtr defaults;
1207 const xmlChar *name;
1208 const xmlChar *prefix;
1211 * Allows to detect attribute redefinitions
1213 if (ctxt->attsSpecial != NULL) {
1214 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
/* Create the table on first use, sharing the context dictionary. */
1218 if (ctxt->attsDefault == NULL) {
1219 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1220 if (ctxt->attsDefault == NULL)
1225 * split the element name into prefix:localname , the string found
1226 * are within the DTD and then not associated to namespace names.
1228 name = xmlSplitQName3(fullname, &len);
1230 name = xmlDictLookup(ctxt->dict, fullname, -1);
1233 name = xmlDictLookup(ctxt->dict, name, -1);
1234 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1238 * make sure there is some storage
1240 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1241 if (defaults == NULL) {
/* First attribute for this element: room for 4 attributes of 5 pointer slots
 * each, in addition to the structure itself. */
1242 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1243 (4 * 5) * sizeof(const xmlChar *));
1244 if (defaults == NULL)
1246 defaults->nbAttrs = 0;
1247 defaults->maxAttrs = 4;
1248 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1249 defaults, NULL) < 0) {
1253 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1254 xmlDefAttrsPtr temp;
/* Table full: double the capacity. The result is received in a temporary, and
 * the hash entry is updated afterwards with the current pointer. */
1256 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1257 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1261 defaults->maxAttrs *= 2;
1262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263 defaults, NULL) < 0) {
1270 * Split the element name into prefix:localname , the string found
1271 * are within the DTD and hen not associated to namespace names.
/* This second split operates on the attribute name (fullattr). */
1273 name = xmlSplitQName3(fullattr, &len);
1275 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1278 name = xmlDictLookup(ctxt->dict, name, -1);
1279 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
/* Slots 0 and 1 of the new 5-slot record: local name and prefix. */
1282 defaults->values[5 * defaults->nbAttrs] = name;
1283 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1284 /* intern the string and precompute the end */
1285 len = xmlStrlen(value);
1286 value = xmlDictLookup(ctxt->dict, value, len);
1287 defaults->values[5 * defaults->nbAttrs + 2] = value;
1288 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
/* Slot 4 holds either the marker string or NULL. */
1290 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1292 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1293 defaults->nbAttrs++;
1298 xmlErrMemory(ctxt, NULL);
1303 * xmlAddSpecialAttr:
1304 * @ctxt: an XML parser context
1305 * @fullname: the element fullname
1306 * @fullattr: the attribute fullname
1307 * @type: the attribute type
1309 * Register this attribute type
/*
 * xmlAddSpecialAttr: remember the declared type of an attribute in
 * ctxt->attsSpecial, a hash keyed by (element name, attribute name).
 * The type is not stored behind a pointer: the integer is cast through `long`
 * into the payload pointer itself, and xmlCleanSpecialAttrCallback reads it
 * back with the reverse cast.
 * NOTE(review): the `type` parameter line and the early exits are on lines
 * missing from this listing.
 */
1312 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1313 const xmlChar *fullname,
1314 const xmlChar *fullattr,
/* Create the table on first use, sharing the context dictionary. */
1317 if (ctxt->attsSpecial == NULL) {
1318 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1319 if (ctxt->attsSpecial == NULL)
/* An entry already exists for this element/attribute pair. */
1323 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
/* NOTE(review): the result of xmlHashAddEntry2 is not tested on the visible
 * lines. */
1326 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1327 (void *) (long) type);
1331 xmlErrMemory(ctxt, NULL);
1336 * xmlCleanSpecialAttrCallback:
1338 * Removes CDATA attributes from the special attribute table
/*
 * xmlCleanSpecialAttrCallback: hash scanner callback used by
 * xmlCleanSpecialAttr. `data` is the parser context; `payload` is the
 * attribute type stored as an integer in the pointer value. Entries whose type
 * is XML_ATTRIBUTE_CDATA are removed from ctxt->attsSpecial.
 */
1341 xmlCleanSpecialAttrCallback(void *payload, void *data,
1342 const xmlChar *fullname, const xmlChar *fullattr,
1343 const xmlChar *unused ATTRIBUTE_UNUSED) {
1344 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
/* The entry is removed from the same table that is being scanned. */
1346 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1347 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1352 * xmlCleanSpecialAttr:
1353 * @ctxt: an XML parser context
1355 * Trim the list of attributes defined to remove all those of type
1356 * CDATA as they are not special. This call should be done when finishing
1357 * to parse the DTD and before starting to parse the document root.
/*
 * xmlCleanSpecialAttr: run xmlCleanSpecialAttrCallback over every entry of
 * ctxt->attsSpecial, then free the table and reset the pointer if no entry is
 * left.
 */
1360 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
/* Nothing to clean when the table was never created. */
1362 if (ctxt->attsSpecial == NULL)
1365 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
/* Drop the now-empty table; no deallocator is passed for the payloads. */
1367 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1368 xmlHashFree(ctxt->attsSpecial, NULL);
1369 ctxt->attsSpecial = NULL;
1375 * xmlCheckLanguageID:
1376 * @lang: pointer to the string value
1378 * Checks that the value conforms to the LanguageID production:
1380 * NOTE: this is somewhat deprecated, those productions were removed from
1381 * the XML Second edition.
1383 * [33] LanguageID ::= Langcode ('-' Subcode)*
1384 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1385 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1386 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1387 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1388 * [38] Subcode ::= ([a-z] | [A-Z])+
1390 * The current REC references the successors of RFC 1766, currently 5646
1392 * http://www.rfc-editor.org/rfc/rfc5646.txt
1393 * langtag = language
1399 * language = 2*3ALPHA ; shortest ISO 639 code
1400 * ["-" extlang] ; sometimes followed by
1401 * ; extended language subtags
1402 * / 4ALPHA ; or reserved for future use
1403 * / 5*8ALPHA ; or registered language subtag
1405 * extlang = 3ALPHA ; selected ISO 639 codes
1406 * *2("-" 3ALPHA) ; permanently reserved
1408 * script = 4ALPHA ; ISO 15924 code
1410 * region = 2ALPHA ; ISO 3166-1 code
1411 * / 3DIGIT ; UN M.49 code
1413 * variant = 5*8alphanum ; registered variants
1414 * / (DIGIT 3alphanum)
1416 * extension = singleton 1*("-" (2*8alphanum))
1418 * ; Single alphanumerics
1419 * ; "x" reserved for private use
1420 * singleton = DIGIT ; 0 - 9
1426 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1427 * The parser below doesn't try to cope with extension or privateuse
1428 * that could be added but that's not interoperable anyway
1430 * Returns 1 if correct 0 otherwise
/*
 * xmlCheckLanguageID: validate a language tag by scanning it subtag by subtag
 * with two cursors: `cur` marks the start of the current subtag and `nxt` is
 * advanced over it, so `nxt - cur` is the subtag length.
 * NOTE(review): the cursor updates, the separator tests, the jumps between
 * sections and most return statements are on lines missing from this listing;
 * the comments below describe only the visible tests.
 * NOTE(review): every visible scanning loop accepts ASCII letters only, while
 * the grammar quoted in the header comment allows alphanumerics in variants -
 * confirm whether that restriction is intended.
 */
1433 xmlCheckLanguageID(const xmlChar * lang)
1435 const xmlChar *cur = lang, *nxt;
/* Legacy forms starting with "i-", "I-", "x-" or "X-". */
1439 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1440 ((cur[0] == 'I') && (cur[1] == '-')) ||
1441 ((cur[0] == 'x') && (cur[1] == '-')) ||
1442 ((cur[0] == 'X') && (cur[1] == '-'))) {
1444 * Still allow IANA code and user code which were coming
1445 * from the previous version of the XML-1.0 specification
1446 * it's deprecated but we should not fail
/* Letters only, and the string must end right after them. */
1449 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1450 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1452 return(cur[0] == 0);
/* Primary language subtag: scan a run of letters. */
1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
/* Four or more letters: tested for a length above 8 or for trailing
 * characters after the run. */
1458 if (nxt - cur >= 4) {
1462 if ((nxt - cur > 8) || (nxt[0] != 0))
1468 /* we got an ISO 639 code */
1476 /* now we can have extlang or script or region or variant */
/* A leading digit is tested separately from the letter scan. */
1477 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1480 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1481 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
/* Length 5..8 matches the variant length range from the header grammar. */
1487 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1491 /* we parsed an extlang */
1499 /* now we can have script or region or variant */
1500 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1508 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1512 /* we parsed a script */
1521 /* now we can have region or variant */
1522 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1526 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1533 /* we parsed a region */
1542 /* now we can just have a variant */
1543 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1544 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
/* A variant must be 5 to 8 characters long. */
1547 if ((nxt - cur < 5) || (nxt - cur > 8))
1550 /* we parsed a variant */
1556 /* extensions and private use subtags not checked */
/* Tests that the two characters after nxt[0] are digits; presumably the
 * three-digit region form reached from the digit tests above - confirm. */
1560 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1561 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1568 /************************************************************************
1570 * Parser stacks related functions and macros *
1572 ************************************************************************/
1574 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1575 const xmlChar ** str);
1580 * @ctxt: an XML parser context
1581 * @prefix: the namespace prefix or NULL
1582 * @URL: the namespace name
1584 * Pushes a new parser namespace on top of the ns stack
1586 * Returns -1 in case of error, -2 if the namespace should be discarded
1587 * and the index in the stack otherwise.
1590 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1592 if (ctxt->options & XML_PARSE_NSCLEAN) {
1594 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1595 if (ctxt->nsTab[i] == prefix) {
1597 if (ctxt->nsTab[i + 1] == URL)
1599 /* out of scope keep it */
1604 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1607 ctxt->nsTab = (const xmlChar **)
1608 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1609 if (ctxt->nsTab == NULL) {
1610 xmlErrMemory(ctxt, NULL);
1614 } else if (ctxt->nsNr >= ctxt->nsMax) {
1615 const xmlChar ** tmp;
1617 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1618 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1620 xmlErrMemory(ctxt, NULL);
1626 ctxt->nsTab[ctxt->nsNr++] = prefix;
1627 ctxt->nsTab[ctxt->nsNr++] = URL;
1628 return (ctxt->nsNr);
1632 * @ctxt: an XML parser context
1633 * @nr: the number to pop
1635 * Pops the top @nr parser prefix/namespace from the ns stack
1637 * Returns the number of namespaces removed
1640 nsPop(xmlParserCtxtPtr ctxt, int nr)
1644 if (ctxt->nsTab == NULL) return(0);
1645 if (ctxt->nsNr < nr) {
1646 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1649 if (ctxt->nsNr <= 0)
1652 for (i = 0;i < nr;i++) {
1654 ctxt->nsTab[ctxt->nsNr] = NULL;
1661 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1662 const xmlChar **atts;
1666 if (ctxt->atts == NULL) {
1667 maxatts = 55; /* allow for 10 attrs by default */
1668 atts = (const xmlChar **)
1669 xmlMalloc(maxatts * sizeof(xmlChar *));
1670 if (atts == NULL) goto mem_error;
1672 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1673 if (attallocs == NULL) goto mem_error;
1674 ctxt->attallocs = attallocs;
1675 ctxt->maxatts = maxatts;
1676 } else if (nr + 5 > ctxt->maxatts) {
1677 maxatts = (nr + 5) * 2;
1678 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1679 maxatts * sizeof(const xmlChar *));
1680 if (atts == NULL) goto mem_error;
1682 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1683 (maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
1686 ctxt->maxatts = maxatts;
1688 return(ctxt->maxatts);
1690 xmlErrMemory(ctxt, NULL);
1696 * @ctxt: an XML parser context
1697 * @value: the parser input
1699 * Pushes a new parser input on top of the input stack
1701 * Returns -1 in case of error, the index in the stack otherwise
1704 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1706 if ((ctxt == NULL) || (value == NULL))
1708 if (ctxt->inputNr >= ctxt->inputMax) {
1709 ctxt->inputMax *= 2;
1711 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1713 sizeof(ctxt->inputTab[0]));
1714 if (ctxt->inputTab == NULL) {
1715 xmlErrMemory(ctxt, NULL);
1716 xmlFreeInputStream(value);
1717 ctxt->inputMax /= 2;
1722 ctxt->inputTab[ctxt->inputNr] = value;
1723 ctxt->input = value;
1724 return (ctxt->inputNr++);
1728 * @ctxt: an XML parser context
1730 * Pops the top parser input from the input stack
1732 * Returns the input just removed
1735 inputPop(xmlParserCtxtPtr ctxt)
1737 xmlParserInputPtr ret;
1741 if (ctxt->inputNr <= 0)
1744 if (ctxt->inputNr > 0)
1745 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1748 ret = ctxt->inputTab[ctxt->inputNr];
1749 ctxt->inputTab[ctxt->inputNr] = NULL;
1754 * @ctxt: an XML parser context
1755 * @value: the element node
1757 * Pushes a new element node on top of the node stack
1759 * Returns -1 in case of error, the index in the stack otherwise
1762 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1764 if (ctxt == NULL) return(0);
1765 if (ctxt->nodeNr >= ctxt->nodeMax) {
1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1770 sizeof(ctxt->nodeTab[0]));
1772 xmlErrMemory(ctxt, NULL);
1775 ctxt->nodeTab = tmp;
1778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1783 xmlHaltParser(ctxt);
1786 ctxt->nodeTab[ctxt->nodeNr] = value;
1788 return (ctxt->nodeNr++);
1793 * @ctxt: an XML parser context
1795 * Pops the top element node from the node stack
1797 * Returns the node just removed
1800 nodePop(xmlParserCtxtPtr ctxt)
1804 if (ctxt == NULL) return(NULL);
1805 if (ctxt->nodeNr <= 0)
1808 if (ctxt->nodeNr > 0)
1809 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1812 ret = ctxt->nodeTab[ctxt->nodeNr];
1813 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1817 #ifdef LIBXML_PUSH_ENABLED
1820 * @ctxt: an XML parser context
1821 * @value: the element name
1822 * @prefix: the element prefix
1823 * @URI: the element namespace name
1825 * Pushes a new element name/prefix/URL on top of the name stack
1827 * Returns -1 in case of error, the index in the stack otherwise
1830 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1831 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1833 if (ctxt->nameNr >= ctxt->nameMax) {
1834 const xmlChar * *tmp;
1837 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1839 sizeof(ctxt->nameTab[0]));
1844 ctxt->nameTab = tmp;
1845 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1847 sizeof(ctxt->pushTab[0]));
1852 ctxt->pushTab = tmp2;
1854 ctxt->nameTab[ctxt->nameNr] = value;
1856 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1857 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1858 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1859 return (ctxt->nameNr++);
1861 xmlErrMemory(ctxt, NULL);
1866 * @ctxt: an XML parser context
1868 * Pops the top element/prefix/URI name from the name stack
1870 * Returns the name just removed
1872 static const xmlChar *
1873 nameNsPop(xmlParserCtxtPtr ctxt)
1877 if (ctxt->nameNr <= 0)
1880 if (ctxt->nameNr > 0)
1881 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1884 ret = ctxt->nameTab[ctxt->nameNr];
1885 ctxt->nameTab[ctxt->nameNr] = NULL;
1888 #endif /* LIBXML_PUSH_ENABLED */
1892 * @ctxt: an XML parser context
1893 * @value: the element name
1895 * Pushes a new element name on top of the name stack
1897 * Returns -1 in case of error, the index in the stack otherwise
1900 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1902 if (ctxt == NULL) return (-1);
1904 if (ctxt->nameNr >= ctxt->nameMax) {
1905 const xmlChar * *tmp;
1906 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1908 sizeof(ctxt->nameTab[0]));
1912 ctxt->nameTab = tmp;
1915 ctxt->nameTab[ctxt->nameNr] = value;
1917 return (ctxt->nameNr++);
1919 xmlErrMemory(ctxt, NULL);
1924 * @ctxt: an XML parser context
1926 * Pops the top element name from the name stack
1928 * Returns the name just removed
1931 namePop(xmlParserCtxtPtr ctxt)
1935 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1938 if (ctxt->nameNr > 0)
1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1942 ret = ctxt->nameTab[ctxt->nameNr];
1943 ctxt->nameTab[ctxt->nameNr] = NULL;
1947 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1948 if (ctxt->spaceNr >= ctxt->spaceMax) {
1951 ctxt->spaceMax *= 2;
1952 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1953 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1955 xmlErrMemory(ctxt, NULL);
1959 ctxt->spaceTab = tmp;
1961 ctxt->spaceTab[ctxt->spaceNr] = val;
1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1963 return(ctxt->spaceNr++);
1966 static int spacePop(xmlParserCtxtPtr ctxt) {
1968 if (ctxt->spaceNr <= 0) return(0);
1970 if (ctxt->spaceNr > 0)
1971 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1973 ctxt->space = &ctxt->spaceTab[0];
1974 ret = ctxt->spaceTab[ctxt->spaceNr];
1975 ctxt->spaceTab[ctxt->spaceNr] = -1;
1980 * Macros for accessing the content. Those should be used only by the parser,
1983 * Dirty macros, i.e. one often need to make assumption on the context to
1986 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1987 * To be used with extreme caution since operations consuming
1988 * characters may move the input buffer to a different location !
1989 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1990 * This should be used internally by the parser
1991 * only to compare to ASCII values otherwise it would break when
1992 * running with UTF-8 encoding.
1993 * RAW same as CUR but in the input buffer, bypass any token
1994 * extraction that may have been done
1995 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1996 * to compare on ASCII based substring.
1997 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1998 * strings without newlines within the parser.
1999 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2000 * defined char within the parser.
2001 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2003 * NEXT Skip to the next character, this does the proper decoding
2004 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2005 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2006 * CUR_CHAR(l) returns the current unicode character (int), set l
2007 * to the number of xmlChars used for the encoding [0-5].
2008 * CUR_SCHAR same but operate on a string instead of the context
2009 * COPY_BUF copy the current unicode char to the target buffer, increment
2011 * GROW, SHRINK handling of input buffers
/*
 * Byte-level accessors on the current input. Every expansion refers to a
 * variable named ctxt, so they can only be used where one is in scope.
 * RAW and CUR expand to the same expression: the byte at input->cur.
 * NXT(val) indexes from input->cur without any bounds check.
 */
2014 #define RAW (*ctxt->input->cur)
2015 #define CUR (*ctxt->input->cur)
2016 #define NXT(val) ctxt->input->cur[(val)]
2017 #define CUR_PTR ctxt->input->cur
2018 #define BASE_PTR ctxt->input->base
/*
 * CMPn(s, c1..cn): true when the first n bytes at s, read as unsigned
 * char, equal c1..cn in that order. Each macro is built on the previous
 * one, so the && chain stops at the first byte that differs and later
 * bytes are not read. s is used unparenthesised after a cast, so it
 * should be a simple pointer expression.
 */
2020 #define CMP4( s, c1, c2, c3, c4 ) \
2021 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2022 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2023 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2024 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2025 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2026 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2027 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2028 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2029 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2030 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2031 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2032 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2033 ((unsigned char *) s)[ 8 ] == c9 )
2034 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2035 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2036 ((unsigned char *) s)[ 9 ] == c10 )
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * character and column counters. No line accounting is done, so the
 * skipped bytes must not contain a newline. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference() and an exhausted
 * input that cannot be refilled is popped.
 * val is evaluated three times: pass an expression without side effects.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one byte at a time so that line and
 * column counters stay correct across newlines.
 * val is parenthesised in the loop test so that an argument containing
 * an operator of lower precedence than '<' compares as a whole. It is
 * evaluated on every iteration: pass an expression without side effects.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
2061 #define SHRINK if ((ctxt->progressive == 0) && \
2062 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2063 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2066 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2067 xmlParserInputShrink(ctxt->input);
2068 if ((*ctxt->input->cur == 0) &&
2069 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2073 #define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2077 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2083 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2084 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2086 xmlHaltParser(ctxt);
2089 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2090 if ((ctxt->input->cur > ctxt->input->end) ||
2091 (ctxt->input->cur < ctxt->input->base)) {
2092 xmlHaltParser(ctxt);
2093 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2096 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2097 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the column and character
 * counters, and try to read more data when the new position holds 0.
 * No newline handling. The expansion is a bare brace block.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/column from its first byte, then hand a '%' at the new
 * position to xmlParserHandlePEReference().
 * The arguments of NEXTL, CUR_CHAR, CUR_SCHAR and COPY_BUF are
 * parenthesised so that compound expressions expand as a whole.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Current character from the context / from a string; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i: one byte when l == 1, otherwise whatever
 * xmlCopyCharMultiByte() reports. The expansion is an unbraced if/else
 * without a trailing semicolon, so it must be used as a full statement.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2129 * xmlSkipBlankChars:
2130 * @ctxt: the XML parser context
2132 * skip all blanks character found at that point in the input streams.
2133 * It pops up finished entities in the process if allowable at that point.
2135 * Returns the number of space chars skipped
2139 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2143 * It's Okay to use CUR/NEXT here since all the blanks are on
2146 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2149 * if we are in the document content, go really fast
2151 cur = ctxt->input->cur;
2152 while (IS_BLANK_CH(*cur)) {
2154 ctxt->input->line++; ctxt->input->col = 1;
2161 ctxt->input->cur = cur;
2162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2163 cur = ctxt->input->cur;
2166 ctxt->input->cur = cur;
2171 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2172 (ctxt->instate != XML_PARSER_EOF))) {
2177 while ((cur == 0) && (ctxt->inputNr > 1) &&
2178 (ctxt->instate != XML_PARSER_COMMENT)) {
2183 * Need to handle support of entities branching here
2185 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2186 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2187 (ctxt->instate != XML_PARSER_EOF));
2192 /************************************************************************
2194 * Commodity functions to handle entities *
2196 ************************************************************************/
2200 * @ctxt: an XML parser context
2202 * xmlPopInput: the current input pointed by ctxt->input came to an end
2203 * pop it and return the next char.
2205 * Returns the current xmlChar in the parser context
2208 xmlPopInput(xmlParserCtxtPtr ctxt) {
2209 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2210 if (xmlParserDebugEntities)
2211 xmlGenericError(xmlGenericErrorContext,
2212 "Popping input %d\n", ctxt->inputNr);
2213 xmlFreeInputStream(inputPop(ctxt));
2214 if ((*ctxt->input->cur == 0) &&
2215 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2216 return(xmlPopInput(ctxt));
2222 * @ctxt: an XML parser context
2223 * @input: an XML parser input fragment (entity, XML fragment ...).
2225 * xmlPushInput: switch to a new input stream which is stacked on top
2226 * of the previous one(s).
2227 * Returns -1 in case of error or the index in the input stack
2230 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2232 if (input == NULL) return(-1);
2234 if (xmlParserDebugEntities) {
2235 if ((ctxt->input != NULL) && (ctxt->input->filename))
2236 xmlGenericError(xmlGenericErrorContext,
2237 "%s(%d): ", ctxt->input->filename,
2239 xmlGenericError(xmlGenericErrorContext,
2240 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2242 ret = inputPush(ctxt, input);
2243 if (ctxt->instate == XML_PARSER_EOF)
2251 * @ctxt: an XML parser context
2253 * parse Reference declarations
2255 * [66] CharRef ::= '&#' [0-9]+ ';' |
2256 * '&#x' [0-9a-fA-F]+ ';'
2258 * [ WFC: Legal Character ]
2259 * Characters referred to using character references must match the
2260 * production for Char.
2262 * Returns the value parsed (as an int), 0 in case of error
2265 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2266 unsigned int val = 0;
2268 unsigned int outofrange = 0;
2271 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2273 if ((RAW == '&') && (NXT(1) == '#') &&
2277 while (RAW != ';') { /* loop blocked by count */
2281 if (ctxt->instate == XML_PARSER_EOF)
2284 if ((RAW >= '0') && (RAW <= '9'))
2285 val = val * 16 + (CUR - '0');
2286 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2287 val = val * 16 + (CUR - 'a') + 10;
2288 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2289 val = val * 16 + (CUR - 'A') + 10;
2291 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2302 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2307 } else if ((RAW == '&') && (NXT(1) == '#')) {
2310 while (RAW != ';') { /* loop blocked by count */
2314 if (ctxt->instate == XML_PARSER_EOF)
2317 if ((RAW >= '0') && (RAW <= '9'))
2318 val = val * 10 + (CUR - '0');
2320 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2331 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2337 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2341 * [ WFC: Legal Character ]
2342 * Characters referred to using character references must match the
2343 * production for Char.
2345 if ((IS_CHAR(val) && (outofrange == 0))) {
2348 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2349 "xmlParseCharRef: invalid xmlChar value %d\n",
2356 * xmlParseStringCharRef:
2357 * @ctxt: an XML parser context
2358 * @str: a pointer to an index in the string
2360 * parse Reference declarations, variant parsing from a string rather
2361 * than an an input flow.
2363 * [66] CharRef ::= '&#' [0-9]+ ';' |
2364 * '&#x' [0-9a-fA-F]+ ';'
2366 * [ WFC: Legal Character ]
2367 * Characters referred to using character references must match the
2368 * production for Char.
2370 * Returns the value parsed (as an int), 0 in case of error, str will be
2371 * updated to the current value of the index
2374 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2377 unsigned int val = 0;
2378 unsigned int outofrange = 0;
2380 if ((str == NULL) || (*str == NULL)) return(0);
2383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2386 while (cur != ';') { /* Non input consuming loop */
2387 if ((cur >= '0') && (cur <= '9'))
2388 val = val * 16 + (cur - '0');
2389 else if ((cur >= 'a') && (cur <= 'f'))
2390 val = val * 16 + (cur - 'a') + 10;
2391 else if ((cur >= 'A') && (cur <= 'F'))
2392 val = val * 16 + (cur - 'A') + 10;
2394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2406 } else if ((cur == '&') && (ptr[1] == '#')){
2409 while (cur != ';') { /* Non input consuming loops */
2410 if ((cur >= '0') && (cur <= '9'))
2411 val = val * 10 + (cur - '0');
2413 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2426 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2432 * [ WFC: Legal Character ]
2433 * Characters referred to using character references must match the
2434 * production for Char.
2436 if ((IS_CHAR(val) && (outofrange == 0))) {
2439 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2440 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2447 * xmlNewBlanksWrapperInputStream:
2448 * @ctxt: an XML parser context
2449 * @entity: an Entity pointer
2451 * Create a new input stream for wrapping
2452 * blanks around a PEReference
2454 * Returns the new input stream or NULL
2457 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2459 static xmlParserInputPtr
2460 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2461 xmlParserInputPtr input;
2464 if (entity == NULL) {
2465 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2466 "xmlNewBlanksWrapperInputStream entity\n");
2469 if (xmlParserDebugEntities)
2470 xmlGenericError(xmlGenericErrorContext,
2471 "new blanks wrapper for entity: %s\n", entity->name);
2472 input = xmlNewInputStream(ctxt);
2473 if (input == NULL) {
2476 length = xmlStrlen(entity->name) + 5;
2477 buffer = xmlMallocAtomic(length);
2478 if (buffer == NULL) {
2479 xmlErrMemory(ctxt, NULL);
2485 buffer [length-3] = ';';
2486 buffer [length-2] = ' ';
2487 buffer [length-1] = 0;
2488 memcpy(buffer + 2, entity->name, length - 5);
2489 input->free = deallocblankswrapper;
2490 input->base = buffer;
2491 input->cur = buffer;
2492 input->length = length;
2493 input->end = &buffer[length];
2498 * xmlParserHandlePEReference:
2499 * @ctxt: the parser context
2501 * [69] PEReference ::= '%' Name ';'
2503 * [ WFC: No Recursion ]
2504 * A parsed entity must not contain a recursive
2505 * reference to itself, either directly or indirectly.
2507 * [ WFC: Entity Declared ]
2508 * In a document without any DTD, a document with only an internal DTD
2509 * subset which contains no parameter entity references, or a document
2510 * with "standalone='yes'", ... ... The declaration of a parameter
2511 * entity must precede any reference to it...
2513 * [ VC: Entity Declared ]
2514 * In a document with an external subset or external parameter entities
2515 * with "standalone='no'", ... ... The declaration of a parameter entity
2516 * must precede any reference to it...
2519 * Parameter-entity references may only appear in the DTD.
2520 * NOTE: misleading but this is handled.
2522 * A PEReference may have been detected in the current input stream
2523 * the handling is done accordingly to
2524 * http://www.w3.org/TR/REC-xml#entproc
2526 * - Included in literal in entity values
2527 * - Included as Parameter Entity reference within DTDs
/*
 * Reader notes for xmlParserHandlePEReference (some short lines of this
 * function are not present in this listing; the notes only describe what
 * the remaining lines show):
 *  - Nothing happens unless the current byte is '%'.
 *  - The switch on ctxt->instate decides whether a reference may be
 *    processed here: the EOF, prolog/start/misc and epilog states raise
 *    XML_ERR_PEREF_AT_EOF, XML_ERR_PEREF_IN_PROLOG and
 *    XML_ERR_PEREF_IN_EPILOG respectively. In the DTD state the top-level
 *    input of an internal subset and a '%' followed by a blank or a 0
 *    byte are tested separately.
 *  - The name is read with xmlParseName(); a missing name raises
 *    XML_ERR_PEREF_NO_NAME. The entity is then looked up through the SAX
 *    getParameterEntity callback when one is installed.
 *  - Unknown entity: fatal XML_ERR_UNDECLARED_ENTITY when standalone is
 *    1, or when there is neither an external subset nor any PE reference;
 *    otherwise XML_WAR_UNDECLARED_ENTITY is raised as a validity error
 *    (validating, with an error handler) or as a warning.
 *    xmlParserEntityCheck() is called afterwards.
 *  - Known entity while the current input is not a blanks wrapper (its
 *    free callback is not deallocblankswrapper): a blanks wrapper input
 *    is created and pushed.
 *  - Known entity inside a blanks wrapper: internal and external
 *    parameter entities get an entity input pushed. The conjunction on
 *    the XML_PARSE_* options, replaceEntities and validate singles out
 *    external entities when no loading option is active. After the push,
 *    if at least 4 bytes are available the encoding is detected and
 *    switched, and an external entity starting with "<?xml" followed by a
 *    blank has its text declaration parsed.
 *  - Any other entity type raises XML_ERR_ENTITY_IS_PARAMETER; the last
 *    error in the function is XML_ERR_PEREF_SEMICOL_MISSING.
 */
2530 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2531 const xmlChar *name;
2532 xmlEntityPtr entity = NULL;
2533 xmlParserInputPtr input;
2535 if (RAW != '%') return;
2536 switch(ctxt->instate) {
2537 case XML_PARSER_CDATA_SECTION:
2539 case XML_PARSER_COMMENT:
2541 case XML_PARSER_START_TAG:
2543 case XML_PARSER_END_TAG:
2545 case XML_PARSER_EOF:
2546 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2548 case XML_PARSER_PROLOG:
2549 case XML_PARSER_START:
2550 case XML_PARSER_MISC:
2551 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2553 case XML_PARSER_ENTITY_DECL:
2554 case XML_PARSER_CONTENT:
2555 case XML_PARSER_ATTRIBUTE_VALUE:
2557 case XML_PARSER_SYSTEM_LITERAL:
2558 case XML_PARSER_PUBLIC_LITERAL:
2559 /* we just ignore it there */
2561 case XML_PARSER_EPILOG:
2562 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2564 case XML_PARSER_ENTITY_VALUE:
2566 * NOTE: in the case of entity values, we don't do the
2567 * substitution here since we need the literal
2568 * entity value to be able to save the internal
2569 * subset of the document.
2570 * This will be handled by xmlStringDecodeEntities
2573 case XML_PARSER_DTD:
2575 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2576 * In the internal DTD subset, parameter-entity references
2577 * can occur only where markup declarations can occur, not
2578 * within markup declarations.
2579 * In that case this is handled in xmlParseMarkupDecl
2581 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2583 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2586 case XML_PARSER_IGNORE:
2591 name = xmlParseName(ctxt);
2592 if (xmlParserDebugEntities)
2593 xmlGenericError(xmlGenericErrorContext,
2594 "PEReference: %s\n", name);
2596 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2600 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2601 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2602 if (ctxt->instate == XML_PARSER_EOF)
2604 if (entity == NULL) {
2607 * [ WFC: Entity Declared ]
2608 * In a document without any DTD, a document with only an
2609 * internal DTD subset which contains no parameter entity
2610 * references, or a document with "standalone='yes'", ...
2611 * ... The declaration of a parameter entity must precede
2612 * any reference to it...
2614 if ((ctxt->standalone == 1) ||
2615 ((ctxt->hasExternalSubset == 0) &&
2616 (ctxt->hasPErefs == 0))) {
2617 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2618 "PEReference: %%%s; not found\n", name);
2621 * [ VC: Entity Declared ]
2622 * In a document with an external subset or external
2623 * parameter entities with "standalone='no'", ...
2624 * ... The declaration of a parameter entity must precede
2625 * any reference to it...
2627 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2628 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2629 "PEReference: %%%s; not found\n",
2632 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2633 "PEReference: %%%s; not found\n",
2637 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2638 } else if (ctxt->input->free != deallocblankswrapper) {
2639 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2640 if (xmlPushInput(ctxt, input) < 0)
2643 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2644 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2646 xmlCharEncoding enc;
2649 * Note: external parameter entities will not be loaded, it
2650 * is not required for a non-validating parser, unless the
2651 * option of validating, or substituting entities were
2652 * given. Doing so is far more secure as the parser will
2653 * only process data coming from the document entity by
2656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2657 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2658 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2659 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2660 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2661 (ctxt->replaceEntities == 0) &&
2662 (ctxt->validate == 0))
2666 * handle the extra spaces added before and after
2667 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2668 * this is done independently.
2670 input = xmlNewEntityInputStream(ctxt, entity);
2671 if (xmlPushInput(ctxt, input) < 0)
2675 * Get the 4 first bytes and decode the charset
2676 * if enc != XML_CHAR_ENCODING_NONE
2677 * plug some encoding conversion routines.
2678 * Note that, since we may have some non-UTF8
2679 * encoding (like UTF16, bug 135229), the 'length'
2680 * is not known, but we can calculate based upon
2681 * the amount of data in the buffer.
2684 if (ctxt->instate == XML_PARSER_EOF)
2686 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2691 enc = xmlDetectCharEncoding(start, 4);
2692 if (enc != XML_CHAR_ENCODING_NONE) {
2693 xmlSwitchEncoding(ctxt, enc);
2697 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2698 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2699 (IS_BLANK_CH(NXT(5)))) {
2700 xmlParseTextDecl(ctxt);
2703 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2704 "PEReference: %s is not a parameter entity\n",
2709 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n; a result smaller than
 * the current size is treated as an overflow. On failure the original
 * buffer is left untouched for the mem_error handler to release.
 * n is parenthesised so that any expression can be passed; buffer must
 * be a plain identifier because of the ## pasting.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2730 * xmlStringLenDecodeEntities:
2731 * @ctxt: the parser context
2732 * @str: the input string
2733 * @len: the string length
2734 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2735 * @end: an end marker xmlChar, 0 if none
2736 * @end2: an end marker xmlChar, 0 if none
2737 * @end3: an end marker xmlChar, 0 if none
2739 * Takes a entity string content and process to do the adequate substitutions.
2741 * [67] Reference ::= EntityRef | CharRef
2743 * [69] PEReference ::= '%' Name ';'
2745 * Returns A newly allocated string with the substitution done. The caller
2746 * must deallocate it !
/*
 * Decodes the references found in str into a newly allocated buffer.
 *  - "&#...;" character references are always decoded through
 *    xmlParseStringCharRef() and the resulting value is appended.
 *  - "&name;" is handled only when what contains XML_SUBSTITUTE_REF:
 *    a predefined entity contributes content[0], an entity with content
 *    is decoded recursively through xmlStringDecodeEntities() and the
 *    result appended, and an entity without content is written back
 *    as "&name;".
 *  - "%name;" is handled only when what contains XML_SUBSTITUTE_PEREF.
 *  - any other character is copied unchanged with COPY_BUF.
 * The scan stops at a 0 character or at one of end, end2, end3.
 */
2749 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2750 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2751 xmlChar *buffer = NULL;
2752 size_t buffer_size = 0;
2755 xmlChar *current = NULL;
2756 xmlChar *rep = NULL;
2757 const xmlChar *last;
2761 if ((ctxt == NULL) || (str == NULL) || (len < 0))
/*
 * Recursion guard: a depth above 40 without XML_PARSE_HUGE, or a depth
 * above 1024 in any case, is reported as XML_ERR_ENTITY_LOOP.
 */
2765 if (((ctxt->depth > 40) &&
2766 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2767 (ctxt->depth > 1024)) {
2768 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2773 * allocate a translation buffer.
2775 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2776 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2777 if (buffer == NULL) goto mem_error;
2780 * OK loop until we reach one of the ending char or a size limit.
2781 * we are operating on already parsed values.
2784 c = CUR_SCHAR(str, l);
2787 while ((c != 0) && (c != end) && /* non input consuming loop */
2788 (c != end2) && (c != end3)) {
2791 if ((c == '&') && (str[1] == '#')) {
2792 int val = xmlParseStringCharRef(ctxt, &str);
2794 COPY_BUF(0,buffer,nbchars,val);
/* grow once fewer than XML_PARSER_BUFFER_SIZE bytes of headroom remain */
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2799 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2800 if (xmlParserDebugEntities)
2801 xmlGenericError(xmlGenericErrorContext,
2802 "String decoding Entity Reference: %.30s\n",
2804 ent = xmlParseStringEntityRef(ctxt, &str);
2805 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2806 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2808 xmlParserEntityCheck(ctxt, 0, ent, 0);
2810 ctxt->nbentities += ent->checked / 2;
2811 if ((ent != NULL) &&
2812 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2813 if (ent->content != NULL) {
2814 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2815 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2819 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2820 "predefined entity has no content\n");
2822 } else if ((ent != NULL) && (ent->content != NULL)) {
/* entity with content: decode its replacement text recursively */
2824 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2828 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2829 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2834 while (*current != 0) { /* non input consuming loop */
2835 buffer[nbchars++] = *current++;
2836 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2837 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2839 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2845 } else if (ent != NULL) {
/* entity without content: write the reference back as &name; */
2846 int i = xmlStrlen(ent->name);
2847 const xmlChar *cur = ent->name;
2849 buffer[nbchars++] = '&';
2850 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2851 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2854 buffer[nbchars++] = *cur++;
2855 buffer[nbchars++] = ';';
2857 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2858 if (xmlParserDebugEntities)
2859 xmlGenericError(xmlGenericErrorContext,
2860 "String decoding PE Reference: %.30s\n", str);
2861 ent = xmlParseStringPEReference(ctxt, &str);
2862 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2864 xmlParserEntityCheck(ctxt, 0, ent, 0);
2866 ctxt->nbentities += ent->checked / 2;
2868 if (ent->content == NULL) {
2870 * Note: external parsed entities will not be loaded,
2871 * it is not required for a non-validating parser to
2872 * complete external PEreferences coming from the
2875 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2876 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2877 (ctxt->validate != 0)) {
2878 xmlLoadEntityContent(ctxt, ent);
2880 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2881 "not validating will not read content for PE entity %s\n",
2886 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2891 while (*current != 0) { /* non input consuming loop */
2892 buffer[nbchars++] = *current++;
2893 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2894 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2896 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2904 COPY_BUF(l,buffer,nbchars,c);
2906 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2907 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2911 c = CUR_SCHAR(str, l);
2915 buffer[nbchars] = 0;
2919 xmlErrMemory(ctxt, NULL);
2929 * xmlStringDecodeEntities:
2930 * @ctxt: the parser context
2931 * @str: the input string
2932 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2933 * @end: an end marker xmlChar, 0 if none
2934 * @end2: an end marker xmlChar, 0 if none
2935 * @end3: an end marker xmlChar, 0 if none
2937 * Takes an entity string content and processes it to do the adequate substitutions.
2939 * [67] Reference ::= EntityRef | CharRef
2941 * [69] PEReference ::= '%' Name ';'
2943 * Returns A newly allocated string with the substitution done. The caller
2944 * must deallocate it !
/*
 * Convenience wrapper for NUL-terminated input: returns NULL when ctxt or
 * str is NULL, otherwise forwards to xmlStringLenDecodeEntities() with
 * the length computed by xmlStrlen(str).
 */
2947 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2948 xmlChar end, xmlChar end2, xmlChar end3) {
2949 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2950 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2954 /************************************************************************
2956 * Commodity functions, cleanup needed ? *
2958 ************************************************************************/
2962 * @ctxt: an XML parser context
2964 * @len: the size of @str
2965 * @blank_chars: we know the chars are blanks
2967 * Is this a sequence of blank chars that one can ignore ?
2969 * Returns 1 if ignorable 0 otherwise.
/*
 * Decision order visible in this routine:
 *  1. the SAX ignorableWhitespace and characters callbacks are compared
 *     first, then the current xml:space value (ctxt->space);
 *  2. when blank_chars is 0 the string is scanned and any non-blank
 *     character yields 0;
 *  3. without a current node the answer is 0;
 *  4. when a document is available, xmlIsMixedElement() decides:
 *     0 yields 1 (ignorable), 1 yields 0;
 *  5. otherwise a heuristic on the next input characters (RAW, NXT(1))
 *     and on the children of the current node is applied.
 */
2972 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2975 xmlNodePtr lastChild;
2978 * Don't spend time trying to differentiate them, the same callback is
2981 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2985 * Check for xml:space value.
2987 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2988 (*(ctxt->space) == -2))
2992 * Check that the string is made of blanks
2994 if (blank_chars == 0) {
2995 for (i = 0;i < len;i++)
2996 if (!(IS_BLANK_CH(str[i]))) return(0);
3000 * Look if the element is mixed content in the DTD if available
3002 if (ctxt->node == NULL) return(0);
3003 if (ctxt->myDoc != NULL) {
3004 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3005 if (ret == 0) return(1);
3006 if (ret == 1) return(0);
3010 * Otherwise, heuristic :-\
3012 if ((RAW != '<') && (RAW != 0xD)) return(0);
3013 if ((ctxt->node->children == NULL) &&
3014 (RAW == '<') && (NXT(1) == '/')) return(0);
3016 lastChild = xmlGetLastChild(ctxt->node);
3017 if (lastChild == NULL) {
3018 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3019 (ctxt->node->content != NULL)) return(0);
3020 } else if (xmlNodeIsText(lastChild))
3022 else if ((ctxt->node->children != NULL) &&
3023 (xmlNodeIsText(ctxt->node->children)))
3028 /************************************************************************
3030 * Extra stuff for namespace support *
3031 * Relates to http://www.w3.org/TR/WD-xml-names *
3033 ************************************************************************/
3037 * @ctxt: an XML parser context
3038 * @name: the qualified name string to split
3039 * @prefix: a xmlChar **
3041 * parse a UTF-8 encoded XML qualified name string
3043 * [NS 5] QName ::= (Prefix ':')? LocalPart
3045 * [NS 6] Prefix ::= NCName
3047 * [NS 7] LocalPart ::= NCName
3049 * Returns the local part, and prefix is updated
3050 * to get the Prefix if any.
/*
 * Splits name at the first ':'.
 * The characters in front of the colon are collected in the stack
 * buffer buf (XML_MAX_NAMELEN + 5 bytes) while len < max; for longer
 * input a heap buffer is obtained with xmlMallocAtomic(), the bytes
 * collected so far are copied into it with memcpy() and it is enlarged
 * with xmlRealloc() whenever len + 10 exceeds max.  The same scheme is
 * then applied to the part after the colon.
 * A NULL prefix or a NULL name yields NULL.  Unless XML_XML_NAMESPACE
 * is defined, a name starting with "xml:" is returned as a plain copy
 * of name.  Allocation failures are reported with xmlErrMemory().
 */
3054 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3055 xmlChar buf[XML_MAX_NAMELEN + 5];
3056 xmlChar *buffer = NULL;
3058 int max = XML_MAX_NAMELEN;
3059 xmlChar *ret = NULL;
3060 const xmlChar *cur = name;
3063 if (prefix == NULL) return(NULL);
3066 if (cur == NULL) return(NULL);
3068 #ifndef XML_XML_NAMESPACE
3069 /* xml: prefix is not really a namespace */
3070 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3071 (cur[2] == 'l') && (cur[3] == ':'))
3072 return(xmlStrdup(name));
3075 /* nasty but well=formed */
3077 return(xmlStrdup(name));
3080 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3086 * Okay someone managed to make a huge name, so he's ready to pay
3087 * for the processing speed.
3091 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3092 if (buffer == NULL) {
3093 xmlErrMemory(ctxt, NULL);
3096 memcpy(buffer, buf, len);
3097 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3098 if (len + 10 > max) {
3102 tmp = (xmlChar *) xmlRealloc(buffer,
3103 max * sizeof(xmlChar));
3106 xmlErrMemory(ctxt, NULL);
3117 if ((c == ':') && (*cur == 0)) {
3121 return(xmlStrdup(name));
3125 ret = xmlStrndup(buf, len);
3129 max = XML_MAX_NAMELEN;
3137 return(xmlStrndup(BAD_CAST "", 0));
3142 * Check that the first character is proper to start
3145 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3146 ((c >= 0x41) && (c <= 0x5A)) ||
3147 (c == '_') || (c == ':'))) {
3149 int first = CUR_SCHAR(cur, l);
3151 if (!IS_LETTER(first) && (first != '_')) {
3152 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3153 "Name %s is not XML Namespace compliant\n",
3159 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3165 * Okay someone managed to make a huge name, so he's ready to pay
3166 * for the processing speed.
3170 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3171 if (buffer == NULL) {
3172 xmlErrMemory(ctxt, NULL);
3175 memcpy(buffer, buf, len);
3176 while (c != 0) { /* tested bigname2.xml */
3177 if (len + 10 > max) {
3181 tmp = (xmlChar *) xmlRealloc(buffer,
3182 max * sizeof(xmlChar));
3184 xmlErrMemory(ctxt, NULL);
3197 ret = xmlStrndup(buf, len);
3206 /************************************************************************
3208 * The parser itself *
3209 * Relates to http://www.w3.org/TR/REC-xml *
3211 ************************************************************************/
3213 /************************************************************************
3215 * Routines to parse Name, NCName and NmToken *
3217 ************************************************************************/
/*
 * File-local call counters, one per name-parsing routine.  The
 * increments visible in this file section are nbParseNameComplex
 * (xmlParseNameComplex), nbParseNCNameComplex (xmlParseNCNameComplex)
 * and nbParseStringName (xmlParseStringName).
 */
3219 static unsigned long nbParseName = 0;
3220 static unsigned long nbParseNmToken = 0;
3221 static unsigned long nbParseNCName = 0;
3222 static unsigned long nbParseNCNameComplex = 0;
3223 static unsigned long nbParseNameComplex = 0;
3224 static unsigned long nbParseStringName = 0;
3228 * The two following functions are related to the change of accepted
3229 * characters for Name and NmToken in the Revision 5 of XML-1.0.
3230 * They correspond to the modified production [4] and the new production [4a]
3231 * changes in that revision. Also note that the macros used for the
3232 * productions Letter, Digit, CombiningChar and Extender are not needed
3234 * We still keep compatibility to pre-revision5 parsing semantic if the
3235 * new XML_PARSE_OLD10 option is given to the parser.
/*
 * Tests whether the code point c may start a Name.
 * When XML_PARSE_OLD10 is not set in ctxt->options the explicit ranges
 * listed below are used (ASCII letters, '_', ':' and the listed
 * non-ASCII ranges); ' ', '>' and '/' are tested first as quick
 * rejections.  When XML_PARSE_OLD10 is set the test is
 * IS_LETTER(c), '_' or ':'.
 */
3238 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3244 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3245 (((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))
3262 if (IS_LETTER(c) || (c == '_') || (c == ':'))
/*
 * Tests whether the code point c may appear inside a Name.
 * Compared with the name-start test, the ranges marked "!start" below
 * are accepted in addition: ASCII digits, '-', '.', 0xB7,
 * 0x300-0x36F and 0x203F-0x2040.  When XML_PARSE_OLD10 is set the test
 * is built from IS_LETTER, IS_DIGIT, '.', '-', '_', ':' and
 * IS_COMBINING (further alternatives may follow on lines not shown
 * here).
 */
3269 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3270 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3272 * Use the new checks of production [4] [4a] amd [5] of the
3273 * Update 5 of XML-1.0
3275 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3276 (((c >= 'a') && (c <= 'z')) ||
3277 ((c >= 'A') && (c <= 'Z')) ||
3278 ((c >= '0') && (c <= '9')) || /* !start */
3279 (c == '_') || (c == ':') ||
3280 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3281 ((c >= 0xC0) && (c <= 0xD6)) ||
3282 ((c >= 0xD8) && (c <= 0xF6)) ||
3283 ((c >= 0xF8) && (c <= 0x2FF)) ||
3284 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3285 ((c >= 0x370) && (c <= 0x37D)) ||
3286 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3287 ((c >= 0x200C) && (c <= 0x200D)) ||
3288 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3289 ((c >= 0x2070) && (c <= 0x218F)) ||
3290 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3291 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3292 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3293 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3294 ((c >= 0x10000) && (c <= 0xEFFFF))))
3297 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298 (c == '.') || (c == '-') ||
3299 (c == '_') || (c == ':') ||
3300 (IS_COMBINING(c)) ||
/*
 * Forward declaration: xmlParseAttValue() further down calls this
 * static routine with len = NULL, alloc = NULL and normalize = 0.
 */
3307 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3308 int *len, int *alloc, int normalize);
/*
 * General Name parser; xmlParseName() falls back to it when its ASCII
 * fast path does not apply.
 * Two character classifications are used: the explicit ranges of
 * "Update 5 of XML-1.0" when XML_PARSE_OLD10 is not set, and
 * IS_LETTER / IS_DIGIT / IS_COMBINING / IS_EXTENDER otherwise.
 * A name longer than XML_MAX_NAME_LENGTH is reported as
 * XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set.
 * The result is interned in ctxt->dict from the len bytes that precede
 * ctxt->input->cur; when cur sits on the LF of a CR LF pair the start
 * is taken one byte earlier (cur - (len + 1)).
 */
3310 static const xmlChar *
3311 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3317 nbParseNameComplex++;
3321 * Handler for more complex cases
3324 if (ctxt->instate == XML_PARSER_EOF)
3327 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3329 * Use the new checks of production [4] [4a] amd [5] of the
3330 * Update 5 of XML-1.0
3332 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3333 (!(((c >= 'a') && (c <= 'z')) ||
3334 ((c >= 'A') && (c <= 'Z')) ||
3335 (c == '_') || (c == ':') ||
3336 ((c >= 0xC0) && (c <= 0xD6)) ||
3337 ((c >= 0xD8) && (c <= 0xF6)) ||
3338 ((c >= 0xF8) && (c <= 0x2FF)) ||
3339 ((c >= 0x370) && (c <= 0x37D)) ||
3340 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341 ((c >= 0x200C) && (c <= 0x200D)) ||
3342 ((c >= 0x2070) && (c <= 0x218F)) ||
3343 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3344 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3345 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3346 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3347 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3354 (((c >= 'a') && (c <= 'z')) ||
3355 ((c >= 'A') && (c <= 'Z')) ||
3356 ((c >= '0') && (c <= '9')) || /* !start */
3357 (c == '_') || (c == ':') ||
3358 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3359 ((c >= 0xC0) && (c <= 0xD6)) ||
3360 ((c >= 0xD8) && (c <= 0xF6)) ||
3361 ((c >= 0xF8) && (c <= 0x2FF)) ||
3362 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3363 ((c >= 0x370) && (c <= 0x37D)) ||
3364 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3365 ((c >= 0x200C) && (c <= 0x200D)) ||
3366 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3367 ((c >= 0x2070) && (c <= 0x218F)) ||
3368 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3369 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3370 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3371 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3372 ((c >= 0x10000) && (c <= 0xEFFFF))
3374 if (count++ > XML_PARSER_CHUNK_SIZE) {
3377 if (ctxt->instate == XML_PARSER_EOF)
3385 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3386 (!IS_LETTER(c) && (c != '_') &&
3394 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3395 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3396 (c == '.') || (c == '-') ||
3397 (c == '_') || (c == ':') ||
3398 (IS_COMBINING(c)) ||
3399 (IS_EXTENDER(c)))) {
3400 if (count++ > XML_PARSER_CHUNK_SIZE) {
3403 if (ctxt->instate == XML_PARSER_EOF)
3412 if (ctxt->instate == XML_PARSER_EOF)
3418 if ((len > XML_MAX_NAME_LENGTH) &&
3419 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3420 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3423 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3424 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3425 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3430 * @ctxt: an XML parser context
3432 * parse an XML name.
3434 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3435 * CombiningChar | Extender
3437 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3439 * [6] Names ::= Name (#x20 Name)*
3441 * Returns the Name parsed or NULL
/*
 * Fast path: when the first byte is an ASCII letter, '_' or ':', the
 * following ASCII letters, digits, '_', '-', ':' and '.' are scanned
 * directly in the input buffer.  If the byte that ends the run is a
 * non-zero ASCII byte (0 < *in < 0x80) the name is interned with
 * xmlDictLookup(), and input->cur, nbChars and input->col are advanced
 * by its length.  A run longer than XML_MAX_NAME_LENGTH is reported as
 * XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set.
 * Everything else is handed to xmlParseNameComplex().
 */
3445 xmlParseName(xmlParserCtxtPtr ctxt) {
3457 * Accelerator for simple ASCII names
3459 in = ctxt->input->cur;
3460 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3461 ((*in >= 0x41) && (*in <= 0x5A)) ||
3462 (*in == '_') || (*in == ':')) {
3464 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3465 ((*in >= 0x41) && (*in <= 0x5A)) ||
3466 ((*in >= 0x30) && (*in <= 0x39)) ||
3467 (*in == '_') || (*in == '-') ||
3468 (*in == ':') || (*in == '.'))
3470 if ((*in > 0) && (*in < 0x80)) {
3471 count = in - ctxt->input->cur;
3472 if ((count > XML_MAX_NAME_LENGTH) &&
3473 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3474 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3477 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3478 ctxt->input->cur = in;
3479 ctxt->nbChars += count;
3480 ctxt->input->col += count;
3482 xmlErrMemory(ctxt, NULL);
3486 /* accelerator for special cases */
3487 return(xmlParseNameComplex(ctxt));
/*
 * General NCName parser; xmlParseNCName() falls back to it.
 * Character classes come from xmlIsNameStartChar() / xmlIsNameChar()
 * with ':' excluded in both positions.
 * The start of the name is remembered as an offset from BASE_PTR
 * (startPosition) and the final dictionary lookup is done from
 * BASE_PTR + startPosition; presumably this keeps the start valid if
 * the input buffer is moved while parsing (TODO confirm).
 * A name longer than XML_MAX_NAME_LENGTH is reported as
 * XML_ERR_NAME_TOO_LONG unless XML_PARSE_HUGE is set.
 */
3490 static const xmlChar *
3491 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3495 size_t startPosition = 0;
3498 nbParseNCNameComplex++;
3502 * Handler for more complex cases
3505 startPosition = CUR_PTR - BASE_PTR;
3507 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3508 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3512 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3513 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3514 if (count++ > XML_PARSER_CHUNK_SIZE) {
3515 if ((len > XML_MAX_NAME_LENGTH) &&
3516 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3517 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522 if (ctxt->instate == XML_PARSER_EOF)
3531 * when shrinking to extend the buffer we really need to preserve
3532 * the part of the name we already parsed. Hence rolling back
3533 * by current lenght.
3535 ctxt->input->cur -= l;
3537 ctxt->input->cur += l;
3538 if (ctxt->instate == XML_PARSER_EOF)
3543 if ((len > XML_MAX_NAME_LENGTH) &&
3544 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3545 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3548 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3553 * @ctxt: an XML parser context
3554 * @len: length of the string parsed
3556 * parse an XML NCName (a name that contains no ':').
3558 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3559 * CombiningChar | Extender
3561 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3563 * Returns the Name parsed or NULL
/*
 * Fast path for ASCII NCNames: same scan as in xmlParseName() but
 * without ':' and bounded by e = ctxt->input->end.  If the byte that
 * ends the run is a non-zero ASCII byte the name is interned with
 * xmlDictLookup() and input->cur, nbChars and input->col are advanced.
 * Everything else is handed to xmlParseNCNameComplex().
 * NOTE(review): in both conditions below *in is read before the
 * (in < e) bound is tested.
 */
3566 static const xmlChar *
3567 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3568 const xmlChar *in, *e;
3577 * Accelerator for simple ASCII names
3579 in = ctxt->input->cur;
3580 e = ctxt->input->end;
3581 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3582 ((*in >= 0x41) && (*in <= 0x5A)) ||
3583 (*in == '_')) && (in < e)) {
3585 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3586 ((*in >= 0x41) && (*in <= 0x5A)) ||
3587 ((*in >= 0x30) && (*in <= 0x39)) ||
3588 (*in == '_') || (*in == '-') ||
3589 (*in == '.')) && (in < e))
3593 if ((*in > 0) && (*in < 0x80)) {
3594 count = in - ctxt->input->cur;
3595 if ((count > XML_MAX_NAME_LENGTH) &&
3596 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3597 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3601 ctxt->input->cur = in;
3602 ctxt->nbChars += count;
3603 ctxt->input->col += count;
3605 xmlErrMemory(ctxt, NULL);
3611 return(xmlParseNCNameComplex(ctxt));
3615 * xmlParseNameAndCompare:
3616 * @ctxt: an XML parser context
3618 * parse an XML name and compares for match
3619 * (specialized for endtag parsing)
3621 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3622 * and the name for mismatch
/*
 * Compares the bytes at ctxt->input->cur with other, one by one.
 * When other is matched completely and the next input byte is '>' or a
 * blank, input->cur is moved past the name and (const xmlChar*) 1 is
 * returned without creating a dictionary entry.
 * In every other case the name is parsed with xmlParseName(); the
 * comment on the following comparison notes that dictionary strings
 * can be compared directly.
 */
3625 static const xmlChar *
3626 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3627 register const xmlChar *cmp = other;
3628 register const xmlChar *in;
3632 if (ctxt->instate == XML_PARSER_EOF)
3635 in = ctxt->input->cur;
3636 while (*in != 0 && *in == *cmp) {
3641 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3643 ctxt->input->cur = in;
3644 return (const xmlChar*) 1;
3646 /* failure (or end of input buffer), check with full function */
3647 ret = xmlParseName (ctxt);
3648 /* strings coming from the dictionary direct compare possible */
3650 return (const xmlChar*) 1;
3656 * xmlParseStringName:
3657 * @ctxt: an XML parser context
3658 * @str: a pointer to the string pointer (IN/OUT)
3660 * parse an XML name.
3662 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3663 * CombiningChar | Extender
3665 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3667 * [6] Names ::= Name (#x20 Name)*
3669 * Returns the Name parsed or NULL. The @str pointer
3670 * is updated to the current location in the string.
/*
 * Parses a Name from the string *str rather than from the parser
 * input.  Characters are classified with xmlIsNameStartChar() and
 * xmlIsNameChar() and accumulated in the stack buffer buf
 * (XML_MAX_NAMELEN + 5 bytes).  Once len reaches XML_MAX_NAMELEN the
 * data is copied to a heap buffer that is enlarged with xmlRealloc()
 * whenever len + 10 exceeds max.  The short-name result is a copy made
 * with xmlStrndup().
 * NOTE(review): both XML_ERR_NAME_TOO_LONG reports below pass the
 * label "NCName" although this routine parses a Name.
 */
3674 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3675 xmlChar buf[XML_MAX_NAMELEN + 5];
3676 const xmlChar *cur = *str;
3681 nbParseStringName++;
3684 c = CUR_SCHAR(cur, l);
3685 if (!xmlIsNameStartChar(ctxt, c)) {
3689 COPY_BUF(l,buf,len,c);
3691 c = CUR_SCHAR(cur, l);
3692 while (xmlIsNameChar(ctxt, c)) {
3693 COPY_BUF(l,buf,len,c);
3695 c = CUR_SCHAR(cur, l);
3696 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3698 * Okay someone managed to make a huge name, so he's ready to pay
3699 * for the processing speed.
3704 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3705 if (buffer == NULL) {
3706 xmlErrMemory(ctxt, NULL);
3709 memcpy(buffer, buf, len);
3710 while (xmlIsNameChar(ctxt, c)) {
3711 if (len + 10 > max) {
3714 if ((len > XML_MAX_NAME_LENGTH) &&
3715 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3716 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3721 tmp = (xmlChar *) xmlRealloc(buffer,
3722 max * sizeof(xmlChar));
3724 xmlErrMemory(ctxt, NULL);
3730 COPY_BUF(l,buffer,len,c);
3732 c = CUR_SCHAR(cur, l);
3739 if ((len > XML_MAX_NAME_LENGTH) &&
3740 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3741 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3745 return(xmlStrndup(buf, len));
3750 * @ctxt: an XML parser context
3752 * parse an XML Nmtoken.
3754 * [7] Nmtoken ::= (NameChar)+
3756 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3758 * Returns the Nmtoken parsed or NULL
/*
 * Parses an Nmtoken from the parser input: every character accepted by
 * xmlIsNameChar() is accumulated, first in the stack buffer buf
 * (XML_MAX_NAMELEN + 5 bytes), then, once len reaches XML_MAX_NAMELEN,
 * in a heap buffer enlarged with xmlRealloc() whenever len + 10
 * exceeds max.  The short-token result is a copy made with
 * xmlStrndup().
 * NOTE(review): inside the heap loop the XML_MAX_NAME_LENGTH limit is
 * tested against max, while the final check tests len.
 */
3762 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3763 xmlChar buf[XML_MAX_NAMELEN + 5];
3773 if (ctxt->instate == XML_PARSER_EOF)
3777 while (xmlIsNameChar(ctxt, c)) {
3778 if (count++ > XML_PARSER_CHUNK_SIZE) {
3782 COPY_BUF(l,buf,len,c);
3788 if (ctxt->instate == XML_PARSER_EOF)
3792 if (len >= XML_MAX_NAMELEN) {
3794 * Okay someone managed to make a huge token, so he's ready to pay
3795 * for the processing speed.
3800 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3801 if (buffer == NULL) {
3802 xmlErrMemory(ctxt, NULL);
3805 memcpy(buffer, buf, len);
3806 while (xmlIsNameChar(ctxt, c)) {
3807 if (count++ > XML_PARSER_CHUNK_SIZE) {
3810 if (ctxt->instate == XML_PARSER_EOF) {
3815 if (len + 10 > max) {
3818 if ((max > XML_MAX_NAME_LENGTH) &&
3819 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3820 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3825 tmp = (xmlChar *) xmlRealloc(buffer,
3826 max * sizeof(xmlChar));
3828 xmlErrMemory(ctxt, NULL);
3834 COPY_BUF(l,buffer,len,c);
3844 if ((len > XML_MAX_NAME_LENGTH) &&
3845 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3846 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3849 return(xmlStrndup(buf, len));
3853 * xmlParseEntityValue:
3854 * @ctxt: an XML parser context
3855 * @orig: if non-NULL store a copy of the original entity value
3857 * parse a value for ENTITY declarations
3859 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3860 * "'" ([^%&'] | PEReference | Reference)* "'"
3862 * Returns the EntityValue parsed with reference substituted or NULL
/*
 * Steps visible in this routine:
 *  1. the opening quote (double or single) selects stop; anything else
 *     is reported as XML_ERR_ENTITY_NOT_STARTED;
 *  2. the raw literal is copied into buf, which is enlarged with
 *     xmlRealloc() when len + 5 reaches size.  The loop ends on stop
 *     only when ctxt->input is again the input that was current at the
 *     start, so a quote coming from another input does not end the
 *     literal;
 *  3. every '%' and every '&' not followed by '#' in the copied text
 *     must be followed by a name (xmlParseStringName) and ';',
 *     otherwise XML_ERR_ENTITY_CHAR_ERROR is reported; a '%' reference
 *     with inSubset == 1 and inputNr == 1 is reported as
 *     XML_ERR_ENTITY_PE_INTERNAL;
 *  4. the result is produced by xmlStringDecodeEntities() with
 *     XML_SUBSTITUTE_PEREF only, so general entity references are left
 *     as they are.
 */
3866 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3867 xmlChar *buf = NULL;
3869 int size = XML_PARSER_BUFFER_SIZE;
3872 xmlChar *ret = NULL;
3873 const xmlChar *cur = NULL;
3874 xmlParserInputPtr input;
3876 if (RAW == '"') stop = '"';
3877 else if (RAW == '\'') stop = '\'';
3879 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3882 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3884 xmlErrMemory(ctxt, NULL);
3889 * The content of the entity definition is copied in a buffer.
3892 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3893 input = ctxt->input;
3895 if (ctxt->instate == XML_PARSER_EOF) {
3902 * NOTE: 4.4.5 Included in Literal
3903 * When a parameter entity reference appears in a literal entity
3904 * value, ... a single or double quote character in the replacement
3905 * text is always treated as a normal data character and will not
3906 * terminate the literal.
3907 * In practice it means we stop the loop only when back at parsing
3908 * the initial entity and the quote is found
3910 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3911 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3912 if (len + 5 >= size) {
3916 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3918 xmlErrMemory(ctxt, NULL);
3924 COPY_BUF(l,buf,len,c);
3927 * Pop-up of finished entities.
3929 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3940 if (ctxt->instate == XML_PARSER_EOF) {
3946 * Raise problem w.r.t. '&' and '%' being used in non-entities
3947 * reference constructs. Note Charref will be handled in
3948 * xmlStringDecodeEntities()
3951 while (*cur != 0) { /* non input consuming */
3952 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3957 name = xmlParseStringName(ctxt, &cur);
3958 if ((name == NULL) || (*cur != ';')) {
3959 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3960 "EntityValue: '%c' forbidden except for entities references\n",
3963 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3964 (ctxt->inputNr == 1)) {
3965 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3976 * Then PEReference entities are substituted.
3979 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3984 * NOTE: 4.4.7 Bypassed
3985 * When a general entity reference appears in the EntityValue in
3986 * an entity declaration, it is bypassed and left as is.
3987 * so XML_SUBSTITUTE_REF is not set here.
3990 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
4003 * xmlParseAttValueComplex:
4004 * @ctxt: an XML parser context
4005 * @attlen: the resulting attribute length
4006 * @normalize: whether to apply the inner normalization
4008 * parse a value for an attribute, this is the fallback function
4009 * of xmlParseAttValue() when the attribute parsing requires handling
4010 * of non-ASCII characters, or normalization compaction.
4012 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * Fallback attribute-value parser.
 *  - The value must open with a double or single quote, otherwise
 *    XML_ERR_ATTRIBUTE_NOT_STARTED is reported.
 *  - Output goes to buf (initially XML_PARSER_BUFFER_SIZE bytes), which
 *    is enlarged with growBuffer() so that about 10 spare bytes remain.
 *  - Without XML_PARSE_HUGE a value longer than XML_MAX_TEXT_LENGTH is
 *    reported as XML_ERR_ATTRIBUTE_NOT_FINISHED.
 *  - Character references are parsed with xmlParseCharRef(), entity
 *    references with xmlParseEntityRef().  When ctxt->replaceEntities
 *    is set, the content of a non-predefined entity is expanded through
 *    xmlStringDecodeEntities(); otherwise the reference is written back
 *    by name, after an unchecked entity (checked == 0) has been decoded
 *    once and its result searched for a left angle bracket.
 *  - Whitespace characters (0x20, 0xD, 0xA, 0x9) are stored as 0x20;
 *    with normalize set, leading and repeated spaces are skipped and
 *    trailing spaces are removed at the end.
 *  - On success the length is stored in *attlen when attlen is not
 *    NULL; a length of INT_MAX or more is reported as an error.
 */
4015 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4017 xmlChar *buf = NULL;
4018 xmlChar *rep = NULL;
4020 size_t buf_size = 0;
4021 int c, l, in_space = 0;
4022 xmlChar *current = NULL;
4025 if (NXT(0) == '"') {
4026 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4029 } else if (NXT(0) == '\'') {
4031 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4034 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4039 * allocate a translation buffer.
4041 buf_size = XML_PARSER_BUFFER_SIZE;
4042 buf = (xmlChar *) xmlMallocAtomic(buf_size);
4043 if (buf == NULL) goto mem_error;
4046 * OK loop until we reach one of the ending char or a size limit.
4049 while (((NXT(0) != limit) && /* checked */
4050 (IS_CHAR(c)) && (c != '<')) &&
4051 (ctxt->instate != XML_PARSER_EOF)) {
4053 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4054 * special option is given
4056 if ((len > XML_MAX_TEXT_LENGTH) &&
4057 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4058 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4059 "AttValue length too long\n");
4065 if (NXT(1) == '#') {
4066 int val = xmlParseCharRef(ctxt);
4069 if (ctxt->replaceEntities) {
4070 if (len + 10 > buf_size) {
4071 growBuffer(buf, 10);
4076 * The reparsing will be done in xmlStringGetNodeList()
4077 * called by the attribute() function in SAX.c
4079 if (len + 10 > buf_size) {
4080 growBuffer(buf, 10);
4088 } else if (val != 0) {
4089 if (len + 10 > buf_size) {
4090 growBuffer(buf, 10);
4092 len += xmlCopyChar(0, &buf[len], val);
4095 ent = xmlParseEntityRef(ctxt);
4098 ctxt->nbentities += ent->owner;
4099 if ((ent != NULL) &&
4100 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4101 if (len + 10 > buf_size) {
4102 growBuffer(buf, 10);
4104 if ((ctxt->replaceEntities == 0) &&
4105 (ent->content[0] == '&')) {
4112 buf[len++] = ent->content[0];
4114 } else if ((ent != NULL) &&
4115 (ctxt->replaceEntities != 0)) {
4116 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4118 rep = xmlStringDecodeEntities(ctxt, ent->content,
4124 while (*current != 0) { /* non input consuming */
4125 if ((*current == 0xD) || (*current == 0xA) ||
4126 (*current == 0x9)) {
4130 buf[len++] = *current++;
4131 if (len + 10 > buf_size) {
4132 growBuffer(buf, 10);
4139 if (len + 10 > buf_size) {
4140 growBuffer(buf, 10);
4142 if (ent->content != NULL)
4143 buf[len++] = ent->content[0];
4145 } else if (ent != NULL) {
4146 int i = xmlStrlen(ent->name);
4147 const xmlChar *cur = ent->name;
4150 * This may look absurd but is needed to detect
4153 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4154 (ent->content != NULL) && (ent->checked == 0)) {
4155 unsigned long oldnbent = ctxt->nbentities;
4158 rep = xmlStringDecodeEntities(ctxt, ent->content,
4159 XML_SUBSTITUTE_REF, 0, 0, 0);
4162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4164 if (xmlStrchr(rep, '<'))
4172 * Just output the reference
4175 while (len + i + 10 > buf_size) {
4176 growBuffer(buf, i + 10);
4179 buf[len++] = *cur++;
4184 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4185 if ((len != 0) || (!normalize)) {
4186 if ((!normalize) || (!in_space)) {
4187 COPY_BUF(l,buf,len,0x20);
4188 while (len + 10 > buf_size) {
4189 growBuffer(buf, 10);
4196 COPY_BUF(l,buf,len,c);
4197 if (len + 10 > buf_size) {
4198 growBuffer(buf, 10);
4206 if (ctxt->instate == XML_PARSER_EOF)
4209 if ((in_space) && (normalize)) {
4210 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4214 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4215 } else if (RAW != limit) {
4216 if ((c != 0) && (!IS_CHAR(c))) {
4217 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4218 "invalid character in attribute value\n");
4220 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4221 "AttValue: ' expected\n");
4227 * There we potentially risk an overflow, don't allow attribute value of
4228 * length more than INT_MAX it is a very reasonnable assumption !
4230 if (len >= INT_MAX) {
4231 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4232 "AttValue length too long\n");
4236 if (attlen != NULL) *attlen = (int) len;
4240 xmlErrMemory(ctxt, NULL);
4251 * @ctxt: an XML parser context
4253 * parse a value for an attribute
4254 * Note: the parser won't do substitution of entities here, this
4255 * will be handled later in xmlStringGetNodeList
4257 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4258 * "'" ([^<&'] | Reference)* "'"
4260 * 3.3.3 Attribute-Value Normalization:
4261 * Before the value of an attribute is passed to the application or
4262 * checked for validity, the XML processor must normalize it as follows:
4263 * - a character reference is processed by appending the referenced
4264 * character to the attribute value
4265 * - an entity reference is processed by recursively processing the
4266 * replacement text of the entity
4267 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4268 * appending #x20 to the normalized value, except that only a single
4269 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4270 * parsed entity or the literal entity value of an internal parsed entity
4271 * - other characters are processed by appending them to the normalized value
4272 * If the declared value is not CDATA, then the XML processor must further
4273 * process the normalized attribute value by discarding any leading and
4274 * trailing space (#x20) characters, and by replacing sequences of space
4275 * (#x20) characters by a single space (#x20) character.
4276 * All attributes for which no declaration has been read should be treated
4277 * by a non-validating parser as if declared CDATA.
4279 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * Public entry point: returns NULL when ctxt or ctxt->input is NULL,
 * otherwise calls xmlParseAttValueInternal() with no length output,
 * no alloc output and normalization disabled (NULL, NULL, 0).
 */
4284 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4285 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4286 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4290 * xmlParseSystemLiteral:
4291 * @ctxt: an XML parser context
4293 * parse an XML Literal
4295 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4297 * Returns the SystemLiteral parsed or NULL
/*
 * Reads a quoted SystemLiteral into buf.
 * ctxt->instate is saved in state, set to XML_PARSER_SYSTEM_LITERAL
 * while reading, and restored on the error paths shown below and after
 * the loop.  buf is enlarged with xmlRealloc() when len + 5 reaches
 * size; without XML_PARSE_HUGE a size above XML_MAX_NAME_LENGTH is
 * reported as XML_ERR_NAME_TOO_LONG.  A literal that does not open
 * with a quote is reported as XML_ERR_LITERAL_NOT_STARTED, and one
 * that ends on a character rejected by IS_CHAR as
 * XML_ERR_LITERAL_NOT_FINISHED.
 */
4301 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4302 xmlChar *buf = NULL;
4304 int size = XML_PARSER_BUFFER_SIZE;
4307 int state = ctxt->instate;
4314 } else if (RAW == '\'') {
4318 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4322 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4324 xmlErrMemory(ctxt, NULL);
4327 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4329 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4330 if (len + 5 >= size) {
4333 if ((size > XML_MAX_NAME_LENGTH) &&
4334 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4335 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4337 ctxt->instate = (xmlParserInputState) state;
4341 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4344 xmlErrMemory(ctxt, NULL);
4345 ctxt->instate = (xmlParserInputState) state;
4354 if (ctxt->instate == XML_PARSER_EOF) {
4359 COPY_BUF(l,buf,len,cur);
4369 ctxt->instate = (xmlParserInputState) state;
4370 if (!IS_CHAR(cur)) {
4371 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4379 * xmlParsePubidLiteral:
4380 * @ctxt: an XML parser context
4382 * parse an XML public literal
4384 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4386 * Returns the PubidLiteral parsed or NULL.
4390 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4391 xmlChar *buf = NULL;
4393 int size = XML_PARSER_BUFFER_SIZE;
4397 xmlParserInputState oldstate = ctxt->instate;
4403 } else if (RAW == '\'') {
4407 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4412 xmlErrMemory(ctxt, NULL);
4415 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4417 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4418 if (len + 1 >= size) {
4421 if ((size > XML_MAX_NAME_LENGTH) &&
4422 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4423 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4428 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4430 xmlErrMemory(ctxt, NULL);
4441 if (ctxt->instate == XML_PARSER_EOF) {
4456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4460 ctxt->instate = oldstate;
4464 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4467 * used for the test in the inner loop of the char data testing
4469 static const unsigned char test_char_data[256] = {
4470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4471 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4472 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4473 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4474 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4475 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4476 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4477 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4478 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4479 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4480 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4481 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4482 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4483 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4484 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4485 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4486 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4487 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4488 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4489 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4490 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4491 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4492 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4494 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4495 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4497 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4498 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4499 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4500 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4501 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4506 * @ctxt: an XML parser context
4507 * @cdata: int indicating whether we are within a CDATA section
4509 * parse a CharData section.
4510 * if we are within a CDATA section ']]>' marks an end of section.
4512 * The right angle bracket (>) may be represented using the string "&gt;",
4513 * and must, for compatibility, be escaped using "&gt;" or a character
4514 * reference when it appears in the string "]]>" in content, when that
4515 * string is not marking the end of a CDATA section.
4517 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4521 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4524 int line = ctxt->input->line;
4525 int col = ctxt->input->col;
4531 * Accelerated common case where input don't need to be
4532 * modified before passing it to the handler.
4535 in = ctxt->input->cur;
4538 while (*in == 0x20) { in++; ctxt->input->col++; }
4541 ctxt->input->line++; ctxt->input->col = 1;
4543 } while (*in == 0xA);
4544 goto get_more_space;
4547 nbchar = in - ctxt->input->cur;
4549 const xmlChar *tmp = ctxt->input->cur;
4550 ctxt->input->cur = in;
4552 if ((ctxt->sax != NULL) &&
4553 (ctxt->sax->ignorableWhitespace !=
4554 ctxt->sax->characters)) {
4555 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4556 if (ctxt->sax->ignorableWhitespace != NULL)
4557 ctxt->sax->ignorableWhitespace(ctxt->userData,
4560 if (ctxt->sax->characters != NULL)
4561 ctxt->sax->characters(ctxt->userData,
4563 if (*ctxt->space == -1)
4566 } else if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->characters != NULL)) {
4568 ctxt->sax->characters(ctxt->userData,
4576 ccol = ctxt->input->col;
4577 while (test_char_data[*in]) {
4581 ctxt->input->col = ccol;
4584 ctxt->input->line++; ctxt->input->col = 1;
4586 } while (*in == 0xA);
4590 if ((in[1] == ']') && (in[2] == '>')) {
4591 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4592 ctxt->input->cur = in;
4599 nbchar = in - ctxt->input->cur;
4601 if ((ctxt->sax != NULL) &&
4602 (ctxt->sax->ignorableWhitespace !=
4603 ctxt->sax->characters) &&
4604 (IS_BLANK_CH(*ctxt->input->cur))) {
4605 const xmlChar *tmp = ctxt->input->cur;
4606 ctxt->input->cur = in;
4608 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4609 if (ctxt->sax->ignorableWhitespace != NULL)
4610 ctxt->sax->ignorableWhitespace(ctxt->userData,
4613 if (ctxt->sax->characters != NULL)
4614 ctxt->sax->characters(ctxt->userData,
4616 if (*ctxt->space == -1)
4619 line = ctxt->input->line;
4620 col = ctxt->input->col;
4621 } else if (ctxt->sax != NULL) {
4622 if (ctxt->sax->characters != NULL)
4623 ctxt->sax->characters(ctxt->userData,
4624 ctxt->input->cur, nbchar);
4625 line = ctxt->input->line;
4626 col = ctxt->input->col;
4628 /* something really bad happened in the SAX callback */
4629 if (ctxt->instate != XML_PARSER_CONTENT)
4632 ctxt->input->cur = in;
4636 ctxt->input->cur = in;
4638 ctxt->input->line++; ctxt->input->col = 1;
4639 continue; /* while */
4651 if (ctxt->instate == XML_PARSER_EOF)
4653 in = ctxt->input->cur;
4654 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4657 ctxt->input->line = line;
4658 ctxt->input->col = col;
4659 xmlParseCharDataComplex(ctxt, cdata);
4663 * xmlParseCharDataComplex:
4664 * @ctxt: an XML parser context
4665 * @cdata: int indicating whether we are within a CDATA section
4667 * parse a CharData section. This is the fallback function
4668 * of xmlParseCharData() when the parsing requires handling
4669 * of non-ASCII characters.
4672 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4673 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4681 while ((cur != '<') && /* checked */
4683 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4684 if ((cur == ']') && (NXT(1) == ']') &&
4688 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4691 COPY_BUF(l,buf,nbchar,cur);
4692 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4696 * OK the segment is to be consumed as chars.
4698 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4699 if (areBlanks(ctxt, buf, nbchar, 0)) {
4700 if (ctxt->sax->ignorableWhitespace != NULL)
4701 ctxt->sax->ignorableWhitespace(ctxt->userData,
4704 if (ctxt->sax->characters != NULL)
4705 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4706 if ((ctxt->sax->characters !=
4707 ctxt->sax->ignorableWhitespace) &&
4708 (*ctxt->space == -1))
4713 /* something really bad happened in the SAX callback */
4714 if (ctxt->instate != XML_PARSER_CONTENT)
4721 if (ctxt->instate == XML_PARSER_EOF)
4730 * OK the segment is to be consumed as chars.
4732 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4733 if (areBlanks(ctxt, buf, nbchar, 0)) {
4734 if (ctxt->sax->ignorableWhitespace != NULL)
4735 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4737 if (ctxt->sax->characters != NULL)
4738 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4739 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4740 (*ctxt->space == -1))
4745 if ((cur != 0) && (!IS_CHAR(cur))) {
4746 /* Generate the error and skip the offending character */
4747 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4748 "PCDATA invalid Char value %d\n",
4755 * xmlParseExternalID:
4756 * @ctxt: an XML parser context
4757 * @publicID: a xmlChar** receiving PubidLiteral
4758 * @strict: indicate whether we should restrict parsing to only
4759 * production [75], see NOTE below
4761 * Parse an External ID or a Public ID
4763 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4764 * 'PUBLIC' S PubidLiteral S SystemLiteral
4766 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4767 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4769 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4771 * Returns the SystemLiteral, and in the second
4772 * case publicID receives the PubidLiteral; if strict is off
4773 * it is possible to return NULL and have publicID set.
4777 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4778 xmlChar *URI = NULL;
4783 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4785 if (!IS_BLANK_CH(CUR)) {
4786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4787 "Space required after 'SYSTEM'\n");
4790 URI = xmlParseSystemLiteral(ctxt);
4792 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4794 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4796 if (!IS_BLANK_CH(CUR)) {
4797 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4798 "Space required after 'PUBLIC'\n");
4801 *publicID = xmlParsePubidLiteral(ctxt);
4802 if (*publicID == NULL) {
4803 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4807 * We don't handle [83] so "S SystemLiteral" is required.
4809 if (!IS_BLANK_CH(CUR)) {
4810 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4811 "Space required after the Public Identifier\n");
4815 * We handle [83] so we return immediately, if
4816 * "S SystemLiteral" is not detected. From a purely parsing
4817 * point of view that's a nice mess.
4823 if (!IS_BLANK_CH(*ptr)) return(NULL);
4825 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4826 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4829 URI = xmlParseSystemLiteral(ctxt);
4831 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4838 * xmlParseCommentComplex:
4839 * @ctxt: an XML parser context
4840 * @buf: the already parsed part of the buffer
4841 * @len: number of bytes filled in the buffer
4842 * @size: allocated size of the buffer
4844 * Skip an XML (SGML) comment <!-- .... -->
4845 * The spec says that "For compatibility, the string "--" (double-hyphen)
4846 * must not occur within comments. "
4847 * This is the slow routine in case the accelerator for ascii didn't work
4849 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4852 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4853 size_t len, size_t size) {
4860 inputid = ctxt->input->id;
4864 size = XML_PARSER_BUFFER_SIZE;
4865 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4867 xmlErrMemory(ctxt, NULL);
4871 GROW; /* Assure there's enough input data */
4874 goto not_terminated;
4876 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4877 "xmlParseComment: invalid xmlChar value %d\n",
4885 goto not_terminated;
4887 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4888 "xmlParseComment: invalid xmlChar value %d\n",
4896 goto not_terminated;
4897 while (IS_CHAR(cur) && /* checked */
4899 (r != '-') || (q != '-'))) {
4900 if ((r == '-') && (q == '-')) {
4901 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4903 if ((len > XML_MAX_TEXT_LENGTH) &&
4904 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4905 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4906 "Comment too big found", NULL);
4910 if (len + 5 >= size) {
4914 new_size = size * 2;
4915 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4916 if (new_buf == NULL) {
4918 xmlErrMemory(ctxt, NULL);
4924 COPY_BUF(ql,buf,len,q);
4934 if (ctxt->instate == XML_PARSER_EOF) {
4949 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4950 "Comment not terminated \n<!--%.50s\n", buf);
4951 } else if (!IS_CHAR(cur)) {
4952 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4953 "xmlParseComment: invalid xmlChar value %d\n",
4956 if (inputid != ctxt->input->id) {
4957 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4958 "Comment doesn't start and stop in the same entity\n");
4961 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4962 (!ctxt->disableSAX))
4963 ctxt->sax->comment(ctxt->userData, buf);
4968 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4969 "Comment not terminated\n", NULL);
4976 * @ctxt: an XML parser context
4978 * Skip an XML (SGML) comment <!-- .... -->
4979 * The spec says that "For compatibility, the string "--" (double-hyphen)
4980 * must not occur within comments. "
4982 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4985 xmlParseComment(xmlParserCtxtPtr ctxt) {
4986 xmlChar *buf = NULL;
4987 size_t size = XML_PARSER_BUFFER_SIZE;
4989 xmlParserInputState state;
4996 * Check that there is a comment right here.
4998 if ((RAW != '<') || (NXT(1) != '!') ||
4999 (NXT(2) != '-') || (NXT(3) != '-')) return;
5000 state = ctxt->instate;
5001 ctxt->instate = XML_PARSER_COMMENT;
5002 inputid = ctxt->input->id;
5008 * Accelerated common case where input don't need to be
5009 * modified before passing it to the handler.
5011 in = ctxt->input->cur;
5015 ctxt->input->line++; ctxt->input->col = 1;
5017 } while (*in == 0xA);
5020 ccol = ctxt->input->col;
5021 while (((*in > '-') && (*in <= 0x7F)) ||
5022 ((*in >= 0x20) && (*in < '-')) ||
5027 ctxt->input->col = ccol;
5030 ctxt->input->line++; ctxt->input->col = 1;
5032 } while (*in == 0xA);
5035 nbchar = in - ctxt->input->cur;
5037 * save current set of data
5040 if ((ctxt->sax != NULL) &&
5041 (ctxt->sax->comment != NULL)) {
5043 if ((*in == '-') && (in[1] == '-'))
5046 size = XML_PARSER_BUFFER_SIZE + nbchar;
5047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5049 xmlErrMemory(ctxt, NULL);
5050 ctxt->instate = state;
5054 } else if (len + nbchar + 1 >= size) {
5056 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5057 new_buf = (xmlChar *) xmlRealloc(buf,
5058 size * sizeof(xmlChar));
5059 if (new_buf == NULL) {
5061 xmlErrMemory(ctxt, NULL);
5062 ctxt->instate = state;
5067 memcpy(&buf[len], ctxt->input->cur, nbchar);
5072 if ((len > XML_MAX_TEXT_LENGTH) &&
5073 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5074 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5075 "Comment too big found", NULL);
5079 ctxt->input->cur = in;
5082 ctxt->input->line++; ctxt->input->col = 1;
5087 ctxt->input->cur = in;
5089 ctxt->input->line++; ctxt->input->col = 1;
5090 continue; /* while */
5096 if (ctxt->instate == XML_PARSER_EOF) {
5100 in = ctxt->input->cur;
5104 if (ctxt->input->id != inputid) {
5105 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5106 "comment doesn't start and stop in the same entity\n");
5109 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5110 (!ctxt->disableSAX)) {
5112 ctxt->sax->comment(ctxt->userData, buf);
5114 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5118 if (ctxt->instate != XML_PARSER_EOF)
5119 ctxt->instate = state;
5123 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5124 "Double hyphen within comment: "
5128 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5129 "Double hyphen within comment\n", NULL);
5137 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5138 xmlParseCommentComplex(ctxt, buf, len, size);
5139 ctxt->instate = state;
5146 * @ctxt: an XML parser context
5148 * parse the name of a PI
5150 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5152 * Returns the PITarget name or NULL
5156 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5157 const xmlChar *name;
5159 name = xmlParseName(ctxt);
5160 if ((name != NULL) &&
5161 ((name[0] == 'x') || (name[0] == 'X')) &&
5162 ((name[1] == 'm') || (name[1] == 'M')) &&
5163 ((name[2] == 'l') || (name[2] == 'L'))) {
5165 if ((name[0] == 'x') && (name[1] == 'm') &&
5166 (name[2] == 'l') && (name[3] == 0)) {
5167 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5168 "XML declaration allowed only at the start of the document\n");
5170 } else if (name[3] == 0) {
5171 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5175 if (xmlW3CPIs[i] == NULL) break;
5176 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5179 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5180 "xmlParsePITarget: invalid name prefix 'xml'\n",
5183 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5184 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5185 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5190 #ifdef LIBXML_CATALOG_ENABLED
5192 * xmlParseCatalogPI:
5193 * @ctxt: an XML parser context
5194 * @catalog: the PI value string
5196 * parse an XML Catalog Processing Instruction.
5198 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5200 * Occurs only if allowed by the user and if happening in the Misc
5201 * part of the document before any doctype information
5202 * This will add the given catalog to the parsing context in order
5203 * to be used if there is a resolution need further down in the document
5207 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5208 xmlChar *URL = NULL;
5209 const xmlChar *tmp, *base;
5213 while (IS_BLANK_CH(*tmp)) tmp++;
5214 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5217 while (IS_BLANK_CH(*tmp)) tmp++;
5222 while (IS_BLANK_CH(*tmp)) tmp++;
5224 if ((marker != '\'') && (marker != '"'))
5228 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5231 URL = xmlStrndup(base, tmp - base);
5233 while (IS_BLANK_CH(*tmp)) tmp++;
5238 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5244 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5245 "Catalog PI syntax error: %s\n",
5254 * @ctxt: an XML parser context
5256 * parse an XML Processing Instruction.
5258 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5260 * The processing is transferred to SAX once parsed.
5264 xmlParsePI(xmlParserCtxtPtr ctxt) {
5265 xmlChar *buf = NULL;
5267 size_t size = XML_PARSER_BUFFER_SIZE;
5269 const xmlChar *target;
5270 xmlParserInputState state;
5273 if ((RAW == '<') && (NXT(1) == '?')) {
5274 xmlParserInputPtr input = ctxt->input;
5275 state = ctxt->instate;
5276 ctxt->instate = XML_PARSER_PI;
5278 * this is a Processing Instruction.
5284 * Parse the target name and check for special support like
5287 target = xmlParsePITarget(ctxt);
5288 if (target != NULL) {
5289 if ((RAW == '?') && (NXT(1) == '>')) {
5290 if (input != ctxt->input) {
5291 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5292 "PI declaration doesn't start and stop in the same entity\n");
5299 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5300 (ctxt->sax->processingInstruction != NULL))
5301 ctxt->sax->processingInstruction(ctxt->userData,
5303 if (ctxt->instate != XML_PARSER_EOF)
5304 ctxt->instate = state;
5307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5309 xmlErrMemory(ctxt, NULL);
5310 ctxt->instate = state;
5314 if (!IS_BLANK(cur)) {
5315 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5316 "ParsePI: PI %s space expected\n", target);
5320 while (IS_CHAR(cur) && /* checked */
5321 ((cur != '?') || (NXT(1) != '>'))) {
5322 if (len + 5 >= size) {
5324 size_t new_size = size * 2;
5325 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5327 xmlErrMemory(ctxt, NULL);
5329 ctxt->instate = state;
5338 if (ctxt->instate == XML_PARSER_EOF) {
5343 if ((len > XML_MAX_TEXT_LENGTH) &&
5344 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5346 "PI %s too big found", target);
5348 ctxt->instate = state;
5352 COPY_BUF(l,buf,len,cur);
5361 if ((len > XML_MAX_TEXT_LENGTH) &&
5362 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5363 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5364 "PI %s too big found", target);
5366 ctxt->instate = state;
5371 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5372 "ParsePI: PI %s never end ...\n", target);
5374 if (input != ctxt->input) {
5375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5376 "PI declaration doesn't start and stop in the same entity\n");
5380 #ifdef LIBXML_CATALOG_ENABLED
5381 if (((state == XML_PARSER_MISC) ||
5382 (state == XML_PARSER_START)) &&
5383 (xmlStrEqual(target, XML_CATALOG_PI))) {
5384 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5385 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5386 (allow == XML_CATA_ALLOW_ALL))
5387 xmlParseCatalogPI(ctxt, buf);
5395 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5396 (ctxt->sax->processingInstruction != NULL))
5397 ctxt->sax->processingInstruction(ctxt->userData,
5402 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5404 if (ctxt->instate != XML_PARSER_EOF)
5405 ctxt->instate = state;
5410 * xmlParseNotationDecl:
5411 * @ctxt: an XML parser context
5413 * parse a notation declaration
5415 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5417 * Hence there are actually 3 choices:
5418 * 'PUBLIC' S PubidLiteral
5419 * 'PUBLIC' S PubidLiteral S SystemLiteral
5420 * and 'SYSTEM' S SystemLiteral
5422 * See the NOTE on xmlParseExternalID().
5426 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5427 const xmlChar *name;
5431 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5432 xmlParserInputPtr input = ctxt->input;
5435 if (!IS_BLANK_CH(CUR)) {
5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after '<!NOTATION'\n");
5442 name = xmlParseName(ctxt);
5444 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5447 if (!IS_BLANK_CH(CUR)) {
5448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after the NOTATION name'\n");
5452 if (xmlStrchr(name, ':') != NULL) {
5453 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5454 "colons are forbidden from notation names '%s'\n",
5462 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5466 if (input != ctxt->input) {
5467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Notation declaration doesn't start and stop in the same entity\n");
5471 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5472 (ctxt->sax->notationDecl != NULL))
5473 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5475 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5477 if (Systemid != NULL) xmlFree(Systemid);
5478 if (Pubid != NULL) xmlFree(Pubid);
5483 * xmlParseEntityDecl:
5484 * @ctxt: an XML parser context
5486 * parse <!ENTITY declarations
5488 * [70] EntityDecl ::= GEDecl | PEDecl
5490 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5492 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5494 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5496 * [74] PEDef ::= EntityValue | ExternalID
5498 * [76] NDataDecl ::= S 'NDATA' S Name
5500 * [ VC: Notation Declared ]
5501 * The Name must match the declared name of a notation.
5505 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5506 const xmlChar *name = NULL;
5507 xmlChar *value = NULL;
5508 xmlChar *URI = NULL, *literal = NULL;
5509 const xmlChar *ndata = NULL;
5510 int isParameter = 0;
5511 xmlChar *orig = NULL;
5514 /* GROW; done in the caller */
5515 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5516 xmlParserInputPtr input = ctxt->input;
5519 skipped = SKIP_BLANKS;
5521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5522 "Space required after '<!ENTITY'\n");
5527 skipped = SKIP_BLANKS;
5529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5530 "Space required after '%%'\n");
5535 name = xmlParseName(ctxt);
5537 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5538 "xmlParseEntityDecl: no name\n");
5541 if (xmlStrchr(name, ':') != NULL) {
5542 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5543 "colons are forbidden from entities names '%s'\n",
5546 skipped = SKIP_BLANKS;
5548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5549 "Space required after the entity name\n");
5552 ctxt->instate = XML_PARSER_ENTITY_DECL;
5554 * handle the various case of definitions...
5557 if ((RAW == '"') || (RAW == '\'')) {
5558 value = xmlParseEntityValue(ctxt, &orig);
5560 if ((ctxt->sax != NULL) &&
5561 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5562 ctxt->sax->entityDecl(ctxt->userData, name,
5563 XML_INTERNAL_PARAMETER_ENTITY,
5567 URI = xmlParseExternalID(ctxt, &literal, 1);
5568 if ((URI == NULL) && (literal == NULL)) {
5569 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5574 uri = xmlParseURI((const char *) URI);
5576 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5577 "Invalid URI: %s\n", URI);
5579 * This really ought to be a well formedness error
5580 * but the XML Core WG decided otherwise c.f. issue
5581 * E26 of the XML erratas.
5584 if (uri->fragment != NULL) {
5586 * Okay this is foolish to block those but not
5589 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5591 if ((ctxt->sax != NULL) &&
5592 (!ctxt->disableSAX) &&
5593 (ctxt->sax->entityDecl != NULL))
5594 ctxt->sax->entityDecl(ctxt->userData, name,
5595 XML_EXTERNAL_PARAMETER_ENTITY,
5596 literal, URI, NULL);
5603 if ((RAW == '"') || (RAW == '\'')) {
5604 value = xmlParseEntityValue(ctxt, &orig);
5605 if ((ctxt->sax != NULL) &&
5606 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5607 ctxt->sax->entityDecl(ctxt->userData, name,
5608 XML_INTERNAL_GENERAL_ENTITY,
5611 * For expat compatibility in SAX mode.
5613 if ((ctxt->myDoc == NULL) ||
5614 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5615 if (ctxt->myDoc == NULL) {
5616 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617 if (ctxt->myDoc == NULL) {
5618 xmlErrMemory(ctxt, "New Doc failed");
5621 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5623 if (ctxt->myDoc->intSubset == NULL)
5624 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5625 BAD_CAST "fake", NULL, NULL);
5627 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5631 URI = xmlParseExternalID(ctxt, &literal, 1);
5632 if ((URI == NULL) && (literal == NULL)) {
5633 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5638 uri = xmlParseURI((const char *)URI);
5640 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5641 "Invalid URI: %s\n", URI);
5643 * This really ought to be a well formedness error
5644 * but the XML Core WG decided otherwise c.f. issue
5645 * E26 of the XML erratas.
5648 if (uri->fragment != NULL) {
5650 * Okay this is foolish to block those but not
5653 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5658 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5660 "Space required before 'NDATA'\n");
5663 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5665 if (!IS_BLANK_CH(CUR)) {
5666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5667 "Space required after 'NDATA'\n");
5670 ndata = xmlParseName(ctxt);
5671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5672 (ctxt->sax->unparsedEntityDecl != NULL))
5673 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5674 literal, URI, ndata);
5676 if ((ctxt->sax != NULL) &&
5677 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5678 ctxt->sax->entityDecl(ctxt->userData, name,
5679 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5680 literal, URI, NULL);
5682 * For expat compatibility in SAX mode.
5683 * assuming the entity replacement was asked for
5685 if ((ctxt->replaceEntities != 0) &&
5686 ((ctxt->myDoc == NULL) ||
5687 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5688 if (ctxt->myDoc == NULL) {
5689 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5690 if (ctxt->myDoc == NULL) {
5691 xmlErrMemory(ctxt, "New Doc failed");
5694 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5697 if (ctxt->myDoc->intSubset == NULL)
5698 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5699 BAD_CAST "fake", NULL, NULL);
5700 xmlSAX2EntityDecl(ctxt, name,
5701 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5702 literal, URI, NULL);
5707 if (ctxt->instate == XML_PARSER_EOF)
5711 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5712 "xmlParseEntityDecl: entity %s not terminated\n", name);
5713 xmlHaltParser(ctxt);
5715 if (input != ctxt->input) {
5716 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5717 "Entity declaration doesn't start and stop in the same entity\n");
5723 * Ugly mechanism to save the raw entity value.
5725 xmlEntityPtr cur = NULL;
5728 if ((ctxt->sax != NULL) &&
5729 (ctxt->sax->getParameterEntity != NULL))
5730 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5732 if ((ctxt->sax != NULL) &&
5733 (ctxt->sax->getEntity != NULL))
5734 cur = ctxt->sax->getEntity(ctxt->userData, name);
5735 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5736 cur = xmlSAX2GetEntity(ctxt, name);
5740 if (cur->orig != NULL)
5747 if (value != NULL) xmlFree(value);
5748 if (URI != NULL) xmlFree(URI);
5749 if (literal != NULL) xmlFree(literal);
5754 * xmlParseDefaultDecl:
5755 * @ctxt: an XML parser context
5756 * @value: Receive a possible fixed default value for the attribute
5758 * Parse an attribute default declaration
5760 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5762 * [ VC: Required Attribute ]
5763 * if the default declaration is the keyword #REQUIRED, then the
5764 * attribute must be specified for all elements of the type in the
5765 * attribute-list declaration.
5767 * [ VC: Attribute Default Legal ]
5768 * The declared default value must meet the lexical constraints of
5769 * the declared attribute type c.f. xmlValidateAttributeDecl()
5771 * [ VC: Fixed Attribute Default ]
5772 * if an attribute has a default value declared with the #FIXED
5773 * keyword, instances of that attribute must match the default value.
5775 * [ WFC: No < in Attribute Values ]
5776 * handled in xmlParseAttValue()
5778 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5779 * or XML_ATTRIBUTE_FIXED.
5783 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5788 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5790 return(XML_ATTRIBUTE_REQUIRED);
5792 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5794 return(XML_ATTRIBUTE_IMPLIED);
5796 val = XML_ATTRIBUTE_NONE;
5797 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5799 val = XML_ATTRIBUTE_FIXED;
5800 if (!IS_BLANK_CH(CUR)) {
5801 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5802 "Space required after '#FIXED'\n");
5806 ret = xmlParseAttValue(ctxt);
5807 ctxt->instate = XML_PARSER_DTD;
5809 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5810 "Attribute default value declaration error\n");
5817 * xmlParseNotationType:
5818 * @ctxt: an XML parser context
5820 * parse a Notation attribute type.
5822 * Note: the leading 'NOTATION' S part has already been parsed...
5824 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5826 * [ VC: Notation Attributes ]
5827 * Values of this type must match one of the notation names included
5828 * in the declaration; all notation names in the declaration must be declared.
5830 * Returns: the notation attribute tree built while parsing
5834 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5835 const xmlChar *name;
5836 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5839 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5846 name = xmlParseName(ctxt);
5848 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5849 "Name expected in NOTATION declaration\n");
5850 xmlFreeEnumeration(ret);
5854 while (tmp != NULL) {
5855 if (xmlStrEqual(name, tmp->name)) {
5856 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5857 "standalone: attribute notation value token %s duplicated\n",
5859 if (!xmlDictOwns(ctxt->dict, name))
5860 xmlFree((xmlChar *) name);
5866 cur = xmlCreateEnumeration(name);
5868 xmlFreeEnumeration(ret);
5871 if (last == NULL) ret = last = cur;
5878 } while (RAW == '|');
5880 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5881 xmlFreeEnumeration(ret);
5889 * xmlParseEnumerationType:
5890 * @ctxt: an XML parser context
5892 * parse an Enumeration attribute type.
5894 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5896 * [ VC: Enumeration ]
5897 * Values of this type must match one of the Nmtoken tokens in
5900 * Returns: the enumeration attribute tree built while parsing
/* Collects the '|'-separated Nmtokens of an enumeration into an
 * xmlEnumeration list, mirroring the NOTATION variant but reading tokens
 * with xmlParseNmtoken(). */
5904 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5906 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5909 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
/* Read the next token of the enumeration. */
5916 name = xmlParseNmtoken(ctxt);
5918 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
/* Scan the entries reachable through tmp; a token equal to an existing
 * entry is reported as an XML_DTD_DUP_TOKEN validity error. */
5922 while (tmp != NULL) {
5923 if (xmlStrEqual(name, tmp->name)) {
5924 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5925 "standalone: attribute enumeration value token %s duplicated\n",
5927 if (!xmlDictOwns(ctxt->dict, name))
/* Wrap the token in a new enumeration node. The dictionary-ownership test
 * that follows presumably guards releasing the token string afterwards
 * (TODO confirm). */
5934 cur = xmlCreateEnumeration(name);
5935 if (!xmlDictOwns(ctxt->dict, name))
5938 xmlFreeEnumeration(ret);
/* First node created: it becomes both ret and last. */
5941 if (last == NULL) ret = last = cur;
5948 } while (RAW == '|');
5950 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5958 * xmlParseEnumeratedType:
5959 * @ctxt: an XML parser context
5960 * @tree: the enumeration tree built while parsing
5962 * parse an Enumerated attribute type.
5964 * [57] EnumeratedType ::= NotationType | Enumeration
5966 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5969 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
/* Chooses between the NOTATION form and the plain enumeration form based
 * on the 'NOTATION' keyword, stores the parsed list in *tree and returns
 * the matching attribute type, or 0 when the sub-parser returned NULL. */
5973 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5976 if (!IS_BLANK_CH(CUR)) {
5977 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5978 "Space required after 'NOTATION'\n");
/* NOTATION branch: the name list is parsed by xmlParseNotationType(). */
5982 *tree = xmlParseNotationType(ctxt);
5983 if (*tree == NULL) return(0);
5984 return(XML_ATTRIBUTE_NOTATION);
/* No 'NOTATION' keyword: parse a plain enumeration. */
5986 *tree = xmlParseEnumerationType(ctxt);
5987 if (*tree == NULL) return(0);
5988 return(XML_ATTRIBUTE_ENUMERATION);
5992 * xmlParseAttributeType:
5993 * @ctxt: an XML parser context
5994 * @tree: the enumeration tree built while parsing
5996 * parse the attribute type (AttType) of an attribute definition
5998 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6000 * [55] StringType ::= 'CDATA'
6002 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6003 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6005 * Validity constraints for attribute values syntax are checked in
6006 * xmlValidateAttributeValue()
6009 * Values of type ID must match the Name production. A name must not
6010 * appear more than once in an XML document as a value of this type;
6011 * i.e., ID values must uniquely identify the elements which bear them.
6013 * [ VC: One ID per Element Type ]
6014 * No element type may have more than one ID attribute specified.
6016 * [ VC: ID Attribute Default ]
6017 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6020 * Values of type IDREF must match the Name production, and values
6021 * of type IDREFS must match Names; each IDREF Name must match the value
6022 * of an ID attribute on some element in the XML document; i.e. IDREF
6023 * values must match the value of some ID attribute.
6025 * [ VC: Entity Name ]
6026 * Values of type ENTITY must match the Name production, values
6027 * of type ENTITIES must match Names; each Entity Name must match the
6028 * name of an unparsed entity declared in the DTD.
6030 * [ VC: Name Token ]
6031 * Values of type NMTOKEN must match the Nmtoken production; values
6032 * of type NMTOKENS must match Nmtokens.
6034 * Returns the attribute type
/* Maps the type keyword at the current position to its XML_ATTRIBUTE_*
 * value. Keywords that are a prefix of a longer keyword are tested after
 * the longer one (IDREFS, IDREF, ID; NMTOKENS, NMTOKEN). Anything that is
 * not a keyword is handed to xmlParseEnumeratedType(), which may fill
 * *tree. */
6037 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6039 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6041 return(XML_ATTRIBUTE_CDATA);
6042 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6044 return(XML_ATTRIBUTE_IDREFS);
6045 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6047 return(XML_ATTRIBUTE_IDREF);
6048 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6050 return(XML_ATTRIBUTE_ID);
6051 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6053 return(XML_ATTRIBUTE_ENTITY);
6054 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6056 return(XML_ATTRIBUTE_ENTITIES);
6057 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6059 return(XML_ATTRIBUTE_NMTOKENS);
6060 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6062 return(XML_ATTRIBUTE_NMTOKEN);
/* Not a keyword: must be a NOTATION or enumeration type. */
6064 return(xmlParseEnumeratedType(ctxt, tree));
6068 * xmlParseAttributeListDecl:
6069 * @ctxt: an XML parser context
6071 * parse the Attribute list def for an element
6073 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6075 * [53] AttDef ::= S Name S AttType S DefaultDecl
/* Parses one '<!ATTLIST' declaration: the element name, then a sequence of
 * attribute definitions (name, type, default declaration). Each definition
 * is reported through the SAX attributeDecl callback. */
6079 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6080 const xmlChar *elemName;
6081 const xmlChar *attrName;
6082 xmlEnumerationPtr tree;
6084 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Remember the input the declaration starts in for the boundary check at
 * the end. */
6085 xmlParserInputPtr input = ctxt->input;
6088 if (!IS_BLANK_CH(CUR)) {
6089 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6090 "Space required after '<!ATTLIST'\n");
6093 elemName = xmlParseName(ctxt);
6094 if (elemName == NULL) {
6095 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6096 "ATTLIST: no name for Element\n");
/* One iteration per attribute definition, until '>' is reached or the
 * parser state becomes XML_PARSER_EOF. */
6101 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
/* Starting position, compared with CUR_PTR further down to detect an
 * iteration that did not advance. */
6102 const xmlChar *check = CUR_PTR;
6105 xmlChar *defaultValue = NULL;
6109 attrName = xmlParseName(ctxt);
6110 if (attrName == NULL) {
6111 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6112 "ATTLIST: no name for Attribute\n");
6116 if (!IS_BLANK_CH(CUR)) {
6117 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6118 "Space required after the attribute name\n");
6123 type = xmlParseAttributeType(ctxt, &tree);
6129 if (!IS_BLANK_CH(CUR)) {
6130 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6131 "Space required after the attribute type\n");
6133 xmlFreeEnumeration(tree);
6138 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6140 if (defaultValue != NULL)
6141 xmlFree(defaultValue);
6143 xmlFreeEnumeration(tree);
/* Default values of non-CDATA attributes get their whitespace normalized
 * in place (source and destination are the same buffer). */
6146 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6147 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6151 if (!IS_BLANK_CH(CUR)) {
6152 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6153 "Space required after the attribute default value\n");
6154 if (defaultValue != NULL)
6155 xmlFree(defaultValue);
6157 xmlFreeEnumeration(tree);
/* No input consumed during this iteration: report an internal error and
 * release what was allocated for this definition. */
6162 if (check == CUR_PTR) {
6163 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6164 "in xmlParseAttributeListDecl\n");
6165 if (defaultValue != NULL)
6166 xmlFree(defaultValue);
6168 xmlFreeEnumeration(tree);
/* Report the definition through SAX; when the callback is not invoked the
 * enumeration tree is freed here instead. */
6171 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6172 (ctxt->sax->attributeDecl != NULL))
6173 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6174 type, def, defaultValue, tree);
6175 else if (tree != NULL)
6176 xmlFreeEnumeration(tree);
/* SAX2 only: record default values whose declaration is neither #IMPLIED
 * nor #REQUIRED. */
6178 if ((ctxt->sax2) && (defaultValue != NULL) &&
6179 (def != XML_ATTRIBUTE_IMPLIED) &&
6180 (def != XML_ATTRIBUTE_REQUIRED)) {
6181 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6184 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6186 if (defaultValue != NULL)
6187 xmlFree(defaultValue);
/* The declaration must end in the same input it started in. */
6191 if (input != ctxt->input) {
6192 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6193 "Attribute list declaration doesn't start and stop in the same entity\n",
6202 * xmlParseElementMixedContentDecl:
6203 * @ctxt: an XML parser context
6204 * @inputchk: the input used for the current entity, needed for boundary checks
6206 * parse the declaration for a Mixed Element content
6207 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6209 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6210 * '(' S? '#PCDATA' S? ')'
6212 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6214 * [ VC: No Duplicate Types ]
6215 * The same name must not appear more than once in a single
6216 * mixed-content declaration.
6218 * returns: the list of the xmlElementContentPtr describing the element choices
/* Builds the content tree of a Mixed declaration: a PCDATA node, optionally
 * followed by a chain of OR nodes holding the '|'-separated element names.
 * inputchk is compared with the id of the current input to detect a
 * declaration that spans entities (only when validating). */
6220 xmlElementContentPtr
6221 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6222 xmlElementContentPtr ret = NULL, cur = NULL, n;
6223 const xmlChar *elem = NULL;
6226 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6231 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6232 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6233 "Element content declaration doesn't start and stop in the same entity\n",
/* Presumably the '#PCDATA'-only form: a single PCDATA node is the whole
 * tree (TODO confirm against the enclosing condition). */
6237 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6241 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6246 if ((RAW == '(') || (RAW == '|')) {
6247 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6248 if (ret == NULL) return(NULL);
/* One iteration per '|' separator. */
6250 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6253 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6254 if (ret == NULL) return(NULL);
/* A further OR node is created; its first child is an element node for
 * elem, which at this point still holds the name parsed on the previous
 * iteration. */
6260 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6261 if (n == NULL) return(NULL);
6262 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6271 elem = xmlParseName(ctxt);
6273 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6274 "xmlParseElementMixedContentDecl : Name expected\n");
6275 xmlFreeDocElementContent(ctxt->myDoc, cur);
/* The list must be closed by ')*'. The last parsed name becomes the second
 * child of the current node and the whole tree is marked as repeatable. */
6281 if ((RAW == ')') && (NXT(1) == '*')) {
6283 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6284 XML_ELEMENT_CONTENT_ELEMENT);
6285 if (cur->c2 != NULL)
6286 cur->c2->parent = cur;
6289 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6290 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6291 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6292 "Element content declaration doesn't start and stop in the same entity\n",
6297 xmlFreeDocElementContent(ctxt->myDoc, ret);
6298 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6303 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6309 * xmlParseElementChildrenContentDeclPriv:
6310 * @ctxt: an XML parser context
6311 * @inputchk: the input used for the current entity, needed for boundary checks
6312 * @depth: the level of recursion
6314 * parse the declaration for an Element children content
6315 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6318 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6320 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6322 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6324 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6326 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6327 * TODO Parameter-entity replacement text must be properly nested
6328 * with parenthesized groups. That is to say, if either of the
6329 * opening or closing parentheses in a choice, seq, or Mixed
6330 * construct is contained in the replacement text for a parameter
6331 * entity, both must be contained in the same replacement text. For
6332 * interoperability, if a parameter-entity reference appears in a
6333 * choice, seq, or Mixed construct, its replacement text should not
6334 * be empty, and neither the first nor last non-blank character of
6335 * the replacement text should be a connector (| or ,).
6337 * Returns the tree of xmlElementContentPtr describing the element
/* Recursive worker that parses one parenthesized group of a children
 * content model into a tree of xmlElementContent nodes. It calls itself for
 * nested groups; without XML_PARSE_HUGE a depth above 128 is rejected.
 * ret is the tree returned for the group, last the most recently parsed
 * content particle, op the SEQ/OR operator node being built. */
6340 static xmlElementContentPtr
6341 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6343 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6344 const xmlChar *elem;
6347 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6349 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6350 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6357 int inputid = ctxt->input->id;
6359 /* Recurse on first child */
6362 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* First particle is a plain element name. */
6367 elem = xmlParseName(ctxt);
6369 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6372 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6374 xmlErrMemory(ctxt, NULL);
6379 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6381 } else if (RAW == '*') {
6382 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6384 } else if (RAW == '+') {
6385 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6388 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6394 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6396 * Each loop we parse one separator and one element.
/* The first separator seen is stored in type and fixes the kind of the
 * group; meeting a different separator later is an error. */
6399 if (type == 0) type = CUR;
6402 * Detect "Name | Name , Name" error
6404 else if (type != CUR) {
6405 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6406 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6408 if ((last != NULL) && (last != ret))
6409 xmlFreeDocElementContent(ctxt->myDoc, last);
6411 xmlFreeDocElementContent(ctxt->myDoc, ret);
6416 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6418 if ((last != NULL) && (last != ret))
6419 xmlFreeDocElementContent(ctxt->myDoc, last);
6420 xmlFreeDocElementContent(ctxt->myDoc, ret);
6438 } else if (RAW == '|') {
6439 if (type == 0) type = CUR;
6442 * Detect "Name , Name | Name" error
6444 else if (type != CUR) {
6445 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6446 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6448 if ((last != NULL) && (last != ret))
6449 xmlFreeDocElementContent(ctxt->myDoc, last);
6451 xmlFreeDocElementContent(ctxt->myDoc, ret);
6456 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6458 if ((last != NULL) && (last != ret))
6459 xmlFreeDocElementContent(ctxt->myDoc, last);
6461 xmlFreeDocElementContent(ctxt->myDoc, ret);
6480 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6481 if ((last != NULL) && (last != ret))
6482 xmlFreeDocElementContent(ctxt->myDoc, last);
6484 xmlFreeDocElementContent(ctxt->myDoc, ret);
6491 int inputid = ctxt->input->id;
6492 /* Recurse on second child */
6495 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* The particle after the separator is a plain element name. */
6499 elem = xmlParseName(ctxt);
6501 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6503 xmlFreeDocElementContent(ctxt->myDoc, ret);
6506 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6509 xmlFreeDocElementContent(ctxt->myDoc, ret);
6513 last->ocur = XML_ELEMENT_CONTENT_OPT;
6515 } else if (RAW == '*') {
6516 last->ocur = XML_ELEMENT_CONTENT_MULT;
6518 } else if (RAW == '+') {
6519 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6522 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6528 if ((cur != NULL) && (last != NULL)) {
/* The group must be closed in the input identified by inputchk; only
 * checked when validating. */
6533 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6534 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6535 "Element content declaration doesn't start and stop in the same entity\n",
/* Presumably the '?' suffix case (the branches below test '*' and '+'):
 * an occurrence that was already '+' or '*' becomes '*', anything else
 * becomes optional. */
6541 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6542 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6543 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6545 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6548 } else if (RAW == '*') {
6550 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6553 * Some normalization:
6554 * (a | b* | c?)* == (a | b | c)*
6556 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6557 if ((cur->c1 != NULL) &&
6558 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6560 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6561 if ((cur->c2 != NULL) &&
6562 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6563 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6564 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6569 } else if (RAW == '+') {
6573 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6574 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6575 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6577 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6579 * Some normalization:
6580 * (a | b*)+ == (a | b)*
6581 * (a | b?)+ == (a | b)*
6583 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6584 if ((cur->c1 != NULL) &&
6585 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6586 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6587 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6590 if ((cur->c2 != NULL) &&
6591 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6592 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6593 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6599 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6607 * xmlParseElementChildrenContentDecl:
6608 * @ctxt: an XML parser context
6609 * @inputchk: the input used for the current entity, needed for boundary checks
6611 * parse the declaration for an Element children content
6612 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6614 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6616 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6618 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6620 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6622 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6623 * TODO Parameter-entity replacement text must be properly nested
6624 * with parenthesized groups. That is to say, if either of the
6625 * opening or closing parentheses in a choice, seq, or Mixed
6626 * construct is contained in the replacement text for a parameter
6627 * entity, both must be contained in the same replacement text. For
6628 * interoperability, if a parameter-entity reference appears in a
6629 * choice, seq, or Mixed construct, its replacement text should not
6630 * be empty, and neither the first nor last non-blank character of
6631 * the replacement text should be a connector (| or ,).
6633 * Returns the tree of xmlElementContentPtr describing the element
/* Public entry point: forwards to the private recursive worker with an
 * initial depth of 1 and returns its result unchanged. */
6636 xmlElementContentPtr
6637 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6638 /* stub left for API/ABI compat */
6639 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6643 * xmlParseElementContentDecl:
6644 * @ctxt: an XML parser context
6645 * @name: the name of the element being defined.
6646 * @result: the Element Content pointer will be stored here if any
6648 * parse the declaration for an Element content either Mixed or Children,
6649 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6651 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6653 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/* Parses a parenthesized content specification and classifies it as Mixed
 * (starts with '#PCDATA') or element children. name is only used in the
 * error message. */
6657 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6658 xmlElementContentPtr *result) {
6660 xmlElementContentPtr tree = NULL;
/* Id of the input the declaration starts in, handed to the sub-parsers for
 * their entity-boundary checks. */
6661 int inputid = ctxt->input->id;
6667 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6668 "xmlParseElementContentDecl : %s '(' expected\n", name);
6673 if (ctxt->instate == XML_PARSER_EOF)
/* '#PCDATA' first means Mixed content, anything else is parsed as element
 * children starting at depth 1. */
6676 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6677 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6678 res = XML_ELEMENT_TYPE_MIXED;
6680 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6681 res = XML_ELEMENT_TYPE_ELEMENT;
6689 * xmlParseElementDecl:
6690 * @ctxt: an XML parser context
6692 * parse an Element declaration.
6694 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6696 * [ VC: Unique Element Type Declaration ]
6697 * No element type may be declared more than once
6699 * Returns the type of the element, or -1 in case of error
/* Parses one '<!ELEMENT' declaration: the element name followed by a
 * content specification that is EMPTY, ANY or a parenthesized model, then
 * reports it through the SAX elementDecl callback. */
6702 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6703 const xmlChar *name;
6705 xmlElementContentPtr content = NULL;
6707 /* GROW; done in the caller */
6708 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Remember the input the declaration starts in for the boundary check
 * after the closing '>'. */
6709 xmlParserInputPtr input = ctxt->input;
6712 if (!IS_BLANK_CH(CUR)) {
6713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6714 "Space required after 'ELEMENT'\n");
6718 name = xmlParseName(ctxt);
6720 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6721 "xmlParseElementDecl: no name for Element\n");
6724 while ((RAW == 0) && (ctxt->inputNr > 1))
6726 if (!IS_BLANK_CH(CUR)) {
6727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6728 "Space required after the element name\n");
6731 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6734 * Element must always be empty.
6736 ret = XML_ELEMENT_TYPE_EMPTY;
6737 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6741 * Element is a generic container.
6743 ret = XML_ELEMENT_TYPE_ANY;
6744 } else if (RAW == '(') {
6745 ret = xmlParseElementContentDecl(ctxt, name, &content);
6748 * [ WFC: PEs in Internal Subset ] error handling.
6750 if ((RAW == '%') && (ctxt->external == 0) &&
6751 (ctxt->inputNr == 1)) {
6752 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6753 "PEReference: forbidden within markup decl in internal subset\n");
6755 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6756 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6763 * Pop-up of finished entities.
6765 while ((RAW == 0) && (ctxt->inputNr > 1))
6770 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6771 if (content != NULL) {
6772 xmlFreeDocElementContent(ctxt->myDoc, content);
6775 if (input != ctxt->input) {
6776 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6777 "Element declaration doesn't start and stop in the same entity\n");
/* Report the declaration through SAX. content->parent is cleared before
 * the call so that it can be inspected afterwards. */
6781 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6782 (ctxt->sax->elementDecl != NULL)) {
6783 if (content != NULL)
6784 content->parent = NULL;
6785 ctxt->sax->elementDecl(ctxt->userData, name, ret,
/* parent still NULL after the callback: the tree is freed here. */
6787 if ((content != NULL) && (content->parent == NULL)) {
6789 * this is a trick: if xmlAddElementDecl is called,
6790 * instead of copying the full tree it is plugged directly
6791 * if called from the parser. Avoid duplicating the
6792 * interfaces or change the API/ABI
6794 xmlFreeDocElementContent(ctxt->myDoc, content);
6796 } else if (content != NULL) {
6797 xmlFreeDocElementContent(ctxt->myDoc, content);
6805 * xmlParseConditionalSections
6806 * @ctxt: an XML parser context
6808 * [61] conditionalSect ::= includeSect | ignoreSect
6809 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6810 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6811 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6812 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/* Parses an INCLUDE or IGNORE conditional section. The content of an
 * INCLUDE section is parsed like external subset declarations; the content
 * of an IGNORE section is scanned with the parser state set to
 * XML_PARSER_IGNORE. Any other keyword halts the parser. */
6816 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/* Id of the input the section starts in, used by the entity-boundary
 * checks below. */
6817 int id = ctxt->input->id;
6821 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6825 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6826 xmlHaltParser(ctxt);
6829 if (ctxt->input->id != id) {
6830 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831 "All markup of the conditional section is not in the same entity\n",
6836 if (xmlParserDebugEntities) {
6837 if ((ctxt->input != NULL) && (ctxt->input->filename))
6838 xmlGenericError(xmlGenericErrorContext,
6839 "%s(%d): ", ctxt->input->filename,
6841 xmlGenericError(xmlGenericErrorContext,
6842 "Entering INCLUDE Conditional Section\n");
/* Process the section content until ']]>', end of input, or the parser
 * state becomes XML_PARSER_EOF. */
6845 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6846 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Position and consumed counter at the start of the iteration, compared
 * below to detect an iteration that made no progress. */
6847 const xmlChar *check = CUR_PTR;
6848 unsigned int cons = ctxt->input->consumed;
6850 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6851 xmlParseConditionalSections(ctxt);
6852 } else if (IS_BLANK_CH(CUR)) {
6854 } else if (RAW == '%') {
6855 xmlParsePEReference(ctxt);
6857 xmlParseMarkupDecl(ctxt);
6860 * Pop-up of finished entities.
6862 while ((RAW == 0) && (ctxt->inputNr > 1))
6865 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6866 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6867 xmlHaltParser(ctxt);
6871 if (xmlParserDebugEntities) {
6872 if ((ctxt->input != NULL) && (ctxt->input->filename))
6873 xmlGenericError(xmlGenericErrorContext,
6874 "%s(%d): ", ctxt->input->filename,
6876 xmlGenericError(xmlGenericErrorContext,
6877 "Leaving INCLUDE Conditional Section\n");
6880 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6882 xmlParserInputState instate;
6888 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6889 xmlHaltParser(ctxt);
6892 if (ctxt->input->id != id) {
6893 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6894 "All markup of the conditional section is not in the same entity\n",
6899 if (xmlParserDebugEntities) {
6900 if ((ctxt->input != NULL) && (ctxt->input->filename))
6901 xmlGenericError(xmlGenericErrorContext,
6902 "%s(%d): ", ctxt->input->filename,
6904 xmlGenericError(xmlGenericErrorContext,
6905 "Entering IGNORE Conditional Section\n");
6909 * Parse up to the end of the conditional section
6910 * But disable SAX event generating DTD building in the meantime
6912 state = ctxt->disableSAX;
6913 instate = ctxt->instate;
6914 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6915 ctxt->instate = XML_PARSER_IGNORE;
/* depth presumably counts nested '<![' openers (TODO confirm); it is
 * decremented on every ']]>' and the scan stops once it drops below 0. */
6917 while (((depth >= 0) && (RAW != 0)) &&
6918 (ctxt->instate != XML_PARSER_EOF)) {
6919 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6924 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6925 if (--depth >= 0) SKIP(3);
/* Restore the SAX and parser state saved before the scan. */
6932 ctxt->disableSAX = state;
6933 ctxt->instate = instate;
6935 if (xmlParserDebugEntities) {
6936 if ((ctxt->input != NULL) && (ctxt->input->filename))
6937 xmlGenericError(xmlGenericErrorContext,
6938 "%s(%d): ", ctxt->input->filename,
6940 xmlGenericError(xmlGenericErrorContext,
6941 "Leaving IGNORE Conditional Section\n");
6945 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6946 xmlHaltParser(ctxt);
6954 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6956 if (ctxt->input->id != id) {
6957 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6958 "All markup of the conditional section is not in the same entity\n",
/* Only act on the closing delimiter when the parser was not halted and at
 * least 3 bytes remain in the input buffer. */
6961 if ((ctxt-> instate != XML_PARSER_EOF) &&
6962 ((ctxt->input->cur + 3) <= ctxt->input->end))
6968 * xmlParseMarkupDecl:
6969 * @ctxt: an XML parser context
6971 * parse Markup declarations
6973 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6974 * NotationDecl | PI | Comment
6976 * [ VC: Proper Declaration/PE Nesting ]
6977 * Parameter-entity replacement text must be properly nested with
6978 * markup declarations. That is to say, if either the first character
6979 * or the last character of a markup declaration (markupdecl above) is
6980 * contained in the replacement text for a parameter-entity reference,
6981 * both must be contained in the same replacement text.
6983 * [ WFC: PEs in Internal Subset ]
6984 * In the internal DTD subset, parameter-entity references can occur
6985 * only where markup declarations can occur, not within markup declarations.
6986 * (This does not apply to references that occur in external parameter
6987 * entities or to the external subset.)
/* Dispatches to the parser of the markup declaration found at the current
 * position (element, entity, attribute list, notation, comment), then
 * handles parameter-entity references and conditional sections that may
 * follow, and finally sets the parser state back to XML_PARSER_DTD. */
6990 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6993 if (NXT(1) == '!') {
6997 xmlParseElementDecl(ctxt);
6998 else if (NXT(3) == 'N')
6999 xmlParseEntityDecl(ctxt);
7002 xmlParseAttributeListDecl(ctxt);
7005 xmlParseNotationDecl(ctxt);
7008 xmlParseComment(ctxt);
7011 /* there is an error but it will be detected later */
7014 } else if (NXT(1) == '?') {
7020 * detect requirement to exit there and act accordingly
7021 * and avoid having instate overriden later on
7023 if (ctxt->instate == XML_PARSER_EOF)
7027 * This is only for internal subset. On external entities,
7028 * the replacement is done before parsing stage
7030 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7031 xmlParsePEReference(ctxt);
7034 * Conditional sections are allowed from entities included
7035 * by PE References in the internal subset.
7037 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7038 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7039 xmlParseConditionalSections(ctxt);
7043 ctxt->instate = XML_PARSER_DTD;
7048 * @ctxt: an XML parser context
7050 * parse an XML declaration header for external entities
7052 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
/* Parses the text declaration at the start of an external entity: optional
 * version information, the encoding declaration, then the closing '?>'. */
7056 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7058 const xmlChar *encoding;
7061 * We know that '<?xml' is here.
7063 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7066 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7070 if (!IS_BLANK_CH(CUR)) {
7071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7072 "Space needed after '<?xml'\n");
7077 * We may have the VersionInfo here.
7079 version = xmlParseVersionInfo(ctxt);
/* No version information present: fall back to XML_DEFAULT_VERSION. */
7080 if (version == NULL)
7081 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7083 if (!IS_BLANK_CH(CUR)) {
7084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7085 "Space needed here\n");
/* The version string is stored on the current input. */
7088 ctxt->input->version = version;
7091 * We must have the encoding declaration
7093 encoding = xmlParseEncodingDecl(ctxt);
7094 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7096 * The XML REC instructs us to stop parsing right here
/* A missing encoding is only reported when no other error has been
 * recorded in ctxt->errNo. */
7100 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7101 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7102 "Missing encoding in text declaration\n");
7106 if ((RAW == '?') && (NXT(1) == '>')) {
7108 } else if (RAW == '>') {
7109 /* Deprecated old WD ... */
7110 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7113 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7114 MOVETO_ENDTAG(CUR_PTR);
7120 * xmlParseExternalSubset:
7121 * @ctxt: an XML parser context
7122 * @ExternalID: the external identifier
7123 * @SystemID: the system identifier (or URL)
7125 * parse Markup declarations from an external subset
7127 * [30] extSubset ::= textDecl? extSubsetDecl
7129 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/* Parses an external DTD subset: optional encoding detection and text
 * declaration, creation of the document and its internal subset node when
 * missing, then a loop over markup declarations, conditional sections,
 * parameter-entity references and blanks. */
7132 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7133 const xmlChar *SystemID) {
7134 xmlDetectSAX2(ctxt);
/* No encoding known yet and at least 4 bytes available: try to detect the
 * encoding from the first 4 bytes and switch to it. */
7137 if ((ctxt->encoding == NULL) &&
7138 (ctxt->input->end - ctxt->input->cur >= 4)) {
7140 xmlCharEncoding enc;
7146 enc = xmlDetectCharEncoding(start, 4);
7147 if (enc != XML_CHAR_ENCODING_NONE)
7148 xmlSwitchEncoding(ctxt, enc);
7151 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7152 xmlParseTextDecl(ctxt);
7153 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7155 * The XML REC instructs us to stop parsing right here
7157 xmlHaltParser(ctxt);
/* Create a document on demand and flag it XML_DOC_INTERNAL. */
7161 if (ctxt->myDoc == NULL) {
7162 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7163 if (ctxt->myDoc == NULL) {
7164 xmlErrMemory(ctxt, "New Doc failed");
7167 ctxt->myDoc->properties = XML_DOC_INTERNAL;
/* Make sure the document has an internal subset node carrying the
 * external and system identifiers. */
7169 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7170 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7172 ctxt->instate = XML_PARSER_DTD;
/* Keep going while the input starts with '<?', '<!', '%' or a blank. */
7174 while (((RAW == '<') && (NXT(1) == '?')) ||
7175 ((RAW == '<') && (NXT(1) == '!')) ||
7176 (RAW == '%') || IS_BLANK_CH(CUR)) {
/* Position and consumed counter at the start of the iteration, compared
 * below to detect an iteration that made no progress. */
7177 const xmlChar *check = CUR_PTR;
7178 unsigned int cons = ctxt->input->consumed;
7181 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7182 xmlParseConditionalSections(ctxt);
7183 } else if (IS_BLANK_CH(CUR)) {
7185 } else if (RAW == '%') {
7186 xmlParsePEReference(ctxt);
7188 xmlParseMarkupDecl(ctxt);
7191 * Pop-up of finished entities.
7193 while ((RAW == 0) && (ctxt->inputNr > 1))
7196 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7197 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7203 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7209 * xmlParseReference:
7210 * @ctxt: an XML parser context
7212 * parse and handle entity references in content, depending on the SAX
7213 * interface, this may end up in a call to characters() if this is a
7214 * CharRef, a predefined entity, if there is no reference() callback.
7215 * or if the parser was asked to switch to that mode.
7217 * [67] Reference ::= EntityRef | CharRef
7220 xmlParseReference(xmlParserCtxtPtr ctxt) {
7224 xmlNodePtr list = NULL;
7225 xmlParserErrors ret = XML_ERR_OK;
7232 * Simple case of a CharRef
7234 if (NXT(1) == '#') {
7238 int value = xmlParseCharRef(ctxt);
7242 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7244 * So we are using non-UTF-8 buffers
7245 * Check that the char fit on 8bits, if not
7246 * generate a CharRef.
7248 if (value <= 0xFF) {
7251 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7252 (!ctxt->disableSAX))
7253 ctxt->sax->characters(ctxt->userData, out, 1);
7255 if ((hex == 'x') || (hex == 'X'))
7256 snprintf((char *)out, sizeof(out), "#x%X", value);
7258 snprintf((char *)out, sizeof(out), "#%d", value);
7259 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7260 (!ctxt->disableSAX))
7261 ctxt->sax->reference(ctxt->userData, out);
7265 * Just encode the value in UTF-8
7267 COPY_BUF(0 ,out, i, value);
7269 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7270 (!ctxt->disableSAX))
7271 ctxt->sax->characters(ctxt->userData, out, i);
7277 * We are seeing an entity reference
7279 ent = xmlParseEntityRef(ctxt);
7280 if (ent == NULL) return;
7281 if (!ctxt->wellFormed)
7283 was_checked = ent->checked;
7285 /* special case of predefined entities */
7286 if ((ent->name == NULL) ||
7287 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7289 if (val == NULL) return;
7291 * inline the entity.
7293 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7294 (!ctxt->disableSAX))
7295 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7300 * The first reference to the entity triggers a parsing phase
7301 * where the ent->children is filled with the result from
7303 * Note: external parsed entities will not be loaded, it is not
7304 * required for a non-validating parser, unless the parsing option
7305 * of validating, or substituting entities were given. Doing so is
7306 * far more secure as the parser will only process data coming from
7307 * the document entity by default.
7309 if (((ent->checked == 0) ||
7310 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7311 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7312 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7313 unsigned long oldnbent = ctxt->nbentities;
7316 * This is a bit hackish but this seems the best
7317 * way to make sure both SAX and DOM entity support
7321 if (ctxt->userData == ctxt)
7324 user_data = ctxt->userData;
7327 * Check that this entity is well formed
7328 * 4.3.2: An internal general parsed entity is well-formed
7329 * if its replacement text matches the production labeled
7332 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7334 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7338 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7340 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7341 user_data, ctxt->depth, ent->URI,
7342 ent->ExternalID, &list);
7345 ret = XML_ERR_ENTITY_PE_INTERNAL;
7346 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7347 "invalid entity type found\n", NULL);
7351 * Store the number of entities needing parsing for this entity
7352 * content and do checkings
7354 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7355 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7357 if (ret == XML_ERR_ENTITY_LOOP) {
7358 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7359 xmlFreeNodeList(list);
7362 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7363 xmlFreeNodeList(list);
7367 if ((ret == XML_ERR_OK) && (list != NULL)) {
7368 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7369 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7370 (ent->children == NULL)) {
7371 ent->children = list;
7372 if (ctxt->replaceEntities) {
7374 * Prune it directly in the generated document
7375 * except for single text nodes.
7377 if (((list->type == XML_TEXT_NODE) &&
7378 (list->next == NULL)) ||
7379 (ctxt->parseMode == XML_PARSE_READER)) {
7380 list->parent = (xmlNodePtr) ent;
7385 while (list != NULL) {
7386 list->parent = (xmlNodePtr) ctxt->node;
7387 list->doc = ctxt->myDoc;
7388 if (list->next == NULL)
7392 list = ent->children;
7393 #ifdef LIBXML_LEGACY_ENABLED
7394 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7395 xmlAddEntityReference(ent, list, NULL);
7396 #endif /* LIBXML_LEGACY_ENABLED */
7400 while (list != NULL) {
7401 list->parent = (xmlNodePtr) ent;
7402 xmlSetTreeDoc(list, ent->doc);
7403 if (list->next == NULL)
7409 xmlFreeNodeList(list);
7412 } else if ((ret != XML_ERR_OK) &&
7413 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7414 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7415 "Entity '%s' failed to parse\n", ent->name);
7416 xmlParserEntityCheck(ctxt, 0, ent, 0);
7417 } else if (list != NULL) {
7418 xmlFreeNodeList(list);
7421 if (ent->checked == 0)
7423 } else if (ent->checked != 1) {
7424 ctxt->nbentities += ent->checked / 2;
7428 * Now that the entity content has been gathered
7429 * provide it to the application, this can take different forms based
7430 * on the parsing modes.
7432 if (ent->children == NULL) {
7434 * Probably running in SAX mode and the callbacks don't
7435 * build the entity content. So unless we already went
7436 * through parsing for first checking go through the entity
7437 * content to generate callbacks associated to the entity
7439 if (was_checked != 0) {
7442 * This is a bit hackish but this seems the best
7443 * way to make sure both SAX and DOM entity support
7446 if (ctxt->userData == ctxt)
7449 user_data = ctxt->userData;
7451 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7453 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7454 ent->content, user_data, NULL);
7456 } else if (ent->etype ==
7457 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7459 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7460 ctxt->sax, user_data, ctxt->depth,
7461 ent->URI, ent->ExternalID, NULL);
7464 ret = XML_ERR_ENTITY_PE_INTERNAL;
7465 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7466 "invalid entity type found\n", NULL);
7468 if (ret == XML_ERR_ENTITY_LOOP) {
7469 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7473 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7474 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7476 * Entity reference callback comes second, it's somewhat
7477 * superfluous but a compatibility to historical behaviour
7479 ctxt->sax->reference(ctxt->userData, ent->name);
7485 * If we didn't get any children for the entity being built
7487 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7488 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7492 ctxt->sax->reference(ctxt->userData, ent->name);
7496 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7498 * There is a problem on the handling of _private for entities
7499 * (bug 155816): Should we copy the content of the field from
7500 * the entity (possibly overwriting some value set by the user
7501 * when a copy is created), should we leave it alone, or should
7502 * we try to take care of different situations? The problem
7503 * is exacerbated by the usage of this field by the xmlReader.
7504 * To fix this bug, we look at _private on the created node
7505 * and, if it's NULL, we copy in whatever was in the entity.
7506 * If it's not NULL we leave it alone. This is somewhat of a
7507 * hack - maybe we should have further tests to determine
7510 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7512 * Seems we are generating the DOM content, do
7513 * a simple tree copy for all references except the first
7514 * In the first occurrence list contains the replacement.
7516 if (((list == NULL) && (ent->owner == 0)) ||
7517 (ctxt->parseMode == XML_PARSE_READER)) {
7518 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7521 * We are copying here, make sure there is no abuse
7523 ctxt->sizeentcopy += ent->length + 5;
7524 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7528 * when operating on a reader, the entities definitions
7529 * are always owning the entities subtree.
7530 if (ctxt->parseMode == XML_PARSE_READER)
7534 cur = ent->children;
7535 while (cur != NULL) {
7536 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7538 if (nw->_private == NULL)
7539 nw->_private = cur->_private;
7540 if (firstChild == NULL){
7543 nw = xmlAddChild(ctxt->node, nw);
7545 if (cur == ent->last) {
7547 * needed to detect some strange empty
7548 * node cases in the reader tests
7550 if ((ctxt->parseMode == XML_PARSE_READER) &&
7552 (nw->type == XML_ELEMENT_NODE) &&
7553 (nw->children == NULL))
7560 #ifdef LIBXML_LEGACY_ENABLED
7561 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7562 xmlAddEntityReference(ent, firstChild, nw);
7563 #endif /* LIBXML_LEGACY_ENABLED */
7564 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7565 xmlNodePtr nw = NULL, cur, next, last,
7569 * We are copying here, make sure there is no abuse
7571 ctxt->sizeentcopy += ent->length + 5;
7572 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7576 * Copy the entity child list and make it the new
7577 * entity child list. The goal is to make sure any
7578 * ID or REF referenced will be the one from the
7579 * document content and not the entity copy.
7581 cur = ent->children;
7582 ent->children = NULL;
7585 while (cur != NULL) {
7589 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7591 if (nw->_private == NULL)
7592 nw->_private = cur->_private;
7593 if (firstChild == NULL){
7596 xmlAddChild((xmlNodePtr) ent, nw);
7597 xmlAddChild(ctxt->node, cur);
7603 if (ent->owner == 0)
7605 #ifdef LIBXML_LEGACY_ENABLED
7606 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7607 xmlAddEntityReference(ent, firstChild, nw);
7608 #endif /* LIBXML_LEGACY_ENABLED */
7610 const xmlChar *nbktext;
7613 * the name change is to avoid coalescing of the
7614 * node with a possible previous text one which
7615 * would make ent->children a dangling pointer
7617 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7619 if (ent->children->type == XML_TEXT_NODE)
7620 ent->children->name = nbktext;
7621 if ((ent->last != ent->children) &&
7622 (ent->last->type == XML_TEXT_NODE))
7623 ent->last->name = nbktext;
7624 xmlAddChildList(ctxt->node, ent->children);
7628 * This is to avoid a nasty side effect, see
7629 * characters() in SAX.c
7639 * xmlParseEntityRef:
7640 * @ctxt: an XML parser context
7642 * parse ENTITY references declarations
7644 * [68] EntityRef ::= '&' Name ';'
7646 * [ WFC: Entity Declared ]
7647 * In a document without any DTD, a document with only an internal DTD
7648 * subset which contains no parameter entity references, or a document
7649 * with "standalone='yes'", the Name given in the entity reference
7650 * must match that in an entity declaration, except that well-formed
7651 * documents need not declare any of the following entities: amp, lt,
7652 * gt, apos, quot. The declaration of a parameter entity must precede
7653 * any reference to it. Similarly, the declaration of a general entity
7654 * must precede any reference to it which appears in a default value in an
7655 * attribute-list declaration. Note that if entities are declared in the
7656 * external subset or in external parameter entities, a non-validating
7657 * processor is not obligated to read and process their declarations;
7658 * for such documents, the rule that an entity must be declared is a
7659 * well-formedness constraint only if standalone='yes'.
7661 * [ WFC: Parsed Entity ]
7662 * An entity reference must not contain the name of an unparsed entity
7664 * Returns the xmlEntityPtr if found, or NULL otherwise.
7667 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7668 const xmlChar *name;
7669 xmlEntityPtr ent = NULL;
7672 if (ctxt->instate == XML_PARSER_EOF)
7678 name = xmlParseName(ctxt);
7680 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7681 "xmlParseEntityRef: no name\n");
7685 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7691 * Predefined entities override any extra definition
7693 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7694 ent = xmlGetPredefinedEntity(name);
7700 * Increase the number of entity references parsed
7705 * Ask first SAX for entity resolution, otherwise try the
7706 * entities which may have been stored in the parser context.
7708 if (ctxt->sax != NULL) {
7709 if (ctxt->sax->getEntity != NULL)
7710 ent = ctxt->sax->getEntity(ctxt->userData, name);
7711 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7712 (ctxt->options & XML_PARSE_OLDSAX))
7713 ent = xmlGetPredefinedEntity(name);
7714 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7715 (ctxt->userData==ctxt)) {
7716 ent = xmlSAX2GetEntity(ctxt, name);
7719 if (ctxt->instate == XML_PARSER_EOF)
7722 * [ WFC: Entity Declared ]
7723 * In a document without any DTD, a document with only an
7724 * internal DTD subset which contains no parameter entity
7725 * references, or a document with "standalone='yes'", the
7726 * Name given in the entity reference must match that in an
7727 * entity declaration, except that well-formed documents
7728 * need not declare any of the following entities: amp, lt,
7730 * The declaration of a parameter entity must precede any
7732 * Similarly, the declaration of a general entity must
7733 * precede any reference to it which appears in a default
7734 * value in an attribute-list declaration. Note that if
7735 * entities are declared in the external subset or in
7736 * external parameter entities, a non-validating processor
7737 * is not obligated to read and process their declarations;
7738 * for such documents, the rule that an entity must be
7739 * declared is a well-formedness constraint only if
7743 if ((ctxt->standalone == 1) ||
7744 ((ctxt->hasExternalSubset == 0) &&
7745 (ctxt->hasPErefs == 0))) {
7746 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7747 "Entity '%s' not defined\n", name);
7749 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7750 "Entity '%s' not defined\n", name);
7751 if ((ctxt->inSubset == 0) &&
7752 (ctxt->sax != NULL) &&
7753 (ctxt->sax->reference != NULL)) {
7754 ctxt->sax->reference(ctxt->userData, name);
7757 xmlParserEntityCheck(ctxt, 0, ent, 0);
7762 * [ WFC: Parsed Entity ]
7763 * An entity reference must not contain the name of an
7766 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7767 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7768 "Entity reference to unparsed entity %s\n", name);
7772 * [ WFC: No External Entity References ]
7773 * Attribute values cannot contain direct or indirect
7774 * entity references to external entities.
7776 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7777 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7779 "Attribute references external entity '%s'\n", name);
7782 * [ WFC: No < in Attribute Values ]
7783 * The replacement text of any entity referred to directly or
7784 * indirectly in an attribute value (other than "&lt;") must
7787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7789 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7790 if (((ent->checked & 1) || (ent->checked == 0)) &&
7791 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7792 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7793 "'<' in entity '%s' is not allowed in attributes values\n", name);
7798 * Internal check, no parameter entities here ...
7801 switch (ent->etype) {
7802 case XML_INTERNAL_PARAMETER_ENTITY:
7803 case XML_EXTERNAL_PARAMETER_ENTITY:
7804 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7805 "Attempt to reference the parameter entity '%s'\n",
7814 * [ WFC: No Recursion ]
7815 * A parsed entity must not contain a recursive reference
7816 * to itself, either directly or indirectly.
7817 * Done somewhere else
7823 * xmlParseStringEntityRef:
7824 * @ctxt: an XML parser context
7825 * @str: a pointer to an index in the string
7827 * parse ENTITY references declarations, but this version parses it from
7830 * [68] EntityRef ::= '&' Name ';'
7832 * [ WFC: Entity Declared ]
7833 * In a document without any DTD, a document with only an internal DTD
7834 * subset which contains no parameter entity references, or a document
7835 * with "standalone='yes'", the Name given in the entity reference
7836 * must match that in an entity declaration, except that well-formed
7837 * documents need not declare any of the following entities: amp, lt,
7838 * gt, apos, quot. The declaration of a parameter entity must precede
7839 * any reference to it. Similarly, the declaration of a general entity
7840 * must precede any reference to it which appears in a default value in an
7841 * attribute-list declaration. Note that if entities are declared in the
7842 * external subset or in external parameter entities, a non-validating
7843 * processor is not obligated to read and process their declarations;
7844 * for such documents, the rule that an entity must be declared is a
7845 * well-formedness constraint only if standalone='yes'.
7847 * [ WFC: Parsed Entity ]
7848 * An entity reference must not contain the name of an unparsed entity
7850 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7851 * is updated to the current location in the string.
7854 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7858 xmlEntityPtr ent = NULL;
7860 if ((str == NULL) || (*str == NULL))
7868 name = xmlParseStringName(ctxt, &ptr);
7870 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7871 "xmlParseStringEntityRef: no name\n");
7876 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7885 * Predefined entities override any extra definition
7887 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7888 ent = xmlGetPredefinedEntity(name);
7897 * Increase the number of entity references parsed
7902 * Ask first SAX for entity resolution, otherwise try the
7903 * entities which may have been stored in the parser context.
7905 if (ctxt->sax != NULL) {
7906 if (ctxt->sax->getEntity != NULL)
7907 ent = ctxt->sax->getEntity(ctxt->userData, name);
7908 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7909 ent = xmlGetPredefinedEntity(name);
7910 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7911 ent = xmlSAX2GetEntity(ctxt, name);
7914 if (ctxt->instate == XML_PARSER_EOF) {
7920 * [ WFC: Entity Declared ]
7921 * In a document without any DTD, a document with only an
7922 * internal DTD subset which contains no parameter entity
7923 * references, or a document with "standalone='yes'", the
7924 * Name given in the entity reference must match that in an
7925 * entity declaration, except that well-formed documents
7926 * need not declare any of the following entities: amp, lt,
7928 * The declaration of a parameter entity must precede any
7930 * Similarly, the declaration of a general entity must
7931 * precede any reference to it which appears in a default
7932 * value in an attribute-list declaration. Note that if
7933 * entities are declared in the external subset or in
7934 * external parameter entities, a non-validating processor
7935 * is not obligated to read and process their declarations;
7936 * for such documents, the rule that an entity must be
7937 * declared is a well-formedness constraint only if
7941 if ((ctxt->standalone == 1) ||
7942 ((ctxt->hasExternalSubset == 0) &&
7943 (ctxt->hasPErefs == 0))) {
7944 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7945 "Entity '%s' not defined\n", name);
7947 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7948 "Entity '%s' not defined\n",
7951 xmlParserEntityCheck(ctxt, 0, ent, 0);
7952 /* TODO ? check regressions ctxt->valid = 0; */
7956 * [ WFC: Parsed Entity ]
7957 * An entity reference must not contain the name of an
7960 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7961 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7962 "Entity reference to unparsed entity %s\n", name);
7966 * [ WFC: No External Entity References ]
7967 * Attribute values cannot contain direct or indirect
7968 * entity references to external entities.
7970 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7971 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7972 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7973 "Attribute references external entity '%s'\n", name);
7976 * [ WFC: No < in Attribute Values ]
7977 * The replacement text of any entity referred to directly or
7978 * indirectly in an attribute value (other than "&lt;") must
7981 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7982 (ent != NULL) && (ent->content != NULL) &&
7983 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7984 (xmlStrchr(ent->content, '<'))) {
7985 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7986 "'<' in entity '%s' is not allowed in attributes values\n",
7991 * Internal check, no parameter entities here ...
7994 switch (ent->etype) {
7995 case XML_INTERNAL_PARAMETER_ENTITY:
7996 case XML_EXTERNAL_PARAMETER_ENTITY:
7997 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7998 "Attempt to reference the parameter entity '%s'\n",
8007 * [ WFC: No Recursion ]
8008 * A parsed entity must not contain a recursive reference
8009 * to itself, either directly or indirectly.
8010 * Done somewhere else
8019 * xmlParsePEReference:
8020 * @ctxt: an XML parser context
8022 * parse PEReference declarations
8023 * The entity content is handled directly by pushing its content as
8024 * a new input stream.
8026 * [69] PEReference ::= '%' Name ';'
8028 * [ WFC: No Recursion ]
8029 * A parsed entity must not contain a recursive
8030 * reference to itself, either directly or indirectly.
8032 * [ WFC: Entity Declared ]
8033 * In a document without any DTD, a document with only an internal DTD
8034 * subset which contains no parameter entity references, or a document
8035 * with "standalone='yes'", ... ... The declaration of a parameter
8036 * entity must precede any reference to it...
8038 * [ VC: Entity Declared ]
8039 * In a document with an external subset or external parameter entities
8040 * with "standalone='no'", ... ... The declaration of a parameter entity
8041 * must precede any reference to it...
8044 * Parameter-entity references may only appear in the DTD.
8045 * NOTE: misleading but this is handled.
8048 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8050 const xmlChar *name;
8051 xmlEntityPtr entity = NULL;
8052 xmlParserInputPtr input;
8057 name = xmlParseName(ctxt);
8059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8060 "xmlParsePEReference: no name\n");
8064 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8071 * Increase the number of entity references parsed
8076 * Request the entity from SAX
8078 if ((ctxt->sax != NULL) &&
8079 (ctxt->sax->getParameterEntity != NULL))
8080 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8081 if (ctxt->instate == XML_PARSER_EOF)
8083 if (entity == NULL) {
8085 * [ WFC: Entity Declared ]
8086 * In a document without any DTD, a document with only an
8087 * internal DTD subset which contains no parameter entity
8088 * references, or a document with "standalone='yes'", ...
8089 * ... The declaration of a parameter entity must precede
8090 * any reference to it...
8092 if ((ctxt->standalone == 1) ||
8093 ((ctxt->hasExternalSubset == 0) &&
8094 (ctxt->hasPErefs == 0))) {
8095 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8096 "PEReference: %%%s; not found\n",
8100 * [ VC: Entity Declared ]
8101 * In a document with an external subset or external
8102 * parameter entities with "standalone='no'", ...
8103 * ... The declaration of a parameter entity must
8104 * precede any reference to it...
8106 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8107 "PEReference: %%%s; not found\n",
8111 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8114 * Internal checking in case the entity quest barfed
8116 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8117 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8118 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8119 "Internal: %%%s; is not a parameter entity\n",
8121 } else if (ctxt->input->free != deallocblankswrapper) {
8122 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8123 if (xmlPushInput(ctxt, input) < 0)
8128 * handle the extra spaces added before and after
8129 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8131 input = xmlNewEntityInputStream(ctxt, entity);
8132 if (xmlPushInput(ctxt, input) < 0)
8134 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8135 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8136 (IS_BLANK_CH(NXT(5)))) {
8137 xmlParseTextDecl(ctxt);
8139 XML_ERR_UNSUPPORTED_ENCODING) {
8141 * The XML REC instructs us to stop parsing
8144 xmlHaltParser(ctxt);
8150 ctxt->hasPErefs = 1;
8154 * xmlLoadEntityContent:
8155 * @ctxt: an XML parser context
8156 * @entity: an unloaded system entity
8158 * Load the original content of the given system entity from the
8159 * ExternalID/SystemID given. This is to be used for Included in Literal
8160 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
8162 * Returns 0 in case of success and -1 in case of failure
8165 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8166 xmlParserInputPtr input;
8171 if ((ctxt == NULL) || (entity == NULL) ||
8172 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8173 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8174 (entity->content != NULL)) {
8175 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8176 "xmlLoadEntityContent parameter error");
8180 if (xmlParserDebugEntities)
8181 xmlGenericError(xmlGenericErrorContext,
8182 "Reading %s entity content input\n", entity->name);
8184 buf = xmlBufferCreate();
8186 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8187 "xmlLoadEntityContent parameter error");
8191 input = xmlNewEntityInputStream(ctxt, entity);
8192 if (input == NULL) {
8193 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8194 "xmlLoadEntityContent input error");
8200 * Push the entity as the current input, read char by char
8201 * saving to the buffer until the end of the entity or an error
8203 if (xmlPushInput(ctxt, input) < 0) {
8210 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8212 xmlBufferAdd(buf, ctxt->input->cur, l);
8213 if (count++ > XML_PARSER_CHUNK_SIZE) {
8216 if (ctxt->instate == XML_PARSER_EOF) {
8226 if (ctxt->instate == XML_PARSER_EOF) {
8234 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8236 } else if (!IS_CHAR(c)) {
8237 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8238 "xmlLoadEntityContent: invalid char value %d\n",
8243 entity->content = buf->content;
8244 buf->content = NULL;
8251 * xmlParseStringPEReference:
8252 * @ctxt: an XML parser context
8253 * @str: a pointer to an index in the string
8255 * parse PEReference declarations
8257 * [69] PEReference ::= '%' Name ';'
8259 * [ WFC: No Recursion ]
8260 * A parsed entity must not contain a recursive
8261 * reference to itself, either directly or indirectly.
8263 * [ WFC: Entity Declared ]
8264 * In a document without any DTD, a document with only an internal DTD
8265 * subset which contains no parameter entity references, or a document
8266 * with "standalone='yes'", ... ... The declaration of a parameter
8267 * entity must precede any reference to it...
8269 * [ VC: Entity Declared ]
8270 * In a document with an external subset or external parameter entities
8271 * with "standalone='no'", ... ... The declaration of a parameter entity
8272 * must precede any reference to it...
8275 * Parameter-entity references may only appear in the DTD.
8276 * NOTE: misleading but this is handled.
8278 * Returns the string of the entity content.
8279 * str is updated to the current value of the index
8282 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8286 xmlEntityPtr entity = NULL;
8288 if ((str == NULL) || (*str == NULL)) return(NULL);
8294 name = xmlParseStringName(ctxt, &ptr);
8296 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8297 "xmlParseStringPEReference: no name\n");
8303 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8311 * Increase the number of entity references parsed
8316 * Request the entity from SAX
8318 if ((ctxt->sax != NULL) &&
8319 (ctxt->sax->getParameterEntity != NULL))
8320 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8321 if (ctxt->instate == XML_PARSER_EOF) {
8325 if (entity == NULL) {
8327 * [ WFC: Entity Declared ]
8328 * In a document without any DTD, a document with only an
8329 * internal DTD subset which contains no parameter entity
8330 * references, or a document with "standalone='yes'", ...
8331 * ... The declaration of a parameter entity must precede
8332 * any reference to it...
8334 if ((ctxt->standalone == 1) ||
8335 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8336 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8337 "PEReference: %%%s; not found\n", name);
8340 * [ VC: Entity Declared ]
8341 * In a document with an external subset or external
8342 * parameter entities with "standalone='no'", ...
8343 * ... The declaration of a parameter entity must
8344 * precede any reference to it...
8346 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8347 "PEReference: %%%s; not found\n",
8351 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8354 * Internal checking in case the entity quest barfed
8356 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8357 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8358 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8359 "%%%s; is not a parameter entity\n",
8363 ctxt->hasPErefs = 1;
8370 * xmlParseDocTypeDecl:
8371 * @ctxt: an XML parser context
8373 * parse a DOCTYPE declaration
8375 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8376 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8378 * [ VC: Root Element Type ]
8379 * The Name in the document type declaration must match the element
8380 * type of the root element.
8384 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8385 const xmlChar *name = NULL;
8386 xmlChar *ExternalID = NULL;
8387 xmlChar *URI = NULL;
8390 * We know that '<!DOCTYPE' has been detected.
8397 * Parse the DOCTYPE name.
8399 name = xmlParseName(ctxt);
8401 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8402 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8404 ctxt->intSubName = name;
8409 * Check for SystemID and ExternalID
8411 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8413 if ((URI != NULL) || (ExternalID != NULL)) {
8414 ctxt->hasExternalSubset = 1;
8416 ctxt->extSubURI = URI;
8417 ctxt->extSubSystem = ExternalID;
8422 * Create and update the internal subset.
8424 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8425 (!ctxt->disableSAX))
8426 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8427 if (ctxt->instate == XML_PARSER_EOF)
8431 * Are there any internal subset declarations?
8432 * they are handled separately in xmlParseInternalSubset()
8438 * We should be at the end of the DOCTYPE declaration.
8441 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8447 * xmlParseInternalSubset:
8448 * @ctxt: an XML parser context
8450 * parse the internal subset declaration
8452 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8456 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8458 * Is there any DTD definition ?
8461 ctxt->instate = XML_PARSER_DTD;
8464 * Parse the succession of Markup declarations and
8466 * Subsequence (markupdecl | PEReference | S)*
8468 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8469 const xmlChar *check = CUR_PTR;
8470 unsigned int cons = ctxt->input->consumed;
8473 xmlParseMarkupDecl(ctxt);
8474 xmlParsePEReference(ctxt);
8477 * Pop-up of finished entities.
8479 while ((RAW == 0) && (ctxt->inputNr > 1))
8482 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8483 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8484 "xmlParseInternalSubset: error detected in Markup declaration\n");
8495 * We should be at the end of the DOCTYPE declaration.
8498 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8504 #ifdef LIBXML_SAX1_ENABLED
8506 * xmlParseAttribute:
8507 * @ctxt: an XML parser context
8508 * @value: a xmlChar ** used to store the value of the attribute
8510 * parse an attribute
8512 * [41] Attribute ::= Name Eq AttValue
8514 * [ WFC: No External Entity References ]
8515 * Attribute values cannot contain direct or indirect entity references
8516 * to external entities.
8518 * [ WFC: No < in Attribute Values ]
8519 * The replacement text of any entity referred to directly or indirectly in
8520 * an attribute value (other than "&lt;") must not contain a <.
8522 * [ VC: Attribute Value Type ]
8523 * The attribute must have been declared; the value must be of the type
8526 * [25] Eq ::= S? '=' S?
8530 * [NS 11] Attribute ::= QName Eq AttValue
8532 * Also the case QName == xmlns:??? is handled independently as a namespace
8535 * Returns the attribute name, and the value in *value.
8539 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
 * SAX1 attribute parser: reads the attribute name with xmlParseName() and
 * the value with xmlParseAttValue(), then runs the xml:lang and xml:space
 * checks below on the full "xml:..." attribute name.
 */
8540 const xmlChar *name;
8545 name = xmlParseName(ctxt);
8547 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8548 "error parsing attribute name\n");
8559 val = xmlParseAttValue(ctxt);
/* Value parsing is over: go back to the content state. */
8560 ctxt->instate = XML_PARSER_CONTENT;
8562 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8563 "Specification mandate value for attribute %s\n", name);
8568 * Check that xml:lang conforms to the specification
8569 * No more registered as an error, just generate a warning now
8570 * since this was deprecated in XML second edition
8572 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
/* Only reached in pedantic mode; a malformed language id is a warning. */
8573 if (!xmlCheckLanguageID(val)) {
8574 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8575 "Malformed value for xml:lang : %s\n",
8581 * Check that xml:space conforms to the specification
8583 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
/*
 * Only "default" and "preserve" are recognised; any other value ends up in
 * the XML_WAR_SPACE_VALUE warning below.
 */
8584 if (xmlStrEqual(val, BAD_CAST "default"))
8586 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8589 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8590 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8601 * @ctxt: an XML parser context
8603 * parse a start of tag either for rule element or
8604 * EmptyElement. In both cases we don't parse the tag closing chars.
8606 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8608 * [ WFC: Unique Att Spec ]
8609 * No attribute name may appear more than once in the same start-tag or
8610 * empty-element tag.
8612 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8614 * [ WFC: Unique Att Spec ]
8615 * No attribute name may appear more than once in the same start-tag or
8616 * empty-element tag.
8620 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8622 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8624 * Returns the element name parsed
8628 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
/*
 * SAX1 start-tag parser: parses the element name with xmlParseName(),
 * collects the attributes returned by xmlParseAttribute() as a
 * NULL-terminated array of name/value pairs (working on the array cached in
 * ctxt->atts / ctxt->maxatts) and hands it to the startElement SAX callback.
 * Only the value strings are released afterwards.
 */
8629 const xmlChar *name;
8630 const xmlChar *attname;
8632 const xmlChar **atts = ctxt->atts;
8634 int maxatts = ctxt->maxatts;
8637 if (RAW != '<') return(NULL);
8640 name = xmlParseName(ctxt);
8642 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8643 "xmlParseStartTag: invalid element name\n");
8648 * Now parse the attributes, it ends up with the ending
8655 while (((RAW != '>') &&
8656 ((RAW != '/') || (NXT(1) != '>')) &&
8657 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot the cursor and the consumed count to detect a stalled iteration. */
8658 const xmlChar *q = CUR_PTR;
8659 unsigned int cons = ctxt->input->consumed;
8661 attname = xmlParseAttribute(ctxt, &attvalue);
8662 if ((attname != NULL) && (attvalue != NULL)) {
8664 * [ WFC: Unique Att Spec ]
8665 * No attribute name may appear more than once in the same
8666 * start-tag or empty-element tag.
8668 for (i = 0; i < nbatts;i += 2) {
/* atts holds name/value pairs, so the names sit at the even indexes. */
8669 if (xmlStrEqual(atts[i], attname)) {
8670 xmlErrAttributeDup(ctxt, NULL, attname);
8676 * Add the pair to atts
8679 maxatts = 22; /* allow for 10 attrs by default */
/* First use: allocate the attribute array. */
8680 atts = (const xmlChar **)
8681 xmlMalloc(maxatts * sizeof(xmlChar *));
8683 xmlErrMemory(ctxt, NULL);
8684 if (attvalue != NULL)
8689 ctxt->maxatts = maxatts;
8690 } else if (nbatts + 4 > maxatts) {
/* No room for the new pair plus the two NULL terminators: grow the array. */
8694 n = (const xmlChar **) xmlRealloc((void *) atts,
8695 maxatts * sizeof(const xmlChar *));
8697 xmlErrMemory(ctxt, NULL);
8698 if (attvalue != NULL)
8704 ctxt->maxatts = maxatts;
8706 atts[nbatts++] = attname;
8707 atts[nbatts++] = attvalue;
/* Keep the array NULL-terminated after every insertion. */
8708 atts[nbatts] = NULL;
8709 atts[nbatts + 1] = NULL;
8711 if (attvalue != NULL)
8718 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8720 if (!IS_BLANK_CH(RAW)) {
8721 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8722 "attributes construct error\n");
8725 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8726 (attname == NULL) && (attvalue == NULL)) {
/* No input consumed and no attribute produced: report an internal error. */
8727 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8728 "xmlParseStartTag: problem parsing attributes\n");
8736 * SAX: Start of Element !
8738 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8739 (!ctxt->disableSAX)) {
8741 ctxt->sax->startElement(ctxt->userData, name, atts);
8743 ctxt->sax->startElement(ctxt->userData, name, NULL);
8747 /* Free only the content strings */
/* Values sit at the odd indexes; the names at even indexes are not freed. */
8748 for (i = 1;i < nbatts;i+=2)
8749 if (atts[i] != NULL)
8750 xmlFree((xmlChar *) atts[i]);
8757 * @ctxt: an XML parser context
8758 * @line: line of the start tag
8759 * @nsNr: number of namespaces on the start tag
8761 * parse an end of tag
8763 * [42] ETag ::= '</' Name S? '>'
8767 * [NS 9] ETag ::= '</' QName S? '>'
8771 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
/*
 * SAX1 end-tag parser: expects "</", compares the tag name against
 * ctxt->name with xmlParseNameAndCompare(), requires the closing '>' and
 * invokes the endElement SAX callback. @line is only used in the mismatch
 * error message.
 */
8772 const xmlChar *name;
8775 if ((RAW != '<') || (NXT(1) != '/')) {
8776 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8777 "xmlParseEndTag: '</' not found\n");
8782 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8785 * We should definitely be at the ending "S? '>'" part
8789 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8790 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8795 * [ WFC: Element Type Match ]
8796 * The Name in an element's end-tag must match the element type in the
8800 if (name != (xmlChar*)1) {
/*
 * Any value other than the (xmlChar*)1 sentinel is reported as a mismatch;
 * a NULL name is printed as "unparseable".
 */
8801 if (name == NULL) name = BAD_CAST "unparseable";
8802 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8803 "Opening and ending tag mismatch: %s line %d and %s\n",
8804 ctxt->name, line, name);
8810 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8811 (!ctxt->disableSAX))
8812 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8821 * @ctxt: an XML parser context
8823 * parse an end of tag
8825 * [42] ETag ::= '</' Name S? '>'
8829 * [NS 9] ETag ::= '</' QName S? '>'
8833 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
/* Thin wrapper: delegates to xmlParseEndTag1() with a line number of 0. */
8834 xmlParseEndTag1(ctxt, 0);
8836 #endif /* LIBXML_SAX1_ENABLED */
8838 /************************************************************************
8840 * SAX 2 specific operations *
8842 ************************************************************************/
8846 * @ctxt: an XML parser context
8847 * @prefix: the prefix to lookup
8849 * Lookup the namespace name for the @prefix (which can be NULL)
8850 * The prefix must come from the @ctxt->dict dictionary
8852 * Returns the namespace name or NULL if not bound
8854 static const xmlChar *
8855 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
/*
 * Prefixes are compared by pointer, not by content; this relies on them
 * coming from ctxt->dict as required by the header comment.
 * ctxt->str_xml always maps to ctxt->str_xml_ns.
 */
8858 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
/* nsTab stores (prefix, URI) pairs; scan it from the last pair backwards. */
8859 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8860 if (ctxt->nsTab[i] == prefix) {
/* Special case: default namespace (NULL prefix) bound to an empty URI. */
8861 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863 return(ctxt->nsTab[i + 1]);
8870 * @ctxt: an XML parser context
8871 * @prefix: pointer to store the prefix part
8873 * parse an XML Namespace QName
8875 * [6] QName ::= (Prefix ':')? LocalPart
8876 * [7] Prefix ::= NCName
8877 * [8] LocalPart ::= NCName
8879 * Returns the Name parsed or NULL
8882 static const xmlChar *
8883 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
/*
 * Parses a QName using xmlParseNCName() for its parts. When an NCName cannot
 * be read the code falls back to xmlParseName() / xmlParseNmtoken(), reports
 * XML_NS_ERR_QNAME and rebuilds a name with xmlBuildQName() that is then
 * interned in ctxt->dict.
 */
8884 const xmlChar *l, *p;
8888 l = xmlParseNCName(ctxt);
8891 l = xmlParseName(ctxt);
8893 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8894 "Failed to parse QName '%s'\n", l, NULL, NULL);
8904 l = xmlParseNCName(ctxt);
8908 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8909 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8910 l = xmlParseNmtoken(ctxt);
8912 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8914 tmp = xmlBuildQName(l, p, NULL, 0);
8917 p = xmlDictLookup(ctxt->dict, tmp, -1);
/* The rebuilt name now lives in the dictionary; drop the temporary copy. */
8918 if (tmp != NULL) xmlFree(tmp);
8925 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8926 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8928 tmp = (xmlChar *) xmlParseName(ctxt);
8930 tmp = xmlBuildQName(tmp, l, NULL, 0);
8931 l = xmlDictLookup(ctxt->dict, tmp, -1);
8932 if (tmp != NULL) xmlFree(tmp);
8936 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8937 l = xmlDictLookup(ctxt->dict, tmp, -1);
8938 if (tmp != NULL) xmlFree(tmp);
8949 * xmlParseQNameAndCompare:
8950 * @ctxt: an XML parser context
8951 * @name: the localname
8952 * @prefix: the prefix, if any.
8954 * parse an XML name and compares for match
8955 * (specialized for endtag parsing)
8957 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8958 * and the name for mismatch
8961 static const xmlChar *
8962 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8963 xmlChar const *prefix) {
8967 const xmlChar *prefix2;
/* Without a prefix the plain name comparison is sufficient. */
8969 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
/*
 * Fast path: scan the raw input byte by byte against the expected string
 * (cmp), with a ':' separator expected in between, without parsing a QName.
 */
8972 in = ctxt->input->cur;
8975 while (*in != 0 && *in == *cmp) {
8979 if ((*cmp == 0) && (*in == ':')) {
8982 while (*in != 0 && *in == *cmp) {
8986 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
/* Full match: commit the cursor and return the success sentinel. */
8988 ctxt->input->cur = in;
8989 return((const xmlChar*) 1);
8993 * all strings coms from the dictionary, equality can be done directly
8995 ret = xmlParseQName (ctxt, &prefix2);
/* Slow path: both the local name and the prefix are compared by pointer. */
8996 if ((ret == name) && (prefix == prefix2))
8997 return((const xmlChar*) 1);
9002 * xmlParseAttValueInternal:
9003 * @ctxt: an XML parser context
9004 * @len: attribute len result
9005 * @alloc: whether the attribute was reallocated as a new string
9006 * @normalize: if 1 then further non-CDATA normalization must be done
9008 * parse a value for an attribute.
9009 * NOTE: if no normalization is needed, the routine will return pointers
9010 * directly from the data buffer.
9012 * 3.3.3 Attribute-Value Normalization:
9013 * Before the value of an attribute is passed to the application or
9014 * checked for validity, the XML processor must normalize it as follows:
9015 * - a character reference is processed by appending the referenced
9016 * character to the attribute value
9017 * - an entity reference is processed by recursively processing the
9018 * replacement text of the entity
9019 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9020 * appending #x20 to the normalized value, except that only a single
9021 * #x20 is appended for a "#xD#xA" sequence that is part of an external
9022 * parsed entity or the literal entity value of an internal parsed entity
9023 * - other characters are processed by appending them to the normalized value
9024 * If the declared value is not CDATA, then the XML processor must further
9025 * process the normalized attribute value by discarding any leading and
9026 * trailing space (#x20) characters, and by replacing sequences of space
9027 * (#x20) characters by a single space (#x20) character.
9028 * All attributes for which no declaration has been read should be treated
9029 * by a non-validating parser as if declared CDATA.
9031 * Returns the AttValue parsed or NULL. The value has to be freed by the
9032 * caller if it was copied, this can be detected by val[*len] == 0.
9036 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9040 const xmlChar *in = NULL, *start, *end, *last;
9041 xmlChar *ret = NULL;
/*
 * Fast-path attribute value parser: scans the quoted value directly in the
 * input buffer and falls back to xmlParseAttValueComplex() (label
 * need_complex) as soon as the simple scan cannot finish on the closing
 * quote. The line/column saved here are written back to ctxt->input near
 * the end of the fast path.
 */
9045 in = (xmlChar *) CUR_PTR;
9046 line = ctxt->input->line;
9047 col = ctxt->input->col;
/* The value must start with a single or double quote. */
9048 if (*in != '"' && *in != '\'') {
9049 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9052 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9055 * try to handle in this routine the most common case where no
9056 * allocation of a new string is required and where content is
9061 end = ctxt->input->end;
9064 const xmlChar *oldbase = ctxt->input->base;
9066 if (oldbase != ctxt->input->base) {
/* The input buffer moved (base changed): shift the local pointers too. */
9067 long delta = ctxt->input->base - oldbase;
9068 start = start + delta;
9071 end = ctxt->input->end;
9075 * Skip any leading spaces
9077 while ((in < end) && (*in != limit) &&
9078 ((*in == 0x20) || (*in == 0x9) ||
9079 (*in == 0xA) || (*in == 0xD))) {
9088 const xmlChar *oldbase = ctxt->input->base;
9090 if (ctxt->instate == XML_PARSER_EOF)
9092 if (oldbase != ctxt->input->base) {
9093 long delta = ctxt->input->base - oldbase;
9094 start = start + delta;
9097 end = ctxt->input->end;
/* Values longer than XML_MAX_TEXT_LENGTH are rejected unless XML_PARSE_HUGE is set. */
9098 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9099 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9100 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9101 "AttValue length too long\n");
9106 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9107 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
/* Two consecutive spaces end this scan. */
9109 if ((*in++ == 0x20) && (*in == 0x20)) break;
9111 const xmlChar *oldbase = ctxt->input->base;
9113 if (ctxt->instate == XML_PARSER_EOF)
9115 if (oldbase != ctxt->input->base) {
9116 long delta = ctxt->input->base - oldbase;
9117 start = start + delta;
9120 end = ctxt->input->end;
9121 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9122 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9123 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9124 "AttValue length too long\n");
9131 * skip the trailing blanks
9133 while ((last[-1] == 0x20) && (last > start)) last--;
9134 while ((in < end) && (*in != limit) &&
9135 ((*in == 0x20) || (*in == 0x9) ||
9136 (*in == 0xA) || (*in == 0xD))) {
9144 const xmlChar *oldbase = ctxt->input->base;
9146 if (ctxt->instate == XML_PARSER_EOF)
9148 if (oldbase != ctxt->input->base) {
9149 long delta = ctxt->input->base - oldbase;
9150 start = start + delta;
/* last also points into the buffer and must follow the move. */
9152 last = last + delta;
9154 end = ctxt->input->end;
9155 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9156 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9157 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9158 "AttValue length too long\n");
9163 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9164 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9165 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9166 "AttValue length too long\n");
/* The simple scan did not stop on the closing quote: use the general routine. */
9169 if (*in != limit) goto need_complex;
9171 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9172 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9176 const xmlChar *oldbase = ctxt->input->base;
9178 if (ctxt->instate == XML_PARSER_EOF)
9180 if (oldbase != ctxt->input->base) {
9181 long delta = ctxt->input->base - oldbase;
9182 start = start + delta;
9185 end = ctxt->input->end;
9186 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9187 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9188 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9189 "AttValue length too long\n");
9195 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9196 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9197 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9198 "AttValue length too long\n");
9201 if (*in != limit) goto need_complex;
9206 *len = last - start;
/* No copy: the result points into the input buffer, its length is in *len. */
9207 ret = (xmlChar *) start;
9209 if (alloc) *alloc = 1;
/*
 * NOTE(review): *alloc is set to 1 for this duplicated string, but it is
 * assigned 0 again a few statements below; confirm that callers relying on
 * *alloc never take this branch.
 */
9210 ret = xmlStrndup(start, last - start);
9213 ctxt->input->line = line;
9214 ctxt->input->col = col;
9215 if (alloc) *alloc = 0;
9218 if (alloc) *alloc = 1;
/* General case: the value is produced by the complex parser. */
9219 return xmlParseAttValueComplex(ctxt, len, normalize);
9223 * xmlParseAttribute2:
9224 * @ctxt: an XML parser context
9225 * @pref: the element prefix
9226 * @elem: the element name
9227 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9228 * @value: a xmlChar ** used to store the value of the attribute
9229 * @len: an int * to save the length of the attribute
9230 * @alloc: an int * to indicate if the attribute was allocated
9232 * parse an attribute in the new SAX2 framework.
9234 * Returns the attribute name, and the value in *value.
9237 static const xmlChar *
9238 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9239 const xmlChar * pref, const xmlChar * elem,
9240 const xmlChar ** prefix, xmlChar ** value,
9241 int *len, int *alloc)
9243 const xmlChar *name;
9244 xmlChar *val, *internal_val = NULL;
/*
 * SAX2 attribute parser: reads the attribute QName with xmlParseQName()
 * (the prefix is stored through @prefix), looks up the attribute in
 * ctxt->attsSpecial when that table exists, then parses the value with
 * xmlParseAttValueInternal(). Attributes whose prefix is ctxt->str_xml get
 * the extra xml:lang / xml:space checks at the end.
 */
9249 name = xmlParseQName(ctxt, prefix);
9251 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9252 "error parsing attribute name\n");
9257 * get the type if needed
9259 if (ctxt->attsSpecial != NULL) {
/*
 * The lookup is keyed by the element (pref, elem) and the attribute
 * (*prefix, name); the stored pointer is read back as an int.
 */
9262 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9263 pref, elem, *prefix, name);
9275 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9278 * Sometimes a second normalisation pass for spaces is needed
9279 * but that only happens if charrefs or entities refernces
9280 * have been used in the attribute value, i.e. the attribute
9281 * value have been extracted in an allocated string already.
9284 const xmlChar *val2;
9286 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9287 if ((val2 != NULL) && (val2 != val)) {
/* Normalization produced a different string: continue with that one. */
9289 val = (xmlChar *) val2;
9293 ctxt->instate = XML_PARSER_CONTENT;
9295 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9296 "Specification mandate value for attribute %s\n",
9301 if (*prefix == ctxt->str_xml) {
9303 * Check that xml:lang conforms to the specification
9304 * No more registered as an error, just generate a warning now
9305 * since this was deprecated in XML second edition
9307 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
/* The check works on a bounded copy made from the first *len bytes of val. */
9308 internal_val = xmlStrndup(val, *len);
9309 if (!xmlCheckLanguageID(internal_val)) {
9310 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9311 "Malformed value for xml:lang : %s\n",
9312 internal_val, NULL);
9317 * Check that xml:space conforms to the specification
9319 if (xmlStrEqual(name, BAD_CAST "space")) {
9320 internal_val = xmlStrndup(val, *len);
9321 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9323 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9326 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9327 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9328 internal_val, NULL);
/* Release the temporary copy used by the xml:lang / xml:space checks. */
9332 xmlFree(internal_val);
9340 * xmlParseStartTag2:
9341 * @ctxt: an XML parser context
9343 * parse a start of tag either for rule element or
9344 * EmptyElement. In both cases we don't parse the tag closing chars.
9345 * This routine is called when running SAX2 parsing
9347 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9349 * [ WFC: Unique Att Spec ]
9350 * No attribute name may appear more than once in the same start-tag or
9351 * empty-element tag.
9353 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9355 * [ WFC: Unique Att Spec ]
9356 * No attribute name may appear more than once in the same start-tag or
9357 * empty-element tag.
9361 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9363 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9365 * Returns the element name parsed
9368 static const xmlChar *
9369 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9370 const xmlChar **URI, int *tlen) {
9371 const xmlChar *localname;
9372 const xmlChar *prefix;
9373 const xmlChar *attname;
9374 const xmlChar *aprefix;
9375 const xmlChar *nsname;
9377 const xmlChar **atts = ctxt->atts;
9378 int maxatts = ctxt->maxatts;
9379 int nratts, nbatts, nbdef;
9380 int i, j, nbNs, attval, oldline, oldcol, inputNr;
9381 const xmlChar *base;
9383 int nsNr = ctxt->nsNr;
/*
 * SAX2 start-tag parser. Parses the element QName, then every attribute
 * through xmlParseAttribute2(). xmlns and xmlns:prefix declarations are
 * pushed on the namespace stack with nsPush(); the other attributes are
 * stored in atts, five slots each. Defaulted attributes from
 * ctxt->attsDefault are merged afterwards, attribute prefixes are resolved
 * with xmlGetNamespace(), duplicates are reported and the startElementNs
 * SAX callback is invoked.
 * *tlen receives the number of bytes consumed while parsing the name.
 */
9385 if (RAW != '<') return(NULL);
9389 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9390 * point since the attribute values may be stored as pointers to
9391 * the buffer and calling SHRINK would destroy them !
9392 * The Shrinking is only possible once the full set of attribute
9393 * callbacks have been done.
9397 base = ctxt->input->base;
/* cur is kept as an offset from base so it can be re-applied to the buffer later. */
9398 cur = ctxt->input->cur - ctxt->input->base;
9399 inputNr = ctxt->inputNr;
9400 oldline = ctxt->input->line;
9401 oldcol = ctxt->input->col;
9407 /* Forget any namespaces added during an earlier parse of this element. */
9410 localname = xmlParseQName(ctxt, &prefix);
9411 if (localname == NULL) {
9412 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9413 "StartTag: invalid element name\n");
9416 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9419 * Now parse the attributes, it ends up with the ending
9425 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9428 while (((RAW != '>') &&
9429 ((RAW != '/') || (NXT(1) != '>')) &&
9430 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9431 const xmlChar *q = CUR_PTR;
9432 unsigned int cons = ctxt->input->consumed;
9433 int len = -1, alloc = 0;
9435 attname = xmlParseAttribute2(ctxt, prefix, localname,
9436 &aprefix, &attvalue, &len, &alloc);
9437 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
9438 if ((attvalue != NULL) && (alloc != 0))
9443 if ((attname != NULL) && (attvalue != NULL)) {
9444 if (len < 0) len = xmlStrlen(attvalue);
9445 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9446 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9450 xmlErrMemory(ctxt, "dictionary allocation failure");
9451 if ((attvalue != NULL) && (alloc != 0))
9456 uri = xmlParseURI((const char *) URL);
9458 xmlNsErr(ctxt, XML_WAR_NS_URI,
9459 "xmlns: '%s' is not a valid URI\n",
9462 if (uri->scheme == NULL) {
9463 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9464 "xmlns: URI %s is not absolute\n",
9469 if (URL == ctxt->str_xml_ns) {
9470 if (attname != ctxt->str_xml) {
9471 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9472 "xml namespace URI cannot be the default namespace\n",
9475 goto skip_default_ns;
9479 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9480 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9481 "reuse of the xmlns namespace name is forbidden\n",
9483 goto skip_default_ns;
9487 * check that it's not a defined namespace
9489 for (j = 1;j <= nbNs;j++)
9490 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9493 xmlErrAttributeDup(ctxt, NULL, attname);
9495 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9497 if ((attvalue != NULL) && (alloc != 0)) {
9501 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9503 if (!IS_BLANK_CH(RAW)) {
9504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9505 "attributes construct error\n");
9509 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9513 if (aprefix == ctxt->str_xmlns) {
9514 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9517 if (attname == ctxt->str_xml) {
9518 if (URL != ctxt->str_xml_ns) {
9519 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9520 "xml namespace prefix mapped to wrong URI\n",
9524 * Do not keep a namespace definition node
9528 if (URL == ctxt->str_xml_ns) {
9529 if (attname != ctxt->str_xml) {
9530 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9531 "xml namespace URI mapped to wrong prefix\n",
9536 if (attname == ctxt->str_xmlns) {
9537 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9538 "redefinition of the xmlns prefix is forbidden\n",
9544 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9545 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9546 "reuse of the xmlns namespace name is forbidden\n",
9550 if ((URL == NULL) || (URL[0] == 0)) {
9551 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9552 "xmlns:%s: Empty XML namespace is not allowed\n",
9553 attname, NULL, NULL);
9556 uri = xmlParseURI((const char *) URL);
9558 xmlNsErr(ctxt, XML_WAR_NS_URI,
9559 "xmlns:%s: '%s' is not a valid URI\n",
9560 attname, URL, NULL);
9562 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9563 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9564 "xmlns:%s: URI %s is not absolute\n",
9565 attname, URL, NULL);
9572 * check that it's not a defined namespace
9574 for (j = 1;j <= nbNs;j++)
9575 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9578 xmlErrAttributeDup(ctxt, aprefix, attname);
9580 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9582 if ((attvalue != NULL) && (alloc != 0)) {
9586 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588 if (!IS_BLANK_CH(RAW)) {
9589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9590 "attributes construct error\n");
9594 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9600 * Add the pair to atts
9602 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
/* Each attribute takes five slots in atts, hence the + 5. */
9603 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9604 if (attvalue[len] == 0)
9608 maxatts = ctxt->maxatts;
9611 ctxt->attallocs[nratts++] = alloc;
/* Record layout: name, prefix, URI (filled in later), then two value pointers. */
9612 atts[nbatts++] = attname;
9613 atts[nbatts++] = aprefix;
9614 atts[nbatts++] = NULL; /* the URI will be fetched later */
9615 atts[nbatts++] = attvalue;
9617 atts[nbatts++] = attvalue;
9619 * tag if some deallocation is needed
9621 if (alloc != 0) attval = 1;
9623 if ((attvalue != NULL) && (attvalue[len] == 0))
9630 if (ctxt->instate == XML_PARSER_EOF)
9632 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9634 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9636 if (!IS_BLANK_CH(RAW)) {
9637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9638 "attributes construct error\n");
9642 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9643 (attname == NULL) && (attvalue == NULL)) {
9644 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9645 "xmlParseStartTag: problem parsing attributes\n");
9649 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9654 * The attributes defaulting
9656 if (ctxt->attsDefault != NULL) {
9657 xmlDefAttrsPtr defaults;
9659 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9660 if (defaults != NULL) {
9661 for (i = 0;i < defaults->nbAttrs;i++) {
/* defaults->values holds five entries per defaulted attribute. */
9662 attname = defaults->values[5 * i];
9663 aprefix = defaults->values[5 * i + 1];
9666 * special work for namespaces defaulted defs
9668 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9670 * check that it's not a defined namespace
9672 for (j = 1;j <= nbNs;j++)
9673 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9675 if (j <= nbNs) continue;
9677 nsname = xmlGetNamespace(ctxt, NULL);
9678 if (nsname != defaults->values[5 * i + 2]) {
9679 if (nsPush(ctxt, NULL,
9680 defaults->values[5 * i + 2]) > 0)
9683 } else if (aprefix == ctxt->str_xmlns) {
9685 * check that it's not a defined namespace
9687 for (j = 1;j <= nbNs;j++)
9688 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9690 if (j <= nbNs) continue;
9692 nsname = xmlGetNamespace(ctxt, attname);
/*
 * Compare against the value of this very default (entry 5 * i + 2), the
 * same entry that is pushed just below and that the default-namespace
 * branch above uses. The former index 2 always addressed the value of the
 * first defaulted attribute, whatever i was.
 */
9693 if (nsname != defaults->values[5 * i + 2]) {
9694 if (nsPush(ctxt, attname,
9695 defaults->values[5 * i + 2]) > 0)
9700 * check that it's not a defined attribute
9702 for (j = 0;j < nbatts;j+=5) {
9703 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9706 if (j < nbatts) continue;
9708 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9709 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9712 maxatts = ctxt->maxatts;
9715 atts[nbatts++] = attname;
9716 atts[nbatts++] = aprefix;
9717 if (aprefix == NULL)
9718 atts[nbatts++] = NULL;
9720 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9721 atts[nbatts++] = defaults->values[5 * i + 2];
9722 atts[nbatts++] = defaults->values[5 * i + 3];
9723 if ((ctxt->standalone == 1) &&
9724 (defaults->values[5 * i + 4] != NULL)) {
9725 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9726 "standalone: attribute %s on %s defaulted from external subset\n",
9727 attname, localname);
9736 * The attributes checkings
9738 for (i = 0; i < nbatts;i += 5) {
9740 * The default namespace does not apply to attribute names.
9742 if (atts[i + 1] != NULL) {
9743 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9744 if (nsname == NULL) {
9745 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9746 "Namespace prefix %s for %s on %s is not defined\n",
9747 atts[i + 1], atts[i], localname);
9749 atts[i + 2] = nsname;
9753 * [ WFC: Unique Att Spec ]
9754 * No attribute name may appear more than once in the same
9755 * start-tag or empty-element tag.
9756 * As extended by the Namespace in XML REC.
9758 for (j = 0; j < i;j += 5) {
9759 if (atts[i] == atts[j]) {
9760 if (atts[i+1] == atts[j+1]) {
9761 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9764 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9765 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9766 "Namespaced Attribute %s in '%s' redefined\n",
9767 atts[i], nsname, NULL);
9774 nsname = xmlGetNamespace(ctxt, prefix);
9775 if ((prefix != NULL) && (nsname == NULL)) {
9776 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9777 "Namespace prefix %s on %s is not defined\n",
9778 prefix, localname, NULL);
9784 * SAX: Start of Element !
9786 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9787 (!ctxt->disableSAX)) {
9789 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9790 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9791 nbatts / 5, nbdef, atts);
9793 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9794 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9798 * Free up attribute allocated strings if needed
9801 for (i = 3,j = 0; j < nratts;i += 5,j++)
/* The value pointer is slot 3 of each record; attallocs[] flags the allocated ones. */
9802 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9803 xmlFree((xmlChar *) atts[i]);
9810 * the attribute strings are valid iif the base didn't changed
9813 for (i = 3,j = 0; j < nratts;i += 5,j++)
9814 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9815 xmlFree((xmlChar *) atts[i]);
9819 * We can't switch from one entity to another in the middle
9822 if (inputNr != ctxt->inputNr) {
9823 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9824 "Start tag doesn't start and stop in the same entity\n");
/* Rewind the input to the saved offset, line and column. */
9828 ctxt->input->cur = ctxt->input->base + cur;
9829 ctxt->input->line = oldline;
9830 ctxt->input->col = oldcol;
9831 if (ctxt->wellFormed == 1) {
9839 * @ctxt: an XML parser context
9840 * @line: line of the start tag
9841 * @nsNr: number of namespaces on the start tag
9843 * parse an end of tag
9845 * [42] ETag ::= '</' Name S? '>'
9849 * [NS 9] ETag ::= '</' QName S? '>'
9853 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9854 const xmlChar *URI, int line, int nsNr, int tlen) {
/*
 * SAX2 end-tag parser: expects "</", checks that the tag name matches
 * ctxt->name, requires the closing '>' and invokes the endElementNs SAX
 * callback.
 *   prefix: element prefix, used for the QName comparison and forwarded to
 *           endElementNs
 *   URI:    namespace name forwarded to endElementNs
 *   line:   line of the start tag, used in the mismatch message
 *   tlen:   byte length used for the fast comparison against ctxt->name
 */
9855 const xmlChar *name;
9859 if ((RAW != '<') || (NXT(1) != '/')) {
9860 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9865 curLength = ctxt->input->end - ctxt->input->cur;
9866 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9867 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
/* Fast path: the next tlen bytes are exactly ctxt->name. */
9868 if ((curLength >= (size_t)(tlen + 1)) &&
9869 (ctxt->input->cur[tlen] == '>')) {
/* '>' follows immediately: consume it together with the name. */
9870 ctxt->input->cur += tlen + 1;
9871 ctxt->input->col += tlen + 1;
9874 ctxt->input->cur += tlen;
9875 ctxt->input->col += tlen;
9879 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9881 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9885 * We should definitely be at the ending "S? '>'" part
9888 if (ctxt->instate == XML_PARSER_EOF)
9891 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9892 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9897 * [ WFC: Element Type Match ]
9898 * The Name in an element's end-tag must match the element type in the
9902 if (name != (xmlChar*)1) {
9903 if (name == NULL) name = BAD_CAST "unparseable";
/* No line supplied: fall back to the line of the current node, if any. */
9904 if ((line == 0) && (ctxt->node != NULL))
9905 line = ctxt->node->line;
9906 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9907 "Opening and ending tag mismatch: %s line %d and %s\n",
9908 ctxt->name, line, name);
9915 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9916 (!ctxt->disableSAX))
9917 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9927 * @ctxt: an XML parser context
9929 * Parse escaped pure raw content.
9931 * [18] CDSect ::= CDStart CData CDEnd
9933 * [19] CDStart ::= '<![CDATA['
9935 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9937 * [21] CDEnd ::= ']]>'
9940 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
/*
 * Parses a CDATA section introduced by "<![CDATA[": the content is
 * accumulated in a growable buffer and delivered through the cdataBlock SAX
 * callback, or through characters when cdataBlock is not set.
 */
9941 xmlChar *buf = NULL;
9943 int size = XML_PARSER_BUFFER_SIZE;
9949 /* Check 2.6.0 was NXT(0) not RAW */
9950 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9955 ctxt->instate = XML_PARSER_CDATA_SECTION;
9958 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9959 ctxt->instate = XML_PARSER_CONTENT;
9965 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9966 ctxt->instate = XML_PARSER_CONTENT;
9971 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9973 xmlErrMemory(ctxt, NULL);
9976 while (IS_CHAR(cur) &&
9977 ((r != ']') || (s != ']') || (cur != '>'))) {
/*
 * The loop stops when (r, s, cur) is ']', ']', '>' or when cur is not a
 * valid Char.
 */
9978 if (len + 5 >= size) {
/* Fewer than 5 bytes of headroom: enforce the size limit, then double the buffer. */
9981 if ((size > XML_MAX_TEXT_LENGTH) &&
9982 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9983 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9984 "CData section too big found", NULL);
9988 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9991 xmlErrMemory(ctxt, NULL);
9997 COPY_BUF(rl,buf,len,r);
10005 if (ctxt->instate == XML_PARSER_EOF) {
10015 ctxt->instate = XML_PARSER_CONTENT;
10017 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10018 "CData section not finished\n%.50s\n", buf);
10025 * OK the buffer is to be consumed as cdata.
10027 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
/* cdataBlock takes precedence; characters is the fallback. */
10028 if (ctxt->sax->cdataBlock != NULL)
10029 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10030 else if (ctxt->sax->characters != NULL)
10031 ctxt->sax->characters(ctxt->userData, buf, len);
10038 * @ctxt: an XML parser context
10042 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10046 xmlParseContent(xmlParserCtxtPtr ctxt) {
10048 while ((RAW != 0) &&
10049 ((RAW != '<') || (NXT(1) != '/')) &&
10050 (ctxt->instate != XML_PARSER_EOF)) {
10051 const xmlChar *test = CUR_PTR;
10052 unsigned int cons = ctxt->input->consumed;
10053 const xmlChar *cur = ctxt->input->cur;
10056 * First case : a Processing Instruction.
10058 if ((*cur == '<') && (cur[1] == '?')) {
10063 * Second case : a CDSection
10065 /* 2.6.0 test was *cur not RAW */
10066 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10067 xmlParseCDSect(ctxt);
10071 * Third case : a comment
10073 else if ((*cur == '<') && (NXT(1) == '!') &&
10074 (NXT(2) == '-') && (NXT(3) == '-')) {
10075 xmlParseComment(ctxt);
10076 ctxt->instate = XML_PARSER_CONTENT;
10080 * Fourth case : a sub-element.
10082 else if (*cur == '<') {
10083 xmlParseElement(ctxt);
10087 * Fifth case : a reference. If it has not been resolved,
10088 * parsing returns its Name, create the node
10091 else if (*cur == '&') {
10092 xmlParseReference(ctxt);
10096 * Last case, text. Note that References are handled directly.
10099 xmlParseCharData(ctxt, 0);
10104 * Pop-up of finished entities.
10106 while ((RAW == 0) && (ctxt->inputNr > 1))
10110 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10111 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10112 "detected an error in element content\n");
10113 xmlHaltParser(ctxt);
10121 * @ctxt: an XML parser context
10123 * parse an XML element, this is highly recursive
10125 * [39] element ::= EmptyElemTag | STag content ETag
10127 * [ WFC: Element Type Match ]
10128 * The Name in an element's end-tag must match the element type in the
10134 xmlParseElement(xmlParserCtxtPtr ctxt) {
10135 const xmlChar *name;
10136 const xmlChar *prefix = NULL;
10137 const xmlChar *URI = NULL;
10138 xmlParserNodeInfo node_info;
10139 int line, tlen = 0;
10141 int nsNr = ctxt->nsNr;
10143 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10144 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10145 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10146 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10147 xmlParserMaxDepth);
10148 xmlHaltParser(ctxt);
10152 /* Capture start position */
10153 if (ctxt->record_info) {
10154 node_info.begin_pos = ctxt->input->consumed +
10155 (CUR_PTR - ctxt->input->base);
10156 node_info.begin_line = ctxt->input->line;
10159 if (ctxt->spaceNr == 0)
10160 spacePush(ctxt, -1);
10161 else if (*ctxt->space == -2)
10162 spacePush(ctxt, -1);
10164 spacePush(ctxt, *ctxt->space);
10166 line = ctxt->input->line;
10167 #ifdef LIBXML_SAX1_ENABLED
10169 #endif /* LIBXML_SAX1_ENABLED */
10170 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10171 #ifdef LIBXML_SAX1_ENABLED
10173 name = xmlParseStartTag(ctxt);
10174 #endif /* LIBXML_SAX1_ENABLED */
10175 if (ctxt->instate == XML_PARSER_EOF)
10177 if (name == NULL) {
10181 namePush(ctxt, name);
10184 #ifdef LIBXML_VALID_ENABLED
10186 * [ VC: Root Element Type ]
10187 * The Name in the document type declaration must match the element
10188 * type of the root element.
10190 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10191 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10192 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10193 #endif /* LIBXML_VALID_ENABLED */
10196 * Check for an Empty Element.
10198 if ((RAW == '/') && (NXT(1) == '>')) {
10201 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10202 (!ctxt->disableSAX))
10203 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10204 #ifdef LIBXML_SAX1_ENABLED
10206 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10207 (!ctxt->disableSAX))
10208 ctxt->sax->endElement(ctxt->userData, name);
10209 #endif /* LIBXML_SAX1_ENABLED */
10213 if (nsNr != ctxt->nsNr)
10214 nsPop(ctxt, ctxt->nsNr - nsNr);
10215 if ( ret != NULL && ctxt->record_info ) {
10216 node_info.end_pos = ctxt->input->consumed +
10217 (CUR_PTR - ctxt->input->base);
10218 node_info.end_line = ctxt->input->line;
10219 node_info.node = ret;
10220 xmlParserAddNodeInfo(ctxt, &node_info);
10227 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10228 "Couldn't find end of Start Tag %s line %d\n",
10232 * end of parsing of this node.
10237 if (nsNr != ctxt->nsNr)
10238 nsPop(ctxt, ctxt->nsNr - nsNr);
10241 * Capture end position and add node
10243 if ( ret != NULL && ctxt->record_info ) {
10244 node_info.end_pos = ctxt->input->consumed +
10245 (CUR_PTR - ctxt->input->base);
10246 node_info.end_line = ctxt->input->line;
10247 node_info.node = ret;
10248 xmlParserAddNodeInfo(ctxt, &node_info);
10254 * Parse the content of the element:
10256 xmlParseContent(ctxt);
10257 if (ctxt->instate == XML_PARSER_EOF)
10259 if (!IS_BYTE_CHAR(RAW)) {
10260 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10261 "Premature end of data in tag %s line %d\n",
10265 * end of parsing of this node.
10270 if (nsNr != ctxt->nsNr)
10271 nsPop(ctxt, ctxt->nsNr - nsNr);
10276 * parse the end of tag: '</' should be here.
10279 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10282 #ifdef LIBXML_SAX1_ENABLED
10284 xmlParseEndTag1(ctxt, line);
10285 #endif /* LIBXML_SAX1_ENABLED */
10288 * Capture end position and add node
10290 if ( ret != NULL && ctxt->record_info ) {
10291 node_info.end_pos = ctxt->input->consumed +
10292 (CUR_PTR - ctxt->input->base);
10293 node_info.end_line = ctxt->input->line;
10294 node_info.node = ret;
10295 xmlParserAddNodeInfo(ctxt, &node_info);
10300 * xmlParseVersionNum:
10301 * @ctxt: an XML parser context
10303 * parse the XML version value.
10305 * [26] VersionNum ::= '1.' [0-9]+
10307 * In practice allow [0-9].[0-9]+ at that level
10309 * Returns the string giving the XML version number, or NULL
10312 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10313 xmlChar *buf = NULL;
10318 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10320 xmlErrMemory(ctxt, NULL);
10324 if (!((cur >= '0') && (cur <= '9'))) {
10338 while ((cur >= '0') && (cur <= '9')) {
10339 if (len + 1 >= size) {
10343 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10346 xmlErrMemory(ctxt, NULL);
10360 * xmlParseVersionInfo:
10361 * @ctxt: an XML parser context
10363 * parse the XML version.
10365 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10367 * [25] Eq ::= S? '=' S?
10369 * Returns the version string, e.g. "1.0"
10373 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10374 xmlChar *version = NULL;
10376 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10387 version = xmlParseVersionNum(ctxt);
10389 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10392 } else if (RAW == '\''){
10394 version = xmlParseVersionNum(ctxt);
10396 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10408 * @ctxt: an XML parser context
10410 * parse the XML encoding name
10412 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10414 * Returns the encoding name value or NULL
10417 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10418 xmlChar *buf = NULL;
10424 if (((cur >= 'a') && (cur <= 'z')) ||
10425 ((cur >= 'A') && (cur <= 'Z'))) {
10426 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10428 xmlErrMemory(ctxt, NULL);
10435 while (((cur >= 'a') && (cur <= 'z')) ||
10436 ((cur >= 'A') && (cur <= 'Z')) ||
10437 ((cur >= '0') && (cur <= '9')) ||
10438 (cur == '.') || (cur == '_') ||
10440 if (len + 1 >= size) {
10444 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10446 xmlErrMemory(ctxt, NULL);
10463 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10469 * xmlParseEncodingDecl:
10470 * @ctxt: an XML parser context
10472 * parse the XML encoding declaration
10474 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10476 * this sets up the conversion filters.
10478 * Returns the encoding value or NULL
10482 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10483 xmlChar *encoding = NULL;
10486 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10497 encoding = xmlParseEncName(ctxt);
10499 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10500 xmlFree((xmlChar *) encoding);
10504 } else if (RAW == '\''){
10506 encoding = xmlParseEncName(ctxt);
10508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10509 xmlFree((xmlChar *) encoding);
10514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10518 * Non standard parsing, allowing the user to ignore encoding
10520 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10521 xmlFree((xmlChar *) encoding);
10526 * UTF-16 encoding switch has already taken place at this stage,
10527 * moreover the little-endian/big-endian selection is already done
10529 if ((encoding != NULL) &&
10530 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10531 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10533 * If no encoding was passed to the parser, the document is
10534 * labelled UTF-16 and no decoder is present, i.e. the
10535 * document is apparently UTF-8 compatible, then raise an
10536 * encoding mismatch fatal error
10538 if ((ctxt->encoding == NULL) &&
10539 (ctxt->input->buf != NULL) &&
10540 (ctxt->input->buf->encoder == NULL)) {
10541 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10542 "Document labelled UTF-16 but has UTF-8 content\n");
10544 if (ctxt->encoding != NULL)
10545 xmlFree((xmlChar *) ctxt->encoding);
10546 ctxt->encoding = encoding;
10549 * UTF-8 encoding is handled natively
10551 else if ((encoding != NULL) &&
10552 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10553 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10554 if (ctxt->encoding != NULL)
10555 xmlFree((xmlChar *) ctxt->encoding);
10556 ctxt->encoding = encoding;
10558 else if (encoding != NULL) {
10559 xmlCharEncodingHandlerPtr handler;
10561 if (ctxt->input->encoding != NULL)
10562 xmlFree((xmlChar *) ctxt->input->encoding);
10563 ctxt->input->encoding = encoding;
10565 handler = xmlFindCharEncodingHandler((const char *) encoding);
10566 if (handler != NULL) {
10567 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10568 /* failed to convert */
10569 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10574 "Unsupported encoding %s\n", encoding);
10584 * @ctxt: an XML parser context
10586 * parse the XML standalone declaration
10588 * [32] SDDecl ::= S 'standalone' Eq
10589 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10591 * [ VC: Standalone Document Declaration ]
10592 * TODO The standalone document declaration must have the value "no"
10593 * if any external markup declarations contain declarations of:
10594 * - attributes with default values, if elements to which these
10595 * attributes apply appear in the document without specifications
10596 * of values for these attributes, or
10597 * - entities (other than amp, lt, gt, apos, quot), if references
10598 * to those entities appear in the document, or
10599 * - attributes with values subject to normalization, where the
10600 * attribute appears in the document with a value which will change
10601 * as a result of normalization, or
10602 * - element types with element content, if white space occurs directly
10603 * within any instance of those types.
10606 * 1 if standalone="yes"
10607 * 0 if standalone="no"
10608 * -2 if standalone attribute is missing or invalid
10609 * (A standalone value of -2 means that the XML declaration was found,
10610 * but no value was specified for the standalone attribute).
10614 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10615 int standalone = -2;
10618 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10622 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10623 return(standalone);
10629 if ((RAW == 'n') && (NXT(1) == 'o')) {
10632 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10637 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10640 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10643 } else if (RAW == '"'){
10645 if ((RAW == 'n') && (NXT(1) == 'o')) {
10648 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10653 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10656 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10660 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10663 return(standalone);
10668 * @ctxt: an XML parser context
10670 * parse an XML declaration header
10672 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10676 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10680 * This value for standalone indicates that the document has an
10681 * XML declaration but it does not have a standalone attribute.
10682 * It will be overwritten later if a standalone attribute is found.
10684 ctxt->input->standalone = -2;
10687 * We know that '<?xml' is here.
10691 if (!IS_BLANK_CH(RAW)) {
10692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10693 "Blank needed after '<?xml'\n");
10698 * We must have the VersionInfo here.
10700 version = xmlParseVersionInfo(ctxt);
10701 if (version == NULL) {
10702 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10704 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10706 * Changed here for XML-1.0 5th edition
10708 if (ctxt->options & XML_PARSE_OLD10) {
10709 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10710 "Unsupported version '%s'\n",
10713 if ((version[0] == '1') && ((version[1] == '.'))) {
10714 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10715 "Unsupported version '%s'\n",
10718 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10719 "Unsupported version '%s'\n",
10724 if (ctxt->version != NULL)
10725 xmlFree((void *) ctxt->version);
10726 ctxt->version = version;
10730 * We may have the encoding declaration
10732 if (!IS_BLANK_CH(RAW)) {
10733 if ((RAW == '?') && (NXT(1) == '>')) {
10737 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739 xmlParseEncodingDecl(ctxt);
10740 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10741 (ctxt->instate == XML_PARSER_EOF)) {
10743 * The XML REC instructs us to stop parsing right here
10749 * We may have the standalone status.
10751 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10752 if ((RAW == '?') && (NXT(1) == '>')) {
10756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10760 * We can grow the input buffer freely at that point
10765 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10768 if ((RAW == '?') && (NXT(1) == '>')) {
10770 } else if (RAW == '>') {
10771 /* Deprecated old WD ... */
10772 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10775 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10776 MOVETO_ENDTAG(CUR_PTR);
10783 * @ctxt: an XML parser context
10785 * parse an XML Misc* optional field.
10787 * [27] Misc ::= Comment | PI | S
10791 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10792 while ((ctxt->instate != XML_PARSER_EOF) &&
10793 (((RAW == '<') && (NXT(1) == '?')) ||
10794 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10795 IS_BLANK_CH(CUR))) {
10796 if ((RAW == '<') && (NXT(1) == '?')) {
10798 } else if (IS_BLANK_CH(CUR)) {
10801 xmlParseComment(ctxt);
10806 * xmlParseDocument:
10807 * @ctxt: an XML parser context
10809 * parse an XML document (and build a tree if using the standard SAX
10812 * [1] document ::= prolog element Misc*
10814 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10816 * Returns 0, or -1 in case of error. The parser context is augmented
10817 * as a result of the parsing.
10821 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10823 xmlCharEncoding enc;
10827 if ((ctxt == NULL) || (ctxt->input == NULL))
10833 * SAX: detecting the level.
10835 xmlDetectSAX2(ctxt);
10838 * SAX: beginning of the document processing.
10840 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10841 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10842 if (ctxt->instate == XML_PARSER_EOF)
10845 if ((ctxt->encoding == NULL) &&
10846 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10848 * Get the 4 first bytes and decode the charset
10849 * if enc != XML_CHAR_ENCODING_NONE
10850 * plug some encoding conversion routines.
10856 enc = xmlDetectCharEncoding(&start[0], 4);
10857 if (enc != XML_CHAR_ENCODING_NONE) {
10858 xmlSwitchEncoding(ctxt, enc);
10864 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10869 * Check for the XMLDecl in the Prolog.
10870 * do not GROW here, to keep the detected encoder from decoding more
10871 * than just the first line, unless the amount of data is really
10872 * too small to hold "<?xml version="1.0" encoding="foo"
10874 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10877 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10880 * Note that we will switch encoding on the fly.
10882 xmlParseXMLDecl(ctxt);
10883 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10884 (ctxt->instate == XML_PARSER_EOF)) {
10886 * The XML REC instructs us to stop parsing right here
10890 ctxt->standalone = ctxt->input->standalone;
10893 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10895 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10896 ctxt->sax->startDocument(ctxt->userData);
10897 if (ctxt->instate == XML_PARSER_EOF)
10899 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10900 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10901 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10905 * The Misc part of the Prolog
10908 xmlParseMisc(ctxt);
10911 * Then possibly doc type declaration(s) and more Misc
10912 * (doctypedecl Misc*)?
10915 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10917 ctxt->inSubset = 1;
10918 xmlParseDocTypeDecl(ctxt);
10920 ctxt->instate = XML_PARSER_DTD;
10921 xmlParseInternalSubset(ctxt);
10922 if (ctxt->instate == XML_PARSER_EOF)
10927 * Create and update the external subset.
10929 ctxt->inSubset = 2;
10930 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10931 (!ctxt->disableSAX))
10932 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10933 ctxt->extSubSystem, ctxt->extSubURI);
10934 if (ctxt->instate == XML_PARSER_EOF)
10936 ctxt->inSubset = 0;
10938 xmlCleanSpecialAttr(ctxt);
10940 ctxt->instate = XML_PARSER_PROLOG;
10941 xmlParseMisc(ctxt);
10945 * Time to start parsing the tree itself
10949 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10950 "Start tag expected, '<' not found\n");
10952 ctxt->instate = XML_PARSER_CONTENT;
10953 xmlParseElement(ctxt);
10954 ctxt->instate = XML_PARSER_EPILOG;
10958 * The Misc part at the end
10960 xmlParseMisc(ctxt);
10963 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10965 ctxt->instate = XML_PARSER_EOF;
10969 * SAX: end of the document processing.
10971 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10972 ctxt->sax->endDocument(ctxt->userData);
10975 * Remove locally kept entity definitions if the tree was not built
10977 if ((ctxt->myDoc != NULL) &&
10978 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10979 xmlFreeDoc(ctxt->myDoc);
10980 ctxt->myDoc = NULL;
10983 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10984 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10986 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10987 if (ctxt->nsWellFormed)
10988 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10989 if (ctxt->options & XML_PARSE_OLD10)
10990 ctxt->myDoc->properties |= XML_DOC_OLD10;
10992 if (! ctxt->wellFormed) {
11000 * xmlParseExtParsedEnt:
11001 * @ctxt: an XML parser context
11003 * parse a general parsed entity
11004 * An external general parsed entity is well-formed if it matches the
11005 * production labeled extParsedEnt.
11007 * [78] extParsedEnt ::= TextDecl? content
11009 * Returns 0, or -1 in case of error. The parser context is augmented
11010 * as a result of the parsing.
11014 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11016 xmlCharEncoding enc;
11018 if ((ctxt == NULL) || (ctxt->input == NULL))
11021 xmlDefaultSAXHandlerInit();
11023 xmlDetectSAX2(ctxt);
11028 * SAX: beginning of the document processing.
11030 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11031 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11034 * Get the 4 first bytes and decode the charset
11035 * if enc != XML_CHAR_ENCODING_NONE
11036 * plug some encoding conversion routines.
11038 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11043 enc = xmlDetectCharEncoding(start, 4);
11044 if (enc != XML_CHAR_ENCODING_NONE) {
11045 xmlSwitchEncoding(ctxt, enc);
11051 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11055 * Check for the XMLDecl in the Prolog.
11058 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11061 * Note that we will switch encoding on the fly.
11063 xmlParseXMLDecl(ctxt);
11064 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11066 * The XML REC instructs us to stop parsing right here
11072 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11074 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11075 ctxt->sax->startDocument(ctxt->userData);
11076 if (ctxt->instate == XML_PARSER_EOF)
11080 * Doing validity checking on chunk doesn't make sense
11082 ctxt->instate = XML_PARSER_CONTENT;
11083 ctxt->validate = 0;
11084 ctxt->loadsubset = 0;
11087 xmlParseContent(ctxt);
11088 if (ctxt->instate == XML_PARSER_EOF)
11091 if ((RAW == '<') && (NXT(1) == '/')) {
11092 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11093 } else if (RAW != 0) {
11094 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11098 * SAX: end of the document processing.
11100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11101 ctxt->sax->endDocument(ctxt->userData);
11103 if (! ctxt->wellFormed) return(-1);
11107 #ifdef LIBXML_PUSH_ENABLED
11108 /************************************************************************
11110 * Progressive parsing interfaces *
11112 ************************************************************************/
11115 * xmlParseLookupSequence:
11116 * @ctxt: an XML parser context
11117 * @first: the first char to lookup
11118 * @next: the next char to lookup or zero
11119 * @third: the third char to lookup or zero
11121 * Try to find if a sequence (first, next, third) or just (first next) or
11122 * (first) is available in the input stream.
11123 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11124 * to avoid rescanning sequences of bytes; it DOES change the state of the
11125 * parser, so do not use it liberally.
11127 * Returns the index to the current parsing point if the full sequence
11128 * is available, -1 otherwise.
11131 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11132 xmlChar next, xmlChar third) {
11134 xmlParserInputPtr in;
11135 const xmlChar *buf;
11138 if (in == NULL) return(-1);
11139 base = in->cur - in->base;
11140 if (base < 0) return(-1);
11141 if (ctxt->checkIndex > base)
11142 base = ctxt->checkIndex;
11143 if (in->buf == NULL) {
11147 buf = xmlBufContent(in->buf->buffer);
11148 len = xmlBufUse(in->buf->buffer);
11150 /* take into account the sequence length */
11151 if (third) len -= 2;
11152 else if (next) len --;
11153 for (;base < len;base++) {
11154 if (buf[base] == first) {
11156 if ((buf[base + 1] != next) ||
11157 (buf[base + 2] != third)) continue;
11158 } else if (next != 0) {
11159 if (buf[base + 1] != next) continue;
11161 ctxt->checkIndex = 0;
11164 xmlGenericError(xmlGenericErrorContext,
11165 "PP: lookup '%c' found at %d\n",
11167 else if (third == 0)
11168 xmlGenericError(xmlGenericErrorContext,
11169 "PP: lookup '%c%c' found at %d\n",
11170 first, next, base);
11172 xmlGenericError(xmlGenericErrorContext,
11173 "PP: lookup '%c%c%c' found at %d\n",
11174 first, next, third, base);
11176 return(base - (in->cur - in->base));
11179 ctxt->checkIndex = base;
11182 xmlGenericError(xmlGenericErrorContext,
11183 "PP: lookup '%c' failed\n", first);
11184 else if (third == 0)
11185 xmlGenericError(xmlGenericErrorContext,
11186 "PP: lookup '%c%c' failed\n", first, next);
11188 xmlGenericError(xmlGenericErrorContext,
11189 "PP: lookup '%c%c%c' failed\n", first, next, third);
11195 * xmlParseGetLasts:
11196 * @ctxt: an XML parser context
11197 * @lastlt: pointer to store the last '<' from the input
11198 * @lastgt: pointer to store the last '>' from the input
11200 * Lookup the last < and > in the current chunk
11203 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11204 const xmlChar **lastgt) {
11205 const xmlChar *tmp;
11207 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11208 xmlGenericError(xmlGenericErrorContext,
11209 "Internal error: xmlParseGetLasts\n");
11212 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11213 tmp = ctxt->input->end;
11215 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11216 if (tmp < ctxt->input->base) {
11222 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11223 if (*tmp == '\'') {
11225 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11226 if (tmp < ctxt->input->end) tmp++;
11227 } else if (*tmp == '"') {
11229 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11230 if (tmp < ctxt->input->end) tmp++;
11234 if (tmp < ctxt->input->end)
11239 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11240 if (tmp >= ctxt->input->base)
11252 * xmlCheckCdataPush:
11253 * @utf: pointer to the block of characters
11254 * @len: length of the block in bytes
11255 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11257 * Check that the block of characters is okay as CData content [20]
11259 * Returns the number of bytes to pass if okay, a negative index where a
11260 * UTF-8 error occurred otherwise
11263 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11268 if ((utf == NULL) || (len <= 0))
11271 for (ix = 0; ix < len;) { /* string is 0-terminated */
11273 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11276 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11280 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11281 if (ix + 2 > len) return(complete ? -ix : ix);
11282 if ((utf[ix+1] & 0xc0 ) != 0x80)
11284 codepoint = (utf[ix] & 0x1f) << 6;
11285 codepoint |= utf[ix+1] & 0x3f;
11286 if (!xmlIsCharQ(codepoint))
11289 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11290 if (ix + 3 > len) return(complete ? -ix : ix);
11291 if (((utf[ix+1] & 0xc0) != 0x80) ||
11292 ((utf[ix+2] & 0xc0) != 0x80))
11294 codepoint = (utf[ix] & 0xf) << 12;
11295 codepoint |= (utf[ix+1] & 0x3f) << 6;
11296 codepoint |= utf[ix+2] & 0x3f;
11297 if (!xmlIsCharQ(codepoint))
11300 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11301 if (ix + 4 > len) return(complete ? -ix : ix);
11302 if (((utf[ix+1] & 0xc0) != 0x80) ||
11303 ((utf[ix+2] & 0xc0) != 0x80) ||
11304 ((utf[ix+3] & 0xc0) != 0x80))
11306 codepoint = (utf[ix] & 0x7) << 18;
11307 codepoint |= (utf[ix+1] & 0x3f) << 12;
11308 codepoint |= (utf[ix+2] & 0x3f) << 6;
11309 codepoint |= utf[ix+3] & 0x3f;
11310 if (!xmlIsCharQ(codepoint))
11313 } else /* unknown encoding */
11320 * xmlParseTryOrFinish:
11321 * @ctxt: an XML parser context
11322 * @terminate: last chunk indicator
11324 * Try to progress on parsing
11326 * Returns zero if no parsing was possible
11329 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11333 const xmlChar *lastlt, *lastgt;
11335 if (ctxt->input == NULL)
11339 switch (ctxt->instate) {
11340 case XML_PARSER_EOF:
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: try EOF\n"); break;
11343 case XML_PARSER_START:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: try START\n"); break;
11346 case XML_PARSER_MISC:
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: try MISC\n");break;
11349 case XML_PARSER_COMMENT:
11350 xmlGenericError(xmlGenericErrorContext,
11351 "PP: try COMMENT\n");break;
11352 case XML_PARSER_PROLOG:
11353 xmlGenericError(xmlGenericErrorContext,
11354 "PP: try PROLOG\n");break;
11355 case XML_PARSER_START_TAG:
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: try START_TAG\n");break;
11358 case XML_PARSER_CONTENT:
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: try CONTENT\n");break;
11361 case XML_PARSER_CDATA_SECTION:
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: try CDATA_SECTION\n");break;
11364 case XML_PARSER_END_TAG:
11365 xmlGenericError(xmlGenericErrorContext,
11366 "PP: try END_TAG\n");break;
11367 case XML_PARSER_ENTITY_DECL:
11368 xmlGenericError(xmlGenericErrorContext,
11369 "PP: try ENTITY_DECL\n");break;
11370 case XML_PARSER_ENTITY_VALUE:
11371 xmlGenericError(xmlGenericErrorContext,
11372 "PP: try ENTITY_VALUE\n");break;
11373 case XML_PARSER_ATTRIBUTE_VALUE:
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: try ATTRIBUTE_VALUE\n");break;
11376 case XML_PARSER_DTD:
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: try DTD\n");break;
11379 case XML_PARSER_EPILOG:
11380 xmlGenericError(xmlGenericErrorContext,
11381 "PP: try EPILOG\n");break;
11382 case XML_PARSER_PI:
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: try PI\n");break;
11385 case XML_PARSER_IGNORE:
11386 xmlGenericError(xmlGenericErrorContext,
11387 "PP: try IGNORE\n");break;
11391 if ((ctxt->input != NULL) &&
11392 (ctxt->input->cur - ctxt->input->base > 4096)) {
11394 ctxt->checkIndex = 0;
11396 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11398 while (ctxt->instate != XML_PARSER_EOF) {
11399 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11404 * Pop-up of finished entities.
11406 while ((RAW == 0) && (ctxt->inputNr > 1))
11409 if (ctxt->input == NULL) break;
11410 if (ctxt->input->buf == NULL)
11411 avail = ctxt->input->length -
11412 (ctxt->input->cur - ctxt->input->base);
11415 * If we are operating on converted input, try to flush
11416 * remaining chars to avoid them stalling in the non-converted
11417 * buffer. But do not do this in document start where
11418 * encoding="..." may not have been read and we work on a
11419 * guessed encoding.
11421 if ((ctxt->instate != XML_PARSER_START) &&
11422 (ctxt->input->buf->raw != NULL) &&
11423 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11426 size_t current = ctxt->input->cur - ctxt->input->base;
11428 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11429 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11432 avail = xmlBufUse(ctxt->input->buf->buffer) -
11433 (ctxt->input->cur - ctxt->input->base);
11437 switch (ctxt->instate) {
11438 case XML_PARSER_EOF:
11440 * Document parsing is done !
11443 case XML_PARSER_START:
11444 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11446 xmlCharEncoding enc;
11449 * Very first chars read from the document flow.
11455 * Get the 4 first bytes and decode the charset
11456 * if enc != XML_CHAR_ENCODING_NONE
11457 * plug some encoding conversion routines,
11458 * else xmlSwitchEncoding will set to (default)
11465 enc = xmlDetectCharEncoding(start, 4);
11466 xmlSwitchEncoding(ctxt, enc);
11472 cur = ctxt->input->cur[0];
11473 next = ctxt->input->cur[1];
11475 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11476 ctxt->sax->setDocumentLocator(ctxt->userData,
11477 &xmlDefaultSAXLocator);
11478 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11479 xmlHaltParser(ctxt);
11481 xmlGenericError(xmlGenericErrorContext,
11482 "PP: entering EOF\n");
11484 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11485 ctxt->sax->endDocument(ctxt->userData);
11488 if ((cur == '<') && (next == '?')) {
11489 /* PI or XML decl */
11490 if (avail < 5) return(ret);
11491 if ((!terminate) &&
11492 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11494 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11495 ctxt->sax->setDocumentLocator(ctxt->userData,
11496 &xmlDefaultSAXLocator);
11497 if ((ctxt->input->cur[2] == 'x') &&
11498 (ctxt->input->cur[3] == 'm') &&
11499 (ctxt->input->cur[4] == 'l') &&
11500 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11503 xmlGenericError(xmlGenericErrorContext,
11504 "PP: Parsing XML Decl\n");
11506 xmlParseXMLDecl(ctxt);
11507 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11509 * The XML REC instructs us to stop parsing right
11512 xmlHaltParser(ctxt);
11515 ctxt->standalone = ctxt->input->standalone;
11516 if ((ctxt->encoding == NULL) &&
11517 (ctxt->input->encoding != NULL))
11518 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11519 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11520 (!ctxt->disableSAX))
11521 ctxt->sax->startDocument(ctxt->userData);
11522 ctxt->instate = XML_PARSER_MISC;
11524 xmlGenericError(xmlGenericErrorContext,
11525 "PP: entering MISC\n");
11528 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11529 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11530 (!ctxt->disableSAX))
11531 ctxt->sax->startDocument(ctxt->userData);
11532 ctxt->instate = XML_PARSER_MISC;
11534 xmlGenericError(xmlGenericErrorContext,
11535 "PP: entering MISC\n");
11539 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11540 ctxt->sax->setDocumentLocator(ctxt->userData,
11541 &xmlDefaultSAXLocator);
11542 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11543 if (ctxt->version == NULL) {
11544 xmlErrMemory(ctxt, NULL);
11547 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11548 (!ctxt->disableSAX))
11549 ctxt->sax->startDocument(ctxt->userData);
11550 ctxt->instate = XML_PARSER_MISC;
11552 xmlGenericError(xmlGenericErrorContext,
11553 "PP: entering MISC\n");
11557 case XML_PARSER_START_TAG: {
11558 const xmlChar *name;
11559 const xmlChar *prefix = NULL;
11560 const xmlChar *URI = NULL;
11561 int nsNr = ctxt->nsNr;
11563 if ((avail < 2) && (ctxt->inputNr == 1))
11565 cur = ctxt->input->cur[0];
11567 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11568 xmlHaltParser(ctxt);
11569 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11570 ctxt->sax->endDocument(ctxt->userData);
11574 if (ctxt->progressive) {
11575 /* > can be found unescaped in attribute values */
11576 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11578 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11582 if (ctxt->spaceNr == 0)
11583 spacePush(ctxt, -1);
11584 else if (*ctxt->space == -2)
11585 spacePush(ctxt, -1);
11587 spacePush(ctxt, *ctxt->space);
11588 #ifdef LIBXML_SAX1_ENABLED
11590 #endif /* LIBXML_SAX1_ENABLED */
11591 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11592 #ifdef LIBXML_SAX1_ENABLED
11594 name = xmlParseStartTag(ctxt);
11595 #endif /* LIBXML_SAX1_ENABLED */
11596 if (ctxt->instate == XML_PARSER_EOF)
11598 if (name == NULL) {
11600 xmlHaltParser(ctxt);
11601 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11602 ctxt->sax->endDocument(ctxt->userData);
11605 #ifdef LIBXML_VALID_ENABLED
11607 * [ VC: Root Element Type ]
11608 * The Name in the document type declaration must match
11609 * the element type of the root element.
11611 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11612 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11613 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11614 #endif /* LIBXML_VALID_ENABLED */
11617 * Check for an Empty Element.
11619 if ((RAW == '/') && (NXT(1) == '>')) {
11623 if ((ctxt->sax != NULL) &&
11624 (ctxt->sax->endElementNs != NULL) &&
11625 (!ctxt->disableSAX))
11626 ctxt->sax->endElementNs(ctxt->userData, name,
11628 if (ctxt->nsNr - nsNr > 0)
11629 nsPop(ctxt, ctxt->nsNr - nsNr);
11630 #ifdef LIBXML_SAX1_ENABLED
11632 if ((ctxt->sax != NULL) &&
11633 (ctxt->sax->endElement != NULL) &&
11634 (!ctxt->disableSAX))
11635 ctxt->sax->endElement(ctxt->userData, name);
11636 #endif /* LIBXML_SAX1_ENABLED */
11638 if (ctxt->instate == XML_PARSER_EOF)
11641 if (ctxt->nameNr == 0) {
11642 ctxt->instate = XML_PARSER_EPILOG;
11644 ctxt->instate = XML_PARSER_CONTENT;
11646 ctxt->progressive = 1;
11652 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11653 "Couldn't find end of Start Tag %s\n",
11659 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11660 #ifdef LIBXML_SAX1_ENABLED
11662 namePush(ctxt, name);
11663 #endif /* LIBXML_SAX1_ENABLED */
11665 ctxt->instate = XML_PARSER_CONTENT;
11666 ctxt->progressive = 1;
11669 case XML_PARSER_CONTENT: {
11670 const xmlChar *test;
11672 if ((avail < 2) && (ctxt->inputNr == 1))
11674 cur = ctxt->input->cur[0];
11675 next = ctxt->input->cur[1];
11678 cons = ctxt->input->consumed;
11679 if ((cur == '<') && (next == '/')) {
11680 ctxt->instate = XML_PARSER_END_TAG;
11682 } else if ((cur == '<') && (next == '?')) {
11683 if ((!terminate) &&
11684 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11685 ctxt->progressive = XML_PARSER_PI;
11689 ctxt->instate = XML_PARSER_CONTENT;
11690 ctxt->progressive = 1;
11691 } else if ((cur == '<') && (next != '!')) {
11692 ctxt->instate = XML_PARSER_START_TAG;
11694 } else if ((cur == '<') && (next == '!') &&
11695 (ctxt->input->cur[2] == '-') &&
11696 (ctxt->input->cur[3] == '-')) {
11701 ctxt->input->cur += 4;
11702 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11703 ctxt->input->cur -= 4;
11704 if ((!terminate) && (term < 0)) {
11705 ctxt->progressive = XML_PARSER_COMMENT;
11708 xmlParseComment(ctxt);
11709 ctxt->instate = XML_PARSER_CONTENT;
11710 ctxt->progressive = 1;
11711 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11712 (ctxt->input->cur[2] == '[') &&
11713 (ctxt->input->cur[3] == 'C') &&
11714 (ctxt->input->cur[4] == 'D') &&
11715 (ctxt->input->cur[5] == 'A') &&
11716 (ctxt->input->cur[6] == 'T') &&
11717 (ctxt->input->cur[7] == 'A') &&
11718 (ctxt->input->cur[8] == '[')) {
11720 ctxt->instate = XML_PARSER_CDATA_SECTION;
11722 } else if ((cur == '<') && (next == '!') &&
11725 } else if (cur == '&') {
11726 if ((!terminate) &&
11727 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11729 xmlParseReference(ctxt);
11731 /* TODO Avoid the extra copy, handle directly !!! */
11733 * Goal of the following test is:
11734 * - minimize calls to the SAX 'character' callback
11735 * when they are mergeable
11736 * - handle a problem for isBlank when we only parse
11737 * a sequence of blank chars and the next one is
11738 * not available to check against '<' presence.
11739 * - tries to homogenize the differences in SAX
11740 * callbacks between the push and pull versions
11743 if ((ctxt->inputNr == 1) &&
11744 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11746 if (ctxt->progressive) {
11747 if ((lastlt == NULL) ||
11748 (ctxt->input->cur > lastlt))
11750 } else if (xmlParseLookupSequence(ctxt,
11756 ctxt->checkIndex = 0;
11757 xmlParseCharData(ctxt, 0);
11760 * Pop-up of finished entities.
11762 while ((RAW == 0) && (ctxt->inputNr > 1))
11764 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11765 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11766 "detected an error in element content\n");
11767 xmlHaltParser(ctxt);
11772 case XML_PARSER_END_TAG:
11776 if (ctxt->progressive) {
11777 /* > can be found unescaped in attribute values */
11778 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11780 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11785 xmlParseEndTag2(ctxt,
11786 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11787 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11788 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11791 #ifdef LIBXML_SAX1_ENABLED
11793 xmlParseEndTag1(ctxt, 0);
11794 #endif /* LIBXML_SAX1_ENABLED */
11795 if (ctxt->instate == XML_PARSER_EOF) {
11797 } else if (ctxt->nameNr == 0) {
11798 ctxt->instate = XML_PARSER_EPILOG;
11800 ctxt->instate = XML_PARSER_CONTENT;
11803 case XML_PARSER_CDATA_SECTION: {
11805 * The Push mode need to have the SAX callback for
11806 * cdataBlock merge back contiguous callbacks.
11810 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11812 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11815 tmp = xmlCheckCdataPush(ctxt->input->cur,
11816 XML_PARSER_BIG_BUFFER_SIZE, 0);
11819 ctxt->input->cur += tmp;
11820 goto encoding_error;
11822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11823 if (ctxt->sax->cdataBlock != NULL)
11824 ctxt->sax->cdataBlock(ctxt->userData,
11825 ctxt->input->cur, tmp);
11826 else if (ctxt->sax->characters != NULL)
11827 ctxt->sax->characters(ctxt->userData,
11828 ctxt->input->cur, tmp);
11830 if (ctxt->instate == XML_PARSER_EOF)
11833 ctxt->checkIndex = 0;
11839 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11840 if ((tmp < 0) || (tmp != base)) {
11842 ctxt->input->cur += tmp;
11843 goto encoding_error;
11845 if ((ctxt->sax != NULL) && (base == 0) &&
11846 (ctxt->sax->cdataBlock != NULL) &&
11847 (!ctxt->disableSAX)) {
11849 * Special case to provide identical behaviour
11850 * between pull and push parsers on empty CDATA
11853 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11854 (!strncmp((const char *)&ctxt->input->cur[-9],
11856 ctxt->sax->cdataBlock(ctxt->userData,
11858 } else if ((ctxt->sax != NULL) && (base > 0) &&
11859 (!ctxt->disableSAX)) {
11860 if (ctxt->sax->cdataBlock != NULL)
11861 ctxt->sax->cdataBlock(ctxt->userData,
11862 ctxt->input->cur, base);
11863 else if (ctxt->sax->characters != NULL)
11864 ctxt->sax->characters(ctxt->userData,
11865 ctxt->input->cur, base);
11867 if (ctxt->instate == XML_PARSER_EOF)
11870 ctxt->checkIndex = 0;
11871 ctxt->instate = XML_PARSER_CONTENT;
11873 xmlGenericError(xmlGenericErrorContext,
11874 "PP: entering CONTENT\n");
11879 case XML_PARSER_MISC:
11881 if (ctxt->input->buf == NULL)
11882 avail = ctxt->input->length -
11883 (ctxt->input->cur - ctxt->input->base);
11885 avail = xmlBufUse(ctxt->input->buf->buffer) -
11886 (ctxt->input->cur - ctxt->input->base);
11889 cur = ctxt->input->cur[0];
11890 next = ctxt->input->cur[1];
11891 if ((cur == '<') && (next == '?')) {
11892 if ((!terminate) &&
11893 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11894 ctxt->progressive = XML_PARSER_PI;
11898 xmlGenericError(xmlGenericErrorContext,
11899 "PP: Parsing PI\n");
11902 if (ctxt->instate == XML_PARSER_EOF)
11904 ctxt->instate = XML_PARSER_MISC;
11905 ctxt->progressive = 1;
11906 ctxt->checkIndex = 0;
11907 } else if ((cur == '<') && (next == '!') &&
11908 (ctxt->input->cur[2] == '-') &&
11909 (ctxt->input->cur[3] == '-')) {
11910 if ((!terminate) &&
11911 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11912 ctxt->progressive = XML_PARSER_COMMENT;
11916 xmlGenericError(xmlGenericErrorContext,
11917 "PP: Parsing Comment\n");
11919 xmlParseComment(ctxt);
11920 if (ctxt->instate == XML_PARSER_EOF)
11922 ctxt->instate = XML_PARSER_MISC;
11923 ctxt->progressive = 1;
11924 ctxt->checkIndex = 0;
11925 } else if ((cur == '<') && (next == '!') &&
11926 (ctxt->input->cur[2] == 'D') &&
11927 (ctxt->input->cur[3] == 'O') &&
11928 (ctxt->input->cur[4] == 'C') &&
11929 (ctxt->input->cur[5] == 'T') &&
11930 (ctxt->input->cur[6] == 'Y') &&
11931 (ctxt->input->cur[7] == 'P') &&
11932 (ctxt->input->cur[8] == 'E')) {
11933 if ((!terminate) &&
11934 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11935 ctxt->progressive = XML_PARSER_DTD;
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: Parsing internal subset\n");
11942 ctxt->inSubset = 1;
11943 ctxt->progressive = 0;
11944 ctxt->checkIndex = 0;
11945 xmlParseDocTypeDecl(ctxt);
11946 if (ctxt->instate == XML_PARSER_EOF)
11949 ctxt->instate = XML_PARSER_DTD;
11951 xmlGenericError(xmlGenericErrorContext,
11952 "PP: entering DTD\n");
11956 * Create and update the external subset.
11958 ctxt->inSubset = 2;
11959 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11960 (ctxt->sax->externalSubset != NULL))
11961 ctxt->sax->externalSubset(ctxt->userData,
11962 ctxt->intSubName, ctxt->extSubSystem,
11964 ctxt->inSubset = 0;
11965 xmlCleanSpecialAttr(ctxt);
11966 ctxt->instate = XML_PARSER_PROLOG;
11968 xmlGenericError(xmlGenericErrorContext,
11969 "PP: entering PROLOG\n");
11972 } else if ((cur == '<') && (next == '!') &&
11976 ctxt->instate = XML_PARSER_START_TAG;
11977 ctxt->progressive = XML_PARSER_START_TAG;
11978 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11980 xmlGenericError(xmlGenericErrorContext,
11981 "PP: entering START_TAG\n");
11985 case XML_PARSER_PROLOG:
11987 if (ctxt->input->buf == NULL)
11988 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11990 avail = xmlBufUse(ctxt->input->buf->buffer) -
11991 (ctxt->input->cur - ctxt->input->base);
11994 cur = ctxt->input->cur[0];
11995 next = ctxt->input->cur[1];
11996 if ((cur == '<') && (next == '?')) {
11997 if ((!terminate) &&
11998 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11999 ctxt->progressive = XML_PARSER_PI;
12003 xmlGenericError(xmlGenericErrorContext,
12004 "PP: Parsing PI\n");
12007 if (ctxt->instate == XML_PARSER_EOF)
12009 ctxt->instate = XML_PARSER_PROLOG;
12010 ctxt->progressive = 1;
12011 } else if ((cur == '<') && (next == '!') &&
12012 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12013 if ((!terminate) &&
12014 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12015 ctxt->progressive = XML_PARSER_COMMENT;
12019 xmlGenericError(xmlGenericErrorContext,
12020 "PP: Parsing Comment\n");
12022 xmlParseComment(ctxt);
12023 if (ctxt->instate == XML_PARSER_EOF)
12025 ctxt->instate = XML_PARSER_PROLOG;
12026 ctxt->progressive = 1;
12027 } else if ((cur == '<') && (next == '!') &&
12031 ctxt->instate = XML_PARSER_START_TAG;
12032 if (ctxt->progressive == 0)
12033 ctxt->progressive = XML_PARSER_START_TAG;
12034 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: entering START_TAG\n");
12041 case XML_PARSER_EPILOG:
12043 if (ctxt->input->buf == NULL)
12044 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12046 avail = xmlBufUse(ctxt->input->buf->buffer) -
12047 (ctxt->input->cur - ctxt->input->base);
12050 cur = ctxt->input->cur[0];
12051 next = ctxt->input->cur[1];
12052 if ((cur == '<') && (next == '?')) {
12053 if ((!terminate) &&
12054 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12055 ctxt->progressive = XML_PARSER_PI;
12059 xmlGenericError(xmlGenericErrorContext,
12060 "PP: Parsing PI\n");
12063 if (ctxt->instate == XML_PARSER_EOF)
12065 ctxt->instate = XML_PARSER_EPILOG;
12066 ctxt->progressive = 1;
12067 } else if ((cur == '<') && (next == '!') &&
12068 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12069 if ((!terminate) &&
12070 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12071 ctxt->progressive = XML_PARSER_COMMENT;
12075 xmlGenericError(xmlGenericErrorContext,
12076 "PP: Parsing Comment\n");
12078 xmlParseComment(ctxt);
12079 if (ctxt->instate == XML_PARSER_EOF)
12081 ctxt->instate = XML_PARSER_EPILOG;
12082 ctxt->progressive = 1;
12083 } else if ((cur == '<') && (next == '!') &&
12087 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12088 xmlHaltParser(ctxt);
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: entering EOF\n");
12093 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12094 ctxt->sax->endDocument(ctxt->userData);
12098 case XML_PARSER_DTD: {
12100 * Sorry but progressive parsing of the internal subset
12101 * is not expected to be supported. We first check that
12102 * the full content of the internal subset is available and
12103 * the parsing is launched only at that point.
12104 * Internal subset ends up with "']' S? '>'" in an unescaped
12105 * section and not in a ']]>' sequence which are conditional
12106 * sections (whoever argued to keep that crap in XML deserve
12107 * a place in hell !).
12114 base = ctxt->input->cur - ctxt->input->base;
12115 if (base < 0) return(0);
12116 if (ctxt->checkIndex > base)
12117 base = ctxt->checkIndex;
12118 buf = xmlBufContent(ctxt->input->buf->buffer);
12119 use = xmlBufUse(ctxt->input->buf->buffer);
12120 for (;(unsigned int) base < use; base++) {
12122 if (buf[base] == quote)
12126 if ((quote == 0) && (buf[base] == '<')) {
12128 /* special handling of comments */
12129 if (((unsigned int) base + 4 < use) &&
12130 (buf[base + 1] == '!') &&
12131 (buf[base + 2] == '-') &&
12132 (buf[base + 3] == '-')) {
12133 for (;(unsigned int) base + 3 < use; base++) {
12134 if ((buf[base] == '-') &&
12135 (buf[base + 1] == '-') &&
12136 (buf[base + 2] == '>')) {
12144 fprintf(stderr, "unfinished comment\n");
12151 if (buf[base] == '"') {
12155 if (buf[base] == '\'') {
12159 if (buf[base] == ']') {
12161 fprintf(stderr, "%c%c%c%c: ", buf[base],
12162 buf[base + 1], buf[base + 2], buf[base + 3]);
12164 if ((unsigned int) base +1 >= use)
12166 if (buf[base + 1] == ']') {
12167 /* conditional crap, skip both ']' ! */
12171 for (i = 1; (unsigned int) base + i < use; i++) {
12172 if (buf[base + i] == '>') {
12174 fprintf(stderr, "found\n");
12176 goto found_end_int_subset;
12178 if (!IS_BLANK_CH(buf[base + i])) {
12180 fprintf(stderr, "not found\n");
12182 goto not_end_of_int_subset;
12186 fprintf(stderr, "end of stream\n");
12191 not_end_of_int_subset:
12192 continue; /* for */
12195 * We didn't find the end of the Internal subset
12198 ctxt->checkIndex = base;
12200 ctxt->checkIndex = 0;
12203 xmlGenericError(xmlGenericErrorContext,
12204 "PP: lookup of int subset end filed\n");
12208 found_end_int_subset:
12209 ctxt->checkIndex = 0;
12210 xmlParseInternalSubset(ctxt);
12211 if (ctxt->instate == XML_PARSER_EOF)
12213 ctxt->inSubset = 2;
12214 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12215 (ctxt->sax->externalSubset != NULL))
12216 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12217 ctxt->extSubSystem, ctxt->extSubURI);
12218 ctxt->inSubset = 0;
12219 xmlCleanSpecialAttr(ctxt);
12220 if (ctxt->instate == XML_PARSER_EOF)
12222 ctxt->instate = XML_PARSER_PROLOG;
12223 ctxt->checkIndex = 0;
12225 xmlGenericError(xmlGenericErrorContext,
12226 "PP: entering PROLOG\n");
12230 case XML_PARSER_COMMENT:
12231 xmlGenericError(xmlGenericErrorContext,
12232 "PP: internal error, state == COMMENT\n");
12233 ctxt->instate = XML_PARSER_CONTENT;
12235 xmlGenericError(xmlGenericErrorContext,
12236 "PP: entering CONTENT\n");
12239 case XML_PARSER_IGNORE:
12240 xmlGenericError(xmlGenericErrorContext,
12241 "PP: internal error, state == IGNORE");
12242 ctxt->instate = XML_PARSER_DTD;
12244 xmlGenericError(xmlGenericErrorContext,
12245 "PP: entering DTD\n");
12248 case XML_PARSER_PI:
12249 xmlGenericError(xmlGenericErrorContext,
12250 "PP: internal error, state == PI\n");
12251 ctxt->instate = XML_PARSER_CONTENT;
12253 xmlGenericError(xmlGenericErrorContext,
12254 "PP: entering CONTENT\n");
12257 case XML_PARSER_ENTITY_DECL:
12258 xmlGenericError(xmlGenericErrorContext,
12259 "PP: internal error, state == ENTITY_DECL\n");
12260 ctxt->instate = XML_PARSER_DTD;
12262 xmlGenericError(xmlGenericErrorContext,
12263 "PP: entering DTD\n");
12266 case XML_PARSER_ENTITY_VALUE:
12267 xmlGenericError(xmlGenericErrorContext,
12268 "PP: internal error, state == ENTITY_VALUE\n");
12269 ctxt->instate = XML_PARSER_CONTENT;
12271 xmlGenericError(xmlGenericErrorContext,
12272 "PP: entering DTD\n");
12275 case XML_PARSER_ATTRIBUTE_VALUE:
12276 xmlGenericError(xmlGenericErrorContext,
12277 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12278 ctxt->instate = XML_PARSER_START_TAG;
12280 xmlGenericError(xmlGenericErrorContext,
12281 "PP: entering START_TAG\n");
12284 case XML_PARSER_SYSTEM_LITERAL:
12285 xmlGenericError(xmlGenericErrorContext,
12286 "PP: internal error, state == SYSTEM_LITERAL\n");
12287 ctxt->instate = XML_PARSER_START_TAG;
12289 xmlGenericError(xmlGenericErrorContext,
12290 "PP: entering START_TAG\n");
12293 case XML_PARSER_PUBLIC_LITERAL:
12294 xmlGenericError(xmlGenericErrorContext,
12295 "PP: internal error, state == PUBLIC_LITERAL\n");
12296 ctxt->instate = XML_PARSER_START_TAG;
12298 xmlGenericError(xmlGenericErrorContext,
12299 "PP: entering START_TAG\n");
12306 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12313 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12314 ctxt->input->cur[0], ctxt->input->cur[1],
12315 ctxt->input->cur[2], ctxt->input->cur[3]);
12316 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12317 "Input is not proper UTF-8, indicate encoding !\n%s",
12318 BAD_CAST buffer, NULL);
12324 * xmlParseCheckTransition:
12325 * @ctxt: an XML parser context
12326 * @chunk: a char array
12327 * @size: the size in byte of the chunk
12329 * Check, depending on the current parser state, whether the given chunk must be
12330 * processed immediately or whether more data is needed before parsing can advance.
12332 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12335 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
/*
 * Push-parser heuristic. For every parser state handled below, the only
 * thing examined is whether the @size bytes starting at @chunk contain a
 * > byte. xmlParseChunk calls this with the bytes that were appended to
 * the input buffer by the current push and uses the result to decide
 * whether a parse attempt is worthwhile.
 */
/* Argument validation: NULL context, NULL chunk or negative size. */
12336 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
/* Waiting inside a start tag. */
12338 if (ctxt->instate == XML_PARSER_START_TAG) {
12339 if (memchr(chunk, '>', size) != NULL)
/*
 * xmlParseTryOrFinish stores XML_PARSER_COMMENT in ctxt->progressive when
 * its lookup for the end of a comment fails, so this branch means that an
 * unterminated comment is pending.
 */
12343 if (ctxt->progressive == XML_PARSER_COMMENT) {
12344 if (memchr(chunk, '>', size) != NULL)
/* Waiting inside a CDATA section. */
12348 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12349 if (memchr(chunk, '>', size) != NULL)
/* Same convention as for comments: an unterminated PI is pending. */
12353 if (ctxt->progressive == XML_PARSER_PI) {
12354 if (memchr(chunk, '>', size) != NULL)
/* Waiting inside an end tag. */
12358 if (ctxt->instate == XML_PARSER_END_TAG) {
12359 if (memchr(chunk, '>', size) != NULL)
/*
 * DTD handling: either the DOCTYPE lookup failed earlier (progressive set
 * to XML_PARSER_DTD) or the parser is already in the XML_PARSER_DTD state.
 */
12363 if ((ctxt->progressive == XML_PARSER_DTD) ||
12364 (ctxt->instate == XML_PARSER_DTD)) {
12365 if (memchr(chunk, '>', size) != NULL)
12374 * @ctxt: an XML parser context
12375 * @chunk: a char array
12376 * @size: the size in byte of the chunk
12377 * @terminate: last chunk indicator
12379 * Parse a Chunk of memory
12381 * Returns zero if no error, the xmlParserErrors otherwise.
12384 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12388 size_t old_avail = 0;
/*
 * Visible flow: append @chunk to the buffer of the current input (or, when
 * no data is supplied, run pending raw bytes through the encoder), call
 * xmlParseTryOrFinish, enforce the lookup limit, and finally perform the
 * end-of-document checks and fire the endDocument SAX callback.
 */
12392 return(XML_ERR_INTERNAL_ERROR);
/* An earlier error recorded while SAX is disabled is reported again. */
12393 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12394 return(ctxt->errNo);
12395 if (ctxt->instate == XML_PARSER_EOF)
/* Parser still in its initial state: run xmlDetectSAX2 on the context. */
12397 if (ctxt->instate == XML_PARSER_START)
12398 xmlDetectSAX2(ctxt);
/*
 * A non-final chunk whose last byte is CR. Compare with the end_in_lf
 * block further down, which pushes a lone CR after parsing.
 * NOTE(review): presumably the CR is held back here so that a CR LF pair
 * split across two chunks is seen together - confirm.
 */
12399 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12400 (chunk[size - 1] == '\r')) {
/*
 * Data was supplied and the parser is not stopped: append it to the input
 * buffer. base and cur record the current read position as offsets and are
 * handed back to xmlBufSetInputBaseCur after the push.
 */
12407 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12408 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12409 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12410 size_t cur = ctxt->input->cur - ctxt->input->base;
/* Bytes held by the buffer before this push; used later as the offset of the new data. */
12413 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12415 * Specific handling if we autodetected an encoding, we should not
12416 * push more than the first line ... which depend on the encoding
12417 * And only push the rest once the final encoding was detected
12419 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12420 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
/*
 * len caps the size of the first push while an encoder is attached.
 * NOTE(review): the rationale for the value 45 and the per-encoding
 * adjustments selected by the name tests below should be documented.
 */
12421 unsigned int len = 45;
12423 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12424 BAD_CAST "UTF-16")) ||
12425 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12426 BAD_CAST "UTF16")))
12428 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12429 BAD_CAST "UCS-4")) ||
12430 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
/* Raw bytes already consumed count against the cap. */
12434 if (ctxt->input->buf->rawconsumed < len)
12435 len -= ctxt->input->buf->rawconsumed;
12438 * Change size for reading the initial declaration only
12439 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12440 * will blindly copy extra bytes from memory.
12442 if ((unsigned int) size > len) {
/* remain: number of bytes of this chunk beyond the cap. */
12443 remain = size - len;
12449 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/*
 * NOTE(review): XML_PARSER_EOF is the parser state constant compared
 * against ctxt->instate elsewhere in this function, yet here it is stored
 * in errNo and returned where callers expect an error code - confirm
 * whether a dedicated error value was intended.
 */
12451 ctxt->errNo = XML_PARSER_EOF;
12452 xmlHaltParser(ctxt);
12453 return (XML_PARSER_EOF);
12455 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12457 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/*
 * No new data and the parser is not stopped: when the input buffer has an
 * encoder, a converted buffer and a raw buffer, run xmlCharEncInput on it
 * (terminate is forwarded as its second argument), again preserving the
 * read position around the call.
 */
12460 } else if (ctxt->instate != XML_PARSER_EOF) {
12461 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12462 xmlParserInputBufferPtr in = ctxt->input->buf;
12463 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12464 (in->raw != NULL)) {
12466 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12467 size_t current = ctxt->input->cur - ctxt->input->base;
12469 nbchars = xmlCharEncInput(in, terminate);
12472 xmlGenericError(xmlGenericErrorContext,
12473 "xmlParseChunk: encoder error\n");
12474 return(XML_ERR_INVALID_ENCODING);
12476 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12481 xmlParseTryOrFinish(ctxt, 0);
12483 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12484 avail = xmlBufUse(ctxt->input->buf->buffer);
12486 * Depending on the current state it may not be such
12487 * a good idea to try parsing if there is nothing in the chunk
12488 * which would be worth doing a parser state transition and we
12489 * need to wait for more data
/*
 * The parse attempt below runs when any of these holds: this is the final
 * chunk, more than XML_MAX_TEXT_LENGTH bytes are buffered, the buffer was
 * empty before the push or is empty now, or xmlParseCheckTransition
 * returns non-zero for the bytes located at offset old_avail.
 */
12491 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12492 (old_avail == 0) || (avail == 0) ||
12493 (xmlParseCheckTransition(ctxt,
12494 (const char *)&ctxt->input->base[old_avail],
12495 avail - old_avail)))
12496 xmlParseTryOrFinish(ctxt, terminate);
12498 if (ctxt->instate == XML_PARSER_EOF)
12499 return(ctxt->errNo);
/*
 * Unless XML_PARSE_HUGE is set, a distance above XML_MAX_LOOKUP_LIMIT
 * between cur and either end of the input is a fatal error and halts the
 * parser.
 */
12501 if ((ctxt->input != NULL) &&
12502 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12503 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12504 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12505 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12506 xmlHaltParser(ctxt);
12508 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12509 return(ctxt->errNo);
/* Push the single CR flagged by end_in_lf, preserving the read position. */
12517 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12518 (ctxt->input->buf != NULL)) {
12519 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12521 size_t current = ctxt->input->cur - ctxt->input->base;
12523 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12525 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12530 * Check for termination
/*
 * cur_avail: bytes of the current input that have not been read yet,
 * computed from input->length when there is no input buffer and from the
 * buffer usage otherwise.
 */
12534 if (ctxt->input != NULL) {
12535 if (ctxt->input->buf == NULL)
12536 cur_avail = ctxt->input->length -
12537 (ctxt->input->cur - ctxt->input->base);
12539 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12540 (ctxt->input->cur - ctxt->input->base);
/* The parser is in neither the EOF nor the EPILOG state. */
12543 if ((ctxt->instate != XML_PARSER_EOF) &&
12544 (ctxt->instate != XML_PARSER_EPILOG)) {
12545 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* EPILOG state but unread bytes remain in the input. */
12547 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12548 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* endDocument is fired here only if the parser had not already reached EOF. */
12550 if (ctxt->instate != XML_PARSER_EOF) {
12551 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12552 ctxt->sax->endDocument(ctxt->userData);
12554 ctxt->instate = XML_PARSER_EOF;
/* A document that is not well formed reports the recorded error number. */
12556 if (ctxt->wellFormed == 0)
12557 return((xmlParserErrors) ctxt->errNo);
12562 /************************************************************************
12564 * I/O front end functions to the parser *
12566 ************************************************************************/
12569 * xmlCreatePushParserCtxt:
12570 * @sax: a SAX handler
12571 * @user_data: The user data returned on SAX callbacks
12572 * @chunk: a pointer to an array of chars
12573 * @size: number of chars in the array
12574 * @filename: an optional file name or URI
12576 * Create a parser context for using the XML parser in push mode.
12577 * If @chunk and @size are non-NULL, the data is used to detect
12578 * the encoding. The remaining characters will be parsed so they
12579 * don't need to be fed in again through xmlParseChunk.
12580 * To allow content encoding detection, @size should be >= 4
12581 * The value of @filename is used for fetching external entities
12582 * and error/warning reports.
12584 * Returns the new parser context or NULL
12588 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12589 const char *chunk, int size, const char *filename) {
/*
 * Builds a parser context whose single input is an initially empty
 * xmlParserInputBuffer, optionally seeds that buffer with @chunk, and
 * switches to the encoding detected from the first bytes of @chunk.
 */
12590 xmlParserCtxtPtr ctxt;
12591 xmlParserInputPtr inputStream;
12592 xmlParserInputBufferPtr buf;
12593 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12596 * plug some encoding conversion routines
/* Detection is only attempted when at least 4 bytes are supplied. */
12598 if ((chunk != NULL) && (size >= 4))
12599 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12601 buf = xmlAllocParserInputBuffer(enc);
12602 if (buf == NULL) return(NULL);
/* From here on every failure path has to release buf as well. */
12604 ctxt = xmlNewParserCtxt();
12605 if (ctxt == NULL) {
12606 xmlErrMemory(NULL, "creating parser: out of memory\n");
12607 xmlFreeParserInputBuffer(buf);
12610 ctxt->dictNames = 1;
/*
 * pushTab holds three pointer-sized slots per element-name stack entry;
 * xmlParseTryOrFinish reads them at nameNr * 3 - 3, - 2 and - 1 when it
 * parses an end tag.
 */
12611 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12612 if (ctxt->pushTab == NULL) {
12613 xmlErrMemory(ctxt, NULL);
12614 xmlFreeParserInputBuffer(buf);
12615 xmlFreeParserCtxt(ctxt);
/*
 * Replace the handler installed by xmlNewParserCtxt with a private copy of
 * the caller handler. With SAX1 support compiled in, the old handler is
 * only freed when it is not the static xmlDefaultSAXHandler.
 * NOTE(review): sax->initialized is dereferenced below; presumably this
 * whole section is guarded by a sax != NULL test - confirm.
 */
12619 #ifdef LIBXML_SAX1_ENABLED
12620 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12621 #endif /* LIBXML_SAX1_ENABLED */
12622 xmlFree(ctxt->sax);
12623 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12624 if (ctxt->sax == NULL) {
12625 xmlErrMemory(ctxt, NULL);
12626 xmlFreeParserInputBuffer(buf);
12627 xmlFreeParserCtxt(ctxt);
/*
 * Zero the new handler first: a handler without the SAX2 magic only
 * provides the leading xmlSAXHandlerV1 part, so the remaining fields must
 * stay zero.
 */
12630 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12631 if (sax->initialized == XML_SAX2_MAGIC)
12632 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12634 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
/* A NULL user_data leaves the userData chosen by xmlNewParserCtxt in place. */
12635 if (user_data != NULL)
12636 ctxt->userData = user_data;
12638 if (filename == NULL) {
12639 ctxt->directory = NULL;
12641 ctxt->directory = xmlParserGetDirectory(filename);
12644 inputStream = xmlNewInputStream(ctxt);
12645 if (inputStream == NULL) {
12646 xmlFreeParserCtxt(ctxt);
12647 xmlFreeParserInputBuffer(buf);
12651 if (filename == NULL)
12652 inputStream->filename = NULL;
12654 inputStream->filename = (char *)
12655 xmlCanonicPath((const xmlChar *) filename);
/*
 * NOTE(review): on this failure path ctxt and buf are released, but
 * inputStream has not been pushed onto ctxt yet (inputPush happens below)
 * and no release of it is visible - possible leak, confirm and consider
 * freeing the stream here.
 */
12656 if (inputStream->filename == NULL) {
12657 xmlFreeParserCtxt(ctxt);
12658 xmlFreeParserInputBuffer(buf);
/* Attach buf to the stream and make the stream the current input of ctxt. */
12662 inputStream->buf = buf;
12663 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12664 inputPush(ctxt, inputStream);
/*
 * Final steps: without initial data the charset is left as
 * XML_CHAR_ENCODING_NONE; otherwise the data is appended to the input
 * buffer with the read position (base and cur offsets) handed back to
 * xmlBufSetInputBaseCur afterwards. Last, a detected encoding is applied
 * through xmlSwitchEncoding.
 */
12667 * If the caller didn't provide an initial 'chunk' for determining
12668 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12669 * that it can be automatically determined later
12671 if ((size == 0) || (chunk == NULL)) {
12672 ctxt->charset = XML_CHAR_ENCODING_NONE;
12673 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12674 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12675 size_t cur = ctxt->input->cur - ctxt->input->base;
12677 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12679 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12681 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12685 if (enc != XML_CHAR_ENCODING_NONE) {
12686 xmlSwitchEncoding(ctxt, enc);
12691 #endif /* LIBXML_PUSH_ENABLED */
12695 * @ctxt: an XML parser context
12697 * Blocks further parser processing; does not override the recorded error
12701 xmlHaltParser(xmlParserCtxtPtr ctxt) {
/*
 * Puts the context into the XML_PARSER_EOF state and sets disableSAX, the
 * flag the parser tests before invoking SAX callbacks. ctxt->errNo is not
 * modified in the lines below.
 */
12704 ctxt->instate = XML_PARSER_EOF;
12705 ctxt->disableSAX = 1;
12706 if (ctxt->input != NULL) {
12708 * in case there was a specific allocation deallocate before
/* Release base through the input's own free callback, then clear the callback. */
12711 if (ctxt->input->free != NULL) {
12712 ctxt->input->free((xmlChar *) ctxt->input->base);
12713 ctxt->input->free = NULL;
/*
 * Point cur and base at an empty string literal, so the current byte of
 * the input is the terminating NUL.
 */
12715 ctxt->input->cur = BAD_CAST"";
12716 ctxt->input->base = ctxt->input->cur;
12722 * @ctxt: an XML parser context
12724 * Blocks further parser processing
12727 xmlStopParser(xmlParserCtxtPtr ctxt) {
/*
 * Halts the parser through xmlHaltParser and then records
 * XML_ERR_USER_STOP as the error number, replacing any value stored
 * before.
 */
12730 xmlHaltParser(ctxt);
12731 ctxt->errNo = XML_ERR_USER_STOP;
12735 * xmlCreateIOParserCtxt:
12736 * @sax: a SAX handler
12737 * @user_data: The user data returned on SAX callbacks
12738 * @ioread: an I/O read function
12739 * @ioclose: an I/O close function
12740 * @ioctx: an I/O handler
12741 * @enc: the charset encoding if known
12743 * Create a parser context for using the XML parser with an existing
12746 * Returns the new parser context or NULL
12749 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12750 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12751 void *ioctx, xmlCharEncoding enc) {
/*
 * Builds a parser context whose single input reads through the caller
 * supplied callbacks: an input buffer is created over ioread, ioclose and
 * ioctx, wrapped in an input stream and pushed as the current input.
 */
12752 xmlParserCtxtPtr ctxt;
12753 xmlParserInputPtr inputStream;
12754 xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
12756 if (ioread == NULL) return(NULL);
12758 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12760 if (ioclose != NULL)
12765 ctxt = xmlNewParserCtxt();
12766 if (ctxt == NULL) {
12767 xmlFreeParserInputBuffer(buf);
/*
 * Replace the handler installed by xmlNewParserCtxt with a private copy of
 * the caller handler. With SAX1 support compiled in, the old handler is
 * only freed when it is not the static xmlDefaultSAXHandler.
 * NOTE(review): sax->initialized is dereferenced below; presumably this
 * whole section is guarded by a sax != NULL test - confirm.
 */
12771 #ifdef LIBXML_SAX1_ENABLED
12772 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12773 #endif /* LIBXML_SAX1_ENABLED */
12774 xmlFree(ctxt->sax);
12775 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
/*
 * NOTE(review): only ctxt is released on this failure path, whereas the
 * same path in xmlCreatePushParserCtxt also frees its input buffer. buf
 * is not attached to ctxt at this point - possible leak, confirm.
 */
12776 if (ctxt->sax == NULL) {
12777 xmlErrMemory(ctxt, NULL);
12778 xmlFreeParserCtxt(ctxt);
/*
 * Zero the new handler first: a handler without the SAX2 magic only
 * provides the leading xmlSAXHandlerV1 part, so the remaining fields must
 * stay zero.
 */
12781 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12782 if (sax->initialized == XML_SAX2_MAGIC)
12783 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12785 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
/* A NULL user_data leaves the userData chosen by xmlNewParserCtxt in place. */
12786 if (user_data != NULL)
12787 ctxt->userData = user_data;
12790 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
/*
 * NOTE(review): xmlIOParseDTD frees its input buffer itself when
 * xmlNewIOInputStream fails, which suggests the buffer is not consumed on
 * failure; here only ctxt is released - possible leak of buf, confirm.
 */
12791 if (inputStream == NULL) {
12792 xmlFreeParserCtxt(ctxt);
12795 inputPush(ctxt, inputStream);
12800 #ifdef LIBXML_VALID_ENABLED
12801 /************************************************************************
12803 * Front ends when parsing a DTD *
12805 ************************************************************************/
12809 * @sax: the SAX handler block or NULL
12810 * @input: an Input Buffer
12811 * @enc: the charset encoding if known
12813 * Load and parse a DTD
12815 * Returns the resulting xmlDtdPtr or NULL in case of error.
12816 * @input will be freed by the function in any case.
/*
 * Parses a DTD from an already opened input buffer. A temporary parser
 * context and a scratch document are created, the input is parsed as the
 * external subset of that document, and on a well-formed parse the resulting
 * DTD is detached from the scratch document and kept as the result. The
 * scratch document and the context are released before returning.
 */
12820 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12821 xmlCharEncoding enc) {
12822 xmlDtdPtr ret = NULL;
12823 xmlParserCtxtPtr ctxt;
12824 xmlParserInputPtr pinput = NULL;
12830 ctxt = xmlNewParserCtxt();
12831 if (ctxt == NULL) {
/* No context could be allocated: the input buffer is released here. */
12832 xmlFreeParserInputBuffer(input);
12836 /* We are loading a DTD */
12837 ctxt->options |= XML_PARSE_DTDLOAD;
12840 * Set-up the SAX context
/* The handler allocated with the context is freed before being replaced. */
12843 if (ctxt->sax != NULL)
12844 xmlFree(ctxt->sax);
12846 ctxt->userData = ctxt;
12848 xmlDetectSAX2(ctxt);
12851 * generate a parser input from the I/O handler
12854 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12855 if (pinput == NULL) {
/* On the failure paths below ctxt->sax is cleared whenever a caller handler
 * was supplied, so that freeing the context does not free that handler. */
12856 if (sax != NULL) ctxt->sax = NULL;
12857 xmlFreeParserInputBuffer(input);
12858 xmlFreeParserCtxt(ctxt);
12863 * plug some encoding conversion routines here.
12865 if (xmlPushInput(ctxt, pinput) < 0) {
12866 if (sax != NULL) ctxt->sax = NULL;
12867 xmlFreeParserCtxt(ctxt);
/* An explicitly requested encoding is applied right after the push. */
12870 if (enc != XML_CHAR_ENCODING_NONE) {
12871 xmlSwitchEncoding(ctxt, enc);
/* The pushed input gets no filename, its base and cur are aligned on the
 * current read position, and its free callback is cleared. */
12874 pinput->filename = NULL;
12877 pinput->base = ctxt->input->cur;
12878 pinput->cur = ctxt->input->cur;
12879 pinput->free = NULL;
12882 * let's parse that entity knowing it's an external subset.
12884 ctxt->inSubset = 2;
12885 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12886 if (ctxt->myDoc == NULL) {
/* NOTE(review): only the error report is visible on this path; confirm that
 * the context (and the caller handler detach) is handled before returning. */
12887 xmlErrMemory(ctxt, "New Doc failed");
12890 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12892 BAD_CAST "none", BAD_CAST "none");
/* Auto-detection only runs when no encoding was requested and at least four
 * bytes of input are available. */
12894 if ((enc == XML_CHAR_ENCODING_NONE) &&
12895 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12897 * Get the 4 first bytes and decode the charset
12898 * if enc != XML_CHAR_ENCODING_NONE
12899 * plug some encoding conversion routines.
12905 enc = xmlDetectCharEncoding(start, 4);
12906 if (enc != XML_CHAR_ENCODING_NONE) {
12907 xmlSwitchEncoding(ctxt, enc);
12911 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12913 if (ctxt->myDoc != NULL) {
/* Only a well-formed parse produces a result: the external subset is taken
 * out of the scratch document so that xmlFreeDoc() below does not free it. */
12914 if (ctxt->wellFormed) {
12915 ret = ctxt->myDoc->extSubset;
12916 ctxt->myDoc->extSubset = NULL;
/* The children of the returned DTD are then walked one by one. */
12921 tmp = ret->children;
12922 while (tmp != NULL) {
12930 xmlFreeDoc(ctxt->myDoc);
12931 ctxt->myDoc = NULL;
/* Detach the caller handler before the context is freed. */
12933 if (sax != NULL) ctxt->sax = NULL;
12934 xmlFreeParserCtxt(ctxt);
12941 * @sax: the SAX handler block
12942 * @ExternalID: a NAME* containing the External ID of the DTD
12943 * @SystemID: a NAME* containing the URL to the DTD
12945 * Load and parse an external subset.
12947 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * Loads and parses an external DTD subset identified by ExternalID and/or
 * SystemID. The input is obtained through the resolveEntity SAX callback of
 * a temporary context, parsed as the external subset of a scratch document,
 * and on a well-formed parse the resulting DTD is detached from the scratch
 * document and kept as the result.
 */
12951 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12952 const xmlChar *SystemID) {
12953 xmlDtdPtr ret = NULL;
12954 xmlParserCtxtPtr ctxt;
12955 xmlParserInputPtr input = NULL;
12956 xmlCharEncoding enc;
12957 xmlChar* systemIdCanonic;
/* At least one of the two identifiers is required. */
12959 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12961 ctxt = xmlNewParserCtxt();
12962 if (ctxt == NULL) {
12966 /* We are loading a DTD */
12967 ctxt->options |= XML_PARSE_DTDLOAD;
12970 * Set-up the SAX context
/* The handler allocated with the context is freed before being replaced. */
12973 if (ctxt->sax != NULL)
12974 xmlFree(ctxt->sax);
12976 ctxt->userData = ctxt;
12980 * Canonicalise the system ID
12982 systemIdCanonic = xmlCanonicPath(SystemID);
/* A NULL result is only an error when a SystemID was actually given.
 * NOTE(review): unlike the failure paths further down, no detach of the
 * caller handler (ctxt->sax = NULL) is visible before this free - confirm
 * the caller handler cannot be released here. */
12983 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12984 xmlFreeParserCtxt(ctxt);
12989 * Ask the Entity resolver to load the damn thing
12992 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12993 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
/* Resolver produced nothing: detach the caller handler, free the context
 * and the canonical system id. */
12995 if (input == NULL) {
12996 if (sax != NULL) ctxt->sax = NULL;
12997 xmlFreeParserCtxt(ctxt);
12998 if (systemIdCanonic != NULL)
12999 xmlFree(systemIdCanonic);
13004 * plug some encoding conversion routines here.
13006 if (xmlPushInput(ctxt, input) < 0) {
13007 if (sax != NULL) ctxt->sax = NULL;
13008 xmlFreeParserCtxt(ctxt);
13009 if (systemIdCanonic != NULL)
13010 xmlFree(systemIdCanonic);
/* With at least four bytes available the encoding is sniffed and the result
 * is passed to xmlSwitchEncoding() as is. */
13013 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13014 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
13015 xmlSwitchEncoding(ctxt, enc);
/* The canonical system id is handed to the input as its filename when the
 * input has none; the xmlFree() below is presumably the alternative branch
 * (the branch separator is not visible in this listing). */
13018 if (input->filename == NULL)
13019 input->filename = (char *) systemIdCanonic;
13021 xmlFree(systemIdCanonic);
/* Align base and cur on the current read position, no free callback. */
13024 input->base = ctxt->input->cur;
13025 input->cur = ctxt->input->cur;
13026 input->free = NULL;
13029 * let's parse that entity knowing it's an external subset.
13031 ctxt->inSubset = 2;
13032 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
13033 if (ctxt->myDoc == NULL) {
13034 xmlErrMemory(ctxt, "New Doc failed");
13035 if (sax != NULL) ctxt->sax = NULL;
13036 xmlFreeParserCtxt(ctxt);
13039 ctxt->myDoc->properties = XML_DOC_INTERNAL;
13040 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13041 ExternalID, SystemID);
13042 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13044 if (ctxt->myDoc != NULL) {
/* Only a well-formed parse produces a result: the external subset is taken
 * out of the scratch document so that xmlFreeDoc() below does not free it. */
13045 if (ctxt->wellFormed) {
13046 ret = ctxt->myDoc->extSubset;
13047 ctxt->myDoc->extSubset = NULL;
/* The children of the returned DTD are then walked one by one. */
13052 tmp = ret->children;
13053 while (tmp != NULL) {
13061 xmlFreeDoc(ctxt->myDoc);
13062 ctxt->myDoc = NULL;
/* Detach the caller handler before the context is freed. */
13064 if (sax != NULL) ctxt->sax = NULL;
13065 xmlFreeParserCtxt(ctxt);
13073 * @ExternalID: a NAME* containing the External ID of the DTD
13074 * @SystemID: a NAME* containing the URL to the DTD
13076 * Load and parse an external subset.
13078 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * Convenience wrapper: loads the DTD through xmlSAXParseDTD() with no
 * caller-supplied SAX handler (NULL is passed as the sax argument).
 */
13082 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13083 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13085 #endif /* LIBXML_VALID_ENABLED */
13087 /************************************************************************
13089 * Front ends when parsing an Entity *
13091 ************************************************************************/
13094 * xmlParseCtxtExternalEntity:
13095 * @ctx: the existing parsing context
13096 * @URL: the URL for the entity to load
13097 * @ID: the System ID for the entity to load
13098 * @lst: the return value for the set of parsed nodes
13100 * Parse an external general entity within an existing parsing context
13101 * An external general parsed entity is well-formed if it matches the
13102 * production labeled extParsedEnt.
13104 * [78] extParsedEnt ::= TextDecl? content
13106 * Returns 0 if the entity is well formed, -1 in case of args problem and
13107 * the parser error code otherwise
/*
 * Parses an external general entity on behalf of an existing context (ctx).
 * A child context is created for the entity, wired to the SAX handler,
 * dictionary and settings of the parent, and the entity content is parsed
 * under a temporary pseudoroot element of a scratch document. Validation
 * state is copied back to the parent afterwards, the parsed nodes are
 * unlinked from the pseudoroot, and the child context and scratch document
 * are released.
 */
13111 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13112 const xmlChar *ID, xmlNodePtr *lst) {
13113 xmlParserCtxtPtr ctxt;
13115 xmlNodePtr newRoot;
13116 xmlSAXHandlerPtr oldsax = NULL;
13119 xmlCharEncoding enc;
13121 if (ctx == NULL) return(-1);
/* Entity nesting guard: depth above 40 is refused unless XML_PARSE_HUGE is
 * set, and depth above 1024 is always refused. */
13123 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13124 (ctx->depth > 1024)) {
13125 return(XML_ERR_ENTITY_LOOP);
13130 if ((URL == NULL) && (ID == NULL))
13132 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13135 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13136 if (ctxt == NULL) {
/* The child borrows the parent SAX handler; its own handler is remembered
 * in oldsax so that it can be put back before the child is freed. */
13140 oldsax = ctxt->sax;
13141 ctxt->sax = ctx->sax;
13142 xmlDetectSAX2(ctxt);
13143 newDoc = xmlNewDoc(BAD_CAST "1.0");
13144 if (newDoc == NULL) {
/* NOTE(review): at this point ctxt->sax still aliases ctx->sax and no
 * restore of oldsax is visible, unlike the newRoot failure path below -
 * confirm that freeing the child cannot release the parent handler. */
13145 xmlFreeParserCtxt(ctxt);
13148 newDoc->properties = XML_DOC_INTERNAL;
/* The scratch document shares the dictionary of the parent document and
 * takes a reference on it. */
13149 if (ctx->myDoc->dict) {
13150 newDoc->dict = ctx->myDoc->dict;
13151 xmlDictReference(newDoc->dict);
/* The DTD subsets are borrowed from the parent document; they are reset to
 * NULL on newDoc before every xmlFreeDoc(newDoc) in this function. */
13153 if (ctx->myDoc != NULL) {
13154 newDoc->intSubset = ctx->myDoc->intSubset;
13155 newDoc->extSubset = ctx->myDoc->extSubset;
13157 if (ctx->myDoc->URL != NULL) {
13158 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13160 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13161 if (newRoot == NULL) {
/* Failure path: restore the child handler, free the child, un-borrow the
 * subsets and free the scratch document. */
13162 ctxt->sax = oldsax;
13163 xmlFreeParserCtxt(ctxt);
13164 newDoc->intSubset = NULL;
13165 newDoc->extSubset = NULL;
13166 xmlFreeDoc(newDoc);
/* The pseudoroot becomes the current node that parsed content hangs from. */
13169 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13170 nodePush(ctxt, newDoc->children);
13171 if (ctx->myDoc == NULL) {
13172 ctxt->myDoc = newDoc;
13174 ctxt->myDoc = ctx->myDoc;
13175 newDoc->children->doc = ctx->myDoc;
13179 * Get the 4 first bytes and decode the charset
13180 * if enc != XML_CHAR_ENCODING_NONE
13181 * plug some encoding conversion routines.
13184 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13189 enc = xmlDetectCharEncoding(start, 4);
13190 if (enc != XML_CHAR_ENCODING_NONE) {
13191 xmlSwitchEncoding(ctxt, enc);
13196 * Parse a possible text declaration first
13198 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13199 xmlParseTextDecl(ctxt);
13201 * An XML-1.0 document can't reference an entity not XML-1.0
13203 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13204 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13205 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13206 "Version mismatch between document and entity\n");
13211 * If the user provided its own SAX callbacks then reuse the
13212 * useData callback field, otherwise the expected setup in a
13213 * DOM builder is to have userData == ctxt
13215 if (ctx->userData == ctx)
13216 ctxt->userData = ctxt;
13218 ctxt->userData = ctx->userData;
13221 * Doing validity checking on chunk doesn't make sense
/* Parser settings are inherited from the parent; depth is one level deeper. */
13223 ctxt->instate = XML_PARSER_CONTENT;
13224 ctxt->validate = ctx->validate;
13225 ctxt->valid = ctx->valid;
13226 ctxt->loadsubset = ctx->loadsubset;
13227 ctxt->depth = ctx->depth + 1;
13228 ctxt->replaceEntities = ctx->replaceEntities;
/* Validation error and warning callbacks are inherited only when validating;
 * the other assignments clear them. */
13229 if (ctxt->validate) {
13230 ctxt->vctxt.error = ctx->vctxt.error;
13231 ctxt->vctxt.warning = ctx->vctxt.warning;
13233 ctxt->vctxt.error = NULL;
13234 ctxt->vctxt.warning = NULL;
13236 ctxt->vctxt.nodeTab = NULL;
13237 ctxt->vctxt.nodeNr = 0;
13238 ctxt->vctxt.nodeMax = 0;
13239 ctxt->vctxt.node = NULL;
/* The child dictionary is dropped in favour of the parent one, and the three
 * cached interned strings are looked up again in that dictionary. */
13240 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13241 ctxt->dict = ctx->dict;
13242 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13243 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13244 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13245 ctxt->dictNames = ctx->dictNames;
/* Attribute default tables are borrowed from the parent; they are reset to
 * NULL on the child before it is freed (see the end of the function). */
13246 ctxt->attsDefault = ctx->attsDefault;
13247 ctxt->attsSpecial = ctx->attsSpecial;
13248 ctxt->linenumbers = ctx->linenumbers;
13250 xmlParseContent(ctxt);
/* Validation state flows back to the parent context. */
13252 ctx->validate = ctxt->validate;
13253 ctx->valid = ctxt->valid;
/* Leftover input after the content is an error: a closing tag means the
 * chunk is not balanced, anything else is extra content. */
13254 if ((RAW == '<') && (NXT(1) == '/')) {
13255 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13256 } else if (RAW != 0) {
13257 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot. */
13259 if (ctxt->node != newDoc->children) {
13260 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13263 if (!ctxt->wellFormed) {
13264 if (ctxt->errNo == 0)
13273 * Return the newly created nodeset after unlinking it from
13274 * they pseudo parent.
13276 cur = newDoc->children->children;
13278 while (cur != NULL) {
13279 cur->parent = NULL;
13282 newDoc->children->children = NULL;
/* Teardown: give the child its own handler back, drop the borrowed tables
 * and subsets, then free the child context and the scratch document. */
13286 ctxt->sax = oldsax;
13288 ctxt->attsDefault = NULL;
13289 ctxt->attsSpecial = NULL;
13290 xmlFreeParserCtxt(ctxt);
13291 newDoc->intSubset = NULL;
13292 newDoc->extSubset = NULL;
13293 xmlFreeDoc(newDoc);
13299 * xmlParseExternalEntityPrivate:
13300 * @doc: the document the chunk pertains to
13301 * @oldctxt: the previous parser context if available
13302 * @sax: the SAX handler block (possibly NULL)
13303 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13304 * @depth: Used for loop detection, use 0
13305 * @URL: the URL for the entity to load
13306 * @ID: the System ID for the entity to load
13307 * @list: the return value for the set of parsed nodes
13309 * Private version of xmlParseExternalEntity()
13311 * Returns 0 if the entity is well formed, -1 in case of args problem and
13312 * the parser error code otherwise
/*
 * Shared implementation behind the external entity entry points. A child
 * context is created for the entity (URL / ID), optionally inheriting
 * settings and the node_seq bookkeeping from oldctxt, and the content is
 * parsed under a temporary pseudoroot element of a scratch document that
 * borrows the DTD subsets and the dictionary of doc. Counters, size
 * accounting and node_seq are propagated back to oldctxt when it is set.
 *
 * oldctxt may be NULL (several statements below test for it explicitly).
 */
13315 static xmlParserErrors
13316 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13317 xmlSAXHandlerPtr sax,
13318 void *user_data, int depth, const xmlChar *URL,
13319 const xmlChar *ID, xmlNodePtr *list) {
13320 xmlParserCtxtPtr ctxt;
13322 xmlNodePtr newRoot;
13323 xmlSAXHandlerPtr oldsax = NULL;
13324 xmlParserErrors ret = XML_ERR_OK;
13326 xmlCharEncoding enc;
/* Entity nesting guard: depth above 40 is refused when there is no parent
 * context or the parent does not have XML_PARSE_HUGE set. */
13328 if (((depth > 40) &&
13329 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13331 return(XML_ERR_ENTITY_LOOP);
13336 if ((URL == NULL) && (ID == NULL))
13337 return(XML_ERR_INTERNAL_ERROR);
13339 return(XML_ERR_INTERNAL_ERROR);
13342 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
/* Failure to create the child context is reported as an undeclared entity
 * warning code. */
13343 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13344 ctxt->userData = ctxt;
/* With a parent context the child inherits its settings and shares its
 * node_seq buffer (handed back, and cleared on the child, before the child
 * is freed). */
13345 if (oldctxt != NULL) {
13346 ctxt->_private = oldctxt->_private;
13347 ctxt->loadsubset = oldctxt->loadsubset;
13348 ctxt->validate = oldctxt->validate;
13349 ctxt->external = oldctxt->external;
13350 ctxt->record_info = oldctxt->record_info;
13351 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13352 ctxt->node_seq.length = oldctxt->node_seq.length;
13353 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13356 * Doing validity checking on chunk without context
13357 * doesn't make sense
13359 ctxt->_private = NULL;
13360 ctxt->validate = 0;
13361 ctxt->external = 2;
13362 ctxt->loadsubset = 0;
13365 oldsax = ctxt->sax;
13367 if (user_data != NULL)
13368 ctxt->userData = user_data;
13370 xmlDetectSAX2(ctxt);
13371 newDoc = xmlNewDoc(BAD_CAST "1.0");
13372 if (newDoc == NULL) {
13373 ctxt->node_seq.maximum = 0;
13374 ctxt->node_seq.length = 0;
13375 ctxt->node_seq.buffer = NULL;
13376 xmlFreeParserCtxt(ctxt);
13377 return(XML_ERR_INTERNAL_ERROR);
13379 newDoc->properties = XML_DOC_INTERNAL;
13380 newDoc->intSubset = doc->intSubset;
13381 newDoc->extSubset = doc->extSubset;
13382 newDoc->dict = doc->dict;
13383 xmlDictReference(newDoc->dict);
13385 if (doc->URL != NULL) {
13386 newDoc->URL = xmlStrdup(doc->URL);
13388 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13389 if (newRoot == NULL) {
13391 ctxt->sax = oldsax;
13392 ctxt->node_seq.maximum = 0;
13393 ctxt->node_seq.length = 0;
13394 ctxt->node_seq.buffer = NULL;
13395 xmlFreeParserCtxt(ctxt);
13396 newDoc->intSubset = NULL;
13397 newDoc->extSubset = NULL;
13398 xmlFreeDoc(newDoc);
13399 return(XML_ERR_INTERNAL_ERROR);
13401 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13402 nodePush(ctxt, newDoc->children);
13404 newRoot->doc = doc;
13407 * Get the 4 first bytes and decode the charset
13408 * if enc != XML_CHAR_ENCODING_NONE
13409 * plug some encoding conversion routines.
13412 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13417 enc = xmlDetectCharEncoding(start, 4);
13418 if (enc != XML_CHAR_ENCODING_NONE) {
13419 xmlSwitchEncoding(ctxt, enc);
13424 * Parse a possible text declaration first
13426 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13427 xmlParseTextDecl(ctxt);
13430 ctxt->instate = XML_PARSER_CONTENT;
13431 ctxt->depth = depth;
13433 xmlParseContent(ctxt);
13435 if ((RAW == '<') && (NXT(1) == '/')) {
13436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13437 } else if (RAW != 0) {
13438 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot. */
13440 if (ctxt->node != newDoc->children) {
13441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
/* A parse that was not well formed reports the recorded error number, or an
 * internal error when none was recorded. */
13444 if (!ctxt->wellFormed) {
13445 if (ctxt->errNo == 0)
13446 ret = XML_ERR_INTERNAL_ERROR;
13448 ret = (xmlParserErrors)ctxt->errNo;
13450 if (list != NULL) {
13454 * Return the newly created nodeset after unlinking it from
13455 * they pseudo parent.
13457 cur = newDoc->children->children;
13459 while (cur != NULL) {
13460 cur->parent = NULL;
13463 newDoc->children->children = NULL;
13469 * Record in the parent context the number of entities replacement
13470 * done when parsing that reference.
13472 if (oldctxt != NULL)
13473 oldctxt->nbentities += ctxt->nbentities;
13476 * Also record the size of the entity parsed
13478 if (ctxt->input != NULL && oldctxt != NULL) {
13479 oldctxt->sizeentities += ctxt->input->consumed;
13480 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13483 * And record the last error if any
/* NOTE(review): the copy below takes &oldctxt->lastError without the
 * oldctxt != NULL test applied by the statements just above and below; the
 * public wrapper in this file passes NULL for oldctxt - confirm this path
 * cannot write through a NULL parent context. */
13485 if (ctxt->lastError.code != XML_ERR_OK)
13486 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
/* Teardown: restore the child handler, hand node_seq back to the parent,
 * clear the shared fields on the child, then free child and scratch doc. */
13489 ctxt->sax = oldsax;
13490 if (oldctxt != NULL) {
13491 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13492 oldctxt->node_seq.length = ctxt->node_seq.length;
13493 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13495 ctxt->node_seq.maximum = 0;
13496 ctxt->node_seq.length = 0;
13497 ctxt->node_seq.buffer = NULL;
13498 xmlFreeParserCtxt(ctxt);
13499 newDoc->intSubset = NULL;
13500 newDoc->extSubset = NULL;
13501 xmlFreeDoc(newDoc);
13506 #ifdef LIBXML_SAX1_ENABLED
13508 * xmlParseExternalEntity:
13509 * @doc: the document the chunk pertains to
13510 * @sax: the SAX handler block (possibly NULL)
13511 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13512 * @depth: Used for loop detection, use 0
13513 * @URL: the URL for the entity to load
13514 * @ID: the System ID for the entity to load
13515 * @lst: the return value for the set of parsed nodes
13517 * Parse an external general entity
13518 * An external general parsed entity is well-formed if it matches the
13519 * production labeled extParsedEnt.
13521 * [78] extParsedEnt ::= TextDecl? content
13523 * Returns 0 if the entity is well formed, -1 in case of args problem and
13524 * the parser error code otherwise
/*
 * Public entry point: forwards to xmlParseExternalEntityPrivate() with no
 * parent parser context (NULL is passed as oldctxt).
 */
13528 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13529 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13530 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13535 * xmlParseBalancedChunkMemory:
13536 * @doc: the document the chunk pertains to
13537 * @sax: the SAX handler block (possibly NULL)
13538 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13539 * @depth: Used for loop detection, use 0
13540 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13541 * @lst: the return value for the set of parsed nodes
13543 * Parse a well-balanced chunk of an XML document
13544 * called by the parser
13545 * The allowed sequence for the Well Balanced Chunk is the one defined by
13546 * the content production in the XML grammar:
13548 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13550 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13551 * the parser error code otherwise
/*
 * Convenience wrapper: forwards every argument unchanged to
 * xmlParseBalancedChunkMemoryRecover() with the recover flag set to 0.
 */
13555 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13557 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13558 depth, string, lst, 0 );
13560 #endif /* LIBXML_SAX1_ENABLED */
13563 * xmlParseBalancedChunkMemoryInternal:
13564 * @oldctxt: the existing parsing context
13565 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13566 * @user_data: the user data field for the parser context
13567 * @lst: the return value for the set of parsed nodes
13570 * Parse a well-balanced chunk of an XML document
13571 * called by the parser
13572 * The allowed sequence for the Well Balanced Chunk is the one defined by
13573 * the content production in the XML grammar:
13575 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13577 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13578 * error code otherwise
13580 * In case recover is set to 1, the nodelist will not be empty even if
13581 * the parsed chunk is not well balanced.
/*
 * Parses a zero-terminated string as a well-balanced chunk on behalf of an
 * existing context (oldctxt). A memory parser context is created over the
 * string and wired to the dictionary, namespaces, SAX handler and options
 * of the parent. Content is parsed under a temporary pseudoroot element
 * that is placed either in the parent document or, when the parent has
 * none, in a scratch document. The children of the parent document are
 * saved and restored around the parse.
 */
13583 static xmlParserErrors
13584 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13585 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13586 xmlParserCtxtPtr ctxt;
13587 xmlDocPtr newDoc = NULL;
13588 xmlNodePtr newRoot;
13589 xmlSAXHandlerPtr oldsax = NULL;
13590 xmlNodePtr content = NULL;
13591 xmlNodePtr last = NULL;
13593 xmlParserErrors ret = XML_ERR_OK;
/* Entity nesting guard: depth above 40 is refused unless XML_PARSE_HUGE is
 * set, and depth above 1024 is always refused. */
13598 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13599 (oldctxt->depth > 1024)) {
13600 return(XML_ERR_ENTITY_LOOP);
13606 if (string == NULL)
13607 return(XML_ERR_INTERNAL_ERROR);
13609 size = xmlStrlen(string);
13611 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
/* Failure to create the child context is reported as an undeclared entity
 * warning code. */
13612 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13613 if (user_data != NULL)
13614 ctxt->userData = user_data;
13616 ctxt->userData = ctxt;
/* The child dictionary is dropped in favour of the parent one, and the three
 * cached interned strings are looked up again in that dictionary. */
13617 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13618 ctxt->dict = oldctxt->dict;
13619 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13620 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13621 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13624 /* propagate namespaces down the entity */
/* nsTab is walked two entries at a time; each pair is pushed on the child. */
13625 for (i = 0;i < oldctxt->nsNr;i += 2) {
13626 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
/* The child borrows the parent SAX handler; its own handler is kept in
 * oldsax and restored before the child is freed. */
13630 oldsax = ctxt->sax;
13631 ctxt->sax = oldctxt->sax;
13632 xmlDetectSAX2(ctxt);
13633 ctxt->replaceEntities = oldctxt->replaceEntities;
13634 ctxt->options = oldctxt->options;
13636 ctxt->_private = oldctxt->_private;
/* Without a parent document a scratch document sharing the dictionary is
 * created; otherwise the parent document is used and its current children
 * and last pointers are saved for restoration after the parse. */
13637 if (oldctxt->myDoc == NULL) {
13638 newDoc = xmlNewDoc(BAD_CAST "1.0");
13639 if (newDoc == NULL) {
13640 ctxt->sax = oldsax;
13642 xmlFreeParserCtxt(ctxt);
13643 return(XML_ERR_INTERNAL_ERROR);
13645 newDoc->properties = XML_DOC_INTERNAL;
13646 newDoc->dict = ctxt->dict;
13647 xmlDictReference(newDoc->dict);
13648 ctxt->myDoc = newDoc;
13650 ctxt->myDoc = oldctxt->myDoc;
13651 content = ctxt->myDoc->children;
13652 last = ctxt->myDoc->last;
13654 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13655 if (newRoot == NULL) {
13656 ctxt->sax = oldsax;
13658 xmlFreeParserCtxt(ctxt);
13659 if (newDoc != NULL) {
13660 xmlFreeDoc(newDoc);
13662 return(XML_ERR_INTERNAL_ERROR);
/* The document child list is emptied so that the pseudoroot is its only
 * child while parsing. */
13664 ctxt->myDoc->children = NULL;
13665 ctxt->myDoc->last = NULL;
13666 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13667 nodePush(ctxt, ctxt->myDoc->children);
13668 ctxt->instate = XML_PARSER_CONTENT;
13669 ctxt->depth = oldctxt->depth + 1;
/* The child never validates on its own; validation of the produced elements
 * happens in the unlinking loop further down. */
13671 ctxt->validate = 0;
13672 ctxt->loadsubset = oldctxt->loadsubset;
13673 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13675 * ID/IDREF registration will be done in xmlValidateElement below
13677 ctxt->loadsubset |= XML_SKIP_IDS;
13679 ctxt->dictNames = oldctxt->dictNames;
/* Attribute default tables are borrowed from the parent; they are reset to
 * NULL on the child before it is freed. */
13680 ctxt->attsDefault = oldctxt->attsDefault;
13681 ctxt->attsSpecial = oldctxt->attsSpecial;
13683 xmlParseContent(ctxt);
/* Leftover input after the content is an error: a closing tag means the
 * chunk is not balanced, anything else is extra content. */
13684 if ((RAW == '<') && (NXT(1) == '/')) {
13685 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13686 } else if (RAW != 0) {
13687 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot. */
13689 if (ctxt->node != ctxt->myDoc->children) {
13690 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13693 if (!ctxt->wellFormed) {
13694 if (ctxt->errNo == 0)
13695 ret = XML_ERR_INTERNAL_ERROR;
13697 ret = (xmlParserErrors)ctxt->errNo;
/* Nodes are only handed back when the caller asked for them and the parse
 * succeeded. */
13702 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13706 * Return the newly created nodeset after unlinking it from
13707 * they pseudo parent.
13709 cur = ctxt->myDoc->children->children;
13711 while (cur != NULL) {
13712 #ifdef LIBXML_VALID_ENABLED
/* Each produced element is validated against the parent document when the
 * parent validates, is still well formed, and has an internal subset. */
13713 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13714 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13715 (cur->type == XML_ELEMENT_NODE)) {
13716 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13717 oldctxt->myDoc, cur);
13719 #endif /* LIBXML_VALID_ENABLED */
13720 cur->parent = NULL;
13723 ctxt->myDoc->children->children = NULL;
/* The pseudoroot is freed and the saved children of the document are put
 * back in place. */
13725 if (ctxt->myDoc != NULL) {
13726 xmlFreeNode(ctxt->myDoc->children);
13727 ctxt->myDoc->children = content;
13728 ctxt->myDoc->last = last;
13732 * Record in the parent context the number of entities replacement
13733 * done when parsing that reference.
/* NOTE(review): oldctxt has already been dereferenced unconditionally since
 * the top of the function, so this NULL test cannot protect anything. */
13735 if (oldctxt != NULL)
13736 oldctxt->nbentities += ctxt->nbentities;
13739 * Also record the last error if any
13741 if (ctxt->lastError.code != XML_ERR_OK)
13742 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
/* Teardown: restore the child handler and drop the borrowed tables before
 * the child is freed. NOTE(review): ctxt->dict still aliases oldctxt->dict
 * in the lines shown here - confirm it is detached before the free. */
13744 ctxt->sax = oldsax;
13746 ctxt->attsDefault = NULL;
13747 ctxt->attsSpecial = NULL;
13748 xmlFreeParserCtxt(ctxt);
13749 if (newDoc != NULL) {
13750 xmlFreeDoc(newDoc);
13757 * xmlParseInNodeContext:
13758 * @node: the context node
13759 * @data: the input string
13760 * @datalen: the input string length in bytes
13761 * @options: a combination of xmlParserOption
13762 * @lst: the return value for the set of parsed nodes
13764 * Parse a well-balanced chunk of an XML document
13765 * within the context (DTD, namespaces, etc ...) of the given node.
13767 * The allowed sequence for the data is a Well Balanced Chunk defined by
13768 * the content production in the XML grammar:
13770 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13772 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13773 * error code otherwise
/*
 * Parses a chunk of markup as if it appeared inside the given node. The
 * owning document is located from the node, a memory parser context (XML
 * or, when enabled, HTML) is created over data, the dictionary, encoding
 * and in-scope namespaces are taken from the document and the ancestor
 * chain, and the content is parsed with a temporary comment node added under
 * the context node as a marker. On failure the produced node list is freed.
 */
13776 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13777 int options, xmlNodePtr *lst) {
13779 xmlParserCtxtPtr ctxt;
13780 xmlDocPtr doc = NULL;
13781 xmlNodePtr fake, cur;
13784 xmlParserErrors ret = XML_ERR_OK;
13787 * check all input parameters, grab the document
/* All of lst, node and data are required and datalen must not be negative. */
13789 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13790 return(XML_ERR_INTERNAL_ERROR);
/* Only the node types listed below are accepted as a parsing context. */
13791 switch (node->type) {
13792 case XML_ELEMENT_NODE:
13793 case XML_ATTRIBUTE_NODE:
13794 case XML_TEXT_NODE:
13795 case XML_CDATA_SECTION_NODE:
13796 case XML_ENTITY_REF_NODE:
13798 case XML_COMMENT_NODE:
13799 case XML_DOCUMENT_NODE:
13800 case XML_HTML_DOCUMENT_NODE:
13803 return(XML_ERR_INTERNAL_ERROR);
/* Climb to the nearest element or document ancestor; that node becomes the
 * effective context. */
13806 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13807 (node->type != XML_DOCUMENT_NODE) &&
13808 (node->type != XML_HTML_DOCUMENT_NODE))
13809 node = node->parent;
13811 return(XML_ERR_INTERNAL_ERROR);
13812 if (node->type == XML_ELEMENT_NODE)
13815 doc = (xmlDocPtr) node;
13817 return(XML_ERR_INTERNAL_ERROR);
13820 * allocate a context and set-up everything not related to the
13821 * node position in the tree
/* The parser flavour follows the document type. */
13823 if (doc->type == XML_DOCUMENT_NODE)
13824 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13825 #ifdef LIBXML_HTML_ENABLED
13826 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13827 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13829 * When parsing in context, it makes no sense to add implied
13830 * elements like html/body/etc...
13832 options |= HTML_PARSE_NOIMPLIED;
13836 return(XML_ERR_INTERNAL_ERROR);
13839 return(XML_ERR_NO_MEMORY);
13842 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13843 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13844 * we must wait until the last moment to free the original one.
13846 if (doc->dict != NULL) {
13847 if (ctxt->dict != NULL)
13848 xmlDictFree(ctxt->dict);
13849 ctxt->dict = doc->dict;
13851 options |= XML_PARSE_NODICT;
/* When the document declares an encoding, it replaces the context encoding
 * and the matching handler is installed on the input. */
13853 if (doc->encoding != NULL) {
13854 xmlCharEncodingHandlerPtr hdlr;
13856 if (ctxt->encoding != NULL)
13857 xmlFree((xmlChar *) ctxt->encoding);
13858 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13860 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13861 if (hdlr != NULL) {
13862 xmlSwitchToEncoding(ctxt, hdlr);
/* NOTE(review): no release of ctxt (whose dict may alias doc->dict at this
 * point) is visible before this return - confirm the context is not leaked
 * when the encoding handler is missing. */
13864 return(XML_ERR_UNSUPPORTED_ENCODING);
13868 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13869 xmlDetectSAX2(ctxt);
13871 /* parsing in context, i.e. as within existing content */
13872 ctxt->instate = XML_PARSER_CONTENT;
/* An empty comment node is added as the last child of the context node; it
 * is unlinked again after the parse. */
13874 fake = xmlNewComment(NULL);
13875 if (fake == NULL) {
13876 xmlFreeParserCtxt(ctxt);
13877 return(XML_ERR_NO_MEMORY);
13879 xmlAddChild(node, fake);
13881 if (node->type == XML_ELEMENT_NODE) {
13882 nodePush(ctxt, node);
13884 * initialize the SAX2 namespaces stack
/* Namespace declarations are collected while walking element nodes; a
 * prefix already known to the context is not pushed again. */
13887 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13888 xmlNsPtr ns = cur->nsDef;
13889 const xmlChar *iprefix, *ihref;
13891 while (ns != NULL) {
13893 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13894 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13896 iprefix = ns->prefix;
13900 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13901 nsPush(ctxt, iprefix, ihref);
13910 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13912 * ID/IDREF registration will be done in xmlValidateElement below
13914 ctxt->loadsubset |= XML_SKIP_IDS;
13917 #ifdef LIBXML_HTML_ENABLED
13918 if (doc->type == XML_HTML_DOCUMENT_NODE)
13919 __htmlParseContent(ctxt);
13922 xmlParseContent(ctxt);
/* Leftover input after the content is an error: a closing tag means the
 * chunk is not balanced, anything else is extra content. */
13925 if ((RAW == '<') && (NXT(1) == '/')) {
13926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13927 } else if (RAW != 0) {
13928 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the context node (or empty). */
13930 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13931 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13932 ctxt->wellFormed = 0;
13935 if (!ctxt->wellFormed) {
13936 if (ctxt->errNo == 0)
13937 ret = XML_ERR_INTERNAL_ERROR;
13939 ret = (xmlParserErrors)ctxt->errNo;
13945 * Return the newly created nodeset after unlinking it from
13946 * the pseudo sibling.
13959 while (cur != NULL) {
13960 cur->parent = NULL;
13964 xmlUnlinkNode(fake);
/* On any error the node list handed back through lst is freed. */
13968 if (ret != XML_ERR_OK) {
13969 xmlFreeNodeList(*lst);
13973 if (doc->dict != NULL)
13975 xmlFreeParserCtxt(ctxt);
13979 return(XML_ERR_INTERNAL_ERROR);
13983 #ifdef LIBXML_SAX1_ENABLED
13985 * xmlParseBalancedChunkMemoryRecover:
13986 * @doc: the document the chunk pertains to
13987 * @sax: the SAX handler block (possibly NULL)
13988 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13989 * @depth: Used for loop detection, use 0
13990 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13991 * @lst: the return value for the set of parsed nodes
13992 * @recover: return nodes even if the data is broken (use 0)
13995 * Parse a well-balanced chunk of an XML document
13996 * called by the parser
13997 * The allowed sequence for the Well Balanced Chunk is the one defined by
13998 * the content production in the XML grammar:
14000 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
14002 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
14003 * the parser error code otherwise
14005 * In case recover is set to 1, the nodelist will not be empty even if
14006 * the parsed chunk is not well balanced, assuming the parsing succeeded to
14010 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
14011 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
14013 xmlParserCtxtPtr ctxt;
14015 xmlSAXHandlerPtr oldsax = NULL;
14016 xmlNodePtr content, newRoot;
14021 return(XML_ERR_ENTITY_LOOP);
14027 if (string == NULL)
14030 size = xmlStrlen(string);
14032 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
14033 if (ctxt == NULL) return(-1);
14034 ctxt->userData = ctxt;
14036 oldsax = ctxt->sax;
14038 if (user_data != NULL)
14039 ctxt->userData = user_data;
14041 newDoc = xmlNewDoc(BAD_CAST "1.0");
14042 if (newDoc == NULL) {
14043 xmlFreeParserCtxt(ctxt);
14046 newDoc->properties = XML_DOC_INTERNAL;
14047 if ((doc != NULL) && (doc->dict != NULL)) {
14048 xmlDictFree(ctxt->dict);
14049 ctxt->dict = doc->dict;
14050 xmlDictReference(ctxt->dict);
14051 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14052 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14053 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14054 ctxt->dictNames = 1;
14056 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
14059 newDoc->intSubset = doc->intSubset;
14060 newDoc->extSubset = doc->extSubset;
14062 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14063 if (newRoot == NULL) {
14065 ctxt->sax = oldsax;
14066 xmlFreeParserCtxt(ctxt);
14067 newDoc->intSubset = NULL;
14068 newDoc->extSubset = NULL;
14069 xmlFreeDoc(newDoc);
14072 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14073 nodePush(ctxt, newRoot);
14075 ctxt->myDoc = newDoc;
14077 ctxt->myDoc = newDoc;
14078 newDoc->children->doc = doc;
14079 /* Ensure that doc has XML spec namespace */
14080 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14081 newDoc->oldNs = doc->oldNs;
14083 ctxt->instate = XML_PARSER_CONTENT;
14084 ctxt->depth = depth;
14087 * Doing validity checking on chunk doesn't make sense
14089 ctxt->validate = 0;
14090 ctxt->loadsubset = 0;
14091 xmlDetectSAX2(ctxt);
14093 if ( doc != NULL ){
14094 content = doc->children;
14095 doc->children = NULL;
14096 xmlParseContent(ctxt);
14097 doc->children = content;
14100 xmlParseContent(ctxt);
14102 if ((RAW == '<') && (NXT(1) == '/')) {
14103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14104 } else if (RAW != 0) {
14105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
14107 if (ctxt->node != newDoc->children) {
14108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14111 if (!ctxt->wellFormed) {
14112 if (ctxt->errNo == 0)
14120 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14124 * Return the newly created nodeset after unlinking it from
14125 * they pseudo parent.
14127 cur = newDoc->children->children;
14129 while (cur != NULL) {
14130 xmlSetTreeDoc(cur, doc);
14131 cur->parent = NULL;
14134 newDoc->children->children = NULL;
14138 ctxt->sax = oldsax;
14139 xmlFreeParserCtxt(ctxt);
14140 newDoc->intSubset = NULL;
14141 newDoc->extSubset = NULL;
14142 newDoc->oldNs = NULL;
14143 xmlFreeDoc(newDoc);
14149 * xmlSAXParseEntity:
14150 * @sax: the SAX handler block
14151 * @filename: the filename
14153 * parse an XML external entity out of context and build a tree.
14154 * It use the given SAX function block to handle the parsing callback.
14155 * If sax is NULL, fallback to the default DOM tree building routines.
14157 * [78] extParsedEnt ::= TextDecl? content
14159 * This correspond to a "Well Balanced" chunk
14161 * Returns the resulting document tree
14165 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14167 xmlParserCtxtPtr ctxt;
14169 ctxt = xmlCreateFileParserCtxt(filename);
14170 if (ctxt == NULL) {
14174 if (ctxt->sax != NULL)
14175 xmlFree(ctxt->sax);
14177 ctxt->userData = NULL;
14180 xmlParseExtParsedEnt(ctxt);
14182 if (ctxt->wellFormed)
14186 xmlFreeDoc(ctxt->myDoc);
14187 ctxt->myDoc = NULL;
14191 xmlFreeParserCtxt(ctxt);
14198 * @filename: the filename
14200 * parse an XML external entity out of context and build a tree.
14202 * [78] extParsedEnt ::= TextDecl? content
14204 * This correspond to a "Well Balanced" chunk
14206 * Returns the resulting document tree
14210 xmlParseEntity(const char *filename) {
14211 return(xmlSAXParseEntity(NULL, filename));
14213 #endif /* LIBXML_SAX1_ENABLED */
14216 * xmlCreateEntityParserCtxtInternal:
14217 * @URL: the entity URL
14218 * @ID: the entity PUBLIC ID
14219 * @base: a possible base for the target URI
14220 * @pctx: parser context used to set options on new context
14222 * Create a parser context for an external entity
14223 * Automatic support for ZLIB/Compress compressed document is provided
14224 * by default if found at compile-time.
14226 * Returns the new parser context or NULL
14228 static xmlParserCtxtPtr
14229 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14230 const xmlChar *base, xmlParserCtxtPtr pctx) {
14231 xmlParserCtxtPtr ctxt;
14232 xmlParserInputPtr inputStream;
14233 char *directory = NULL;
14236 ctxt = xmlNewParserCtxt();
14237 if (ctxt == NULL) {
14241 if (pctx != NULL) {
14242 ctxt->options = pctx->options;
14243 ctxt->_private = pctx->_private;
14246 uri = xmlBuildURI(URL, base);
14249 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14250 if (inputStream == NULL) {
14251 xmlFreeParserCtxt(ctxt);
14255 inputPush(ctxt, inputStream);
14257 if ((ctxt->directory == NULL) && (directory == NULL))
14258 directory = xmlParserGetDirectory((char *)URL);
14259 if ((ctxt->directory == NULL) && (directory != NULL))
14260 ctxt->directory = directory;
14262 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14263 if (inputStream == NULL) {
14265 xmlFreeParserCtxt(ctxt);
14269 inputPush(ctxt, inputStream);
14271 if ((ctxt->directory == NULL) && (directory == NULL))
14272 directory = xmlParserGetDirectory((char *)uri);
14273 if ((ctxt->directory == NULL) && (directory != NULL))
14274 ctxt->directory = directory;
14281 * xmlCreateEntityParserCtxt:
14282 * @URL: the entity URL
14283 * @ID: the entity PUBLIC ID
14284 * @base: a possible base for the target URI
14286 * Create a parser context for an external entity
14287 * Automatic support for ZLIB/Compress compressed document is provided
14288 * by default if found at compile-time.
14290 * Returns the new parser context or NULL
14293 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14294 const xmlChar *base) {
14295 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14299 /************************************************************************
14301 * Front ends when parsing from a file *
14303 ************************************************************************/
14306 * xmlCreateURLParserCtxt:
14307 * @filename: the filename or URL
14308 * @options: a combination of xmlParserOption
14310 * Create a parser context for a file or URL content.
14311 * Automatic support for ZLIB/Compress compressed document is provided
14312 * by default if found at compile-time and for file accesses
14314 * Returns the new parser context or NULL
14317 xmlCreateURLParserCtxt(const char *filename, int options)
14319 xmlParserCtxtPtr ctxt;
14320 xmlParserInputPtr inputStream;
14321 char *directory = NULL;
14323 ctxt = xmlNewParserCtxt();
14324 if (ctxt == NULL) {
14325 xmlErrMemory(NULL, "cannot allocate parser context");
14330 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14331 ctxt->linenumbers = 1;
14333 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14334 if (inputStream == NULL) {
14335 xmlFreeParserCtxt(ctxt);
14339 inputPush(ctxt, inputStream);
14340 if ((ctxt->directory == NULL) && (directory == NULL))
14341 directory = xmlParserGetDirectory(filename);
14342 if ((ctxt->directory == NULL) && (directory != NULL))
14343 ctxt->directory = directory;
14349 * xmlCreateFileParserCtxt:
14350 * @filename: the filename
14352 * Create a parser context for a file content.
14353 * Automatic support for ZLIB/Compress compressed document is provided
14354 * by default if found at compile-time.
14356 * Returns the new parser context or NULL
14359 xmlCreateFileParserCtxt(const char *filename)
14361 return(xmlCreateURLParserCtxt(filename, 0));
14364 #ifdef LIBXML_SAX1_ENABLED
14366 * xmlSAXParseFileWithData:
14367 * @sax: the SAX handler block
14368 * @filename: the filename
14369 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14371 * @data: the userdata
14373 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14374 * compressed document is provided by default if found at compile-time.
14375 * It use the given SAX function block to handle the parsing callback.
14376 * If sax is NULL, fallback to the default DOM tree building routines.
14378 * User data (void *) is stored within the parser context in the
14379 * context's _private member, so it is available nearly everywhere in libxml
14381 * Returns the resulting document tree
14385 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14386 int recovery, void *data) {
14388 xmlParserCtxtPtr ctxt;
14392 ctxt = xmlCreateFileParserCtxt(filename);
14393 if (ctxt == NULL) {
14397 if (ctxt->sax != NULL)
14398 xmlFree(ctxt->sax);
14401 xmlDetectSAX2(ctxt);
14403 ctxt->_private = data;
14406 if (ctxt->directory == NULL)
14407 ctxt->directory = xmlParserGetDirectory(filename);
14409 ctxt->recovery = recovery;
14411 xmlParseDocument(ctxt);
14413 if ((ctxt->wellFormed) || recovery) {
14416 if (ctxt->input->buf->compressed > 0)
14417 ret->compression = 9;
14419 ret->compression = ctxt->input->buf->compressed;
14424 xmlFreeDoc(ctxt->myDoc);
14425 ctxt->myDoc = NULL;
14429 xmlFreeParserCtxt(ctxt);
14436 * @sax: the SAX handler block
14437 * @filename: the filename
14438 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14441 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14442 * compressed document is provided by default if found at compile-time.
14443 * It use the given SAX function block to handle the parsing callback.
14444 * If sax is NULL, fallback to the default DOM tree building routines.
14446 * Returns the resulting document tree
14450 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14452 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14457 * @cur: a pointer to an array of xmlChar
14459 * parse an XML in-memory document and build a tree.
14460 * In the case the document is not Well Formed, a attempt to build a
14461 * tree is tried anyway
14463 * Returns the resulting document tree or NULL in case of failure
14467 xmlRecoverDoc(const xmlChar *cur) {
14468 return(xmlSAXParseDoc(NULL, cur, 1));
14473 * @filename: the filename
14475 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14476 * compressed document is provided by default if found at compile-time.
14478 * Returns the resulting document tree if the file was wellformed,
14483 xmlParseFile(const char *filename) {
14484 return(xmlSAXParseFile(NULL, filename, 0));
14489 * @filename: the filename
14491 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14492 * compressed document is provided by default if found at compile-time.
14493 * In the case the document is not Well Formed, it attempts to build
14496 * Returns the resulting document tree or NULL in case of failure
14500 xmlRecoverFile(const char *filename) {
14501 return(xmlSAXParseFile(NULL, filename, 1));
14506 * xmlSetupParserForBuffer:
14507 * @ctxt: an XML parser context
14508 * @buffer: a xmlChar * buffer
14509 * @filename: a file name
14511 * Setup the parser context to parse a new buffer; Clears any prior
14512 * contents from the parser context. The buffer parameter must not be
14513 * NULL, but the filename parameter can be
14516 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14517 const char* filename)
14519 xmlParserInputPtr input;
14521 if ((ctxt == NULL) || (buffer == NULL))
14524 input = xmlNewInputStream(ctxt);
14525 if (input == NULL) {
14526 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14527 xmlClearParserCtxt(ctxt);
14531 xmlClearParserCtxt(ctxt);
14532 if (filename != NULL)
14533 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14534 input->base = buffer;
14535 input->cur = buffer;
14536 input->end = &buffer[xmlStrlen(buffer)];
14537 inputPush(ctxt, input);
14541 * xmlSAXUserParseFile:
14542 * @sax: a SAX handler
14543 * @user_data: The user data returned on SAX callbacks
14544 * @filename: a file name
14546 * parse an XML file and call the given SAX handler routines.
14547 * Automatic support for ZLIB/Compress compressed document is provided
14549 * Returns 0 in case of success or a error number otherwise
14552 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14553 const char *filename) {
14555 xmlParserCtxtPtr ctxt;
14557 ctxt = xmlCreateFileParserCtxt(filename);
14558 if (ctxt == NULL) return -1;
14559 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14560 xmlFree(ctxt->sax);
14562 xmlDetectSAX2(ctxt);
14564 if (user_data != NULL)
14565 ctxt->userData = user_data;
14567 xmlParseDocument(ctxt);
14569 if (ctxt->wellFormed)
14572 if (ctxt->errNo != 0)
14579 if (ctxt->myDoc != NULL) {
14580 xmlFreeDoc(ctxt->myDoc);
14581 ctxt->myDoc = NULL;
14583 xmlFreeParserCtxt(ctxt);
14587 #endif /* LIBXML_SAX1_ENABLED */
14589 /************************************************************************
14591 * Front ends when parsing from memory *
14593 ************************************************************************/
14596 * xmlCreateMemoryParserCtxt:
14597 * @buffer: a pointer to a char array
14598 * @size: the size of the array
14600 * Create a parser context for an XML in-memory document.
14602 * Returns the new parser context or NULL
14605 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14606 xmlParserCtxtPtr ctxt;
14607 xmlParserInputPtr input;
14608 xmlParserInputBufferPtr buf;
14610 if (buffer == NULL)
14615 ctxt = xmlNewParserCtxt();
14619 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14620 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14622 xmlFreeParserCtxt(ctxt);
14626 input = xmlNewInputStream(ctxt);
14627 if (input == NULL) {
14628 xmlFreeParserInputBuffer(buf);
14629 xmlFreeParserCtxt(ctxt);
14633 input->filename = NULL;
14635 xmlBufResetInput(input->buf->buffer, input);
14637 inputPush(ctxt, input);
14641 #ifdef LIBXML_SAX1_ENABLED
14643 * xmlSAXParseMemoryWithData:
14644 * @sax: the SAX handler block
14645 * @buffer: an pointer to a char array
14646 * @size: the size of the array
14647 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14649 * @data: the userdata
14651 * parse an XML in-memory block and use the given SAX function block
14652 * to handle the parsing callback. If sax is NULL, fallback to the default
14653 * DOM tree building routines.
14655 * User data (void *) is stored within the parser context in the
14656 * context's _private member, so it is available nearly everywhere in libxml
14658 * Returns the resulting document tree
14662 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14663 int size, int recovery, void *data) {
14665 xmlParserCtxtPtr ctxt;
14669 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14670 if (ctxt == NULL) return(NULL);
14672 if (ctxt->sax != NULL)
14673 xmlFree(ctxt->sax);
14676 xmlDetectSAX2(ctxt);
14678 ctxt->_private=data;
14681 ctxt->recovery = recovery;
14683 xmlParseDocument(ctxt);
14685 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14688 xmlFreeDoc(ctxt->myDoc);
14689 ctxt->myDoc = NULL;
14693 xmlFreeParserCtxt(ctxt);
14699 * xmlSAXParseMemory:
14700 * @sax: the SAX handler block
14701 * @buffer: an pointer to a char array
14702 * @size: the size of the array
14703 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14706 * parse an XML in-memory block and use the given SAX function block
14707 * to handle the parsing callback. If sax is NULL, fallback to the default
14708 * DOM tree building routines.
14710 * Returns the resulting document tree
14713 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14714 int size, int recovery) {
14715 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14720 * @buffer: an pointer to a char array
14721 * @size: the size of the array
14723 * parse an XML in-memory block and build a tree.
14725 * Returns the resulting document tree
14728 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14729 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14733 * xmlRecoverMemory:
14734 * @buffer: an pointer to a char array
14735 * @size: the size of the array
14737 * parse an XML in-memory block and build a tree.
14738 * In the case the document is not Well Formed, an attempt to
14739 * build a tree is tried anyway
14741 * Returns the resulting document tree or NULL in case of error
14744 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14745 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14749 * xmlSAXUserParseMemory:
14750 * @sax: a SAX handler
14751 * @user_data: The user data returned on SAX callbacks
14752 * @buffer: an in-memory XML document input
14753 * @size: the length of the XML document in bytes
14755 * A better SAX parsing routine.
14756 * parse an XML in-memory buffer and call the given SAX handler routines.
14758 * Returns 0 in case of success or a error number otherwise
14760 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14761 const char *buffer, int size) {
14763 xmlParserCtxtPtr ctxt;
14767 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14768 if (ctxt == NULL) return -1;
14769 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14770 xmlFree(ctxt->sax);
14772 xmlDetectSAX2(ctxt);
14774 if (user_data != NULL)
14775 ctxt->userData = user_data;
14777 xmlParseDocument(ctxt);
14779 if (ctxt->wellFormed)
14782 if (ctxt->errNo != 0)
14789 if (ctxt->myDoc != NULL) {
14790 xmlFreeDoc(ctxt->myDoc);
14791 ctxt->myDoc = NULL;
14793 xmlFreeParserCtxt(ctxt);
14797 #endif /* LIBXML_SAX1_ENABLED */
14800 * xmlCreateDocParserCtxt:
14801 * @cur: a pointer to an array of xmlChar
14803 * Creates a parser context for an XML in-memory document.
14805 * Returns the new parser context or NULL
14808 xmlCreateDocParserCtxt(const xmlChar *cur) {
14813 len = xmlStrlen(cur);
14814 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14817 #ifdef LIBXML_SAX1_ENABLED
14820 * @sax: the SAX handler block
14821 * @cur: a pointer to an array of xmlChar
14822 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14825 * parse an XML in-memory document and build a tree.
14826 * It use the given SAX function block to handle the parsing callback.
14827 * If sax is NULL, fallback to the default DOM tree building routines.
14829 * Returns the resulting document tree
14833 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14835 xmlParserCtxtPtr ctxt;
14836 xmlSAXHandlerPtr oldsax = NULL;
14838 if (cur == NULL) return(NULL);
14841 ctxt = xmlCreateDocParserCtxt(cur);
14842 if (ctxt == NULL) return(NULL);
14844 oldsax = ctxt->sax;
14846 ctxt->userData = NULL;
14848 xmlDetectSAX2(ctxt);
14850 xmlParseDocument(ctxt);
14851 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14854 xmlFreeDoc(ctxt->myDoc);
14855 ctxt->myDoc = NULL;
14858 ctxt->sax = oldsax;
14859 xmlFreeParserCtxt(ctxt);
14866 * @cur: a pointer to an array of xmlChar
14868 * parse an XML in-memory document and build a tree.
14870 * Returns the resulting document tree
14874 xmlParseDoc(const xmlChar *cur) {
14875 return(xmlSAXParseDoc(NULL, cur, 0));
14877 #endif /* LIBXML_SAX1_ENABLED */
14879 #ifdef LIBXML_LEGACY_ENABLED
14880 /************************************************************************
14882 * Specific function to keep track of entities references *
14883 * and used by the XSLT debugger *
14885 ************************************************************************/
14887 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14890 * xmlAddEntityReference:
14891 * @ent : A valid entity
14892 * @firstNode : A valid first node for children of entity
14893 * @lastNode : A valid last node of children entity
14895 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14898 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14899 xmlNodePtr lastNode)
14901 if (xmlEntityRefFunc != NULL) {
14902 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14908 * xmlSetEntityReferenceFunc:
14909 * @func: A valid function
14911 * Set the function to call call back when a xml reference has been made
14914 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14916 xmlEntityRefFunc = func;
14918 #endif /* LIBXML_LEGACY_ENABLED */
14920 /************************************************************************
14924 ************************************************************************/
14926 #ifdef LIBXML_XPATH_ENABLED
14927 #include <libxml/xpath.h>
14930 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14931 static int xmlParserInitialized = 0;
14936 * Initialization function for the XML parser.
14937 * This is not reentrant. Call once before processing in case of
14938 * use in multithreaded programs.
14942 xmlInitParser(void) {
14943 if (xmlParserInitialized != 0)
14946 #ifdef LIBXML_THREAD_ENABLED
14947 __xmlGlobalInitMutexLock();
14948 if (xmlParserInitialized == 0) {
14952 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14953 (xmlGenericError == NULL))
14954 initGenericErrorDefaultFunc(NULL);
14956 xmlInitializeDict();
14957 xmlInitCharEncodingHandlers();
14958 xmlDefaultSAXHandlerInit();
14959 xmlRegisterDefaultInputCallbacks();
14960 #ifdef LIBXML_OUTPUT_ENABLED
14961 xmlRegisterDefaultOutputCallbacks();
14962 #endif /* LIBXML_OUTPUT_ENABLED */
14963 #ifdef LIBXML_HTML_ENABLED
14964 htmlInitAutoClose();
14965 htmlDefaultSAXHandlerInit();
14967 #ifdef LIBXML_XPATH_ENABLED
14970 xmlParserInitialized = 1;
14971 #ifdef LIBXML_THREAD_ENABLED
14973 __xmlGlobalInitMutexUnlock();
14978 * xmlCleanupParser:
14980 * This function name is somewhat misleading. It does not clean up
14981 * parser state, it cleans up memory allocated by the library itself.
14982 * It is a cleanup function for the XML library. It tries to reclaim all
14983 * related global memory allocated for the library processing.
14984 * It doesn't deallocate any document related memory. One should
14985 * call xmlCleanupParser() only when the process has finished using
14986 * the library and all XML/HTML documents built with it.
14987 * See also xmlInitParser() which has the opposite function of preparing
14988 * the library for operations.
14990 * WARNING: if your application is multithreaded or has plugin support
14991 * calling this may crash the application if another thread or
14992 * a plugin is still using libxml2. It's sometimes very hard to
14993 * guess if libxml2 is in use in the application, some libraries
14994 * or plugins may use it without notice. In case of doubt abstain
14995 * from calling this function or do it just before calling exit()
14996 * to avoid leak reports from valgrind !
15000 xmlCleanupParser(void) {
15001 if (!xmlParserInitialized)
15004 xmlCleanupCharEncodingHandlers();
15005 #ifdef LIBXML_CATALOG_ENABLED
15006 xmlCatalogCleanup();
15009 xmlCleanupInputCallbacks();
15010 #ifdef LIBXML_OUTPUT_ENABLED
15011 xmlCleanupOutputCallbacks();
15013 #ifdef LIBXML_SCHEMAS_ENABLED
15014 xmlSchemaCleanupTypes();
15015 xmlRelaxNGCleanupTypes();
15017 xmlResetLastError();
15018 xmlCleanupGlobals();
15019 xmlCleanupThreads(); /* must be last if called not from the main thread */
15020 xmlCleanupMemory();
15021 xmlParserInitialized = 0;
15024 /************************************************************************
15026 * New set (2.6.0) of simpler and more flexible APIs *
15028 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means no string is dictionary-owned.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement, including inside an unbraced if/else. @str is evaluated
 * more than once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
15044 * @ctxt: an XML parser context
15046 * Reset a parser context
15049 xmlCtxtReset(xmlParserCtxtPtr ctxt)
15051 xmlParserInputPtr input;
15059 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15060 xmlFreeInputStream(input);
15063 ctxt->input = NULL;
15066 if (ctxt->spaceTab != NULL) {
15067 ctxt->spaceTab[0] = -1;
15068 ctxt->space = &ctxt->spaceTab[0];
15070 ctxt->space = NULL;
15080 DICT_FREE(ctxt->version);
15081 ctxt->version = NULL;
15082 DICT_FREE(ctxt->encoding);
15083 ctxt->encoding = NULL;
15084 DICT_FREE(ctxt->directory);
15085 ctxt->directory = NULL;
15086 DICT_FREE(ctxt->extSubURI);
15087 ctxt->extSubURI = NULL;
15088 DICT_FREE(ctxt->extSubSystem);
15089 ctxt->extSubSystem = NULL;
15090 if (ctxt->myDoc != NULL)
15091 xmlFreeDoc(ctxt->myDoc);
15092 ctxt->myDoc = NULL;
15094 ctxt->standalone = -1;
15095 ctxt->hasExternalSubset = 0;
15096 ctxt->hasPErefs = 0;
15098 ctxt->external = 0;
15099 ctxt->instate = XML_PARSER_START;
15102 ctxt->wellFormed = 1;
15103 ctxt->nsWellFormed = 1;
15104 ctxt->disableSAX = 0;
15107 ctxt->vctxt.userData = ctxt;
15108 ctxt->vctxt.error = xmlParserValidityError;
15109 ctxt->vctxt.warning = xmlParserValidityWarning;
15111 ctxt->record_info = 0;
15113 ctxt->checkIndex = 0;
15114 ctxt->inSubset = 0;
15115 ctxt->errNo = XML_ERR_OK;
15117 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15118 ctxt->catalogs = NULL;
15119 ctxt->nbentities = 0;
15120 ctxt->sizeentities = 0;
15121 ctxt->sizeentcopy = 0;
15122 xmlInitNodeInfoSeq(&ctxt->node_seq);
15124 if (ctxt->attsDefault != NULL) {
15125 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15126 ctxt->attsDefault = NULL;
15128 if (ctxt->attsSpecial != NULL) {
15129 xmlHashFree(ctxt->attsSpecial, NULL);
15130 ctxt->attsSpecial = NULL;
15133 #ifdef LIBXML_CATALOG_ENABLED
15134 if (ctxt->catalogs != NULL)
15135 xmlCatalogFreeLocal(ctxt->catalogs);
15137 if (ctxt->lastError.code != XML_ERR_OK)
15138 xmlResetError(&ctxt->lastError);
15142 * xmlCtxtResetPush:
15143 * @ctxt: an XML parser context
15144 * @chunk: a pointer to an array of chars
15145 * @size: number of chars in the array
15146 * @filename: an optional file name or URI
15147 * @encoding: the document encoding, or NULL
15149 * Reset a push parser context
15151 * Returns 0 in case of success and 1 in case of error
15154 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15155 int size, const char *filename, const char *encoding)
15157 xmlParserInputPtr inputStream;
15158 xmlParserInputBufferPtr buf;
15159 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15164 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15165 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15167 buf = xmlAllocParserInputBuffer(enc);
15171 if (ctxt == NULL) {
15172 xmlFreeParserInputBuffer(buf);
15176 xmlCtxtReset(ctxt);
15178 if (ctxt->pushTab == NULL) {
15179 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15180 sizeof(xmlChar *));
15181 if (ctxt->pushTab == NULL) {
15182 xmlErrMemory(ctxt, NULL);
15183 xmlFreeParserInputBuffer(buf);
15188 if (filename == NULL) {
15189 ctxt->directory = NULL;
15191 ctxt->directory = xmlParserGetDirectory(filename);
15194 inputStream = xmlNewInputStream(ctxt);
15195 if (inputStream == NULL) {
15196 xmlFreeParserInputBuffer(buf);
15200 if (filename == NULL)
15201 inputStream->filename = NULL;
15203 inputStream->filename = (char *)
15204 xmlCanonicPath((const xmlChar *) filename);
15205 inputStream->buf = buf;
15206 xmlBufResetInput(buf->buffer, inputStream);
15208 inputPush(ctxt, inputStream);
15210 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15211 (ctxt->input->buf != NULL)) {
15212 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15213 size_t cur = ctxt->input->cur - ctxt->input->base;
15215 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15217 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15219 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15223 if (encoding != NULL) {
15224 xmlCharEncodingHandlerPtr hdlr;
15226 if (ctxt->encoding != NULL)
15227 xmlFree((xmlChar *) ctxt->encoding);
15228 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15230 hdlr = xmlFindCharEncodingHandler(encoding);
15231 if (hdlr != NULL) {
15232 xmlSwitchToEncoding(ctxt, hdlr);
15234 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15235 "Unsupported encoding %s\n", BAD_CAST encoding);
15237 } else if (enc != XML_CHAR_ENCODING_NONE) {
15238 xmlSwitchEncoding(ctxt, enc);
15246 * xmlCtxtUseOptionsInternal:
15247 * @ctxt: an XML parser context
15248 * @options: a combination of xmlParserOption
15249 * @encoding: the user provided encoding to use
15251 * Applies the options to the parser context
15253 * Returns 0 in case of success, the set of unknown or unimplemented options
15254 * in case of error.
15257 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15261 if (encoding != NULL) {
15262 if (ctxt->encoding != NULL)
15263 xmlFree((xmlChar *) ctxt->encoding);
15264 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15266 if (options & XML_PARSE_RECOVER) {
15267 ctxt->recovery = 1;
15268 options -= XML_PARSE_RECOVER;
15269 ctxt->options |= XML_PARSE_RECOVER;
15271 ctxt->recovery = 0;
15272 if (options & XML_PARSE_DTDLOAD) {
15273 ctxt->loadsubset = XML_DETECT_IDS;
15274 options -= XML_PARSE_DTDLOAD;
15275 ctxt->options |= XML_PARSE_DTDLOAD;
15277 ctxt->loadsubset = 0;
15278 if (options & XML_PARSE_DTDATTR) {
15279 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15280 options -= XML_PARSE_DTDATTR;
15281 ctxt->options |= XML_PARSE_DTDATTR;
15283 if (options & XML_PARSE_NOENT) {
15284 ctxt->replaceEntities = 1;
15285 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15286 options -= XML_PARSE_NOENT;
15287 ctxt->options |= XML_PARSE_NOENT;
15289 ctxt->replaceEntities = 0;
15290 if (options & XML_PARSE_PEDANTIC) {
15291 ctxt->pedantic = 1;
15292 options -= XML_PARSE_PEDANTIC;
15293 ctxt->options |= XML_PARSE_PEDANTIC;
15295 ctxt->pedantic = 0;
15296 if (options & XML_PARSE_NOBLANKS) {
15297 ctxt->keepBlanks = 0;
15298 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15299 options -= XML_PARSE_NOBLANKS;
15300 ctxt->options |= XML_PARSE_NOBLANKS;
15302 ctxt->keepBlanks = 1;
15303 if (options & XML_PARSE_DTDVALID) {
15304 ctxt->validate = 1;
15305 if (options & XML_PARSE_NOWARNING)
15306 ctxt->vctxt.warning = NULL;
15307 if (options & XML_PARSE_NOERROR)
15308 ctxt->vctxt.error = NULL;
15309 options -= XML_PARSE_DTDVALID;
15310 ctxt->options |= XML_PARSE_DTDVALID;
15312 ctxt->validate = 0;
15313 if (options & XML_PARSE_NOWARNING) {
15314 ctxt->sax->warning = NULL;
15315 options -= XML_PARSE_NOWARNING;
15317 if (options & XML_PARSE_NOERROR) {
15318 ctxt->sax->error = NULL;
15319 ctxt->sax->fatalError = NULL;
15320 options -= XML_PARSE_NOERROR;
15322 #ifdef LIBXML_SAX1_ENABLED
15323 if (options & XML_PARSE_SAX1) {
15324 ctxt->sax->startElement = xmlSAX2StartElement;
15325 ctxt->sax->endElement = xmlSAX2EndElement;
15326 ctxt->sax->startElementNs = NULL;
15327 ctxt->sax->endElementNs = NULL;
15328 ctxt->sax->initialized = 1;
15329 options -= XML_PARSE_SAX1;
15330 ctxt->options |= XML_PARSE_SAX1;
15332 #endif /* LIBXML_SAX1_ENABLED */
15333 if (options & XML_PARSE_NODICT) {
15334 ctxt->dictNames = 0;
15335 options -= XML_PARSE_NODICT;
15336 ctxt->options |= XML_PARSE_NODICT;
15338 ctxt->dictNames = 1;
15340 if (options & XML_PARSE_NOCDATA) {
15341 ctxt->sax->cdataBlock = NULL;
15342 options -= XML_PARSE_NOCDATA;
15343 ctxt->options |= XML_PARSE_NOCDATA;
15345 if (options & XML_PARSE_NSCLEAN) {
15346 ctxt->options |= XML_PARSE_NSCLEAN;
15347 options -= XML_PARSE_NSCLEAN;
15349 if (options & XML_PARSE_NONET) {
15350 ctxt->options |= XML_PARSE_NONET;
15351 options -= XML_PARSE_NONET;
15353 if (options & XML_PARSE_COMPACT) {
15354 ctxt->options |= XML_PARSE_COMPACT;
15355 options -= XML_PARSE_COMPACT;
15357 if (options & XML_PARSE_OLD10) {
15358 ctxt->options |= XML_PARSE_OLD10;
15359 options -= XML_PARSE_OLD10;
15361 if (options & XML_PARSE_NOBASEFIX) {
15362 ctxt->options |= XML_PARSE_NOBASEFIX;
15363 options -= XML_PARSE_NOBASEFIX;
15365 if (options & XML_PARSE_HUGE) {
15366 ctxt->options |= XML_PARSE_HUGE;
15367 options -= XML_PARSE_HUGE;
15368 if (ctxt->dict != NULL)
15369 xmlDictSetLimit(ctxt->dict, 0);
15371 if (options & XML_PARSE_OLDSAX) {
15372 ctxt->options |= XML_PARSE_OLDSAX;
15373 options -= XML_PARSE_OLDSAX;
15375 if (options & XML_PARSE_IGNORE_ENC) {
15376 ctxt->options |= XML_PARSE_IGNORE_ENC;
15377 options -= XML_PARSE_IGNORE_ENC;
15379 if (options & XML_PARSE_BIG_LINES) {
15380 ctxt->options |= XML_PARSE_BIG_LINES;
15381 options -= XML_PARSE_BIG_LINES;
15383 ctxt->linenumbers = 1;
15388 * xmlCtxtUseOptions:
15389 * @ctxt: an XML parser context
15390 * @options: a combination of xmlParserOption
15392 * Applies the options to the parser context
15394 * Returns 0 in case of success, the set of unknown or unimplemented options
15395 * in case of error.
/*
 * Public entry point for applying parser options to @ctxt.
 * Forwards @ctxt and @options to xmlCtxtUseOptionsInternal() with NULL as
 * the third argument and returns that call's result unchanged.
 */
15398 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15400 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15405 * @ctxt: an XML parser context
15406 * @URL: the base URL to use for the document
15407 * @encoding: the document encoding, or NULL
15408 * @options: a combination of xmlParserOption
15409 * @reuse: keep the context for reuse
15411 * Common front-end for the xmlRead functions
15413 * Returns the resulting document tree or NULL
/*
 * Shared back end for the xmlRead* and xmlCtxtRead* entry points, which call
 * it with reuse = 0 and reuse = 1 respectively.
 * Visible steps: apply the options, optionally switch the input encoding,
 * record @URL as the input filename when none is set, run
 * xmlParseDocument(), then clear ctxt->myDoc.
 * NOTE(review): the statements that select the return value and the guard
 * around xmlFreeParserCtxt() are not visible in this excerpt; confirm them
 * against the complete file before relying on the notes below.
 */
15416 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15417 int options, int reuse)
/* Apply the requested options; @encoding is forwarded along with them. */
15421 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15422 if (encoding != NULL) {
15423 xmlCharEncodingHandlerPtr hdlr;
/* Look up a conversion handler for the caller-supplied encoding name. */
15425 hdlr = xmlFindCharEncodingHandler(encoding);
/* NOTE(review): presumably guarded by a check on hdlr that is not shown. */
15427 xmlSwitchToEncoding(ctxt, hdlr);
/* Use @URL as the input filename only if the current input has none. */
15429 if ((URL != NULL) && (ctxt->input != NULL) &&
15430 (ctxt->input->filename == NULL))
15431 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15432 xmlParseDocument(ctxt);
/*
 * Outcome test: well-formed input, or recovery mode enabled.
 * NOTE(review): the branch bodies are not shown; presumably the tree is
 * returned in this case and freed otherwise.
 */
15433 if ((ctxt->wellFormed) || ctxt->recovery)
15437 if (ctxt->myDoc != NULL) {
15438 xmlFreeDoc(ctxt->myDoc);
/* Clear the context's document pointer. */
15441 ctxt->myDoc = NULL;
/* NOTE(review): presumably executed only when @reuse is 0; guard not shown. */
15443 xmlFreeParserCtxt(ctxt);
15451 * @cur: a pointer to a zero terminated string
15452 * @URL: the base URL to use for the document
15453 * @encoding: the document encoding, or NULL
15454 * @options: a combination of xmlParserOption
15456 * parse an XML in-memory document and build a tree.
15458 * Returns the resulting document tree
/*
 * One-shot parse of the string @cur: creates a parser context with
 * xmlCreateDocParserCtxt() and hands it to xmlDoRead() with reuse = 0.
 * NOTE(review): argument and allocation checks are not visible in this
 * excerpt.
 */
15461 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15463 xmlParserCtxtPtr ctxt;
15469 ctxt = xmlCreateDocParserCtxt(cur);
15472 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15477 * @filename: a file or URL
15478 * @encoding: the document encoding, or NULL
15479 * @options: a combination of xmlParserOption
15481 * parse an XML file from the filesystem or the network.
15483 * Returns the resulting document tree
/*
 * One-shot parse of @filename: creates a parser context with
 * xmlCreateURLParserCtxt(), which also receives @options, then delegates to
 * xmlDoRead() with reuse = 0.
 */
15486 xmlReadFile(const char *filename, const char *encoding, int options)
15488 xmlParserCtxtPtr ctxt;
15491 ctxt = xmlCreateURLParserCtxt(filename, options);
/* The URL argument is NULL here, so xmlDoRead() does not set a filename. */
15494 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15499 * @buffer: a pointer to a char array
15500 * @size: the size of the array
15501 * @URL: the base URL to use for the document
15502 * @encoding: the document encoding, or NULL
15503 * @options: a combination of xmlParserOption
15505 * parse an XML in-memory document and build a tree.
15507 * Returns the resulting document tree
/*
 * One-shot parse of a memory buffer: creates a parser context over
 * @buffer / @size with xmlCreateMemoryParserCtxt() and hands it to
 * xmlDoRead() with reuse = 0.
 */
15510 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15512 xmlParserCtxtPtr ctxt;
15515 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15518 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15523 * @fd: an open file descriptor
15524 * @URL: the base URL to use for the document
15525 * @encoding: the document encoding, or NULL
15526 * @options: a combination of xmlParserOption
15528 * Parse an XML document from a file descriptor and build a tree.
15529 * NOTE that the file descriptor will not be closed by the parser;
15530 * closing it remains the caller's responsibility.
15532 * Returns the resulting document tree
/*
 * One-shot parse from an open file descriptor.
 * Wraps @fd in a parser input buffer, creates a fresh parser context and an
 * input stream over the buffer, hands the stream to the context and
 * delegates to xmlDoRead() with reuse = 0.
 * NOTE(review): the return statements inside the failure branches are not
 * visible in this excerpt.
 */
15535 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15537 xmlParserCtxtPtr ctxt;
15538 xmlParserInputBufferPtr input;
15539 xmlParserInputPtr stream;
15545 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* Remove the close callback; presumably this is what leaves @fd open. */
15548 input->closecallback = NULL;
15549 ctxt = xmlNewParserCtxt();
15550 if (ctxt == NULL) {
/* No context: release the input buffer created above. */
15551 xmlFreeParserInputBuffer(input);
15554 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15555 if (stream == NULL) {
/* No stream: release both the buffer and the context. */
15556 xmlFreeParserInputBuffer(input);
15557 xmlFreeParserCtxt(ctxt);
15560 inputPush(ctxt, stream);
15561 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15566 * @ioread: an I/O read function
15567 * @ioclose: an I/O close function
15568 * @ioctx: an I/O handler
15569 * @URL: the base URL to use for the document
15570 * @encoding: the document encoding, or NULL
15571 * @options: a combination of xmlParserOption
15573 * Parse an XML document read through the supplied I/O callbacks and build a tree.
15575 * Returns the resulting document tree
/*
 * One-shot parse through caller-supplied I/O callbacks.
 * Builds a parser input buffer from @ioread / @ioclose / @ioctx, creates a
 * fresh parser context and an input stream over the buffer, hands the
 * stream to the context and delegates to xmlDoRead() with reuse = 0.
 * NOTE(review): the bodies of the early-exit branches are only partly
 * visible in this excerpt.
 */
15578 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15579 void *ioctx, const char *URL, const char *encoding, int options)
15581 xmlParserCtxtPtr ctxt;
15582 xmlParserInputBufferPtr input;
15583 xmlParserInputPtr stream;
/* A read callback is required (the consequent is not shown here). */
15585 if (ioread == NULL)
15589 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15590 XML_CHAR_ENCODING_NONE);
15591 if (input == NULL) {
/*
 * NOTE(review): the statement guarded by this test is not shown;
 * presumably it invokes @ioclose on @ioctx. Confirm.
 */
15592 if (ioclose != NULL)
15596 ctxt = xmlNewParserCtxt();
15597 if (ctxt == NULL) {
/* No context: release the input buffer created above. */
15598 xmlFreeParserInputBuffer(input);
15601 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15602 if (stream == NULL) {
/* No stream: release both the buffer and the context. */
15603 xmlFreeParserInputBuffer(input);
15604 xmlFreeParserCtxt(ctxt);
15607 inputPush(ctxt, stream);
15608 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15613 * @ctxt: an XML parser context
15614 * @cur: a pointer to a zero terminated string
15615 * @URL: the base URL to use for the document
15616 * @encoding: the document encoding, or NULL
15617 * @options: a combination of xmlParserOption
15619 * parse an XML in-memory document and build a tree.
15620 * This reuses the existing @ctxt parser context
15622 * Returns the resulting document tree
/*
 * Parse the string @cur using the caller's existing context.
 * Resets @ctxt, creates a string input stream over @cur, hands it to the
 * context and delegates to xmlDoRead() with reuse = 1.
 * NOTE(review): argument checks and the body of the failure branch are not
 * visible in this excerpt.
 */
15625 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15626 const char *URL, const char *encoding, int options)
15628 xmlParserInputPtr stream;
/* Reset the context before the new input is attached. */
15636 xmlCtxtReset(ctxt);
15638 stream = xmlNewStringInputStream(ctxt, cur);
15639 if (stream == NULL) {
15642 inputPush(ctxt, stream);
15643 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15648 * @ctxt: an XML parser context
15649 * @filename: a file or URL
15650 * @encoding: the document encoding, or NULL
15651 * @options: a combination of xmlParserOption
15653 * parse an XML file from the filesystem or the network.
15654 * This reuses the existing @ctxt parser context
15656 * Returns the resulting document tree
/*
 * Parse @filename using the caller's existing context.
 * Resets @ctxt, obtains an input stream for @filename through
 * xmlLoadExternalEntity(), hands it to the context and delegates to
 * xmlDoRead() with a NULL URL and reuse = 1.
 * NOTE(review): the bodies of the early-exit branches are not visible in
 * this excerpt.
 */
15659 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15660 const char *encoding, int options)
15662 xmlParserInputPtr stream;
/* A filename is required (the consequent is not shown here). */
15664 if (filename == NULL)
15670 xmlCtxtReset(ctxt);
/* The second argument is NULL; only the filename and the context are given. */
15672 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15673 if (stream == NULL) {
15676 inputPush(ctxt, stream);
15677 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15681 * xmlCtxtReadMemory:
15682 * @ctxt: an XML parser context
15683 * @buffer: a pointer to a char array
15684 * @size: the size of the array
15685 * @URL: the base URL to use for the document
15686 * @encoding: the document encoding, or NULL
15687 * @options: a combination of xmlParserOption
15689 * parse an XML in-memory document and build a tree.
15690 * This reuses the existing @ctxt parser context
15692 * Returns the resulting document tree
/*
 * Parse a memory buffer using the caller's existing context.
 * Resets @ctxt, wraps @buffer / @size in a parser input buffer, creates an
 * input stream over it, hands the stream to the context and delegates to
 * xmlDoRead() with reuse = 1.
 * NOTE(review): the bodies of the early-exit branches are only partly
 * visible in this excerpt.
 */
15695 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15696 const char *URL, const char *encoding, int options)
15698 xmlParserInputBufferPtr input;
15699 xmlParserInputPtr stream;
/* A buffer is required (the consequent is not shown here). */
15703 if (buffer == NULL)
15707 xmlCtxtReset(ctxt);
15709 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15710 if (input == NULL) {
15714 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15715 if (stream == NULL) {
/* No stream: release the input buffer; the caller's context is not freed here. */
15716 xmlFreeParserInputBuffer(input);
15720 inputPush(ctxt, stream);
15721 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15726 * @ctxt: an XML parser context
15727 * @fd: an open file descriptor
15728 * @URL: the base URL to use for the document
15729 * @encoding: the document encoding, or NULL
15730 * @options: a combination of xmlParserOption
15732 * Parse an XML document from a file descriptor and build a tree.
15733 * This reuses the existing @ctxt parser context.
15734 * NOTE that the file descriptor will not be closed when the
15735 * parser context is reset or freed.
15737 * Returns the resulting document tree
/*
 * Parse from an open file descriptor using the caller's existing context.
 * Resets @ctxt, wraps @fd in a parser input buffer, creates an input stream
 * over it, hands the stream to the context and delegates to xmlDoRead()
 * with reuse = 1.
 * NOTE(review): argument checks and the return statements of the failure
 * branches are not visible in this excerpt.
 */
15740 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15741 const char *URL, const char *encoding, int options)
15743 xmlParserInputBufferPtr input;
15744 xmlParserInputPtr stream;
15752 xmlCtxtReset(ctxt);
15755 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* Remove the close callback; presumably this is what leaves @fd open. */
15758 input->closecallback = NULL;
15759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15760 if (stream == NULL) {
/* No stream: release the input buffer; the caller's context is not freed here. */
15761 xmlFreeParserInputBuffer(input);
15764 inputPush(ctxt, stream);
15765 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15770 * @ctxt: an XML parser context
15771 * @ioread: an I/O read function
15772 * @ioclose: an I/O close function
15773 * @ioctx: an I/O handler
15774 * @URL: the base URL to use for the document
15775 * @encoding: the document encoding, or NULL
15776 * @options: a combination of xmlParserOption
15778 * Parse an XML document read through the supplied I/O callbacks and build a tree.
15779 * This reuses the existing @ctxt parser context.
15781 * Returns the resulting document tree
/*
 * Parse through caller-supplied I/O callbacks using the caller's existing
 * context.
 * Resets @ctxt, builds a parser input buffer from @ioread / @ioclose /
 * @ioctx, creates an input stream over it, hands the stream to the context
 * and delegates to xmlDoRead() with reuse = 1.
 * NOTE(review): the URL parameter used in the final call is not part of the
 * visible signature lines, and the bodies of the early-exit branches are
 * only partly visible in this excerpt.
 */
15784 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15785 xmlInputCloseCallback ioclose, void *ioctx,
15787 const char *encoding, int options)
15789 xmlParserInputBufferPtr input;
15790 xmlParserInputPtr stream;
/* A read callback is required (the consequent is not shown here). */
15792 if (ioread == NULL)
15798 xmlCtxtReset(ctxt);
15800 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15801 XML_CHAR_ENCODING_NONE);
15802 if (input == NULL) {
/*
 * NOTE(review): the statement guarded by this test is not shown;
 * presumably it invokes @ioclose on @ioctx. Confirm.
 */
15803 if (ioclose != NULL)
15807 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15808 if (stream == NULL) {
/* No stream: release the input buffer; the caller's context is not freed here. */
15809 xmlFreeParserInputBuffer(input);
15812 inputPush(ctxt, stream);
15813 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15816 #define bottom_parser
15817 #include "elfgcchack.h"