2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
33 /* To avoid EBCDIC trouble when parsing on zOS */
35 #pragma convert("ISO8859-1")
41 #if defined(WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
44 #define XML_DIR_SEP '/'
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
77 #ifdef HAVE_SYS_STAT_H
97 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99 static xmlParserCtxtPtr
100 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101 const xmlChar *base, xmlParserCtxtPtr pctx);
103 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105 /************************************************************************
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
109 ************************************************************************/
111 #define XML_PARSER_BIG_ENTITY 1000
112 #define XML_PARSER_LOT_ENTITY 5000
115 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116 * replacement over the size in byte of the input indicates that you have
117 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
118 * replacements per byte of input.
120 #define XML_PARSER_NON_LINEAR 10
123 * xmlParserEntityCheck
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential linear entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
/*
 * xmlParserEntityCheck: guard against non-linear entity expansion.
 *
 * ctxt is the parser context, size and replacement are byte counts supplied
 * by the caller, and ent is the entity being examined (may be NULL).
 *
 * The visible code selects one of three checks, in this order:
 *   - replacement != 0: compares replacement with XML_MAX_TEXT_LENGTH and
 *     with XML_PARSER_NON_LINEAR times the amount of consumed input;
 *   - size != 0: compares size with XML_PARSER_BIG_ENTITY, then size and
 *     three times ctxt->nbentities with XML_PARSER_NON_LINEAR * consumed;
 *   - ent != NULL: derives size from ent->checked / 2 and compares
 *     size * 3 with consumed * XML_PARSER_NON_LINEAR.
 * In every branch consumed is input->consumed plus the offset of the read
 * cursor from the buffer base, plus ctxt->sizeentities.
 * The last visible statement raises XML_ERR_ENTITY_LOOP through xmlFatalErr.
 *
 * NOTE(review): the return type, braces and the statements guarded by most
 * of the tests below are not visible in this excerpt - confirm the exact
 * return values against the complete file.
 */
132 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
133 xmlEntityPtr ent, size_t replacement)
/* Tests for a missing context or the XML_PARSE_HUGE option. */
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
/* Tests whether an entity loop has already been recorded as the last error. */
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
143 * This may look absurd but is needed to detect
/*
 * First look at a non-predefined entity with content (ent->checked == 0):
 * its content is expanded once with xmlStringDecodeEntities and
 * ent->checked is set from the growth of ctxt->nbentities during that
 * expansion, as (delta + 1) * 2.
 */
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
149 unsigned long oldnbent = ctxt->nbentities;
155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
/* The expanded text is scanned for a less-than character. */
164 if (xmlStrchr(rep, '<'))
/* Check based on the volume of replacement text. */
170 if (replacement != 0) {
171 if (replacement < XML_MAX_TEXT_LENGTH)
175 * If the volume of entity copy reaches 10 times the
176 * amount of parsed data and over the large text threshold
177 * then that's very likely to be an abuse.
179 if (ctxt->input != NULL) {
180 consumed = ctxt->input->consumed +
181 (ctxt->input->cur - ctxt->input->base);
183 consumed += ctxt->sizeentities;
185 if (replacement < XML_PARSER_NON_LINEAR * consumed)
187 } else if (size != 0) {
189 * Do the check based on the replacement size of the entity
191 if (size < XML_PARSER_BIG_ENTITY)
195 * A limit on the amount of text data reasonably used
197 if (ctxt->input != NULL) {
198 consumed = ctxt->input->consumed +
199 (ctxt->input->cur - ctxt->input->base);
201 consumed += ctxt->sizeentities;
203 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
206 } else if (ent != NULL) {
208 * use the number of parsed entities in the replacement
210 size = ent->checked / 2;
213 * The amount of data parsed counting entities size only once
215 if (ctxt->input != NULL) {
216 consumed = ctxt->input->consumed +
217 (ctxt->input->cur - ctxt->input->base);
219 consumed += ctxt->sizeentities;
222 * Check the density of entities for the amount of data
223 * knowing an entity reference will take at least 3 bytes
225 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
229 * strange we got no data for checking
231 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233 (ctxt->nbentities <= 10000))
236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
243 * arbitrary depth limit for the XML documents that we allow to
244 * process. This is not a limitation of the parser but a safety
245 * boundary feature. It can be disabled with the XML_PARSE_HUGE
248 unsigned int xmlParserMaxDepth = 256;
253 #define XML_PARSER_BIG_BUFFER_SIZE 300
254 #define XML_PARSER_BUFFER_SIZE 100
255 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
258 * XML_PARSER_CHUNK_SIZE
260 * When calling GROW that's the minimal amount of data
261 * the parser expected to have received. It is not a hard
262 * limit but an optimization when reading strings like Names
263 * It is not strictly needed as long as inputs available characters
264 * are followed by 0, which should be provided by the I/O level
266 #define XML_PARSER_CHUNK_SIZE 100
269 * List of XML prefixed PI allowed by W3C specs
272 static const char *xmlW3CPIs[] = {
279 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
280 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281 const xmlChar **str);
283 static xmlParserErrors
284 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285 xmlSAXHandlerPtr sax,
286 void *user_data, int depth, const xmlChar *URL,
287 const xmlChar *ID, xmlNodePtr *list);
290 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291 const char *encoding);
292 #ifdef LIBXML_LEGACY_ENABLED
294 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295 xmlNodePtr lastNode);
296 #endif /* LIBXML_LEGACY_ENABLED */
298 static xmlParserErrors
299 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300 const xmlChar *string, void *user_data, xmlNodePtr *lst);
303 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
305 /************************************************************************
307 * Some factorized error routines *
309 ************************************************************************/
312 * xmlErrAttributeDup:
313 * @ctxt: an XML parser context
314 * @prefix: the attribute prefix
315 * @localname: the attribute localname
317 * Handle a redefinition of attribute error
/*
 * xmlErrAttributeDup: report an attribute that is defined more than once.
 * ctxt is the parser context; prefix and localname name the attribute.
 * Two message forms are visible: one with the local name only and one with
 * prefix:localname.
 * NOTE(review): the return type, braces and the branch conditions choosing
 * between the two forms are not visible in this excerpt.
 */
320 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321 const xmlChar * localname)
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324 (ctxt->instate == XML_PARSER_EOF))
/* Record the error code on the context. */
327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
/* Message form without a prefix. */
330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
332 (const char *) localname, NULL, NULL, 0, 0,
333 "Attribute %s redefined\n", localname);
/* Message form with prefix and local name. */
335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
337 (const char *) prefix, (const char *) localname,
338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
/* Mark the document as not well-formed; unless recovering, set disableSAX. */
341 ctxt->wellFormed = 0;
342 if (ctxt->recovery == 0)
343 ctxt->disableSAX = 1;
349 * @ctxt: an XML parser context
350 * @error: the error number
351 * @extra: extra information string
353 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErr: raise a fatal parser error identified only by its code.
 * ctxt is the parser context, error the xmlParserErrors code and info an
 * optional extra string passed on to __xmlRaiseError.
 * The switch below maps each handled code to a fixed English message; the
 * last visible assignment supplies a generic message.
 * The error is raised with level XML_ERR_FATAL from XML_FROM_PARSER, then
 * ctxt->wellFormed is cleared and, when ctxt->recovery is 0, disableSAX is
 * set.
 * NOTE(review): the switch header, break statements, default label and the
 * condition selecting between the two raise calls are not visible in this
 * excerpt.
 */
356 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361 (ctxt->instate == XML_PARSER_EOF))
/* Code-to-message table. */
364 case XML_ERR_INVALID_HEX_CHARREF:
365 errmsg = "CharRef: invalid hexadecimal value";
367 case XML_ERR_INVALID_DEC_CHARREF:
368 errmsg = "CharRef: invalid decimal value";
370 case XML_ERR_INVALID_CHARREF:
371 errmsg = "CharRef: invalid value";
373 case XML_ERR_INTERNAL_ERROR:
374 errmsg = "internal error";
376 case XML_ERR_PEREF_AT_EOF:
377 errmsg = "PEReference at end of document";
379 case XML_ERR_PEREF_IN_PROLOG:
380 errmsg = "PEReference in prolog";
382 case XML_ERR_PEREF_IN_EPILOG:
383 errmsg = "PEReference in epilog";
385 case XML_ERR_PEREF_NO_NAME:
386 errmsg = "PEReference: no name";
388 case XML_ERR_PEREF_SEMICOL_MISSING:
389 errmsg = "PEReference: expecting ';'";
391 case XML_ERR_ENTITY_LOOP:
392 errmsg = "Detected an entity reference loop";
/* NOTE(review): NOT_STARTED and NOT_FINISHED below share the same text. */
394 case XML_ERR_ENTITY_NOT_STARTED:
395 errmsg = "EntityValue: \" or ' expected";
397 case XML_ERR_ENTITY_PE_INTERNAL:
398 errmsg = "PEReferences forbidden in internal subset";
400 case XML_ERR_ENTITY_NOT_FINISHED:
401 errmsg = "EntityValue: \" or ' expected";
403 case XML_ERR_ATTRIBUTE_NOT_STARTED:
404 errmsg = "AttValue: \" or ' expected";
406 case XML_ERR_LT_IN_ATTRIBUTE:
407 errmsg = "Unescaped '<' not allowed in attributes values";
409 case XML_ERR_LITERAL_NOT_STARTED:
410 errmsg = "SystemLiteral \" or ' expected";
412 case XML_ERR_LITERAL_NOT_FINISHED:
413 errmsg = "Unfinished System or Public ID \" or ' expected";
415 case XML_ERR_MISPLACED_CDATA_END:
416 errmsg = "Sequence ']]>' not allowed in content";
418 case XML_ERR_URI_REQUIRED:
419 errmsg = "SYSTEM or PUBLIC, the URI is missing";
421 case XML_ERR_PUBID_REQUIRED:
422 errmsg = "PUBLIC, the Public Identifier is missing";
424 case XML_ERR_HYPHEN_IN_COMMENT:
425 errmsg = "Comment must not contain '--' (double-hyphen)";
427 case XML_ERR_PI_NOT_STARTED:
428 errmsg = "xmlParsePI : no target name";
430 case XML_ERR_RESERVED_XML_NAME:
431 errmsg = "Invalid PI name";
433 case XML_ERR_NOTATION_NOT_STARTED:
434 errmsg = "NOTATION: Name expected here";
436 case XML_ERR_NOTATION_NOT_FINISHED:
437 errmsg = "'>' required to close NOTATION declaration";
439 case XML_ERR_VALUE_REQUIRED:
440 errmsg = "Entity value required";
442 case XML_ERR_URI_FRAGMENT:
443 errmsg = "Fragment not allowed";
445 case XML_ERR_ATTLIST_NOT_STARTED:
446 errmsg = "'(' required to start ATTLIST enumeration";
448 case XML_ERR_NMTOKEN_REQUIRED:
449 errmsg = "NmToken expected in ATTLIST enumeration";
451 case XML_ERR_ATTLIST_NOT_FINISHED:
452 errmsg = "')' required to finish ATTLIST enumeration";
454 case XML_ERR_MIXED_NOT_STARTED:
455 errmsg = "MixedContentDecl : '|' or ')*' expected";
457 case XML_ERR_PCDATA_REQUIRED:
458 errmsg = "MixedContentDecl : '#PCDATA' expected";
460 case XML_ERR_ELEMCONTENT_NOT_STARTED:
461 errmsg = "ContentDecl : Name or '(' expected";
463 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
464 errmsg = "ContentDecl : ',' '|' or ')' expected";
466 case XML_ERR_PEREF_IN_INT_SUBSET:
468 "PEReference: forbidden within markup decl in internal subset";
470 case XML_ERR_GT_REQUIRED:
471 errmsg = "expected '>'";
473 case XML_ERR_CONDSEC_INVALID:
474 errmsg = "XML conditional section '[' expected";
476 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
477 errmsg = "Content error in the external subset";
479 case XML_ERR_CONDSEC_INVALID_KEYWORD:
481 "conditional section INCLUDE or IGNORE keyword expected";
483 case XML_ERR_CONDSEC_NOT_FINISHED:
484 errmsg = "XML conditional section not closed";
486 case XML_ERR_XMLDECL_NOT_STARTED:
487 errmsg = "Text declaration '<?xml' required";
489 case XML_ERR_XMLDECL_NOT_FINISHED:
490 errmsg = "parsing XML declaration: '?>' expected";
492 case XML_ERR_EXT_ENTITY_STANDALONE:
493 errmsg = "external parsed entities cannot be standalone";
495 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
496 errmsg = "EntityRef: expecting ';'";
498 case XML_ERR_DOCTYPE_NOT_FINISHED:
499 errmsg = "DOCTYPE improperly terminated";
501 case XML_ERR_LTSLASH_REQUIRED:
502 errmsg = "EndTag: '</' not found";
504 case XML_ERR_EQUAL_REQUIRED:
505 errmsg = "expected '='";
507 case XML_ERR_STRING_NOT_CLOSED:
508 errmsg = "String not closed expecting \" or '";
510 case XML_ERR_STRING_NOT_STARTED:
511 errmsg = "String not started expecting ' or \"";
513 case XML_ERR_ENCODING_NAME:
514 errmsg = "Invalid XML encoding name";
516 case XML_ERR_STANDALONE_VALUE:
517 errmsg = "standalone accepts only 'yes' or 'no'";
519 case XML_ERR_DOCUMENT_EMPTY:
520 errmsg = "Document is empty";
522 case XML_ERR_DOCUMENT_END:
523 errmsg = "Extra content at the end of the document";
525 case XML_ERR_NOT_WELL_BALANCED:
526 errmsg = "chunk is not well balanced";
528 case XML_ERR_EXTRA_CONTENT:
529 errmsg = "extra content at the end of well balanced chunk";
531 case XML_ERR_VERSION_MISSING:
532 errmsg = "Malformed declaration expecting version";
534 case XML_ERR_NAME_TOO_LONG:
535 errmsg = "Name too long use XML_PARSE_HUGE option";
/* Generic text for codes without a dedicated message. */
543 errmsg = "Unregistered error message";
/* Raise with a one-string format. */
548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
/* Raise with a two-string format. */
552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
/* Mark the document as not well-formed; unless recovering, set disableSAX. */
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsg: raise a fatal parser error with a caller-supplied message.
 * msg is passed as the argument of a fixed "%s" format, not as the format
 * itself. Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery
 * is 0, disableSAX is set.
 * NOTE(review): the last parameter line, braces and guarded statements are
 * not visible in this excerpt.
 */
571 static void LIBXML_ATTR_FORMAT(3,0)
572 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576 (ctxt->instate == XML_PARSER_EOF))
580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
583 ctxt->wellFormed = 0;
584 if (ctxt->recovery == 0)
585 ctxt->disableSAX = 1;
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
/*
 * xmlWarningMsg: raise a parser warning (level XML_ERR_WARNING).
 * msg is used as the format string with str1 and str2 as its arguments.
 * When the SAX handler block is initialized with XML_SAX2_MAGIC its serror
 * callback is used as the structured channel.
 * Two raise calls are visible: one routing through ctxt->sax->warning and
 * one with no generic channel.
 * NOTE(review): the condition choosing between the two calls is not visible
 * in this excerpt; the first call dereferences ctxt, so presumably it is
 * guarded by a non-NULL test - confirm against the complete file.
 */
599 static void LIBXML_ATTR_FORMAT(3,0)
600 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601 const char *msg, const xmlChar *str1, const xmlChar *str2)
603 xmlStructuredErrorFunc schannel = NULL;
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
/* Pick the structured error callback only from a SAX2-initialized handler. */
608 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609 (ctxt->sax->initialized == XML_SAX2_MAGIC))
610 schannel = ctxt->sax->serror;
/* Raise through the SAX warning callback when a handler block exists. */
612 __xmlRaiseError(schannel,
613 (ctxt->sax) ? ctxt->sax->warning : NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
/* Raise with no generic channel. */
620 __xmlRaiseError(schannel, NULL, NULL,
621 ctxt, NULL, XML_FROM_PARSER, error,
622 XML_ERR_WARNING, NULL, 0,
623 (const char *) str1, (const char *) str2, NULL, 0, 0,
624 msg, (const char *) str1, (const char *) str2);
630 * @ctxt: an XML parser context
631 * @error: the error number
632 * @msg: the error message
635 * Handle a validity error.
/*
 * xmlValidityError: raise a validity error (domain XML_FROM_DTD, level
 * XML_ERR_ERROR).
 * msg is used as the format string with str1 and str2 as its arguments.
 * Two raise calls are visible: one routed through the validation context
 * callbacks (ctxt->vctxt.error with ctxt->vctxt.userData) and one with no
 * generic channel.
 * NOTE(review): the conditions around the two calls and any state updates
 * after them are not visible in this excerpt.
 */
637 static void LIBXML_ATTR_FORMAT(3,0)
638 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
639 const char *msg, const xmlChar *str1, const xmlChar *str2)
641 xmlStructuredErrorFunc schannel = NULL;
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644 (ctxt->instate == XML_PARSER_EOF))
/* Pick the structured error callback only from a SAX2-initialized handler. */
648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649 schannel = ctxt->sax->serror;
/* Raise through the validation context error callback. */
652 __xmlRaiseError(schannel,
653 ctxt->vctxt.error, ctxt->vctxt.userData,
654 ctxt, NULL, XML_FROM_DTD, error,
655 XML_ERR_ERROR, NULL, 0, (const char *) str1,
656 (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
/* Raise with no generic channel. */
660 __xmlRaiseError(schannel, NULL, NULL,
661 ctxt, NULL, XML_FROM_DTD, error,
662 XML_ERR_ERROR, NULL, 0, (const char *) str1,
663 (const char *) str2, NULL, 0, 0,
664 msg, (const char *) str1, (const char *) str2);
670 * @ctxt: an XML parser context
671 * @error: the error number
672 * @msg: the error message
673 * @val: an integer value
675 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsgInt: raise a fatal parser error whose message takes one
 * integer argument.
 * msg is used as the format string and val is passed both as the integer
 * detail field and as the format argument. Afterwards ctxt->wellFormed is
 * cleared and, when ctxt->recovery is 0, disableSAX is set.
 * NOTE(review): braces and guarded statements are not visible here.
 */
677 static void LIBXML_ATTR_FORMAT(3,0)
678 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
679 const char *msg, int val)
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682 (ctxt->instate == XML_PARSER_EOF))
686 __xmlRaiseError(NULL, NULL, NULL,
687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
690 ctxt->wellFormed = 0;
691 if (ctxt->recovery == 0)
692 ctxt->disableSAX = 1;
697 * xmlFatalErrMsgStrIntStr:
698 * @ctxt: an XML parser context
699 * @error: the error number
700 * @msg: the error message
701 * @str1: a string info
702 * @val: an integer value
703 * @str2: a string info
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsgStrIntStr: raise a fatal parser error whose message takes a
 * string, an integer and a second string, in that order.
 * msg is used as the format string with (str1, val, str2) as arguments.
 * Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery is 0,
 * disableSAX is set.
 * NOTE(review): the last parameter line, braces and guarded statements are
 * not visible in this excerpt.
 */
707 static void LIBXML_ATTR_FORMAT(3,0)
708 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg, const xmlChar *str1, int val,
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713 (ctxt->instate == XML_PARSER_EOF))
717 __xmlRaiseError(NULL, NULL, NULL,
718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719 NULL, 0, (const char *) str1, (const char *) str2,
720 NULL, val, 0, msg, str1, val, str2);
722 ctxt->wellFormed = 0;
723 if (ctxt->recovery == 0)
724 ctxt->disableSAX = 1;
730 * @ctxt: an XML parser context
731 * @error: the error number
732 * @msg: the error message
733 * @val: a string value
735 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlFatalErrMsgStr: raise a fatal parser error whose message takes one
 * string argument.
 * msg is used as the format string; val is also stored as the first string
 * detail of the error. Afterwards ctxt->wellFormed is cleared and, when
 * ctxt->recovery is 0, disableSAX is set.
 * NOTE(review): the final argument line of the raise call, braces and
 * guarded statements are not visible in this excerpt.
 */
737 static void LIBXML_ATTR_FORMAT(3,0)
738 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
739 const char *msg, const xmlChar * val)
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742 (ctxt->instate == XML_PARSER_EOF))
746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
747 XML_FROM_PARSER, error, XML_ERR_FATAL,
748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
751 ctxt->wellFormed = 0;
752 if (ctxt->recovery == 0)
753 ctxt->disableSAX = 1;
759 * @ctxt: an XML parser context
760 * @error: the error number
761 * @msg: the error message
762 * @val: a string value
764 * Handle a non fatal parser error
/*
 * xmlErrMsgStr: raise a non-fatal parser error (level XML_ERR_ERROR) whose
 * message takes one string argument.
 * No update of ctxt->wellFormed or ctxt->disableSAX is visible in this
 * function, unlike the fatal variants nearby.
 * NOTE(review): the final argument line of the raise call, braces and
 * guarded statements are not visible in this excerpt.
 */
766 static void LIBXML_ATTR_FORMAT(3,0)
767 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768 const char *msg, const xmlChar * val)
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771 (ctxt->instate == XML_PARSER_EOF))
775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
776 XML_FROM_PARSER, error, XML_ERR_ERROR,
777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
783 * @ctxt: an XML parser context
784 * @error: the error number
786 * @info1: extra information string
787 * @info2: extra information string
789 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * xmlNsErr: raise a namespace error (domain XML_FROM_NAMESPACE, level
 * XML_ERR_ERROR).
 * msg is used as the format string with info1, info2 and info3 as its
 * arguments; the same three strings are stored as the error details.
 * The last visible statement clears ctxt->nsWellFormed.
 * NOTE(review): the msg parameter line, braces and guarded statements are
 * not visible in this excerpt.
 */
791 static void LIBXML_ATTR_FORMAT(3,0)
792 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
794 const xmlChar * info1, const xmlChar * info2,
795 const xmlChar * info3)
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798 (ctxt->instate == XML_PARSER_EOF))
802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
803 XML_ERR_ERROR, NULL, 0, (const char *) info1,
804 (const char *) info2, (const char *) info3, 0, 0, msg,
805 info1, info2, info3);
/* Record that the document is not namespace-well-formed. */
807 ctxt->nsWellFormed = 0;
812 * @ctxt: an XML parser context
813 * @error: the error number
815 * @info1: extra information string
816 * @info2: extra information string
818 * Handle a namespace warning error
/*
 * xmlNsWarn: raise a namespace warning (domain XML_FROM_NAMESPACE, level
 * XML_ERR_WARNING).
 * msg is used as the format string with info1, info2 and info3 as its
 * arguments. No context flag is modified in the visible lines.
 * NOTE(review): the msg parameter line, braces and guarded statements are
 * not visible in this excerpt.
 */
820 static void LIBXML_ATTR_FORMAT(3,0)
821 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
823 const xmlChar * info1, const xmlChar * info2,
824 const xmlChar * info3)
/* Tests for a context with SAX disabled that is in the XML_PARSER_EOF state. */
826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827 (ctxt->instate == XML_PARSER_EOF))
829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830 XML_ERR_WARNING, NULL, 0, (const char *) info1,
831 (const char *) info2, (const char *) info3, 0, 0, msg,
832 info1, info2, info3);
835 /************************************************************************
837 * Library wide options *
839 ************************************************************************/
843 * @feature: the feature to be examined
845 * Examines if the library has been compiled with a given feature.
847 * Returns a non-zero value if the feature exist, otherwise zero.
848 * Returns zero (0) if the feature does not exist or an
849 * unknown feature is requested, non-zero otherwise.
/*
 * xmlHasFeature: report whether the library was built with a given feature.
 * feature is an xmlFeature value. Each visible case is paired with a test
 * of a compile-time macro (mostly LIBXML_*_ENABLED), so the answer is fixed
 * when the library is compiled.
 * NOTE(review): the switch header, several case labels and all return
 * statements are not visible in this excerpt; where a case label is missing
 * the macro test below it identifies the feature.
 */
852 xmlHasFeature(xmlFeature feature)
855 case XML_WITH_THREAD:
856 #ifdef LIBXML_THREAD_ENABLED
862 #ifdef LIBXML_TREE_ENABLED
867 case XML_WITH_OUTPUT:
868 #ifdef LIBXML_OUTPUT_ENABLED
874 #ifdef LIBXML_PUSH_ENABLED
879 case XML_WITH_READER:
880 #ifdef LIBXML_READER_ENABLED
885 case XML_WITH_PATTERN:
886 #ifdef LIBXML_PATTERN_ENABLED
891 case XML_WITH_WRITER:
892 #ifdef LIBXML_WRITER_ENABLED
898 #ifdef LIBXML_SAX1_ENABLED
904 #ifdef LIBXML_FTP_ENABLED
910 #ifdef LIBXML_HTTP_ENABLED
916 #ifdef LIBXML_VALID_ENABLED
922 #ifdef LIBXML_HTML_ENABLED
927 case XML_WITH_LEGACY:
928 #ifdef LIBXML_LEGACY_ENABLED
934 #ifdef LIBXML_C14N_ENABLED
939 case XML_WITH_CATALOG:
940 #ifdef LIBXML_CATALOG_ENABLED
946 #ifdef LIBXML_XPATH_ENABLED
952 #ifdef LIBXML_XPTR_ENABLED
957 case XML_WITH_XINCLUDE:
958 #ifdef LIBXML_XINCLUDE_ENABLED
964 #ifdef LIBXML_ICONV_ENABLED
969 case XML_WITH_ISO8859X:
970 #ifdef LIBXML_ISO8859X_ENABLED
975 case XML_WITH_UNICODE:
976 #ifdef LIBXML_UNICODE_ENABLED
981 case XML_WITH_REGEXP:
982 #ifdef LIBXML_REGEXP_ENABLED
987 case XML_WITH_AUTOMATA:
988 #ifdef LIBXML_AUTOMATA_ENABLED
994 #ifdef LIBXML_EXPR_ENABLED
999 case XML_WITH_SCHEMAS:
1000 #ifdef LIBXML_SCHEMAS_ENABLED
1005 case XML_WITH_SCHEMATRON:
1006 #ifdef LIBXML_SCHEMATRON_ENABLED
1011 case XML_WITH_MODULES:
1012 #ifdef LIBXML_MODULES_ENABLED
1017 case XML_WITH_DEBUG:
1018 #ifdef LIBXML_DEBUG_ENABLED
/* Unlike the others, this case is keyed on DEBUG_MEMORY_LOCATION. */
1023 case XML_WITH_DEBUG_MEM:
1024 #ifdef DEBUG_MEMORY_LOCATION
/* This case is keyed on LIBXML_DEBUG_RUNTIME (no _ENABLED suffix). */
1029 case XML_WITH_DEBUG_RUN:
1030 #ifdef LIBXML_DEBUG_RUNTIME
1036 #ifdef LIBXML_ZLIB_ENABLED
1042 #ifdef LIBXML_LZMA_ENABLED
1048 #ifdef LIBXML_ICU_ENABLED
1059 /************************************************************************
1061 * SAX2 defaulted attributes handling *
1063 ************************************************************************/
1067 * @ctxt: an XML parser context
1069 * Do the SAX2 detection and specific initialization
/*
 * xmlDetectSAX2: decide whether the context uses the SAX2 interface and
 * intern the well-known strings the parser compares against.
 * Does nothing when ctxt is NULL.
 * NOTE(review): the return type, the alternative branch of the SAX1
 * conditional and the closing braces are not visible in this excerpt.
 */
1072 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073 if (ctxt == NULL) return;
/*
 * With SAX1 support compiled in, sax2 is set only when the handler block
 * is SAX2-initialized and provides startElementNs or endElementNs.
 */
1074 #ifdef LIBXML_SAX1_ENABLED
1075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076 ((ctxt->sax->startElementNs != NULL) ||
1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1080 #endif /* LIBXML_SAX1_ENABLED */
/* Intern xml, xmlns and the XML namespace URI in the context dictionary. */
1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
/* Any failed lookup is reported as a memory error. */
1085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086 (ctxt->str_xml_ns == NULL)) {
1087 xmlErrMemory(ctxt, NULL);
/*
 * xmlDefAttrs: per-element table of defaulted attributes.
 * values is a flat array of string pointers, described by the field comment
 * as localname/prefix/values/external. With C99 or later it is declared as
 * a flexible array member, otherwise as a fixed array of 5 entries.
 * NOTE(review): the conditional alternative marker and the closing of the
 * struct are not visible in this excerpt.
 */
1091 typedef struct _xmlDefAttrs xmlDefAttrs;
1092 typedef xmlDefAttrs *xmlDefAttrsPtr;
1093 struct _xmlDefAttrs {
1094 int nbAttrs; /* number of defaulted attributes on that element */
1095 int maxAttrs; /* the size of the array */
1096 #if __STDC_VERSION__ >= 199901L
1097 /* Using a C99 flexible array member avoids UBSan errors. */
1098 const xmlChar *values[]; /* array of localname/prefix/values/external */
1100 const xmlChar *values[5];
1105 * xmlAttrNormalizeSpace:
1106 * @src: the source string
1107 * @dst: the target string
1109 * Normalize the space in non CDATA attribute values:
1110 * If the attribute type is not CDATA, then the XML processor MUST further
1111 * process the normalized attribute value by discarding any leading and
1112 * trailing space (#x20) characters, and by replacing sequences of space
1113 * (#x20) characters by a single space (#x20) character.
1114 * Note that the size of dst need to be at least src, and if one doesn't need
1115 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116 * passing src as dst is just fine.
1118 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * xmlAttrNormalizeSpace: normalize space (0x20) characters of src into dst.
 * Both pointers are tested for NULL first.
 * NOTE(review): only the NULL test and two loops skipping runs of 0x20 are
 * visible in this excerpt; the copy loop, the handling of trailing spaces
 * and the return statements are not shown.
 */
1122 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1124 if ((src == NULL) || (dst == NULL))
/* Skip a run of space characters. */
1127 while (*src == 0x20) src++;
/* Skip a further run of space characters. */
1130 while (*src == 0x20) src++;
1144 * xmlAttrNormalizeSpace2:
1145 * @src: the source string
1147 * Normalize the space in non CDATA attribute values, a slightly more complex
1148 * front end to avoid allocation problems when running on attribute values
1149 * coming from the input.
1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * xmlAttrNormalizeSpace2: front end to xmlAttrNormalizeSpace for attribute
 * values held in the input buffer.
 * ctxt is the parser context, src the value and len points to its length,
 * which is updated in place. All three are tested for NULL.
 * Two outcomes are visible:
 *   - a copy of src without its leading spaces is made with xmlStrndup,
 *     normalized in place and *len is set to the length of the copy;
 *   - when only leading spaces are involved, src is shifted left with
 *     memmove and *len is reduced by the number of removed characters.
 * NOTE(review): the scanning loop bodies, the test selecting the first
 * outcome and the return statements are not visible in this excerpt.
 */
1154 static const xmlChar *
1155 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1158 int remove_head = 0;
1159 int need_realloc = 0;
1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
/* Scan over leading space characters. */
1169 while (*cur == 0x20) {
/* Looks at the character following the current one: a space or the end. */
1176 if ((*cur == 0x20) || (*cur == 0)) {
/* Duplicate the value starting after the leading spaces. */
1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1188 xmlErrMemory(ctxt, NULL);
/* Normalize the private copy in place and report its new length. */
1191 xmlAttrNormalizeSpace(ret, ret);
1192 *len = (int) strlen((const char *)ret);
/* Shift left inside src; 1 + *len also moves the terminating byte. */
1194 } else if (remove_head) {
1195 *len -= remove_head;
1196 memmove(src, src + remove_head, 1 + *len);
1204 * @ctxt: an XML parser context
1205 * @fullname: the element fullname
1206 * @fullattr: the attribute fullname
1207 * @value: the attribute value
1209 * Add a defaulted attribute for an element
/*
 * xmlAddDefAttrs: record a defaulted attribute for an element.
 * fullname is the element QName, fullattr the attribute QName and value the
 * default value. Names and the value are interned in ctxt->dict.
 * Entries live in the ctxt->attsDefault hash, keyed by element local name
 * and prefix; each attribute occupies 5 consecutive slots of values:
 *   0 local name, 1 prefix, 2 value, 3 end of value (value + len),
 *   4 the literal string external, or NULL.
 * The last visible statement reports a memory error.
 * NOTE(review): the return type, several branch conditions, cleanup on
 * failure and the label preceding the memory error are not visible in this
 * excerpt.
 */
1212 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213 const xmlChar *fullname,
1214 const xmlChar *fullattr,
1215 const xmlChar *value) {
1216 xmlDefAttrsPtr defaults;
1218 const xmlChar *name;
1219 const xmlChar *prefix;
1222 * Allows to detect attribute redefinitions
1224 if (ctxt->attsSpecial != NULL) {
1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
/* Create the defaults table on first use, sharing the context dictionary. */
1229 if (ctxt->attsDefault == NULL) {
1230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1231 if (ctxt->attsDefault == NULL)
1236 * split the element name into prefix:localname , the string found
1237 * are within the DTD and then not associated to namespace names.
1239 name = xmlSplitQName3(fullname, &len);
1241 name = xmlDictLookup(ctxt->dict, fullname, -1);
1244 name = xmlDictLookup(ctxt->dict, name, -1);
1245 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1249 * make sure there is some storage
1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
/* First attribute for this element: room for 4 attributes of 5 slots. */
1252 if (defaults == NULL) {
1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1254 (4 * 5) * sizeof(const xmlChar *));
1255 if (defaults == NULL)
1257 defaults->nbAttrs = 0;
1258 defaults->maxAttrs = 4;
1259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260 defaults, NULL) < 0) {
/* Table full: double the capacity and re-register the moved block. */
1264 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1265 xmlDefAttrsPtr temp;
1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1272 defaults->maxAttrs *= 2;
1273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274 defaults, NULL) < 0) {
1281 * Split the element name into prefix:localname , the string found
1282 * are within the DTD and hen not associated to namespace names.
/* NOTE(review): this second split is applied to fullattr, the attribute name. */
1284 name = xmlSplitQName3(fullattr, &len);
1286 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1289 name = xmlDictLookup(ctxt->dict, name, -1);
1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
/* Fill the 5 slots of the new entry. */
1293 defaults->values[5 * defaults->nbAttrs] = name;
1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1295 /* intern the string and precompute the end */
1296 len = xmlStrlen(value);
1297 value = xmlDictLookup(ctxt->dict, value, len);
1298 defaults->values[5 * defaults->nbAttrs + 2] = value;
1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1304 defaults->nbAttrs++;
1309 xmlErrMemory(ctxt, NULL);
1314 * xmlAddSpecialAttr:
1315 * @ctxt: an XML parser context
1316 * @fullname: the element fullname
1317 * @fullattr: the attribute fullname
1318 * @type: the attribute type
1320 * Register this attribute type
/*
 * xmlAddSpecialAttr: register the type of an attribute of an element.
 * The ctxt->attsSpecial hash is created on first use and keyed by
 * (fullname, fullattr); the type is stored directly in the payload pointer
 * through an integer cast, so no allocation is made per entry.
 * An existing entry for the same key is looked up before adding.
 * The last visible statement reports a memory error.
 * NOTE(review): the return type, the type parameter line, the statements
 * guarded by the tests and the label preceding the memory error are not
 * visible in this excerpt.
 */
1323 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324 const xmlChar *fullname,
1325 const xmlChar *fullattr,
1328 if (ctxt->attsSpecial == NULL) {
1329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1330 if (ctxt->attsSpecial == NULL)
/* Tests whether the attribute is already registered. */
1334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
/* Store the type value itself as the payload. */
1337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338 (void *) (long) type);
1342 xmlErrMemory(ctxt, NULL);
1347 * xmlCleanSpecialAttrCallback:
1349 * Removes CDATA attributes from the special attribute table
/*
 * xmlCleanSpecialAttrCallback: hash scanner callback dropping CDATA entries.
 * payload is the stored attribute type (an integer carried in a pointer),
 * data is the parser context, fullname and fullattr are the entry keys.
 * Entries whose type is XML_ATTRIBUTE_CDATA are removed from
 * ctxt->attsSpecial; the third key is unused.
 * NOTE(review): the return type and closing braces are not visible here.
 */
1352 xmlCleanSpecialAttrCallback(void *payload, void *data,
1353 const xmlChar *fullname, const xmlChar *fullattr,
1354 const xmlChar *unused ATTRIBUTE_UNUSED) {
1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
/* Recover the type from the payload pointer and drop CDATA entries. */
1357 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1363 * xmlCleanSpecialAttr:
1364 * @ctxt: an XML parser context
1366 * Trim the list of attributes defined to remove all those of type
1367 * CDATA as they are not special. This call should be done when finishing
1368 * to parse the DTD and before starting to parse the document root.
/*
 * xmlCleanSpecialAttr: prune the special-attribute table of a context.
 * Scans ctxt->attsSpecial with xmlCleanSpecialAttrCallback and, when the
 * table ends up empty, frees it and resets the pointer to NULL.
 * NOTE(review): the return type, the statement guarded by the NULL test and
 * the braces are not visible in this excerpt.
 */
1371 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1373 if (ctxt->attsSpecial == NULL)
1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
/* Release the table entirely once nothing is left in it. */
1378 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379 xmlHashFree(ctxt->attsSpecial, NULL);
1380 ctxt->attsSpecial = NULL;
1386 * xmlCheckLanguageID:
1387 * @lang: pointer to the string value
1389 * Checks that the value conforms to the LanguageID production:
1391 * NOTE: this is somewhat deprecated, those productions were removed from
1392 * the XML Second edition.
1394 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1396 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399 * [38] Subcode ::= ([a-z] | [A-Z])+
1401 * The current REC references the successors of RFC 1766, currently 5646
1403 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404 * langtag = language
1410 * language = 2*3ALPHA ; shortest ISO 639 code
1411 * ["-" extlang] ; sometimes followed by
1412 * ; extended language subtags
1413 * / 4ALPHA ; or reserved for future use
1414 * / 5*8ALPHA ; or registered language subtag
1416 * extlang = 3ALPHA ; selected ISO 639 codes
1417 * *2("-" 3ALPHA) ; permanently reserved
1419 * script = 4ALPHA ; ISO 15924 code
1421 * region = 2ALPHA ; ISO 3166-1 code
1422 * / 3DIGIT ; UN M.49 code
1424 * variant = 5*8alphanum ; registered variants
1425 * / (DIGIT 3alphanum)
1427 * extension = singleton 1*("-" (2*8alphanum))
1429 * ; Single alphanumerics
1430 * ; "x" reserved for private use
1431 * singleton = DIGIT ; 0 - 9
1437 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438 * The parser below doesn't try to cope with extension or privateuse
1439 * that could be added but that's not interoperable anyway
1441 * Returns 1 if correct 0 otherwise
/*
 * xmlCheckLanguageID: check a language identifier string.
 * lang is scanned with two cursors: cur marks the start of the current
 * subtag and nxt is advanced over runs of ASCII letters; the length of a
 * subtag is nxt - cur.
 * Visible structure:
 *   - values starting with i-, I-, x- or X- are handled first: letters are
 *     skipped and the result is whether the string ends there;
 *   - otherwise successive subtags are classified by length (tests against
 *     4, 5 and 8 are visible) in the order shown by the inline comments:
 *     ISO 639 code, extlang, script, region, variant;
 *   - a subtag starting with a digit is tested separately, and the last
 *     visible test checks two further digit characters.
 * NOTE(review): cursor updates, goto targets, labels and every return
 * except the one in the first branch are not visible in this excerpt;
 * confirm the accepted grammar against the complete file.
 */
1444 xmlCheckLanguageID(const xmlChar * lang)
1446 const xmlChar *cur = lang, *nxt;
/* Legacy i- and x- forms, in either letter case. */
1450 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1451 ((cur[0] == 'I') && (cur[1] == '-')) ||
1452 ((cur[0] == 'x') && (cur[1] == '-')) ||
1453 ((cur[0] == 'X') && (cur[1] == '-'))) {
1455 * Still allow IANA code and user code which were coming
1456 * from the previous version of the XML-1.0 specification
1457 * it's deprecated but we should not fail
1460 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1461 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1463 return(cur[0] == 0);
1466 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1469 if (nxt - cur >= 4) {
1473 if ((nxt - cur > 8) || (nxt[0] != 0))
1479 /* we got an ISO 639 code */
1487 /* now we can have extlang or script or region or variant */
1488 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1491 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 /* we parsed an extlang */
1510 /* now we can have script or region or variant */
1511 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 /* we parsed a script */
1532 /* now we can have region or variant */
1533 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1540 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1544 /* we parsed a region */
1553 /* now we can just have a variant */
1554 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1558 if ((nxt - cur < 5) || (nxt - cur > 8))
1561 /* we parsed a variant */
1567 /* extensions and private use subtags not checked */
1571 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1579 /************************************************************************
1581 * Parser stacks related functions and macros *
1583 ************************************************************************/
/*
 * Forward declaration: the string entity decoding code further down calls
 * this helper. It takes the parser context and a pointer to the current
 * position in the string being decoded.
 */
1585 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586 const xmlChar ** str);
1591 * @ctxt: an XML parser context
1592 * @prefix: the namespace prefix or NULL
1593 * @URL: the namespace name
1595 * Pushes a new parser namespace on top of the ns stack
1597 * Returns -1 in case of error, -2 if the namespace should be discarded
1598 * and the index in the stack otherwise.
/*
 * ctxt->nsTab is a flat array of (prefix, URL) pairs: slot i holds a prefix
 * and slot i + 1 the matching namespace name; ctxt->nsNr counts slots, not
 * pairs.
 */
1601 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
/* with XML_PARSE_NSCLEAN, look for an existing binding of the same prefix */
1603 if (ctxt->options & XML_PARSE_NSCLEAN) {
/* walk the pairs from the most recently pushed one downwards */
1605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
/*
 * prefix and URL are compared by pointer, not by content -- this presumably
 * relies on both being interned strings; TODO confirm with the callers
 */
1606 if (ctxt->nsTab[i] == prefix) {
1608 if (ctxt->nsTab[i + 1] == URL)
1610 /* out of scope keep it */
/* first use: the table has not been allocated yet */
1615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1618 ctxt->nsTab = (const xmlChar **)
1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620 if (ctxt->nsTab == NULL) {
1621 xmlErrMemory(ctxt, NULL);
/* table full: grow it through a temporary so nsTab survives a failure */
1625 } else if (ctxt->nsNr >= ctxt->nsMax) {
1626 const xmlChar ** tmp;
1628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1631 xmlErrMemory(ctxt, NULL);
/* append the pair; each store advances nsNr by one slot */
1637 ctxt->nsTab[ctxt->nsNr++] = prefix;
1638 ctxt->nsTab[ctxt->nsNr++] = URL;
/* the value returned is nsNr after both stores, i.e. the new slot count */
1639 return (ctxt->nsNr);
1643 * @ctxt: an XML parser context
1644 * @nr: the number to pop
1646 * Pops the top @nr parser prefix/namespace from the ns stack
1648 * Returns the number of namespaces removed
/*
 * @nr is compared against and subtracted from ctxt->nsNr, so it counts
 * nsTab slots.
 */
1651 nsPop(xmlParserCtxtPtr ctxt, int nr)
/* no table allocated: nothing can be popped */
1655 if (ctxt->nsTab == NULL) return(0);
/* asking for more slots than are stored is reported through the generic error channel */
1656 if (ctxt->nsNr < nr) {
1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1660 if (ctxt->nsNr <= 0)
/* one slot is released per iteration and the vacated slot is cleared */
1663 for (i = 0;i < nr;i++) {
1665 ctxt->nsTab[ctxt->nsNr] = NULL;
/*
 * Makes sure the attribute arrays of the context can hold @nr + 5 slots.
 * Two arrays are kept in step: the slot array of const xmlChar pointers
 * (ctxt->atts, capacity ctxt->maxatts) and ctxt->attallocs, which gets one
 * int per 5 slots (maxatts / 5).
 * Returns ctxt->maxatts on the path that reaches the return statement;
 * allocation failures jump to mem_error, which reports through xmlErrMemory.
 */
1672 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673 const xmlChar **atts;
/* first call: allocate both arrays with the default capacity */
1677 if (ctxt->atts == NULL) {
1678 maxatts = 55; /* allow for 10 attrs by default */
1679 atts = (const xmlChar **)
1680 xmlMalloc(maxatts * sizeof(xmlChar *));
1681 if (atts == NULL) goto mem_error;
1683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
1686 ctxt->maxatts = maxatts;
/* otherwise grow only when nr + 5 slots no longer fit */
1687 } else if (nr + 5 > ctxt->maxatts) {
/* new capacity is twice the requested slot count */
1688 maxatts = (nr + 5) * 2;
/* both reallocations go through locals, so the context keeps valid pointers on failure */
1689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690 maxatts * sizeof(const xmlChar *));
1691 if (atts == NULL) goto mem_error;
1693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694 (maxatts / 5) * sizeof(int));
1695 if (attallocs == NULL) goto mem_error;
1696 ctxt->attallocs = attallocs;
1697 ctxt->maxatts = maxatts;
1699 return(ctxt->maxatts);
1701 xmlErrMemory(ctxt, NULL);
1707 * @ctxt: an XML parser context
1708 * @value: the parser input
1710 * Pushes a new parser input on top of the input stack
1712 * Returns -1 in case of error, the index in the stack otherwise
1715 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1717 if ((ctxt == NULL) || (value == NULL))
1719 if (ctxt->inputNr >= ctxt->inputMax) {
1720 ctxt->inputMax *= 2;
1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1724 sizeof(ctxt->inputTab[0]));
1725 if (ctxt->inputTab == NULL) {
1726 xmlErrMemory(ctxt, NULL);
1727 xmlFreeInputStream(value);
1728 ctxt->inputMax /= 2;
1733 ctxt->inputTab[ctxt->inputNr] = value;
1734 ctxt->input = value;
1735 return (ctxt->inputNr++);
1739 * @ctxt: an XML parser context
1741 * Pops the top parser input from the input stack
1743 * Returns the input just removed
/*
 * Removes the top entry of ctxt->inputTab and re-points ctxt->input at the
 * entry below it, if any.
 */
1746 inputPop(xmlParserCtxtPtr ctxt)
1748 xmlParserInputPtr ret;
/* empty stack: nothing to pop */
1752 if (ctxt->inputNr <= 0)
/* when entries remain, the one now on top becomes the current input */
1755 if (ctxt->inputNr > 0)
1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
/*
 * slot inputNr lies just above the new top, so it holds the entry being
 * removed; the slot is cleared once its value has been saved in ret
 */
1759 ret = ctxt->inputTab[ctxt->inputNr];
1760 ctxt->inputTab[ctxt->inputNr] = NULL;
1765 * @ctxt: an XML parser context
1766 * @value: the element node
1768 * Pushes a new element node on top of the node stack
1770 * Returns -1 in case of error, the index in the stack otherwise
/*
 * Stores @value in ctxt->nodeTab at index ctxt->nodeNr, growing the table
 * when it is full and refusing documents nested deeper than
 * xmlParserMaxDepth unless XML_PARSE_HUGE is set.
 */
1773 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
/*
 * NOTE(review): a NULL context yields 0 here, which a caller cannot tell
 * apart from a successful push at index 0; the documented error value is -1
 * and namePush() returns -1 for the same condition -- confirm intent.
 */
1775 if (ctxt == NULL) return(0);
/* table full: it is reallocated through a temporary before being committed */
1776 if (ctxt->nodeNr >= ctxt->nodeMax) {
1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1781 sizeof(ctxt->nodeTab[0]));
1783 xmlErrMemory(ctxt, NULL);
1786 ctxt->nodeTab = tmp;
/* depth guard: nodeNr is compared as unsigned against the global limit */
1789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
/* parsing is stopped once the fatal error has been raised */
1794 xmlHaltParser(ctxt);
1797 ctxt->nodeTab[ctxt->nodeNr] = value;
/* post-increment: the value returned is the index the node was stored at */
1799 return (ctxt->nodeNr++);
1804 * @ctxt: an XML parser context
1806 * Pops the top element node from the node stack
1808 * Returns the node just removed
/*
 * Removes the top entry of ctxt->nodeTab and re-points ctxt->node at the
 * entry below it, if any.
 */
1811 nodePop(xmlParserCtxtPtr ctxt)
1815 if (ctxt == NULL) return(NULL);
/* empty stack: nothing to pop */
1816 if (ctxt->nodeNr <= 0)
/* when entries remain, the one now on top becomes the current node */
1819 if (ctxt->nodeNr > 0)
1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
/* slot nodeNr holds the removed entry; it is saved in ret, then cleared */
1823 ret = ctxt->nodeTab[ctxt->nodeNr];
1824 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1828 #ifdef LIBXML_PUSH_ENABLED
1831 * @ctxt: an XML parser context
1832 * @value: the element name
1833 * @prefix: the element prefix
1834 * @URI: the element namespace name
1836 * Pushes a new element name/prefix/URL on top of the name stack
1838 * Returns -1 in case of error, the index in the stack otherwise
/*
 * Pushes @value on ctxt->nameTab and records three companion values for it
 * in ctxt->pushTab, which uses three consecutive slots per name:
 * prefix, URI and nsNr.
 */
1841 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842 const xmlChar *prefix, const xmlChar *URI, int nsNr)
/* name table full: both tables are reallocated, each through its own temporary */
1844 if (ctxt->nameNr >= ctxt->nameMax) {
1845 const xmlChar * *tmp;
1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1850 sizeof(ctxt->nameTab[0]));
1855 ctxt->nameTab = tmp;
1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1858 sizeof(ctxt->pushTab[0]));
1863 ctxt->pushTab = tmp2;
1865 ctxt->nameTab[ctxt->nameNr] = value;
/* slots 3*nameNr .. 3*nameNr+2 belong to this name */
1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
/* the integer nsNr is stored in a pointer-sized slot via a cast through long */
1869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
/* post-increment: the value returned is the index the name was stored at */
1870 return (ctxt->nameNr++);
1872 xmlErrMemory(ctxt, NULL);
1877 * @ctxt: an XML parser context
1879 * Pops the top element/prefix/URI name from the name stack
1881 * Returns the name just removed
/*
 * Removes the top entry of ctxt->nameTab and re-points ctxt->name at the
 * entry below it, if any. No access to ctxt->pushTab is visible here.
 */
1883 static const xmlChar *
1884 nameNsPop(xmlParserCtxtPtr ctxt)
/* empty stack: nothing to pop */
1888 if (ctxt->nameNr <= 0)
/* when entries remain, the one now on top becomes the current name */
1891 if (ctxt->nameNr > 0)
1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
/* slot nameNr holds the removed entry; it is saved in ret, then cleared */
1895 ret = ctxt->nameTab[ctxt->nameNr];
1896 ctxt->nameTab[ctxt->nameNr] = NULL;
1899 #endif /* LIBXML_PUSH_ENABLED */
1903 * @ctxt: an XML parser context
1904 * @value: the element name
1906 * Pushes a new element name on top of the name stack
1908 * Returns -1 in case of error, the index in the stack otherwise
/*
 * Stores @value in ctxt->nameTab at index ctxt->nameNr, growing the table
 * when it is full.
 */
1911 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
/* a NULL context is reported with the documented error value */
1913 if (ctxt == NULL) return (-1);
/* table full: it is reallocated through a temporary before being committed */
1915 if (ctxt->nameNr >= ctxt->nameMax) {
1916 const xmlChar * *tmp;
1917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1919 sizeof(ctxt->nameTab[0]));
1923 ctxt->nameTab = tmp;
1926 ctxt->nameTab[ctxt->nameNr] = value;
/* post-increment: the value returned is the index the name was stored at */
1928 return (ctxt->nameNr++);
1930 xmlErrMemory(ctxt, NULL);
1935 * @ctxt: an XML parser context
1937 * Pops the top element name from the name stack
1939 * Returns the name just removed
/*
 * Removes the top entry of ctxt->nameTab and re-points ctxt->name at the
 * entry below it, if any.
 */
1942 namePop(xmlParserCtxtPtr ctxt)
/* a NULL context and an empty stack are handled by the same test */
1946 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
/* when entries remain, the one now on top becomes the current name */
1949 if (ctxt->nameNr > 0)
1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
/* slot nameNr holds the removed entry; it is saved in ret, then cleared */
1953 ret = ctxt->nameTab[ctxt->nameNr];
1954 ctxt->nameTab[ctxt->nameNr] = NULL;
/*
 * Pushes the integer @val on ctxt->spaceTab and points ctxt->space at the
 * slot just written. Returns the index used for the new entry.
 */
1958 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
/* table full: capacity is doubled and the table reallocated through a temporary */
1959 if (ctxt->spaceNr >= ctxt->spaceMax) {
1962 ctxt->spaceMax *= 2;
1963 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1966 xmlErrMemory(ctxt, NULL);
1970 ctxt->spaceTab = tmp;
1972 ctxt->spaceTab[ctxt->spaceNr] = val;
/* ctxt->space aliases the top slot of the table */
1973 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1974 return(ctxt->spaceNr++);
/*
 * Pops the top entry of ctxt->spaceTab and keeps ctxt->space pointing at a
 * valid slot of the table.
 */
1977 static int spacePop(xmlParserCtxtPtr ctxt) {
/* empty stack: 0 is returned */
1979 if (ctxt->spaceNr <= 0) return(0);
/* entries remain: ctxt->space follows the new top ... */
1981 if (ctxt->spaceNr > 0)
1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
/* ... otherwise it is parked on the first slot instead of being set to NULL */
1984 ctxt->space = &ctxt->spaceTab[0];
/* slot spaceNr holds the removed value; the slot is then overwritten with -1 */
1985 ret = ctxt->spaceTab[ctxt->spaceNr];
1986 ctxt->spaceTab[ctxt->spaceNr] = -1;
1991 * Macros for accessing the content. Those should be used only by the parser,
1994 * Dirty macros, i.e. one often needs to make assumptions about the context to
1997 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1998 * To be used with extreme caution since operations consuming
1999 * characters may move the input buffer to a different location !
2000 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
2001 * This should be used internally by the parser
2002 * only to compare to ASCII values otherwise it would break when
2003 * running with UTF-8 encoding.
2004 * RAW same as CUR but in the input buffer, bypass any token
2005 * extraction that may have been done
2006 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
2007 * to compare on ASCII based substring.
2008 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2009 * strings without newlines within the parser.
2010 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2011 * defined char within the parser.
2012 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
2014 * NEXT Skip to the next character, this does the proper decoding
2015 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2016 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2017 * CUR_CHAR(l) returns the current unicode character (int), set l
2018 * to the number of xmlChars used for the encoding [0-5].
2019 * CUR_SCHAR same but operate on a string instead of the context
2020 * COPY_BUF copy the current unicode char to the target buffer, increment
2022 * GROW, SHRINK handling of input buffers
/*
 * Accessors for the current input. All of them expand to expressions on a
 * variable named 'ctxt' that must be in scope at the point of use.
 * RAW and CUR expand to the same expression: the byte at the read cursor.
 */
2025 #define RAW (*ctxt->input->cur)
2026 #define CUR (*ctxt->input->cur)
/* byte at offset val from the read cursor; no bounds check is performed */
2027 #define NXT(val) ctxt->input->cur[(val)]
/* the read cursor itself and the start of the input buffer */
2028 #define CUR_PTR ctxt->input->cur
2029 #define BASE_PTR ctxt->input->base
/*
 * CMPn(s, c1..cn): true when the first n bytes at s, read as unsigned char,
 * equal c1..cn in order. Each CMPn is built from CMP(n-1) plus one more
 * byte test, so evaluation stops at the first mismatch. The argument s is
 * not parenthesised in the expansion and is evaluated once per byte tested.
 */
2031 #define CMP4( s, c1, c2, c3, c4 ) \
2032 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2033 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2034 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2035 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2036 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2037 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2038 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2039 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2040 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2041 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2042 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2043 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2044 ((unsigned char *) s)[ 8 ] == c9 )
2045 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2046 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2047 ((unsigned char *) s)[ 9 ] == c10 )
/*
 * SKIP(val): advances the read cursor, the column and ctxt->nbChars by val
 * without looking at the bytes skipped (the line counter is never touched),
 * then asks for more input when the cursor lands on a zero byte.
 */
2049 #define SKIP(val) do { \
2050 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2051 if (*ctxt->input->cur == 0) \
2052 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
/*
 * SKIPL(val): advances one byte at a time so that a '\n' bumps the line
 * counter and resets the column to 1; other bytes only bump the column.
 * Asks for more input when the cursor lands on a zero byte.
 */
2055 #define SKIPL(val) do { \
2057 for(skipl=0; skipl<val; skipl++) { \
2058 if (*(ctxt->input->cur) == '\n') { \
2059 ctxt->input->line++; ctxt->input->col = 1; \
2060 } else ctxt->input->col++; \
2062 ctxt->input->cur++; \
2064 if (*ctxt->input->cur == 0) \
2065 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
/*
 * SHRINK: guard evaluated inline. It only fires when ctxt->progressive is
 * 0, more than 2 * INPUT_CHUNK bytes lie between the buffer base and the
 * cursor, and fewer than 2 * INPUT_CHUNK bytes remain after the cursor.
 */
2068 #define SHRINK if ((ctxt->progressive == 0) && \
2069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
/*
 * Shrinks the current input buffer, then asks for more input when the
 * cursor sits on a zero byte.
 */
2073 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074 xmlParserInputShrink(ctxt->input);
2075 if (*ctxt->input->cur == 0)
2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/*
 * GROW: guard evaluated inline. It only fires when ctxt->progressive is 0
 * and fewer than INPUT_CHUNK bytes remain between the cursor and the end of
 * the buffer.
 */
2079 #define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
/*
 * Reads more data into the current input buffer, with two safety checks:
 * a size limit applied before growing and a cursor sanity check afterwards.
 */
2083 static void xmlGROW (xmlParserCtxtPtr ctxt) {
/* bytes still unread after the cursor, and bytes already consumed before it */
2084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
/*
 * The limit applies only when all three hold: either distance exceeds
 * XML_MAX_LOOKUP_LIMIT, the input has a buffer whose read callback is not
 * xmlNop, and XML_PARSE_HUGE is not set.
 */
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2092 xmlHaltParser(ctxt);
2095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/* after growing, the cursor must still lie within [base, end] */
2096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
/* here the parser is halted before the error is raised, the reverse of the order used above */
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
/* a cursor resting on a zero byte triggers one more read attempt */
2102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/* SKIP_BLANKS and NEXT are plain aliases for the corresponding functions */
2106 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2108 #define NEXT xmlNextChar(ctxt)
/* single-byte advance: column and cursor move by one, then refill on a zero byte */
2111 ctxt->input->col++; \
2112 ctxt->input->cur++; \
2114 if (*ctxt->input->cur == 0) \
2115 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
/*
 * NEXTL(l): advances the cursor by l bytes as one character. A '\n' at the
 * cursor bumps the line counter and resets the column, anything else bumps
 * the column by one regardless of l.
 */
2118 #define NEXTL(l) do { \
2119 if (*(ctxt->input->cur) == '\n') { \
2120 ctxt->input->line++; ctxt->input->col = 1; \
2121 } else ctxt->input->col++; \
2122 ctxt->input->cur += l; \
/* both take the address of l, so l must be an lvalue; it receives the length */
2125 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2126 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
/*
 * COPY_BUF(l,b,i,v): appends v to b at index i and advances i. When l is 1
 * the value is stored as a single xmlChar; otherwise
 * xmlCopyCharMultiByte() writes it and i advances by its return value.
 * The expansion is a bare if/else, not wrapped in do { } while (0).
 */
2128 #define COPY_BUF(l,b,i,v) \
2129 if (l == 1) b[i++] = (xmlChar) v; \
2130 else i += xmlCopyCharMultiByte(&b[i],v)
2133 * xmlSkipBlankChars:
2134 * @ctxt: the XML parser context
2136 * Skip all blank characters found at that point in the input stream.
2137 * It pops up finished entities in the process if allowable at that point.
2139 * Returns the number of space chars skipped
/*
 * Two strategies are used: a tight pointer loop when only one input is
 * stacked and the parser is not inside the DTD, and a slower loop that can
 * also follow parameter-entity references otherwise.
 */
2143 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2147 * It's Okay to use CUR/NEXT here since all the blanks are on
2150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2153 * if we are in the document content, go really fast
/* work on a local copy of the cursor and write it back when done */
2155 cur = ctxt->input->cur;
2156 while (IS_BLANK_CH(*cur)) {
2158 ctxt->input->line++; ctxt->input->col = 1;
/*
 * the cursor is published before the buffer is refilled and re-read
 * afterwards, since refilling may move the buffer
 */
2165 ctxt->input->cur = cur;
2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167 cur = ctxt->input->cur;
2170 ctxt->input->cur = cur;
/* PE references are expanded only in external content or inside a nested input */
2172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2177 } else if (CUR == '%') {
2179 * Need to handle support of entities branching here
/* a '%' followed by a blank or by the end of input is not treated as a reference */
2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2183 xmlParsePEReference(ctxt);
/* a zero byte marks the end of the current input */
2184 } else if (CUR == 0) {
2185 if (ctxt->inputNr <= 1)
2193 * Also increase the counter when entering or exiting a PERef.
2194 * The spec says: "When a parameter-entity reference is recognized
2195 * in the DTD and included, its replacement text MUST be enlarged
2196 * by the attachment of one leading and one following space (#x20)
2205 /************************************************************************
2207 * Commodity functions to handle entities *
2209 ************************************************************************/
2213 * @ctxt: an XML parser context
2215 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216 * pop it and return the next char.
2218 * Returns the current xmlChar in the parser context
/*
 * Discards the input on top of the stack and makes sure the input
 * underneath has data available.
 */
2221 xmlPopInput(xmlParserCtxtPtr ctxt) {
/* the bottom-most input is never popped: at least two inputs must be stacked */
2222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2223 if (xmlParserDebugEntities)
2224 xmlGenericError(xmlGenericErrorContext,
2225 "Popping input %d\n", ctxt->inputNr);
/* popping while outside any DTD subset, and not at EOF, is flagged as an error */
2226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227 (ctxt->instate != XML_PARSER_EOF))
2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229 "Unfinished entity outside the DTD");
/* the popped input is freed immediately */
2230 xmlFreeInputStream(inputPop(ctxt));
/* refill the input that is now current when its cursor sits on a zero byte */
2231 if (*ctxt->input->cur == 0)
2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2238 * @ctxt: an XML parser context
2239 * @input: an XML parser input fragment (entity, XML fragment ...).
2241 * xmlPushInput: switch to a new input stream which is stacked on top
2242 * of the previous one(s).
2243 * Returns -1 in case of error or the index in the input stack
/*
 * Stacks @input on top of the current inputs, after enforcing a limit on
 * the nesting depth of inputs.
 */
2246 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2248 if (input == NULL) return(-1);
/* optional trace of the push, prefixed with the current file name when known */
2250 if (xmlParserDebugEntities) {
2251 if ((ctxt->input != NULL) && (ctxt->input->filename))
2252 xmlGenericError(xmlGenericErrorContext,
2253 "%s(%d): ", ctxt->input->filename,
2255 xmlGenericError(xmlGenericErrorContext,
2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
/*
 * More than 40 stacked inputs (without XML_PARSE_HUGE) or more than 1024
 * (always) is reported as an entity loop.
 */
2258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259 (ctxt->inputNr > 1024)) {
2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
/*
 * every input except the bottom one is popped and freed.
 * NOTE(review): no release of @input itself is visible on this path --
 * confirm who owns it when the push is refused.
 */
2261 while (ctxt->inputNr > 1)
2262 xmlFreeInputStream(inputPop(ctxt));
2265 ret = inputPush(ctxt, input);
/* the parser state is re-checked after the push */
2266 if (ctxt->instate == XML_PARSER_EOF)
2274 * @ctxt: an XML parser context
2276 * parse Reference declarations
2278 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279 * '&#x' [0-9a-fA-F]+ ';'
2281 * [ WFC: Legal Character ]
2282 * Characters referred to using character references must match the
2283 * production for Char.
2285 * Returns the value parsed (as an int), 0 in case of error
/*
 * Parses a character reference at the input cursor. The hexadecimal form
 * is tried first, then the decimal form; anything else is reported as
 * XML_ERR_INVALID_CHARREF.
 */
2288 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
/* val is unsigned, so the multiplications below wrap instead of overflowing */
2289 unsigned int val = 0;
/* consulted at the end: the value is accepted only while this is still 0 */
2291 unsigned int outofrange = 0;
2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2296 if ((RAW == '&') && (NXT(1) == '#') &&
2300 while (RAW != ';') { /* loop blocked by count */
/* bail out if the parser was stopped while reading */
2304 if (ctxt->instate == XML_PARSER_EOF)
2307 if ((RAW >= '0') && (RAW <= '9'))
2308 val = val * 16 + (CUR - '0');
/*
 * NOTE(review): hex letters are accepted only while count < 20, whereas the
 * decimal digits above carry no such restriction -- confirm this asymmetry
 * is intended.
 */
2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310 val = val * 16 + (CUR - 'a') + 10;
2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312 val = val * 16 + (CUR - 'A') + 10;
2314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
/* decimal form */
2330 } else if ((RAW == '&') && (NXT(1) == '#')) {
2333 while (RAW != ';') { /* loop blocked by count */
2337 if (ctxt->instate == XML_PARSER_EOF)
2340 if ((RAW >= '0') && (RAW <= '9'))
2341 val = val * 10 + (CUR - '0');
2343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2364 * [ WFC: Legal Character ]
2365 * Characters referred to using character references must match the
2366 * production for Char.
/* the value must be a legal XML Char and must never have been flagged out of range */
2368 if ((IS_CHAR(val) && (outofrange == 0))) {
2371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372 "xmlParseCharRef: invalid xmlChar value %d\n",
2379 * xmlParseStringCharRef:
2380 * @ctxt: an XML parser context
2381 * @str: a pointer to an index in the string
2383 * parse Reference declarations, variant parsing from a string rather
2384 * than an input flow.
2386 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387 * '&#x' [0-9a-fA-F]+ ';'
2389 * [ WFC: Legal Character ]
2390 * Characters referred to using character references must match the
2391 * production for Char.
2393 * Returns the value parsed (as an int), 0 in case of error, str will be
2394 * updated to the current value of the index
/*
 * String variant of xmlParseCharRef(): the reference is read from *str
 * instead of the parser input. 'cur' holds the character being examined
 * and 'ptr' the position in the string.
 */
2397 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
/* val is unsigned, so the multiplications below wrap instead of overflowing */
2400 unsigned int val = 0;
/* consulted at the end: the value is accepted only while this is still 0 */
2401 unsigned int outofrange = 0;
2403 if ((str == NULL) || (*str == NULL)) return(0);
/* hexadecimal form: "&#x" */
2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2409 while (cur != ';') { /* Non input consuming loop */
2410 if ((cur >= '0') && (cur <= '9'))
2411 val = val * 16 + (cur - '0');
2412 else if ((cur >= 'a') && (cur <= 'f'))
2413 val = val * 16 + (cur - 'a') + 10;
2414 else if ((cur >= 'A') && (cur <= 'F'))
2415 val = val * 16 + (cur - 'A') + 10;
2417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
/* decimal form: "&#" */
2429 } else if ((cur == '&') && (ptr[1] == '#')){
2432 while (cur != ';') { /* Non input consuming loops */
2433 if ((cur >= '0') && (cur <= '9'))
2434 val = val * 10 + (cur - '0');
2436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2455 * [ WFC: Legal Character ]
2456 * Characters referred to using character references must match the
2457 * production for Char.
/* the value must be a legal XML Char and must never have been flagged out of range */
2459 if ((IS_CHAR(val) && (outofrange == 0))) {
2462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2470 * xmlParserHandlePEReference:
2471 * @ctxt: the parser context
2473 * [69] PEReference ::= '%' Name ';'
2475 * [ WFC: No Recursion ]
2476 * A parsed entity must not contain a recursive
2477 * reference to itself, either directly or indirectly.
2479 * [ WFC: Entity Declared ]
2480 * In a document without any DTD, a document with only an internal DTD
2481 * subset which contains no parameter entity references, or a document
2482 * with "standalone='yes'", ... ... The declaration of a parameter
2483 * entity must precede any reference to it...
2485 * [ VC: Entity Declared ]
2486 * In a document with an external subset or external parameter entities
2487 * with "standalone='no'", ... ... The declaration of a parameter entity
2488 * must precede any reference to it...
2491 * Parameter-entity references may only appear in the DTD.
2492 * NOTE: misleading but this is handled.
2494 * A PEReference may have been detected in the current input stream
2495 * the handling is done according to
2496 * http://www.w3.org/TR/REC-xml#entproc
2498 * - Included in literal in entity values
2499 * - Included as Parameter Entity reference within DTDs
/*
 * Dispatches on the parser state to decide what a '%' found in the input
 * means there; the states listed below either raise a dedicated error,
 * leave the reference alone, or fall through to xmlParsePEReference().
 */
2502 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2503 switch(ctxt->instate) {
2504 case XML_PARSER_CDATA_SECTION:
2506 case XML_PARSER_COMMENT:
2508 case XML_PARSER_START_TAG:
2510 case XML_PARSER_END_TAG:
/* a reference met once the parser is in the EOF state is an error */
2512 case XML_PARSER_EOF:
2513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
/* the three states before the DTD share one error */
2515 case XML_PARSER_PROLOG:
2516 case XML_PARSER_START:
2517 case XML_PARSER_MISC:
2518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2520 case XML_PARSER_ENTITY_DECL:
2521 case XML_PARSER_CONTENT:
2522 case XML_PARSER_ATTRIBUTE_VALUE:
2524 case XML_PARSER_SYSTEM_LITERAL:
2525 case XML_PARSER_PUBLIC_LITERAL:
2526 /* we just ignore it there */
2528 case XML_PARSER_EPILOG:
2529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2531 case XML_PARSER_ENTITY_VALUE:
2533 * NOTE: in the case of entity values, we don't do the
2534 * substitution here since we need the literal
2535 * entity value to be able to save the internal
2536 * subset of the document.
2537 * This will be handled by xmlStringDecodeEntities
2540 case XML_PARSER_DTD:
2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543 * In the internal DTD subset, parameter-entity references
2544 * can occur only where markup declarations can occur, not
2545 * within markup declarations.
2546 * In that case this is handled in xmlParseMarkupDecl
/* internal subset read from the main input: tested separately from nested or external input */
2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
/* a '%' followed by a blank or by the end of input is tested for here */
2550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2553 case XML_PARSER_IGNORE:
/* remaining cases: the reference is parsed and expanded */
2557 xmlParsePEReference(ctxt);
2561 * Macro used to grow the current buffer.
2562 * buffer##_size is expected to be a size_t
2563 * mem_error: is expected to handle memory allocation failures
/*
 * growBuffer(buffer, n): enlarges 'buffer' to twice its current size plus
 * n bytes. The macro relies on names from the expansion site: a variable
 * <buffer>_size holding the current size and a 'mem_error' label, reached
 * both when the new size wraps around and when xmlRealloc() fails.
 */
2565 #define growBuffer(buffer, n) { \
2567 size_t new_size = buffer##_size * 2 + n; \
2568 if (new_size < buffer##_size) goto mem_error; \
2569 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2570 if (tmp == NULL) goto mem_error; \
2572 buffer##_size = new_size; \
2576 * xmlStringLenDecodeEntities:
2577 * @ctxt: the parser context
2578 * @str: the input string
2579 * @len: the string length
2580 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581 * @end: an end marker xmlChar, 0 if none
2582 * @end2: an end marker xmlChar, 0 if none
2583 * @end3: an end marker xmlChar, 0 if none
2585 * Takes an entity string content and processes it to perform the appropriate substitutions.
2587 * [67] Reference ::= EntityRef | CharRef
2589 * [69] PEReference ::= '%' Name ';'
2591 * Returns A newly allocated string with the substitution done. The caller
2592 * must deallocate it !
/*
 * Copies @str into a freshly allocated buffer, replacing character
 * references, and -- depending on the bits set in @what -- general entity
 * references and parameter-entity references, until a zero character or
 * one of the three end markers is met. The output buffer is enlarged with
 * growBuffer() whenever fewer than XML_PARSER_BUFFER_SIZE bytes of headroom
 * remain.
 */
2595 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2597 xmlChar *buffer = NULL;
2598 size_t buffer_size = 0;
2601 xmlChar *current = NULL;
2602 xmlChar *rep = NULL;
2603 const xmlChar *last;
2607 if ((ctxt == NULL) || (str == NULL) || (len < 0))
/* same nesting limits as for stacked inputs: 40 without XML_PARSE_HUGE, 1024 always */
2611 if (((ctxt->depth > 40) &&
2612 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613 (ctxt->depth > 1024)) {
2614 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2619 * allocate a translation buffer.
2621 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2622 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2623 if (buffer == NULL) goto mem_error;
2626 * OK loop until we reach one of the ending char or a size limit.
2627 * we are operating on already parsed values.
/* c is the current character of the string, l its length in bytes */
2630 c = CUR_SCHAR(str, l);
2633 while ((c != 0) && (c != end) && /* non input consuming loop */
2634 (c != end2) && (c != end3)) {
/* character reference: always decoded, whatever @what says */
2637 if ((c == '&') && (str[1] == '#')) {
2638 int val = xmlParseStringCharRef(ctxt, &str);
/* a length argument of 0 makes COPY_BUF take its multi-byte branch */
2641 COPY_BUF(0,buffer,nbchars,val);
2642 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2643 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* general entity reference, substituted only when XML_SUBSTITUTE_REF is requested */
2645 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646 if (xmlParserDebugEntities)
2647 xmlGenericError(xmlGenericErrorContext,
2648 "String decoding Entity Reference: %.30s\n",
2650 ent = xmlParseStringEntityRef(ctxt, &str);
2651 xmlParserEntityCheck(ctxt, 0, ent, 0);
2653 ctxt->nbentities += ent->checked / 2;
/* predefined entity: only the first character of its content is copied */
2654 if ((ent != NULL) &&
2655 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2656 if (ent->content != NULL) {
2657 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2658 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2659 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2662 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2663 "predefined entity has no content\n");
/* entity with content: decode the content recursively, then append the result */
2666 } else if ((ent != NULL) && (ent->content != NULL)) {
2668 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2675 while (*current != 0) { /* non input consuming loop */
2676 buffer[nbchars++] = *current++;
2677 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
/* the entity check is repeated with the output size each time the buffer is about to grow */
2678 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2680 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* entity without content: the reference is written back as "&name;" */
2685 } else if (ent != NULL) {
2686 int i = xmlStrlen(ent->name);
2687 const xmlChar *cur = ent->name;
2689 buffer[nbchars++] = '&';
/* room is reserved for the whole name before it is copied */
2690 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2691 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2694 buffer[nbchars++] = *cur++;
2695 buffer[nbchars++] = ';';
/* parameter-entity reference, substituted only when XML_SUBSTITUTE_PEREF is requested */
2697 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2698 if (xmlParserDebugEntities)
2699 xmlGenericError(xmlGenericErrorContext,
2700 "String decoding PE Reference: %.30s\n", str);
2701 ent = xmlParseStringPEReference(ctxt, &str);
2702 xmlParserEntityCheck(ctxt, 0, ent, 0);
2704 ctxt->nbentities += ent->checked / 2;
2706 if (ent->content == NULL) {
2708 * Note: external parsed entities will not be loaded,
2709 * it is not required for a non-validating parser to
2710 * complete external PEreferences coming from the
/* content is loaded only when entity substitution or validation is enabled */
2713 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2714 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2715 (ctxt->validate != 0)) {
2716 xmlLoadEntityContent(ctxt, ent);
2718 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2719 "not validating will not read content for PE entity %s\n",
2724 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2730 while (*current != 0) { /* non input consuming loop */
2731 buffer[nbchars++] = *current++;
2732 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2733 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2735 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
/* ordinary character: copied using the byte length reported for it */
2742 COPY_BUF(l,buffer,nbchars,c);
2744 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2749 c = CUR_SCHAR(str, l);
/* the result is zero-terminated */
2753 buffer[nbchars] = 0;
2757 xmlErrMemory(ctxt, NULL);
2767 * xmlStringDecodeEntities:
2768 * @ctxt: the parser context
2769 * @str: the input string
2770 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2771 * @end: an end marker xmlChar, 0 if none
2772 * @end2: an end marker xmlChar, 0 if none
2773 * @end3: an end marker xmlChar, 0 if none
2775 * Takes an entity string content and processes it to perform the appropriate substitutions.
2777 * [67] Reference ::= EntityRef | CharRef
2779 * [69] PEReference ::= '%' Name ';'
2781 * Returns A newly allocated string with the substitution done. The caller
2782 * must deallocate it !
/*
 * Convenience wrapper around xmlStringLenDecodeEntities() for
 * zero-terminated strings: the length passed on is xmlStrlen(str).
 */
2785 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2786 xmlChar end, xmlChar end2, xmlChar end3) {
/* NULL context or string: NULL is returned without calling the worker */
2787 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2788 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2792 /************************************************************************
2794 * Commodity functions, cleanup needed ? *
2796 ************************************************************************/
2800 * @ctxt: an XML parser context
2802 * @len: the size of @str
2803 * @blank_chars: we know the chars are blanks
2805 * Is this a sequence of blank chars that one can ignore ?
2807 * Returns 1 if ignorable 0 otherwise.
/*
 * Decides whether a run of characters can be reported as ignorable
 * whitespace. The checks run from cheapest to most speculative: SAX
 * callback identity, xml:space state, content of the string, DTD
 * information, and finally a heuristic based on the next input character
 * and the children of the current node.
 */
2810 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813 xmlNodePtr lastChild;
2816 * Don't spend time trying to differentiate them, the same callback is
/* both SAX callbacks are the same function: the distinction is moot */
2819 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2823 * Check for xml:space value.
/* tests for no xml:space record, or a recorded value of 1 or -2 */
2825 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2826 (*(ctxt->space) == -2))
2830 * Check that the string is made of blanks
/* the scan is skipped when the caller already vouches for the content */
2832 if (blank_chars == 0) {
2833 for (i = 0;i < len;i++)
2834 if (!(IS_BLANK_CH(str[i]))) return(0);
2838 * Look if the element is mixed content in the DTD if available
/* without a current node nothing is ignorable */
2840 if (ctxt->node == NULL) return(0);
2841 if (ctxt->myDoc != NULL) {
/* 0 from xmlIsMixedElement yields "ignorable", 1 yields "not ignorable"; other values fall through */
2842 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2843 if (ret == 0) return(1);
2844 if (ret == 1) return(0);
2848 * Otherwise, heuristic :-\
/* the blanks must be followed by '<' or a carriage return to stay candidates */
2850 if ((RAW != '<') && (RAW != 0xD)) return(0);
/* blanks that are the only content before a closing tag are kept */
2851 if ((ctxt->node->children == NULL) &&
2852 (RAW == '<') && (NXT(1) == '/')) return(0);
/* the rest of the heuristic looks at the last and first children of the node */
2854 lastChild = xmlGetLastChild(ctxt->node);
2855 if (lastChild == NULL) {
2856 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2857 (ctxt->node->content != NULL)) return(0);
2858 } else if (xmlNodeIsText(lastChild))
2860 else if ((ctxt->node->children != NULL) &&
2861 (xmlNodeIsText(ctxt->node->children)))
2866 /************************************************************************
2868 * Extra stuff for namespace support *
2869 * Relates to http://www.w3.org/TR/WD-xml-names *
2871 ************************************************************************/
2875 * @ctxt: an XML parser context
2876 * @name: the XML qualified name string to split
2877 * @prefix: a xmlChar **
2879 * parse an UTF8 encoded XML qualified name string
2881 * [NS 5] QName ::= (Prefix ':')? LocalPart
2883 * [NS 6] Prefix ::= NCName
2885 * [NS 7] LocalPart ::= NCName
2887 * Returns the local part, and prefix is updated
2888 * to get the Prefix if any.
2892 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2893 xmlChar buf[XML_MAX_NAMELEN + 5];
2894 xmlChar *buffer = NULL;
2896 int max = XML_MAX_NAMELEN;
2897 xmlChar *ret = NULL;
2898 const xmlChar *cur = name;
2901 if (prefix == NULL) return(NULL);
2904 if (cur == NULL) return(NULL);
2906 #ifndef XML_XML_NAMESPACE
2907 /* xml: prefix is not really a namespace */
2908 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2909 (cur[2] == 'l') && (cur[3] == ':'))
2910 return(xmlStrdup(name));
2913 /* nasty but well-formed */
2915 return(xmlStrdup(name));
2918 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2924 * Okay someone managed to make a huge name, so he's ready to pay
2925 * for the processing speed.
2929 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2930 if (buffer == NULL) {
2931 xmlErrMemory(ctxt, NULL);
2934 memcpy(buffer, buf, len);
2935 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2936 if (len + 10 > max) {
2940 tmp = (xmlChar *) xmlRealloc(buffer,
2941 max * sizeof(xmlChar));
2944 xmlErrMemory(ctxt, NULL);
2955 if ((c == ':') && (*cur == 0)) {
2959 return(xmlStrdup(name));
2963 ret = xmlStrndup(buf, len);
2967 max = XML_MAX_NAMELEN;
2975 return(xmlStrndup(BAD_CAST "", 0));
2980 * Check that the first character is proper to start
2983 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2984 ((c >= 0x41) && (c <= 0x5A)) ||
2985 (c == '_') || (c == ':'))) {
2987 int first = CUR_SCHAR(cur, l);
2989 if (!IS_LETTER(first) && (first != '_')) {
2990 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2991 "Name %s is not XML Namespace compliant\n",
2997 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3003 * Okay someone managed to make a huge name, so he's ready to pay
3004 * for the processing speed.
3008 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3009 if (buffer == NULL) {
3010 xmlErrMemory(ctxt, NULL);
3013 memcpy(buffer, buf, len);
3014 while (c != 0) { /* tested bigname2.xml */
3015 if (len + 10 > max) {
3019 tmp = (xmlChar *) xmlRealloc(buffer,
3020 max * sizeof(xmlChar));
3022 xmlErrMemory(ctxt, NULL);
3035 ret = xmlStrndup(buf, len);
3044 /************************************************************************
3046 * The parser itself *
3047 * Relates to http://www.w3.org/TR/REC-xml *
3049 ************************************************************************/
3051 /************************************************************************
3053 * Routines to parse Name, NCName and NmToken *
3055 ************************************************************************/
/*
 * Call counters for the name-parsing routines, one per routine and all
 * starting at zero. In this part of the file nbParseNameComplex,
 * nbParseNCNameComplex and nbParseStringName are incremented inside
 * xmlParseNameComplex(), xmlParseNCNameComplex() and xmlParseStringName()
 * respectively; the remaining counters are presumably bumped the same way
 * by their matching routines (not visible here, to be confirmed).
 */
3057 static unsigned long nbParseName = 0;
3058 static unsigned long nbParseNmToken = 0;
3059 static unsigned long nbParseNCName = 0;
3060 static unsigned long nbParseNCNameComplex = 0;
3061 static unsigned long nbParseNameComplex = 0;
3062 static unsigned long nbParseStringName = 0;
3066 * The two following functions are related to the change of accepted
3067 * characters for Name and NmToken in the Revision 5 of XML-1.0
3068 * They correspond to the modified production [4] and the new production [4a]
3069 * changes in that revision. Also note that the macros used for the
3070 * productions Letter, Digit, CombiningChar and Extender are not needed
3072 * We still keep compatibility to pre-revision5 parsing semantic if the
3073 * new XML_PARSE_OLD10 option is given to the parser.
3076 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3077 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3079 * Use the new checks of production [4] [4a] and [5] of the
3080 * Update 5 of XML-1.0
3082 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3083 (((c >= 'a') && (c <= 'z')) ||
3084 ((c >= 'A') && (c <= 'Z')) ||
3085 (c == '_') || (c == ':') ||
3086 ((c >= 0xC0) && (c <= 0xD6)) ||
3087 ((c >= 0xD8) && (c <= 0xF6)) ||
3088 ((c >= 0xF8) && (c <= 0x2FF)) ||
3089 ((c >= 0x370) && (c <= 0x37D)) ||
3090 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3091 ((c >= 0x200C) && (c <= 0x200D)) ||
3092 ((c >= 0x2070) && (c <= 0x218F)) ||
3093 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3094 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3095 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3096 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3097 ((c >= 0x10000) && (c <= 0xEFFFF))))
3100 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3107 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3108 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3110 * Use the new checks of production [4] [4a] and [5] of the
3111 * Update 5 of XML-1.0
3113 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3114 (((c >= 'a') && (c <= 'z')) ||
3115 ((c >= 'A') && (c <= 'Z')) ||
3116 ((c >= '0') && (c <= '9')) || /* !start */
3117 (c == '_') || (c == ':') ||
3118 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3119 ((c >= 0xC0) && (c <= 0xD6)) ||
3120 ((c >= 0xD8) && (c <= 0xF6)) ||
3121 ((c >= 0xF8) && (c <= 0x2FF)) ||
3122 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3123 ((c >= 0x370) && (c <= 0x37D)) ||
3124 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3125 ((c >= 0x200C) && (c <= 0x200D)) ||
3126 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3127 ((c >= 0x2070) && (c <= 0x218F)) ||
3128 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132 ((c >= 0x10000) && (c <= 0xEFFFF))))
3135 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3136 (c == '.') || (c == '-') ||
3137 (c == '_') || (c == ':') ||
3138 (IS_COMBINING(c)) ||
3145 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3146 int *len, int *alloc, int normalize);
3148 static const xmlChar *
3149 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3155 nbParseNameComplex++;
3159 * Handler for more complex cases
3162 if (ctxt->instate == XML_PARSER_EOF)
3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3167 * Use the new checks of production [4] [4a] and [5] of the
3168 * Update 5 of XML-1.0
3170 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3171 (!(((c >= 'a') && (c <= 'z')) ||
3172 ((c >= 'A') && (c <= 'Z')) ||
3173 (c == '_') || (c == ':') ||
3174 ((c >= 0xC0) && (c <= 0xD6)) ||
3175 ((c >= 0xD8) && (c <= 0xF6)) ||
3176 ((c >= 0xF8) && (c <= 0x2FF)) ||
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x2070) && (c <= 0x218F)) ||
3181 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192 (((c >= 'a') && (c <= 'z')) ||
3193 ((c >= 'A') && (c <= 'Z')) ||
3194 ((c >= '0') && (c <= '9')) || /* !start */
3195 (c == '_') || (c == ':') ||
3196 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3197 ((c >= 0xC0) && (c <= 0xD6)) ||
3198 ((c >= 0xD8) && (c <= 0xF6)) ||
3199 ((c >= 0xF8) && (c <= 0x2FF)) ||
3200 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3201 ((c >= 0x370) && (c <= 0x37D)) ||
3202 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203 ((c >= 0x200C) && (c <= 0x200D)) ||
3204 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3205 ((c >= 0x2070) && (c <= 0x218F)) ||
3206 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210 ((c >= 0x10000) && (c <= 0xEFFFF))
3212 if (count++ > XML_PARSER_CHUNK_SIZE) {
3215 if (ctxt->instate == XML_PARSER_EOF)
3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224 (!IS_LETTER(c) && (c != '_') &&
3232 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3233 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234 (c == '.') || (c == '-') ||
3235 (c == '_') || (c == ':') ||
3236 (IS_COMBINING(c)) ||
3237 (IS_EXTENDER(c)))) {
3238 if (count++ > XML_PARSER_CHUNK_SIZE) {
3241 if (ctxt->instate == XML_PARSER_EOF)
3249 if ((len > XML_MAX_NAME_LENGTH) &&
3250 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3251 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3254 if (ctxt->input->cur - ctxt->input->base < len) {
3256 * There were a couple of bugs where PERefs led to a change
3257 * of the buffer. Check the buffer size to avoid passing an invalid
3258 * pointer to xmlDictLookup.
3260 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3261 "unexpected change of input buffer");
3264 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3265 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3271 * @ctxt: an XML parser context
3273 * parse an XML name.
3275 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3276 * CombiningChar | Extender
3278 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3280 * [6] Names ::= Name (#x20 Name)*
3282 * Returns the Name parsed or NULL
3286 xmlParseName(xmlParserCtxtPtr ctxt) {
3298 * Accelerator for simple ASCII names
3300 in = ctxt->input->cur;
3301 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3302 ((*in >= 0x41) && (*in <= 0x5A)) ||
3303 (*in == '_') || (*in == ':')) {
3305 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3306 ((*in >= 0x41) && (*in <= 0x5A)) ||
3307 ((*in >= 0x30) && (*in <= 0x39)) ||
3308 (*in == '_') || (*in == '-') ||
3309 (*in == ':') || (*in == '.'))
3311 if ((*in > 0) && (*in < 0x80)) {
3312 count = in - ctxt->input->cur;
3313 if ((count > XML_MAX_NAME_LENGTH) &&
3314 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3319 ctxt->input->cur = in;
3320 ctxt->nbChars += count;
3321 ctxt->input->col += count;
3323 xmlErrMemory(ctxt, NULL);
3327 /* accelerator for special cases */
3328 return(xmlParseNameComplex(ctxt));
3331 static const xmlChar *
3332 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3336 size_t startPosition = 0;
3339 nbParseNCNameComplex++;
3343 * Handler for more complex cases
3346 startPosition = CUR_PTR - BASE_PTR;
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3354 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3355 if (count++ > XML_PARSER_CHUNK_SIZE) {
3356 if ((len > XML_MAX_NAME_LENGTH) &&
3357 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3358 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3363 if (ctxt->instate == XML_PARSER_EOF)
3372 * when shrinking to extend the buffer we really need to preserve
3373 * the part of the name we already parsed. Hence rolling back
3374 * by current length.
3376 ctxt->input->cur -= l;
3378 ctxt->input->cur += l;
3379 if (ctxt->instate == XML_PARSER_EOF)
3384 if ((len > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3389 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3394 * @ctxt: an XML parser context
3395 * @len: length of the string parsed
3397 * parse an XML name.
3399 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3400 * CombiningChar | Extender
3402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3404 * Returns the Name parsed or NULL
3407 static const xmlChar *
3408 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3409 const xmlChar *in, *e;
3418 * Accelerator for simple ASCII names
3420 in = ctxt->input->cur;
3421 e = ctxt->input->end;
3422 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423 ((*in >= 0x41) && (*in <= 0x5A)) ||
3424 (*in == '_')) && (in < e)) {
3426 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3427 ((*in >= 0x41) && (*in <= 0x5A)) ||
3428 ((*in >= 0x30) && (*in <= 0x39)) ||
3429 (*in == '_') || (*in == '-') ||
3430 (*in == '.')) && (in < e))
3434 if ((*in > 0) && (*in < 0x80)) {
3435 count = in - ctxt->input->cur;
3436 if ((count > XML_MAX_NAME_LENGTH) &&
3437 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3438 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3441 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3442 ctxt->input->cur = in;
3443 ctxt->nbChars += count;
3444 ctxt->input->col += count;
3446 xmlErrMemory(ctxt, NULL);
3452 return(xmlParseNCNameComplex(ctxt));
3456 * xmlParseNameAndCompare:
3457 * @ctxt: an XML parser context
3459 * parse an XML name and compare it with @other for a match
3460 * (specialized for endtag parsing)
3462 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3463 * and the name for mismatch
3466 static const xmlChar *
3467 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3468 register const xmlChar *cmp = other;
3469 register const xmlChar *in;
3473 if (ctxt->instate == XML_PARSER_EOF)
3476 in = ctxt->input->cur;
3477 while (*in != 0 && *in == *cmp) {
3482 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3484 ctxt->input->cur = in;
3485 return (const xmlChar*) 1;
3487 /* failure (or end of input buffer), check with full function */
3488 ret = xmlParseName (ctxt);
3489 /* strings coming from the dictionary direct compare possible */
3491 return (const xmlChar*) 1;
3497 * xmlParseStringName:
3498 * @ctxt: an XML parser context
3499 * @str: a pointer to the string pointer (IN/OUT)
3501 * parse an XML name.
3503 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3504 * CombiningChar | Extender
3506 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3508 * [6] Names ::= Name (#x20 Name)*
3510 * Returns the Name parsed or NULL. The @str pointer
3511 * is updated to the current location in the string.
3515 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3516 xmlChar buf[XML_MAX_NAMELEN + 5];
3517 const xmlChar *cur = *str;
3522 nbParseStringName++;
3525 c = CUR_SCHAR(cur, l);
3526 if (!xmlIsNameStartChar(ctxt, c)) {
3530 COPY_BUF(l,buf,len,c);
3532 c = CUR_SCHAR(cur, l);
3533 while (xmlIsNameChar(ctxt, c)) {
3534 COPY_BUF(l,buf,len,c);
3536 c = CUR_SCHAR(cur, l);
3537 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3539 * Okay someone managed to make a huge name, so he's ready to pay
3540 * for the processing speed.
3545 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3546 if (buffer == NULL) {
3547 xmlErrMemory(ctxt, NULL);
3550 memcpy(buffer, buf, len);
3551 while (xmlIsNameChar(ctxt, c)) {
3552 if (len + 10 > max) {
3555 if ((len > XML_MAX_NAME_LENGTH) &&
3556 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3557 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3562 tmp = (xmlChar *) xmlRealloc(buffer,
3563 max * sizeof(xmlChar));
3565 xmlErrMemory(ctxt, NULL);
3571 COPY_BUF(l,buffer,len,c);
3573 c = CUR_SCHAR(cur, l);
3580 if ((len > XML_MAX_NAME_LENGTH) &&
3581 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3582 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3586 return(xmlStrndup(buf, len));
3591 * @ctxt: an XML parser context
3593 * parse an XML Nmtoken.
3595 * [7] Nmtoken ::= (NameChar)+
3597 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3599 * Returns the Nmtoken parsed or NULL
3603 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3604 xmlChar buf[XML_MAX_NAMELEN + 5];
3614 if (ctxt->instate == XML_PARSER_EOF)
3618 while (xmlIsNameChar(ctxt, c)) {
3619 if (count++ > XML_PARSER_CHUNK_SIZE) {
3623 COPY_BUF(l,buf,len,c);
3629 if (ctxt->instate == XML_PARSER_EOF)
3633 if (len >= XML_MAX_NAMELEN) {
3635 * Okay someone managed to make a huge token, so he's ready to pay
3636 * for the processing speed.
3641 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3642 if (buffer == NULL) {
3643 xmlErrMemory(ctxt, NULL);
3646 memcpy(buffer, buf, len);
3647 while (xmlIsNameChar(ctxt, c)) {
3648 if (count++ > XML_PARSER_CHUNK_SIZE) {
3651 if (ctxt->instate == XML_PARSER_EOF) {
3656 if (len + 10 > max) {
3659 if ((max > XML_MAX_NAME_LENGTH) &&
3660 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3661 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3666 tmp = (xmlChar *) xmlRealloc(buffer,
3667 max * sizeof(xmlChar));
3669 xmlErrMemory(ctxt, NULL);
3675 COPY_BUF(l,buffer,len,c);
3685 if ((len > XML_MAX_NAME_LENGTH) &&
3686 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3687 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3690 return(xmlStrndup(buf, len));
3694 * xmlParseEntityValue:
3695 * @ctxt: an XML parser context
3696 * @orig: if non-NULL store a copy of the original entity value
3698 * parse a value for ENTITY declarations
3700 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3701 * "'" ([^%&'] | PEReference | Reference)* "'"
3703 * Returns the EntityValue parsed with reference substituted or NULL
3707 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3708 xmlChar *buf = NULL;
3710 int size = XML_PARSER_BUFFER_SIZE;
3713 xmlChar *ret = NULL;
3714 const xmlChar *cur = NULL;
3715 xmlParserInputPtr input;
3717 if (RAW == '"') stop = '"';
3718 else if (RAW == '\'') stop = '\'';
3720 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3723 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3725 xmlErrMemory(ctxt, NULL);
3730 * The content of the entity definition is copied in a buffer.
3733 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3734 input = ctxt->input;
3736 if (ctxt->instate == XML_PARSER_EOF)
3741 * NOTE: 4.4.5 Included in Literal
3742 * When a parameter entity reference appears in a literal entity
3743 * value, ... a single or double quote character in the replacement
3744 * text is always treated as a normal data character and will not
3745 * terminate the literal.
3746 * In practice it means we stop the loop only when back at parsing
3747 * the initial entity and the quote is found
3749 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3750 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3751 if (len + 5 >= size) {
3755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3757 xmlErrMemory(ctxt, NULL);
3762 COPY_BUF(l,buf,len,c);
3773 if (ctxt->instate == XML_PARSER_EOF)
3776 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3782 * Raise problem w.r.t. '&' and '%' being used in non-entities
3783 * reference constructs. Note Charref will be handled in
3784 * xmlStringDecodeEntities()
3787 while (*cur != 0) { /* non input consuming */
3788 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3794 name = xmlParseStringName(ctxt, &cur);
3799 if ((nameOk == 0) || (*cur != ';')) {
3800 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3801 "EntityValue: '%c' forbidden except for entities references\n",
3805 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3806 (ctxt->inputNr == 1)) {
3807 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3817 * Then PEReference entities are substituted.
3819 * NOTE: 4.4.7 Bypassed
3820 * When a general entity reference appears in the EntityValue in
3821 * an entity declaration, it is bypassed and left as is.
3822 * so XML_SUBSTITUTE_REF is not set here.
3825 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3840 * xmlParseAttValueComplex:
3841 * @ctxt: an XML parser context
3842 * @attlen: the resulting attribute len
3843 * @normalize: whether to apply the inner normalization
3845 * parse a value for an attribute, this is the fallback function
3846 * of xmlParseAttValue() when the attribute parsing requires handling
3847 * of non-ASCII characters, or normalization compaction.
3849 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3852 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3854 xmlChar *buf = NULL;
3855 xmlChar *rep = NULL;
3857 size_t buf_size = 0;
3858 int c, l, in_space = 0;
3859 xmlChar *current = NULL;
3862 if (NXT(0) == '"') {
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3866 } else if (NXT(0) == '\'') {
3868 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3871 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3876 * allocate a translation buffer.
3878 buf_size = XML_PARSER_BUFFER_SIZE;
3879 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3880 if (buf == NULL) goto mem_error;
3883 * OK loop until we reach one of the ending char or a size limit.
3886 while (((NXT(0) != limit) && /* checked */
3887 (IS_CHAR(c)) && (c != '<')) &&
3888 (ctxt->instate != XML_PARSER_EOF)) {
3890 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3891 * special option is given
3893 if ((len > XML_MAX_TEXT_LENGTH) &&
3894 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3895 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3896 "AttValue length too long\n");
3902 if (NXT(1) == '#') {
3903 int val = xmlParseCharRef(ctxt);
3906 if (ctxt->replaceEntities) {
3907 if (len + 10 > buf_size) {
3908 growBuffer(buf, 10);
3913 * The reparsing will be done in xmlStringGetNodeList()
3914 * called by the attribute() function in SAX.c
3916 if (len + 10 > buf_size) {
3917 growBuffer(buf, 10);
3925 } else if (val != 0) {
3926 if (len + 10 > buf_size) {
3927 growBuffer(buf, 10);
3929 len += xmlCopyChar(0, &buf[len], val);
3932 ent = xmlParseEntityRef(ctxt);
3935 ctxt->nbentities += ent->owner;
3936 if ((ent != NULL) &&
3937 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3938 if (len + 10 > buf_size) {
3939 growBuffer(buf, 10);
3941 if ((ctxt->replaceEntities == 0) &&
3942 (ent->content[0] == '&')) {
3949 buf[len++] = ent->content[0];
3951 } else if ((ent != NULL) &&
3952 (ctxt->replaceEntities != 0)) {
3953 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3955 rep = xmlStringDecodeEntities(ctxt, ent->content,
3961 while (*current != 0) { /* non input consuming */
3962 if ((*current == 0xD) || (*current == 0xA) ||
3963 (*current == 0x9)) {
3967 buf[len++] = *current++;
3968 if (len + 10 > buf_size) {
3969 growBuffer(buf, 10);
3976 if (len + 10 > buf_size) {
3977 growBuffer(buf, 10);
3979 if (ent->content != NULL)
3980 buf[len++] = ent->content[0];
3982 } else if (ent != NULL) {
3983 int i = xmlStrlen(ent->name);
3984 const xmlChar *cur = ent->name;
3987 * This may look absurd but is needed to detect
3990 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3991 (ent->content != NULL) && (ent->checked == 0)) {
3992 unsigned long oldnbent = ctxt->nbentities;
3995 rep = xmlStringDecodeEntities(ctxt, ent->content,
3996 XML_SUBSTITUTE_REF, 0, 0, 0);
3999 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4001 if (xmlStrchr(rep, '<'))
4006 ent->content[0] = 0;
4011 * Just output the reference
4014 while (len + i + 10 > buf_size) {
4015 growBuffer(buf, i + 10);
4018 buf[len++] = *cur++;
4023 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4024 if ((len != 0) || (!normalize)) {
4025 if ((!normalize) || (!in_space)) {
4026 COPY_BUF(l,buf,len,0x20);
4027 while (len + 10 > buf_size) {
4028 growBuffer(buf, 10);
4035 COPY_BUF(l,buf,len,c);
4036 if (len + 10 > buf_size) {
4037 growBuffer(buf, 10);
4045 if (ctxt->instate == XML_PARSER_EOF)
4048 if ((in_space) && (normalize)) {
4049 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4053 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4054 } else if (RAW != limit) {
4055 if ((c != 0) && (!IS_CHAR(c))) {
4056 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4057 "invalid character in attribute value\n");
4059 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4060 "AttValue: ' expected\n");
4066 * There we potentially risk an overflow, don't allow attribute value of
4067 * length more than INT_MAX it is a very reasonable assumption !
4069 if (len >= INT_MAX) {
4070 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4071 "AttValue length too long\n");
4075 if (attlen != NULL) *attlen = (int) len;
4079 xmlErrMemory(ctxt, NULL);
4090 * @ctxt: an XML parser context
4092 * parse a value for an attribute
4093 * Note: the parser won't do substitution of entities here, this
4094 * will be handled later in xmlStringGetNodeList
4096 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4097 * "'" ([^<&'] | Reference)* "'"
4099 * 3.3.3 Attribute-Value Normalization:
4100 * Before the value of an attribute is passed to the application or
4101 * checked for validity, the XML processor must normalize it as follows:
4102 * - a character reference is processed by appending the referenced
4103 * character to the attribute value
4104 * - an entity reference is processed by recursively processing the
4105 * replacement text of the entity
4106 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4107 * appending #x20 to the normalized value, except that only a single
4108 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4109 * parsed entity or the literal entity value of an internal parsed entity
4110 * - other characters are processed by appending them to the normalized value
4111 * If the declared value is not CDATA, then the XML processor must further
4112 * process the normalized attribute value by discarding any leading and
4113 * trailing space (#x20) characters, and by replacing sequences of space
4114 * (#x20) characters by a single space (#x20) character.
4115 * All attributes for which no declaration has been read should be treated
4116 * by a non-validating parser as if declared CDATA.
4118 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4123 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
/* Refuse to parse when there is no context or no current input. */
4124 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
/*
 * Delegate to the internal parser: the len and alloc out-parameters are
 * passed as NULL and normalize is 0.
 */
4125 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4129 * xmlParseSystemLiteral:
4130 * @ctxt: an XML parser context
4132 * parse an XML Literal
4134 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4136 * Returns the SystemLiteral parsed or NULL
4140 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4141 xmlChar *buf = NULL;
4143 int size = XML_PARSER_BUFFER_SIZE;
4146 int state = ctxt->instate;
4153 } else if (RAW == '\'') {
4157 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4161 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4163 xmlErrMemory(ctxt, NULL);
4166 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4168 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4169 if (len + 5 >= size) {
4172 if ((size > XML_MAX_NAME_LENGTH) &&
4173 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4174 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4176 ctxt->instate = (xmlParserInputState) state;
4180 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4183 xmlErrMemory(ctxt, NULL);
4184 ctxt->instate = (xmlParserInputState) state;
4193 if (ctxt->instate == XML_PARSER_EOF) {
4198 COPY_BUF(l,buf,len,cur);
4208 ctxt->instate = (xmlParserInputState) state;
4209 if (!IS_CHAR(cur)) {
4210 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4218 * xmlParsePubidLiteral:
4219 * @ctxt: an XML parser context
4221 * parse an XML public literal
4223 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4225 * Returns the PubidLiteral parsed or NULL.
4229 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4230 xmlChar *buf = NULL;
4232 int size = XML_PARSER_BUFFER_SIZE;
4236 xmlParserInputState oldstate = ctxt->instate;
4242 } else if (RAW == '\'') {
4246 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4249 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4251 xmlErrMemory(ctxt, NULL);
4254 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4256 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4257 if (len + 1 >= size) {
4260 if ((size > XML_MAX_NAME_LENGTH) &&
4261 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4262 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4267 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4269 xmlErrMemory(ctxt, NULL);
4280 if (ctxt->instate == XML_PARSER_EOF) {
4295 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4299 ctxt->instate = oldstate;
4303 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4306 * used for the test in the inner loop of the char data testing
/*
 * Lookup table indexed by input byte, consulted by the
 * "while (test_char_data[*in])" scan loop in xmlParseCharData().
 * A nonzero entry (the byte's own value) lets the loop step over the byte;
 * a zero entry stops it. The stop bytes are: every control character
 * below 0x20 except 0x09, '&' (0x26), '<' (0x3C), ']' (0x5D) and every
 * byte from 0x80 upwards.
 */
4308 static const unsigned char test_char_data[256] = {
4309 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4310 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4311 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4312 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4313 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4314 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4315 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4316 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4317 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4318 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4319 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4320 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4321 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4322 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4323 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4324 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4325 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4326 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4327 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4328 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4329 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4331 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4332 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4333 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4334 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4335 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4336 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4337 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4338 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4339 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4340 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4345 * @ctxt: an XML parser context
4346 * @cdata: int indicating whether we are within a CDATA section
4348 * parse a CharData section.
4349 * if we are within a CDATA section ']]>' marks an end of section.
4351 * The right angle bracket (>) may be represented using the string "&gt;",
4352 * and must, for compatibility, be escaped using "&gt;" or a character
4353 * reference when it appears in the string "]]>" in content, when that
4354 * string is not marking the end of a CDATA section.
4356 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4360 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4363 int line = ctxt->input->line;
4364 int col = ctxt->input->col;
4370 * Accelerated common case where input doesn't need to be
4371 * modified before passing it to the handler.
4374 in = ctxt->input->cur;
4377 while (*in == 0x20) { in++; ctxt->input->col++; }
4380 ctxt->input->line++; ctxt->input->col = 1;
4382 } while (*in == 0xA);
4383 goto get_more_space;
4386 nbchar = in - ctxt->input->cur;
4388 const xmlChar *tmp = ctxt->input->cur;
4389 ctxt->input->cur = in;
4391 if ((ctxt->sax != NULL) &&
4392 (ctxt->sax->ignorableWhitespace !=
4393 ctxt->sax->characters)) {
4394 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4395 if (ctxt->sax->ignorableWhitespace != NULL)
4396 ctxt->sax->ignorableWhitespace(ctxt->userData,
4399 if (ctxt->sax->characters != NULL)
4400 ctxt->sax->characters(ctxt->userData,
4402 if (*ctxt->space == -1)
4405 } else if ((ctxt->sax != NULL) &&
4406 (ctxt->sax->characters != NULL)) {
4407 ctxt->sax->characters(ctxt->userData,
4415 ccol = ctxt->input->col;
4416 while (test_char_data[*in]) {
4420 ctxt->input->col = ccol;
4423 ctxt->input->line++; ctxt->input->col = 1;
4425 } while (*in == 0xA);
4429 if ((in[1] == ']') && (in[2] == '>')) {
4430 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4431 ctxt->input->cur = in + 1;
4438 nbchar = in - ctxt->input->cur;
4440 if ((ctxt->sax != NULL) &&
4441 (ctxt->sax->ignorableWhitespace !=
4442 ctxt->sax->characters) &&
4443 (IS_BLANK_CH(*ctxt->input->cur))) {
4444 const xmlChar *tmp = ctxt->input->cur;
4445 ctxt->input->cur = in;
4447 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4448 if (ctxt->sax->ignorableWhitespace != NULL)
4449 ctxt->sax->ignorableWhitespace(ctxt->userData,
4452 if (ctxt->sax->characters != NULL)
4453 ctxt->sax->characters(ctxt->userData,
4455 if (*ctxt->space == -1)
4458 line = ctxt->input->line;
4459 col = ctxt->input->col;
4460 } else if (ctxt->sax != NULL) {
4461 if (ctxt->sax->characters != NULL)
4462 ctxt->sax->characters(ctxt->userData,
4463 ctxt->input->cur, nbchar);
4464 line = ctxt->input->line;
4465 col = ctxt->input->col;
4467 /* something really bad happened in the SAX callback */
4468 if (ctxt->instate != XML_PARSER_CONTENT)
4471 ctxt->input->cur = in;
4475 ctxt->input->cur = in;
4477 ctxt->input->line++; ctxt->input->col = 1;
4478 continue; /* while */
4490 if (ctxt->instate == XML_PARSER_EOF)
4492 in = ctxt->input->cur;
4493 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4496 ctxt->input->line = line;
4497 ctxt->input->col = col;
4498 xmlParseCharDataComplex(ctxt, cdata);
4502 * xmlParseCharDataComplex:
4503 * @ctxt: an XML parser context
4504 * @cdata: int indicating whether we are within a CDATA section
4506 * parse a CharData section. This is the fallback function
4507 * of xmlParseCharData() when the parsing requires handling
4508 * of non-ASCII characters.
4511 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
     /*
      * Slow path for character data: characters are accumulated in a local
      * buffer and delivered to the SAX handlers in chunks.
      */
     /*
      * The buffer is flushed once nbchar reaches XML_PARSER_BIG_BUFFER_SIZE;
      * the extra 5 bytes are presumably headroom for the last COPY_BUF
      * before the flush check - TODO confirm against the COPY_BUF macro.
      */
4512 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4520 while ((cur != '<') && /* checked */
4522 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4523 if ((cur == ']') && (NXT(1) == ']') &&
4527 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
     /* Append the current character to buf, advancing nbchar. */
4530 COPY_BUF(l,buf,nbchar,cur);
4531 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4535 * OK the segment is to be consumed as chars.
     /* Nothing is delivered when there is no handler or SAX is disabled. */
4537 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4538 if (areBlanks(ctxt, buf, nbchar, 0)) {
4539 if (ctxt->sax->ignorableWhitespace != NULL)
4540 ctxt->sax->ignorableWhitespace(ctxt->userData,
4543 if (ctxt->sax->characters != NULL)
4544 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4545 if ((ctxt->sax->characters !=
4546 ctxt->sax->ignorableWhitespace) &&
4547 (*ctxt->space == -1))
4552 /* something really bad happened in the SAX callback */
4553 if (ctxt->instate != XML_PARSER_CONTENT)
4560 if (ctxt->instate == XML_PARSER_EOF)
4569 * OK the segment is to be consumed as chars.
     /* Second delivery site, using the same blank/characters dispatch as the one above. */
4571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4572 if (areBlanks(ctxt, buf, nbchar, 0)) {
4573 if (ctxt->sax->ignorableWhitespace != NULL)
4574 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4576 if (ctxt->sax->characters != NULL)
4577 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4578 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4579 (*ctxt->space == -1))
     /* A non-zero character that fails IS_CHAR is reported as an invalid char. */
4584 if ((cur != 0) && (!IS_CHAR(cur))) {
4585 /* Generate the error and skip the offending character */
4586 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4587 "PCDATA invalid Char value %d\n",
4594 * xmlParseExternalID:
4595 * @ctxt: an XML parser context
4596 * @publicID: a xmlChar** receiving PubidLiteral
4597 * @strict: indicate whether we should restrict parsing to only
4598 * production [75], see NOTE below
4600 * Parse an External ID or a Public ID
4602 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4603 * 'PUBLIC' S PubidLiteral S SystemLiteral
4605 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4606 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4608 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4610 * Returns the SystemLiteral; in the second case publicID
4611 * receives the PubidLiteral. If strict is off
4612 * it is possible to return NULL and have publicID set.
4616 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
     /*
      * Recognizes the SYSTEM and PUBLIC keywords at the current position.
      * The system literal is kept in URI; the public literal, when present,
      * is stored through publicID.
      */
4617 xmlChar *URI = NULL;
     /* SYSTEM form: blanks are required, then a system literal is parsed. */
4622 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4624 if (SKIP_BLANKS == 0) {
4625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4626 "Space required after 'SYSTEM'\n");
4628 URI = xmlParseSystemLiteral(ctxt);
4630 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
     /* PUBLIC form: blanks are required, then the public literal goes to *publicID. */
4632 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4634 if (SKIP_BLANKS == 0) {
4635 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4636 "Space required after 'PUBLIC'\n");
4638 *publicID = xmlParsePubidLiteral(ctxt);
4639 if (*publicID == NULL) {
4640 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4644 * We don't handle [83] so "S SystemLiteral" is required.
4646 if (SKIP_BLANKS == 0) {
4647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4648 "Space required after the Public Identifier\n");
4652 * We handle [83] so we return immediately, if
4653 * "S SystemLiteral" is not detected. We skip blanks if no
4654 * system literal was found, but this is harmless since we must
4655 * be at the end of a NotationDecl.
     /* Returning NULL here leaves *publicID set to the literal parsed above. */
4657 if (SKIP_BLANKS == 0) return(NULL);
4658 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4660 URI = xmlParseSystemLiteral(ctxt);
4662 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4669 * xmlParseCommentComplex:
4670 * @ctxt: an XML parser context
4671 * @buf: the already parsed part of the buffer
4672 * @len: number of bytes filled in the buffer
4673 * @size: allocated size of the buffer
4675 * Skip an XML (SGML) comment <!-- .... -->
4676 * The spec says that "For compatibility, the string "--" (double-hyphen)
4677 * must not occur within comments. "
4678 * This is the slow routine in case the accelerator for ascii didn't work
4680 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4683 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4684 size_t len, size_t size) {
     /*
      * Slow path for comments: continues from the data already held in buf,
      * grows the buffer as needed and reports the finished comment through
      * the SAX comment callback.
      */
     /* Input id at entry; compared again before the SAX callback. */
4691 inputid = ctxt->input->id;
     /* Working buffer of XML_PARSER_BUFFER_SIZE bytes; xmlErrMemory() reports an allocation failure. */
4695 size = XML_PARSER_BUFFER_SIZE;
4696 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4698 xmlErrMemory(ctxt, NULL);
4702 GROW; /* Assure there's enough input data */
4705 goto not_terminated;
4707 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4708 "xmlParseComment: invalid xmlChar value %d\n",
4716 goto not_terminated;
4718 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4719 "xmlParseComment: invalid xmlChar value %d\n",
4727 goto not_terminated;
     /*
      * q, r and cur are the characters examined by the loop condition;
      * q is the one appended to buf on each iteration.
      */
4728 while (IS_CHAR(cur) && /* checked */
4730 (r != '-') || (q != '-'))) {
     /* Two consecutive hyphens inside the comment body are an error. */
4731 if ((r == '-') && (q == '-')) {
4732 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
     /* Length limit applies unless the XML_PARSE_HUGE option is set. */
4734 if ((len > XML_MAX_TEXT_LENGTH) &&
4735 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4736 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4737 "Comment too big found", NULL);
     /* Double the buffer when fewer than 5 bytes of headroom remain. */
4741 if (len + 5 >= size) {
4745 new_size = size * 2;
4746 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4747 if (new_buf == NULL) {
4749 xmlErrMemory(ctxt, NULL);
4755 COPY_BUF(ql,buf,len,q);
4765 if (ctxt->instate == XML_PARSER_EOF) {
4780 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4781 "Comment not terminated \n<!--%.50s\n", buf);
4782 } else if (!IS_CHAR(cur)) {
4783 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784 "xmlParseComment: invalid xmlChar value %d\n",
     /* The comment must end in the same input it started in. */
4787 if (inputid != ctxt->input->id) {
4788 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4789 "Comment doesn't start and stop in the same"
     /* Deliver the comment text unless SAX is absent or disabled. */
4793 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4794 (!ctxt->disableSAX))
4795 ctxt->sax->comment(ctxt->userData, buf);
4800 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4801 "Comment not terminated\n", NULL);
4808 * @ctxt: an XML parser context
4810 * Skip an XML (SGML) comment <!-- .... -->
4811 * The spec says that "For compatibility, the string "--" (double-hyphen)
4812 * must not occur within comments. "
4814 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4817 xmlParseComment(xmlParserCtxtPtr ctxt) {
     /*
      * Fast path for comments: ASCII runs are copied straight from the
      * input buffer into buf; the last call below continues in
      * xmlParseCommentComplex() with whatever has been collected.
      */
4818 xmlChar *buf = NULL;
4819 size_t size = XML_PARSER_BUFFER_SIZE;
4821 xmlParserInputState state;
4828 * Check that there is a comment right here.
4830 if ((RAW != '<') || (NXT(1) != '!') ||
4831 (NXT(2) != '-') || (NXT(3) != '-')) return;
     /* Save the parser state (restored on the exits below) and the input id. */
4832 state = ctxt->instate;
4833 ctxt->instate = XML_PARSER_COMMENT;
4834 inputid = ctxt->input->id;
4840 * Accelerated common case where input don't need to be
4841 * modified before passing it to the handler.
4843 in = ctxt->input->cur;
4847 ctxt->input->line++; ctxt->input->col = 1;
4849 } while (*in == 0xA);
     /*
      * The column is tracked in the local ccol while the scan accepts
      * bytes in 0x20..0x7F other than the hyphen, then written back.
      */
4852 ccol = ctxt->input->col;
4853 while (((*in > '-') && (*in <= 0x7F)) ||
4854 ((*in >= 0x20) && (*in < '-')) ||
4859 ctxt->input->col = ccol;
4862 ctxt->input->line++; ctxt->input->col = 1;
4864 } while (*in == 0xA);
     /* Number of bytes scanned since the current input position. */
4867 nbchar = in - ctxt->input->cur;
4869 * save current set of data
     /* Data is only buffered when a SAX comment callback is installed. */
4872 if ((ctxt->sax != NULL) &&
4873 (ctxt->sax->comment != NULL)) {
4875 if ((*in == '-') && (in[1] == '-'))
4878 size = XML_PARSER_BUFFER_SIZE + nbchar;
4879 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4881 xmlErrMemory(ctxt, NULL);
4882 ctxt->instate = state;
4886 } else if (len + nbchar + 1 >= size) {
     /* Not enough room for the chunk plus one more byte: enlarge the buffer. */
4888 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4889 new_buf = (xmlChar *) xmlRealloc(buf,
4890 size * sizeof(xmlChar));
4891 if (new_buf == NULL) {
4893 xmlErrMemory(ctxt, NULL);
4894 ctxt->instate = state;
     /* Append the scanned chunk to buf. */
4899 memcpy(&buf[len], ctxt->input->cur, nbchar);
     /* Length limit applies unless the XML_PARSE_HUGE option is set. */
4904 if ((len > XML_MAX_TEXT_LENGTH) &&
4905 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4906 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4907 "Comment too big found", NULL);
4911 ctxt->input->cur = in;
4914 ctxt->input->line++; ctxt->input->col = 1;
4919 ctxt->input->cur = in;
4921 ctxt->input->line++; ctxt->input->col = 1;
4922 continue; /* while */
4928 if (ctxt->instate == XML_PARSER_EOF) {
4932 in = ctxt->input->cur;
     /* The comment must end in the same input it started in. */
4936 if (ctxt->input->id != inputid) {
4937 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4938 "comment doesn't start and stop in the"
     /* The SAX callback receives either the collected buffer or an empty string. */
4942 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4943 (!ctxt->disableSAX)) {
4945 ctxt->sax->comment(ctxt->userData, buf);
4947 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
     /* Restore the saved state unless the parser state is XML_PARSER_EOF. */
4951 if (ctxt->instate != XML_PARSER_EOF)
4952 ctxt->instate = state;
4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 "Double hyphen within comment: "
4961 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962 "Double hyphen within comment\n", NULL);
     /* Stay in the fast loop while the next byte is in 0x20..0x7F or is a tab (0x09). */
4970 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
     /* After the fast loop exits, the slow path continues with the data collected so far. */
4971 xmlParseCommentComplex(ctxt, buf, len, size);
4972 ctxt->instate = state;
4979 * @ctxt: an XML parser context
4981 * parse the name of a PI
4983 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4985 * Returns the PITarget name or NULL
4989 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
     /*
      * Parses a name with xmlParseName() and diagnoses names whose first
      * three letters spell xml in any mix of upper and lower case.
      */
4990 const xmlChar *name;
4992 name = xmlParseName(ctxt);
4993 if ((name != NULL) &&
4994 ((name[0] == 'x') || (name[0] == 'X')) &&
4995 ((name[1] == 'm') || (name[1] == 'M')) &&
4996 ((name[2] == 'l') || (name[2] == 'L'))) {
     /* Exactly the lower-case three-letter name: a misplaced XML declaration. */
4998 if ((name[0] == 'x') && (name[1] == 'm') &&
4999 (name[2] == 'l') && (name[3] == 0)) {
5000 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5001 "XML declaration allowed only at the start of the document\n");
     /* Any other three-letter case variant is a reserved name. */
5003 } else if (name[3] == 0) {
5004 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
     /* xmlW3CPIs is scanned up to its NULL terminator for an exact match. */
5008 if (xmlW3CPIs[i] == NULL) break;
5009 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5012 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5013 "xmlParsePITarget: invalid name prefix 'xml'\n",
     /* A colon anywhere in the name is reported as a namespace error. */
5016 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5017 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5018 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5023 #ifdef LIBXML_CATALOG_ENABLED
5025 * xmlParseCatalogPI:
5026 * @ctxt: an XML parser context
5027 * @catalog: the PI value string
5029 * parse an XML Catalog Processing Instruction.
5031 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5033 * Occurs only if allowed by the user and if happening in the Misc
5034 * part of the document before any doctype information
5035 * This will add the given catalog to the parsing context in order
5036 * to be used if there is a resolution need further down in the document
5040 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
     /*
      * Extracts a quoted URL from the PI content and adds it to
      * ctxt->catalogs; a syntax problem is reported as a warning.
      */
5041 xmlChar *URL = NULL;
5042 const xmlChar *tmp, *base;
     /* After leading blanks, the 7-character keyword is compared. */
5046 while (IS_BLANK_CH(*tmp)) tmp++;
5047 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5050 while (IS_BLANK_CH(*tmp)) tmp++;
5055 while (IS_BLANK_CH(*tmp)) tmp++;
     /* marker must be a single or a double quote. */
5057 if ((marker != '\'') && (marker != '"'))
     /* Advance to the matching quote or to the end of the string. */
5061 while ((*tmp != 0) && (*tmp != marker)) tmp++;
     /* Copy the text between base and the closing position. */
5064 URL = xmlStrndup(base, tmp - base);
5066 while (IS_BLANK_CH(*tmp)) tmp++;
5071 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5077 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5078 "Catalog PI syntax error: %s\n",
5087 * @ctxt: an XML parser context
5089 * parse an XML Processing Instruction.
5091 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5093 * The processing is transferred to SAX once parsed.
5097 xmlParsePI(xmlParserCtxtPtr ctxt) {
     /*
      * Parses a processing instruction: target name, optional data up to
      * the closing ?> marker, then the SAX processingInstruction callback.
      */
5098 xmlChar *buf = NULL;
5100 size_t size = XML_PARSER_BUFFER_SIZE;
5102 const xmlChar *target;
5103 xmlParserInputState state;
5106 if ((RAW == '<') && (NXT(1) == '?')) {
     /* Remember the input id and parser state; the state is restored on the exits below. */
5107 int inputid = ctxt->input->id;
5108 state = ctxt->instate;
5109 ctxt->instate = XML_PARSER_PI;
5111 * this is a Processing Instruction.
5117 * Parse the target name and check for special support like
5120 target = xmlParsePITarget(ctxt);
5121 if (target != NULL) {
     /* The closing marker follows the target directly: a PI without data. */
5122 if ((RAW == '?') && (NXT(1) == '>')) {
5123 if (inputid != ctxt->input->id) {
5124 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5125 "PI declaration doesn't start and stop in"
5126 " the same entity\n");
5133 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5134 (ctxt->sax->processingInstruction != NULL))
5135 ctxt->sax->processingInstruction(ctxt->userData,
5137 if (ctxt->instate != XML_PARSER_EOF)
5138 ctxt->instate = state;
     /* Buffer for the PI data; xmlErrMemory() reports an allocation failure. */
5141 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5143 xmlErrMemory(ctxt, NULL);
5144 ctxt->instate = state;
5147 if (SKIP_BLANKS == 0) {
5148 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "ParsePI: PI %s space expected\n", target);
     /* Collect valid characters until the closing marker is the next input. */
5152 while (IS_CHAR(cur) && /* checked */
5153 ((cur != '?') || (NXT(1) != '>'))) {
     /* Double the buffer when fewer than 5 bytes of headroom remain. */
5154 if (len + 5 >= size) {
5156 size_t new_size = size * 2;
5157 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5159 xmlErrMemory(ctxt, NULL);
5161 ctxt->instate = state;
5170 if (ctxt->instate == XML_PARSER_EOF) {
     /* Length limit applies unless the XML_PARSE_HUGE option is set. */
5175 if ((len > XML_MAX_TEXT_LENGTH) &&
5176 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5177 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5178 "PI %s too big found", target);
5180 ctxt->instate = state;
5184 COPY_BUF(l,buf,len,cur);
5193 if ((len > XML_MAX_TEXT_LENGTH) &&
5194 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5195 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5196 "PI %s too big found", target);
5198 ctxt->instate = state;
5203 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5204 "ParsePI: PI %s never end ...\n", target);
5206 if (inputid != ctxt->input->id) {
5207 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5208 "PI declaration doesn't start and stop in"
5209 " the same entity\n");
     /*
      * A catalog PI is only honoured when the saved state was MISC or START
      * and the catalog defaults allow document-supplied catalogs.
      */
5213 #ifdef LIBXML_CATALOG_ENABLED
5214 if (((state == XML_PARSER_MISC) ||
5215 (state == XML_PARSER_START)) &&
5216 (xmlStrEqual(target, XML_CATALOG_PI))) {
5217 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5218 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5219 (allow == XML_CATA_ALLOW_ALL))
5220 xmlParseCatalogPI(ctxt, buf);
5228 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5229 (ctxt->sax->processingInstruction != NULL))
5230 ctxt->sax->processingInstruction(ctxt->userData,
5235 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
     /* Restore the saved state unless the parser state is XML_PARSER_EOF. */
5237 if (ctxt->instate != XML_PARSER_EOF)
5238 ctxt->instate = state;
5243 * xmlParseNotationDecl:
5244 * @ctxt: an XML parser context
5246 * parse a notation declaration
5248 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5250 * Hence there are actually 3 choices:
5251 * 'PUBLIC' S PubidLiteral
5252 * 'PUBLIC' S PubidLiteral S SystemLiteral
5253 * and 'SYSTEM' S SystemLiteral
5255 * See the NOTE on xmlParseExternalID().
5259 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
     /*
      * Parses a NOTATION declaration: name, then an external or public
      * identifier, reported through the SAX notationDecl callback.
      */
5260 const xmlChar *name;
5264 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
     /* Input id at the start of the declaration; compared again before the callback. */
5265 int inputid = ctxt->input->id;
5268 if (SKIP_BLANKS == 0) {
5269 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5270 "Space required after '<!NOTATION'\n");
5274 name = xmlParseName(ctxt);
5276 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
     /* A colon in the notation name is reported as a namespace error. */
5279 if (xmlStrchr(name, ':') != NULL) {
5280 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5281 "colons are forbidden from notation names '%s'\n",
5284 if (SKIP_BLANKS == 0) {
5285 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5286 "Space required after the NOTATION name'\n");
     /* strict is 0 here, so a public identifier without a system literal is accepted. */
5293 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5297 if (inputid != ctxt->input->id) {
5298 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299 "Notation declaration doesn't start and stop"
5300 " in the same entity\n");
5303 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5304 (ctxt->sax->notationDecl != NULL))
5305 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5307 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
     /* Both identifier strings are released here. */
5309 if (Systemid != NULL) xmlFree(Systemid);
5310 if (Pubid != NULL) xmlFree(Pubid);
5315 * xmlParseEntityDecl:
5316 * @ctxt: an XML parser context
5318 * parse <!ENTITY declarations
5320 * [70] EntityDecl ::= GEDecl | PEDecl
5322 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5324 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5326 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5328 * [74] PEDef ::= EntityValue | ExternalID
5330 * [76] NDataDecl ::= S 'NDATA' S Name
5332 * [ VC: Notation Declared ]
5333 * The Name must match the declared name of a notation.
5337 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
     /*
      * Parses an ENTITY declaration (general or parameter) and reports it
      * through the SAX entityDecl or unparsedEntityDecl callbacks.
      */
5338 const xmlChar *name = NULL;
5339 xmlChar *value = NULL;
5340 xmlChar *URI = NULL, *literal = NULL;
5341 const xmlChar *ndata = NULL;
5342 int isParameter = 0;
5343 xmlChar *orig = NULL;
5345 /* GROW; done in the caller */
5346 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
     /* Input id at the start of the declaration; compared again near the end. */
5347 int inputid = ctxt->input->id;
5350 if (SKIP_BLANKS == 0) {
5351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after '<!ENTITY'\n");
5357 if (SKIP_BLANKS == 0) {
5358 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359 "Space required after '%%'\n");
5364 name = xmlParseName(ctxt);
5366 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5367 "xmlParseEntityDecl: no name\n");
     /* A colon in the entity name is reported as a namespace error. */
5370 if (xmlStrchr(name, ':') != NULL) {
5371 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5372 "colons are forbidden from entities names '%s'\n",
5375 if (SKIP_BLANKS == 0) {
5376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5377 "Space required after the entity name\n");
5380 ctxt->instate = XML_PARSER_ENTITY_DECL;
5382 * handle the various case of definitions...
     /* A quoted literal is reported as an internal parameter entity. */
5385 if ((RAW == '"') || (RAW == '\'')) {
5386 value = xmlParseEntityValue(ctxt, &orig);
5388 if ((ctxt->sax != NULL) &&
5389 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5390 ctxt->sax->entityDecl(ctxt->userData, name,
5391 XML_INTERNAL_PARAMETER_ENTITY,
     /* Otherwise an external identifier is parsed in strict mode (third argument 1). */
5395 URI = xmlParseExternalID(ctxt, &literal, 1);
5396 if ((URI == NULL) && (literal == NULL)) {
5397 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5402 uri = xmlParseURI((const char *) URI);
5404 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5405 "Invalid URI: %s\n", URI);
5407 * This really ought to be a well formedness error
5408 * but the XML Core WG decided otherwise c.f. issue
5409 * E26 of the XML erratas.
5412 if (uri->fragment != NULL) {
5414 * Okay this is foolish to block those but not
5417 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5419 if ((ctxt->sax != NULL) &&
5420 (!ctxt->disableSAX) &&
5421 (ctxt->sax->entityDecl != NULL))
5422 ctxt->sax->entityDecl(ctxt->userData, name,
5423 XML_EXTERNAL_PARAMETER_ENTITY,
5424 literal, URI, NULL);
     /* A quoted literal is reported as an internal general entity. */
5431 if ((RAW == '"') || (RAW == '\'')) {
5432 value = xmlParseEntityValue(ctxt, &orig);
5433 if ((ctxt->sax != NULL) &&
5434 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5435 ctxt->sax->entityDecl(ctxt->userData, name,
5436 XML_INTERNAL_GENERAL_ENTITY,
5439 * For expat compatibility in SAX mode.
     /*
      * With no document, or one whose version is SAX_COMPAT_MODE, a
      * placeholder document and internal subset are created as needed and
      * the entity is recorded with xmlSAX2EntityDecl().
      */
5441 if ((ctxt->myDoc == NULL) ||
5442 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5443 if (ctxt->myDoc == NULL) {
5444 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5445 if (ctxt->myDoc == NULL) {
5446 xmlErrMemory(ctxt, "New Doc failed");
5449 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5451 if (ctxt->myDoc->intSubset == NULL)
5452 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5453 BAD_CAST "fake", NULL, NULL);
5455 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
     /* Otherwise an external identifier is parsed in strict mode (third argument 1). */
5459 URI = xmlParseExternalID(ctxt, &literal, 1);
5460 if ((URI == NULL) && (literal == NULL)) {
5461 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5466 uri = xmlParseURI((const char *)URI);
5468 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5469 "Invalid URI: %s\n", URI);
5471 * This really ought to be a well formedness error
5472 * but the XML Core WG decided otherwise c.f. issue
5473 * E26 of the XML erratas.
5476 if (uri->fragment != NULL) {
5478 * Okay this is foolish to block those but not
5481 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5486 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5488 "Space required before 'NDATA'\n");
     /* An NDATA clause is reported through unparsedEntityDecl. */
5490 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5492 if (SKIP_BLANKS == 0) {
5493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494 "Space required after 'NDATA'\n");
5496 ndata = xmlParseName(ctxt);
5497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5498 (ctxt->sax->unparsedEntityDecl != NULL))
5499 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5500 literal, URI, ndata);
5502 if ((ctxt->sax != NULL) &&
5503 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504 ctxt->sax->entityDecl(ctxt->userData, name,
5505 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5506 literal, URI, NULL);
5508 * For expat compatibility in SAX mode.
5509 * assuming the entity repalcement was asked for
     /* Same placeholder-document handling as above, here only when replaceEntities is set. */
5511 if ((ctxt->replaceEntities != 0) &&
5512 ((ctxt->myDoc == NULL) ||
5513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5514 if (ctxt->myDoc == NULL) {
5515 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5516 if (ctxt->myDoc == NULL) {
5517 xmlErrMemory(ctxt, "New Doc failed");
5520 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
5526 xmlSAX2EntityDecl(ctxt, name,
5527 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5528 literal, URI, NULL);
5533 if (ctxt->instate == XML_PARSER_EOF)
     /* An unterminated declaration is fatal and halts the parser. */
5537 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5538 "xmlParseEntityDecl: entity %s not terminated\n", name);
5539 xmlHaltParser(ctxt);
5541 if (inputid != ctxt->input->id) {
5542 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5543 "Entity declaration doesn't start and stop in"
5544 " the same entity\n");
5550 * Ugly mechanism to save the raw entity value.
     /* The entity just declared is looked up again through the SAX getters. */
5552 xmlEntityPtr cur = NULL;
5555 if ((ctxt->sax != NULL) &&
5556 (ctxt->sax->getParameterEntity != NULL))
5557 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5559 if ((ctxt->sax != NULL) &&
5560 (ctxt->sax->getEntity != NULL))
5561 cur = ctxt->sax->getEntity(ctxt->userData, name);
     /* Fall back to the built-in lookup when userData is the parser context itself. */
5562 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5563 cur = xmlSAX2GetEntity(ctxt, name);
5566 if ((cur != NULL) && (cur->orig == NULL)) {
     /* Release the temporaries that are still set. */
5573 if (value != NULL) xmlFree(value);
5574 if (URI != NULL) xmlFree(URI);
5575 if (literal != NULL) xmlFree(literal);
5576 if (orig != NULL) xmlFree(orig);
5581 * xmlParseDefaultDecl:
5582 * @ctxt: an XML parser context
5583 * @value: Receive a possible fixed default value for the attribute
5585 * Parse an attribute default declaration
5587 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5589 * [ VC: Required Attribute ]
5590 * if the default declaration is the keyword #REQUIRED, then the
5591 * attribute must be specified for all elements of the type in the
5592 * attribute-list declaration.
5594 * [ VC: Attribute Default Legal ]
5595 * The declared default value must meet the lexical constraints of
5596 * the declared attribute type c.f. xmlValidateAttributeDecl()
5598 * [ VC: Fixed Attribute Default ]
5599 * if an attribute has a default value declared with the #FIXED
5600 * keyword, instances of that attribute must match the default value.
5602 * [ WFC: No < in Attribute Values ]
5603 * handled in xmlParseAttValue()
5605 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5606 * or XML_ATTRIBUTE_FIXED.
5610 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
     /*
      * Recognizes the #REQUIRED and #IMPLIED keywords, or parses an
      * attribute value optionally preceded by #FIXED.
      */
5615 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5617 return(XML_ATTRIBUTE_REQUIRED);
5619 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5621 return(XML_ATTRIBUTE_IMPLIED);
     /* Neither keyword matched: val starts as NONE and becomes FIXED if #FIXED is present. */
5623 val = XML_ATTRIBUTE_NONE;
5624 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5626 val = XML_ATTRIBUTE_FIXED;
5627 if (SKIP_BLANKS == 0) {
5628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629 "Space required after '#FIXED'\n");
     /* The parser state is set to XML_PARSER_DTD after the value is parsed. */
5632 ret = xmlParseAttValue(ctxt);
5633 ctxt->instate = XML_PARSER_DTD;
     /* The error raised here reuses the code already stored in ctxt->errNo. */
5635 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5636 "Attribute default value declaration error\n");
5643 * xmlParseNotationType:
5644 * @ctxt: an XML parser context
5646 * parse a Notation attribute type.
5648 * Note: the leading 'NOTATION' S part has already been parsed...
5650 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5652 * [ VC: Notation Attributes ]
5653 * Values of this type must match one of the notation names included
5654 * in the declaration; all notation names in the declaration must be declared.
5656 * Returns: the notation attribute tree built while parsing
5660 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
     /*
      * Builds a list of xmlEnumeration nodes from notation names separated
      * by the | character; ret is the head and last the tail of the list.
      */
5661 const xmlChar *name;
5662 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5665 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5672 name = xmlParseName(ctxt);
5674 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5675 "Name expected in NOTATION declaration\n");
     /* The partial list is released on this error. */
5676 xmlFreeEnumeration(ret);
     /* Walk the nodes built so far to detect a duplicated name. */
5680 while (tmp != NULL) {
5681 if (xmlStrEqual(name, tmp->name)) {
5682 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5683 "standalone: attribute notation value token %s duplicated\n",
     /* The name is freed only when the dictionary does not own it. */
5685 if (!xmlDictOwns(ctxt->dict, name))
5686 xmlFree((xmlChar *) name);
5692 cur = xmlCreateEnumeration(name);
5694 xmlFreeEnumeration(ret);
     /* The first node becomes both head and tail of the list. */
5697 if (last == NULL) ret = last = cur;
5704 } while (RAW == '|');
5706 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5707 xmlFreeEnumeration(ret);
5715 * xmlParseEnumerationType:
5716 * @ctxt: an XML parser context
5718 * parse an Enumeration attribute type.
5720 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5722 * [ VC: Enumeration ]
5723 * Values of this type must match one of the Nmtoken tokens in
5726 * Returns: the enumeration attribute tree built while parsing
5730 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
     /*
      * Builds a list of xmlEnumeration nodes from Nmtoken values separated
      * by the | character; ret is the head and last the tail of the list.
      */
5732 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5735 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5742 name = xmlParseNmtoken(ctxt);
5744 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
     /* Walk the nodes built so far to detect a duplicated token. */
5748 while (tmp != NULL) {
5749 if (xmlStrEqual(name, tmp->name)) {
5750 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5751 "standalone: attribute enumeration value token %s duplicated\n",
     /* Ownership check: the statement that follows runs only when ctxt->dict does not own name. */
5753 if (!xmlDictOwns(ctxt->dict, name))
5760 cur = xmlCreateEnumeration(name);
5761 if (!xmlDictOwns(ctxt->dict, name))
5764 xmlFreeEnumeration(ret);
     /* The first node becomes both head and tail of the list. */
5767 if (last == NULL) ret = last = cur;
5774 } while (RAW == '|');
5776 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5784 * xmlParseEnumeratedType:
5785 * @ctxt: an XML parser context
5786 * @tree: the enumeration tree built while parsing
5788 * parse an Enumerated attribute type.
5790 * [57] EnumeratedType ::= NotationType | Enumeration
5792 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5795 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5799 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
     /*
      * Dispatches on the NOTATION keyword: with it the list is parsed by
      * xmlParseNotationType(), otherwise by xmlParseEnumerationType().
      * Either way 0 is returned when the resulting list is NULL.
      */
5800 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5802 if (SKIP_BLANKS == 0) {
5803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5804 "Space required after 'NOTATION'\n");
5807 *tree = xmlParseNotationType(ctxt);
5808 if (*tree == NULL) return(0);
5809 return(XML_ATTRIBUTE_NOTATION);
5811 *tree = xmlParseEnumerationType(ctxt);
5812 if (*tree == NULL) return(0);
5813 return(XML_ATTRIBUTE_ENUMERATION);
5817 * xmlParseAttributeType:
5818 * @ctxt: an XML parser context
5819 * @tree: the enumeration tree built while parsing
5821 * parse the Attribute list def for an element
5823 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5825 * [55] StringType ::= 'CDATA'
5827 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5828 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5830 * Validity constraints for attribute values syntax are checked in
5831 * xmlValidateAttributeValue()
5834 * Values of type ID must match the Name production. A name must not
5835 * appear more than once in an XML document as a value of this type;
5836 * i.e., ID values must uniquely identify the elements which bear them.
5838 * [ VC: One ID per Element Type ]
5839 * No element type may have more than one ID attribute specified.
5841 * [ VC: ID Attribute Default ]
5842 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5845 * Values of type IDREF must match the Name production, and values
5846 * of type IDREFS must match Names; each IDREF Name must match the value
5847 * of an ID attribute on some element in the XML document; i.e. IDREF
5848 * values must match the value of some ID attribute.
5850 * [ VC: Entity Name ]
5851 * Values of type ENTITY must match the Name production, values
5852 * of type ENTITIES must match Names; each Entity Name must match the
5853 * name of an unparsed entity declared in the DTD.
5855 * [ VC: Name Token ]
5856 * Values of type NMTOKEN must match the Nmtoken production; values
5857 * of type NMTOKENS must match Nmtokens.
5859 * Returns the attribute type
5862 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
     /*
      * Maps the type keyword at the current position to its XML_ATTRIBUTE_*
      * constant; anything else is handed to xmlParseEnumeratedType().
      * Longer keywords are tested before the keywords they start with
      * (IDREFS, IDREF, ID and NMTOKENS, NMTOKEN).
      */
5864 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5866 return(XML_ATTRIBUTE_CDATA);
5867 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5869 return(XML_ATTRIBUTE_IDREFS);
5870 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5872 return(XML_ATTRIBUTE_IDREF);
5873 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5875 return(XML_ATTRIBUTE_ID);
5876 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5878 return(XML_ATTRIBUTE_ENTITY);
5879 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5881 return(XML_ATTRIBUTE_ENTITIES);
5882 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5884 return(XML_ATTRIBUTE_NMTOKENS);
5885 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5887 return(XML_ATTRIBUTE_NMTOKEN);
5889 return(xmlParseEnumeratedType(ctxt, tree));
5893 * xmlParseAttributeListDecl:
5894 * @ctxt: an XML parser context
5896 * parse the Attribute list def for an element
5898 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5900 * [53] AttDef ::= S Name S AttType S DefaultDecl
/*
 * Parse one ATTLIST declaration starting at the current input position.
 *
 * ctxt: the parser context.
 *
 * Visible flow: after the ATTLIST keyword an element name is read with
 * xmlParseName, then attribute definitions are read in a loop until the
 * closing angle bracket or until the parser reaches XML_PARSER_EOF.  Each
 * definition is a name, a type (xmlParseAttributeType) and a default
 * declaration (xmlParseDefaultDecl).  Every definition is reported through
 * the SAX attributeDecl callback when one is installed and SAX is enabled.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbers skip), so several guard conditions and early exits are not
 * visible; the comments below describe only the visible statements.
 */
5904 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5905 const xmlChar *elemName;
5906 const xmlChar *attrName;
5907 xmlEnumerationPtr tree;
5909 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Remember the input the declaration started in, for the boundary check. */
5910 int inputid = ctxt->input->id;
5913 if (SKIP_BLANKS == 0) {
5914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5915 "Space required after '<!ATTLIST'\n");
5917 elemName = xmlParseName(ctxt);
5918 if (elemName == NULL) {
5919 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5920 "ATTLIST: no name for Element\n");
/* One iteration per attribute definition. */
5925 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5928 xmlChar *defaultValue = NULL;
5932 attrName = xmlParseName(ctxt);
5933 if (attrName == NULL) {
5934 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5935 "ATTLIST: no name for Attribute\n");
5939 if (SKIP_BLANKS == 0) {
5940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5941 "Space required after the attribute name\n");
/* tree receives the enumeration list, if the type has one. */
5945 type = xmlParseAttributeType(ctxt, &tree);
5951 if (SKIP_BLANKS == 0) {
5952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5953 "Space required after the attribute type\n");
/* Error path: the enumeration obtained above is released here. */
5955 xmlFreeEnumeration(tree);
5959 def = xmlParseDefaultDecl(ctxt, &defaultValue);
/*
 * Cleanup of defaultValue and tree after xmlParseDefaultDecl.  The test
 * guarding it is not visible in this listing - presumably a failure
 * result in def; confirm.
 */
5961 if (defaultValue != NULL)
5962 xmlFree(defaultValue);
5964 xmlFreeEnumeration(tree);
/*
 * Default values of non-CDATA attributes are whitespace-normalized in
 * place: the same buffer is passed as both arguments.
 */
5967 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5968 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5972 if (SKIP_BLANKS == 0) {
5973 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5974 "Space required after the attribute default value\n");
5975 if (defaultValue != NULL)
5976 xmlFree(defaultValue);
5978 xmlFreeEnumeration(tree);
/*
 * Report the definition.  tree is handed to the callback and is freed
 * here only when the callback is not invoked (presumably the callback
 * takes ownership - verify against the SAX handler).
 */
5982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5983 (ctxt->sax->attributeDecl != NULL))
5984 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5985 type, def, defaultValue, tree);
5986 else if (tree != NULL)
5987 xmlFreeEnumeration(tree);
/*
 * In SAX2 mode an actual default value (neither IMPLIED nor REQUIRED)
 * is also recorded through xmlAddDefAttrs.
 */
5989 if ((ctxt->sax2) && (defaultValue != NULL) &&
5990 (def != XML_ATTRIBUTE_IMPLIED) &&
5991 (def != XML_ATTRIBUTE_REQUIRED)) {
5992 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
/* The condition guarding this call, if any, is not visible here. */
5995 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
/* defaultValue is owned by this function and released per definition. */
5997 if (defaultValue != NULL)
5998 xmlFree(defaultValue);
/* The declaration must end in the same input it started in. */
6002 if (inputid != ctxt->input->id) {
6003 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6004 "Attribute list declaration doesn't start and"
6005 " stop in the same entity\n");
6013 * xmlParseElementMixedContentDecl:
6014 * @ctxt: an XML parser context
6015 * @inputchk: the input used for the current entity, needed for boundary checks
6017 * parse the declaration for a Mixed Element content
6018 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6020 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6021 * '(' S? '#PCDATA' S? ')'
6023 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6025 * [ VC: No Duplicate Types ]
6026 * The same name must not appear more than once in a single
6027 * mixed-content declaration.
6029 * returns: the list of the xmlElementContentPtr describing the element choices
/*
 * Parse a Mixed content model (production [51]) and build its content tree.
 *
 * ctxt:     the parser context.
 * inputchk: id of the input in which the group was opened; compared with
 *           ctxt->input->id when the group closes to report
 *           XML_ERR_ENTITY_BOUNDARY.
 *
 * Returns the content tree; NULL is returned on the visible allocation
 * failure paths.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbers skip); comments describe only the visible statements.
 */
6031 xmlElementContentPtr
6032 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6033 xmlElementContentPtr ret = NULL, cur = NULL, n;
6034 const xmlChar *elem = NULL;
6037 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6042 if (ctxt->input->id != inputchk) {
6043 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6044 "Element content declaration doesn't start and"
6045 " stop in the same entity\n");
/* PCDATA-only form: a single PCDATA node, marked MULT just below. */
6048 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6052 ret->ocur = XML_ELEMENT_CONTENT_MULT;
/* Form with alternatives: start from a PCDATA node and chain OR nodes. */
6057 if ((RAW == '(') || (RAW == '|')) {
6058 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6059 if (ret == NULL) return(NULL);
6061 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * NOTE(review): the two return(NULL) paths below free nothing that is
 * visible here; nodes built so far look leaked on allocation failure -
 * confirm against the full source.
 */
6064 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6065 if (ret == NULL) return(NULL);
6071 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6072 if (n == NULL) return(NULL);
/* The previously parsed name becomes the first child of the new OR node. */
6073 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6082 elem = xmlParseName(ctxt);
6084 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6085 "xmlParseElementMixedContentDecl : Name expected\n");
6086 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* A group listing names must be closed by a close paren and a star. */
6092 if ((RAW == ')') && (NXT(1) == '*')) {
/* Attach the last parsed name as the second child of the last node. */
6094 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6095 XML_ELEMENT_CONTENT_ELEMENT);
6096 if (cur->c2 != NULL)
6097 cur->c2->parent = cur;
6100 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6101 if (ctxt->input->id != inputchk) {
6102 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103 "Element content declaration doesn't start and"
6104 " stop in the same entity\n");
/* Error path: the tree built so far is released before reporting. */
6108 xmlFreeDocElementContent(ctxt->myDoc, ret);
6109 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
/* The group did not start with the PCDATA keyword. */
6114 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6120 * xmlParseElementChildrenContentDeclPriv:
6121 * @ctxt: an XML parser context
6122 * @inputchk: the input used for the current entity, needed for boundary checks
6123 * @depth: the level of recursion
6125 * Parse the declaration of an element children content model (choice or seq)
6126 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6129 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6131 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6133 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6135 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6137 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6138 * TODO Parameter-entity replacement text must be properly nested
6139 * with parenthesized groups. That is to say, if either of the
6140 * opening or closing parentheses in a choice, seq, or Mixed
6141 * construct is contained in the replacement text for a parameter
6142 * entity, both must be contained in the same replacement text. For
6143 * interoperability, if a parameter-entity reference appears in a
6144 * choice, seq, or Mixed construct, its replacement text should not
6145 * be empty, and neither the first nor last non-blank character of
6146 * the replacement text should be a connector (| or ,).
6148 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Recursive worker that parses a children content model (productions
 * [47] to [50]) and builds the matching element content tree.
 *
 * ctxt:     the parser context.
 * inputchk: id of the input in which the enclosing group was opened;
 *           compared with ctxt->input->id when the group closes.
 * depth:    recursion level (third parameter; its declaration line is not
 *           visible in this listing, see the header comment of the file).
 *
 * Visible flow: a depth guard, the first content particle (nested group or
 * name) with its occurrence indicator, then a loop that reads one separator
 * and one more particle per iteration until the closing paren, and finally
 * the occurrence indicator of the whole group with some normalization.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbers skip); comments describe only the visible statements.
 */
6151 static xmlElementContentPtr
6152 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6154 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6155 const xmlChar *elem;
/*
 * Depth guard: more than 128 levels is refused unless XML_PARSE_HUGE is
 * set.  The second half of this condition is not visible here.
 */
6158 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6160 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6161 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
/* Nested group: its boundary check uses the input current at this point. */
6168 int inputid = ctxt->input->id;
6170 /* Recurse on first child */
6173 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* Otherwise the first particle is a plain element name. */
6178 elem = xmlParseName(ctxt);
6180 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6183 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6185 xmlErrMemory(ctxt, NULL);
/* Occurrence indicator of the first particle; ONCE when none is present. */
6190 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6192 } else if (RAW == '*') {
6193 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6195 } else if (RAW == '+') {
6196 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6199 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6205 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6207 * Each loop we parse one separator and one element.
/*
 * type records the first separator seen; a different separator later in
 * the same group is reported as XML_ERR_SEPARATOR_REQUIRED.
 */
6210 if (type == 0) type = CUR;
6213 * Detect "Name | Name , Name" error
6215 else if (type != CUR) {
6216 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6217 "xmlParseElementChildrenContentDecl : '%c' expected\n",
/* Error cleanup: last is freed separately only when it is not ret itself. */
6219 if ((last != NULL) && (last != ret))
6220 xmlFreeDocElementContent(ctxt->myDoc, last);
6222 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Sequence separator: a SEQ operator node is created. */
6227 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6229 if ((last != NULL) && (last != ret))
6230 xmlFreeDocElementContent(ctxt->myDoc, last);
6231 xmlFreeDocElementContent(ctxt->myDoc, ret);
6249 } else if (RAW == '|') {
6250 if (type == 0) type = CUR;
6253 * Detect "Name , Name | Name" error
6255 else if (type != CUR) {
6256 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6257 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6259 if ((last != NULL) && (last != ret))
6260 xmlFreeDocElementContent(ctxt->myDoc, last);
6262 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Choice separator: an OR operator node is created. */
6267 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6269 if ((last != NULL) && (last != ret))
6270 xmlFreeDocElementContent(ctxt->myDoc, last);
6272 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Neither separator nor closing paren: the group is not finished. */
6291 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6292 if ((last != NULL) && (last != ret))
6293 xmlFreeDocElementContent(ctxt->myDoc, last);
6295 xmlFreeDocElementContent(ctxt->myDoc, ret);
6302 int inputid = ctxt->input->id;
6303 /* Recurse on second child */
6306 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6310 elem = xmlParseName(ctxt);
6312 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6314 xmlFreeDocElementContent(ctxt->myDoc, ret);
6317 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6320 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Occurrence indicator of the particle just parsed. */
6324 last->ocur = XML_ELEMENT_CONTENT_OPT;
6326 } else if (RAW == '*') {
6327 last->ocur = XML_ELEMENT_CONTENT_MULT;
6329 } else if (RAW == '+') {
6330 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6333 last->ocur = XML_ELEMENT_CONTENT_ONCE;
/* After the loop: the statements linking cur and last are not visible. */
6339 if ((cur != NULL) && (last != NULL)) {
/* The group must close in the input it was opened in. */
6344 if (ctxt->input->id != inputchk) {
6345 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346 "Element content declaration doesn't start and stop in"
6347 " the same entity\n");
/*
 * Occurrence indicator of the whole group, merged with what ret already
 * carries: an optional marker on a PLUS or MULT group yields MULT,
 * otherwise OPT.
 */
6352 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6356 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6359 } else if (RAW == '*') {
6361 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6364 * Some normalization:
6365 * (a | b* | c?)* == (a | b | c)*
/* Walk the OR chain and reset OPT or MULT children to ONCE. */
6367 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6368 if ((cur->c1 != NULL) &&
6369 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6370 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6371 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6372 if ((cur->c2 != NULL) &&
6373 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6374 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6375 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6380 } else if (RAW == '+') {
/* A plus marker on an OPT or MULT group yields MULT, otherwise PLUS. */
6384 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6385 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6386 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6388 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6390 * Some normalization:
6391 * (a | b*)+ == (a | b)*
6392 * (a | b?)+ == (a | b)*
6394 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6395 if ((cur->c1 != NULL) &&
6396 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6397 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6398 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6401 if ((cur->c2 != NULL) &&
6402 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6403 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6404 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
/*
 * The group ends up MULT here; the guarding test is not visible -
 * presumably it fires when the loop above rewrote at least one child.
 */
6410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6418 * xmlParseElementChildrenContentDecl:
6419 * @ctxt: an XML parser context
6420 * @inputchk: the input used for the current entity, needed for boundary checks
6422 * Parse the declaration of an element children content model (choice or seq)
6423 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6425 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6427 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6429 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6431 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6433 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6434 * TODO Parameter-entity replacement text must be properly nested
6435 * with parenthesized groups. That is to say, if either of the
6436 * opening or closing parentheses in a choice, seq, or Mixed
6437 * construct is contained in the replacement text for a parameter
6438 * entity, both must be contained in the same replacement text. For
6439 * interoperability, if a parameter-entity reference appears in a
6440 * choice, seq, or Mixed construct, its replacement text should not
6441 * be empty, and neither the first nor last non-blank character of
6442 * the replacement text should be a connector (| or ,).
6444 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Public entry point kept for API/ABI compatibility: it forwards to
 * xmlParseElementChildrenContentDeclPriv with an initial depth of 1.
 *
 * ctxt:     the parser context.
 * inputchk: input id forwarded for the entity boundary check.
 *
 * Returns whatever the private worker returns.
 */
6447 xmlElementContentPtr
6448 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6449 /* stub left for API/ABI compat */
6450 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6454 * xmlParseElementContentDecl:
6455 * @ctxt: an XML parser context
6456 * @name: the name of the element being defined.
6457 * @result: the Element Content pointer will be stored here if any
6459 * parse the declaration for an Element content either Mixed or Children,
6460 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6462 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6464 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/*
 * Parse a parenthesized content specification and decide whether it is a
 * Mixed or a children content model.
 *
 * ctxt:   the parser context.
 * name:   name of the element being declared; used in the error message.
 * result: output slot for the content tree.  The statement storing into
 *         it is not visible in this listing - presumably tree is stored
 *         there before returning; confirm.
 *
 * The visible code sets res to XML_ELEMENT_TYPE_MIXED or
 * XML_ELEMENT_TYPE_ELEMENT; the return statement itself is not visible.
 */
6468 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6469 xmlElementContentPtr *result) {
6471 xmlElementContentPtr tree = NULL;
/* Input id at the opening paren, handed down for the boundary check. */
6472 int inputid = ctxt->input->id;
6478 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6479 "xmlParseElementContentDecl : %s '(' expected\n", name);
6484 if (ctxt->instate == XML_PARSER_EOF)
/* A leading PCDATA keyword selects the Mixed parser. */
6487 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6488 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6489 res = XML_ELEMENT_TYPE_MIXED;
/* Anything else is parsed as a children model, starting at depth 1. */
6491 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6492 res = XML_ELEMENT_TYPE_ELEMENT;
6500 * xmlParseElementDecl:
6501 * @ctxt: an XML parser context
6503 * parse an Element declaration.
6505 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6507 * [ VC: Unique Element Type Declaration ]
6508 * No element type may be declared more than once
6510 * Returns the type of the element, or -1 in case of error
/*
 * Parse one ELEMENT declaration starting at the current input position.
 *
 * ctxt: the parser context.
 *
 * Visible flow: read the element name, classify the content specification
 * as EMPTY, ANY or a parenthesized model (xmlParseElementContentDecl),
 * check the closing angle bracket and the entity boundary, then report the
 * declaration through the SAX elementDecl callback.
 *
 * The header comment of this function states that it returns the element
 * type or -1 on error; the return statements are not visible here.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbers skip); comments describe only the visible statements.
 */
6513 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6514 const xmlChar *name;
6516 xmlElementContentPtr content = NULL;
6518 /* GROW; done in the caller */
6519 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Remember the starting input for the boundary check at the end. */
6520 int inputid = ctxt->input->id;
6523 if (SKIP_BLANKS == 0) {
6524 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6525 "Space required after 'ELEMENT'\n");
6528 name = xmlParseName(ctxt);
6530 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6531 "xmlParseElementDecl: no name for Element\n");
6534 if (SKIP_BLANKS == 0) {
6535 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6536 "Space required after the element name\n");
6538 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6541 * Element must always be empty.
6543 ret = XML_ELEMENT_TYPE_EMPTY;
6544 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6548 * Element is a generic container.
6550 ret = XML_ELEMENT_TYPE_ANY;
6551 } else if (RAW == '(') {
/* content receives the parsed content tree, if any. */
6552 ret = xmlParseElementContentDecl(ctxt, name, &content);
6555 * [ WFC: PEs in Internal Subset ] error handling.
/*
 * A percent sign here, in the internal subset and at the top input level,
 * gets the dedicated PE reference error; otherwise the generic one is used.
 */
6557 if ((RAW == '%') && (ctxt->external == 0) &&
6558 (ctxt->inputNr == 1)) {
6559 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6560 "PEReference: forbidden within markup decl in internal subset\n");
6562 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6563 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
/* Missing closing angle bracket: report and drop the content tree. */
6571 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6572 if (content != NULL) {
6573 xmlFreeDocElementContent(ctxt->myDoc, content);
6576 if (inputid != ctxt->input->id) {
6577 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6578 "Element declaration doesn't start and stop in"
6579 " the same entity\n");
/*
 * content->parent is cleared before the callback and inspected after it:
 * the tree is freed here only when parent is still NULL afterwards.
 */
6583 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6584 (ctxt->sax->elementDecl != NULL)) {
6585 if (content != NULL)
6586 content->parent = NULL;
6587 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6589 if ((content != NULL) && (content->parent == NULL)) {
6591 * this is a trick: if xmlAddElementDecl is called,
6592 * instead of copying the full tree it is plugged directly
6593 * if called from the parser. Avoid duplicating the
6594 * interfaces or change the API/ABI
6596 xmlFreeDocElementContent(ctxt->myDoc, content);
/* No callback was invoked: the tree is released here. */
6598 } else if (content != NULL) {
6599 xmlFreeDocElementContent(ctxt->myDoc, content);
6607 * xmlParseConditionalSections
6608 * @ctxt: an XML parser context
6610 * [61] conditionalSect ::= includeSect | ignoreSect
6611 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6612 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6613 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6614 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * Parse one conditional section (productions [61] to [65]): either an
 * INCLUDE section, whose content is parsed as declarations, or an IGNORE
 * section, whose content is skipped.
 *
 * ctxt: the parser context.
 *
 * Any other keyword raises XML_ERR_CONDSEC_INVALID_KEYWORD and halts the
 * parser.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbers skip); comments describe only the visible statements.
 */
6618 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/* Input id at entry; all markup of the section must stay in this input. */
6619 int id = ctxt->input->id;
6623 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6627 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6628 xmlHaltParser(ctxt);
6631 if (ctxt->input->id != id) {
6632 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6633 "All markup of the conditional section is not"
6634 " in the same entity\n");
6638 if (xmlParserDebugEntities) {
6639 if ((ctxt->input != NULL) && (ctxt->input->filename))
6640 xmlGenericError(xmlGenericErrorContext,
6641 "%s(%d): ", ctxt->input->filename,
6643 xmlGenericError(xmlGenericErrorContext,
6644 "Entering INCLUDE Conditional Section\n");
/*
 * INCLUDE body: loop until the section terminator, end of input, or
 * XML_PARSER_EOF.
 */
6649 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6650 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * Snapshot of the position, used below to detect an iteration that
 * consumed nothing.
 * NOTE(review): cons is unsigned int; if input->consumed is a wider type
 * the comparison below works on a truncated value - confirm the field type.
 */
6651 const xmlChar *check = CUR_PTR;
6652 unsigned int cons = ctxt->input->consumed;
/*
 * Nested conditional sections recurse into this function.
 * NOTE(review): no recursion depth limit is visible here.
 */
6654 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6655 xmlParseConditionalSections(ctxt);
6657 xmlParseMarkupDecl(ctxt);
/* No progress in this iteration: report, halt, and avoid spinning. */
6662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6663 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6664 xmlHaltParser(ctxt);
6668 if (xmlParserDebugEntities) {
6669 if ((ctxt->input != NULL) && (ctxt->input->filename))
6670 xmlGenericError(xmlGenericErrorContext,
6671 "%s(%d): ", ctxt->input->filename,
6673 xmlGenericError(xmlGenericErrorContext,
6674 "Leaving INCLUDE Conditional Section\n");
6677 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6679 xmlParserInputState instate;
6685 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6686 xmlHaltParser(ctxt);
6689 if (ctxt->input->id != id) {
6690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6691 "All markup of the conditional section is not"
6692 " in the same entity\n");
6696 if (xmlParserDebugEntities) {
6697 if ((ctxt->input != NULL) && (ctxt->input->filename))
6698 xmlGenericError(xmlGenericErrorContext,
6699 "%s(%d): ", ctxt->input->filename,
6701 xmlGenericError(xmlGenericErrorContext,
6702 "Entering IGNORE Conditional Section\n");
6706 * Parse up to the end of the conditional section
6707 * But disable SAX event generating DTD building in the meantime
/*
 * Save disableSAX and instate so they can be restored after the scan.
 * SAX stays enabled only in recovery mode.
 */
6709 state = ctxt->disableSAX;
6710 instate = ctxt->instate;
6711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6712 ctxt->instate = XML_PARSER_IGNORE;
/*
 * depth counts nested sections.  A terminator is skipped only while depth
 * stays non-negative, so the terminator that drives depth below zero is
 * left in the input when the loop ends.
 */
6714 while (((depth >= 0) && (RAW != 0)) &&
6715 (ctxt->instate != XML_PARSER_EOF)) {
6716 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6721 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6722 if (--depth >= 0) SKIP(3);
/* Restore the state saved before the scan. */
6729 ctxt->disableSAX = state;
6730 ctxt->instate = instate;
6732 if (xmlParserDebugEntities) {
6733 if ((ctxt->input != NULL) && (ctxt->input->filename))
6734 xmlGenericError(xmlGenericErrorContext,
6735 "%s(%d): ", ctxt->input->filename,
6737 xmlGenericError(xmlGenericErrorContext,
6738 "Leaving IGNORE Conditional Section\n");
/* Neither INCLUDE nor IGNORE. */
6742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6743 xmlHaltParser(ctxt);
/* Common tail: unterminated section, then the final boundary check. */
6751 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6753 if (ctxt->input->id != id) {
6754 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6755 "All markup of the conditional section is not in"
6756 " the same entity\n");
/*
 * Bounds test before the statement that follows (not visible here) -
 * presumably the skip over the three-character terminator; confirm.
 */
6758 if ((ctxt-> instate != XML_PARSER_EOF) &&
6759 ((ctxt->input->cur + 3) <= ctxt->input->end))
6765 * xmlParseMarkupDecl:
6766 * @ctxt: an XML parser context
6768 * parse Markup declarations
6770 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6771 * NotationDecl | PI | Comment
6773 * [ VC: Proper Declaration/PE Nesting ]
6774 * Parameter-entity replacement text must be properly nested with
6775 * markup declarations. That is to say, if either the first character
6776 * or the last character of a markup declaration (markupdecl above) is
6777 * contained in the replacement text for a parameter-entity reference,
6778 * both must be contained in the same replacement text.
6780 * [ WFC: PEs in Internal Subset ]
6781 * In the internal DTD subset, parameter-entity references can occur
6782 * only where markup declarations can occur, not within markup declarations.
6783 * (This does not apply to references that occur in external parameter
6784 * entities or to the external subset.)
/*
 * Dispatch one markup declaration (production [29]) to its dedicated
 * parser, based on the characters following the opening angle bracket.
 *
 * ctxt: the parser context.
 *
 * The visible code calls xmlParseElementDecl, xmlParseEntityDecl (when
 * NXT(3) is N), xmlParseAttributeListDecl, xmlParseNotationDecl and
 * xmlParseComment.  Most of the selecting tests, and the handling of the
 * question-mark case, are not visible in this listing.
 *
 * On the way out the parser state is set back to XML_PARSER_DTD.
 */
6787 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6790 if (NXT(1) == '!') {
6794 xmlParseElementDecl(ctxt);
6795 else if (NXT(3) == 'N')
6796 xmlParseEntityDecl(ctxt);
6799 xmlParseAttributeListDecl(ctxt);
6802 xmlParseNotationDecl(ctxt);
6805 xmlParseComment(ctxt);
6808 /* there is an error but it will be detected later */
6811 } else if (NXT(1) == '?') {
6817 * detect requirement to exit there and act accordingly
6818 * and avoid having instate overriden later on
6820 if (ctxt->instate == XML_PARSER_EOF)
6824 * Conditional sections are allowed from entities included
6825 * by PE References in the internal subset.
/*
 * Conditional sections are handled here only when not parsing an external
 * subset and more than one input is on the stack.
 */
6827 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6828 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6829 xmlParseConditionalSections(ctxt);
6833 ctxt->instate = XML_PARSER_DTD;
6838 * @ctxt: an XML parser context
6840 * Parse the text declaration (the '<?xml ... ?>' header) of an external entity
6842 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
/*
 * Parse a text declaration (production [77]) at the start of an external
 * entity: optional version info followed by a mandatory encoding
 * declaration.
 *
 * ctxt: the parser context.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbers skip); comments describe only the visible statements.
 */
6846 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6848 const xmlChar *encoding;
6851 * We know that '<?xml' is here.
/* The five-character prefix must be followed by a blank character. */
6853 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6856 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6860 if (SKIP_BLANKS == 0) {
6861 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6862 "Space needed after '<?xml'\n");
6866 * We may have the VersionInfo here.
/* A missing version falls back to a copy of XML_DEFAULT_VERSION. */
6868 version = xmlParseVersionInfo(ctxt);
6869 if (version == NULL)
6870 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6872 if (SKIP_BLANKS == 0) {
6873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6874 "Space needed here\n");
/* The version string is stored on the current input; no free is visible here. */
6877 ctxt->input->version = version;
6880 * We must have the encoding declaration
6882 encoding = xmlParseEncodingDecl(ctxt);
6883 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6885 * The XML REC instructs us to stop parsing right here
/*
 * A missing encoding is reported only when no other error has been
 * recorded in errNo.
 */
6889 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6890 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6891 "Missing encoding in text declaration\n");
/*
 * Closing: a question mark followed by an angle bracket is accepted, a
 * lone angle bracket is an error, and anything else is an error followed
 * by MOVETO_ENDTAG.
 */
6895 if ((RAW == '?') && (NXT(1) == '>')) {
6897 } else if (RAW == '>') {
6898 /* Deprecated old WD ... */
6899 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6902 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6903 MOVETO_ENDTAG(CUR_PTR);
6909 * xmlParseExternalSubset:
6910 * @ctxt: an XML parser context
6911 * @ExternalID: the external identifier
6912 * @SystemID: the system identifier (or URL)
6914 * parse Markup declarations from an external subset
6916 * [30] extSubset ::= textDecl? extSubsetDecl
6918 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/*
 * Parse the declarations of an external DTD subset (productions [30] and
 * [31]).
 *
 * ctxt:       the parser context.
 * ExternalID: external identifier, passed to xmlCreateIntSubset.
 * SystemID:   system identifier, passed to xmlCreateIntSubset.
 *
 * Visible flow: optional encoding detection, optional text declaration,
 * creation of the document and its internal subset when missing, then a
 * loop over markup declarations and conditional sections.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbers skip); comments describe only the visible statements.
 */
6921 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6922 const xmlChar *SystemID) {
6923 xmlDetectSAX2(ctxt);
/*
 * With no encoding known yet and at least four bytes available, the
 * encoding is guessed from the first four bytes.  start is filled by
 * statements that are not visible here.
 */
6926 if ((ctxt->encoding == NULL) &&
6927 (ctxt->input->end - ctxt->input->cur >= 4)) {
6929 xmlCharEncoding enc;
6935 enc = xmlDetectCharEncoding(start, 4);
6936 if (enc != XML_CHAR_ENCODING_NONE)
6937 xmlSwitchEncoding(ctxt, enc);
6940 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6941 xmlParseTextDecl(ctxt);
6942 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6944 * The XML REC instructs us to stop parsing right here
6946 xmlHaltParser(ctxt);
/* Create a document on demand and flag it XML_DOC_INTERNAL. */
6950 if (ctxt->myDoc == NULL) {
6951 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6952 if (ctxt->myDoc == NULL) {
6953 xmlErrMemory(ctxt, "New Doc failed");
6956 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6958 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6959 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6961 ctxt->instate = XML_PARSER_DTD;
/*
 * Main loop: runs while the input starts a processing instruction or a
 * declaration.  The last alternative of this condition is not visible.
 */
6964 while (((RAW == '<') && (NXT(1) == '?')) ||
6965 ((RAW == '<') && (NXT(1) == '!')) ||
/*
 * Position snapshot for the no-progress check below.
 * NOTE(review): cons is unsigned int; confirm it matches the type of
 * input->consumed.
 */
6967 const xmlChar *check = CUR_PTR;
6968 unsigned int cons = ctxt->input->consumed;
6971 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6972 xmlParseConditionalSections(ctxt);
6974 xmlParseMarkupDecl(ctxt);
/* An iteration that consumed nothing is reported as an unfinished subset. */
6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6978 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
/*
 * Final error after the loop; the guarding test is not visible -
 * presumably it fires when input remains; confirm.
 */
6984 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6990 * xmlParseReference:
6991 * @ctxt: an XML parser context
6993 * Parse and handle an entity reference in content. Depending on the SAX
6994 * interface, this may end up in a call to characters() if this is a
6995 * CharRef or a predefined entity, if there is no reference() callback,
6996 * or if the parser was asked to switch to that mode.
6998 * [67] Reference ::= EntityRef | CharRef
7001 xmlParseReference(xmlParserCtxtPtr ctxt) {
7005 xmlNodePtr list = NULL;
7006 xmlParserErrors ret = XML_ERR_OK;
7013 * Simple case of a CharRef
7015 if (NXT(1) == '#') {
7019 int value = xmlParseCharRef(ctxt);
7023 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7025 * So we are using non-UTF-8 buffers
7026 * Check that the char fit on 8bits, if not
7027 * generate a CharRef.
7029 if (value <= 0xFF) {
7032 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7033 (!ctxt->disableSAX))
7034 ctxt->sax->characters(ctxt->userData, out, 1);
7036 if ((hex == 'x') || (hex == 'X'))
7037 snprintf((char *)out, sizeof(out), "#x%X", value);
7039 snprintf((char *)out, sizeof(out), "#%d", value);
7040 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7041 (!ctxt->disableSAX))
7042 ctxt->sax->reference(ctxt->userData, out);
7046 * Just encode the value in UTF-8
7048 COPY_BUF(0 ,out, i, value);
7050 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7051 (!ctxt->disableSAX))
7052 ctxt->sax->characters(ctxt->userData, out, i);
7058 * We are seeing an entity reference
7060 ent = xmlParseEntityRef(ctxt);
7061 if (ent == NULL) return;
7062 if (!ctxt->wellFormed)
7064 was_checked = ent->checked;
7066 /* special case of predefined entities */
7067 if ((ent->name == NULL) ||
7068 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7070 if (val == NULL) return;
7072 * inline the entity.
7074 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7075 (!ctxt->disableSAX))
7076 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7081 * The first reference to the entity trigger a parsing phase
7082 * where the ent->children is filled with the result from
7084 * Note: external parsed entities will not be loaded, it is not
7085 * required for a non-validating parser, unless the parsing option
7086 * of validating, or substituting entities were given. Doing so is
7087 * far more secure as the parser will only process data coming from
7088 * the document entity by default.
7090 if (((ent->checked == 0) ||
7091 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7092 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7093 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7094 unsigned long oldnbent = ctxt->nbentities;
7097 * This is a bit hackish but this seems the best
7098 * way to make sure both SAX and DOM entity support
7102 if (ctxt->userData == ctxt)
7105 user_data = ctxt->userData;
7108 * Check that this entity is well formed
7109 * 4.3.2: An internal general parsed entity is well-formed
7110 * if its replacement text matches the production labeled
7113 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7115 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7119 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7121 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7122 user_data, ctxt->depth, ent->URI,
7123 ent->ExternalID, &list);
7126 ret = XML_ERR_ENTITY_PE_INTERNAL;
7127 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7128 "invalid entity type found\n", NULL);
7132 * Store the number of entities needing parsing for this entity
7133 * content and do checkings
7135 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7136 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7138 if (ret == XML_ERR_ENTITY_LOOP) {
7139 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7140 xmlFreeNodeList(list);
7143 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7144 xmlFreeNodeList(list);
7148 if ((ret == XML_ERR_OK) && (list != NULL)) {
7149 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7150 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7151 (ent->children == NULL)) {
7152 ent->children = list;
7153 if (ctxt->replaceEntities) {
7155 * Prune it directly in the generated document
7156 * except for single text nodes.
7158 if (((list->type == XML_TEXT_NODE) &&
7159 (list->next == NULL)) ||
7160 (ctxt->parseMode == XML_PARSE_READER)) {
7161 list->parent = (xmlNodePtr) ent;
7166 while (list != NULL) {
7167 list->parent = (xmlNodePtr) ctxt->node;
7168 list->doc = ctxt->myDoc;
7169 if (list->next == NULL)
7173 list = ent->children;
7174 #ifdef LIBXML_LEGACY_ENABLED
7175 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7176 xmlAddEntityReference(ent, list, NULL);
7177 #endif /* LIBXML_LEGACY_ENABLED */
7181 while (list != NULL) {
7182 list->parent = (xmlNodePtr) ent;
7183 xmlSetTreeDoc(list, ent->doc);
7184 if (list->next == NULL)
7190 xmlFreeNodeList(list);
7193 } else if ((ret != XML_ERR_OK) &&
7194 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7195 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7196 "Entity '%s' failed to parse\n", ent->name);
7197 xmlParserEntityCheck(ctxt, 0, ent, 0);
7198 } else if (list != NULL) {
7199 xmlFreeNodeList(list);
7202 if (ent->checked == 0)
7205 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7207 } else if (ent->checked != 1) {
7208 ctxt->nbentities += ent->checked / 2;
7212 * Now that the entity content has been gathered
7213 * provide it to the application, this can take different forms based
7214 * on the parsing modes.
7216 if (ent->children == NULL) {
7218 * Probably running in SAX mode and the callbacks don't
7219 * build the entity content. So unless we already went
7220 * though parsing for first checking go though the entity
7221 * content to generate callbacks associated to the entity
7223 if (was_checked != 0) {
7226 * This is a bit hackish but this seems the best
7227 * way to make sure both SAX and DOM entity support
7230 if (ctxt->userData == ctxt)
7233 user_data = ctxt->userData;
7235 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7237 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7238 ent->content, user_data, NULL);
7240 } else if (ent->etype ==
7241 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7243 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7244 ctxt->sax, user_data, ctxt->depth,
7245 ent->URI, ent->ExternalID, NULL);
7248 ret = XML_ERR_ENTITY_PE_INTERNAL;
7249 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7250 "invalid entity type found\n", NULL);
7252 if (ret == XML_ERR_ENTITY_LOOP) {
7253 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7257 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7258 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7260 * Entity reference callback comes second, it's somewhat
7261 * superfluous but a compatibility to historical behaviour
7263 ctxt->sax->reference(ctxt->userData, ent->name);
7269 * If we didn't get any children for the entity being built
7271 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7272 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7276 ctxt->sax->reference(ctxt->userData, ent->name);
7280 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7282 * There is a problem on the handling of _private for entities
7283 * (bug 155816): Should we copy the content of the field from
7284 * the entity (possibly overwriting some value set by the user
7285 * when a copy is created), should we leave it alone, or should
7286 * we try to take care of different situations? The problem
7287 * is exacerbated by the usage of this field by the xmlReader.
7288 * To fix this bug, we look at _private on the created node
7289 * and, if it's NULL, we copy in whatever was in the entity.
7290 * If it's not NULL we leave it alone. This is somewhat of a
7291 * hack - maybe we should have further tests to determine
7294 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7296 * Seems we are generating the DOM content, do
7297 * a simple tree copy for all references except the first
7298 * In the first occurrence list contains the replacement.
7300 if (((list == NULL) && (ent->owner == 0)) ||
7301 (ctxt->parseMode == XML_PARSE_READER)) {
7302 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7305 * We are copying here, make sure there is no abuse
7307 ctxt->sizeentcopy += ent->length + 5;
7308 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7312 * when operating on a reader, the entities definitions
7313 * are always owning the entities subtree.
7314 if (ctxt->parseMode == XML_PARSE_READER)
7318 cur = ent->children;
7319 while (cur != NULL) {
7320 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7322 if (nw->_private == NULL)
7323 nw->_private = cur->_private;
7324 if (firstChild == NULL){
7327 nw = xmlAddChild(ctxt->node, nw);
7329 if (cur == ent->last) {
7331 * needed to detect some strange empty
7332 * node cases in the reader tests
7334 if ((ctxt->parseMode == XML_PARSE_READER) &&
7336 (nw->type == XML_ELEMENT_NODE) &&
7337 (nw->children == NULL))
7344 #ifdef LIBXML_LEGACY_ENABLED
7345 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7346 xmlAddEntityReference(ent, firstChild, nw);
7347 #endif /* LIBXML_LEGACY_ENABLED */
7348 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7349 xmlNodePtr nw = NULL, cur, next, last,
7353 * We are copying here, make sure there is no abuse
7355 ctxt->sizeentcopy += ent->length + 5;
7356 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7360 * Copy the entity child list and make it the new
7361 * entity child list. The goal is to make sure any
7362 * ID or REF referenced will be the one from the
7363 * document content and not the entity copy.
7365 cur = ent->children;
7366 ent->children = NULL;
7369 while (cur != NULL) {
7373 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7375 if (nw->_private == NULL)
7376 nw->_private = cur->_private;
7377 if (firstChild == NULL){
7380 xmlAddChild((xmlNodePtr) ent, nw);
7381 xmlAddChild(ctxt->node, cur);
7387 if (ent->owner == 0)
7389 #ifdef LIBXML_LEGACY_ENABLED
7390 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7391 xmlAddEntityReference(ent, firstChild, nw);
7392 #endif /* LIBXML_LEGACY_ENABLED */
7394 const xmlChar *nbktext;
7397 * the name change is to avoid coalescing of the
7398 * node with a possible previous text one which
7399 * would make ent->children a dangling pointer
7401 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7403 if (ent->children->type == XML_TEXT_NODE)
7404 ent->children->name = nbktext;
7405 if ((ent->last != ent->children) &&
7406 (ent->last->type == XML_TEXT_NODE))
7407 ent->last->name = nbktext;
7408 xmlAddChildList(ctxt->node, ent->children);
7412 * This is to avoid a nasty side effect, see
7413 * characters() in SAX.c
7423 * xmlParseEntityRef:
7424 * @ctxt: an XML parser context
7426 * parse ENTITY references declarations
7428 * [68] EntityRef ::= '&' Name ';'
7430 * [ WFC: Entity Declared ]
7431 * In a document without any DTD, a document with only an internal DTD
7432 * subset which contains no parameter entity references, or a document
7433 * with "standalone='yes'", the Name given in the entity reference
7434 * must match that in an entity declaration, except that well-formed
7435 * documents need not declare any of the following entities: amp, lt,
7436 * gt, apos, quot. The declaration of a parameter entity must precede
7437 * any reference to it. Similarly, the declaration of a general entity
7438 * must precede any reference to it which appears in a default value in an
7439 * attribute-list declaration. Note that if entities are declared in the
7440 * external subset or in external parameter entities, a non-validating
7441 * processor is not obligated to read and process their declarations;
7442 * for such documents, the rule that an entity must be declared is a
7443 * well-formedness constraint only if standalone='yes'.
7445 * [ WFC: Parsed Entity ]
7446 * An entity reference must not contain the name of an unparsed entity
7448 * Returns the xmlEntityPtr if found, or NULL otherwise.
/*
 * xmlParseEntityRef: read a general entity reference from the parser input
 * and look up the entity it names.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function (short lines, braces, returns) are not
 * shown here. The summary below covers only what is visible.
 *
 * Visible behaviour:
 *  - the name is read with xmlParseName(); a missing name is reported as
 *    XML_ERR_NAME_REQUIRED and a missing ';' as
 *    XML_ERR_ENTITYREF_SEMICOL_MISSING;
 *  - unless XML_PARSE_OLDSAX is set, the predefined entities are consulted
 *    first; with XML_PARSE_OLDSAX they are only a fallback after the SAX
 *    getEntity callback;
 *  - an entity that cannot be found is a fatal XML_ERR_UNDECLARED_ENTITY
 *    when the document is standalone or has neither an external subset nor
 *    parameter entity references; the XML_WAR_UNDECLARED_ENTITY report and
 *    the SAX reference() call are presumably the alternative branch (the
 *    "else" line is not in this listing);
 *  - found entities are checked against the "Parsed Entity",
 *    "No External Entity References" and "No < in Attribute Values"
 *    constraints, and parameter entity types are rejected with
 *    XML_ERR_ENTITY_IS_PARAMETER.
 */
7451 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7452 const xmlChar *name;
7453 xmlEntityPtr ent = NULL;
7456 if (ctxt->instate == XML_PARSER_EOF)
7462 name = xmlParseName(ctxt);
7464 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7465 "xmlParseEntityRef: no name\n");
7469 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7475 * Predefined entities override any extra definition
7477 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7478 ent = xmlGetPredefinedEntity(name);
7484 * Increase the number of entity references parsed
7489 * Ask first SAX for entity resolution, otherwise try the
7490 * entities which may have stored in the parser context.
7492 if (ctxt->sax != NULL) {
7493 if (ctxt->sax->getEntity != NULL)
7494 ent = ctxt->sax->getEntity(ctxt->userData, name);
/* Both fallbacks below are only attempted while ctxt->wellFormed == 1. */
7495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 (ctxt->options & XML_PARSE_OLDSAX))
7497 ent = xmlGetPredefinedEntity(name);
7498 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7499 (ctxt->userData==ctxt)) {
7500 ent = xmlSAX2GetEntity(ctxt, name);
/* The parser state is re-tested for XML_PARSER_EOF after the lookups. */
7503 if (ctxt->instate == XML_PARSER_EOF)
7506 * [ WFC: Entity Declared ]
7507 * In a document without any DTD, a document with only an
7508 * internal DTD subset which contains no parameter entity
7509 * references, or a document with "standalone='yes'", the
7510 * Name given in the entity reference must match that in an
7511 * entity declaration, except that well-formed documents
7512 * need not declare any of the following entities: amp, lt,
7514 * The declaration of a parameter entity must precede any
7516 * Similarly, the declaration of a general entity must
7517 * precede any reference to it which appears in a default
7518 * value in an attribute-list declaration. Note that if
7519 * entities are declared in the external subset or in
7520 * external parameter entities, a non-validating processor
7521 * is not obligated to read and process their declarations;
7522 * for such documents, the rule that an entity must be
7523 * declared is a well-formedness constraint only if
7527 if ((ctxt->standalone == 1) ||
7528 ((ctxt->hasExternalSubset == 0) &&
7529 (ctxt->hasPErefs == 0))) {
7530 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7533 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7534 "Entity '%s' not defined\n", name);
/* The SAX reference() callback is only invoked when ctxt->inSubset == 0. */
7535 if ((ctxt->inSubset == 0) &&
7536 (ctxt->sax != NULL) &&
7537 (ctxt->sax->reference != NULL)) {
7538 ctxt->sax->reference(ctxt->userData, name);
7541 xmlParserEntityCheck(ctxt, 0, ent, 0);
7546 * [ WFC: Parsed Entity ]
7547 * An entity reference must not contain the name of an
7550 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7552 "Entity reference to unparsed entity %s\n", name);
7556 * [ WFC: No External Entity References ]
7557 * Attribute values cannot contain direct or indirect
7558 * entity references to external entities.
7560 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7562 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7563 "Attribute references external entity '%s'\n", name);
7566 * [ WFC: No < in Attribute Values ]
7567 * The replacement text of any entity referred to directly or
7568 * indirectly in an attribute value (other than "<") must
7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7573 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
/*
 * The content is scanned for '<' only when ent->checked is 0 or has its
 * low bit set. NOTE(review): presumably the low bit records that the
 * content may contain '<' -- confirm against where ent->checked is set.
 */
7574 if (((ent->checked & 1) || (ent->checked == 0)) &&
7575 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7576 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577 "'<' in entity '%s' is not allowed in attributes values\n", name);
7582 * Internal check, no parameter entities here ...
7585 switch (ent->etype) {
7586 case XML_INTERNAL_PARAMETER_ENTITY:
7587 case XML_EXTERNAL_PARAMETER_ENTITY:
7588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589 "Attempt to reference the parameter entity '%s'\n",
7598 * [ WFC: No Recursion ]
7599 * A parsed entity must not contain a recursive reference
7600 * to itself, either directly or indirectly.
7601 * Done somewhere else
7607 * xmlParseStringEntityRef:
7608 * @ctxt: an XML parser context
7609 * @str: a pointer to an index in the string
7611 * parse ENTITY references declarations, but this version parses it from
7614 * [68] EntityRef ::= '&' Name ';'
7616 * [ WFC: Entity Declared ]
7617 * In a document without any DTD, a document with only an internal DTD
7618 * subset which contains no parameter entity references, or a document
7619 * with "standalone='yes'", the Name given in the entity reference
7620 * must match that in an entity declaration, except that well-formed
7621 * documents need not declare any of the following entities: amp, lt,
7622 * gt, apos, quot. The declaration of a parameter entity must precede
7623 * any reference to it. Similarly, the declaration of a general entity
7624 * must precede any reference to it which appears in a default value in an
7625 * attribute-list declaration. Note that if entities are declared in the
7626 * external subset or in external parameter entities, a non-validating
7627 * processor is not obligated to read and process their declarations;
7628 * for such documents, the rule that an entity must be declared is a
7629 * well-formedness constraint only if standalone='yes'.
7631 * [ WFC: Parsed Entity ]
7632 * An entity reference must not contain the name of an unparsed entity
7634 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7635 * is updated to the current location in the string.
/*
 * xmlParseStringEntityRef: string-based counterpart of the entity
 * reference parser. The name is read from the string addressed by @str
 * through xmlParseStringName() instead of from the parser input.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - a NULL @str or NULL *str is tested for before any parsing;
 *  - a missing name is reported as XML_ERR_NAME_REQUIRED and a missing ';'
 *    as XML_ERR_ENTITYREF_SEMICOL_MISSING;
 *  - unless XML_PARSE_OLDSAX is set, predefined entities are consulted
 *    first; otherwise they are a fallback after the SAX getEntity callback,
 *    followed by xmlSAX2GetEntity() when ctxt->userData == ctxt;
 *  - the visible lookup conditions here do not test ctxt->wellFormed;
 *  - an undeclared entity is a fatal XML_ERR_UNDECLARED_ENTITY when the
 *    document is standalone or has neither an external subset nor
 *    parameter entity references; the XML_WAR_UNDECLARED_ENTITY report is
 *    presumably the alternative branch (its "else" is not in this listing);
 *  - found entities go through the same constraint checks as shown below
 *    (unparsed entity, external entity in an attribute value, '<' in an
 *    attribute value, parameter entity type).
 */
7638 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7642 xmlEntityPtr ent = NULL;
7644 if ((str == NULL) || (*str == NULL))
7652 name = xmlParseStringName(ctxt, &ptr);
7654 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7655 "xmlParseStringEntityRef: no name\n");
7660 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7669 * Predefined entities override any extra definition
7671 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7672 ent = xmlGetPredefinedEntity(name);
7681 * Increate the number of entity references parsed
7686 * Ask first SAX for entity resolution, otherwise try the
7687 * entities which may have stored in the parser context.
7689 if (ctxt->sax != NULL) {
7690 if (ctxt->sax->getEntity != NULL)
7691 ent = ctxt->sax->getEntity(ctxt->userData, name);
7692 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7693 ent = xmlGetPredefinedEntity(name);
7694 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7695 ent = xmlSAX2GetEntity(ctxt, name);
/* The parser state is re-tested for XML_PARSER_EOF after the lookups. */
7698 if (ctxt->instate == XML_PARSER_EOF) {
7704 * [ WFC: Entity Declared ]
7705 * In a document without any DTD, a document with only an
7706 * internal DTD subset which contains no parameter entity
7707 * references, or a document with "standalone='yes'", the
7708 * Name given in the entity reference must match that in an
7709 * entity declaration, except that well-formed documents
7710 * need not declare any of the following entities: amp, lt,
7712 * The declaration of a parameter entity must precede any
7714 * Similarly, the declaration of a general entity must
7715 * precede any reference to it which appears in a default
7716 * value in an attribute-list declaration. Note that if
7717 * entities are declared in the external subset or in
7718 * external parameter entities, a non-validating processor
7719 * is not obligated to read and process their declarations;
7720 * for such documents, the rule that an entity must be
7721 * declared is a well-formedness constraint only if
7725 if ((ctxt->standalone == 1) ||
7726 ((ctxt->hasExternalSubset == 0) &&
7727 (ctxt->hasPErefs == 0))) {
7728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n", name);
7731 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7732 "Entity '%s' not defined\n",
7735 xmlParserEntityCheck(ctxt, 0, ent, 0);
7736 /* TODO ? check regressions ctxt->valid = 0; */
7740 * [ WFC: Parsed Entity ]
7741 * An entity reference must not contain the name of an
7744 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7746 "Entity reference to unparsed entity %s\n", name);
7750 * [ WFC: No External Entity References ]
7751 * Attribute values cannot contain direct or indirect
7752 * entity references to external entities.
7754 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7755 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7756 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7757 "Attribute references external entity '%s'\n", name);
7760 * [ WFC: No < in Attribute Values ]
7761 * The replacement text of any entity referred to directly or
7762 * indirectly in an attribute value (other than "<") must
/* Here the '<' scan is part of the branch condition itself. */
7765 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7766 (ent != NULL) && (ent->content != NULL) &&
7767 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7768 (xmlStrchr(ent->content, '<'))) {
7769 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7770 "'<' in entity '%s' is not allowed in attributes values\n",
7775 * Internal check, no parameter entities here ...
7778 switch (ent->etype) {
7779 case XML_INTERNAL_PARAMETER_ENTITY:
7780 case XML_EXTERNAL_PARAMETER_ENTITY:
7781 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7782 "Attempt to reference the parameter entity '%s'\n",
7791 * [ WFC: No Recursion ]
7792 * A parsed entity must not contain a recursive reference
7793 * to itself, either directly or indirectly.
7794 * Done somewhere else
7803 * xmlParsePEReference:
7804 * @ctxt: an XML parser context
7806 * parse PEReference declarations
7807 * The entity content is handled directly by pushing its content as
7808 * a new input stream.
7810 * [69] PEReference ::= '%' Name ';'
7812 * [ WFC: No Recursion ]
7813 * A parsed entity must not contain a recursive
7814 * reference to itself, either directly or indirectly.
7816 * [ WFC: Entity Declared ]
7817 * In a document without any DTD, a document with only an internal DTD
7818 * subset which contains no parameter entity references, or a document
7819 * with "standalone='yes'", ... ... The declaration of a parameter
7820 * entity must precede any reference to it...
7822 * [ VC: Entity Declared ]
7823 * In a document with an external subset or external parameter entities
7824 * with "standalone='no'", ... ... The declaration of a parameter entity
7825 * must precede any reference to it...
7828 * Parameter-entity references may only appear in the DTD.
7829 * NOTE: misleading but this is handled.
/*
 * xmlParsePEReference: read a parameter entity reference from the parser
 * input, resolve it through the SAX getParameterEntity callback and push
 * the entity as a new input stream.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - a missing name is reported as XML_ERR_PEREF_NO_NAME and a missing ';'
 *    as XML_ERR_PEREF_SEMICOL_MISSING;
 *  - when xmlParserDebugEntities is set the reference name is written
 *    through xmlGenericError();
 *  - an unresolved entity is a fatal XML_ERR_UNDECLARED_ENTITY when the
 *    document is standalone or has neither an external subset nor earlier
 *    parameter entity references; the other visible reports are a validity
 *    error (when validating with a validity error handler installed) and a
 *    plain warning;
 *  - an entity that is not of a parameter entity type only produces a
 *    warning;
 *  - for an external parameter entity the encoding is detected from the
 *    first four bytes and a leading text declaration is parsed;
 *  - ctxt->hasPErefs is set to 1.
 */
7832 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7834 const xmlChar *name;
7835 xmlEntityPtr entity = NULL;
7836 xmlParserInputPtr input;
7841 name = xmlParseName(ctxt);
7843 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7846 if (xmlParserDebugEntities)
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PEReference: %s\n", name);
7850 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7857 * Increate the number of entity references parsed
7862 * Request the entity from SAX
7864 if ((ctxt->sax != NULL) &&
7865 (ctxt->sax->getParameterEntity != NULL))
7866 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
/* The parser state is re-tested for XML_PARSER_EOF after the callback. */
7867 if (ctxt->instate == XML_PARSER_EOF)
7869 if (entity == NULL) {
7871 * [ WFC: Entity Declared ]
7872 * In a document without any DTD, a document with only an
7873 * internal DTD subset which contains no parameter entity
7874 * references, or a document with "standalone='yes'", ...
7875 * ... The declaration of a parameter entity must precede
7876 * any reference to it...
7878 if ((ctxt->standalone == 1) ||
7879 ((ctxt->hasExternalSubset == 0) &&
7880 (ctxt->hasPErefs == 0))) {
7881 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7882 "PEReference: %%%s; not found\n",
7886 * [ VC: Entity Declared ]
7887 * In a document with an external subset or external
7888 * parameter entities with "standalone='no'", ...
7889 * ... The declaration of a parameter entity must
7890 * precede any reference to it...
/*
 * A validity error is raised only when validation is on and a validity
 * error handler is installed; the plain warning below is presumably the
 * alternative (its "else" line is not in this listing).
 */
7892 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7893 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7894 "PEReference: %%%s; not found\n",
7897 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7898 "PEReference: %%%s; not found\n",
7902 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7905 * Internal checking in case the entity quest barfed
7907 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910 "Internal: %%%s; is not a parameter entity\n",
7914 xmlCharEncoding enc;
/*
 * Test for an external parameter entity while none of the NOENT, DTDVALID,
 * DTDLOAD or DTDATTR options is set and neither replaceEntities nor
 * validate is enabled. NOTE(review): the statement guarded by this test is
 * not in this listing; presumably it skips loading the entity -- confirm.
 */
7916 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7917 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7918 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7919 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7920 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7921 (ctxt->replaceEntities == 0) &&
7922 (ctxt->validate == 0))
/* The input stream is freed here when xmlPushInput() fails. */
7925 input = xmlNewEntityInputStream(ctxt, entity);
7926 if (xmlPushInput(ctxt, input) < 0) {
7927 xmlFreeInputStream(input);
7931 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7933 * Get the 4 first bytes and decode the charset
7934 * if enc != XML_CHAR_ENCODING_NONE
7935 * plug some encoding conversion routines.
7936 * Note that, since we may have some non-UTF8
7937 * encoding (like UTF16, bug 135229), the 'length'
7938 * is not known, but we can calculate based upon
7939 * the amount of data in the buffer.
7942 if (ctxt->instate == XML_PARSER_EOF)
7944 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7949 enc = xmlDetectCharEncoding(start, 4);
7950 if (enc != XML_CHAR_ENCODING_NONE) {
7951 xmlSwitchEncoding(ctxt, enc);
/* A text declaration is parsed when the input starts with "<?xml" + blank. */
7955 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7956 (IS_BLANK_CH(NXT(5)))) {
7957 xmlParseTextDecl(ctxt);
7962 ctxt->hasPErefs = 1;
7966 * xmlLoadEntityContent:
7967 * @ctxt: an XML parser context
7968 * @entity: an unloaded system entity
7970 * Load the original content of the given system entity from the
7971 * ExternalID/SystemID given. This is to be used for Included in Literal
7972 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7974 * Returns 0 in case of success and -1 in case of failure
/*
 * xmlLoadEntityContent: read the raw content of an external entity into a
 * buffer and store that buffer's content in entity->content.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - the arguments are rejected with XML_ERR_INTERNAL_ERROR when @ctxt or
 *    @entity is NULL, when the entity is neither an external parameter
 *    entity nor an external general parsed entity, or when entity->content
 *    is already set;
 *  - the entity is opened with xmlNewEntityInputStream() and pushed with
 *    xmlPushInput(); data is appended with xmlBufferAdd() while the pushed
 *    input is still current and has bytes left;
 *  - an invalid character is reported as XML_ERR_INVALID_CHAR;
 *  - at the end ownership of the buffer content moves to the entity:
 *    entity->content takes buf->content and buf->content is reset to NULL.
 */
7977 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7978 xmlParserInputPtr input;
7983 if ((ctxt == NULL) || (entity == NULL) ||
7984 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7985 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7986 (entity->content != NULL)) {
7987 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7988 "xmlLoadEntityContent parameter error");
7992 if (xmlParserDebugEntities)
7993 xmlGenericError(xmlGenericErrorContext,
7994 "Reading %s entity content input\n", entity->name);
/*
 * NOTE(review): the report following xmlBufferCreate() reuses the
 * "parameter error" text; presumably this is the allocation-failure path
 * (its condition is not in this listing) -- confirm whether the message
 * is intended.
 */
7996 buf = xmlBufferCreate();
7998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7999 "xmlLoadEntityContent parameter error");
8003 input = xmlNewEntityInputStream(ctxt, entity);
8004 if (input == NULL) {
8005 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8006 "xmlLoadEntityContent input error");
8012 * Push the entity as the current input, read char by char
8013 * saving to the buffer until the end of the entity or an error
8015 if (xmlPushInput(ctxt, input) < 0) {
/* The loop only runs while the pushed entity input is still current. */
8022 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8024 xmlBufferAdd(buf, ctxt->input->cur, l);
/* Every XML_PARSER_CHUNK_SIZE iterations extra work is done (not shown). */
8025 if (count++ > XML_PARSER_CHUNK_SIZE) {
8028 if (ctxt->instate == XML_PARSER_EOF) {
8038 if (ctxt->instate == XML_PARSER_EOF) {
8046 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8048 } else if (!IS_CHAR(c)) {
8049 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8050 "xmlLoadEntityContent: invalid char value %d\n",
/* Hand the accumulated bytes over to the entity and detach them from buf. */
8055 entity->content = buf->content;
8056 buf->content = NULL;
8063 * xmlParseStringPEReference:
8064 * @ctxt: an XML parser context
8065 * @str: a pointer to an index in the string
8067 * parse PEReference declarations
8069 * [69] PEReference ::= '%' Name ';'
8071 * [ WFC: No Recursion ]
8072 * A parsed entity must not contain a recursive
8073 * reference to itself, either directly or indirectly.
8075 * [ WFC: Entity Declared ]
8076 * In a document without any DTD, a document with only an internal DTD
8077 * subset which contains no parameter entity references, or a document
8078 * with "standalone='yes'", ... ... The declaration of a parameter
8079 * entity must precede any reference to it...
8081 * [ VC: Entity Declared ]
8082 * In a document with an external subset or external parameter entities
8083 * with "standalone='no'", ... ... The declaration of a parameter entity
8084 * must precede any reference to it...
8087 * Parameter-entity references may only appear in the DTD.
8088 * NOTE: misleading but this is handled.
8090 * Returns the string of the entity content.
8091 * str is updated to the current value of the index
/*
 * xmlParseStringPEReference: string-based counterpart of the parameter
 * entity reference parser. The name is read from the string addressed by
 * @str through xmlParseStringName().
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - NULL is returned at once when @str or *str is NULL;
 *  - a missing name is reported as XML_ERR_NAME_REQUIRED and a missing ';'
 *    as XML_ERR_ENTITYREF_SEMICOL_MISSING;
 *  - the entity is requested from the SAX getParameterEntity callback;
 *  - an unresolved entity is a fatal XML_ERR_UNDECLARED_ENTITY when the
 *    document is standalone or has neither an external subset nor earlier
 *    parameter entity references; the XML_WAR_UNDECLARED_ENTITY warning is
 *    presumably the alternative branch (its "else" is not in this listing);
 *  - an entity that is not of a parameter entity type only produces a
 *    warning;
 *  - ctxt->hasPErefs is set to 1.
 */
8094 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8098 xmlEntityPtr entity = NULL;
8100 if ((str == NULL) || (*str == NULL)) return(NULL);
8106 name = xmlParseStringName(ctxt, &ptr);
8108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8109 "xmlParseStringPEReference: no name\n");
8115 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8123 * Increate the number of entity references parsed
8128 * Request the entity from SAX
8130 if ((ctxt->sax != NULL) &&
8131 (ctxt->sax->getParameterEntity != NULL))
8132 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
/* The parser state is re-tested for XML_PARSER_EOF after the callback. */
8133 if (ctxt->instate == XML_PARSER_EOF) {
8138 if (entity == NULL) {
8140 * [ WFC: Entity Declared ]
8141 * In a document without any DTD, a document with only an
8142 * internal DTD subset which contains no parameter entity
8143 * references, or a document with "standalone='yes'", ...
8144 * ... The declaration of a parameter entity must precede
8145 * any reference to it...
8147 if ((ctxt->standalone == 1) ||
8148 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8149 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8150 "PEReference: %%%s; not found\n", name);
8153 * [ VC: Entity Declared ]
8154 * In a document with an external subset or external
8155 * parameter entities with "standalone='no'", ...
8156 * ... The declaration of a parameter entity must
8157 * precede any reference to it...
8159 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8160 "PEReference: %%%s; not found\n",
8164 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8167 * Internal checking in case the entity quest barfed
8169 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8170 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8171 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8172 "%%%s; is not a parameter entity\n",
8176 ctxt->hasPErefs = 1;
8183 * xmlParseDocTypeDecl:
8184 * @ctxt: an XML parser context
8186 * parse a DOCTYPE declaration
8188 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8189 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8191 * [ VC: Root Element Type ]
8192 * The Name in the document type declaration must match the element
8193 * type of the root element.
/*
 * xmlParseDocTypeDecl: parse the head of a DOCTYPE declaration (name and
 * optional external identifier) and notify the SAX internalSubset callback.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - the DOCTYPE name is read with xmlParseName(); a missing name is
 *    reported as XML_ERR_NAME_REQUIRED;
 *  - the name is stored in ctxt->intSubName;
 *  - xmlParseExternalID() supplies the system URI and the public
 *    identifier; if either is present ctxt->hasExternalSubset is set to 1;
 *  - both values are stored in ctxt->extSubURI and ctxt->extSubSystem;
 *  - the SAX internalSubset callback is invoked unless SAX is disabled;
 *  - XML_ERR_DOCTYPE_NOT_FINISHED is reported for an unterminated
 *    declaration (the test guarding it is not in this listing).
 */
8197 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8198 const xmlChar *name = NULL;
8199 xmlChar *ExternalID = NULL;
8200 xmlChar *URI = NULL;
8203 * We know that '<!DOCTYPE' has been detected.
8210 * Parse the DOCTYPE name.
8212 name = xmlParseName(ctxt);
8214 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8215 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8217 ctxt->intSubName = name;
8222 * Check for SystemID and ExternalID
8224 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8226 if ((URI != NULL) || (ExternalID != NULL)) {
8227 ctxt->hasExternalSubset = 1;
/* Note the naming: extSubSystem receives the ExternalID value. */
8229 ctxt->extSubURI = URI;
8230 ctxt->extSubSystem = ExternalID;
8235 * Create and update the internal subset.
8237 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8238 (!ctxt->disableSAX))
8239 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
/* The parser state is re-tested for XML_PARSER_EOF after the callback. */
8240 if (ctxt->instate == XML_PARSER_EOF)
8244 * Is there any internal subset declarations ?
8245 * they are handled separately in xmlParseInternalSubset()
8251 * We should be at the end of the DOCTYPE declaration.
8254 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8260 * xmlParseInternalSubset:
8261 * @ctxt: an XML parser context
8263 * parse the internal subset declaration
8265 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/*
 * xmlParseInternalSubset: parse the markup declarations and parameter
 * entity references of the internal DTD subset.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - the input depth at entry is remembered in baseInputNr and the parser
 *    state is switched to XML_PARSER_DTD;
 *  - the loop continues until ']' is seen at the original input depth, or
 *    the parser reaches XML_PARSER_EOF;
 *  - each iteration calls xmlParseMarkupDecl() and xmlParsePEReference();
 *  - if an iteration consumed nothing, XML_ERR_INTERNAL_ERROR is reported;
 *  - XML_ERR_DOCTYPE_NOT_FINISHED is reported for an unterminated
 *    declaration (the test guarding it is not in this listing).
 */
8269 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8271 * Is there any DTD definition ?
8274 int baseInputNr = ctxt->inputNr;
8275 ctxt->instate = XML_PARSER_DTD;
8278 * Parse the succession of Markup declarations and
8280 * Subsequence (markupdecl | PEReference | S)*
/* A ']' met while inside a pushed input (inputNr > baseInputNr) does not end the loop. */
8282 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8283 (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot of the read position, used below to detect lack of progress. */
8284 const xmlChar *check = CUR_PTR;
8285 unsigned int cons = ctxt->input->consumed;
8288 xmlParseMarkupDecl(ctxt);
8289 xmlParsePEReference(ctxt);
8291 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8292 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8293 "xmlParseInternalSubset: error detected in Markup declaration\n");
8294 if (ctxt->inputNr > baseInputNr)
8307 * We should be at the end of the DOCTYPE declaration.
8310 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8316 #ifdef LIBXML_SAX1_ENABLED
8318 * xmlParseAttribute:
8319 * @ctxt: an XML parser context
8320 * @value: a xmlChar ** used to store the value of the attribute
8322 * parse an attribute
8324 * [41] Attribute ::= Name Eq AttValue
8326 * [ WFC: No External Entity References ]
8327 * Attribute values cannot contain direct or indirect entity references
8328 * to external entities.
8330 * [ WFC: No < in Attribute Values ]
8331 * The replacement text of any entity referred to directly or indirectly in
8332 * an attribute value (other than "<") must not contain a <.
8334 * [ VC: Attribute Value Type ]
8335 * The attribute must have been declared; the value must be of the type
8338 * [25] Eq ::= S? '=' S?
8342 * [NS 11] Attribute ::= QName Eq AttValue
8344 * Also the case QName == xmlns:??? is handled independently as a namespace
8347 * Returns the attribute name, and the value in *value.
/*
 * xmlParseAttribute: parse one attribute (name, then value) of a start tag
 * on the SAX1 code path.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - the name is read with xmlParseName(); failure is reported as
 *    XML_ERR_NAME_REQUIRED;
 *  - the value is read with xmlParseAttValue(), after which the parser
 *    state is set to XML_PARSER_CONTENT;
 *  - XML_ERR_ATTRIBUTE_WITHOUT_VALUE is reported for an attribute without
 *    a value (the test guarding it is not in this listing);
 *  - in pedantic mode an xml:lang value rejected by xmlCheckLanguageID()
 *    produces the warning XML_WAR_LANG_VALUE;
 *  - an xml:space value is compared with "default" and "preserve";
 *    XML_WAR_SPACE_VALUE is the warning for other values.
 */
8351 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8352 const xmlChar *name;
8357 name = xmlParseName(ctxt);
8359 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8360 "error parsing attribute name\n");
8371 val = xmlParseAttValue(ctxt);
8372 ctxt->instate = XML_PARSER_CONTENT;
8374 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8375 "Specification mandates value for attribute %s\n", name);
8380 * Check that xml:lang conforms to the specification
8381 * No more registered as an error, just generate a warning now
8382 * since this was deprecated in XML second edition
/* The xml:lang check is skipped entirely unless ctxt->pedantic is set. */
8384 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8385 if (!xmlCheckLanguageID(val)) {
8386 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8387 "Malformed value for xml:lang : %s\n",
8393 * Check that xml:space conforms to the specification
8395 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8396 if (xmlStrEqual(val, BAD_CAST "default"))
8398 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8401 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8402 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8413 * @ctxt: an XML parser context
8415 * parse a start of tag either for rule element or
8416 * EmptyElement. In both case we don't parse the tag closing chars.
8418 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8420 * [ WFC: Unique Att Spec ]
8421 * No attribute name may appear more than once in the same start-tag or
8422 * empty-element tag.
8424 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8426 * [ WFC: Unique Att Spec ]
8427 * No attribute name may appear more than once in the same start-tag or
8428 * empty-element tag.
8432 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8434 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8436 * Returns the element name parsed
/*
 * xmlParseStartTag: parse a start tag or empty-element tag on the SAX1
 * code path, collect its attributes and fire the SAX startElement callback.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 *
 * Visible behaviour:
 *  - NULL is returned at once when the current character is not '<';
 *  - the element name is read with xmlParseName(); failure is reported as
 *    XML_ERR_NAME_REQUIRED;
 *  - attributes are parsed until '>' or "/>" is reached, or the parser
 *    state becomes XML_PARSER_EOF;
 *  - attributes are stored in the array cached on the context as
 *    consecutive name/value slots followed by two NULL entries;
 *  - a repeated attribute name is reported through xmlErrAttributeDup();
 *  - allocation failures are reported through xmlErrMemory();
 *  - XML_ERR_SPACE_REQUIRED is reported when no blank separates two
 *    attributes, and XML_ERR_INTERNAL_ERROR when an iteration neither
 *    consumed input nor produced an attribute;
 *  - after the callback only the value slots (odd indices) are freed.
 */
8440 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8441 const xmlChar *name;
8442 const xmlChar *attname;
/* The attribute array and its capacity are taken from the context. */
8444 const xmlChar **atts = ctxt->atts;
8446 int maxatts = ctxt->maxatts;
8449 if (RAW != '<') return(NULL);
8452 name = xmlParseName(ctxt);
8454 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8455 "xmlParseStartTag: invalid element name\n");
8460 * Now parse the attributes, it ends up with the ending
8467 while (((RAW != '>') &&
8468 ((RAW != '/') || (NXT(1) != '>')) &&
8469 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot of the read position, used below to detect lack of progress. */
8470 const xmlChar *q = CUR_PTR;
8471 unsigned int cons = ctxt->input->consumed;
8473 attname = xmlParseAttribute(ctxt, &attvalue);
8474 if ((attname != NULL) && (attvalue != NULL)) {
8476 * [ WFC: Unique Att Spec ]
8477 * No attribute name may appear more than once in the same
8478 * start-tag or empty-element tag.
/* Names sit at even indices, hence the step of 2. */
8480 for (i = 0; i < nbatts;i += 2) {
8481 if (xmlStrEqual(atts[i], attname)) {
8482 xmlErrAttributeDup(ctxt, NULL, attname);
8488 * Add the pair to atts
8491 maxatts = 22; /* allow for 10 attrs by default */
8492 atts = (const xmlChar **)
8493 xmlMalloc(maxatts * sizeof(xmlChar *));
8495 xmlErrMemory(ctxt, NULL);
8496 if (attvalue != NULL)
8501 ctxt->maxatts = maxatts;
/* Grow once the new pair plus the two trailing NULLs would not fit. */
8502 } else if (nbatts + 4 > maxatts) {
8506 n = (const xmlChar **) xmlRealloc((void *) atts,
8507 maxatts * sizeof(const xmlChar *));
8509 xmlErrMemory(ctxt, NULL);
8510 if (attvalue != NULL)
8516 ctxt->maxatts = maxatts;
8518 atts[nbatts++] = attname;
8519 atts[nbatts++] = attvalue;
8520 atts[nbatts] = NULL;
8521 atts[nbatts + 1] = NULL;
8523 if (attvalue != NULL)
8530 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8532 if (SKIP_BLANKS == 0) {
8533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8534 "attributes construct error\n");
8536 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8537 (attname == NULL) && (attvalue == NULL)) {
8538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8539 "xmlParseStartTag: problem parsing attributes\n");
8547 * SAX: Start of Element !
/*
 * startElement receives either the attribute array or NULL. The test that
 * selects between the two calls is not in this listing.
 */
8549 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8550 (!ctxt->disableSAX)) {
8552 ctxt->sax->startElement(ctxt->userData, name, atts);
8554 ctxt->sax->startElement(ctxt->userData, name, NULL);
8558 /* Free only the content strings */
8559 for (i = 1;i < nbatts;i+=2)
8560 if (atts[i] != NULL)
8561 xmlFree((xmlChar *) atts[i]);
8568 * @ctxt: an XML parser context
8569 * @line: line of the start tag
8570 * @nsNr: number of namespaces on the start tag
8572 * parse an end of tag
8574 * [42] ETag ::= '</' Name S? '>'
8578 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * xmlParseEndTag1: parse an end tag on the SAX1 code path, check it against
 * the currently open element and fire the SAX endElement callback.
 *
 * @line is only used in the tag-mismatch error message below.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so
 * some statements of this function are not shown here. The summary below
 * covers only what is visible.
 * NOTE(review): the doc comment preceding this function lists an @nsNr
 * parameter, but the signature takes only @ctxt and @line.
 *
 * Visible behaviour:
 *  - XML_ERR_LTSLASH_REQUIRED is reported when the input does not start
 *    with "</";
 *  - the tag name is read and compared against ctxt->name by
 *    xmlParseNameAndCompare();
 *  - XML_ERR_GT_REQUIRED is reported when the closing '>' is missing;
 *  - a result other than the value 1 is reported as
 *    XML_ERR_TAG_NAME_MISMATCH, using "unparseable" when no name could be
 *    read at all;
 *  - endElement is invoked with ctxt->name unless SAX is disabled.
 */
8582 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8583 const xmlChar *name;
8586 if ((RAW != '<') || (NXT(1) != '/')) {
8587 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8588 "xmlParseEndTag: '</' not found\n");
8593 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8596 * We should definitely be at the ending "S? '>'" part
8600 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8601 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8606 * [ WFC: Element Type Match ]
8607 * The Name in an element's end-tag must match the element type in the
/* The pointer value 1 acts as the "names matched" sentinel here. */
8611 if (name != (xmlChar*)1) {
8612 if (name == NULL) name = BAD_CAST "unparseable";
8613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8614 "Opening and ending tag mismatch: %s line %d and %s\n",
8615 ctxt->name, line, name);
8621 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8622 (!ctxt->disableSAX))
8623 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8632 * @ctxt: an XML parser context
8634 * parse an end of tag
8636 * [42] ETag ::= '</' Name S? '>'
8640 * [NS 9] ETag ::= '</' QName S? '>'
8644 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8645 xmlParseEndTag1(ctxt, 0);
8647 #endif /* LIBXML_SAX1_ENABLED */
8649 /************************************************************************
8651 * SAX 2 specific operations *
8653 ************************************************************************/
8657 * @ctxt: an XML parser context
8658 * @prefix: the prefix to lookup
8660 * Lookup the namespace name for the @prefix (which can be NULL)
8661 * The prefix must come from the @ctxt->dict dictionary
8663 * Returns the namespace name or NULL if not bound
8665 static const xmlChar *
8666 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8669 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8670 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8671 if (ctxt->nsTab[i] == prefix) {
8672 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8674 return(ctxt->nsTab[i + 1]);
8681 * @ctxt: an XML parser context
8682 * @prefix: pointer to store the prefix part
8684 * parse an XML Namespace QName
8686 * [6] QName ::= (Prefix ':')? LocalPart
8687 * [7] Prefix ::= NCName
8688 * [8] LocalPart ::= NCName
8690 * Returns the Name parsed or NULL
8693 static const xmlChar *
8694 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8695 const xmlChar *l, *p;
8699 l = xmlParseNCName(ctxt);
8702 l = xmlParseName(ctxt);
8704 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8705 "Failed to parse QName '%s'\n", l, NULL, NULL);
8715 l = xmlParseNCName(ctxt);
8719 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8720 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8721 l = xmlParseNmtoken(ctxt);
8723 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8725 tmp = xmlBuildQName(l, p, NULL, 0);
8728 p = xmlDictLookup(ctxt->dict, tmp, -1);
8729 if (tmp != NULL) xmlFree(tmp);
8736 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8737 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8739 tmp = (xmlChar *) xmlParseName(ctxt);
8741 tmp = xmlBuildQName(tmp, l, NULL, 0);
8742 l = xmlDictLookup(ctxt->dict, tmp, -1);
8743 if (tmp != NULL) xmlFree(tmp);
8747 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8748 l = xmlDictLookup(ctxt->dict, tmp, -1);
8749 if (tmp != NULL) xmlFree(tmp);
8760 * xmlParseQNameAndCompare:
8761 * @ctxt: an XML parser context
8762 * @name: the localname
8763 * @prefix: the prefix, if any.
8765 * parse an XML name and compares for match
8766 * (specialized for endtag parsing)
8768 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8769 * and the name for mismatch
8772 static const xmlChar *
8773 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8774 xmlChar const *prefix) {
8778 const xmlChar *prefix2;
8780 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8783 in = ctxt->input->cur;
8786 while (*in != 0 && *in == *cmp) {
8790 if ((*cmp == 0) && (*in == ':')) {
8793 while (*in != 0 && *in == *cmp) {
8797 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8799 ctxt->input->cur = in;
8800 return((const xmlChar*) 1);
8804 * all strings come from the dictionary, equality can be done directly
8806 ret = xmlParseQName (ctxt, &prefix2);
8807 if ((ret == name) && (prefix == prefix2))
8808 return((const xmlChar*) 1);
8813 * xmlParseAttValueInternal:
8814 * @ctxt: an XML parser context
8815 * @len: attribute len result
8816 * @alloc: whether the attribute was reallocated as a new string
8817 * @normalize: if 1 then further non-CDATA normalization must be done
8819 * parse a value for an attribute.
8820 * NOTE: if no normalization is needed, the routine will return pointers
8821 * directly from the data buffer.
8823 * 3.3.3 Attribute-Value Normalization:
8824 * Before the value of an attribute is passed to the application or
8825 * checked for validity, the XML processor must normalize it as follows:
8826 * - a character reference is processed by appending the referenced
8827 * character to the attribute value
8828 * - an entity reference is processed by recursively processing the
8829 * replacement text of the entity
8830 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8831 * appending #x20 to the normalized value, except that only a single
8832 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8833 * parsed entity or the literal entity value of an internal parsed entity
8834 * - other characters are processed by appending them to the normalized value
8835 * If the declared value is not CDATA, then the XML processor must further
8836 * process the normalized attribute value by discarding any leading and
8837 * trailing space (#x20) characters, and by replacing sequences of space
8838 * (#x20) characters by a single space (#x20) character.
8839 * All attributes for which no declaration has been read should be treated
8840 * by a non-validating parser as if declared CDATA.
8842 * Returns the AttValue parsed or NULL. The value has to be freed by the
8843 * caller if it was copied, this can be detected by val[*len] == 0.
8847 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8851 const xmlChar *in = NULL, *start, *end, *last;
8852 xmlChar *ret = NULL;
8856 in = (xmlChar *) CUR_PTR;
8857 line = ctxt->input->line;
8858 col = ctxt->input->col;
8859 if (*in != '"' && *in != '\'') {
8860 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8866 * try to handle in this routine the most common case where no
8867 * allocation of a new string is required and where content is
8872 end = ctxt->input->end;
8875 const xmlChar *oldbase = ctxt->input->base;
8877 if (oldbase != ctxt->input->base) {
8878 long delta = ctxt->input->base - oldbase;
8879 start = start + delta;
8882 end = ctxt->input->end;
8886 * Skip any leading spaces
8888 while ((in < end) && (*in != limit) &&
8889 ((*in == 0x20) || (*in == 0x9) ||
8890 (*in == 0xA) || (*in == 0xD))) {
8899 const xmlChar *oldbase = ctxt->input->base;
8901 if (ctxt->instate == XML_PARSER_EOF)
8903 if (oldbase != ctxt->input->base) {
8904 long delta = ctxt->input->base - oldbase;
8905 start = start + delta;
8908 end = ctxt->input->end;
8909 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8910 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8911 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8912 "AttValue length too long\n");
8917 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8918 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8920 if ((*in++ == 0x20) && (*in == 0x20)) break;
8922 const xmlChar *oldbase = ctxt->input->base;
8924 if (ctxt->instate == XML_PARSER_EOF)
8926 if (oldbase != ctxt->input->base) {
8927 long delta = ctxt->input->base - oldbase;
8928 start = start + delta;
8931 end = ctxt->input->end;
8932 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8933 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8934 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8935 "AttValue length too long\n");
8942 * skip the trailing blanks
8944 while ((last[-1] == 0x20) && (last > start)) last--;
8945 while ((in < end) && (*in != limit) &&
8946 ((*in == 0x20) || (*in == 0x9) ||
8947 (*in == 0xA) || (*in == 0xD))) {
8955 const xmlChar *oldbase = ctxt->input->base;
8957 if (ctxt->instate == XML_PARSER_EOF)
8959 if (oldbase != ctxt->input->base) {
8960 long delta = ctxt->input->base - oldbase;
8961 start = start + delta;
8963 last = last + delta;
8965 end = ctxt->input->end;
8966 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8967 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8968 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8969 "AttValue length too long\n");
8974 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8975 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8976 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8977 "AttValue length too long\n");
8980 if (*in != limit) goto need_complex;
8982 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8983 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8987 const xmlChar *oldbase = ctxt->input->base;
8989 if (ctxt->instate == XML_PARSER_EOF)
8991 if (oldbase != ctxt->input->base) {
8992 long delta = ctxt->input->base - oldbase;
8993 start = start + delta;
8996 end = ctxt->input->end;
8997 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9000 "AttValue length too long\n");
9006 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9007 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9008 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9009 "AttValue length too long\n");
9012 if (*in != limit) goto need_complex;
9017 *len = last - start;
9018 ret = (xmlChar *) start;
9020 if (alloc) *alloc = 1;
9021 ret = xmlStrndup(start, last - start);
9024 ctxt->input->line = line;
9025 ctxt->input->col = col;
9026 if (alloc) *alloc = 0;
9029 if (alloc) *alloc = 1;
9030 return xmlParseAttValueComplex(ctxt, len, normalize);
9034 * xmlParseAttribute2:
9035 * @ctxt: an XML parser context
9036 * @pref: the element prefix
9037 * @elem: the element name
9038 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9039 * @value: a xmlChar ** used to store the value of the attribute
9040 * @len: an int * to save the length of the attribute
9041 * @alloc: an int * to indicate if the attribute was allocated
9043 * parse an attribute in the new SAX2 framework.
9045 * Returns the attribute name, and the value in *value.
9048 static const xmlChar *
9049 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9050 const xmlChar * pref, const xmlChar * elem,
9051 const xmlChar ** prefix, xmlChar ** value,
9052 int *len, int *alloc)
9054 const xmlChar *name;
9055 xmlChar *val, *internal_val = NULL;
9060 name = xmlParseQName(ctxt, prefix);
9062 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9063 "error parsing attribute name\n");
9068 * get the type if needed
9070 if (ctxt->attsSpecial != NULL) {
9073 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9074 pref, elem, *prefix, name);
9086 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9089 * Sometimes a second normalisation pass for spaces is needed
9090 * but that only happens if charrefs or entity references
9091 * have been used in the attribute value, i.e. the attribute
9092 * value has been extracted in an allocated string already.
9095 const xmlChar *val2;
9097 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9098 if ((val2 != NULL) && (val2 != val)) {
9100 val = (xmlChar *) val2;
9104 ctxt->instate = XML_PARSER_CONTENT;
9106 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9107 "Specification mandates value for attribute %s\n",
9112 if (*prefix == ctxt->str_xml) {
9114 * Check that xml:lang conforms to the specification
9115 * No more registered as an error, just generate a warning now
9116 * since this was deprecated in XML second edition
9118 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9119 internal_val = xmlStrndup(val, *len);
9120 if (!xmlCheckLanguageID(internal_val)) {
9121 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9122 "Malformed value for xml:lang : %s\n",
9123 internal_val, NULL);
9128 * Check that xml:space conforms to the specification
9130 if (xmlStrEqual(name, BAD_CAST "space")) {
9131 internal_val = xmlStrndup(val, *len);
9132 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9134 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9137 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9138 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9139 internal_val, NULL);
9143 xmlFree(internal_val);
9151 * xmlParseStartTag2:
9152 * @ctxt: an XML parser context
9154 * parse a start of tag either for rule element or
9155 * EmptyElement. In both cases we don't parse the tag closing chars.
9156 * This routine is called when running SAX2 parsing
9158 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9160 * [ WFC: Unique Att Spec ]
9161 * No attribute name may appear more than once in the same start-tag or
9162 * empty-element tag.
9164 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9166 * [ WFC: Unique Att Spec ]
9167 * No attribute name may appear more than once in the same start-tag or
9168 * empty-element tag.
9172 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9174 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9176 * Returns the element name parsed
9179 static const xmlChar *
9180 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9181 const xmlChar **URI, int *tlen) {
9182 const xmlChar *localname;
9183 const xmlChar *prefix;
9184 const xmlChar *attname;
9185 const xmlChar *aprefix;
9186 const xmlChar *nsname;
9188 const xmlChar **atts = ctxt->atts;
9189 int maxatts = ctxt->maxatts;
9190 int nratts, nbatts, nbdef, inputid;
9191 int i, j, nbNs, attval;
9193 int nsNr = ctxt->nsNr;
9195 if (RAW != '<') return(NULL);
9199 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9200 * point since the attribute values may be stored as pointers to
9201 * the buffer and calling SHRINK would destroy them !
9202 * The Shrinking is only possible once the full set of attribute
9203 * callbacks have been done.
9206 cur = ctxt->input->cur - ctxt->input->base;
9207 inputid = ctxt->input->id;
9213 /* Forget any namespaces added during an earlier parse of this element. */
9216 localname = xmlParseQName(ctxt, &prefix);
9217 if (localname == NULL) {
9218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9219 "StartTag: invalid element name\n");
9222 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9225 * Now parse the attributes, it ends up with the ending
9232 while (((RAW != '>') &&
9233 ((RAW != '/') || (NXT(1) != '>')) &&
9234 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9235 const xmlChar *q = CUR_PTR;
9236 unsigned int cons = ctxt->input->consumed;
9237 int len = -1, alloc = 0;
9239 attname = xmlParseAttribute2(ctxt, prefix, localname,
9240 &aprefix, &attvalue, &len, &alloc);
9241 if ((attname == NULL) || (attvalue == NULL))
9243 if (len < 0) len = xmlStrlen(attvalue);
9245 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9246 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9250 xmlErrMemory(ctxt, "dictionary allocation failure");
9251 if ((attvalue != NULL) && (alloc != 0))
9256 uri = xmlParseURI((const char *) URL);
9258 xmlNsErr(ctxt, XML_WAR_NS_URI,
9259 "xmlns: '%s' is not a valid URI\n",
9262 if (uri->scheme == NULL) {
9263 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9264 "xmlns: URI %s is not absolute\n",
9269 if (URL == ctxt->str_xml_ns) {
9270 if (attname != ctxt->str_xml) {
9271 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9272 "xml namespace URI cannot be the default namespace\n",
9279 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9280 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9281 "reuse of the xmlns namespace name is forbidden\n",
9287 * check that it's not a defined namespace
9289 for (j = 1;j <= nbNs;j++)
9290 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9293 xmlErrAttributeDup(ctxt, NULL, attname);
9295 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9297 } else if (aprefix == ctxt->str_xmlns) {
9298 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9301 if (attname == ctxt->str_xml) {
9302 if (URL != ctxt->str_xml_ns) {
9303 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9304 "xml namespace prefix mapped to wrong URI\n",
9308 * Do not keep a namespace definition node
9312 if (URL == ctxt->str_xml_ns) {
9313 if (attname != ctxt->str_xml) {
9314 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9315 "xml namespace URI mapped to wrong prefix\n",
9320 if (attname == ctxt->str_xmlns) {
9321 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9322 "redefinition of the xmlns prefix is forbidden\n",
9328 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9329 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330 "reuse of the xmlns namespace name is forbidden\n",
9334 if ((URL == NULL) || (URL[0] == 0)) {
9335 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9336 "xmlns:%s: Empty XML namespace is not allowed\n",
9337 attname, NULL, NULL);
9340 uri = xmlParseURI((const char *) URL);
9342 xmlNsErr(ctxt, XML_WAR_NS_URI,
9343 "xmlns:%s: '%s' is not a valid URI\n",
9344 attname, URL, NULL);
9346 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9347 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348 "xmlns:%s: URI %s is not absolute\n",
9349 attname, URL, NULL);
9356 * check that it's not a defined namespace
9358 for (j = 1;j <= nbNs;j++)
9359 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9362 xmlErrAttributeDup(ctxt, aprefix, attname);
9364 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9368 * Add the pair to atts
9370 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9371 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9374 maxatts = ctxt->maxatts;
9377 ctxt->attallocs[nratts++] = alloc;
9378 atts[nbatts++] = attname;
9379 atts[nbatts++] = aprefix;
9381 * The namespace URI field is used temporarily to point at the
9382 * base of the current input buffer for non-alloced attributes.
9383 * When the input buffer is reallocated, all the pointers become
9384 * invalid, but they can be reconstructed later.
9387 atts[nbatts++] = NULL;
9389 atts[nbatts++] = ctxt->input->base;
9390 atts[nbatts++] = attvalue;
9392 atts[nbatts++] = attvalue;
9394 * tag if some deallocation is needed
9396 if (alloc != 0) attval = 1;
9397 attvalue = NULL; /* moved into atts */
9401 if ((attvalue != NULL) && (alloc != 0)) {
9407 if (ctxt->instate == XML_PARSER_EOF)
9409 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9411 if (SKIP_BLANKS == 0) {
9412 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9413 "attributes construct error\n");
9416 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9417 (attname == NULL) && (attvalue == NULL)) {
9418 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9419 "xmlParseStartTag: problem parsing attributes\n");
9425 if (ctxt->input->id != inputid) {
9426 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9427 "Unexpected change of input\n");
9432 /* Reconstruct attribute value pointers. */
9433 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9434 if (atts[i+2] != NULL) {
9436 * Arithmetic on dangling pointers is technically undefined
9437 * behavior, but well...
9439 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9440 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9441 atts[i+3] += offset; /* value */
9442 atts[i+4] += offset; /* valuend */
9447 * The attributes defaulting
9449 if (ctxt->attsDefault != NULL) {
9450 xmlDefAttrsPtr defaults;
9452 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9453 if (defaults != NULL) {
9454 for (i = 0;i < defaults->nbAttrs;i++) {
9455 attname = defaults->values[5 * i];
9456 aprefix = defaults->values[5 * i + 1];
9459 * special work for namespaces defaulted defs
9461 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9463 * check that it's not a defined namespace
9465 for (j = 1;j <= nbNs;j++)
9466 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9468 if (j <= nbNs) continue;
9470 nsname = xmlGetNamespace(ctxt, NULL);
9471 if (nsname != defaults->values[5 * i + 2]) {
9472 if (nsPush(ctxt, NULL,
9473 defaults->values[5 * i + 2]) > 0)
9476 } else if (aprefix == ctxt->str_xmlns) {
9478 * check that it's not a defined namespace
9480 for (j = 1;j <= nbNs;j++)
9481 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9483 if (j <= nbNs) continue;
9485 nsname = xmlGetNamespace(ctxt, attname);
9486 if (nsname != defaults->values[2]) {
9487 if (nsPush(ctxt, attname,
9488 defaults->values[5 * i + 2]) > 0)
9493 * check that it's not a defined attribute
9495 for (j = 0;j < nbatts;j+=5) {
9496 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9499 if (j < nbatts) continue;
9501 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9502 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9505 maxatts = ctxt->maxatts;
9508 atts[nbatts++] = attname;
9509 atts[nbatts++] = aprefix;
9510 if (aprefix == NULL)
9511 atts[nbatts++] = NULL;
9513 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9514 atts[nbatts++] = defaults->values[5 * i + 2];
9515 atts[nbatts++] = defaults->values[5 * i + 3];
9516 if ((ctxt->standalone == 1) &&
9517 (defaults->values[5 * i + 4] != NULL)) {
9518 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9519 "standalone: attribute %s on %s defaulted from external subset\n",
9520 attname, localname);
9529 * The attribute checks
9531 for (i = 0; i < nbatts;i += 5) {
9533 * The default namespace does not apply to attribute names.
9535 if (atts[i + 1] != NULL) {
9536 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9537 if (nsname == NULL) {
9538 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9539 "Namespace prefix %s for %s on %s is not defined\n",
9540 atts[i + 1], atts[i], localname);
9542 atts[i + 2] = nsname;
9546 * [ WFC: Unique Att Spec ]
9547 * No attribute name may appear more than once in the same
9548 * start-tag or empty-element tag.
9549 * As extended by the Namespace in XML REC.
9551 for (j = 0; j < i;j += 5) {
9552 if (atts[i] == atts[j]) {
9553 if (atts[i+1] == atts[j+1]) {
9554 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9557 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9558 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9559 "Namespaced Attribute %s in '%s' redefined\n",
9560 atts[i], nsname, NULL);
9567 nsname = xmlGetNamespace(ctxt, prefix);
9568 if ((prefix != NULL) && (nsname == NULL)) {
9569 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9570 "Namespace prefix %s on %s is not defined\n",
9571 prefix, localname, NULL);
9577 * SAX: Start of Element !
9579 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9580 (!ctxt->disableSAX)) {
9582 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9583 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9584 nbatts / 5, nbdef, atts);
9586 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9587 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9592 * Free up attribute allocated strings if needed
9595 for (i = 3,j = 0; j < nratts;i += 5,j++)
9596 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9597 xmlFree((xmlChar *) atts[i]);
9605 * @ctxt: an XML parser context
9606 * @line: line of the start tag
9607 * @nsNr: number of namespaces on the start tag
9609 * parse an end of tag
9611 * [42] ETag ::= '</' Name S? '>'
9615 * [NS 9] ETag ::= '</' QName S? '>'
9619 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9620 const xmlChar *URI, int line, int nsNr, int tlen) {
9621 const xmlChar *name;
9625 if ((RAW != '<') || (NXT(1) != '/')) {
9626 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9631 curLength = ctxt->input->end - ctxt->input->cur;
9632 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9633 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9634 if ((curLength >= (size_t)(tlen + 1)) &&
9635 (ctxt->input->cur[tlen] == '>')) {
9636 ctxt->input->cur += tlen + 1;
9637 ctxt->input->col += tlen + 1;
9640 ctxt->input->cur += tlen;
9641 ctxt->input->col += tlen;
9645 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9647 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9651 * We should definitely be at the ending "S? '>'" part
9654 if (ctxt->instate == XML_PARSER_EOF)
9657 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9658 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9663 * [ WFC: Element Type Match ]
9664 * The Name in an element's end-tag must match the element type in the
9668 if (name != (xmlChar*)1) {
9669 if (name == NULL) name = BAD_CAST "unparseable";
9670 if ((line == 0) && (ctxt->node != NULL))
9671 line = ctxt->node->line;
9672 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9673 "Opening and ending tag mismatch: %s line %d and %s\n",
9674 ctxt->name, line, name);
9681 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9682 (!ctxt->disableSAX))
9683 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9693 * @ctxt: an XML parser context
9695 * Parse escaped pure raw content.
9697 * [18] CDSect ::= CDStart CData CDEnd
9699 * [19] CDStart ::= '<![CDATA['
9701 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9703 * [21] CDEnd ::= ']]>'
9706 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9707 xmlChar *buf = NULL;
9709 int size = XML_PARSER_BUFFER_SIZE;
9715 /* Check 2.6.0 was NXT(0) not RAW */
9716 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9721 ctxt->instate = XML_PARSER_CDATA_SECTION;
9724 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9725 ctxt->instate = XML_PARSER_CONTENT;
9731 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9732 ctxt->instate = XML_PARSER_CONTENT;
9737 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9739 xmlErrMemory(ctxt, NULL);
9742 while (IS_CHAR(cur) &&
9743 ((r != ']') || (s != ']') || (cur != '>'))) {
9744 if (len + 5 >= size) {
9747 if ((size > XML_MAX_TEXT_LENGTH) &&
9748 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9749 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9750 "CData section too big found", NULL);
9754 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9757 xmlErrMemory(ctxt, NULL);
9763 COPY_BUF(rl,buf,len,r);
9771 if (ctxt->instate == XML_PARSER_EOF) {
9781 ctxt->instate = XML_PARSER_CONTENT;
9783 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9784 "CData section not finished\n%.50s\n", buf);
9791 * OK the buffer is to be consumed as cdata.
9793 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9794 if (ctxt->sax->cdataBlock != NULL)
9795 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9796 else if (ctxt->sax->characters != NULL)
9797 ctxt->sax->characters(ctxt->userData, buf, len);
9804 * @ctxt: an XML parser context
9808 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9812 xmlParseContent(xmlParserCtxtPtr ctxt) {
9814 while ((RAW != 0) &&
9815 ((RAW != '<') || (NXT(1) != '/')) &&
9816 (ctxt->instate != XML_PARSER_EOF)) {
9817 const xmlChar *test = CUR_PTR;
9818 unsigned int cons = ctxt->input->consumed;
9819 const xmlChar *cur = ctxt->input->cur;
9822 * First case : a Processing Instruction.
9824 if ((*cur == '<') && (cur[1] == '?')) {
9829 * Second case : a CDSection
9831 /* 2.6.0 test was *cur not RAW */
9832 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9833 xmlParseCDSect(ctxt);
9837 * Third case : a comment
9839 else if ((*cur == '<') && (NXT(1) == '!') &&
9840 (NXT(2) == '-') && (NXT(3) == '-')) {
9841 xmlParseComment(ctxt);
9842 ctxt->instate = XML_PARSER_CONTENT;
9846 * Fourth case : a sub-element.
9848 else if (*cur == '<') {
9849 xmlParseElement(ctxt);
9853 * Fifth case : a reference. If it has not been resolved,
9854 * parsing returns its Name, create the node
9857 else if (*cur == '&') {
9858 xmlParseReference(ctxt);
9862 * Last case, text. Note that References are handled directly.
9865 xmlParseCharData(ctxt, 0);
9871 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9872 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9873 "detected an error in element content\n");
9874 xmlHaltParser(ctxt);
9882 * @ctxt: an XML parser context
9884 * parse an XML element, this is highly recursive
9886 * [39] element ::= EmptyElemTag | STag content ETag
9888 * [ WFC: Element Type Match ]
9889 * The Name in an element's end-tag must match the element type in the
9895 xmlParseElement(xmlParserCtxtPtr ctxt) {
9896 const xmlChar *name;
9897 const xmlChar *prefix = NULL;
9898 const xmlChar *URI = NULL;
9899 xmlParserNodeInfo node_info;
9902 int nsNr = ctxt->nsNr;
9904 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9905 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9906 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9907 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9909 xmlHaltParser(ctxt);
9913 /* Capture start position */
9914 if (ctxt->record_info) {
9915 node_info.begin_pos = ctxt->input->consumed +
9916 (CUR_PTR - ctxt->input->base);
9917 node_info.begin_line = ctxt->input->line;
9920 if (ctxt->spaceNr == 0)
9921 spacePush(ctxt, -1);
9922 else if (*ctxt->space == -2)
9923 spacePush(ctxt, -1);
9925 spacePush(ctxt, *ctxt->space);
9927 line = ctxt->input->line;
9928 #ifdef LIBXML_SAX1_ENABLED
9930 #endif /* LIBXML_SAX1_ENABLED */
9931 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9932 #ifdef LIBXML_SAX1_ENABLED
9934 name = xmlParseStartTag(ctxt);
9935 #endif /* LIBXML_SAX1_ENABLED */
9936 if (ctxt->instate == XML_PARSER_EOF)
9942 namePush(ctxt, name);
9945 #ifdef LIBXML_VALID_ENABLED
9947 * [ VC: Root Element Type ]
9948 * The Name in the document type declaration must match the element
9949 * type of the root element.
9951 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9952 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9953 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9954 #endif /* LIBXML_VALID_ENABLED */
9957 * Check for an Empty Element.
9959 if ((RAW == '/') && (NXT(1) == '>')) {
9962 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9963 (!ctxt->disableSAX))
9964 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9965 #ifdef LIBXML_SAX1_ENABLED
9967 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9968 (!ctxt->disableSAX))
9969 ctxt->sax->endElement(ctxt->userData, name);
9970 #endif /* LIBXML_SAX1_ENABLED */
9974 if (nsNr != ctxt->nsNr)
9975 nsPop(ctxt, ctxt->nsNr - nsNr);
9976 if ( ret != NULL && ctxt->record_info ) {
9977 node_info.end_pos = ctxt->input->consumed +
9978 (CUR_PTR - ctxt->input->base);
9979 node_info.end_line = ctxt->input->line;
9980 node_info.node = ret;
9981 xmlParserAddNodeInfo(ctxt, &node_info);
9988 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9989 "Couldn't find end of Start Tag %s line %d\n",
9993 * end of parsing of this node.
9998 if (nsNr != ctxt->nsNr)
9999 nsPop(ctxt, ctxt->nsNr - nsNr);
10002 * Capture end position and add node
10004 if ( ret != NULL && ctxt->record_info ) {
10005 node_info.end_pos = ctxt->input->consumed +
10006 (CUR_PTR - ctxt->input->base);
10007 node_info.end_line = ctxt->input->line;
10008 node_info.node = ret;
10009 xmlParserAddNodeInfo(ctxt, &node_info);
10015 * Parse the content of the element:
10017 xmlParseContent(ctxt);
10018 if (ctxt->instate == XML_PARSER_EOF)
10020 if (!IS_BYTE_CHAR(RAW)) {
10021 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10022 "Premature end of data in tag %s line %d\n",
10026 * end of parsing of this node.
10031 if (nsNr != ctxt->nsNr)
10032 nsPop(ctxt, ctxt->nsNr - nsNr);
10037 * parse the end of tag: '</' should be here.
10040 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10043 #ifdef LIBXML_SAX1_ENABLED
10045 xmlParseEndTag1(ctxt, line);
10046 #endif /* LIBXML_SAX1_ENABLED */
10049 * Capture end position and add node
10051 if ( ret != NULL && ctxt->record_info ) {
10052 node_info.end_pos = ctxt->input->consumed +
10053 (CUR_PTR - ctxt->input->base);
10054 node_info.end_line = ctxt->input->line;
10055 node_info.node = ret;
10056 xmlParserAddNodeInfo(ctxt, &node_info);
10061 * xmlParseVersionNum:
10062 * @ctxt: an XML parser context
10064 * parse the XML version value.
10066 * [26] VersionNum ::= '1.' [0-9]+
10068 * In practice allow [0-9].[0-9]+ at that level
10070 * Returns the string giving the XML version number, or NULL
/*
 * Review notes:
 * - The value is accumulated in a heap buffer obtained from
 *   xmlMallocAtomic() and enlarged with xmlRealloc() as digits arrive.
 * - NOTE(review): the caller presumably owns the returned buffer and must
 *   release it; confirm against the call sites.
 * - NOTE(review): several short statements of this function are not visible
 *   in this excerpt, so the comments below only describe the visible lines.
 */
10073 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10074 xmlChar *buf = NULL;
/* Initial allocation; xmlErrMemory() below is presumably the failure path. */
10079 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10081 xmlErrMemory(ctxt, NULL);
/* The first character has to be a decimal digit. */
10085 if (!((cur >= '0') && (cur <= '9'))) {
/* Consume the following run of decimal digits. */
10099 while ((cur >= '0') && (cur <= '9')) {
/* Grow before the buffer fills up; the test keeps one byte in reserve. */
10100 if (len + 1 >= size) {
/* The new block goes to tmp first, so buf is still valid if this fails. */
10104 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10107 xmlErrMemory(ctxt, NULL);
10121 * xmlParseVersionInfo:
10122 * @ctxt: an XML parser context
10124 * parse the XML version.
10126 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10128 * [25] Eq ::= S? '=' S?
10130 * Returns the version string, e.g. "1.0"
/*
 * Review notes:
 * - version starts out as NULL and is only assigned from
 *   xmlParseVersionNum(), so NULL is presumably what the caller gets when
 *   the keyword or a well-formed value is absent.
 * - Three distinct fatal errors are raised: missing equals sign, value not
 *   closed by its quote, and value not started by a quote.
 */
10134 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10135 xmlChar *version = NULL;
/* Only proceed when the input is positioned on the version keyword. */
10137 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
/* Error path for a keyword that is not followed by an equals sign. */
10141 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
/* First quoting style: the number itself is parsed by the helper. */
10148 version = xmlParseVersionNum(ctxt);
10150 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Second quoting style (single quote), handled the same way. */
10153 } else if (RAW == '\''){
10155 version = xmlParseVersionNum(ctxt);
10157 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Neither quote character was found where the value should begin. */
10161 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10169 * @ctxt: an XML parser context
10171 * parse the XML encoding name
10173 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10175 * Returns the encoding name value or NULL
/*
 * Review notes:
 * - The name is only collected when the current character is an ASCII
 *   letter; otherwise XML_ERR_ENCODING_NAME is raised at the end.
 * - The buffer comes from xmlMallocAtomic() and is enlarged through
 *   xmlRealloc(); NOTE(review): the caller presumably has to free the
 *   returned name -- confirm against the call sites.
 */
10178 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10179 xmlChar *buf = NULL;
/* Production [81]: the first character must be an ASCII letter. */
10185 if (((cur >= 'a') && (cur <= 'z')) ||
10186 ((cur >= 'A') && (cur <= 'Z'))) {
10187 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10189 xmlErrMemory(ctxt, NULL);
/* Following characters: letters, digits, dot, underscore (the last
 * alternative of this condition is not visible in this excerpt). */
10196 while (((cur >= 'a') && (cur <= 'z')) ||
10197 ((cur >= 'A') && (cur <= 'Z')) ||
10198 ((cur >= '0') && (cur <= '9')) ||
10199 (cur == '.') || (cur == '_') ||
/* Grow before the buffer fills up; the test keeps one byte in reserve. */
10201 if (len + 1 >= size) {
/* The new block goes to tmp first, so buf is still valid if this fails. */
10205 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10207 xmlErrMemory(ctxt, NULL);
/* Reached when the input does not start with a letter. */
10224 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10230 * xmlParseEncodingDecl:
10231 * @ctxt: an XML parser context
10233 * parse the XML encoding declaration
10235 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10237 * this setups the conversion filters.
10239 * Returns the encoding value or NULL
/*
 * Review notes on what the visible code does with the parsed name:
 * - with XML_PARSE_IGNORE_ENC set, the name is freed again;
 * - UTF-16/UTF16 and UTF-8/UTF8 (compared case-insensitively) are stored in
 *   ctxt->encoding, freeing any previous value first;
 * - any other name is stored in ctxt->input->encoding (again freeing the
 *   previous value) and a conversion handler is looked up and installed
 *   with xmlSwitchToEncoding().
 * In the last two cases the context keeps the pointer, so the string is
 * owned by the context from then on.
 */
10243 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10244 xmlChar *encoding = NULL;
/* Nothing is parsed unless the input is positioned on the keyword. */
10247 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10251 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
/* First quoting style. */
10258 encoding = xmlParseEncName(ctxt);
/* Closing quote missing: report it and drop the name parsed so far. */
10260 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10261 xmlFree((xmlChar *) encoding);
/* Second quoting style (single quote), handled the same way. */
10265 } else if (RAW == '\''){
10267 encoding = xmlParseEncName(ctxt);
10269 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10270 xmlFree((xmlChar *) encoding);
10275 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10279 * Non standard parsing, allowing the user to ignore encoding
10281 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10282 xmlFree((xmlChar *) encoding);
10287 * UTF-16 encoding stwich has already taken place at this stage,
10288 * more over the little-endian/big-endian selection is already done
10290 if ((encoding != NULL) &&
10291 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10292 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10294 * If no encoding was passed to the parser, that we are
10295 * using UTF-16 and no decoder is present i.e. the
10296 * document is apparently UTF-8 compatible, then raise an
10297 * encoding mismatch fatal error
10299 if ((ctxt->encoding == NULL) &&
10300 (ctxt->input->buf != NULL) &&
10301 (ctxt->input->buf->encoder == NULL)) {
10302 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10303 "Document labelled UTF-16 but has UTF-8 content\n");
/* Replace the encoding recorded on the context with the declared one. */
10305 if (ctxt->encoding != NULL)
10306 xmlFree((xmlChar *) ctxt->encoding);
10307 ctxt->encoding = encoding;
10310 * UTF-8 encoding is handled natively
10312 else if ((encoding != NULL) &&
10313 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10314 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10315 if (ctxt->encoding != NULL)
10316 xmlFree((xmlChar *) ctxt->encoding);
10317 ctxt->encoding = encoding;
/* Every other encoding name: record it on the input and find a handler. */
10319 else if (encoding != NULL) {
10320 xmlCharEncodingHandlerPtr handler;
10322 if (ctxt->input->encoding != NULL)
10323 xmlFree((xmlChar *) ctxt->input->encoding);
10324 ctxt->input->encoding = encoding;
10326 handler = xmlFindCharEncodingHandler((const char *) encoding);
10327 if (handler != NULL) {
/* A failed switch is flagged through errNo only, without a new message. */
10328 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10329 /* failed to convert */
10330 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
/* Presumably the branch taken when no handler exists for the name. */
10334 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10335 "Unsupported encoding %s\n", encoding);
10345 * @ctxt: an XML parser context
10347 * parse the XML standalone declaration
10349 * [32] SDDecl ::= S 'standalone' Eq
10350 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10352 * [ VC: Standalone Document Declaration ]
10353 * TODO The standalone document declaration must have the value "no"
10354 * if any external markup declarations contain declarations of:
10355 * - attributes with default values, if elements to which these
10356 * attributes apply appear in the document without specifications
10357 * of values for these attributes, or
10358 * - entities (other than amp, lt, gt, apos, quot), if references
10359 * to those entities appear in the document, or
10360 * - attributes with values subject to normalization, where the
10361 * attribute appears in the document with a value which will change
10362 * as a result of normalization, or
10363 * - element types with element content, if white space occurs directly
10364 * within any instance of those types.
10367 * 1 if standalone="yes"
10368 * 0 if standalone="no"
10369 * -2 if standalone attribute is missing or invalid
10370 * (A standalone value of -2 means that the XML declaration was found,
10371 * but no value was specified for the standalone attribute).
/*
 * Review notes:
 * - standalone is initialised to -2 and is the value handed back by both
 *   visible return statements, so -2 survives every error path that does
 *   not assign it.
 * - The two quoting styles are handled by two copies of the same yes/no
 *   test; the assignments themselves are not visible in this excerpt.
 */
10375 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10376 int standalone = -2;
/* Nothing is parsed unless the input is positioned on the keyword. */
10379 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
/* Missing equals sign: report it and return the -2 default. */
10383 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10384 return(standalone);
/* First quoting style: accept exactly the words no or yes. */
10390 if ((RAW == 'n') && (NXT(1) == 'o')) {
10393 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10398 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10401 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Second quoting style (double quote), same yes/no test. */
10404 } else if (RAW == '"'){
10406 if ((RAW == 'n') && (NXT(1) == 'o')) {
10409 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10414 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10417 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Neither quote character was found where the value should begin. */
10421 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10424 return(standalone);
10429 * @ctxt: an XML parser context
10431 * parse an XML declaration header
10433 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
/*
 * Review notes -- order of work visible in this function:
 *  1. preset ctxt->input->standalone to -2;
 *  2. require a blank after the opening keyword;
 *  3. parse VersionInfo, report a missing or unexpected version, and store
 *     the string in ctxt->version (freeing any previous value);
 *  4. parse the optional encoding declaration and stop when the encoding
 *     is unsupported or the parser reached XML_PARSER_EOF;
 *  5. parse the optional standalone declaration into
 *     ctxt->input->standalone;
 *  6. check for the closing sequence and report
 *     XML_ERR_XMLDECL_NOT_FINISHED otherwise.
 * The function reports problems through the error helpers only; no return
 * value is visible in this excerpt.
 */
10437 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10441 * This value for standalone indicates that the document has an
10442 * XML declaration but it does not have a standalone attribute.
10443 * It will be overwritten later if a standalone attribute is found.
10445 ctxt->input->standalone = -2;
10448 * We know that '<?xml' is here.
10452 if (!IS_BLANK_CH(RAW)) {
10453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10454 "Blank needed after '<?xml'\n");
10459 * We must have the VersionInfo here.
10461 version = xmlParseVersionInfo(ctxt);
10462 if (version == NULL) {
10463 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
/* A version other than XML_DEFAULT_VERSION needs a diagnostic. */
10465 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10467 * Changed here for XML-1.0 5th edition
/* With XML_PARSE_OLD10 any other version is a fatal error. */
10469 if (ctxt->options & XML_PARSE_OLD10) {
10470 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10471 "Unsupported version '%s'\n",
/* Otherwise a version starting with 1. only draws a warning, while the
 * remaining case is (presumably via an else branch) fatal again. */
10474 if ((version[0] == '1') && ((version[1] == '.'))) {
10475 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10476 "Unsupported version '%s'\n",
10479 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10480 "Unsupported version '%s'\n",
/* The context takes over the version string, dropping any older one. */
10485 if (ctxt->version != NULL)
10486 xmlFree((void *) ctxt->version);
10487 ctxt->version = version;
10491 * We may have the encoding declaration
/* No blank here is only acceptable when the declaration ends right away. */
10493 if (!IS_BLANK_CH(RAW)) {
10494 if ((RAW == '?') && (NXT(1) == '>')) {
10498 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10500 xmlParseEncodingDecl(ctxt);
10501 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10502 (ctxt->instate == XML_PARSER_EOF)) {
10504 * The XML REC instructs us to stop parsing right here
10510 * We may have the standalone status.
/* The blank requirement is only enforced when an encoding was recorded on
 * the input by the step above. */
10512 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10513 if ((RAW == '?') && (NXT(1) == '>')) {
10517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10521 * We can grow the input buffer freely at that point
10526 ctxt->input->standalone = xmlParseSDDecl(ctxt);
/* Closing sequence: the proper two-character form, a lone closing angle
 * bracket (reported as an error), or anything else (reported, then the
 * input is moved forward with MOVETO_ENDTAG). */
10529 if ((RAW == '?') && (NXT(1) == '>')) {
10531 } else if (RAW == '>') {
10532 /* Deprecated old WD ... */
10533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10536 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10537 MOVETO_ENDTAG(CUR_PTR);
10544 * @ctxt: an XML parser context
10546 * parse an XML Misc* optional field.
10548 * [27] Misc ::= Comment | PI | S
/*
 * Review notes:
 * - Loops for as long as the parser has not reached XML_PARSER_EOF and the
 *   input starts with a processing-instruction opener, a comment opener or
 *   a blank character.
 * - Each iteration dispatches on the same three cases; only the comment
 *   case (xmlParseComment) has its call visible in this excerpt.
 */
10552 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10553 while ((ctxt->instate != XML_PARSER_EOF) &&
10554 (((RAW == '<') && (NXT(1) == '?')) ||
10555 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10556 IS_BLANK_CH(CUR))) {
/* Processing instruction first, then blanks, comments as the fallback. */
10557 if ((RAW == '<') && (NXT(1) == '?')) {
10559 } else if (IS_BLANK_CH(CUR)) {
10562 xmlParseComment(ctxt);
10567 * xmlParseDocument:
10568 * @ctxt: an XML parser context
10570 * parse an XML document (and build a tree if using the standard SAX
10573 * [1] document ::= prolog element Misc*
10575 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10577 * Returns 0, -1 in case of error. the parser context is augmented
10578 * as a result of the parsing.
/*
 * Review notes -- order of work visible in this function:
 *  1. reject a NULL context or a context without input;
 *  2. xmlDetectSAX2() and the setDocumentLocator callback;
 *  3. when no encoding is set yet and at least 4 bytes are buffered, sniff
 *     the encoding with xmlDetectCharEncoding() and switch to it;
 *  4. parse the XML declaration when the input starts with it and copy
 *     ctxt->input->standalone into ctxt->standalone; a copy of
 *     XML_DEFAULT_VERSION is stored in ctxt->version (presumably on the
 *     path without a declaration);
 *  5. startDocument callback, propagate the compression level of the input
 *     buffer to ctxt->myDoc, then parse Misc;
 *  6. optional DOCTYPE: declaration, internal subset (state
 *     XML_PARSER_DTD), externalSubset callback with inSubset == 2,
 *     xmlCleanSpecialAttr(), then Misc again in state XML_PARSER_PROLOG;
 *  7. the root element (state XML_PARSER_CONTENT), trailing Misc (state
 *     XML_PARSER_EPILOG), final state XML_PARSER_EOF;
 *  8. endDocument callback, drop a SAX_COMPAT_MODE document, and set the
 *     XML_DOC_* property bits on ctxt->myDoc.
 * NOTE(review): the instate == XML_PARSER_EOF tests that follow most steps
 * presumably bail out early; their bodies are not visible in this excerpt.
 */
10582 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10584 xmlCharEncoding enc;
/* Argument check; the header above documents -1 as the error result. */
10588 if ((ctxt == NULL) || (ctxt->input == NULL))
10594 * SAX: detecting the level.
10596 xmlDetectSAX2(ctxt);
10599 * SAX: beginning of the document processing.
10601 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10602 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10603 if (ctxt->instate == XML_PARSER_EOF)
/* Sniffing is skipped when an encoding is already recorded on the context
 * or when fewer than 4 bytes are available. */
10606 if ((ctxt->encoding == NULL) &&
10607 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10609 * Get the 4 first bytes and decode the charset
10610 * if enc != XML_CHAR_ENCODING_NONE
10611 * plug some encoding conversion routines.
10617 enc = xmlDetectCharEncoding(&start[0], 4);
10618 if (enc != XML_CHAR_ENCODING_NONE) {
10619 xmlSwitchEncoding(ctxt, enc);
10625 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10630 * Check for the XMLDecl in the Prolog.
10631 * do not GROW here to avoid the detected encoder to decode more
10632 * than just the first line, unless the amount of data is really
10633 * too small to hold "<?xml version="1.0" encoding="foo"
10635 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10638 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10641 * Note that we will switch encoding on the fly.
10643 xmlParseXMLDecl(ctxt);
10644 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10645 (ctxt->instate == XML_PARSER_EOF)) {
10647 * The XML REC instructs us to stop parsing right here
10651 ctxt->standalone = ctxt->input->standalone;
10654 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10656 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10657 ctxt->sax->startDocument(ctxt->userData);
10658 if (ctxt->instate == XML_PARSER_EOF)
10660 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10661 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10662 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10666 * The Misc part of the Prolog
10669 xmlParseMisc(ctxt);
10672 * Then possibly doc type declaration(s) and more Misc
10673 * (doctypedecl Misc*)?
10676 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10678 ctxt->inSubset = 1;
10679 xmlParseDocTypeDecl(ctxt);
10681 ctxt->instate = XML_PARSER_DTD;
10682 xmlParseInternalSubset(ctxt);
10683 if (ctxt->instate == XML_PARSER_EOF)
10688 * Create and update the external subset.
10690 ctxt->inSubset = 2;
10691 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10692 (!ctxt->disableSAX))
10693 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10694 ctxt->extSubSystem, ctxt->extSubURI);
10695 if (ctxt->instate == XML_PARSER_EOF)
10697 ctxt->inSubset = 0;
10699 xmlCleanSpecialAttr(ctxt);
10701 ctxt->instate = XML_PARSER_PROLOG;
10702 xmlParseMisc(ctxt);
10706 * Time to start parsing the tree itself
10710 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10711 "Start tag expected, '<' not found\n");
/* Root element, after which the parser is in the epilog state. */
10713 ctxt->instate = XML_PARSER_CONTENT;
10714 xmlParseElement(ctxt);
10715 ctxt->instate = XML_PARSER_EPILOG;
10719 * The Misc part at the end
10721 xmlParseMisc(ctxt);
/* Presumably raised when input remains after the trailing Misc. */
10724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10726 ctxt->instate = XML_PARSER_EOF;
10730 * SAX: end of the document processing.
10732 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10733 ctxt->sax->endDocument(ctxt->userData);
10736 * Remove locally kept entity definitions if the tree was not built
10738 if ((ctxt->myDoc != NULL) &&
10739 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10740 xmlFreeDoc(ctxt->myDoc);
10741 ctxt->myDoc = NULL;
/* Record the outcome of the parse as property bits on the document. */
10744 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10745 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10747 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10748 if (ctxt->nsWellFormed)
10749 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10750 if (ctxt->options & XML_PARSE_OLD10)
10751 ctxt->myDoc->properties |= XML_DOC_OLD10;
/* Presumably selects the -1 result documented in the header. */
10753 if (! ctxt->wellFormed) {
10761 * xmlParseExtParsedEnt:
10762 * @ctxt: an XML parser context
10764 * parse a general parsed entity
10765 * An external general parsed entity is well-formed if it matches the
10766 * production labeled extParsedEnt.
10768 * [78] extParsedEnt ::= TextDecl? content
10770 * Returns 0, -1 in case of error. the parser context is augmented
10771 * as a result of the parsing.
/*
 * Review notes -- order of work visible in this function:
 *  1. reject a NULL context or a context without input;
 *  2. xmlDefaultSAXHandlerInit(), xmlDetectSAX2() and the
 *     setDocumentLocator callback;
 *  3. with at least 4 bytes buffered, sniff the encoding and switch to it;
 *  4. parse a leading declaration with xmlParseXMLDecl(); a copy of
 *     XML_DEFAULT_VERSION is stored in ctxt->version (presumably on the
 *     path without a declaration);
 *  5. startDocument callback, then parse the content with validation and
 *     subset loading switched off;
 *  6. report leftover input, fire endDocument, and return -1 when the
 *     content was not well formed.
 * NOTE(review): after xmlParseXMLDecl() only errNo is tested here, whereas
 * xmlParseDocument also tests instate == XML_PARSER_EOF at the same point;
 * confirm whether the difference is intended.
 */
10775 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10777 xmlCharEncoding enc;
/* Argument check; the header above documents -1 as the error result. */
10779 if ((ctxt == NULL) || (ctxt->input == NULL))
10782 xmlDefaultSAXHandlerInit();
10784 xmlDetectSAX2(ctxt);
10789 * SAX: beginning of the document processing.
10791 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10792 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10795 * Get the 4 first bytes and decode the charset
10796 * if enc != XML_CHAR_ENCODING_NONE
10797 * plug some encoding conversion routines.
10799 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10804 enc = xmlDetectCharEncoding(start, 4);
10805 if (enc != XML_CHAR_ENCODING_NONE) {
10806 xmlSwitchEncoding(ctxt, enc);
10812 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10816 * Check for the XMLDecl in the Prolog.
10819 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10822 * Note that we will switch encoding on the fly.
10824 xmlParseXMLDecl(ctxt);
10825 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10827 * The XML REC instructs us to stop parsing right here
10833 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10835 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10836 ctxt->sax->startDocument(ctxt->userData);
10837 if (ctxt->instate == XML_PARSER_EOF)
10841 * Doing validity checking on chunk doesn't make sense
10843 ctxt->instate = XML_PARSER_CONTENT;
10844 ctxt->validate = 0;
10845 ctxt->loadsubset = 0;
10848 xmlParseContent(ctxt);
10849 if (ctxt->instate == XML_PARSER_EOF)
10852 if ((RAW == '<') && (NXT(1) == '/')) {
/* A stray end tag means unbalanced content; any other remaining byte is
 * reported as extra content. */
10853 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10854 } else if (RAW != 0) {
10855 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10859 * SAX: end of the document processing.
10861 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10862 ctxt->sax->endDocument(ctxt->userData);
10864 if (! ctxt->wellFormed) return(-1);
10868 #ifdef LIBXML_PUSH_ENABLED
10869 /************************************************************************
10871 * Progressive parsing interfaces *
10873 ************************************************************************/
10876 * xmlParseLookupSequence:
10877 * @ctxt: an XML parser context
10878 * @first: the first char to lookup
10879 * @next: the next char to lookup or zero
10880 * @third: the next char to lookup or zero
10882 * Try to find if a sequence (first, next, third) or just (first next) or
10883 * (first) is available in the input stream.
10884 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10885 * to avoid rescanning sequences of bytes, it DOES change the state of the
10886 * parser, do not use liberally.
10888 * Returns the index to the current parsing point if the full sequence
10889 * is available, -1 otherwise.
/*
 * Review notes:
 * - The scan works on offsets from in->base and resumes at
 *   ctxt->checkIndex when that is ahead of the current position.
 * - On a match ctxt->checkIndex is reset to 0 and the result is the match
 *   offset relative to in->cur; on a miss ctxt->checkIndex is set to the
 *   offset where the scan stopped, so a later call can resume there.
 * - The xmlGenericError() calls only emit trace messages; NOTE(review):
 *   they are presumably compiled under a debug macro whose directives are
 *   not visible in this excerpt.
 */
10892 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10893 xmlChar next, xmlChar third) {
10895 xmlParserInputPtr in;
10896 const xmlChar *buf;
10899 if (in == NULL) return(-1);
/* Offset of the current position inside the input; negative is rejected. */
10900 base = in->cur - in->base;
10901 if (base < 0) return(-1);
/* Skip the part already scanned by an earlier, unsuccessful call. */
10902 if (ctxt->checkIndex > base)
10903 base = ctxt->checkIndex;
/* Without a parser input buffer another source is used (those assignments
 * are not visible here); otherwise scan the content of in->buf->buffer. */
10904 if (in->buf == NULL) {
10908 buf = xmlBufContent(in->buf->buffer);
10909 len = xmlBufUse(in->buf->buffer);
10911 /* take into account the sequence length */
/* Shortening len keeps buf[base + 1] and buf[base + 2] below the
 * original length. */
10912 if (third) len -= 2;
10913 else if (next) len --;
10914 for (;base < len;base++) {
10915 if (buf[base] == first) {
/* Verify the remaining one or two characters of the sequence. */
10917 if ((buf[base + 1] != next) ||
10918 (buf[base + 2] != third)) continue;
10919 } else if (next != 0) {
10920 if (buf[base + 1] != next) continue;
/* Full match: forget the resume point. */
10922 ctxt->checkIndex = 0;
10925 xmlGenericError(xmlGenericErrorContext,
10926 "PP: lookup '%c' found at %d\n",
10928 else if (third == 0)
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: lookup '%c%c' found at %d\n",
10931 first, next, base);
10933 xmlGenericError(xmlGenericErrorContext,
10934 "PP: lookup '%c%c%c' found at %d\n",
10935 first, next, third, base);
/* Convert the offset from in->base into one relative to in->cur. */
10937 return(base - (in->cur - in->base));
/* No match: remember how far the scan got. */
10940 ctxt->checkIndex = base;
10943 xmlGenericError(xmlGenericErrorContext,
10944 "PP: lookup '%c' failed\n", first);
10945 else if (third == 0)
10946 xmlGenericError(xmlGenericErrorContext,
10947 "PP: lookup '%c%c' failed\n", first, next);
10949 xmlGenericError(xmlGenericErrorContext,
10950 "PP: lookup '%c%c%c' failed\n", first, next, third);
10956 * xmlParseGetLasts:
10957 * @ctxt: an XML parser context
10958 * @lastlt: pointer to store the last '<' from the input
10959 * @lastgt: pointer to store the last '>' from the input
10961 * Lookup the last < and > in the current chunk
/*
 * Review notes:
 * - All three arguments are required; a NULL one only produces an internal
 *   error message.
 * - The scan is done only for a progressive parse with exactly one input
 *   on the stack (ctxt->progressive != 0 and ctxt->inputNr == 1).
 * - The last opening bracket is searched backwards from input->end; the
 *   closing bracket is then searched forwards while skipping over quoted
 *   runs, or backwards when that fails.
 * - NOTE(review): the stores into *lastlt and *lastgt are not visible in
 *   this excerpt.
 */
10964 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10965 const xmlChar **lastgt) {
10966 const xmlChar *tmp;
10968 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10969 xmlGenericError(xmlGenericErrorContext,
10970 "Internal error: xmlParseGetLasts\n");
10973 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10974 tmp = ctxt->input->end;
/* Walk backwards to the last opening bracket; the bound test comes first,
 * so nothing before input->base is read. */
10976 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
/* True when the whole buffer holds no opening bracket. */
10977 if (tmp < ctxt->input->base) {
/* Walk forwards to a closing bracket, jumping over single-quoted and
 * double-quoted runs so that brackets inside them are not counted. */
10983 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10984 if (*tmp == '\'') {
10986 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10987 if (tmp < ctxt->input->end) tmp++;
10988 } else if (*tmp == '"') {
10990 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10991 if (tmp < ctxt->input->end) tmp++;
/* True when the forward walk stopped on a closing bracket. */
10995 if (tmp < ctxt->input->end)
/* Otherwise walk backwards to the last closing bracket. */
11000 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11001 if (tmp >= ctxt->input->base)
11013 * xmlCheckCdataPush:
11014 * @cur: pointer to the block of characters
11015 * @len: length of the block in bytes
11016 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11018 * Check that the block of characters is okay as SCdata content [20]
11020 * Returns the number of bytes to pass if okay, a negative index where an
11021 * UTF-8 error occurred otherwise
/*
 * Review notes:
 * - The first parameter is named utf in the signature although the header
 *   above documents it as @cur.
 * - The block is walked one UTF-8 sequence at a time; 2-, 3- and 4-byte
 *   sequences must have continuation bytes of the form 10xxxxxx and must
 *   decode to a code point accepted by xmlIsCharQ().
 * - A multi-byte sequence cut off by the end of the block yields -ix when
 *   complete is set and ix (the bytes before the sequence) otherwise.
 * - NOTE(review): errors are reported as -ix, so an error at offset 0
 *   presumably yields 0; callers should not rely on a strictly negative
 *   value -- confirm against the call sites.
 */
11024 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11029 if ((utf == NULL) || (len <= 0))
11032 for (ix = 0; ix < len;) { /* bounded by len, not by a terminator */
11034 if ((c & 0x80) == 0x00) { /* 1-byte code, high bit clear (0xxxxxxx) */
/* Line feed, carriage return and tab are tested explicitly here. */
11037 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11041 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
/* Sequence would run past the end of the block. */
11042 if (ix + 2 > len) return(complete ? -ix : ix);
11043 if ((utf[ix+1] & 0xc0 ) != 0x80)
/* 5 payload bits from the lead byte, 6 from the continuation byte. */
11045 codepoint = (utf[ix] & 0x1f) << 6;
11046 codepoint |= utf[ix+1] & 0x3f;
11047 if (!xmlIsCharQ(codepoint))
11050 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11051 if (ix + 3 > len) return(complete ? -ix : ix);
11052 if (((utf[ix+1] & 0xc0) != 0x80) ||
11053 ((utf[ix+2] & 0xc0) != 0x80))
/* 4 payload bits from the lead byte, 6 from each continuation byte. */
11055 codepoint = (utf[ix] & 0xf) << 12;
11056 codepoint |= (utf[ix+1] & 0x3f) << 6;
11057 codepoint |= utf[ix+2] & 0x3f;
11058 if (!xmlIsCharQ(codepoint))
11061 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11062 if (ix + 4 > len) return(complete ? -ix : ix);
11063 if (((utf[ix+1] & 0xc0) != 0x80) ||
11064 ((utf[ix+2] & 0xc0) != 0x80) ||
11065 ((utf[ix+3] & 0xc0) != 0x80))
/* 3 payload bits from the lead byte, 6 from each continuation byte. */
11067 codepoint = (utf[ix] & 0x7) << 18;
11068 codepoint |= (utf[ix+1] & 0x3f) << 12;
11069 codepoint |= (utf[ix+2] & 0x3f) << 6;
11070 codepoint |= utf[ix+3] & 0x3f;
11071 if (!xmlIsCharQ(codepoint))
11074 } else /* unknown encoding */
11081 * xmlParseTryOrFinish:
11082 * @ctxt: an XML parser context
11083 * @terminate: last chunk indicator
11085 * Try to progress on parsing
11087 * Returns zero if no parsing was possible
11090 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11094 const xmlChar *lastlt, *lastgt;
11096 if (ctxt->input == NULL)
11100 switch (ctxt->instate) {
11101 case XML_PARSER_EOF:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try EOF\n"); break;
11104 case XML_PARSER_START:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try START\n"); break;
11107 case XML_PARSER_MISC:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try MISC\n");break;
11110 case XML_PARSER_COMMENT:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try COMMENT\n");break;
11113 case XML_PARSER_PROLOG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try PROLOG\n");break;
11116 case XML_PARSER_START_TAG:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try START_TAG\n");break;
11119 case XML_PARSER_CONTENT:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try CONTENT\n");break;
11122 case XML_PARSER_CDATA_SECTION:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try CDATA_SECTION\n");break;
11125 case XML_PARSER_END_TAG:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try END_TAG\n");break;
11128 case XML_PARSER_ENTITY_DECL:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try ENTITY_DECL\n");break;
11131 case XML_PARSER_ENTITY_VALUE:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try ENTITY_VALUE\n");break;
11134 case XML_PARSER_ATTRIBUTE_VALUE:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try ATTRIBUTE_VALUE\n");break;
11137 case XML_PARSER_DTD:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try DTD\n");break;
11140 case XML_PARSER_EPILOG:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try EPILOG\n");break;
11143 case XML_PARSER_PI:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try PI\n");break;
11146 case XML_PARSER_IGNORE:
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: try IGNORE\n");break;
11152 if ((ctxt->input != NULL) &&
11153 (ctxt->input->cur - ctxt->input->base > 4096)) {
11155 ctxt->checkIndex = 0;
11157 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11159 while (ctxt->instate != XML_PARSER_EOF) {
11160 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11163 if (ctxt->input == NULL) break;
11164 if (ctxt->input->buf == NULL)
11165 avail = ctxt->input->length -
11166 (ctxt->input->cur - ctxt->input->base);
11169 * If we are operating on converted input, try to flush
11170 * remaining chars to avoid them stalling in the non-converted
11171 * buffer. But do not do this in document start where
11172 * encoding="..." may not have been read and we work on a
11173 * guessed encoding.
11175 if ((ctxt->instate != XML_PARSER_START) &&
11176 (ctxt->input->buf->raw != NULL) &&
11177 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11178 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11180 size_t current = ctxt->input->cur - ctxt->input->base;
11182 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11183 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11186 avail = xmlBufUse(ctxt->input->buf->buffer) -
11187 (ctxt->input->cur - ctxt->input->base);
11191 switch (ctxt->instate) {
11192 case XML_PARSER_EOF:
11194 * Document parsing is done !
11197 case XML_PARSER_START:
11198 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11200 xmlCharEncoding enc;
11203 * Very first chars read from the document flow.
11209 * Get the 4 first bytes and decode the charset
11210 * if enc != XML_CHAR_ENCODING_NONE
11211 * plug some encoding conversion routines,
11212 * else xmlSwitchEncoding will set to (default)
11219 enc = xmlDetectCharEncoding(start, 4);
11220 xmlSwitchEncoding(ctxt, enc);
11226 cur = ctxt->input->cur[0];
11227 next = ctxt->input->cur[1];
11229 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11230 ctxt->sax->setDocumentLocator(ctxt->userData,
11231 &xmlDefaultSAXLocator);
11232 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11233 xmlHaltParser(ctxt);
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: entering EOF\n");
11238 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11239 ctxt->sax->endDocument(ctxt->userData);
11242 if ((cur == '<') && (next == '?')) {
11243 /* PI or XML decl */
11244 if (avail < 5) return(ret);
11245 if ((!terminate) &&
11246 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11248 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11249 ctxt->sax->setDocumentLocator(ctxt->userData,
11250 &xmlDefaultSAXLocator);
11251 if ((ctxt->input->cur[2] == 'x') &&
11252 (ctxt->input->cur[3] == 'm') &&
11253 (ctxt->input->cur[4] == 'l') &&
11254 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11257 xmlGenericError(xmlGenericErrorContext,
11258 "PP: Parsing XML Decl\n");
11260 xmlParseXMLDecl(ctxt);
11261 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11263 * The XML REC instructs us to stop parsing right
11266 xmlHaltParser(ctxt);
11269 ctxt->standalone = ctxt->input->standalone;
11270 if ((ctxt->encoding == NULL) &&
11271 (ctxt->input->encoding != NULL))
11272 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11273 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11274 (!ctxt->disableSAX))
11275 ctxt->sax->startDocument(ctxt->userData);
11276 ctxt->instate = XML_PARSER_MISC;
11278 xmlGenericError(xmlGenericErrorContext,
11279 "PP: entering MISC\n");
11282 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11283 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11284 (!ctxt->disableSAX))
11285 ctxt->sax->startDocument(ctxt->userData);
11286 ctxt->instate = XML_PARSER_MISC;
11288 xmlGenericError(xmlGenericErrorContext,
11289 "PP: entering MISC\n");
11293 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11294 ctxt->sax->setDocumentLocator(ctxt->userData,
11295 &xmlDefaultSAXLocator);
11296 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11297 if (ctxt->version == NULL) {
11298 xmlErrMemory(ctxt, NULL);
11301 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11302 (!ctxt->disableSAX))
11303 ctxt->sax->startDocument(ctxt->userData);
11304 ctxt->instate = XML_PARSER_MISC;
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: entering MISC\n");
11311 case XML_PARSER_START_TAG: {
11312 const xmlChar *name;
11313 const xmlChar *prefix = NULL;
11314 const xmlChar *URI = NULL;
11315 int nsNr = ctxt->nsNr;
11317 if ((avail < 2) && (ctxt->inputNr == 1))
11319 cur = ctxt->input->cur[0];
11321 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11322 xmlHaltParser(ctxt);
11323 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11324 ctxt->sax->endDocument(ctxt->userData);
11328 if (ctxt->progressive) {
11329 /* > can be found unescaped in attribute values */
11330 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11332 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11336 if (ctxt->spaceNr == 0)
11337 spacePush(ctxt, -1);
11338 else if (*ctxt->space == -2)
11339 spacePush(ctxt, -1);
11341 spacePush(ctxt, *ctxt->space);
11342 #ifdef LIBXML_SAX1_ENABLED
11344 #endif /* LIBXML_SAX1_ENABLED */
11345 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11346 #ifdef LIBXML_SAX1_ENABLED
11348 name = xmlParseStartTag(ctxt);
11349 #endif /* LIBXML_SAX1_ENABLED */
11350 if (ctxt->instate == XML_PARSER_EOF)
11352 if (name == NULL) {
11354 xmlHaltParser(ctxt);
11355 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11356 ctxt->sax->endDocument(ctxt->userData);
11359 #ifdef LIBXML_VALID_ENABLED
11361 * [ VC: Root Element Type ]
11362 * The Name in the document type declaration must match
11363 * the element type of the root element.
11365 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11366 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11367 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11368 #endif /* LIBXML_VALID_ENABLED */
11371 * Check for an Empty Element.
11373 if ((RAW == '/') && (NXT(1) == '>')) {
11377 if ((ctxt->sax != NULL) &&
11378 (ctxt->sax->endElementNs != NULL) &&
11379 (!ctxt->disableSAX))
11380 ctxt->sax->endElementNs(ctxt->userData, name,
11382 if (ctxt->nsNr - nsNr > 0)
11383 nsPop(ctxt, ctxt->nsNr - nsNr);
11384 #ifdef LIBXML_SAX1_ENABLED
11386 if ((ctxt->sax != NULL) &&
11387 (ctxt->sax->endElement != NULL) &&
11388 (!ctxt->disableSAX))
11389 ctxt->sax->endElement(ctxt->userData, name);
11390 #endif /* LIBXML_SAX1_ENABLED */
11392 if (ctxt->instate == XML_PARSER_EOF)
11395 if (ctxt->nameNr == 0) {
11396 ctxt->instate = XML_PARSER_EPILOG;
11398 ctxt->instate = XML_PARSER_CONTENT;
11400 ctxt->progressive = 1;
11406 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11407 "Couldn't find end of Start Tag %s\n",
11413 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11414 #ifdef LIBXML_SAX1_ENABLED
11416 namePush(ctxt, name);
11417 #endif /* LIBXML_SAX1_ENABLED */
11419 ctxt->instate = XML_PARSER_CONTENT;
11420 ctxt->progressive = 1;
11423 case XML_PARSER_CONTENT: {
11424 const xmlChar *test;
11426 if ((avail < 2) && (ctxt->inputNr == 1))
11428 cur = ctxt->input->cur[0];
11429 next = ctxt->input->cur[1];
11432 cons = ctxt->input->consumed;
11433 if ((cur == '<') && (next == '/')) {
11434 ctxt->instate = XML_PARSER_END_TAG;
11436 } else if ((cur == '<') && (next == '?')) {
11437 if ((!terminate) &&
11438 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11439 ctxt->progressive = XML_PARSER_PI;
11443 ctxt->instate = XML_PARSER_CONTENT;
11444 ctxt->progressive = 1;
11445 } else if ((cur == '<') && (next != '!')) {
11446 ctxt->instate = XML_PARSER_START_TAG;
11448 } else if ((cur == '<') && (next == '!') &&
11449 (ctxt->input->cur[2] == '-') &&
11450 (ctxt->input->cur[3] == '-')) {
11455 ctxt->input->cur += 4;
11456 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11457 ctxt->input->cur -= 4;
11458 if ((!terminate) && (term < 0)) {
11459 ctxt->progressive = XML_PARSER_COMMENT;
11462 xmlParseComment(ctxt);
11463 ctxt->instate = XML_PARSER_CONTENT;
11464 ctxt->progressive = 1;
11465 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11466 (ctxt->input->cur[2] == '[') &&
11467 (ctxt->input->cur[3] == 'C') &&
11468 (ctxt->input->cur[4] == 'D') &&
11469 (ctxt->input->cur[5] == 'A') &&
11470 (ctxt->input->cur[6] == 'T') &&
11471 (ctxt->input->cur[7] == 'A') &&
11472 (ctxt->input->cur[8] == '[')) {
11474 ctxt->instate = XML_PARSER_CDATA_SECTION;
11476 } else if ((cur == '<') && (next == '!') &&
11479 } else if (cur == '&') {
11480 if ((!terminate) &&
11481 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11483 xmlParseReference(ctxt);
11485 /* TODO Avoid the extra copy, handle directly !!! */
11487 * Goal of the following test is:
11488 * - minimize calls to the SAX 'character' callback
11489 * when they are mergeable
11490 * - handle an problem for isBlank when we only parse
11491 * a sequence of blank chars and the next one is
11492 * not available to check against '<' presence.
11493 * - tries to homogenize the differences in SAX
11494 * callbacks between the push and pull versions
11497 if ((ctxt->inputNr == 1) &&
11498 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11500 if (ctxt->progressive) {
11501 if ((lastlt == NULL) ||
11502 (ctxt->input->cur > lastlt))
11504 } else if (xmlParseLookupSequence(ctxt,
11510 ctxt->checkIndex = 0;
11511 xmlParseCharData(ctxt, 0);
11513 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11514 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11515 "detected an error in element content\n");
11516 xmlHaltParser(ctxt);
11521 case XML_PARSER_END_TAG:
11525 if (ctxt->progressive) {
11526 /* > can be found unescaped in attribute values */
11527 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11529 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11534 xmlParseEndTag2(ctxt,
11535 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11536 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11537 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11540 #ifdef LIBXML_SAX1_ENABLED
11542 xmlParseEndTag1(ctxt, 0);
11543 #endif /* LIBXML_SAX1_ENABLED */
11544 if (ctxt->instate == XML_PARSER_EOF) {
11546 } else if (ctxt->nameNr == 0) {
11547 ctxt->instate = XML_PARSER_EPILOG;
11549 ctxt->instate = XML_PARSER_CONTENT;
11552 case XML_PARSER_CDATA_SECTION: {
11554 * The Push mode need to have the SAX callback for
11555 * cdataBlock merge back contiguous callbacks.
11559 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11561 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11564 tmp = xmlCheckCdataPush(ctxt->input->cur,
11565 XML_PARSER_BIG_BUFFER_SIZE, 0);
11568 ctxt->input->cur += tmp;
11569 goto encoding_error;
11571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11572 if (ctxt->sax->cdataBlock != NULL)
11573 ctxt->sax->cdataBlock(ctxt->userData,
11574 ctxt->input->cur, tmp);
11575 else if (ctxt->sax->characters != NULL)
11576 ctxt->sax->characters(ctxt->userData,
11577 ctxt->input->cur, tmp);
11579 if (ctxt->instate == XML_PARSER_EOF)
11582 ctxt->checkIndex = 0;
11588 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11589 if ((tmp < 0) || (tmp != base)) {
11591 ctxt->input->cur += tmp;
11592 goto encoding_error;
11594 if ((ctxt->sax != NULL) && (base == 0) &&
11595 (ctxt->sax->cdataBlock != NULL) &&
11596 (!ctxt->disableSAX)) {
11598 * Special case to provide identical behaviour
11599 * between pull and push parsers on enpty CDATA
11602 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11603 (!strncmp((const char *)&ctxt->input->cur[-9],
11605 ctxt->sax->cdataBlock(ctxt->userData,
11607 } else if ((ctxt->sax != NULL) && (base > 0) &&
11608 (!ctxt->disableSAX)) {
11609 if (ctxt->sax->cdataBlock != NULL)
11610 ctxt->sax->cdataBlock(ctxt->userData,
11611 ctxt->input->cur, base);
11612 else if (ctxt->sax->characters != NULL)
11613 ctxt->sax->characters(ctxt->userData,
11614 ctxt->input->cur, base);
11616 if (ctxt->instate == XML_PARSER_EOF)
11619 ctxt->checkIndex = 0;
11620 ctxt->instate = XML_PARSER_CONTENT;
11622 xmlGenericError(xmlGenericErrorContext,
11623 "PP: entering CONTENT\n");
11628 case XML_PARSER_MISC:
11630 if (ctxt->input->buf == NULL)
11631 avail = ctxt->input->length -
11632 (ctxt->input->cur - ctxt->input->base);
11634 avail = xmlBufUse(ctxt->input->buf->buffer) -
11635 (ctxt->input->cur - ctxt->input->base);
11638 cur = ctxt->input->cur[0];
11639 next = ctxt->input->cur[1];
11640 if ((cur == '<') && (next == '?')) {
11641 if ((!terminate) &&
11642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11643 ctxt->progressive = XML_PARSER_PI;
11647 xmlGenericError(xmlGenericErrorContext,
11648 "PP: Parsing PI\n");
11651 if (ctxt->instate == XML_PARSER_EOF)
11653 ctxt->instate = XML_PARSER_MISC;
11654 ctxt->progressive = 1;
11655 ctxt->checkIndex = 0;
11656 } else if ((cur == '<') && (next == '!') &&
11657 (ctxt->input->cur[2] == '-') &&
11658 (ctxt->input->cur[3] == '-')) {
11659 if ((!terminate) &&
11660 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11661 ctxt->progressive = XML_PARSER_COMMENT;
11665 xmlGenericError(xmlGenericErrorContext,
11666 "PP: Parsing Comment\n");
11668 xmlParseComment(ctxt);
11669 if (ctxt->instate == XML_PARSER_EOF)
11671 ctxt->instate = XML_PARSER_MISC;
11672 ctxt->progressive = 1;
11673 ctxt->checkIndex = 0;
11674 } else if ((cur == '<') && (next == '!') &&
11675 (ctxt->input->cur[2] == 'D') &&
11676 (ctxt->input->cur[3] == 'O') &&
11677 (ctxt->input->cur[4] == 'C') &&
11678 (ctxt->input->cur[5] == 'T') &&
11679 (ctxt->input->cur[6] == 'Y') &&
11680 (ctxt->input->cur[7] == 'P') &&
11681 (ctxt->input->cur[8] == 'E')) {
11682 if ((!terminate) &&
11683 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11684 ctxt->progressive = XML_PARSER_DTD;
11688 xmlGenericError(xmlGenericErrorContext,
11689 "PP: Parsing internal subset\n");
11691 ctxt->inSubset = 1;
11692 ctxt->progressive = 0;
11693 ctxt->checkIndex = 0;
11694 xmlParseDocTypeDecl(ctxt);
11695 if (ctxt->instate == XML_PARSER_EOF)
11698 ctxt->instate = XML_PARSER_DTD;
11700 xmlGenericError(xmlGenericErrorContext,
11701 "PP: entering DTD\n");
11705 * Create and update the external subset.
11707 ctxt->inSubset = 2;
11708 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11709 (ctxt->sax->externalSubset != NULL))
11710 ctxt->sax->externalSubset(ctxt->userData,
11711 ctxt->intSubName, ctxt->extSubSystem,
11713 ctxt->inSubset = 0;
11714 xmlCleanSpecialAttr(ctxt);
11715 ctxt->instate = XML_PARSER_PROLOG;
11717 xmlGenericError(xmlGenericErrorContext,
11718 "PP: entering PROLOG\n");
11721 } else if ((cur == '<') && (next == '!') &&
11725 ctxt->instate = XML_PARSER_START_TAG;
11726 ctxt->progressive = XML_PARSER_START_TAG;
11727 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11729 xmlGenericError(xmlGenericErrorContext,
11730 "PP: entering START_TAG\n");
11734 case XML_PARSER_PROLOG:
11736 if (ctxt->input->buf == NULL)
11737 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11739 avail = xmlBufUse(ctxt->input->buf->buffer) -
11740 (ctxt->input->cur - ctxt->input->base);
11743 cur = ctxt->input->cur[0];
11744 next = ctxt->input->cur[1];
11745 if ((cur == '<') && (next == '?')) {
11746 if ((!terminate) &&
11747 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11748 ctxt->progressive = XML_PARSER_PI;
11752 xmlGenericError(xmlGenericErrorContext,
11753 "PP: Parsing PI\n");
11756 if (ctxt->instate == XML_PARSER_EOF)
11758 ctxt->instate = XML_PARSER_PROLOG;
11759 ctxt->progressive = 1;
11760 } else if ((cur == '<') && (next == '!') &&
11761 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11762 if ((!terminate) &&
11763 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11764 ctxt->progressive = XML_PARSER_COMMENT;
11768 xmlGenericError(xmlGenericErrorContext,
11769 "PP: Parsing Comment\n");
11771 xmlParseComment(ctxt);
11772 if (ctxt->instate == XML_PARSER_EOF)
11774 ctxt->instate = XML_PARSER_PROLOG;
11775 ctxt->progressive = 1;
11776 } else if ((cur == '<') && (next == '!') &&
11780 ctxt->instate = XML_PARSER_START_TAG;
11781 if (ctxt->progressive == 0)
11782 ctxt->progressive = XML_PARSER_START_TAG;
11783 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11785 xmlGenericError(xmlGenericErrorContext,
11786 "PP: entering START_TAG\n");
11790 case XML_PARSER_EPILOG:
11792 if (ctxt->input->buf == NULL)
11793 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11795 avail = xmlBufUse(ctxt->input->buf->buffer) -
11796 (ctxt->input->cur - ctxt->input->base);
11799 cur = ctxt->input->cur[0];
11800 next = ctxt->input->cur[1];
11801 if ((cur == '<') && (next == '?')) {
11802 if ((!terminate) &&
11803 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11804 ctxt->progressive = XML_PARSER_PI;
11808 xmlGenericError(xmlGenericErrorContext,
11809 "PP: Parsing PI\n");
11812 if (ctxt->instate == XML_PARSER_EOF)
11814 ctxt->instate = XML_PARSER_EPILOG;
11815 ctxt->progressive = 1;
11816 } else if ((cur == '<') && (next == '!') &&
11817 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11818 if ((!terminate) &&
11819 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11820 ctxt->progressive = XML_PARSER_COMMENT;
11824 xmlGenericError(xmlGenericErrorContext,
11825 "PP: Parsing Comment\n");
11827 xmlParseComment(ctxt);
11828 if (ctxt->instate == XML_PARSER_EOF)
11830 ctxt->instate = XML_PARSER_EPILOG;
11831 ctxt->progressive = 1;
11832 } else if ((cur == '<') && (next == '!') &&
11836 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11837 xmlHaltParser(ctxt);
11839 xmlGenericError(xmlGenericErrorContext,
11840 "PP: entering EOF\n");
11842 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11843 ctxt->sax->endDocument(ctxt->userData);
11847 case XML_PARSER_DTD: {
11849 * Sorry but progressive parsing of the internal subset
11850 * is not expected to be supported. We first check that
11851 * the full content of the internal subset is available and
11852 * the parsing is launched only at that point.
11853 * Internal subset ends up with "']' S? '>'" in an unescaped
11854 * section and not in a ']]>' sequence which are conditional
11855 * sections (whoever argued to keep that crap in XML deserve
11856 * a place in hell !).
11863 base = ctxt->input->cur - ctxt->input->base;
11864 if (base < 0) return(0);
11865 if (ctxt->checkIndex > base)
11866 base = ctxt->checkIndex;
11867 buf = xmlBufContent(ctxt->input->buf->buffer);
11868 use = xmlBufUse(ctxt->input->buf->buffer);
11869 for (;(unsigned int) base < use; base++) {
11871 if (buf[base] == quote)
11875 if ((quote == 0) && (buf[base] == '<')) {
11877 /* special handling of comments */
11878 if (((unsigned int) base + 4 < use) &&
11879 (buf[base + 1] == '!') &&
11880 (buf[base + 2] == '-') &&
11881 (buf[base + 3] == '-')) {
11882 for (;(unsigned int) base + 3 < use; base++) {
11883 if ((buf[base] == '-') &&
11884 (buf[base + 1] == '-') &&
11885 (buf[base + 2] == '>')) {
11893 fprintf(stderr, "unfinished comment\n");
11900 if (buf[base] == '"') {
11904 if (buf[base] == '\'') {
11908 if (buf[base] == ']') {
11910 fprintf(stderr, "%c%c%c%c: ", buf[base],
11911 buf[base + 1], buf[base + 2], buf[base + 3]);
11913 if ((unsigned int) base +1 >= use)
11915 if (buf[base + 1] == ']') {
11916 /* conditional crap, skip both ']' ! */
11920 for (i = 1; (unsigned int) base + i < use; i++) {
11921 if (buf[base + i] == '>') {
11923 fprintf(stderr, "found\n");
11925 goto found_end_int_subset;
11927 if (!IS_BLANK_CH(buf[base + i])) {
11929 fprintf(stderr, "not found\n");
11931 goto not_end_of_int_subset;
11935 fprintf(stderr, "end of stream\n");
11940 not_end_of_int_subset:
11941 continue; /* for */
11944 * We didn't found the end of the Internal subset
11947 ctxt->checkIndex = base;
11949 ctxt->checkIndex = 0;
11952 xmlGenericError(xmlGenericErrorContext,
11953 "PP: lookup of int subset end filed\n");
11957 found_end_int_subset:
11958 ctxt->checkIndex = 0;
11959 xmlParseInternalSubset(ctxt);
11960 if (ctxt->instate == XML_PARSER_EOF)
11962 ctxt->inSubset = 2;
11963 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11964 (ctxt->sax->externalSubset != NULL))
11965 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11966 ctxt->extSubSystem, ctxt->extSubURI);
11967 ctxt->inSubset = 0;
11968 xmlCleanSpecialAttr(ctxt);
11969 if (ctxt->instate == XML_PARSER_EOF)
11971 ctxt->instate = XML_PARSER_PROLOG;
11972 ctxt->checkIndex = 0;
11974 xmlGenericError(xmlGenericErrorContext,
11975 "PP: entering PROLOG\n");
11979 case XML_PARSER_COMMENT:
11980 xmlGenericError(xmlGenericErrorContext,
11981 "PP: internal error, state == COMMENT\n");
11982 ctxt->instate = XML_PARSER_CONTENT;
11984 xmlGenericError(xmlGenericErrorContext,
11985 "PP: entering CONTENT\n");
11988 case XML_PARSER_IGNORE:
11989 xmlGenericError(xmlGenericErrorContext,
11990 "PP: internal error, state == IGNORE");
11991 ctxt->instate = XML_PARSER_DTD;
11993 xmlGenericError(xmlGenericErrorContext,
11994 "PP: entering DTD\n");
11997 case XML_PARSER_PI:
11998 xmlGenericError(xmlGenericErrorContext,
11999 "PP: internal error, state == PI\n");
12000 ctxt->instate = XML_PARSER_CONTENT;
12002 xmlGenericError(xmlGenericErrorContext,
12003 "PP: entering CONTENT\n");
12006 case XML_PARSER_ENTITY_DECL:
12007 xmlGenericError(xmlGenericErrorContext,
12008 "PP: internal error, state == ENTITY_DECL\n");
12009 ctxt->instate = XML_PARSER_DTD;
12011 xmlGenericError(xmlGenericErrorContext,
12012 "PP: entering DTD\n");
12015 case XML_PARSER_ENTITY_VALUE:
12016 xmlGenericError(xmlGenericErrorContext,
12017 "PP: internal error, state == ENTITY_VALUE\n");
12018 ctxt->instate = XML_PARSER_CONTENT;
12020 xmlGenericError(xmlGenericErrorContext,
12021 "PP: entering DTD\n");
12024 case XML_PARSER_ATTRIBUTE_VALUE:
12025 xmlGenericError(xmlGenericErrorContext,
12026 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12027 ctxt->instate = XML_PARSER_START_TAG;
12029 xmlGenericError(xmlGenericErrorContext,
12030 "PP: entering START_TAG\n");
12033 case XML_PARSER_SYSTEM_LITERAL:
12034 xmlGenericError(xmlGenericErrorContext,
12035 "PP: internal error, state == SYSTEM_LITERAL\n");
12036 ctxt->instate = XML_PARSER_START_TAG;
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: entering START_TAG\n");
12042 case XML_PARSER_PUBLIC_LITERAL:
12043 xmlGenericError(xmlGenericErrorContext,
12044 "PP: internal error, state == PUBLIC_LITERAL\n");
12045 ctxt->instate = XML_PARSER_START_TAG;
12047 xmlGenericError(xmlGenericErrorContext,
12048 "PP: entering START_TAG\n");
12055 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12062 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12063 ctxt->input->cur[0], ctxt->input->cur[1],
12064 ctxt->input->cur[2], ctxt->input->cur[3]);
12065 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12066 "Input is not proper UTF-8, indicate encoding !\n%s",
12067 BAD_CAST buffer, NULL);
12073 * xmlParseCheckTransition:
12074 * @ctxt: an XML parser context
12075 * @chunk: a char array
12076 * @size: the size in byte of the chunk
12078 * Check depending on the current parser state if the chunk given must be
12079 * processed immediately or whether more data is needed before parsing can advance.
12081 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12084 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
/*
 * Decides whether freshly pushed bytes justify another parse attempt.
 * For each construct the parser may currently be waiting inside, the
 * chunk is scanned with memchr for a closing angle bracket.
 * NOTE(review): the statements executed after each test are not visible
 * in this excerpt; per the header comment the result is -1 on error,
 * 0 when no push is needed and 1 when it is - confirm against the
 * complete function.
 */
/* Reject a missing context, a missing chunk or a negative size. */
12085 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
/* Waiting inside a start tag. */
12087 if (ctxt->instate == XML_PARSER_START_TAG) {
12088 if (memchr(chunk, '>', size) != NULL)
/* Waiting for the end of a comment (tracked through progressive). */
12092 if (ctxt->progressive == XML_PARSER_COMMENT) {
12093 if (memchr(chunk, '>', size) != NULL)
/* Waiting inside a CDATA section. */
12097 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12098 if (memchr(chunk, '>', size) != NULL)
/* Waiting for the end of a processing instruction. */
12102 if (ctxt->progressive == XML_PARSER_PI) {
12103 if (memchr(chunk, '>', size) != NULL)
/* Waiting inside an end tag. */
12107 if (ctxt->instate == XML_PARSER_END_TAG) {
12108 if (memchr(chunk, '>', size) != NULL)
/* Waiting inside the DTD, flagged by either progressive or instate. */
12112 if ((ctxt->progressive == XML_PARSER_DTD) ||
12113 (ctxt->instate == XML_PARSER_DTD)) {
12114 if (memchr(chunk, '>', size) != NULL)
12123 * @ctxt: an XML parser context
12124 * @chunk: a char array
12125 * @size: the size in byte of the chunk
12126 * @terminate: last chunk indicator
12128 * Parse a Chunk of memory
12130 * Returns zero if no error, the xmlParserErrors otherwise.
12133 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12137 size_t old_avail = 0;
/*
 * Push-mode entry point: appends the chunk to the current input buffer,
 * runs the incremental parser when that looks worthwhile, and performs
 * the end-of-document checks shown near the bottom.
 * NOTE(review): several lines of this function (other local
 * declarations, some conditions and returns) are not visible in this
 * excerpt, so the comments below only describe the statements shown.
 */
12141 return(XML_ERR_INTERNAL_ERROR);
/* A previously recorded error with SAX disabled is returned unchanged. */
12142 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12143 return(ctxt->errNo);
12144 if (ctxt->instate == XML_PARSER_EOF)
/* While still in the START state, run xmlDetectSAX2 on the context. */
12146 if (ctxt->instate == XML_PARSER_START)
12147 xmlDetectSAX2(ctxt);
/*
 * A non-final chunk whose last byte is a carriage return is singled out.
 * NOTE(review): the body of this test is not visible; presumably it sets
 * end_in_lf and shortens size, since a carriage return is pushed again
 * further down when end_in_lf is 1 - confirm.
 */
12148 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12149 (chunk[size - 1] == '\r')) {
/* Data was supplied and there is an input buffer to receive it. */
12156 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12157 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * Remember the input position (base as reported by xmlBufGetInputBase,
 * cur as an offset from base) so it can be reapplied after the push.
 */
12158 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12159 size_t cur = ctxt->input->cur - ctxt->input->base;
/* Amount of buffered data before this chunk is added. */
12162 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12164 * Specific handling if we autodetected an encoding, we should not
12165 * push more than the first line ... which depend on the encoding
12166 * And only push the rest once the final encoding was detected
12168 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12169 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
/*
 * len caps the size of the first push and starts at 45.
 * NOTE(review): the adjustments applied for the UTF-16 and UCS-4 encoder
 * names tested below are not visible in this excerpt.
 */
12170 unsigned int len = 45;
12172 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12173 BAD_CAST "UTF-16")) ||
12174 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175 BAD_CAST "UTF16")))
12177 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12178 BAD_CAST "UCS-4")) ||
12179 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
/* Raw bytes already consumed count against the cap. */
12183 if (ctxt->input->buf->rawconsumed < len)
12184 len -= ctxt->input->buf->rawconsumed;
12187 * Change size for reading the initial declaration only
12188 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12189 * will blindly copy extra bytes from memory.
/* remain records how many bytes of the chunk exceed the cap. */
12191 if ((unsigned int) size > len) {
12192 remain = size - len;
12198 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/*
 * NOTE(review): XML_PARSER_EOF is used elsewhere in this function as an
 * instate value; here it is stored in errNo and returned as the error
 * code - confirm this is intended. The condition guarding these three
 * lines is not visible.
 */
12200 ctxt->errNo = XML_PARSER_EOF;
12201 xmlHaltParser(ctxt);
12202 return (XML_PARSER_EOF);
/* Reapply the saved base and cur offsets to the input after the push. */
12204 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12206 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/*
 * Alternative branch, taken when the parser is not at EOF: if the input
 * buffer has an encoder, a converted buffer and a raw buffer,
 * xmlCharEncInput is run on it.
 */
12209 } else if (ctxt->instate != XML_PARSER_EOF) {
12210 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12211 xmlParserInputBufferPtr in = ctxt->input->buf;
12212 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12213 (in->raw != NULL)) {
12215 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12216 size_t current = ctxt->input->cur - ctxt->input->base;
12218 nbchars = xmlCharEncInput(in, terminate);
/* Encoder failure path; the test on nbchars is not visible here. */
12221 xmlGenericError(xmlGenericErrorContext,
12222 "xmlParseChunk: encoder error\n");
12223 return(XML_ERR_INVALID_ENCODING);
12225 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
/* Parse attempt with the terminate argument forced to 0. */
12230 xmlParseTryOrFinish(ctxt, 0);
12232 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12233 avail = xmlBufUse(ctxt->input->buf->buffer);
12235 * Depending on the current state it may not be such
12236 * a good idea to try parsing if there is nothing in the chunk
12237 * which would be worth doing a parser state transition and we
12238 * need to wait for more data
/*
 * Parse when this is the last chunk, when more than XML_MAX_TEXT_LENGTH
 * bytes are buffered, when either availability counter is zero, or when
 * xmlParseCheckTransition returns non-zero for the newly added bytes.
 */
12240 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12241 (old_avail == 0) || (avail == 0) ||
12242 (xmlParseCheckTransition(ctxt,
12243 (const char *)&ctxt->input->base[old_avail],
12244 avail - old_avail)))
12245 xmlParseTryOrFinish(ctxt, terminate);
12247 if (ctxt->instate == XML_PARSER_EOF)
12248 return(ctxt->errNo);
/*
 * Unless XML_PARSE_HUGE is set, stop with a fatal error when either the
 * unread part of the input or the distance from base to cur exceeds
 * XML_MAX_LOOKUP_LIMIT.
 */
12250 if ((ctxt->input != NULL) &&
12251 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12252 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12253 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12254 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12255 xmlHaltParser(ctxt);
12257 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12258 return(ctxt->errNo);
/*
 * When end_in_lf is 1 a single carriage return is pushed into the input
 * buffer, with the input position saved before and reapplied after.
 */
12266 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12267 (ctxt->input->buf != NULL)) {
12268 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12270 size_t current = ctxt->input->cur - ctxt->input->base;
12272 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12274 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12279 * Check for termination
/* cur_avail is the number of bytes between cur and the end of the data. */
12283 if (ctxt->input != NULL) {
12284 if (ctxt->input->buf == NULL)
12285 cur_avail = ctxt->input->length -
12286 (ctxt->input->cur - ctxt->input->base);
12288 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12289 (ctxt->input->cur - ctxt->input->base);
/* Being in neither the EOF nor the EPILOG state is reported as an error. */
12292 if ((ctxt->instate != XML_PARSER_EOF) &&
12293 (ctxt->instate != XML_PARSER_EPILOG)) {
12294 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* Leftover bytes while in the epilog are reported with the same error. */
12296 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12297 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* The endDocument callback is only invoked when not already at EOF. */
12299 if (ctxt->instate != XML_PARSER_EOF) {
12300 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12301 ctxt->sax->endDocument(ctxt->userData);
12303 ctxt->instate = XML_PARSER_EOF;
/* A document that is not well formed yields the recorded error code. */
12305 if (ctxt->wellFormed == 0)
12306 return((xmlParserErrors) ctxt->errNo);
12311 /************************************************************************
12313 * I/O front end functions to the parser *
12315 ************************************************************************/
12318 * xmlCreatePushParserCtxt:
12319 * @sax: a SAX handler
12320 * @user_data: The user data returned on SAX callbacks
12321 * @chunk: a pointer to an array of chars
12322 * @size: number of chars in the array
12323 * @filename: an optional file name or URI
12325 * Create a parser context for using the XML parser in push mode.
12326 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12327 * the encoding. The remaining characters will be parsed so they
12328 * don't need to be fed in again through xmlParseChunk.
12329 * To allow content encoding detection, @size should be >= 4
12330 * The value of @filename is used for fetching external entities
12331 * and error/warning reports.
12333 * Returns the new parser context or NULL
12337 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12338 const char *chunk, int size, const char *filename) {
/*
 * Builds a parser context whose single input is an initially empty
 * buffer, installs a private copy of the SAX handler, and pushes the
 * optional first chunk.
 * NOTE(review): some lines (returns on failure, a few conditions) are
 * not visible in this excerpt.
 */
12339 xmlParserCtxtPtr ctxt;
12340 xmlParserInputPtr inputStream;
12341 xmlParserInputBufferPtr buf;
12342 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12345 * plug some encoding conversion routines
/* Encoding detection is only attempted with at least four bytes. */
12347 if ((chunk != NULL) && (size >= 4))
12348 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12350 buf = xmlAllocParserInputBuffer(enc);
12351 if (buf == NULL) return(NULL);
/* From here on every failure path shown releases buf explicitly. */
12353 ctxt = xmlNewParserCtxt();
12354 if (ctxt == NULL) {
12355 xmlErrMemory(NULL, "creating parser: out of memory\n");
12356 xmlFreeParserInputBuffer(buf);
12359 ctxt->dictNames = 1;
/* pushTab is sized for three pointer slots per entry of the name stack. */
12360 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12361 if (ctxt->pushTab == NULL) {
12362 xmlErrMemory(ctxt, NULL);
12363 xmlFreeParserInputBuffer(buf);
12364 xmlFreeParserCtxt(ctxt);
/*
 * Replace the handler created with the context by a heap copy of the
 * one supplied. With SAX1 enabled the existing handler is only freed
 * when it is not the static default one.
 * NOTE(review): sax is dereferenced below with no NULL check visible in
 * this excerpt; a guarding line may be missing - confirm.
 */
12368 #ifdef LIBXML_SAX1_ENABLED
12369 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12370 #endif /* LIBXML_SAX1_ENABLED */
12371 xmlFree(ctxt->sax);
12372 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12373 if (ctxt->sax == NULL) {
12374 xmlErrMemory(ctxt, NULL);
12375 xmlFreeParserInputBuffer(buf);
12376 xmlFreeParserCtxt(ctxt);
/*
 * Zero the new handler, then copy the whole structure when the caller
 * marked it with XML_SAX2_MAGIC, otherwise only the V1-sized prefix.
 */
12379 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12380 if (sax->initialized == XML_SAX2_MAGIC)
12381 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12383 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12384 if (user_data != NULL)
12385 ctxt->userData = user_data;
/* The directory is derived from filename when one is given. */
12387 if (filename == NULL) {
12388 ctxt->directory = NULL;
12390 ctxt->directory = xmlParserGetDirectory(filename);
12393 inputStream = xmlNewInputStream(ctxt);
12394 if (inputStream == NULL) {
12395 xmlFreeParserCtxt(ctxt);
12396 xmlFreeParserInputBuffer(buf);
/* The stream keeps a canonicalised copy of filename, or NULL. */
12400 if (filename == NULL)
12401 inputStream->filename = NULL;
12403 inputStream->filename = (char *)
12404 xmlCanonicPath((const xmlChar *) filename);
12405 if (inputStream->filename == NULL) {
12406 xmlFreeParserCtxt(ctxt);
12407 xmlFreeParserInputBuffer(buf);
/* Attach the buffer to the stream and make it the current input. */
12411 inputStream->buf = buf;
12412 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12413 inputPush(ctxt, inputStream);
12416 * If the caller didn't provide an initial 'chunk' for determining
12417 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12418 * that it can be automatically determined later
12420 if ((size == 0) || (chunk == NULL)) {
12421 ctxt->charset = XML_CHAR_ENCODING_NONE;
12422 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12423 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12424 size_t cur = ctxt->input->cur - ctxt->input->base;
12426 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12428 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12430 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12434 if (enc != XML_CHAR_ENCODING_NONE) {
12435 xmlSwitchEncoding(ctxt, enc);
12440 #endif /* LIBXML_PUSH_ENABLED */
12444 * @ctxt: an XML parser context
12446 * Blocks further parser processing without overriding the recorded error
12450 xmlHaltParser(xmlParserCtxtPtr ctxt) {
/*
 * Puts the context into the EOF state with SAX callbacks disabled and
 * leaves it with one input that points at an empty string. errNo is not
 * modified by any statement shown here.
 * NOTE(review): any guard on ctxt preceding these statements is not
 * visible in this excerpt.
 */
12453 ctxt->instate = XML_PARSER_EOF;
12454 ctxt->disableSAX = 1;
/* Pop and free every input above the first one. */
12455 while (ctxt->inputNr > 1)
12456 xmlFreeInputStream(inputPop(ctxt));
12457 if (ctxt->input != NULL) {
12459 * in case there was a specific allocation deallocate before
/* Release base through the input's own free callback, then clear it. */
12462 if (ctxt->input->free != NULL) {
12463 ctxt->input->free((xmlChar *) ctxt->input->base);
12464 ctxt->input->free = NULL;
/* cur, base and end all point at the same empty string literal. */
12466 ctxt->input->cur = BAD_CAST"";
12467 ctxt->input->base = ctxt->input->cur;
12468 ctxt->input->end = ctxt->input->cur;
12474 * @ctxt: an XML parser context
12476 * Blocks further parser processing
12479 xmlStopParser(xmlParserCtxtPtr ctxt) {
/*
 * Halts the parser through xmlHaltParser and then records
 * XML_ERR_USER_STOP as the error code.
 * NOTE(review): any guard on ctxt preceding these statements is not
 * visible in this excerpt.
 */
12482 xmlHaltParser(ctxt);
12483 ctxt->errNo = XML_ERR_USER_STOP;
12487 * xmlCreateIOParserCtxt:
12488 * @sax: a SAX handler
12489 * @user_data: The user data returned on SAX callbacks
12490 * @ioread: an I/O read function
12491 * @ioclose: an I/O close function
12492 * @ioctx: an I/O handler
12493 * @enc: the charset encoding if known
12495 * Create a parser context for using the XML parser with an existing I/O stream
12498 * Returns the new parser context or NULL
12501 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12502 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12503 void *ioctx, xmlCharEncoding enc) {
/*
 * Builds a parser context that reads through the supplied I/O callbacks
 * and installs a private copy of the SAX handler.
 * NOTE(review): some lines (returns on failure, a few conditions) are
 * not visible in this excerpt.
 */
12504 xmlParserCtxtPtr ctxt;
12505 xmlParserInputPtr inputStream;
12506 xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
12508 if (ioread == NULL) return(NULL);
12510 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
/*
 * NOTE(review): the statement guarded by this test is not visible;
 * presumably the close callback is invoked when buffer creation
 * failed - confirm.
 */
12512 if (ioclose != NULL)
12517 ctxt = xmlNewParserCtxt();
12518 if (ctxt == NULL) {
12519 xmlFreeParserInputBuffer(buf);
/*
 * Replace the handler created with the context by a heap copy of the
 * one supplied. With SAX1 enabled the existing handler is only freed
 * when it is not the static default one.
 * NOTE(review): sax is dereferenced below with no NULL check visible in
 * this excerpt; a guarding line may be missing - confirm.
 */
12523 #ifdef LIBXML_SAX1_ENABLED
12524 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12525 #endif /* LIBXML_SAX1_ENABLED */
12526 xmlFree(ctxt->sax);
12527 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12528 if (ctxt->sax == NULL) {
12529 xmlErrMemory(ctxt, NULL);
12530 xmlFreeParserCtxt(ctxt);
/*
 * Zero the new handler, then copy the whole structure when the caller
 * marked it with XML_SAX2_MAGIC, otherwise only the V1-sized prefix.
 */
12533 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12534 if (sax->initialized == XML_SAX2_MAGIC)
12535 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12537 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12538 if (user_data != NULL)
12539 ctxt->userData = user_data;
/* Wrap the buffer in an input stream and make it the current input. */
12542 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12543 if (inputStream == NULL) {
12544 xmlFreeParserCtxt(ctxt);
12547 inputPush(ctxt, inputStream);
12552 #ifdef LIBXML_VALID_ENABLED
12553 /************************************************************************
12555 * Front ends when parsing a DTD *
12557 ************************************************************************/
12561 * @sax: the SAX handler block or NULL
12562 * @input: an Input Buffer
12563 * @enc: the charset encoding if known
12565 * Load and parse a DTD
12567 * Returns the resulting xmlDtdPtr or NULL in case of error.
12568 * @input will be freed by the function in any case.
12572 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12573 xmlCharEncoding enc) {
/*
 * Parses a DTD read from an input buffer as an external subset attached
 * to a temporary document, and hands back the resulting subset.
 * NOTE(review): some lines (returns, a few conditions and the loop body
 * near the end) are not visible in this excerpt.
 */
12574 xmlDtdPtr ret = NULL;
12575 xmlParserCtxtPtr ctxt;
12576 xmlParserInputPtr pinput = NULL;
12582 ctxt = xmlNewParserCtxt();
12583 if (ctxt == NULL) {
12584 xmlFreeParserInputBuffer(input);
12588 /* We are loading a DTD */
12589 ctxt->options |= XML_PARSE_DTDLOAD;
12592 * Set-up the SAX context
/*
 * NOTE(review): the line installing the caller handler is not visible;
 * the existing handler is freed and userData is pointed at the context.
 */
12595 if (ctxt->sax != NULL)
12596 xmlFree(ctxt->sax);
12598 ctxt->userData = ctxt;
12600 xmlDetectSAX2(ctxt);
12603 * generate a parser input from the I/O handler
12606 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/*
 * On failure the handler pointer is cleared before the context is freed
 * when the caller supplied one; presumably this keeps xmlFreeParserCtxt
 * from releasing the caller handler - confirm.
 */
12607 if (pinput == NULL) {
12608 if (sax != NULL) ctxt->sax = NULL;
12609 xmlFreeParserInputBuffer(input);
12610 xmlFreeParserCtxt(ctxt);
12615 * plug some encoding conversion routines here.
12617 if (xmlPushInput(ctxt, pinput) < 0) {
12618 if (sax != NULL) ctxt->sax = NULL;
12619 xmlFreeParserCtxt(ctxt);
/* An encoding given by the caller is applied right away. */
12622 if (enc != XML_CHAR_ENCODING_NONE) {
12623 xmlSwitchEncoding(ctxt, enc);
/* Anchor the input at the current read position and clear its free hook. */
12626 pinput->filename = NULL;
12629 pinput->base = ctxt->input->cur;
12630 pinput->cur = ctxt->input->cur;
12631 pinput->free = NULL;
12634 * let's parse that entity knowing it's an external subset.
/* inSubset value 2 is the one used for the external subset in this file. */
12636 ctxt->inSubset = 2;
12637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12638 if (ctxt->myDoc == NULL) {
12639 xmlErrMemory(ctxt, "New Doc failed");
12642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12643 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12644 BAD_CAST "none", BAD_CAST "none");
/* Without a caller encoding, detection needs four bytes of input. */
12646 if ((enc == XML_CHAR_ENCODING_NONE) &&
12647 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12649 * Get the 4 first bytes and decode the charset
12650 * if enc != XML_CHAR_ENCODING_NONE
12651 * plug some encoding conversion routines.
12657 enc = xmlDetectCharEncoding(start, 4);
12658 if (enc != XML_CHAR_ENCODING_NONE) {
12659 xmlSwitchEncoding(ctxt, enc);
12663 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
/*
 * For a well formed result the subset is detached from the temporary
 * document so that freeing the document does not free it.
 * NOTE(review): the body of the loop over ret->children is not visible.
 */
12665 if (ctxt->myDoc != NULL) {
12666 if (ctxt->wellFormed) {
12667 ret = ctxt->myDoc->extSubset;
12668 ctxt->myDoc->extSubset = NULL;
12673 tmp = ret->children;
12674 while (tmp != NULL) {
12682 xmlFreeDoc(ctxt->myDoc);
12683 ctxt->myDoc = NULL;
/* Clear the handler pointer before freeing, as on the failure paths. */
12685 if (sax != NULL) ctxt->sax = NULL;
12686 xmlFreeParserCtxt(ctxt);
12693 * @sax: the SAX handler block
12694 * @ExternalID: a NAME* containing the External ID of the DTD
12695 * @SystemID: a NAME* containing the URL to the DTD
12697 * Load and parse an external subset.
12699 * Returns the resulting xmlDtdPtr or NULL in case of error.
12703 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12704 const xmlChar *SystemID) {
/*
 * Resolves the DTD named by ExternalID and SystemID through the SAX
 * resolveEntity callback, parses it as an external subset attached to a
 * temporary document, and hands back the resulting subset.
 * NOTE(review): some lines (returns, a few conditions and the loop body
 * near the end) are not visible in this excerpt.
 */
12705 xmlDtdPtr ret = NULL;
12706 xmlParserCtxtPtr ctxt;
12707 xmlParserInputPtr input = NULL;
12708 xmlCharEncoding enc;
12709 xmlChar* systemIdCanonic;
/* At least one identifier is required. */
12711 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12713 ctxt = xmlNewParserCtxt();
12714 if (ctxt == NULL) {
12718 /* We are loading a DTD */
12719 ctxt->options |= XML_PARSE_DTDLOAD;
12722 * Set-up the SAX context
/*
 * NOTE(review): the line installing the caller handler is not visible;
 * the existing handler is freed and userData is pointed at the context.
 */
12725 if (ctxt->sax != NULL)
12726 xmlFree(ctxt->sax);
12728 ctxt->userData = ctxt;
12732 * Canonicalise the system ID
12734 systemIdCanonic = xmlCanonicPath(SystemID);
/*
 * NOTE(review): unlike the later failure paths, this one frees the
 * context without first clearing ctxt->sax when a caller handler was
 * supplied - confirm whether the caller handler can be freed here.
 */
12735 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12736 xmlFreeParserCtxt(ctxt);
12741 * Ask the Entity resolver to load the damn thing
12744 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12745 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
/* No input: detach the handler, free the context and the canonic id. */
12747 if (input == NULL) {
12748 if (sax != NULL) ctxt->sax = NULL;
12749 xmlFreeParserCtxt(ctxt);
12750 if (systemIdCanonic != NULL)
12751 xmlFree(systemIdCanonic);
12756 * plug some encoding conversion routines here.
12758 if (xmlPushInput(ctxt, input) < 0) {
12759 if (sax != NULL) ctxt->sax = NULL;
12760 xmlFreeParserCtxt(ctxt);
12761 if (systemIdCanonic != NULL)
12762 xmlFree(systemIdCanonic);
/*
 * With four bytes available the encoding is detected and applied; the
 * detected value is passed to xmlSwitchEncoding without a check against
 * XML_CHAR_ENCODING_NONE in the lines shown.
 */
12765 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12766 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12767 xmlSwitchEncoding(ctxt, enc);
/*
 * The canonic id becomes the input filename when none is set; the
 * xmlFree below presumably belongs to the else branch - confirm.
 */
12770 if (input->filename == NULL)
12771 input->filename = (char *) systemIdCanonic;
12773 xmlFree(systemIdCanonic);
/* Anchor the input at the current read position and clear its free hook. */
12776 input->base = ctxt->input->cur;
12777 input->cur = ctxt->input->cur;
12778 input->free = NULL;
12781 * let's parse that entity knowing it's an external subset.
12783 ctxt->inSubset = 2;
12784 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12785 if (ctxt->myDoc == NULL) {
12786 xmlErrMemory(ctxt, "New Doc failed");
12787 if (sax != NULL) ctxt->sax = NULL;
12788 xmlFreeParserCtxt(ctxt);
12791 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793 ExternalID, SystemID);
12794 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
/*
 * For a well formed result the subset is detached from the temporary
 * document so that freeing the document does not free it.
 * NOTE(review): the body of the loop over ret->children is not visible.
 */
12796 if (ctxt->myDoc != NULL) {
12797 if (ctxt->wellFormed) {
12798 ret = ctxt->myDoc->extSubset;
12799 ctxt->myDoc->extSubset = NULL;
12804 tmp = ret->children;
12805 while (tmp != NULL) {
12813 xmlFreeDoc(ctxt->myDoc);
12814 ctxt->myDoc = NULL;
/* Clear the handler pointer before freeing, as on the failure paths. */
12816 if (sax != NULL) ctxt->sax = NULL;
12817 xmlFreeParserCtxt(ctxt);
12825 * @ExternalID: a NAME* containing the External ID of the DTD
12826 * @SystemID: a NAME* containing the URL to the DTD
12828 * Load and parse an external subset.
12830 * Returns the resulting xmlDtdPtr or NULL in case of error.
12834 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
/* Thin wrapper: calls xmlSAXParseDTD() with a NULL SAX handler. */
12835 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12837 #endif /* LIBXML_VALID_ENABLED */
12839 /************************************************************************
12841 * Front ends when parsing an Entity *
12843 ************************************************************************/
12846 * xmlParseCtxtExternalEntity:
12847 * @ctx: the existing parsing context
12848 * @URL: the URL for the entity to load
12849 * @ID: the System ID for the entity to load
12850 * @lst: the return value for the set of parsed nodes
12852 * Parse an external general entity within an existing parsing context
12853 * An external general parsed entity is well-formed if it matches the
12854 * production labeled extParsedEnt.
12856 * [78] extParsedEnt ::= TextDecl? content
12858 * Returns 0 if the entity is well formed, -1 in case of args problem and
12859 * the parser error code otherwise
12863 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12864 const xmlChar *ID, xmlNodePtr *lst) {
/*
 * Overview: build a child context for the entity with
 * xmlCreateEntityParserCtxtInternal(), parse its content under a temporary
 * "pseudoroot" node with xmlParseContent(), unlink the parsed nodes from
 * that pseudo parent, then release the child context and the temporary
 * document.
 * NOTE(review): this listing omits some physical lines (braces, returns,
 * else branches); the comments below only describe statements that are shown.
 */
12865 xmlParserCtxtPtr ctxt;
12867 xmlNodePtr newRoot;
12868 xmlSAXHandlerPtr oldsax = NULL;
12871 xmlCharEncoding enc;
12873 if (ctx == NULL) return(-1);
/* Nesting guard: a depth above 40 without XML_PARSE_HUGE, or above 1024
 * in any case, is reported as XML_ERR_ENTITY_LOOP. */
12875 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12876 (ctx->depth > 1024)) {
12877 return(XML_ERR_ENTITY_LOOP);
12882 if ((URL == NULL) && (ID == NULL))
12884 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12887 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12888 if (ctxt == NULL) {
12892 oldsax = ctxt->sax;
/* The child borrows the parent's SAX handler; its own handler is kept in
 * oldsax and put back before the child context is freed. */
12893 ctxt->sax = ctx->sax;
12894 xmlDetectSAX2(ctxt);
12895 newDoc = xmlNewDoc(BAD_CAST "1.0");
12896 if (newDoc == NULL) {
/* NOTE(review): ctxt->sax still points at ctx->sax here, whereas the other
 * exits restore oldsax before xmlFreeParserCtxt() - confirm this exit
 * cannot release the parent's handler. */
12897 xmlFreeParserCtxt(ctxt);
12900 newDoc->properties = XML_DOC_INTERNAL;
12901 if (ctx->myDoc->dict) {
12902 newDoc->dict = ctx->myDoc->dict;
12903 xmlDictReference(newDoc->dict);
/* NOTE(review): ctx->myDoc was already dereferenced by the dict test above,
 * so the NULL test below comes too late to protect it. */
12905 if (ctx->myDoc != NULL) {
/* The parent's DTD subsets are only borrowed: both fields are reset to
 * NULL before every xmlFreeDoc(newDoc) in this function. */
12906 newDoc->intSubset = ctx->myDoc->intSubset;
12907 newDoc->extSubset = ctx->myDoc->extSubset;
12909 if (ctx->myDoc->URL != NULL) {
12910 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12912 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12913 if (newRoot == NULL) {
12914 ctxt->sax = oldsax;
12915 xmlFreeParserCtxt(ctxt);
12916 newDoc->intSubset = NULL;
12917 newDoc->extSubset = NULL;
12918 xmlFreeDoc(newDoc);
12921 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12922 nodePush(ctxt, newDoc->children);
12923 if (ctx->myDoc == NULL) {
12924 ctxt->myDoc = newDoc;
12926 ctxt->myDoc = ctx->myDoc;
12927 newDoc->children->doc = ctx->myDoc;
12931 * Get the 4 first bytes and decode the charset
12932 * if enc != XML_CHAR_ENCODING_NONE
12933 * plug some encoding conversion routines.
12936 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12941 enc = xmlDetectCharEncoding(start, 4);
12942 if (enc != XML_CHAR_ENCODING_NONE) {
12943 xmlSwitchEncoding(ctxt, enc);
12948 * Parse a possible text declaration first
12950 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12951 xmlParseTextDecl(ctxt);
12953 * An XML-1.0 document can't reference an entity not XML-1.0
12955 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12956 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12957 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12958 "Version mismatch between document and entity\n");
12963 * If the user provided its own SAX callbacks then reuse the
12964 * useData callback field, otherwise the expected setup in a
12965 * DOM builder is to have userData == ctxt
12967 if (ctx->userData == ctx)
12968 ctxt->userData = ctxt;
12970 ctxt->userData = ctx->userData;
12973 * Doing validity checking on chunk doesn't make sense
12975 ctxt->instate = XML_PARSER_CONTENT;
/* Copy the parent's parser settings; the child runs one level deeper. */
12976 ctxt->validate = ctx->validate;
12977 ctxt->valid = ctx->valid;
12978 ctxt->loadsubset = ctx->loadsubset;
12979 ctxt->depth = ctx->depth + 1;
12980 ctxt->replaceEntities = ctx->replaceEntities;
12981 if (ctxt->validate) {
12982 ctxt->vctxt.error = ctx->vctxt.error;
12983 ctxt->vctxt.warning = ctx->vctxt.warning;
12985 ctxt->vctxt.error = NULL;
12986 ctxt->vctxt.warning = NULL;
12988 ctxt->vctxt.nodeTab = NULL;
12989 ctxt->vctxt.nodeNr = 0;
12990 ctxt->vctxt.nodeMax = 0;
12991 ctxt->vctxt.node = NULL;
/* Replace the child's own dictionary with the parent's and look up the
 * "xml", "xmlns" and XML namespace strings in it. */
12992 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12993 ctxt->dict = ctx->dict;
12994 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12995 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12996 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12997 ctxt->dictNames = ctx->dictNames;
/* The attribute tables are borrowed from the parent; both fields are set
 * back to NULL before the child context is freed. */
12998 ctxt->attsDefault = ctx->attsDefault;
12999 ctxt->attsSpecial = ctx->attsSpecial;
13000 ctxt->linenumbers = ctx->linenumbers;
13002 xmlParseContent(ctxt);
/* Report the validation state back to the parent context. */
13004 ctx->validate = ctxt->validate;
13005 ctx->valid = ctxt->valid;
/* Leftover input: `</` is reported as not well balanced, any other
 * non-zero character as extra content. */
13006 if ((RAW == '<') && (NXT(1) == '/')) {
13007 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13008 } else if (RAW != 0) {
13009 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13011 if (ctxt->node != newDoc->children) {
13012 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13015 if (!ctxt->wellFormed) {
13016 if (ctxt->errNo == 0)
13025 * Return the newly created nodeset after unlinking it from
13026 * they pseudo parent.
13028 cur = newDoc->children->children;
13030 while (cur != NULL) {
13031 cur->parent = NULL;
13034 newDoc->children->children = NULL;
13038 ctxt->sax = oldsax;
13040 ctxt->attsDefault = NULL;
13041 ctxt->attsSpecial = NULL;
13042 xmlFreeParserCtxt(ctxt);
13043 newDoc->intSubset = NULL;
13044 newDoc->extSubset = NULL;
13045 xmlFreeDoc(newDoc);
13051 * xmlParseExternalEntityPrivate:
13052 * @doc: the document the chunk pertains to
13053 * @oldctxt: the previous parser context if available
13054 * @sax: the SAX handler block (possibly NULL)
13055 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13056 * @depth: Used for loop detection, use 0
13057 * @URL: the URL for the entity to load
13058 * @ID: the System ID for the entity to load
13059 * @list: the return value for the set of parsed nodes
13061 * Private version of xmlParseExternalEntity()
13063 * Returns 0 if the entity is well formed, -1 in case of args problem and
13064 * the parser error code otherwise
13067 static xmlParserErrors
13068 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13069 xmlSAXHandlerPtr sax,
13070 void *user_data, int depth, const xmlChar *URL,
13071 const xmlChar *ID, xmlNodePtr *list) {
/*
 * Overview: a child context created by xmlCreateEntityParserCtxtInternal()
 * parses the entity under a temporary "pseudoroot" node of a scratch
 * document that borrows doc's DTD subsets and dictionary. When oldctxt is
 * given, settings are inherited from it and the entity count, entity size,
 * last error and node_seq buffer are reported back to it.
 * NOTE(review): this listing omits some physical lines (braces, returns,
 * else branches); the comments below only describe statements that are shown.
 */
13072 xmlParserCtxtPtr ctxt;
13074 xmlNodePtr newRoot;
13075 xmlSAXHandlerPtr oldsax = NULL;
13076 xmlParserErrors ret = XML_ERR_OK;
13078 xmlCharEncoding enc;
13080 if (((depth > 40) &&
13081 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13083 return(XML_ERR_ENTITY_LOOP);
13088 if ((URL == NULL) && (ID == NULL))
13089 return(XML_ERR_INTERNAL_ERROR);
13091 return(XML_ERR_INTERNAL_ERROR);
13094 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
/* A failure to create the child context is reported as
 * XML_WAR_UNDECLARED_ENTITY. */
13095 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13096 ctxt->userData = ctxt;
13097 if (oldctxt != NULL) {
/* Inherit the parent's settings. node_seq is shared with the parent: its
 * fields are copied in here, copied back at the end, and zeroed in the
 * child ahead of each xmlFreeParserCtxt(). */
13098 ctxt->_private = oldctxt->_private;
13099 ctxt->loadsubset = oldctxt->loadsubset;
13100 ctxt->validate = oldctxt->validate;
13101 ctxt->external = oldctxt->external;
13102 ctxt->record_info = oldctxt->record_info;
13103 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13104 ctxt->node_seq.length = oldctxt->node_seq.length;
13105 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13108 * Doing validity checking on chunk without context
13109 * doesn't make sense
13111 ctxt->_private = NULL;
13112 ctxt->validate = 0;
13113 ctxt->external = 2;
13114 ctxt->loadsubset = 0;
13117 oldsax = ctxt->sax;
13119 if (user_data != NULL)
13120 ctxt->userData = user_data;
13122 xmlDetectSAX2(ctxt);
13123 newDoc = xmlNewDoc(BAD_CAST "1.0");
13124 if (newDoc == NULL) {
/* NOTE(review): the newRoot failure exit below restores ctxt->sax = oldsax
 * before freeing, this exit does not. If a caller-supplied `sax` is
 * installed by now (the assignment is not visible in this listing),
 * confirm it cannot be released here. */
13125 ctxt->node_seq.maximum = 0;
13126 ctxt->node_seq.length = 0;
13127 ctxt->node_seq.buffer = NULL;
13128 xmlFreeParserCtxt(ctxt);
13129 return(XML_ERR_INTERNAL_ERROR);
13131 newDoc->properties = XML_DOC_INTERNAL;
/* The scratch document borrows doc's DTD subsets (reset to NULL before
 * every xmlFreeDoc(newDoc)) and takes a reference on doc's dictionary. */
13132 newDoc->intSubset = doc->intSubset;
13133 newDoc->extSubset = doc->extSubset;
13134 newDoc->dict = doc->dict;
13135 xmlDictReference(newDoc->dict);
13137 if (doc->URL != NULL) {
13138 newDoc->URL = xmlStrdup(doc->URL);
13140 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13141 if (newRoot == NULL) {
13143 ctxt->sax = oldsax;
13144 ctxt->node_seq.maximum = 0;
13145 ctxt->node_seq.length = 0;
13146 ctxt->node_seq.buffer = NULL;
13147 xmlFreeParserCtxt(ctxt);
13148 newDoc->intSubset = NULL;
13149 newDoc->extSubset = NULL;
13150 xmlFreeDoc(newDoc);
13151 return(XML_ERR_INTERNAL_ERROR);
13153 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13154 nodePush(ctxt, newDoc->children);
/* The pseudo root is made to point at the caller's document. */
13156 newRoot->doc = doc;
13159 * Get the 4 first bytes and decode the charset
13160 * if enc != XML_CHAR_ENCODING_NONE
13161 * plug some encoding conversion routines.
13164 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13169 enc = xmlDetectCharEncoding(start, 4);
13170 if (enc != XML_CHAR_ENCODING_NONE) {
13171 xmlSwitchEncoding(ctxt, enc);
13176 * Parse a possible text declaration first
13178 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13179 xmlParseTextDecl(ctxt);
13182 ctxt->instate = XML_PARSER_CONTENT;
13183 ctxt->depth = depth;
13185 xmlParseContent(ctxt);
/* Leftover input: `</` is reported as not well balanced, any other
 * non-zero character as extra content. */
13187 if ((RAW == '<') && (NXT(1) == '/')) {
13188 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13189 } else if (RAW != 0) {
13190 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13192 if (ctxt->node != newDoc->children) {
13193 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13196 if (!ctxt->wellFormed) {
/* A not-well-formed parse without a recorded error number is reported as
 * an internal error, otherwise errNo is returned. */
13197 if (ctxt->errNo == 0)
13198 ret = XML_ERR_INTERNAL_ERROR;
13200 ret = (xmlParserErrors)ctxt->errNo;
13202 if (list != NULL) {
13206 * Return the newly created nodeset after unlinking it from
13207 * they pseudo parent.
13209 cur = newDoc->children->children;
13211 while (cur != NULL) {
13212 cur->parent = NULL;
13215 newDoc->children->children = NULL;
13221 * Record in the parent context the number of entities replacement
13222 * done when parsing that reference.
13224 if (oldctxt != NULL)
13225 oldctxt->nbentities += ctxt->nbentities;
13228 * Also record the size of the entity parsed
13230 if (ctxt->input != NULL && oldctxt != NULL) {
13231 oldctxt->sizeentities += ctxt->input->consumed;
13232 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13235 * And record the last error if any
13237 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13238 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241 ctxt->sax = oldsax;
13242 if (oldctxt != NULL) {
13243 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13244 oldctxt->node_seq.length = ctxt->node_seq.length;
13245 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13247 ctxt->node_seq.maximum = 0;
13248 ctxt->node_seq.length = 0;
13249 ctxt->node_seq.buffer = NULL;
13250 xmlFreeParserCtxt(ctxt);
13251 newDoc->intSubset = NULL;
13252 newDoc->extSubset = NULL;
13253 xmlFreeDoc(newDoc);
13258 #ifdef LIBXML_SAX1_ENABLED
13260 * xmlParseExternalEntity:
13261 * @doc: the document the chunk pertains to
13262 * @sax: the SAX handler block (possibly NULL)
13263 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13264 * @depth: Used for loop detection, use 0
13265 * @URL: the URL for the entity to load
13266 * @ID: the System ID for the entity to load
13267 * @lst: the return value for the set of parsed nodes
13269 * Parse an external general entity
13270 * An external general parsed entity is well-formed if it matches the
13271 * production labeled extParsedEnt.
13273 * [78] extParsedEnt ::= TextDecl? content
13275 * Returns 0 if the entity is well formed, -1 in case of args problem and
13276 * the parser error code otherwise
13280 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13281 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
/* Public entry point: forwards to xmlParseExternalEntityPrivate() with a
 * NULL parent context. */
13282 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13287 * xmlParseBalancedChunkMemory:
13288 * @doc: the document the chunk pertains to
13289 * @sax: the SAX handler block (possibly NULL)
13290 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13291 * @depth: Used for loop detection, use 0
13292 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13293 * @lst: the return value for the set of parsed nodes
13295 * Parse a well-balanced chunk of an XML document
13296 * called by the parser
13297 * The allowed sequence for the Well Balanced Chunk is the one defined by
13298 * the content production in the XML grammar:
13300 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13302 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13303 * the parser error code otherwise
13307 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13308 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
/* Thin wrapper: xmlParseBalancedChunkMemoryRecover() with its last
 * argument (recover) fixed to 0. */
13309 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13310 depth, string, lst, 0 );
13312 #endif /* LIBXML_SAX1_ENABLED */
13315 * xmlParseBalancedChunkMemoryInternal:
13316 * @oldctxt: the existing parsing context
13317 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13318 * @user_data: the user data field for the parser context
13319 * @lst: the return value for the set of parsed nodes
13322 * Parse a well-balanced chunk of an XML document
13323 * called by the parser
13324 * The allowed sequence for the Well Balanced Chunk is the one defined by
13325 * the content production in the XML grammar:
13327 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13329 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13330 * error code otherwise
13332 * This function takes no recover argument: the node list is only
13333 * returned through @lst when the chunk was parsed without error.
13335 static xmlParserErrors
13336 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13337 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
/*
 * Overview: parse the in-memory `string` in a child context that shares
 * oldctxt's dictionary, namespace bindings, SAX handler and (when there is
 * one) document. The content is parsed under a temporary "pseudoroot" node
 * and the document's own child list is put back afterwards.
 * NOTE(review): this listing omits some physical lines (braces, returns,
 * else branches); the comments below only describe statements that are shown.
 */
13338 xmlParserCtxtPtr ctxt;
13339 xmlDocPtr newDoc = NULL;
13340 xmlNodePtr newRoot;
13341 xmlSAXHandlerPtr oldsax = NULL;
13342 xmlNodePtr content = NULL;
13343 xmlNodePtr last = NULL;
13345 xmlParserErrors ret = XML_ERR_OK;
13350 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13351 (oldctxt->depth > 1024)) {
13352 return(XML_ERR_ENTITY_LOOP);
13358 if (string == NULL)
13359 return(XML_ERR_INTERNAL_ERROR);
13361 size = xmlStrlen(string);
13363 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
/* A failure to create the child context is reported as
 * XML_WAR_UNDECLARED_ENTITY. */
13364 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13365 if (user_data != NULL)
13366 ctxt->userData = user_data;
13368 ctxt->userData = ctxt;
/* Replace the child's own dictionary with the parent's and look up the
 * "xml", "xmlns" and XML namespace strings in it. */
13369 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13370 ctxt->dict = oldctxt->dict;
13371 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13372 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13373 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13376 /* propagate namespaces down the entity */
13377 for (i = 0;i < oldctxt->nsNr;i += 2) {
/* nsTab is walked two entries at a time and each pair is pushed. */
13378 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13382 oldsax = ctxt->sax;
/* Borrow the parent's SAX handler; oldsax is restored before every
 * xmlFreeParserCtxt(ctxt) shown in this function. */
13383 ctxt->sax = oldctxt->sax;
13384 xmlDetectSAX2(ctxt);
13385 ctxt->replaceEntities = oldctxt->replaceEntities;
13386 ctxt->options = oldctxt->options;
13388 ctxt->_private = oldctxt->_private;
13389 if (oldctxt->myDoc == NULL) {
13390 newDoc = xmlNewDoc(BAD_CAST "1.0");
13391 if (newDoc == NULL) {
13392 ctxt->sax = oldsax;
13394 xmlFreeParserCtxt(ctxt);
13395 return(XML_ERR_INTERNAL_ERROR);
13397 newDoc->properties = XML_DOC_INTERNAL;
13398 newDoc->dict = ctxt->dict;
13399 xmlDictReference(newDoc->dict);
13400 ctxt->myDoc = newDoc;
13402 ctxt->myDoc = oldctxt->myDoc;
/* Remember the document's current child list; it is restored once parsing
 * is over. */
13403 content = ctxt->myDoc->children;
13404 last = ctxt->myDoc->last;
13406 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13407 if (newRoot == NULL) {
13408 ctxt->sax = oldsax;
13410 xmlFreeParserCtxt(ctxt);
13411 if (newDoc != NULL) {
13412 xmlFreeDoc(newDoc);
13414 return(XML_ERR_INTERNAL_ERROR);
/* Make the pseudo root the document's only child while parsing. */
13416 ctxt->myDoc->children = NULL;
13417 ctxt->myDoc->last = NULL;
13418 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13419 nodePush(ctxt, ctxt->myDoc->children);
13420 ctxt->instate = XML_PARSER_CONTENT;
13421 ctxt->depth = oldctxt->depth + 1;
13423 ctxt->validate = 0;
13424 ctxt->loadsubset = oldctxt->loadsubset;
13425 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13427 * ID/IDREF registration will be done in xmlValidateElement below
13429 ctxt->loadsubset |= XML_SKIP_IDS;
13431 ctxt->dictNames = oldctxt->dictNames;
/* The attribute tables are borrowed from the parent; both fields are set
 * back to NULL before the child context is freed. */
13432 ctxt->attsDefault = oldctxt->attsDefault;
13433 ctxt->attsSpecial = oldctxt->attsSpecial;
13435 xmlParseContent(ctxt);
/* Leftover input: `</` is reported as not well balanced, any other
 * non-zero character as extra content. */
13436 if ((RAW == '<') && (NXT(1) == '/')) {
13437 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13438 } else if (RAW != 0) {
13439 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13441 if (ctxt->node != ctxt->myDoc->children) {
13442 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13445 if (!ctxt->wellFormed) {
13446 if (ctxt->errNo == 0)
13447 ret = XML_ERR_INTERNAL_ERROR;
13449 ret = (xmlParserErrors)ctxt->errNo;
/* The node list is only prepared for the caller when parsing succeeded. */
13454 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13458 * Return the newly created nodeset after unlinking it from
13459 * they pseudo parent.
13461 cur = ctxt->myDoc->children->children;
13463 while (cur != NULL) {
/* With validation on in a well-formed parent that has an internal subset,
 * each element in the list is validated against the parent's document and
 * the result is folded into oldctxt->valid. */
13464 #ifdef LIBXML_VALID_ENABLED
13465 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13466 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13467 (cur->type == XML_ELEMENT_NODE)) {
13468 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13469 oldctxt->myDoc, cur);
13471 #endif /* LIBXML_VALID_ENABLED */
13472 cur->parent = NULL;
13475 ctxt->myDoc->children->children = NULL;
13477 if (ctxt->myDoc != NULL) {
/* Free the pseudo root and put the document's original children back. */
13478 xmlFreeNode(ctxt->myDoc->children);
13479 ctxt->myDoc->children = content;
13480 ctxt->myDoc->last = last;
13484 * Record in the parent context the number of entities replacement
13485 * done when parsing that reference.
13487 if (oldctxt != NULL)
/* NOTE(review): oldctxt has been dereferenced unconditionally since the
 * start of the function, so the NULL test above protects nothing. */
13488 oldctxt->nbentities += ctxt->nbentities;
13491 * Also record the last error if any
13493 if (ctxt->lastError.code != XML_ERR_OK)
13494 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13496 ctxt->sax = oldsax;
13498 ctxt->attsDefault = NULL;
13499 ctxt->attsSpecial = NULL;
13500 xmlFreeParserCtxt(ctxt);
13501 if (newDoc != NULL) {
13502 xmlFreeDoc(newDoc);
13509 * xmlParseInNodeContext:
13510 * @node: the context node
13511 * @data: the input string
13512 * @datalen: the input string length in bytes
13513 * @options: a combination of xmlParserOption
13514 * @lst: the return value for the set of parsed nodes
13516 * Parse a well-balanced chunk of an XML document
13517 * within the context (DTD, namespaces, etc ...) of the given node.
13519 * The allowed sequence for the data is a Well Balanced Chunk defined by
13520 * the content production in the XML grammar:
13522 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13524 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13525 * error code otherwise
13528 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13529 int options, xmlNodePtr *lst) {
/*
 * Overview: validate the arguments, climb from `node` to the nearest
 * element or document node, create an XML or HTML memory parser context
 * that shares the document's dictionary and encoding, seed the namespace
 * stack from the ancestor elements and parse `data` as content of `node`.
 * NOTE(review): this listing omits some physical lines (braces, returns,
 * else branches); the comments below only describe statements that are shown.
 */
13531 xmlParserCtxtPtr ctxt;
13532 xmlDocPtr doc = NULL;
13533 xmlNodePtr fake, cur;
13536 xmlParserErrors ret = XML_ERR_OK;
13539 * check all input parameters, grab the document
13541 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13542 return(XML_ERR_INTERNAL_ERROR);
13543 switch (node->type) {
13544 case XML_ELEMENT_NODE:
13545 case XML_ATTRIBUTE_NODE:
13546 case XML_TEXT_NODE:
13547 case XML_CDATA_SECTION_NODE:
13548 case XML_ENTITY_REF_NODE:
13550 case XML_COMMENT_NODE:
13551 case XML_DOCUMENT_NODE:
13552 case XML_HTML_DOCUMENT_NODE:
13555 return(XML_ERR_INTERNAL_ERROR);
/* Climb to the nearest enclosing element or document node. */
13558 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13559 (node->type != XML_DOCUMENT_NODE) &&
13560 (node->type != XML_HTML_DOCUMENT_NODE))
13561 node = node->parent;
13563 return(XML_ERR_INTERNAL_ERROR);
13564 if (node->type == XML_ELEMENT_NODE)
13567 doc = (xmlDocPtr) node;
13569 return(XML_ERR_INTERNAL_ERROR);
13572 * allocate a context and set-up everything not related to the
13573 * node position in the tree
13575 if (doc->type == XML_DOCUMENT_NODE)
13576 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13577 #ifdef LIBXML_HTML_ENABLED
13578 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13579 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13581 * When parsing in context, it makes no sense to add implied
13582 * elements like html/body/etc...
13584 options |= HTML_PARSE_NOIMPLIED;
13588 return(XML_ERR_INTERNAL_ERROR);
13591 return(XML_ERR_NO_MEMORY);
13594 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13595 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13596 * we must wait until the last moment to free the original one.
13598 if (doc->dict != NULL) {
13599 if (ctxt->dict != NULL)
13600 xmlDictFree(ctxt->dict);
13601 ctxt->dict = doc->dict;
13603 options |= XML_PARSE_NODICT;
13605 if (doc->encoding != NULL) {
13606 xmlCharEncodingHandlerPtr hdlr;
/* The context takes its own copy of the document's encoding name. */
13608 if (ctxt->encoding != NULL)
13609 xmlFree((xmlChar *) ctxt->encoding);
13610 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13612 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13613 if (hdlr != NULL) {
13614 xmlSwitchToEncoding(ctxt, hdlr);
/* NOTE(review): ctxt has been allocated by now and no xmlFreeParserCtxt()
 * is visible before this return - confirm the context is not leaked. */
13616 return(XML_ERR_UNSUPPORTED_ENCODING);
13620 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13621 xmlDetectSAX2(ctxt);
13623 /* parsing in context, i.e. as within existing content */
13624 ctxt->instate = XML_PARSER_CONTENT;
/* A comment node created with NULL content is appended to `node` as a
 * marker; it is unlinked again once parsing is over. */
13626 fake = xmlNewComment(NULL);
13627 if (fake == NULL) {
13628 xmlFreeParserCtxt(ctxt);
13629 return(XML_ERR_NO_MEMORY);
13631 xmlAddChild(node, fake);
13633 if (node->type == XML_ELEMENT_NODE) {
13634 nodePush(ctxt, node);
13636 * initialize the SAX2 namespaces stack
13639 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13640 xmlNsPtr ns = cur->nsDef;
13641 const xmlChar *iprefix, *ihref;
13643 while (ns != NULL) {
13645 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13646 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13648 iprefix = ns->prefix;
/* Push a binding only when xmlGetNamespace() finds none for this prefix. */
13652 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13653 nsPush(ctxt, iprefix, ihref);
13662 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13664 * ID/IDREF registration will be done in xmlValidateElement below
13666 ctxt->loadsubset |= XML_SKIP_IDS;
13669 #ifdef LIBXML_HTML_ENABLED
13670 if (doc->type == XML_HTML_DOCUMENT_NODE)
13671 __htmlParseContent(ctxt);
13674 xmlParseContent(ctxt);
/* Leftover input: `</` is reported as not well balanced, any other
 * non-zero character as extra content. */
13677 if ((RAW == '<') && (NXT(1) == '/')) {
13678 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13679 } else if (RAW != 0) {
13680 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13682 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13683 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13684 ctxt->wellFormed = 0;
13687 if (!ctxt->wellFormed) {
13688 if (ctxt->errNo == 0)
13689 ret = XML_ERR_INTERNAL_ERROR;
13691 ret = (xmlParserErrors)ctxt->errNo;
13697 * Return the newly created nodeset after unlinking it from
13698 * the pseudo sibling.
13711 while (cur != NULL) {
13712 cur->parent = NULL;
13716 xmlUnlinkNode(fake);
/* On failure the node list stored in *lst is freed. */
13720 if (ret != XML_ERR_OK) {
13721 xmlFreeNodeList(*lst);
13725 if (doc->dict != NULL)
13727 xmlFreeParserCtxt(ctxt);
13731 return(XML_ERR_INTERNAL_ERROR);
13735 #ifdef LIBXML_SAX1_ENABLED
13737 * xmlParseBalancedChunkMemoryRecover:
13738 * @doc: the document the chunk pertains to
13739 * @sax: the SAX handler block (possibly NULL)
13740 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13741 * @depth: Used for loop detection, use 0
13742 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13743 * @lst: the return value for the set of parsed nodes
13744 * @recover: return nodes even if the data is broken (use 0)
13747 * Parse a well-balanced chunk of an XML document
13748 * called by the parser
13749 * The allowed sequence for the Well Balanced Chunk is the one defined by
13750 * the content production in the XML grammar:
13752 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13754 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13755 * the parser error code otherwise
13757 * In case recover is set to 1, the nodelist will not be empty even if
13758 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13762 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13763 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13765 xmlParserCtxtPtr ctxt;
/*
 * Overview: parse the in-memory `string` with a fresh memory parser context
 * under a temporary "pseudoroot" node of a scratch document. The scratch
 * document borrows doc's DTD subsets and oldNs list, and the parsed nodes
 * are re-homed to `doc` with xmlSetTreeDoc() before being unlinked.
 * NOTE(review): this listing omits some physical lines (braces, returns,
 * else branches); the comments below only describe statements that are shown.
 */
13767 xmlSAXHandlerPtr oldsax = NULL;
13768 xmlNodePtr content, newRoot;
13773 return(XML_ERR_ENTITY_LOOP);
13779 if (string == NULL)
13782 size = xmlStrlen(string);
13784 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13785 if (ctxt == NULL) return(-1);
13786 ctxt->userData = ctxt;
13788 oldsax = ctxt->sax;
13790 if (user_data != NULL)
13791 ctxt->userData = user_data;
13793 newDoc = xmlNewDoc(BAD_CAST "1.0");
13794 if (newDoc == NULL) {
/* NOTE(review): the newRoot failure exit below restores ctxt->sax = oldsax
 * before freeing, this exit does not. If a caller-supplied `sax` is
 * installed by now (the assignment is not visible in this listing),
 * confirm it cannot be released here. */
13795 xmlFreeParserCtxt(ctxt);
13798 newDoc->properties = XML_DOC_INTERNAL;
13799 if ((doc != NULL) && (doc->dict != NULL)) {
/* Swap the context's dictionary for the document's, taking a reference. */
13800 xmlDictFree(ctxt->dict);
13801 ctxt->dict = doc->dict;
13802 xmlDictReference(ctxt->dict);
13803 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13804 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13805 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13806 ctxt->dictNames = 1;
13808 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13811 newDoc->intSubset = doc->intSubset;
13812 newDoc->extSubset = doc->extSubset;
13814 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13815 if (newRoot == NULL) {
13817 ctxt->sax = oldsax;
13818 xmlFreeParserCtxt(ctxt);
13819 newDoc->intSubset = NULL;
13820 newDoc->extSubset = NULL;
13821 xmlFreeDoc(newDoc);
13824 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13825 nodePush(ctxt, newRoot);
/* Both visible assignments set ctxt->myDoc to newDoc; presumably they are
 * the two arms of a test on `doc` that is not shown in this listing. */
13827 ctxt->myDoc = newDoc;
13829 ctxt->myDoc = newDoc;
13830 newDoc->children->doc = doc;
13831 /* Ensure that doc has XML spec namespace */
13832 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13833 newDoc->oldNs = doc->oldNs;
13835 ctxt->instate = XML_PARSER_CONTENT;
13836 ctxt->depth = depth;
13839 * Doing validity checking on chunk doesn't make sense
13841 ctxt->validate = 0;
13842 ctxt->loadsubset = 0;
13843 xmlDetectSAX2(ctxt);
13845 if ( doc != NULL ){
/* Hide the document's existing children while the chunk is parsed, then
 * put them back. */
13846 content = doc->children;
13847 doc->children = NULL;
13848 xmlParseContent(ctxt);
13849 doc->children = content;
13852 xmlParseContent(ctxt);
/* Leftover input: `</` is reported as not well balanced, any other
 * non-zero character as extra content. */
13854 if ((RAW == '<') && (NXT(1) == '/')) {
13855 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13856 } else if (RAW != 0) {
13857 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13859 if (ctxt->node != newDoc->children) {
13860 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13863 if (!ctxt->wellFormed) {
13864 if (ctxt->errNo == 0)
/* With recover == 1 the node list is prepared even when ret is non-zero. */
13872 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13876 * Return the newly created nodeset after unlinking it from
13877 * they pseudo parent.
13879 cur = newDoc->children->children;
13881 while (cur != NULL) {
13882 xmlSetTreeDoc(cur, doc);
13883 cur->parent = NULL;
13886 newDoc->children->children = NULL;
13890 ctxt->sax = oldsax;
13891 xmlFreeParserCtxt(ctxt);
/* The fields borrowed from `doc` are cleared before the scratch document
 * is freed. */
13892 newDoc->intSubset = NULL;
13893 newDoc->extSubset = NULL;
13894 newDoc->oldNs = NULL;
13895 xmlFreeDoc(newDoc);
13901 * xmlSAXParseEntity:
13902 * @sax: the SAX handler block
13903 * @filename: the filename
13905 * parse an XML external entity out of context and build a tree.
13906 * It uses the given SAX function block to handle the parsing callbacks.
13907 * If sax is NULL, fallback to the default DOM tree building routines.
13909 * [78] extParsedEnt ::= TextDecl? content
13911 * This corresponds to a "Well Balanced" chunk
13913 * Returns the resulting document tree
13917 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
/*
 * Overview: create a file parser context for `filename`, run
 * xmlParseExtParsedEnt() on it and free the context afterwards.
 * NOTE(review): this listing omits some physical lines (braces, returns,
 * else branches); the comments below only describe statements that are shown.
 */
13919 xmlParserCtxtPtr ctxt;
13921 ctxt = xmlCreateFileParserCtxt(filename);
13922 if (ctxt == NULL) {
13926 if (ctxt->sax != NULL)
13927 xmlFree(ctxt->sax);
/* NOTE(review): ctxt->sax was released just above; presumably the caller's
 * `sax` is installed in its place on a line not shown here - TODO confirm. */
13929 ctxt->userData = NULL;
13932 xmlParseExtParsedEnt(ctxt);
13934 if (ctxt->wellFormed)
/* Presumably the not-well-formed arm (its `else` is not shown): the
 * document is freed and the field cleared. */
13938 xmlFreeDoc(ctxt->myDoc);
13939 ctxt->myDoc = NULL;
13943 xmlFreeParserCtxt(ctxt);
13950 * @filename: the filename
13952 * parse an XML external entity out of context and build a tree.
13954 * [78] extParsedEnt ::= TextDecl? content
13956 * This corresponds to a "Well Balanced" chunk
13958 * Returns the resulting document tree
13962 xmlParseEntity(const char *filename) {
/* Thin wrapper: calls xmlSAXParseEntity() with a NULL SAX handler. */
13963 return(xmlSAXParseEntity(NULL, filename));
13965 #endif /* LIBXML_SAX1_ENABLED */
13968 * xmlCreateEntityParserCtxtInternal:
13969 * @URL: the entity URL
13970 * @ID: the entity PUBLIC ID
13971 * @base: a possible base for the target URI
13972 * @pctx: parser context used to set options on new context
13974 * Create a parser context for an external entity
13975 * Automatic support for ZLIB/Compress compressed document is provided
13976 * by default if found at compile-time.
13978 * Returns the new parser context or NULL
13980 static xmlParserCtxtPtr
13981 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13982 const xmlChar *base, xmlParserCtxtPtr pctx) {
/*
 * Overview: allocate a parser context, optionally inherit options and
 * private data from pctx, load the entity with xmlLoadExternalEntity(),
 * push the resulting input and record a directory on the context.
 * NOTE(review): this listing omits some physical lines (braces, returns,
 * else branches); the comments below only describe statements that are shown.
 */
13983 xmlParserCtxtPtr ctxt;
13984 xmlParserInputPtr inputStream;
13985 char *directory = NULL;
13988 ctxt = xmlNewParserCtxt();
13989 if (ctxt == NULL) {
13993 if (pctx != NULL) {
/* Inherit the parser options and private data of the parent context. */
13994 ctxt->options = pctx->options;
13995 ctxt->_private = pctx->_private;
13998 uri = xmlBuildURI(URL, base);
/* Two loading paths follow: one passes URL unchanged, the other the URI
 * built above (the condition choosing between them is not shown). */
14001 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14002 if (inputStream == NULL) {
14003 xmlFreeParserCtxt(ctxt);
14007 inputPush(ctxt, inputStream);
/* Store the result of xmlParserGetDirectory() on the context unless a
 * directory is already set. */
14009 if ((ctxt->directory == NULL) && (directory == NULL))
14010 directory = xmlParserGetDirectory((char *)URL);
14011 if ((ctxt->directory == NULL) && (directory != NULL))
14012 ctxt->directory = directory;
14014 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14015 if (inputStream == NULL) {
14017 xmlFreeParserCtxt(ctxt);
14021 inputPush(ctxt, inputStream);
14023 if ((ctxt->directory == NULL) && (directory == NULL))
14024 directory = xmlParserGetDirectory((char *)uri);
14025 if ((ctxt->directory == NULL) && (directory != NULL))
14026 ctxt->directory = directory;
14033 * xmlCreateEntityParserCtxt:
14034 * @URL: the entity URL
14035 * @ID: the entity PUBLIC ID
14036 * @base: a possible base for the target URI
14038 * Create a parser context for an external entity
14039 * Automatic support for ZLIB/Compress compressed document is provided
14040 * by default if found at compile-time.
14042 * Returns the new parser context or NULL
14045 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14046 const xmlChar *base) {
/* Public wrapper: forwards to the internal variant with a NULL parent
 * context. */
14047 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14051 /************************************************************************
14053 * Front ends when parsing from a file *
14055 ************************************************************************/
14058 * xmlCreateURLParserCtxt:
14059 * @filename: the filename or URL
14060 * @options: a combination of xmlParserOption
14062 * Create a parser context for a file or URL content.
14063 * Automatic support for ZLIB/Compress compressed document is provided
14064 * by default if found at compile-time and for file accesses
14066 * Returns the new parser context or NULL
14069 xmlCreateURLParserCtxt(const char *filename, int options)
14071 xmlParserCtxtPtr ctxt;
14072 xmlParserInputPtr inputStream;
14073 char *directory = NULL;
14075 ctxt = xmlNewParserCtxt();
14076 if (ctxt == NULL) {
14077 xmlErrMemory(NULL, "cannot allocate parser context");
14082 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14083 ctxt->linenumbers = 1;
14085 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14086 if (inputStream == NULL) {
14087 xmlFreeParserCtxt(ctxt);
14091 inputPush(ctxt, inputStream);
14092 if ((ctxt->directory == NULL) && (directory == NULL))
14093 directory = xmlParserGetDirectory(filename);
14094 if ((ctxt->directory == NULL) && (directory != NULL))
14095 ctxt->directory = directory;
14101 * xmlCreateFileParserCtxt:
14102 * @filename: the filename
14104 * Create a parser context for a file content.
14105 * Automatic support for ZLIB/Compress compressed document is provided
14106 * by default if found at compile-time.
14108 * Returns the new parser context or NULL
14111 xmlCreateFileParserCtxt(const char *filename)
14113 return(xmlCreateURLParserCtxt(filename, 0));
14116 #ifdef LIBXML_SAX1_ENABLED
14118 * xmlSAXParseFileWithData:
14119 * @sax: the SAX handler block
14120 * @filename: the filename
14121 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14123 * @data: the userdata
14125 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14126 * compressed document is provided by default if found at compile-time.
14127 * It use the given SAX function block to handle the parsing callback.
14128 * If sax is NULL, fallback to the default DOM tree building routines.
14130 * User data (void *) is stored within the parser context in the
14131 * context's _private member, so it is available nearly everywhere in libxml
14133 * Returns the resulting document tree
14137 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14138 int recovery, void *data) {
14140 xmlParserCtxtPtr ctxt;
14144 ctxt = xmlCreateFileParserCtxt(filename);
14145 if (ctxt == NULL) {
14149 if (ctxt->sax != NULL)
14150 xmlFree(ctxt->sax);
14153 xmlDetectSAX2(ctxt);
14155 ctxt->_private = data;
14158 if (ctxt->directory == NULL)
14159 ctxt->directory = xmlParserGetDirectory(filename);
14161 ctxt->recovery = recovery;
14163 xmlParseDocument(ctxt);
14165 if ((ctxt->wellFormed) || recovery) {
14168 if (ctxt->input->buf->compressed > 0)
14169 ret->compression = 9;
14171 ret->compression = ctxt->input->buf->compressed;
14176 xmlFreeDoc(ctxt->myDoc);
14177 ctxt->myDoc = NULL;
14181 xmlFreeParserCtxt(ctxt);
14188 * @sax: the SAX handler block
14189 * @filename: the filename
14190 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14193 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14194 * compressed document is provided by default if found at compile-time.
14195 * It use the given SAX function block to handle the parsing callback.
14196 * If sax is NULL, fallback to the default DOM tree building routines.
14198 * Returns the resulting document tree
14202 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14204 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14209 * @cur: a pointer to an array of xmlChar
14211 * parse an XML in-memory document and build a tree.
14212 * In the case the document is not Well Formed, a attempt to build a
14213 * tree is tried anyway
14215 * Returns the resulting document tree or NULL in case of failure
14219 xmlRecoverDoc(const xmlChar *cur) {
14220 return(xmlSAXParseDoc(NULL, cur, 1));
14225 * @filename: the filename
14227 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14228 * compressed document is provided by default if found at compile-time.
14230 * Returns the resulting document tree if the file was wellformed,
14235 xmlParseFile(const char *filename) {
14236 return(xmlSAXParseFile(NULL, filename, 0));
14241 * @filename: the filename
14243 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14244 * compressed document is provided by default if found at compile-time.
14245 * In the case the document is not Well Formed, it attempts to build
14248 * Returns the resulting document tree or NULL in case of failure
14252 xmlRecoverFile(const char *filename) {
14253 return(xmlSAXParseFile(NULL, filename, 1));
14258 * xmlSetupParserForBuffer:
14259 * @ctxt: an XML parser context
14260 * @buffer: a xmlChar * buffer
14261 * @filename: a file name
14263 * Setup the parser context to parse a new buffer; Clears any prior
14264 * contents from the parser context. The buffer parameter must not be
14265 * NULL, but the filename parameter can be
14268 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14269 const char* filename)
14271 xmlParserInputPtr input;
14273 if ((ctxt == NULL) || (buffer == NULL))
14276 input = xmlNewInputStream(ctxt);
14277 if (input == NULL) {
14278 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14279 xmlClearParserCtxt(ctxt);
14283 xmlClearParserCtxt(ctxt);
14284 if (filename != NULL)
14285 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14286 input->base = buffer;
14287 input->cur = buffer;
14288 input->end = &buffer[xmlStrlen(buffer)];
14289 inputPush(ctxt, input);
14293 * xmlSAXUserParseFile:
14294 * @sax: a SAX handler
14295 * @user_data: The user data returned on SAX callbacks
14296 * @filename: a file name
14298 * parse an XML file and call the given SAX handler routines.
14299 * Automatic support for ZLIB/Compress compressed document is provided
14301 * Returns 0 in case of success or a error number otherwise
14304 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14305 const char *filename) {
14307 xmlParserCtxtPtr ctxt;
14309 ctxt = xmlCreateFileParserCtxt(filename);
14310 if (ctxt == NULL) return -1;
14311 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14312 xmlFree(ctxt->sax);
14314 xmlDetectSAX2(ctxt);
14316 if (user_data != NULL)
14317 ctxt->userData = user_data;
14319 xmlParseDocument(ctxt);
14321 if (ctxt->wellFormed)
14324 if (ctxt->errNo != 0)
14331 if (ctxt->myDoc != NULL) {
14332 xmlFreeDoc(ctxt->myDoc);
14333 ctxt->myDoc = NULL;
14335 xmlFreeParserCtxt(ctxt);
14339 #endif /* LIBXML_SAX1_ENABLED */
14341 /************************************************************************
14343 * Front ends when parsing from memory *
14345 ************************************************************************/
14348 * xmlCreateMemoryParserCtxt:
14349 * @buffer: a pointer to a char array
14350 * @size: the size of the array
14352 * Create a parser context for an XML in-memory document.
14354 * Returns the new parser context or NULL
14357 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14358 xmlParserCtxtPtr ctxt;
14359 xmlParserInputPtr input;
14360 xmlParserInputBufferPtr buf;
14362 if (buffer == NULL)
14367 ctxt = xmlNewParserCtxt();
14371 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14372 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14374 xmlFreeParserCtxt(ctxt);
14378 input = xmlNewInputStream(ctxt);
14379 if (input == NULL) {
14380 xmlFreeParserInputBuffer(buf);
14381 xmlFreeParserCtxt(ctxt);
14385 input->filename = NULL;
14387 xmlBufResetInput(input->buf->buffer, input);
14389 inputPush(ctxt, input);
14393 #ifdef LIBXML_SAX1_ENABLED
14395 * xmlSAXParseMemoryWithData:
14396 * @sax: the SAX handler block
14397 * @buffer: an pointer to a char array
14398 * @size: the size of the array
14399 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14401 * @data: the userdata
14403 * parse an XML in-memory block and use the given SAX function block
14404 * to handle the parsing callback. If sax is NULL, fallback to the default
14405 * DOM tree building routines.
14407 * User data (void *) is stored within the parser context in the
14408 * context's _private member, so it is available nearly everywhere in libxml
14410 * Returns the resulting document tree
14414 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14415 int size, int recovery, void *data) {
14417 xmlParserCtxtPtr ctxt;
14421 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14422 if (ctxt == NULL) return(NULL);
14424 if (ctxt->sax != NULL)
14425 xmlFree(ctxt->sax);
14428 xmlDetectSAX2(ctxt);
14430 ctxt->_private=data;
14433 ctxt->recovery = recovery;
14435 xmlParseDocument(ctxt);
14437 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14440 xmlFreeDoc(ctxt->myDoc);
14441 ctxt->myDoc = NULL;
14445 xmlFreeParserCtxt(ctxt);
14451 * xmlSAXParseMemory:
14452 * @sax: the SAX handler block
14453 * @buffer: an pointer to a char array
14454 * @size: the size of the array
14455 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14458 * parse an XML in-memory block and use the given SAX function block
14459 * to handle the parsing callback. If sax is NULL, fallback to the default
14460 * DOM tree building routines.
14462 * Returns the resulting document tree
14465 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14466 int size, int recovery) {
14467 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14472 * @buffer: an pointer to a char array
14473 * @size: the size of the array
14475 * parse an XML in-memory block and build a tree.
14477 * Returns the resulting document tree
14480 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14481 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14485 * xmlRecoverMemory:
14486 * @buffer: an pointer to a char array
14487 * @size: the size of the array
14489 * parse an XML in-memory block and build a tree.
14490 * In the case the document is not Well Formed, an attempt to
14491 * build a tree is tried anyway
14493 * Returns the resulting document tree or NULL in case of error
14496 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14497 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14501 * xmlSAXUserParseMemory:
14502 * @sax: a SAX handler
14503 * @user_data: The user data returned on SAX callbacks
14504 * @buffer: an in-memory XML document input
14505 * @size: the length of the XML document in bytes
14507 * A better SAX parsing routine.
14508 * parse an XML in-memory buffer and call the given SAX handler routines.
14510 * Returns 0 in case of success or a error number otherwise
14512 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14513 const char *buffer, int size) {
14515 xmlParserCtxtPtr ctxt;
14519 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14520 if (ctxt == NULL) return -1;
14521 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14522 xmlFree(ctxt->sax);
14524 xmlDetectSAX2(ctxt);
14526 if (user_data != NULL)
14527 ctxt->userData = user_data;
14529 xmlParseDocument(ctxt);
14531 if (ctxt->wellFormed)
14534 if (ctxt->errNo != 0)
14541 if (ctxt->myDoc != NULL) {
14542 xmlFreeDoc(ctxt->myDoc);
14543 ctxt->myDoc = NULL;
14545 xmlFreeParserCtxt(ctxt);
14549 #endif /* LIBXML_SAX1_ENABLED */
14552 * xmlCreateDocParserCtxt:
14553 * @cur: a pointer to an array of xmlChar
14555 * Creates a parser context for an XML in-memory document.
14557 * Returns the new parser context or NULL
14560 xmlCreateDocParserCtxt(const xmlChar *cur) {
14565 len = xmlStrlen(cur);
14566 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14569 #ifdef LIBXML_SAX1_ENABLED
14572 * @sax: the SAX handler block
14573 * @cur: a pointer to an array of xmlChar
14574 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14577 * parse an XML in-memory document and build a tree.
14578 * It use the given SAX function block to handle the parsing callback.
14579 * If sax is NULL, fallback to the default DOM tree building routines.
14581 * Returns the resulting document tree
14585 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14587 xmlParserCtxtPtr ctxt;
14588 xmlSAXHandlerPtr oldsax = NULL;
14590 if (cur == NULL) return(NULL);
14593 ctxt = xmlCreateDocParserCtxt(cur);
14594 if (ctxt == NULL) return(NULL);
14596 oldsax = ctxt->sax;
14598 ctxt->userData = NULL;
14600 xmlDetectSAX2(ctxt);
14602 xmlParseDocument(ctxt);
14603 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14606 xmlFreeDoc(ctxt->myDoc);
14607 ctxt->myDoc = NULL;
14610 ctxt->sax = oldsax;
14611 xmlFreeParserCtxt(ctxt);
14618 * @cur: a pointer to an array of xmlChar
14620 * parse an XML in-memory document and build a tree.
14622 * Returns the resulting document tree
14626 xmlParseDoc(const xmlChar *cur) {
14627 return(xmlSAXParseDoc(NULL, cur, 0));
14629 #endif /* LIBXML_SAX1_ENABLED */
14631 #ifdef LIBXML_LEGACY_ENABLED
14632 /************************************************************************
14634 * Specific function to keep track of entities references *
14635 * and used by the XSLT debugger *
14637 ************************************************************************/
14639 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14642 * xmlAddEntityReference:
14643 * @ent : A valid entity
14644 * @firstNode : A valid first node for children of entity
14645 * @lastNode : A valid last node of children entity
14647 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14650 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14651 xmlNodePtr lastNode)
14653 if (xmlEntityRefFunc != NULL) {
14654 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14660 * xmlSetEntityReferenceFunc:
14661 * @func: A valid function
14663 * Set the function to call call back when a xml reference has been made
14666 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14668 xmlEntityRefFunc = func;
14670 #endif /* LIBXML_LEGACY_ENABLED */
14672 /************************************************************************
14676 ************************************************************************/
14678 #ifdef LIBXML_XPATH_ENABLED
14679 #include <libxml/xpath.h>
14682 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14683 static int xmlParserInitialized = 0;
14688 * Initialization function for the XML parser.
14689 * This is not reentrant. Call once before processing in case of
14690 * use in multithreaded programs.
14694 xmlInitParser(void) {
14695 if (xmlParserInitialized != 0)
14698 #ifdef LIBXML_THREAD_ENABLED
14699 __xmlGlobalInitMutexLock();
14700 if (xmlParserInitialized == 0) {
14704 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14705 (xmlGenericError == NULL))
14706 initGenericErrorDefaultFunc(NULL);
14708 xmlInitializeDict();
14709 xmlInitCharEncodingHandlers();
14710 xmlDefaultSAXHandlerInit();
14711 xmlRegisterDefaultInputCallbacks();
14712 #ifdef LIBXML_OUTPUT_ENABLED
14713 xmlRegisterDefaultOutputCallbacks();
14714 #endif /* LIBXML_OUTPUT_ENABLED */
14715 #ifdef LIBXML_HTML_ENABLED
14716 htmlInitAutoClose();
14717 htmlDefaultSAXHandlerInit();
14719 #ifdef LIBXML_XPATH_ENABLED
14722 xmlParserInitialized = 1;
14723 #ifdef LIBXML_THREAD_ENABLED
14725 __xmlGlobalInitMutexUnlock();
14730 * xmlCleanupParser:
14732 * This function name is somewhat misleading. It does not clean up
14733 * parser state, it cleans up memory allocated by the library itself.
14734 * It is a cleanup function for the XML library. It tries to reclaim all
14735 * related global memory allocated for the library processing.
14736 * It doesn't deallocate any document related memory. One should
14737 * call xmlCleanupParser() only when the process has finished using
14738 * the library and all XML/HTML documents built with it.
14739 * See also xmlInitParser() which has the opposite function of preparing
14740 * the library for operations.
14742 * WARNING: if your application is multithreaded or has plugin support
14743 * calling this may crash the application if another thread or
14744 * a plugin is still using libxml2. It's sometimes very hard to
14745 * guess if libxml2 is in use in the application, some libraries
14746 * or plugins may use it without notice. In case of doubt abstain
14747 * from calling this function or do it just before calling exit()
14748 * to avoid leak reports from valgrind !
14752 xmlCleanupParser(void) {
14753 if (!xmlParserInitialized)
14756 xmlCleanupCharEncodingHandlers();
14757 #ifdef LIBXML_CATALOG_ENABLED
14758 xmlCatalogCleanup();
14761 xmlCleanupInputCallbacks();
14762 #ifdef LIBXML_OUTPUT_ENABLED
14763 xmlCleanupOutputCallbacks();
14765 #ifdef LIBXML_SCHEMAS_ENABLED
14766 xmlSchemaCleanupTypes();
14767 xmlRelaxNGCleanupTypes();
14769 xmlResetLastError();
14770 xmlCleanupGlobals();
14771 xmlCleanupThreads(); /* must be last if called not from the main thread */
14772 xmlCleanupMemory();
14773 xmlParserInitialized = 0;
14776 /************************************************************************
14778 * New set (2.6.0) of simpler and more flexible APIs *
14780 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope
 *
 * A variable named "dict" must be visible where the macro is expanded.
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe inside an unbraced if/else); callers supply
 * the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14796 * @ctxt: an XML parser context
14798 * Reset a parser context
14801 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14803 xmlParserInputPtr input;
14811 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14812 xmlFreeInputStream(input);
14815 ctxt->input = NULL;
14818 if (ctxt->spaceTab != NULL) {
14819 ctxt->spaceTab[0] = -1;
14820 ctxt->space = &ctxt->spaceTab[0];
14822 ctxt->space = NULL;
14832 DICT_FREE(ctxt->version);
14833 ctxt->version = NULL;
14834 DICT_FREE(ctxt->encoding);
14835 ctxt->encoding = NULL;
14836 DICT_FREE(ctxt->directory);
14837 ctxt->directory = NULL;
14838 DICT_FREE(ctxt->extSubURI);
14839 ctxt->extSubURI = NULL;
14840 DICT_FREE(ctxt->extSubSystem);
14841 ctxt->extSubSystem = NULL;
14842 if (ctxt->myDoc != NULL)
14843 xmlFreeDoc(ctxt->myDoc);
14844 ctxt->myDoc = NULL;
14846 ctxt->standalone = -1;
14847 ctxt->hasExternalSubset = 0;
14848 ctxt->hasPErefs = 0;
14850 ctxt->external = 0;
14851 ctxt->instate = XML_PARSER_START;
14854 ctxt->wellFormed = 1;
14855 ctxt->nsWellFormed = 1;
14856 ctxt->disableSAX = 0;
14859 ctxt->vctxt.userData = ctxt;
14860 ctxt->vctxt.error = xmlParserValidityError;
14861 ctxt->vctxt.warning = xmlParserValidityWarning;
14863 ctxt->record_info = 0;
14865 ctxt->checkIndex = 0;
14866 ctxt->inSubset = 0;
14867 ctxt->errNo = XML_ERR_OK;
14869 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14870 ctxt->catalogs = NULL;
14871 ctxt->nbentities = 0;
14872 ctxt->sizeentities = 0;
14873 ctxt->sizeentcopy = 0;
14874 xmlInitNodeInfoSeq(&ctxt->node_seq);
14876 if (ctxt->attsDefault != NULL) {
14877 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14878 ctxt->attsDefault = NULL;
14880 if (ctxt->attsSpecial != NULL) {
14881 xmlHashFree(ctxt->attsSpecial, NULL);
14882 ctxt->attsSpecial = NULL;
14885 #ifdef LIBXML_CATALOG_ENABLED
14886 if (ctxt->catalogs != NULL)
14887 xmlCatalogFreeLocal(ctxt->catalogs);
14889 if (ctxt->lastError.code != XML_ERR_OK)
14890 xmlResetError(&ctxt->lastError);
14894 * xmlCtxtResetPush:
14895 * @ctxt: an XML parser context
14896 * @chunk: a pointer to an array of chars
14897 * @size: number of chars in the array
14898 * @filename: an optional file name or URI
14899 * @encoding: the document encoding, or NULL
14901 * Reset a push parser context
14903 * Returns 0 in case of success and 1 in case of error
14906 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14907 int size, const char *filename, const char *encoding)
14909 xmlParserInputPtr inputStream;
14910 xmlParserInputBufferPtr buf;
14911 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14916 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14917 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14919 buf = xmlAllocParserInputBuffer(enc);
14923 if (ctxt == NULL) {
14924 xmlFreeParserInputBuffer(buf);
14928 xmlCtxtReset(ctxt);
14930 if (ctxt->pushTab == NULL) {
14931 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14932 sizeof(xmlChar *));
14933 if (ctxt->pushTab == NULL) {
14934 xmlErrMemory(ctxt, NULL);
14935 xmlFreeParserInputBuffer(buf);
14940 if (filename == NULL) {
14941 ctxt->directory = NULL;
14943 ctxt->directory = xmlParserGetDirectory(filename);
14946 inputStream = xmlNewInputStream(ctxt);
14947 if (inputStream == NULL) {
14948 xmlFreeParserInputBuffer(buf);
14952 if (filename == NULL)
14953 inputStream->filename = NULL;
14955 inputStream->filename = (char *)
14956 xmlCanonicPath((const xmlChar *) filename);
14957 inputStream->buf = buf;
14958 xmlBufResetInput(buf->buffer, inputStream);
14960 inputPush(ctxt, inputStream);
14962 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14963 (ctxt->input->buf != NULL)) {
14964 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14965 size_t cur = ctxt->input->cur - ctxt->input->base;
14967 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14969 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14971 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14975 if (encoding != NULL) {
14976 xmlCharEncodingHandlerPtr hdlr;
14978 if (ctxt->encoding != NULL)
14979 xmlFree((xmlChar *) ctxt->encoding);
14980 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14982 hdlr = xmlFindCharEncodingHandler(encoding);
14983 if (hdlr != NULL) {
14984 xmlSwitchToEncoding(ctxt, hdlr);
14986 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14987 "Unsupported encoding %s\n", BAD_CAST encoding);
14989 } else if (enc != XML_CHAR_ENCODING_NONE) {
14990 xmlSwitchEncoding(ctxt, enc);
14998 * xmlCtxtUseOptionsInternal:
14999 * @ctxt: an XML parser context
15000 * @options: a combination of xmlParserOption
15001 * @encoding: the user provided encoding to use
15003 * Applies the options to the parser context
15005 * Returns 0 in case of success, the set of unknown or unimplemented options
15006 * in case of error.
15009 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15013 if (encoding != NULL) {
15014 if (ctxt->encoding != NULL)
15015 xmlFree((xmlChar *) ctxt->encoding);
15016 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15018 if (options & XML_PARSE_RECOVER) {
15019 ctxt->recovery = 1;
15020 options -= XML_PARSE_RECOVER;
15021 ctxt->options |= XML_PARSE_RECOVER;
15023 ctxt->recovery = 0;
15024 if (options & XML_PARSE_DTDLOAD) {
15025 ctxt->loadsubset = XML_DETECT_IDS;
15026 options -= XML_PARSE_DTDLOAD;
15027 ctxt->options |= XML_PARSE_DTDLOAD;
15029 ctxt->loadsubset = 0;
15030 if (options & XML_PARSE_DTDATTR) {
15031 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15032 options -= XML_PARSE_DTDATTR;
15033 ctxt->options |= XML_PARSE_DTDATTR;
15035 if (options & XML_PARSE_NOENT) {
15036 ctxt->replaceEntities = 1;
15037 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15038 options -= XML_PARSE_NOENT;
15039 ctxt->options |= XML_PARSE_NOENT;
15041 ctxt->replaceEntities = 0;
15042 if (options & XML_PARSE_PEDANTIC) {
15043 ctxt->pedantic = 1;
15044 options -= XML_PARSE_PEDANTIC;
15045 ctxt->options |= XML_PARSE_PEDANTIC;
15047 ctxt->pedantic = 0;
15048 if (options & XML_PARSE_NOBLANKS) {
15049 ctxt->keepBlanks = 0;
15050 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15051 options -= XML_PARSE_NOBLANKS;
15052 ctxt->options |= XML_PARSE_NOBLANKS;
15054 ctxt->keepBlanks = 1;
15055 if (options & XML_PARSE_DTDVALID) {
15056 ctxt->validate = 1;
15057 if (options & XML_PARSE_NOWARNING)
15058 ctxt->vctxt.warning = NULL;
15059 if (options & XML_PARSE_NOERROR)
15060 ctxt->vctxt.error = NULL;
15061 options -= XML_PARSE_DTDVALID;
15062 ctxt->options |= XML_PARSE_DTDVALID;
15064 ctxt->validate = 0;
15065 if (options & XML_PARSE_NOWARNING) {
15066 ctxt->sax->warning = NULL;
15067 options -= XML_PARSE_NOWARNING;
15069 if (options & XML_PARSE_NOERROR) {
15070 ctxt->sax->error = NULL;
15071 ctxt->sax->fatalError = NULL;
15072 options -= XML_PARSE_NOERROR;
15074 #ifdef LIBXML_SAX1_ENABLED
15075 if (options & XML_PARSE_SAX1) {
15076 ctxt->sax->startElement = xmlSAX2StartElement;
15077 ctxt->sax->endElement = xmlSAX2EndElement;
15078 ctxt->sax->startElementNs = NULL;
15079 ctxt->sax->endElementNs = NULL;
15080 ctxt->sax->initialized = 1;
15081 options -= XML_PARSE_SAX1;
15082 ctxt->options |= XML_PARSE_SAX1;
15084 #endif /* LIBXML_SAX1_ENABLED */
15085 if (options & XML_PARSE_NODICT) {
15086 ctxt->dictNames = 0;
15087 options -= XML_PARSE_NODICT;
15088 ctxt->options |= XML_PARSE_NODICT;
15090 ctxt->dictNames = 1;
15092 if (options & XML_PARSE_NOCDATA) {
15093 ctxt->sax->cdataBlock = NULL;
15094 options -= XML_PARSE_NOCDATA;
15095 ctxt->options |= XML_PARSE_NOCDATA;
15097 if (options & XML_PARSE_NSCLEAN) {
15098 ctxt->options |= XML_PARSE_NSCLEAN;
15099 options -= XML_PARSE_NSCLEAN;
15101 if (options & XML_PARSE_NONET) {
15102 ctxt->options |= XML_PARSE_NONET;
15103 options -= XML_PARSE_NONET;
15105 if (options & XML_PARSE_COMPACT) {
15106 ctxt->options |= XML_PARSE_COMPACT;
15107 options -= XML_PARSE_COMPACT;
15109 if (options & XML_PARSE_OLD10) {
15110 ctxt->options |= XML_PARSE_OLD10;
15111 options -= XML_PARSE_OLD10;
15113 if (options & XML_PARSE_NOBASEFIX) {
15114 ctxt->options |= XML_PARSE_NOBASEFIX;
15115 options -= XML_PARSE_NOBASEFIX;
15117 if (options & XML_PARSE_HUGE) {
15118 ctxt->options |= XML_PARSE_HUGE;
15119 options -= XML_PARSE_HUGE;
15120 if (ctxt->dict != NULL)
15121 xmlDictSetLimit(ctxt->dict, 0);
15123 if (options & XML_PARSE_OLDSAX) {
15124 ctxt->options |= XML_PARSE_OLDSAX;
15125 options -= XML_PARSE_OLDSAX;
15127 if (options & XML_PARSE_IGNORE_ENC) {
15128 ctxt->options |= XML_PARSE_IGNORE_ENC;
15129 options -= XML_PARSE_IGNORE_ENC;
15131 if (options & XML_PARSE_BIG_LINES) {
15132 ctxt->options |= XML_PARSE_BIG_LINES;
15133 options -= XML_PARSE_BIG_LINES;
15135 ctxt->linenumbers = 1;
15140 * xmlCtxtUseOptions:
15141 * @ctxt: an XML parser context
15142 * @options: a combination of xmlParserOption
15144 * Applies the options to the parser context
15146 * Returns 0 in case of success, the set of unknown or unimplemented options
15147 * in case of error.
15150 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15152 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15157 * @ctxt: an XML parser context
15158 * @URL: the base URL to use for the document
15159 * @encoding: the document encoding, or NULL
15160 * @options: a combination of xmlParserOption
15161 * @reuse: keep the context for reuse
15163 * Common front-end for the xmlRead functions
15165 * Returns the resulting document tree or NULL
15168 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15169 int options, int reuse)
15173 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15174 if (encoding != NULL) {
15175 xmlCharEncodingHandlerPtr hdlr;
15177 hdlr = xmlFindCharEncodingHandler(encoding);
15179 xmlSwitchToEncoding(ctxt, hdlr);
15181 if ((URL != NULL) && (ctxt->input != NULL) &&
15182 (ctxt->input->filename == NULL))
15183 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15184 xmlParseDocument(ctxt);
15185 if ((ctxt->wellFormed) || ctxt->recovery)
15189 if (ctxt->myDoc != NULL) {
15190 xmlFreeDoc(ctxt->myDoc);
15193 ctxt->myDoc = NULL;
15195 xmlFreeParserCtxt(ctxt);
15203 * @cur: a pointer to a zero terminated string
15204 * @URL: the base URL to use for the document
15205 * @encoding: the document encoding, or NULL
15206 * @options: a combination of xmlParserOption
15208 * parse an XML in-memory document and build a tree.
15210 * Returns the resulting document tree
15213 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15215 xmlParserCtxtPtr ctxt;
15221 ctxt = xmlCreateDocParserCtxt(cur);
15224 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15229 * @filename: a file or URL
15230 * @encoding: the document encoding, or NULL
15231 * @options: a combination of xmlParserOption
15233 * parse an XML file from the filesystem or the network.
15235 * Returns the resulting document tree
15238 xmlReadFile(const char *filename, const char *encoding, int options)
15240 xmlParserCtxtPtr ctxt;
15243 ctxt = xmlCreateURLParserCtxt(filename, options);
15246 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15251 * @buffer: a pointer to a char array
15252 * @size: the size of the array
15253 * @URL: the base URL to use for the document
15254 * @encoding: the document encoding, or NULL
15255 * @options: a combination of xmlParserOption
15257 * parse an XML in-memory document and build a tree.
15259 * Returns the resulting document tree
15262 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15264 xmlParserCtxtPtr ctxt;
15267 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15270 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15275 * @fd: an open file descriptor
15276 * @URL: the base URL to use for the document
15277 * @encoding: the document encoding, or NULL
15278 * @options: a combination of xmlParserOption
15280 * parse an XML from a file descriptor and build a tree.
15281 * NOTE that the file descriptor will not be closed when the
15282 * reader is closed or reset.
15284 * Returns the resulting document tree
15287 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15289 xmlParserCtxtPtr ctxt;
15290 xmlParserInputBufferPtr input;
15291 xmlParserInputPtr stream;
15297 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15300 input->closecallback = NULL;
15301 ctxt = xmlNewParserCtxt();
15302 if (ctxt == NULL) {
15303 xmlFreeParserInputBuffer(input);
15306 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15307 if (stream == NULL) {
15308 xmlFreeParserInputBuffer(input);
15309 xmlFreeParserCtxt(ctxt);
15312 inputPush(ctxt, stream);
15313 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15318 * @ioread: an I/O read function
15319 * @ioclose: an I/O close function
15320 * @ioctx: an I/O handler
15321 * @URL: the base URL to use for the document
15322 * @encoding: the document encoding, or NULL
15323 * @options: a combination of xmlParserOption
15325 * parse an XML document from I/O functions and source and build a tree.
15327 * Returns the resulting document tree
15330 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15331 void *ioctx, const char *URL, const char *encoding, int options)
15333 xmlParserCtxtPtr ctxt;
15334 xmlParserInputBufferPtr input;
15335 xmlParserInputPtr stream;
15337 if (ioread == NULL)
15341 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15342 XML_CHAR_ENCODING_NONE);
15343 if (input == NULL) {
15344 if (ioclose != NULL)
15348 ctxt = xmlNewParserCtxt();
15349 if (ctxt == NULL) {
15350 xmlFreeParserInputBuffer(input);
15353 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15354 if (stream == NULL) {
15355 xmlFreeParserInputBuffer(input);
15356 xmlFreeParserCtxt(ctxt);
15359 inputPush(ctxt, stream);
15360 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15365 * @ctxt: an XML parser context
15366 * @cur: a pointer to a zero terminated string
15367 * @URL: the base URL to use for the document
15368 * @encoding: the document encoding, or NULL
15369 * @options: a combination of xmlParserOption
15371 * parse an XML in-memory document and build a tree.
15372 * This reuses the existing @ctxt parser context
15374 * Returns the resulting document tree
15377 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15378 const char *URL, const char *encoding, int options)
15380 xmlParserInputPtr stream;
15388 xmlCtxtReset(ctxt);
15390 stream = xmlNewStringInputStream(ctxt, cur);
15391 if (stream == NULL) {
15394 inputPush(ctxt, stream);
15395 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15400 * @ctxt: an XML parser context
15401 * @filename: a file or URL
15402 * @encoding: the document encoding, or NULL
15403 * @options: a combination of xmlParserOption
15405 * parse an XML file from the filesystem or the network.
15406 * This reuses the existing @ctxt parser context
15408 * Returns the resulting document tree
15411 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15412 const char *encoding, int options)
15414 xmlParserInputPtr stream;
15416 if (filename == NULL)
15422 xmlCtxtReset(ctxt);
15424 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15425 if (stream == NULL) {
15428 inputPush(ctxt, stream);
15429 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15433 * xmlCtxtReadMemory:
15434 * @ctxt: an XML parser context
15435 * @buffer: a pointer to a char array
15436 * @size: the size of the array
15437 * @URL: the base URL to use for the document
15438 * @encoding: the document encoding, or NULL
15439 * @options: a combination of xmlParserOption
15441 * parse an XML in-memory document and build a tree.
15442 * This reuses the existing @ctxt parser context
15444 * Returns the resulting document tree
15447 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15448 const char *URL, const char *encoding, int options)
15450 xmlParserInputBufferPtr input;
15451 xmlParserInputPtr stream;
15455 if (buffer == NULL)
15459 xmlCtxtReset(ctxt);
15461 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15462 if (input == NULL) {
15466 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15467 if (stream == NULL) {
15468 xmlFreeParserInputBuffer(input);
15472 inputPush(ctxt, stream);
15473 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15478 * @ctxt: an XML parser context
15479 * @fd: an open file descriptor
15480 * @URL: the base URL to use for the document
15481 * @encoding: the document encoding, or NULL
15482 * @options: a combination of xmlParserOption
15484 * parse an XML from a file descriptor and build a tree.
15485 * This reuses the existing @ctxt parser context
15486 * NOTE that the file descriptor will not be closed when the
15487 * reader is closed or reset.
15489 * Returns the resulting document tree
15492 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15493 const char *URL, const char *encoding, int options)
15495 xmlParserInputBufferPtr input;
15496 xmlParserInputPtr stream;
15504 xmlCtxtReset(ctxt);
15507 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15510 input->closecallback = NULL;
15511 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15512 if (stream == NULL) {
15513 xmlFreeParserInputBuffer(input);
15516 inputPush(ctxt, stream);
15517 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15522 * @ctxt: an XML parser context
15523 * @ioread: an I/O read function
15524 * @ioclose: an I/O close function
15525 * @ioctx: an I/O handler
15526 * @URL: the base URL to use for the document
15527 * @encoding: the document encoding, or NULL
15528 * @options: a combination of xmlParserOption
15530 * parse an XML document from I/O functions and source and build a tree.
15531 * This reuses the existing @ctxt parser context
15533 * Returns the resulting document tree
15536 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15537 xmlInputCloseCallback ioclose, void *ioctx,
15539 const char *encoding, int options)
15541 xmlParserInputBufferPtr input;
15542 xmlParserInputPtr stream;
15544 if (ioread == NULL)
15550 xmlCtxtReset(ctxt);
15552 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15553 XML_CHAR_ENCODING_NONE);
15554 if (input == NULL) {
15555 if (ioclose != NULL)
15559 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15560 if (stream == NULL) {
15561 xmlFreeParserInputBuffer(input);
15564 inputPush(ctxt, stream);
15565 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15568 #define bottom_parser
15569 #include "elfgcchack.h"