2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
33 /* To avoid EBCDIC trouble when parsing on zOS */
35 #pragma convert("ISO8859-1")
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
44 #define XML_DIR_SEP '/'
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
77 #ifdef HAVE_SYS_STAT_H
97 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99 static xmlParserCtxtPtr
100 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101 const xmlChar *base, xmlParserCtxtPtr pctx);
103 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105 /************************************************************************
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
109 ************************************************************************/
111 #define XML_PARSER_BIG_ENTITY 1000
112 #define XML_PARSER_LOT_ENTITY 5000
115 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116 * replacement over the size in byte of the input indicates that you have
117 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
118 * replacements per byte of input.
120 #define XML_PARSER_NON_LINEAR 10
123 * xmlParserEntityCheck
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential (non-linear) entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
132 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
133 xmlEntityPtr ent, size_t replacement)
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
143 * This may look absurd but is needed to detect
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
149 unsigned long oldnbent = ctxt->nbentities;
155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
164 if (xmlStrchr(rep, '<'))
170 if (replacement != 0) {
171 if (replacement < XML_MAX_TEXT_LENGTH)
175 * If the volume of entity copy reaches 10 times the
176 * amount of parsed data and over the large text threshold
177 * then that's very likely to be an abuse.
179 if (ctxt->input != NULL) {
180 consumed = ctxt->input->consumed +
181 (ctxt->input->cur - ctxt->input->base);
183 consumed += ctxt->sizeentities;
185 if (replacement < XML_PARSER_NON_LINEAR * consumed)
187 } else if (size != 0) {
189 * Do the check based on the replacement size of the entity
191 if (size < XML_PARSER_BIG_ENTITY)
195 * A limit on the amount of text data reasonably used
197 if (ctxt->input != NULL) {
198 consumed = ctxt->input->consumed +
199 (ctxt->input->cur - ctxt->input->base);
201 consumed += ctxt->sizeentities;
203 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
206 } else if (ent != NULL) {
208 * use the number of parsed entities in the replacement
210 size = ent->checked / 2;
213 * The amount of data parsed counting entities size only once
215 if (ctxt->input != NULL) {
216 consumed = ctxt->input->consumed +
217 (ctxt->input->cur - ctxt->input->base);
219 consumed += ctxt->sizeentities;
222 * Check the density of entities for the amount of data
223 * knowing an entity reference will take at least 3 bytes
225 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
229 * strange we got no data for checking
231 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233 (ctxt->nbentities <= 10000))
236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
243 * arbitrary depth limit for the XML documents that we allow to
244 * process. This is not a limitation of the parser but a safety
245 * boundary feature. It can be disabled with the XML_PARSE_HUGE
248 unsigned int xmlParserMaxDepth = 256;
253 #define XML_PARSER_BIG_BUFFER_SIZE 300
254 #define XML_PARSER_BUFFER_SIZE 100
255 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
258 * XML_PARSER_CHUNK_SIZE
260 * When calling GROW that's the minimal amount of data
261 * the parser expects to have received. It is not a hard
262 * limit but an optimization when reading strings like Names.
263 * It is not strictly needed as long as the input's available characters
264 * are followed by 0, which should be provided by the I/O level
266 #define XML_PARSER_CHUNK_SIZE 100
269 * List of XML prefixed PI allowed by W3C specs
272 static const char *xmlW3CPIs[] = {
279 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
280 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281 const xmlChar **str);
283 static xmlParserErrors
284 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285 xmlSAXHandlerPtr sax,
286 void *user_data, int depth, const xmlChar *URL,
287 const xmlChar *ID, xmlNodePtr *list);
290 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291 const char *encoding);
292 #ifdef LIBXML_LEGACY_ENABLED
294 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295 xmlNodePtr lastNode);
296 #endif /* LIBXML_LEGACY_ENABLED */
298 static xmlParserErrors
299 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300 const xmlChar *string, void *user_data, xmlNodePtr *lst);
303 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
305 /************************************************************************
307 * Some factorized error routines *
309 ************************************************************************/
312 * xmlErrAttributeDup:
313 * @ctxt: an XML parser context
314 * @prefix: the attribute prefix
315 * @localname: the attribute localname
317 * Handle a redefinition of attribute error
320 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321 const xmlChar * localname)
323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324 (ctxt->instate == XML_PARSER_EOF))
327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
332 (const char *) localname, NULL, NULL, 0, 0,
333 "Attribute %s redefined\n", localname);
335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
337 (const char *) prefix, (const char *) localname,
338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
341 ctxt->wellFormed = 0;
342 if (ctxt->recovery == 0)
343 ctxt->disableSAX = 1;
349 * @ctxt: an XML parser context
350 * @error: the error number
351 * @info: extra information string
353 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
356 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361 (ctxt->instate == XML_PARSER_EOF))
364 case XML_ERR_INVALID_HEX_CHARREF:
365 errmsg = "CharRef: invalid hexadecimal value";
367 case XML_ERR_INVALID_DEC_CHARREF:
368 errmsg = "CharRef: invalid decimal value";
370 case XML_ERR_INVALID_CHARREF:
371 errmsg = "CharRef: invalid value";
373 case XML_ERR_INTERNAL_ERROR:
374 errmsg = "internal error";
376 case XML_ERR_PEREF_AT_EOF:
377 errmsg = "PEReference at end of document";
379 case XML_ERR_PEREF_IN_PROLOG:
380 errmsg = "PEReference in prolog";
382 case XML_ERR_PEREF_IN_EPILOG:
383 errmsg = "PEReference in epilog";
385 case XML_ERR_PEREF_NO_NAME:
386 errmsg = "PEReference: no name";
388 case XML_ERR_PEREF_SEMICOL_MISSING:
389 errmsg = "PEReference: expecting ';'";
391 case XML_ERR_ENTITY_LOOP:
392 errmsg = "Detected an entity reference loop";
394 case XML_ERR_ENTITY_NOT_STARTED:
395 errmsg = "EntityValue: \" or ' expected";
397 case XML_ERR_ENTITY_PE_INTERNAL:
398 errmsg = "PEReferences forbidden in internal subset";
400 case XML_ERR_ENTITY_NOT_FINISHED:
401 errmsg = "EntityValue: \" or ' expected";
403 case XML_ERR_ATTRIBUTE_NOT_STARTED:
404 errmsg = "AttValue: \" or ' expected";
406 case XML_ERR_LT_IN_ATTRIBUTE:
407 errmsg = "Unescaped '<' not allowed in attributes values";
409 case XML_ERR_LITERAL_NOT_STARTED:
410 errmsg = "SystemLiteral \" or ' expected";
412 case XML_ERR_LITERAL_NOT_FINISHED:
413 errmsg = "Unfinished System or Public ID \" or ' expected";
415 case XML_ERR_MISPLACED_CDATA_END:
416 errmsg = "Sequence ']]>' not allowed in content";
418 case XML_ERR_URI_REQUIRED:
419 errmsg = "SYSTEM or PUBLIC, the URI is missing";
421 case XML_ERR_PUBID_REQUIRED:
422 errmsg = "PUBLIC, the Public Identifier is missing";
424 case XML_ERR_HYPHEN_IN_COMMENT:
425 errmsg = "Comment must not contain '--' (double-hyphen)";
427 case XML_ERR_PI_NOT_STARTED:
428 errmsg = "xmlParsePI : no target name";
430 case XML_ERR_RESERVED_XML_NAME:
431 errmsg = "Invalid PI name";
433 case XML_ERR_NOTATION_NOT_STARTED:
434 errmsg = "NOTATION: Name expected here";
436 case XML_ERR_NOTATION_NOT_FINISHED:
437 errmsg = "'>' required to close NOTATION declaration";
439 case XML_ERR_VALUE_REQUIRED:
440 errmsg = "Entity value required";
442 case XML_ERR_URI_FRAGMENT:
443 errmsg = "Fragment not allowed";
445 case XML_ERR_ATTLIST_NOT_STARTED:
446 errmsg = "'(' required to start ATTLIST enumeration";
448 case XML_ERR_NMTOKEN_REQUIRED:
449 errmsg = "NmToken expected in ATTLIST enumeration";
451 case XML_ERR_ATTLIST_NOT_FINISHED:
452 errmsg = "')' required to finish ATTLIST enumeration";
454 case XML_ERR_MIXED_NOT_STARTED:
455 errmsg = "MixedContentDecl : '|' or ')*' expected";
457 case XML_ERR_PCDATA_REQUIRED:
458 errmsg = "MixedContentDecl : '#PCDATA' expected";
460 case XML_ERR_ELEMCONTENT_NOT_STARTED:
461 errmsg = "ContentDecl : Name or '(' expected";
463 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
464 errmsg = "ContentDecl : ',' '|' or ')' expected";
466 case XML_ERR_PEREF_IN_INT_SUBSET:
468 "PEReference: forbidden within markup decl in internal subset";
470 case XML_ERR_GT_REQUIRED:
471 errmsg = "expected '>'";
473 case XML_ERR_CONDSEC_INVALID:
474 errmsg = "XML conditional section '[' expected";
476 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
477 errmsg = "Content error in the external subset";
479 case XML_ERR_CONDSEC_INVALID_KEYWORD:
481 "conditional section INCLUDE or IGNORE keyword expected";
483 case XML_ERR_CONDSEC_NOT_FINISHED:
484 errmsg = "XML conditional section not closed";
486 case XML_ERR_XMLDECL_NOT_STARTED:
487 errmsg = "Text declaration '<?xml' required";
489 case XML_ERR_XMLDECL_NOT_FINISHED:
490 errmsg = "parsing XML declaration: '?>' expected";
492 case XML_ERR_EXT_ENTITY_STANDALONE:
493 errmsg = "external parsed entities cannot be standalone";
495 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
496 errmsg = "EntityRef: expecting ';'";
498 case XML_ERR_DOCTYPE_NOT_FINISHED:
499 errmsg = "DOCTYPE improperly terminated";
501 case XML_ERR_LTSLASH_REQUIRED:
502 errmsg = "EndTag: '</' not found";
504 case XML_ERR_EQUAL_REQUIRED:
505 errmsg = "expected '='";
507 case XML_ERR_STRING_NOT_CLOSED:
508 errmsg = "String not closed expecting \" or '";
510 case XML_ERR_STRING_NOT_STARTED:
511 errmsg = "String not started expecting ' or \"";
513 case XML_ERR_ENCODING_NAME:
514 errmsg = "Invalid XML encoding name";
516 case XML_ERR_STANDALONE_VALUE:
517 errmsg = "standalone accepts only 'yes' or 'no'";
519 case XML_ERR_DOCUMENT_EMPTY:
520 errmsg = "Document is empty";
522 case XML_ERR_DOCUMENT_END:
523 errmsg = "Extra content at the end of the document";
525 case XML_ERR_NOT_WELL_BALANCED:
526 errmsg = "chunk is not well balanced";
528 case XML_ERR_EXTRA_CONTENT:
529 errmsg = "extra content at the end of well balanced chunk";
531 case XML_ERR_VERSION_MISSING:
532 errmsg = "Malformed declaration expecting version";
534 case XML_ERR_NAME_TOO_LONG:
535 errmsg = "Name too long use XML_PARSE_HUGE option";
543 errmsg = "Unregistered error message";
548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
571 static void LIBXML_ATTR_FORMAT(3,0)
572 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576 (ctxt->instate == XML_PARSER_EOF))
580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
583 ctxt->wellFormed = 0;
584 if (ctxt->recovery == 0)
585 ctxt->disableSAX = 1;
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
599 static void LIBXML_ATTR_FORMAT(3,0)
600 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601 const char *msg, const xmlChar *str1, const xmlChar *str2)
603 xmlStructuredErrorFunc schannel = NULL;
605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
608 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609 (ctxt->sax->initialized == XML_SAX2_MAGIC))
610 schannel = ctxt->sax->serror;
612 __xmlRaiseError(schannel,
613 (ctxt->sax) ? ctxt->sax->warning : NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
620 __xmlRaiseError(schannel, NULL, NULL,
621 ctxt, NULL, XML_FROM_PARSER, error,
622 XML_ERR_WARNING, NULL, 0,
623 (const char *) str1, (const char *) str2, NULL, 0, 0,
624 msg, (const char *) str1, (const char *) str2);
630 * @ctxt: an XML parser context
631 * @error: the error number
632 * @msg: the error message
635 * Handle a validity error.
637 static void LIBXML_ATTR_FORMAT(3,0)
638 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
639 const char *msg, const xmlChar *str1, const xmlChar *str2)
641 xmlStructuredErrorFunc schannel = NULL;
643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644 (ctxt->instate == XML_PARSER_EOF))
648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649 schannel = ctxt->sax->serror;
652 __xmlRaiseError(schannel,
653 ctxt->vctxt.error, ctxt->vctxt.userData,
654 ctxt, NULL, XML_FROM_DTD, error,
655 XML_ERR_ERROR, NULL, 0, (const char *) str1,
656 (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
660 __xmlRaiseError(schannel, NULL, NULL,
661 ctxt, NULL, XML_FROM_DTD, error,
662 XML_ERR_ERROR, NULL, 0, (const char *) str1,
663 (const char *) str2, NULL, 0, 0,
664 msg, (const char *) str1, (const char *) str2);
670 * @ctxt: an XML parser context
671 * @error: the error number
672 * @msg: the error message
673 * @val: an integer value
675 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
677 static void LIBXML_ATTR_FORMAT(3,0)
678 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
679 const char *msg, int val)
681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682 (ctxt->instate == XML_PARSER_EOF))
686 __xmlRaiseError(NULL, NULL, NULL,
687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
690 ctxt->wellFormed = 0;
691 if (ctxt->recovery == 0)
692 ctxt->disableSAX = 1;
697 * xmlFatalErrMsgStrIntStr:
698 * @ctxt: an XML parser context
699 * @error: the error number
700 * @msg: the error message
701 * @str1: a string info
702 * @val: an integer value
703 * @str2: a string info
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
707 static void LIBXML_ATTR_FORMAT(3,0)
708 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg, const xmlChar *str1, int val,
712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713 (ctxt->instate == XML_PARSER_EOF))
717 __xmlRaiseError(NULL, NULL, NULL,
718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719 NULL, 0, (const char *) str1, (const char *) str2,
720 NULL, val, 0, msg, str1, val, str2);
722 ctxt->wellFormed = 0;
723 if (ctxt->recovery == 0)
724 ctxt->disableSAX = 1;
730 * @ctxt: an XML parser context
731 * @error: the error number
732 * @msg: the error message
733 * @val: a string value
735 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
737 static void LIBXML_ATTR_FORMAT(3,0)
738 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
739 const char *msg, const xmlChar * val)
741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742 (ctxt->instate == XML_PARSER_EOF))
746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
747 XML_FROM_PARSER, error, XML_ERR_FATAL,
748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
751 ctxt->wellFormed = 0;
752 if (ctxt->recovery == 0)
753 ctxt->disableSAX = 1;
759 * @ctxt: an XML parser context
760 * @error: the error number
761 * @msg: the error message
762 * @val: a string value
764 * Handle a non fatal parser error
766 static void LIBXML_ATTR_FORMAT(3,0)
767 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768 const char *msg, const xmlChar * val)
770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771 (ctxt->instate == XML_PARSER_EOF))
775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
776 XML_FROM_PARSER, error, XML_ERR_ERROR,
777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
783 * @ctxt: an XML parser context
784 * @error: the error number
786 * @info1: extra information string
787 * @info2: extra information string
789 * Handle a namespace error, i.e. a violation of namespace well-formedness
791 static void LIBXML_ATTR_FORMAT(3,0)
792 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
794 const xmlChar * info1, const xmlChar * info2,
795 const xmlChar * info3)
797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798 (ctxt->instate == XML_PARSER_EOF))
802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
803 XML_ERR_ERROR, NULL, 0, (const char *) info1,
804 (const char *) info2, (const char *) info3, 0, 0, msg,
805 info1, info2, info3);
807 ctxt->nsWellFormed = 0;
812 * @ctxt: an XML parser context
813 * @error: the error number
815 * @info1: extra information string
816 * @info2: extra information string
818 * Handle a namespace warning error
820 static void LIBXML_ATTR_FORMAT(3,0)
821 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
823 const xmlChar * info1, const xmlChar * info2,
824 const xmlChar * info3)
826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827 (ctxt->instate == XML_PARSER_EOF))
829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830 XML_ERR_WARNING, NULL, 0, (const char *) info1,
831 (const char *) info2, (const char *) info3, 0, 0, msg,
832 info1, info2, info3);
835 /************************************************************************
837 * Library wide options *
839 ************************************************************************/
843 * @feature: the feature to be examined
845 * Examines if the library has been compiled with a given feature.
847 * Returns a non-zero value if the feature exists, otherwise zero.
848 * Returns zero (0) if the feature does not exist or an
849 * unknown feature is requested, non-zero otherwise.
852 xmlHasFeature(xmlFeature feature)
855 case XML_WITH_THREAD:
856 #ifdef LIBXML_THREAD_ENABLED
862 #ifdef LIBXML_TREE_ENABLED
867 case XML_WITH_OUTPUT:
868 #ifdef LIBXML_OUTPUT_ENABLED
874 #ifdef LIBXML_PUSH_ENABLED
879 case XML_WITH_READER:
880 #ifdef LIBXML_READER_ENABLED
885 case XML_WITH_PATTERN:
886 #ifdef LIBXML_PATTERN_ENABLED
891 case XML_WITH_WRITER:
892 #ifdef LIBXML_WRITER_ENABLED
898 #ifdef LIBXML_SAX1_ENABLED
904 #ifdef LIBXML_FTP_ENABLED
910 #ifdef LIBXML_HTTP_ENABLED
916 #ifdef LIBXML_VALID_ENABLED
922 #ifdef LIBXML_HTML_ENABLED
927 case XML_WITH_LEGACY:
928 #ifdef LIBXML_LEGACY_ENABLED
934 #ifdef LIBXML_C14N_ENABLED
939 case XML_WITH_CATALOG:
940 #ifdef LIBXML_CATALOG_ENABLED
946 #ifdef LIBXML_XPATH_ENABLED
952 #ifdef LIBXML_XPTR_ENABLED
957 case XML_WITH_XINCLUDE:
958 #ifdef LIBXML_XINCLUDE_ENABLED
964 #ifdef LIBXML_ICONV_ENABLED
969 case XML_WITH_ISO8859X:
970 #ifdef LIBXML_ISO8859X_ENABLED
975 case XML_WITH_UNICODE:
976 #ifdef LIBXML_UNICODE_ENABLED
981 case XML_WITH_REGEXP:
982 #ifdef LIBXML_REGEXP_ENABLED
987 case XML_WITH_AUTOMATA:
988 #ifdef LIBXML_AUTOMATA_ENABLED
994 #ifdef LIBXML_EXPR_ENABLED
999 case XML_WITH_SCHEMAS:
1000 #ifdef LIBXML_SCHEMAS_ENABLED
1005 case XML_WITH_SCHEMATRON:
1006 #ifdef LIBXML_SCHEMATRON_ENABLED
1011 case XML_WITH_MODULES:
1012 #ifdef LIBXML_MODULES_ENABLED
1017 case XML_WITH_DEBUG:
1018 #ifdef LIBXML_DEBUG_ENABLED
1023 case XML_WITH_DEBUG_MEM:
1024 #ifdef DEBUG_MEMORY_LOCATION
1029 case XML_WITH_DEBUG_RUN:
1030 #ifdef LIBXML_DEBUG_RUNTIME
1036 #ifdef LIBXML_ZLIB_ENABLED
1042 #ifdef LIBXML_LZMA_ENABLED
1048 #ifdef LIBXML_ICU_ENABLED
1059 /************************************************************************
1061 * SAX2 defaulted attributes handling *
1063 ************************************************************************/
1067 * @ctxt: an XML parser context
1069 * Do the SAX2 detection and specific initialization
1072 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073 if (ctxt == NULL) return;
1074 #ifdef LIBXML_SAX1_ENABLED
1075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076 ((ctxt->sax->startElementNs != NULL) ||
1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1080 #endif /* LIBXML_SAX1_ENABLED */
1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086 (ctxt->str_xml_ns == NULL)) {
1087 xmlErrMemory(ctxt, NULL);
1091 typedef struct _xmlDefAttrs xmlDefAttrs;
1092 typedef xmlDefAttrs *xmlDefAttrsPtr;
1093 struct _xmlDefAttrs {
1094 int nbAttrs; /* number of defaulted attributes on that element */
1095 int maxAttrs; /* the size of the array */
1096 #if __STDC_VERSION__ >= 199901L
1097 /* Using a C99 flexible array member avoids UBSan errors. */
1098 const xmlChar *values[]; /* array of localname/prefix/values/external */
1100 const xmlChar *values[5];
1105 * xmlAttrNormalizeSpace:
1106 * @src: the source string
1107 * @dst: the target string
1109 * Normalize the space in non CDATA attribute values:
1110 * If the attribute type is not CDATA, then the XML processor MUST further
1111 * process the normalized attribute value by discarding any leading and
1112 * trailing space (#x20) characters, and by replacing sequences of space
1113 * (#x20) characters by a single space (#x20) character.
1114 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1115 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116 * passing src as dst is just fine.
1118 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1122 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1124 if ((src == NULL) || (dst == NULL))
1127 while (*src == 0x20) src++;
1130 while (*src == 0x20) src++;
1144 * xmlAttrNormalizeSpace2:
1145 * @src: the source string
1147 * Normalize the space in non CDATA attribute values, a slightly more complex
1148 * front end to avoid allocation problems when running on attribute values
1149 * coming from the input.
1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1154 static const xmlChar *
1155 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1158 int remove_head = 0;
1159 int need_realloc = 0;
1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1169 while (*cur == 0x20) {
1176 if ((*cur == 0x20) || (*cur == 0)) {
1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1188 xmlErrMemory(ctxt, NULL);
1191 xmlAttrNormalizeSpace(ret, ret);
1192 *len = (int) strlen((const char *)ret);
1194 } else if (remove_head) {
1195 *len -= remove_head;
1196 memmove(src, src + remove_head, 1 + *len);
1204 * @ctxt: an XML parser context
1205 * @fullname: the element fullname
1206 * @fullattr: the attribute fullname
1207 * @value: the attribute value
1209 * Add a defaulted attribute for an element
1212 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213 const xmlChar *fullname,
1214 const xmlChar *fullattr,
1215 const xmlChar *value) {
1216 xmlDefAttrsPtr defaults;
1218 const xmlChar *name;
1219 const xmlChar *prefix;
1222 * Allows detection of attribute redefinitions
1224 if (ctxt->attsSpecial != NULL) {
1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1229 if (ctxt->attsDefault == NULL) {
1230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1231 if (ctxt->attsDefault == NULL)
1236 * Split the element name into prefix:localname; the strings found
1237 * are within the DTD and thus not associated with namespace names.
1239 name = xmlSplitQName3(fullname, &len);
1241 name = xmlDictLookup(ctxt->dict, fullname, -1);
1244 name = xmlDictLookup(ctxt->dict, name, -1);
1245 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1249 * make sure there is some storage
1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252 if (defaults == NULL) {
1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1254 (4 * 5) * sizeof(const xmlChar *));
1255 if (defaults == NULL)
1257 defaults->nbAttrs = 0;
1258 defaults->maxAttrs = 4;
1259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260 defaults, NULL) < 0) {
1264 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1265 xmlDefAttrsPtr temp;
1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1272 defaults->maxAttrs *= 2;
1273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274 defaults, NULL) < 0) {
1281 * Split the attribute name into prefix:localname; the strings found
1282 * are within the DTD and thus not associated with namespace names.
1284 name = xmlSplitQName3(fullattr, &len);
1286 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1289 name = xmlDictLookup(ctxt->dict, name, -1);
1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1293 defaults->values[5 * defaults->nbAttrs] = name;
1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1295 /* intern the string and precompute the end */
1296 len = xmlStrlen(value);
1297 value = xmlDictLookup(ctxt->dict, value, len);
1298 defaults->values[5 * defaults->nbAttrs + 2] = value;
1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1304 defaults->nbAttrs++;
1309 xmlErrMemory(ctxt, NULL);
1314 * xmlAddSpecialAttr:
1315 * @ctxt: an XML parser context
1316 * @fullname: the element fullname
1317 * @fullattr: the attribute fullname
1318 * @type: the attribute type
1320 * Register this attribute type
1323 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324 const xmlChar *fullname,
1325 const xmlChar *fullattr,
1328 if (ctxt->attsSpecial == NULL) {
1329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1330 if (ctxt->attsSpecial == NULL)
1334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338 (void *) (ptrdiff_t) type);
1342 xmlErrMemory(ctxt, NULL);
1347 * xmlCleanSpecialAttrCallback:
1349 * Removes CDATA attributes from the special attribute table
1352 xmlCleanSpecialAttrCallback(void *payload, void *data,
1353 const xmlChar *fullname, const xmlChar *fullattr,
1354 const xmlChar *unused ATTRIBUTE_UNUSED) {
1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1357 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1363 * xmlCleanSpecialAttr:
1364 * @ctxt: an XML parser context
1366 * Trim the list of attributes defined to remove all those of type
1367 * CDATA as they are not special. This call should be done when finishing
1368 * to parse the DTD and before starting to parse the document root.
1371 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1373 if (ctxt->attsSpecial == NULL)
1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1378 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379 xmlHashFree(ctxt->attsSpecial, NULL);
1380 ctxt->attsSpecial = NULL;
1386 * xmlCheckLanguageID:
1387 * @lang: pointer to the string value
1389 * Checks that the value conforms to the LanguageID production:
1391 * NOTE: this is somewhat deprecated, those productions were removed from
1392 * the XML Second edition.
1394 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1396 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399 * [38] Subcode ::= ([a-z] | [A-Z])+
1401 * The current REC references the successors of RFC 1766, currently 5646
1403 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404 * langtag = language
1410 * language = 2*3ALPHA ; shortest ISO 639 code
1411 * ["-" extlang] ; sometimes followed by
1412 * ; extended language subtags
1413 * / 4ALPHA ; or reserved for future use
1414 * / 5*8ALPHA ; or registered language subtag
1416 * extlang = 3ALPHA ; selected ISO 639 codes
1417 * *2("-" 3ALPHA) ; permanently reserved
1419 * script = 4ALPHA ; ISO 15924 code
1421 * region = 2ALPHA ; ISO 3166-1 code
1422 * / 3DIGIT ; UN M.49 code
1424 * variant = 5*8alphanum ; registered variants
1425 * / (DIGIT 3alphanum)
1427 * extension = singleton 1*("-" (2*8alphanum))
1429 * ; Single alphanumerics
1430 * ; "x" reserved for private use
1431 * singleton = DIGIT ; 0 - 9
1437 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438 * The parser below doesn't try to cope with extension or privateuse
1439 * that could be added but that's not interoperable anyway
1441 * Returns 1 if correct 0 otherwise
1444 xmlCheckLanguageID(const xmlChar * lang)
1446 const xmlChar *cur = lang, *nxt;
1450 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1451 ((cur[0] == 'I') && (cur[1] == '-')) ||
1452 ((cur[0] == 'x') && (cur[1] == '-')) ||
1453 ((cur[0] == 'X') && (cur[1] == '-'))) {
1455 * Still allow IANA code and user code which were coming
1456 * from the previous version of the XML-1.0 specification
1457 * it's deprecated but we should not fail
1460 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1461 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1463 return(cur[0] == 0);
1466 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1469 if (nxt - cur >= 4) {
1473 if ((nxt - cur > 8) || (nxt[0] != 0))
1479 /* we got an ISO 639 code */
1487 /* now we can have extlang or script or region or variant */
1488 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1491 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 /* we parsed an extlang */
1510 /* now we can have script or region or variant */
1511 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 /* we parsed a script */
1532 /* now we can have region or variant */
1533 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1540 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1544 /* we parsed a region */
1553 /* now we can just have a variant */
1554 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1558 if ((nxt - cur < 5) || (nxt - cur > 8))
1561 /* we parsed a variant */
1567 /* extensions and private use subtags not checked */
1571 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1579 /************************************************************************
1581 * Parser stacks related functions and macros *
1583 ************************************************************************/
1585 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586 const xmlChar ** str);
1591 * @ctxt: an XML parser context
1592 * @prefix: the namespace prefix or NULL
1593 * @URL: the namespace name
1595 * Pushes a new parser namespace on top of the ns stack
1597 * Returns -1 in case of error, -2 if the namespace should be discarded
1598 * and the index in the stack otherwise.
1601 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1603 if (ctxt->options & XML_PARSE_NSCLEAN) {
1605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1606 if (ctxt->nsTab[i] == prefix) {
1608 if (ctxt->nsTab[i + 1] == URL)
1610 /* out of scope keep it */
1615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1618 ctxt->nsTab = (const xmlChar **)
1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620 if (ctxt->nsTab == NULL) {
1621 xmlErrMemory(ctxt, NULL);
1625 } else if (ctxt->nsNr >= ctxt->nsMax) {
1626 const xmlChar ** tmp;
1628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1631 xmlErrMemory(ctxt, NULL);
1637 ctxt->nsTab[ctxt->nsNr++] = prefix;
1638 ctxt->nsTab[ctxt->nsNr++] = URL;
1639 return (ctxt->nsNr);
1643 * @ctxt: an XML parser context
1644 * @nr: the number to pop
1646 * Pops the top @nr parser prefix/namespace from the ns stack
1648 * Returns the number of namespaces removed
1651 nsPop(xmlParserCtxtPtr ctxt, int nr)
1655 if (ctxt->nsTab == NULL) return(0);
1656 if (ctxt->nsNr < nr) {
1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1660 if (ctxt->nsNr <= 0)
1663 for (i = 0;i < nr;i++) {
1665 ctxt->nsTab[ctxt->nsNr] = NULL;
1672 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673 const xmlChar **atts;
1677 if (ctxt->atts == NULL) {
1678 maxatts = 55; /* allow for 10 attrs by default */
1679 atts = (const xmlChar **)
1680 xmlMalloc(maxatts * sizeof(xmlChar *));
1681 if (atts == NULL) goto mem_error;
1683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
1686 ctxt->maxatts = maxatts;
1687 } else if (nr + 5 > ctxt->maxatts) {
1688 maxatts = (nr + 5) * 2;
1689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690 maxatts * sizeof(const xmlChar *));
1691 if (atts == NULL) goto mem_error;
1693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694 (maxatts / 5) * sizeof(int));
1695 if (attallocs == NULL) goto mem_error;
1696 ctxt->attallocs = attallocs;
1697 ctxt->maxatts = maxatts;
1699 return(ctxt->maxatts);
1701 xmlErrMemory(ctxt, NULL);
1707 * @ctxt: an XML parser context
1708 * @value: the parser input
1710 * Pushes a new parser input on top of the input stack
1712 * Returns -1 in case of error, the index in the stack otherwise
1715 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1717 if ((ctxt == NULL) || (value == NULL))
1719 if (ctxt->inputNr >= ctxt->inputMax) {
1720 ctxt->inputMax *= 2;
1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1724 sizeof(ctxt->inputTab[0]));
1725 if (ctxt->inputTab == NULL) {
1726 xmlErrMemory(ctxt, NULL);
1727 xmlFreeInputStream(value);
1728 ctxt->inputMax /= 2;
1733 ctxt->inputTab[ctxt->inputNr] = value;
1734 ctxt->input = value;
1735 return (ctxt->inputNr++);
1739 * @ctxt: an XML parser context
1741 * Pops the top parser input from the input stack
1743 * Returns the input just removed
1746 inputPop(xmlParserCtxtPtr ctxt)
1748 xmlParserInputPtr ret;
1752 if (ctxt->inputNr <= 0)
1755 if (ctxt->inputNr > 0)
1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1759 ret = ctxt->inputTab[ctxt->inputNr];
1760 ctxt->inputTab[ctxt->inputNr] = NULL;
1765 * @ctxt: an XML parser context
1766 * @value: the element node
1768 * Pushes a new element node on top of the node stack
1770 * Returns -1 in case of error, the index in the stack otherwise
1773 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1775 if (ctxt == NULL) return(0);
1776 if (ctxt->nodeNr >= ctxt->nodeMax) {
1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1781 sizeof(ctxt->nodeTab[0]));
1783 xmlErrMemory(ctxt, NULL);
1786 ctxt->nodeTab = tmp;
1789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1794 xmlHaltParser(ctxt);
1797 ctxt->nodeTab[ctxt->nodeNr] = value;
1799 return (ctxt->nodeNr++);
1804 * @ctxt: an XML parser context
1806 * Pops the top element node from the node stack
1808 * Returns the node just removed
1811 nodePop(xmlParserCtxtPtr ctxt)
1815 if (ctxt == NULL) return(NULL);
1816 if (ctxt->nodeNr <= 0)
1819 if (ctxt->nodeNr > 0)
1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1823 ret = ctxt->nodeTab[ctxt->nodeNr];
1824 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1828 #ifdef LIBXML_PUSH_ENABLED
1831 * @ctxt: an XML parser context
1832 * @value: the element name
1833 * @prefix: the element prefix
1834 * @URI: the element namespace name
1836 * Pushes a new element name/prefix/URL on top of the name stack
1838 * Returns -1 in case of error, the index in the stack otherwise
1841 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1844 if (ctxt->nameNr >= ctxt->nameMax) {
1845 const xmlChar * *tmp;
1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1850 sizeof(ctxt->nameTab[0]));
1855 ctxt->nameTab = tmp;
1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1858 sizeof(ctxt->pushTab[0]));
1863 ctxt->pushTab = tmp2;
1865 ctxt->nameTab[ctxt->nameNr] = value;
1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1870 return (ctxt->nameNr++);
1872 xmlErrMemory(ctxt, NULL);
1877 * @ctxt: an XML parser context
1879 * Pops the top element/prefix/URI name from the name stack
1881 * Returns the name just removed
1883 static const xmlChar *
1884 nameNsPop(xmlParserCtxtPtr ctxt)
1888 if (ctxt->nameNr <= 0)
1891 if (ctxt->nameNr > 0)
1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1895 ret = ctxt->nameTab[ctxt->nameNr];
1896 ctxt->nameTab[ctxt->nameNr] = NULL;
1899 #endif /* LIBXML_PUSH_ENABLED */
1903 * @ctxt: an XML parser context
1904 * @value: the element name
1906 * Pushes a new element name on top of the name stack
1908 * Returns -1 in case of error, the index in the stack otherwise
1911 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1913 if (ctxt == NULL) return (-1);
1915 if (ctxt->nameNr >= ctxt->nameMax) {
1916 const xmlChar * *tmp;
1917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1919 sizeof(ctxt->nameTab[0]));
1923 ctxt->nameTab = tmp;
1926 ctxt->nameTab[ctxt->nameNr] = value;
1928 return (ctxt->nameNr++);
1930 xmlErrMemory(ctxt, NULL);
1935 * @ctxt: an XML parser context
1937 * Pops the top element name from the name stack
1939 * Returns the name just removed
1942 namePop(xmlParserCtxtPtr ctxt)
1946 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1949 if (ctxt->nameNr > 0)
1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1953 ret = ctxt->nameTab[ctxt->nameNr];
1954 ctxt->nameTab[ctxt->nameNr] = NULL;
1958 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1959 if (ctxt->spaceNr >= ctxt->spaceMax) {
1962 ctxt->spaceMax *= 2;
1963 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1966 xmlErrMemory(ctxt, NULL);
1970 ctxt->spaceTab = tmp;
1972 ctxt->spaceTab[ctxt->spaceNr] = val;
1973 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1974 return(ctxt->spaceNr++);
1977 static int spacePop(xmlParserCtxtPtr ctxt) {
1979 if (ctxt->spaceNr <= 0) return(0);
1981 if (ctxt->spaceNr > 0)
1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1984 ctxt->space = &ctxt->spaceTab[0];
1985 ret = ctxt->spaceTab[ctxt->spaceNr];
1986 ctxt->spaceTab[ctxt->spaceNr] = -1;
1991 * Macros for accessing the content. Those should be used only by the parser,
1994 * Dirty macros, i.e. one often need to make assumption on the context to
1997 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1998 * To be used with extreme caution since operations consuming
1999 * characters may move the input buffer to a different location !
2000 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
2001 * This should be used internally by the parser
2002 * only to compare to ASCII values otherwise it would break when
2003 * running with UTF-8 encoding.
2004 * RAW same as CUR but in the input buffer, bypass any token
2005 * extraction that may have been done
2006 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
2007 * to compare on ASCII based substring.
2008 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2009 * strings without newlines within the parser.
2010 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2011 * defined char within the parser.
2012 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2014 * NEXT Skip to the next character, this does the proper decoding
2015 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2016 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2017 * CUR_CHAR(l) returns the current unicode character (int), set l
2018 * to the number of xmlChars used for the encoding [0-5].
2019 * CUR_SCHAR same but operate on a string instead of the context
2020 * COPY_BUF copy the current unicode char to the target buffer, increment
2022 * GROW, SHRINK handling of input buffers
2025 #define RAW (*ctxt->input->cur)
2026 #define CUR (*ctxt->input->cur)
2027 #define NXT(val) ctxt->input->cur[(val)]
2028 #define CUR_PTR ctxt->input->cur
2029 #define BASE_PTR ctxt->input->base
2031 #define CMP4( s, c1, c2, c3, c4 ) \
2032 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2033 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2034 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2035 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2036 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2037 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2038 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2039 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2040 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2041 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2042 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2043 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2044 ((unsigned char *) s)[ 8 ] == c9 )
2045 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2046 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2047 ((unsigned char *) s)[ 9 ] == c10 )
2049 #define SKIP(val) do { \
2050 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2051 if (*ctxt->input->cur == 0) \
2052 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2055 #define SKIPL(val) do { \
2057 for(skipl=0; skipl<val; skipl++) { \
2058 if (*(ctxt->input->cur) == '\n') { \
2059 ctxt->input->line++; ctxt->input->col = 1; \
2060 } else ctxt->input->col++; \
2062 ctxt->input->cur++; \
2064 if (*ctxt->input->cur == 0) \
2065 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2068 #define SHRINK if ((ctxt->progressive == 0) && \
2069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2073 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074 xmlParserInputShrink(ctxt->input);
2075 if (*ctxt->input->cur == 0)
2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2079 #define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2083 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2092 xmlHaltParser(ctxt);
2095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2106 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2108 #define NEXT xmlNextChar(ctxt)
2111 ctxt->input->col++; \
2112 ctxt->input->cur++; \
2114 if (*ctxt->input->cur == 0) \
2115 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2118 #define NEXTL(l) do { \
2119 if (*(ctxt->input->cur) == '\n') { \
2120 ctxt->input->line++; ctxt->input->col = 1; \
2121 } else ctxt->input->col++; \
2122 ctxt->input->cur += l; \
2125 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2126 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2128 #define COPY_BUF(l,b,i,v) \
2129 if (l == 1) b[i++] = (xmlChar) v; \
2130 else i += xmlCopyCharMultiByte(&b[i],v)
2133 * xmlSkipBlankChars:
2134 * @ctxt: the XML parser context
2136 * skip all blanks character found at that point in the input streams.
2137 * It pops up finished entities in the process if allowable at that point.
2139 * Returns the number of space chars skipped
2143 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2147 * It's Okay to use CUR/NEXT here since all the blanks are on
2150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2153 * if we are in the document content, go really fast
2155 cur = ctxt->input->cur;
2156 while (IS_BLANK_CH(*cur)) {
2158 ctxt->input->line++; ctxt->input->col = 1;
2165 ctxt->input->cur = cur;
2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167 cur = ctxt->input->cur;
2170 ctxt->input->cur = cur;
2172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2177 } else if (CUR == '%') {
2179 * Need to handle support of entities branching here
2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2183 xmlParsePEReference(ctxt);
2184 } else if (CUR == 0) {
2185 if (ctxt->inputNr <= 1)
2193 * Also increase the counter when entering or exiting a PERef.
2194 * The spec says: "When a parameter-entity reference is recognized
2195 * in the DTD and included, its replacement text MUST be enlarged
2196 * by the attachment of one leading and one following space (#x20)
2205 /************************************************************************
2207 * Commodity functions to handle entities *
2209 ************************************************************************/
2213 * @ctxt: an XML parser context
2215 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216 * pop it and return the next char.
2218 * Returns the current xmlChar in the parser context
2221 xmlPopInput(xmlParserCtxtPtr ctxt) {
2222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2223 if (xmlParserDebugEntities)
2224 xmlGenericError(xmlGenericErrorContext,
2225 "Popping input %d\n", ctxt->inputNr);
2226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227 (ctxt->instate != XML_PARSER_EOF))
2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229 "Unfinished entity outside the DTD");
2230 xmlFreeInputStream(inputPop(ctxt));
2231 if (*ctxt->input->cur == 0)
2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2238 * @ctxt: an XML parser context
2239 * @input: an XML parser input fragment (entity, XML fragment ...).
2241 * xmlPushInput: switch to a new input stream which is stacked on top
2242 * of the previous one(s).
2243 * Returns -1 in case of error or the index in the input stack
2246 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2248 if (input == NULL) return(-1);
2250 if (xmlParserDebugEntities) {
2251 if ((ctxt->input != NULL) && (ctxt->input->filename))
2252 xmlGenericError(xmlGenericErrorContext,
2253 "%s(%d): ", ctxt->input->filename,
2255 xmlGenericError(xmlGenericErrorContext,
2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259 (ctxt->inputNr > 1024)) {
2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261 while (ctxt->inputNr > 1)
2262 xmlFreeInputStream(inputPop(ctxt));
2265 ret = inputPush(ctxt, input);
2266 if (ctxt->instate == XML_PARSER_EOF)
2274 * @ctxt: an XML parser context
2276 * parse Reference declarations
2278 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279 * '&#x' [0-9a-fA-F]+ ';'
2281 * [ WFC: Legal Character ]
2282 * Characters referred to using character references must match the
2283 * production for Char.
2285 * Returns the value parsed (as an int), 0 in case of error
2288 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2289 unsigned int val = 0;
2291 unsigned int outofrange = 0;
2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2296 if ((RAW == '&') && (NXT(1) == '#') &&
2300 while (RAW != ';') { /* loop blocked by count */
2304 if (ctxt->instate == XML_PARSER_EOF)
2307 if ((RAW >= '0') && (RAW <= '9'))
2308 val = val * 16 + (CUR - '0');
2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310 val = val * 16 + (CUR - 'a') + 10;
2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312 val = val * 16 + (CUR - 'A') + 10;
2314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2330 } else if ((RAW == '&') && (NXT(1) == '#')) {
2333 while (RAW != ';') { /* loop blocked by count */
2337 if (ctxt->instate == XML_PARSER_EOF)
2340 if ((RAW >= '0') && (RAW <= '9'))
2341 val = val * 10 + (CUR - '0');
2343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2364 * [ WFC: Legal Character ]
2365 * Characters referred to using character references must match the
2366 * production for Char.
2368 if ((IS_CHAR(val) && (outofrange == 0))) {
2371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372 "xmlParseCharRef: invalid xmlChar value %d\n",
2379 * xmlParseStringCharRef:
2380 * @ctxt: an XML parser context
2381 * @str: a pointer to an index in the string
2383 * parse Reference declarations, variant parsing from a string rather
2384 * than an input flow.
2386 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387 * '&#x' [0-9a-fA-F]+ ';'
2389 * [ WFC: Legal Character ]
2390 * Characters referred to using character references must match the
2391 * production for Char.
2393 * Returns the value parsed (as an int), 0 in case of error, str will be
2394 * updated to the current value of the index
2397 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2400 unsigned int val = 0;
2401 unsigned int outofrange = 0;
2403 if ((str == NULL) || (*str == NULL)) return(0);
2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2409 while (cur != ';') { /* Non input consuming loop */
2410 if ((cur >= '0') && (cur <= '9'))
2411 val = val * 16 + (cur - '0');
2412 else if ((cur >= 'a') && (cur <= 'f'))
2413 val = val * 16 + (cur - 'a') + 10;
2414 else if ((cur >= 'A') && (cur <= 'F'))
2415 val = val * 16 + (cur - 'A') + 10;
2417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2429 } else if ((cur == '&') && (ptr[1] == '#')){
2432 while (cur != ';') { /* Non input consuming loops */
2433 if ((cur >= '0') && (cur <= '9'))
2434 val = val * 10 + (cur - '0');
2436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2455 * [ WFC: Legal Character ]
2456 * Characters referred to using character references must match the
2457 * production for Char.
2459 if ((IS_CHAR(val) && (outofrange == 0))) {
2462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2470 * xmlParserHandlePEReference:
2471 * @ctxt: the parser context
2473 * [69] PEReference ::= '%' Name ';'
2475 * [ WFC: No Recursion ]
2476 * A parsed entity must not contain a recursive
2477 * reference to itself, either directly or indirectly.
2479 * [ WFC: Entity Declared ]
2480 * In a document without any DTD, a document with only an internal DTD
2481 * subset which contains no parameter entity references, or a document
2482 * with "standalone='yes'", ... ... The declaration of a parameter
2483 * entity must precede any reference to it...
2485 * [ VC: Entity Declared ]
2486 * In a document with an external subset or external parameter entities
2487 * with "standalone='no'", ... ... The declaration of a parameter entity
2488 * must precede any reference to it...
2491 * Parameter-entity references may only appear in the DTD.
2492 * NOTE: misleading but this is handled.
2494 * A PEReference may have been detected in the current input stream
2495 * the handling is done accordingly to
2496 * http://www.w3.org/TR/REC-xml#entproc
2498 * - Included in literal in entity values
2499 * - Included as Parameter Entity reference within DTDs
2502 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2503 switch(ctxt->instate) {
2504 case XML_PARSER_CDATA_SECTION:
2506 case XML_PARSER_COMMENT:
2508 case XML_PARSER_START_TAG:
2510 case XML_PARSER_END_TAG:
2512 case XML_PARSER_EOF:
2513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2515 case XML_PARSER_PROLOG:
2516 case XML_PARSER_START:
2517 case XML_PARSER_MISC:
2518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2520 case XML_PARSER_ENTITY_DECL:
2521 case XML_PARSER_CONTENT:
2522 case XML_PARSER_ATTRIBUTE_VALUE:
2524 case XML_PARSER_SYSTEM_LITERAL:
2525 case XML_PARSER_PUBLIC_LITERAL:
2526 /* we just ignore it there */
2528 case XML_PARSER_EPILOG:
2529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2531 case XML_PARSER_ENTITY_VALUE:
2533 * NOTE: in the case of entity values, we don't do the
2534 * substitution here since we need the literal
2535 * entity value to be able to save the internal
2536 * subset of the document.
2537 * This will be handled by xmlStringDecodeEntities
2540 case XML_PARSER_DTD:
2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543 * In the internal DTD subset, parameter-entity references
2544 * can occur only where markup declarations can occur, not
2545 * within markup declarations.
2546 * In that case this is handled in xmlParseMarkupDecl
2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2553 case XML_PARSER_IGNORE:
2557 xmlParsePEReference(ctxt);
/*
 * Macro used to grow the current buffer to twice its size plus @n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is the
 *            target when the new size wraps around or xmlRealloc fails,
 *            and @buffer is left untouched in both cases.
 * @n is parenthesized in the expansion so that any expression can be
 * passed as the increment.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2576 * xmlStringLenDecodeEntities:
2577 * @ctxt: the parser context
2578 * @str: the input string
2579 * @len: the string length
2580 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581 * @end: an end marker xmlChar, 0 if none
2582 * @end2: an end marker xmlChar, 0 if none
2583 * @end3: an end marker xmlChar, 0 if none
2585 * Takes an entity string content and processes it to do the adequate substitutions.
2587 * [67] Reference ::= EntityRef | CharRef
2589 * [69] PEReference ::= '%' Name ';'
2591 * Returns A newly allocated string with the substitution done. The caller
2592 * must deallocate it !
2595 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2597 xmlChar *buffer = NULL;
2598 size_t buffer_size = 0;
2601 xmlChar *current = NULL;
2602 xmlChar *rep = NULL;
2603 const xmlChar *last;
2607 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2611 if (((ctxt->depth > 40) &&
2612 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613 (ctxt->depth > 1024)) {
2614 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2619 * allocate a translation buffer.
2621 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2622 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2623 if (buffer == NULL) goto mem_error;
2626 * OK loop until we reach one of the ending char or a size limit.
2627 * we are operating on already parsed values.
2630 c = CUR_SCHAR(str, l);
2633 while ((c != 0) && (c != end) && /* non input consuming loop */
2634 (c != end2) && (c != end3)) {
2637 if ((c == '&') && (str[1] == '#')) {
2638 int val = xmlParseStringCharRef(ctxt, &str);
2641 COPY_BUF(0,buffer,nbchars,val);
2642 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2643 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2645 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646 if (xmlParserDebugEntities)
2647 xmlGenericError(xmlGenericErrorContext,
2648 "String decoding Entity Reference: %.30s\n",
2650 ent = xmlParseStringEntityRef(ctxt, &str);
2651 xmlParserEntityCheck(ctxt, 0, ent, 0);
2653 ctxt->nbentities += ent->checked / 2;
2654 if ((ent != NULL) &&
2655 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2656 if (ent->content != NULL) {
2657 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2658 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2659 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2662 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2663 "predefined entity has no content\n");
2666 } else if ((ent != NULL) && (ent->content != NULL)) {
2668 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2675 while (*current != 0) { /* non input consuming loop */
2676 buffer[nbchars++] = *current++;
2677 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2680 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2685 } else if (ent != NULL) {
2686 int i = xmlStrlen(ent->name);
2687 const xmlChar *cur = ent->name;
2689 buffer[nbchars++] = '&';
2690 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2691 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2694 buffer[nbchars++] = *cur++;
2695 buffer[nbchars++] = ';';
2697 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2698 if (xmlParserDebugEntities)
2699 xmlGenericError(xmlGenericErrorContext,
2700 "String decoding PE Reference: %.30s\n", str);
2701 ent = xmlParseStringPEReference(ctxt, &str);
2702 xmlParserEntityCheck(ctxt, 0, ent, 0);
2704 ctxt->nbentities += ent->checked / 2;
2706 if (ent->content == NULL) {
2708 * Note: external parsed entities will not be loaded,
2709 * it is not required for a non-validating parser to
2710 * complete external PEreferences coming from the
2713 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2714 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2715 (ctxt->validate != 0)) {
2716 xmlLoadEntityContent(ctxt, ent);
2718 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2719 "not validating will not read content for PE entity %s\n",
2724 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2730 while (*current != 0) { /* non input consuming loop */
2731 buffer[nbchars++] = *current++;
2732 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2733 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2735 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2742 COPY_BUF(l,buffer,nbchars,c);
2744 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2749 c = CUR_SCHAR(str, l);
2753 buffer[nbchars] = 0;
2757 xmlErrMemory(ctxt, NULL);
2767 * xmlStringDecodeEntities:
2768 * @ctxt: the parser context
2769 * @str: the input string
2770 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2771 * @end: an end marker xmlChar, 0 if none
2772 * @end2: an end marker xmlChar, 0 if none
2773 * @end3: an end marker xmlChar, 0 if none
2775 * Takes a entity string content and process to do the adequate substitutions.
2777 * [67] Reference ::= EntityRef | CharRef
2779 * [69] PEReference ::= '%' Name ';'
2781 * Returns A newly allocated string with the substitution done. The caller
2782 * must deallocate it !
2785 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2786 xmlChar end, xmlChar end2, xmlChar end3) {
2787 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2788 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2792 /************************************************************************
2794 * Commodity functions, cleanup needed ? *
2796 ************************************************************************/
2800 * @ctxt: an XML parser context
2802 * @len: the size of @str
2803 * @blank_chars: we know the chars are blanks
2805 * Is this a sequence of blank chars that one can ignore ?
2807 * Returns 1 if ignorable 0 otherwise.
2810 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813 xmlNodePtr lastChild;
2816 * Don't spend time trying to differentiate them, the same callback is
2819 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2823 * Check for xml:space value.
2825 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2826 (*(ctxt->space) == -2))
2830 * Check that the string is made of blanks
2832 if (blank_chars == 0) {
2833 for (i = 0;i < len;i++)
2834 if (!(IS_BLANK_CH(str[i]))) return(0);
2838 * Look if the element is mixed content in the DTD if available
2840 if (ctxt->node == NULL) return(0);
2841 if (ctxt->myDoc != NULL) {
2842 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2843 if (ret == 0) return(1);
2844 if (ret == 1) return(0);
2848 * Otherwise, heuristic :-\
2850 if ((RAW != '<') && (RAW != 0xD)) return(0);
2851 if ((ctxt->node->children == NULL) &&
2852 (RAW == '<') && (NXT(1) == '/')) return(0);
2854 lastChild = xmlGetLastChild(ctxt->node);
2855 if (lastChild == NULL) {
2856 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2857 (ctxt->node->content != NULL)) return(0);
2858 } else if (xmlNodeIsText(lastChild))
2860 else if ((ctxt->node->children != NULL) &&
2861 (xmlNodeIsText(ctxt->node->children)))
2866 /************************************************************************
2868 * Extra stuff for namespace support *
2869 * Relates to http://www.w3.org/TR/WD-xml-names *
2871 ************************************************************************/
2875 * @ctxt: an XML parser context
2876 * @name: the UTF8 encoded XML qualified name string to split
2877 * @prefix: a xmlChar **
2879 * parse an UTF8 encoded XML qualified name string
2881 * [NS 5] QName ::= (Prefix ':')? LocalPart
2883 * [NS 6] Prefix ::= NCName
2885 * [NS 7] LocalPart ::= NCName
2887 * Returns the local part, and prefix is updated
2888 * to get the Prefix if any.
2892 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2893 xmlChar buf[XML_MAX_NAMELEN + 5];
2894 xmlChar *buffer = NULL;
2896 int max = XML_MAX_NAMELEN;
2897 xmlChar *ret = NULL;
2898 const xmlChar *cur = name;
2901 if (prefix == NULL) return(NULL);
2904 if (cur == NULL) return(NULL);
2906 #ifndef XML_XML_NAMESPACE
2907 /* xml: prefix is not really a namespace */
2908 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2909 (cur[2] == 'l') && (cur[3] == ':'))
2910 return(xmlStrdup(name));
2913 /* nasty but well-formed */
2915 return(xmlStrdup(name));
2918 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2924 * Okay someone managed to make a huge name, so he's ready to pay
2925 * for the processing speed.
2929 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2930 if (buffer == NULL) {
2931 xmlErrMemory(ctxt, NULL);
2934 memcpy(buffer, buf, len);
2935 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2936 if (len + 10 > max) {
2940 tmp = (xmlChar *) xmlRealloc(buffer,
2941 max * sizeof(xmlChar));
2944 xmlErrMemory(ctxt, NULL);
2955 if ((c == ':') && (*cur == 0)) {
2959 return(xmlStrdup(name));
2963 ret = xmlStrndup(buf, len);
2967 max = XML_MAX_NAMELEN;
2975 return(xmlStrndup(BAD_CAST "", 0));
2980 * Check that the first character is proper to start
2983 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2984 ((c >= 0x41) && (c <= 0x5A)) ||
2985 (c == '_') || (c == ':'))) {
2987 int first = CUR_SCHAR(cur, l);
2989 if (!IS_LETTER(first) && (first != '_')) {
2990 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2991 "Name %s is not XML Namespace compliant\n",
2997 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3003 * Okay someone managed to make a huge name, so he's ready to pay
3004 * for the processing speed.
3008 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3009 if (buffer == NULL) {
3010 xmlErrMemory(ctxt, NULL);
3013 memcpy(buffer, buf, len);
3014 while (c != 0) { /* tested bigname2.xml */
3015 if (len + 10 > max) {
3019 tmp = (xmlChar *) xmlRealloc(buffer,
3020 max * sizeof(xmlChar));
3022 xmlErrMemory(ctxt, NULL);
3035 ret = xmlStrndup(buf, len);
3044 /************************************************************************
3046 * The parser itself *
3047 * Relates to http://www.w3.org/TR/REC-xml *
3049 ************************************************************************/
3051 /************************************************************************
3053 * Routines to parse Name, NCName and NmToken *
3055 ************************************************************************/
3057 static unsigned long nbParseName = 0;
3058 static unsigned long nbParseNmToken = 0;
3059 static unsigned long nbParseNCName = 0;
3060 static unsigned long nbParseNCNameComplex = 0;
3061 static unsigned long nbParseNameComplex = 0;
3062 static unsigned long nbParseStringName = 0;
3066 * The two following functions are related to the change of accepted
3067 * characters for Name and NmToken in the Revision 5 of XML-1.0
3068 * They correspond to the modified production [4] and the new production [4a]
3069 * changes in that revision. Also note that the macros used for the
3070 * productions Letter, Digit, CombiningChar and Extender are not needed
3072 * We still keep compatibility to pre-revision5 parsing semantic if the
3073 * new XML_PARSE_OLD10 option is given to the parser.
3076 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3077 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3079 * Use the new checks of production [4] [4a] and [5] of the
3080 * Update 5 of XML-1.0
3082 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3083 (((c >= 'a') && (c <= 'z')) ||
3084 ((c >= 'A') && (c <= 'Z')) ||
3085 (c == '_') || (c == ':') ||
3086 ((c >= 0xC0) && (c <= 0xD6)) ||
3087 ((c >= 0xD8) && (c <= 0xF6)) ||
3088 ((c >= 0xF8) && (c <= 0x2FF)) ||
3089 ((c >= 0x370) && (c <= 0x37D)) ||
3090 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3091 ((c >= 0x200C) && (c <= 0x200D)) ||
3092 ((c >= 0x2070) && (c <= 0x218F)) ||
3093 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3094 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3095 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3096 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3097 ((c >= 0x10000) && (c <= 0xEFFFF))))
3100 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3107 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3108 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3110 * Use the new checks of production [4] [4a] and [5] of the
3111 * Update 5 of XML-1.0
3113 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3114 (((c >= 'a') && (c <= 'z')) ||
3115 ((c >= 'A') && (c <= 'Z')) ||
3116 ((c >= '0') && (c <= '9')) || /* !start */
3117 (c == '_') || (c == ':') ||
3118 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3119 ((c >= 0xC0) && (c <= 0xD6)) ||
3120 ((c >= 0xD8) && (c <= 0xF6)) ||
3121 ((c >= 0xF8) && (c <= 0x2FF)) ||
3122 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3123 ((c >= 0x370) && (c <= 0x37D)) ||
3124 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3125 ((c >= 0x200C) && (c <= 0x200D)) ||
3126 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3127 ((c >= 0x2070) && (c <= 0x218F)) ||
3128 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132 ((c >= 0x10000) && (c <= 0xEFFFF))))
3135 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3136 (c == '.') || (c == '-') ||
3137 (c == '_') || (c == ':') ||
3138 (IS_COMBINING(c)) ||
3145 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3146 int *len, int *alloc, int normalize);
3148 static const xmlChar *
3149 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3155 nbParseNameComplex++;
3159 * Handler for more complex cases
3162 if (ctxt->instate == XML_PARSER_EOF)
3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3167 * Use the new checks of production [4] [4a] and [5] of the
3168 * Update 5 of XML-1.0
3170 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3171 (!(((c >= 'a') && (c <= 'z')) ||
3172 ((c >= 'A') && (c <= 'Z')) ||
3173 (c == '_') || (c == ':') ||
3174 ((c >= 0xC0) && (c <= 0xD6)) ||
3175 ((c >= 0xD8) && (c <= 0xF6)) ||
3176 ((c >= 0xF8) && (c <= 0x2FF)) ||
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x2070) && (c <= 0x218F)) ||
3181 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192 (((c >= 'a') && (c <= 'z')) ||
3193 ((c >= 'A') && (c <= 'Z')) ||
3194 ((c >= '0') && (c <= '9')) || /* !start */
3195 (c == '_') || (c == ':') ||
3196 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3197 ((c >= 0xC0) && (c <= 0xD6)) ||
3198 ((c >= 0xD8) && (c <= 0xF6)) ||
3199 ((c >= 0xF8) && (c <= 0x2FF)) ||
3200 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3201 ((c >= 0x370) && (c <= 0x37D)) ||
3202 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203 ((c >= 0x200C) && (c <= 0x200D)) ||
3204 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3205 ((c >= 0x2070) && (c <= 0x218F)) ||
3206 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210 ((c >= 0x10000) && (c <= 0xEFFFF))
3212 if (count++ > XML_PARSER_CHUNK_SIZE) {
3215 if (ctxt->instate == XML_PARSER_EOF)
3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224 (!IS_LETTER(c) && (c != '_') &&
3232 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3233 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234 (c == '.') || (c == '-') ||
3235 (c == '_') || (c == ':') ||
3236 (IS_COMBINING(c)) ||
3237 (IS_EXTENDER(c)))) {
3238 if (count++ > XML_PARSER_CHUNK_SIZE) {
3241 if (ctxt->instate == XML_PARSER_EOF)
3249 if ((len > XML_MAX_NAME_LENGTH) &&
3250 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3251 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3254 if (ctxt->input->cur - ctxt->input->base < len) {
3256 * There were a couple of bugs where PERefs led to a change
3257 * of the buffer. Check the buffer size to avoid passing an invalid
3258 * pointer to xmlDictLookup.
3260 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3261 "unexpected change of input buffer");
3264 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3265 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3271 * @ctxt: an XML parser context
3273 * parse an XML name.
3275 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3276 * CombiningChar | Extender
3278 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3280 * [6] Names ::= Name (#x20 Name)*
3282 * Returns the Name parsed or NULL
3286 xmlParseName(xmlParserCtxtPtr ctxt) {
3298 * Accelerator for simple ASCII names
3300 in = ctxt->input->cur;
3301 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3302 ((*in >= 0x41) && (*in <= 0x5A)) ||
3303 (*in == '_') || (*in == ':')) {
3305 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3306 ((*in >= 0x41) && (*in <= 0x5A)) ||
3307 ((*in >= 0x30) && (*in <= 0x39)) ||
3308 (*in == '_') || (*in == '-') ||
3309 (*in == ':') || (*in == '.'))
3311 if ((*in > 0) && (*in < 0x80)) {
3312 count = in - ctxt->input->cur;
3313 if ((count > XML_MAX_NAME_LENGTH) &&
3314 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3319 ctxt->input->cur = in;
3320 ctxt->nbChars += count;
3321 ctxt->input->col += count;
3323 xmlErrMemory(ctxt, NULL);
3327 /* accelerator for special cases */
3328 return(xmlParseNameComplex(ctxt));
3331 static const xmlChar *
3332 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3336 size_t startPosition = 0;
3339 nbParseNCNameComplex++;
3343 * Handler for more complex cases
3346 startPosition = CUR_PTR - BASE_PTR;
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3354 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3355 if (count++ > XML_PARSER_CHUNK_SIZE) {
3356 if ((len > XML_MAX_NAME_LENGTH) &&
3357 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3358 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3363 if (ctxt->instate == XML_PARSER_EOF)
3372 * when shrinking to extend the buffer we really need to preserve
3373 * the part of the name we already parsed. Hence rolling back
3374 * by current length.
3376 ctxt->input->cur -= l;
3378 ctxt->input->cur += l;
3379 if (ctxt->instate == XML_PARSER_EOF)
3384 if ((len > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3389 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3394 * @ctxt: an XML parser context
3395 * @len: length of the string parsed
3397 * parse an XML name.
3399 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3400 * CombiningChar | Extender
3402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3404 * Returns the Name parsed or NULL
3407 static const xmlChar *
3408 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3409 const xmlChar *in, *e;
3418 * Accelerator for simple ASCII names
3420 in = ctxt->input->cur;
3421 e = ctxt->input->end;
3422 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423 ((*in >= 0x41) && (*in <= 0x5A)) ||
3424 (*in == '_')) && (in < e)) {
3426 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3427 ((*in >= 0x41) && (*in <= 0x5A)) ||
3428 ((*in >= 0x30) && (*in <= 0x39)) ||
3429 (*in == '_') || (*in == '-') ||
3430 (*in == '.')) && (in < e))
3434 if ((*in > 0) && (*in < 0x80)) {
3435 count = in - ctxt->input->cur;
3436 if ((count > XML_MAX_NAME_LENGTH) &&
3437 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3438 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3441 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3442 ctxt->input->cur = in;
3443 ctxt->nbChars += count;
3444 ctxt->input->col += count;
3446 xmlErrMemory(ctxt, NULL);
3452 return(xmlParseNCNameComplex(ctxt));
3456 * xmlParseNameAndCompare:
3457 * @ctxt: an XML parser context
3459 * parse an XML name and compares for match
3460 * (specialized for endtag parsing)
3462 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3463 * and the name for mismatch
3466 static const xmlChar *
3467 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3468 register const xmlChar *cmp = other;
3469 register const xmlChar *in;
3473 if (ctxt->instate == XML_PARSER_EOF)
3476 in = ctxt->input->cur;
3477 while (*in != 0 && *in == *cmp) {
3482 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3484 ctxt->input->cur = in;
3485 return (const xmlChar*) 1;
3487 /* failure (or end of input buffer), check with full function */
3488 ret = xmlParseName (ctxt);
3489 /* strings coming from the dictionary direct compare possible */
3491 return (const xmlChar*) 1;
3497 * xmlParseStringName:
3498 * @ctxt: an XML parser context
3499 * @str: a pointer to the string pointer (IN/OUT)
3501 * parse an XML name.
3503 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3504 * CombiningChar | Extender
3506 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3508 * [6] Names ::= Name (#x20 Name)*
3510 * Returns the Name parsed or NULL. The @str pointer
3511 * is updated to the current location in the string.
3515 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3516 xmlChar buf[XML_MAX_NAMELEN + 5];
3517 const xmlChar *cur = *str;
3522 nbParseStringName++;
3525 c = CUR_SCHAR(cur, l);
3526 if (!xmlIsNameStartChar(ctxt, c)) {
3530 COPY_BUF(l,buf,len,c);
3532 c = CUR_SCHAR(cur, l);
3533 while (xmlIsNameChar(ctxt, c)) {
3534 COPY_BUF(l,buf,len,c);
3536 c = CUR_SCHAR(cur, l);
3537 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3539 * Okay someone managed to make a huge name, so he's ready to pay
3540 * for the processing speed.
3545 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3546 if (buffer == NULL) {
3547 xmlErrMemory(ctxt, NULL);
3550 memcpy(buffer, buf, len);
3551 while (xmlIsNameChar(ctxt, c)) {
3552 if (len + 10 > max) {
3555 if ((len > XML_MAX_NAME_LENGTH) &&
3556 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3557 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3562 tmp = (xmlChar *) xmlRealloc(buffer,
3563 max * sizeof(xmlChar));
3565 xmlErrMemory(ctxt, NULL);
3571 COPY_BUF(l,buffer,len,c);
3573 c = CUR_SCHAR(cur, l);
3580 if ((len > XML_MAX_NAME_LENGTH) &&
3581 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3582 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3586 return(xmlStrndup(buf, len));
3591 * @ctxt: an XML parser context
3593 * parse an XML Nmtoken.
3595 * [7] Nmtoken ::= (NameChar)+
3597 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3599 * Returns the Nmtoken parsed or NULL
3603 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3604 xmlChar buf[XML_MAX_NAMELEN + 5];
3614 if (ctxt->instate == XML_PARSER_EOF)
3618 while (xmlIsNameChar(ctxt, c)) {
3619 if (count++ > XML_PARSER_CHUNK_SIZE) {
3623 COPY_BUF(l,buf,len,c);
3629 if (ctxt->instate == XML_PARSER_EOF)
3633 if (len >= XML_MAX_NAMELEN) {
3635 * Okay someone managed to make a huge token, so he's ready to pay
3636 * for the processing speed.
3641 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3642 if (buffer == NULL) {
3643 xmlErrMemory(ctxt, NULL);
3646 memcpy(buffer, buf, len);
3647 while (xmlIsNameChar(ctxt, c)) {
3648 if (count++ > XML_PARSER_CHUNK_SIZE) {
3651 if (ctxt->instate == XML_PARSER_EOF) {
3656 if (len + 10 > max) {
3659 if ((max > XML_MAX_NAME_LENGTH) &&
3660 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3661 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3666 tmp = (xmlChar *) xmlRealloc(buffer,
3667 max * sizeof(xmlChar));
3669 xmlErrMemory(ctxt, NULL);
3675 COPY_BUF(l,buffer,len,c);
3685 if ((len > XML_MAX_NAME_LENGTH) &&
3686 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3687 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3690 return(xmlStrndup(buf, len));
3694 * xmlParseEntityValue:
3695 * @ctxt: an XML parser context
3696 * @orig: if non-NULL store a copy of the original entity value
3698 * parse a value for ENTITY declarations
3700 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3701 * "'" ([^%&'] | PEReference | Reference)* "'"
3703 * Returns the EntityValue parsed with reference substituted or NULL
3707 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3708 xmlChar *buf = NULL;
3710 int size = XML_PARSER_BUFFER_SIZE;
3713 xmlChar *ret = NULL;
3714 const xmlChar *cur = NULL;
3715 xmlParserInputPtr input;
3717 if (RAW == '"') stop = '"';
3718 else if (RAW == '\'') stop = '\'';
3720 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3723 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3725 xmlErrMemory(ctxt, NULL);
3730 * The content of the entity definition is copied in a buffer.
3733 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3734 input = ctxt->input;
3736 if (ctxt->instate == XML_PARSER_EOF)
3741 * NOTE: 4.4.5 Included in Literal
3742 * When a parameter entity reference appears in a literal entity
3743 * value, ... a single or double quote character in the replacement
3744 * text is always treated as a normal data character and will not
3745 * terminate the literal.
3746 * In practice it means we stop the loop only when back at parsing
3747 * the initial entity and the quote is found
3749 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3750 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3751 if (len + 5 >= size) {
3755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3757 xmlErrMemory(ctxt, NULL);
3762 COPY_BUF(l,buf,len,c);
3773 if (ctxt->instate == XML_PARSER_EOF)
3776 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3782 * Raise problem w.r.t. '&' and '%' being used in non-entities
3783 * reference constructs. Note Charref will be handled in
3784 * xmlStringDecodeEntities()
3787 while (*cur != 0) { /* non input consuming */
3788 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3794 name = xmlParseStringName(ctxt, &cur);
3799 if ((nameOk == 0) || (*cur != ';')) {
3800 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3801 "EntityValue: '%c' forbidden except for entities references\n",
3805 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3806 (ctxt->inputNr == 1)) {
3807 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3817 * Then PEReference entities are substituted.
3819 * NOTE: 4.4.7 Bypassed
3820 * When a general entity reference appears in the EntityValue in
3821 * an entity declaration, it is bypassed and left as is.
3822 * so XML_SUBSTITUTE_REF is not set here.
3825 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3840 * xmlParseAttValueComplex:
3841 * @ctxt: an XML parser context
3842 * @attlen: the resulting attribute length
3843 * @normalize: whether to apply the inner normalization
3845 * parse a value for an attribute, this is the fallback function
3846 * of xmlParseAttValue() when the attribute parsing requires handling
3847 * of non-ASCII characters, or normalization compaction.
3849 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3852 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3854 xmlChar *buf = NULL;
3855 xmlChar *rep = NULL;
3857 size_t buf_size = 0;
3858 int c, l, in_space = 0;
3859 xmlChar *current = NULL;
3862 if (NXT(0) == '"') {
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3866 } else if (NXT(0) == '\'') {
3868 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3871 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3876 * allocate a translation buffer.
3878 buf_size = XML_PARSER_BUFFER_SIZE;
3879 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3880 if (buf == NULL) goto mem_error;
3883 * OK loop until we reach one of the ending char or a size limit.
3886 while (((NXT(0) != limit) && /* checked */
3887 (IS_CHAR(c)) && (c != '<')) &&
3888 (ctxt->instate != XML_PARSER_EOF)) {
3890 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3891 * special option is given
3893 if ((len > XML_MAX_TEXT_LENGTH) &&
3894 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3895 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3896 "AttValue length too long\n");
3902 if (NXT(1) == '#') {
3903 int val = xmlParseCharRef(ctxt);
3906 if (ctxt->replaceEntities) {
3907 if (len + 10 > buf_size) {
3908 growBuffer(buf, 10);
3913 * The reparsing will be done in xmlStringGetNodeList()
3914 * called by the attribute() function in SAX.c
3916 if (len + 10 > buf_size) {
3917 growBuffer(buf, 10);
3925 } else if (val != 0) {
3926 if (len + 10 > buf_size) {
3927 growBuffer(buf, 10);
3929 len += xmlCopyChar(0, &buf[len], val);
3932 ent = xmlParseEntityRef(ctxt);
3935 ctxt->nbentities += ent->owner;
3936 if ((ent != NULL) &&
3937 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3938 if (len + 10 > buf_size) {
3939 growBuffer(buf, 10);
3941 if ((ctxt->replaceEntities == 0) &&
3942 (ent->content[0] == '&')) {
3949 buf[len++] = ent->content[0];
3951 } else if ((ent != NULL) &&
3952 (ctxt->replaceEntities != 0)) {
3953 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3955 rep = xmlStringDecodeEntities(ctxt, ent->content,
3961 while (*current != 0) { /* non input consuming */
3962 if ((*current == 0xD) || (*current == 0xA) ||
3963 (*current == 0x9)) {
3967 buf[len++] = *current++;
3968 if (len + 10 > buf_size) {
3969 growBuffer(buf, 10);
3976 if (len + 10 > buf_size) {
3977 growBuffer(buf, 10);
3979 if (ent->content != NULL)
3980 buf[len++] = ent->content[0];
3982 } else if (ent != NULL) {
3983 int i = xmlStrlen(ent->name);
3984 const xmlChar *cur = ent->name;
3987 * This may look absurd but is needed to detect
3990 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3991 (ent->content != NULL) && (ent->checked == 0)) {
3992 unsigned long oldnbent = ctxt->nbentities;
3995 rep = xmlStringDecodeEntities(ctxt, ent->content,
3996 XML_SUBSTITUTE_REF, 0, 0, 0);
3999 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4001 if (xmlStrchr(rep, '<'))
4006 ent->content[0] = 0;
4011 * Just output the reference
4014 while (len + i + 10 > buf_size) {
4015 growBuffer(buf, i + 10);
4018 buf[len++] = *cur++;
4023 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4024 if ((len != 0) || (!normalize)) {
4025 if ((!normalize) || (!in_space)) {
4026 COPY_BUF(l,buf,len,0x20);
4027 while (len + 10 > buf_size) {
4028 growBuffer(buf, 10);
4035 COPY_BUF(l,buf,len,c);
4036 if (len + 10 > buf_size) {
4037 growBuffer(buf, 10);
4045 if (ctxt->instate == XML_PARSER_EOF)
4048 if ((in_space) && (normalize)) {
4049 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4053 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4054 } else if (RAW != limit) {
4055 if ((c != 0) && (!IS_CHAR(c))) {
4056 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4057 "invalid character in attribute value\n");
4059 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4060 "AttValue: ' expected\n");
4066 * There we potentially risk an overflow; don't allow attribute values of
4067 * length more than INT_MAX, it is a very reasonable assumption!
4069 if (len >= INT_MAX) {
4070 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4071 "AttValue length too long\n");
4075 if (attlen != NULL) *attlen = (int) len;
4079 xmlErrMemory(ctxt, NULL);
4090 * @ctxt: an XML parser context
4092 * parse a value for an attribute
4093 * Note: the parser won't do substitution of entities here, this
4094 * will be handled later in xmlStringGetNodeList
4096 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4097 * "'" ([^<&'] | Reference)* "'"
4099 * 3.3.3 Attribute-Value Normalization:
4100 * Before the value of an attribute is passed to the application or
4101 * checked for validity, the XML processor must normalize it as follows:
4102 * - a character reference is processed by appending the referenced
4103 * character to the attribute value
4104 * - an entity reference is processed by recursively processing the
4105 * replacement text of the entity
4106 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4107 * appending #x20 to the normalized value, except that only a single
4108 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4109 * parsed entity or the literal entity value of an internal parsed entity
4110 * - other characters are processed by appending them to the normalized value
4111 * If the declared value is not CDATA, then the XML processor must further
4112 * process the normalized attribute value by discarding any leading and
4113 * trailing space (#x20) characters, and by replacing sequences of space
4114 * (#x20) characters by a single space (#x20) character.
4115 * All attributes for which no declaration has been read should be treated
4116 * by a non-validating parser as if declared CDATA.
4118 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4123 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4124 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4125 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4129 * xmlParseSystemLiteral:
4130 * @ctxt: an XML parser context
4132 * parse an XML Literal
4134 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4136 * Returns the SystemLiteral parsed or NULL
4140 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4141 xmlChar *buf = NULL;
4143 int size = XML_PARSER_BUFFER_SIZE;
4146 int state = ctxt->instate;
4153 } else if (RAW == '\'') {
4157 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4161 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4163 xmlErrMemory(ctxt, NULL);
4166 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4168 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4169 if (len + 5 >= size) {
4172 if ((size > XML_MAX_NAME_LENGTH) &&
4173 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4174 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4176 ctxt->instate = (xmlParserInputState) state;
4180 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4183 xmlErrMemory(ctxt, NULL);
4184 ctxt->instate = (xmlParserInputState) state;
4193 if (ctxt->instate == XML_PARSER_EOF) {
4198 COPY_BUF(l,buf,len,cur);
4208 ctxt->instate = (xmlParserInputState) state;
4209 if (!IS_CHAR(cur)) {
4210 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4218 * xmlParsePubidLiteral:
4219 * @ctxt: an XML parser context
4221 * parse an XML public literal
4223 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4225 * Returns the PubidLiteral parsed or NULL.
4229 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4230 xmlChar *buf = NULL;
4232 int size = XML_PARSER_BUFFER_SIZE;
4236 xmlParserInputState oldstate = ctxt->instate;
4242 } else if (RAW == '\'') {
4246 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4249 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4251 xmlErrMemory(ctxt, NULL);
4254 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4256 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4257 if (len + 1 >= size) {
4260 if ((size > XML_MAX_NAME_LENGTH) &&
4261 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4262 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4267 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4269 xmlErrMemory(ctxt, NULL);
4280 if (ctxt->instate == XML_PARSER_EOF) {
4295 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4299 ctxt->instate = oldstate;
4303 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4306 * used for the test in the inner loop of the char data testing
4308 static const unsigned char test_char_data[256] = {
4309 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4310 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4311 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4312 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4313 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4314 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4315 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4316 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4317 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4318 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4319 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4320 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4321 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4322 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4323 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4324 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4325 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4326 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4327 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4328 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4329 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4331 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4332 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4333 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4334 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4335 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4336 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4337 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4338 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4339 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4340 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4345 * @ctxt: an XML parser context
4346 * @cdata: int indicating whether we are within a CDATA section
4348 * parse a CharData section.
4349 * if we are within a CDATA section ']]>' marks an end of section.
4351 * The right angle bracket (>) may be represented using the string "&gt;",
4352 * and must, for compatibility, be escaped using "&gt;" or a character
4353 * reference when it appears in the string "]]>" in content, when that
4354 * string is not marking the end of a CDATA section.
4356 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4360 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4363 int line = ctxt->input->line;
4364 int col = ctxt->input->col;
4370 * Accelerated common case where the input doesn't need to be
4371 * modified before passing it to the handler.
4374 in = ctxt->input->cur;
4377 while (*in == 0x20) { in++; ctxt->input->col++; }
4380 ctxt->input->line++; ctxt->input->col = 1;
4382 } while (*in == 0xA);
4383 goto get_more_space;
4386 nbchar = in - ctxt->input->cur;
4388 const xmlChar *tmp = ctxt->input->cur;
4389 ctxt->input->cur = in;
4391 if ((ctxt->sax != NULL) &&
4392 (ctxt->sax->ignorableWhitespace !=
4393 ctxt->sax->characters)) {
4394 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4395 if (ctxt->sax->ignorableWhitespace != NULL)
4396 ctxt->sax->ignorableWhitespace(ctxt->userData,
4399 if (ctxt->sax->characters != NULL)
4400 ctxt->sax->characters(ctxt->userData,
4402 if (*ctxt->space == -1)
4405 } else if ((ctxt->sax != NULL) &&
4406 (ctxt->sax->characters != NULL)) {
4407 ctxt->sax->characters(ctxt->userData,
4415 ccol = ctxt->input->col;
4416 while (test_char_data[*in]) {
4420 ctxt->input->col = ccol;
4423 ctxt->input->line++; ctxt->input->col = 1;
4425 } while (*in == 0xA);
4429 if ((in[1] == ']') && (in[2] == '>')) {
4430 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4431 ctxt->input->cur = in + 1;
4438 nbchar = in - ctxt->input->cur;
4440 if ((ctxt->sax != NULL) &&
4441 (ctxt->sax->ignorableWhitespace !=
4442 ctxt->sax->characters) &&
4443 (IS_BLANK_CH(*ctxt->input->cur))) {
4444 const xmlChar *tmp = ctxt->input->cur;
4445 ctxt->input->cur = in;
4447 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4448 if (ctxt->sax->ignorableWhitespace != NULL)
4449 ctxt->sax->ignorableWhitespace(ctxt->userData,
4452 if (ctxt->sax->characters != NULL)
4453 ctxt->sax->characters(ctxt->userData,
4455 if (*ctxt->space == -1)
4458 line = ctxt->input->line;
4459 col = ctxt->input->col;
4460 } else if (ctxt->sax != NULL) {
4461 if (ctxt->sax->characters != NULL)
4462 ctxt->sax->characters(ctxt->userData,
4463 ctxt->input->cur, nbchar);
4464 line = ctxt->input->line;
4465 col = ctxt->input->col;
4467 /* something really bad happened in the SAX callback */
4468 if (ctxt->instate != XML_PARSER_CONTENT)
4471 ctxt->input->cur = in;
4475 ctxt->input->cur = in;
4477 ctxt->input->line++; ctxt->input->col = 1;
4478 continue; /* while */
4490 if (ctxt->instate == XML_PARSER_EOF)
4492 in = ctxt->input->cur;
4493 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4496 ctxt->input->line = line;
4497 ctxt->input->col = col;
4498 xmlParseCharDataComplex(ctxt, cdata);
4502 * xmlParseCharDataComplex:
4503 * @ctxt: an XML parser context
4504 * @cdata: int indicating whether we are within a CDATA section
4506 * parse a CharData section. This is the fallback function
4507 * of xmlParseCharData() when the parsing requires handling
4508 * of non-ASCII characters.
4511 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
/*
 * Slow path for character data. Each accepted character is appended to the
 * local buffer with COPY_BUF; once nbchar reaches
 * XML_PARSER_BIG_BUFFER_SIZE the buffer is handed to the SAX
 * ignorableWhitespace or characters callback (chosen via areBlanks).
 * A second, similar flush appears further down, presumably for the
 * remainder left when the loop ends.
 * The loop stops at a less-than sign or at the first value rejected by
 * IS_CHAR; a rejected non-zero value is reported as XML_ERR_INVALID_CHAR.
 * NOTE(review): this listing elides lines, so declarations and parts of
 * the loop condition are not visible here.
 */
4512 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; /* presumably the 5 spare bytes hold the character appended before the size check - TODO confirm against COPY_BUF */
4520 while ((cur != '<') && /* checked */
4522 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4523 if ((cur == ']') && (NXT(1) == ']') &&
4527 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4530 COPY_BUF(l,buf,nbchar,cur);
4531 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { /* buffer full: flush to SAX */
4535 * OK the segment is to be consumed as chars.
4537 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4538 if (areBlanks(ctxt, buf, nbchar, 0)) {
4539 if (ctxt->sax->ignorableWhitespace != NULL)
4540 ctxt->sax->ignorableWhitespace(ctxt->userData,
4543 if (ctxt->sax->characters != NULL)
4544 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4545 if ((ctxt->sax->characters !=
4546 ctxt->sax->ignorableWhitespace) &&
4547 (*ctxt->space == -1))
4552 /* something really bad happened in the SAX callback */
4553 if (ctxt->instate != XML_PARSER_CONTENT)
4560 if (ctxt->instate == XML_PARSER_EOF)
4569 * OK the segment is to be consumed as chars.
4571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4572 if (areBlanks(ctxt, buf, nbchar, 0)) {
4573 if (ctxt->sax->ignorableWhitespace != NULL)
4574 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4576 if (ctxt->sax->characters != NULL)
4577 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4578 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4579 (*ctxt->space == -1))
4584 if ((cur != 0) && (!IS_CHAR(cur))) { /* stopped on an invalid, non-NUL value */
4585 /* Generate the error and skip the offending character */
4586 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4587 "PCDATA invalid Char value %d\n",
4594 * xmlParseExternalID:
4595 * @ctxt: an XML parser context
4596 * @publicID: a xmlChar** receiving PubidLiteral
4597 * @strict: indicate whether we should restrict parsing to only
4598 * production [75], see NOTE below
4600 * Parse an External ID or a Public ID
4602 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4603 * 'PUBLIC' S PubidLiteral S SystemLiteral
4605 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4606 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4608 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4610 * Returns the SystemLiteral. In the PUBLIC case publicID additionally
4611 * receives the PubidLiteral; if strict is off
4612 * it is possible to return NULL and have publicID set.
4616 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
/*
 * Parses an external identifier at the current position.
 * SYSTEM keyword: requires blanks, then reads a system literal into URI;
 * XML_ERR_URI_REQUIRED is raised next to that call (its guard is elided).
 * PUBLIC keyword: requires blanks, stores the public literal in *publicID
 * (XML_ERR_PUBID_REQUIRED when it is NULL), then reads the system literal.
 * The two in-body notes describe the strict and non-strict handling of the
 * separator before the system literal; in the non-strict path the function
 * returns NULL early when no blank or no quote character follows.
 * NOTE(review): the final return statement is elided from this listing;
 * presumably URI is returned - confirm against the full file.
 */
4617 xmlChar *URI = NULL; /* system literal; stays NULL when none is parsed */
4622 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4624 if (SKIP_BLANKS == 0) {
4625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4626 "Space required after 'SYSTEM'\n");
4628 URI = xmlParseSystemLiteral(ctxt);
4630 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4632 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4634 if (SKIP_BLANKS == 0) {
4635 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4636 "Space required after 'PUBLIC'\n");
4638 *publicID = xmlParsePubidLiteral(ctxt); /* handed back through the out-parameter */
4639 if (*publicID == NULL) {
4640 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4644 * We don't handle [83] so "S SystemLiteral" is required.
4646 if (SKIP_BLANKS == 0) {
4647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4648 "Space required after the Public Identifier\n");
4652 * We handle [83] so we return immediately, if
4653 * "S SystemLiteral" is not detected. We skip blanks if no
4654 * system literal was found, but this is harmless since we must
4655 * be at the end of a NotationDecl.
4657 if (SKIP_BLANKS == 0) return(NULL);
4658 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4660 URI = xmlParseSystemLiteral(ctxt);
4662 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4669 * xmlParseCommentComplex:
4670 * @ctxt: an XML parser context
4671 * @buf: the already parsed part of the buffer
4672 * @len: number of bytes filled in the buffer
4673 * @size: allocated size of the buffer
4675 * Skip an XML (SGML) comment <!-- .... -->
4676 * The spec says that "For compatibility, the string "--" (double-hyphen)
4677 * must not occur within comments. "
4678 * This is the slow routine in case the accelerator for ascii didn't work
4680 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4683 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4684 size_t len, size_t size) {
/*
 * Slow path for comment parsing. It continues from a partially filled
 * buffer (buf, len, size) and appends further characters with COPY_BUF.
 * The loop keeps three values (q, r, cur) and runs while cur passes IS_CHAR
 * and the two previous characters are not both hyphens (part of the
 * condition is elided). A double hyphen seen inside the loop raises
 * XML_ERR_HYPHEN_IN_COMMENT.
 * The buffer is doubled when len + 5 >= size, and comments longer than
 * XML_MAX_TEXT_LENGTH are rejected unless XML_PARSE_HUGE is set.
 * On success the text is delivered through sax->comment.
 * NOTE(review): this listing elides lines, so the guards around the
 * allocation, the labels and the cleanup code are not visible here.
 */
4691 inputid = ctxt->input->id; /* compared later to detect a comment crossing an entity boundary */
4695 size = XML_PARSER_BUFFER_SIZE;
4696 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4698 xmlErrMemory(ctxt, NULL);
4702 GROW; /* Assure there's enough input data */
4705 goto not_terminated;
4707 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4708 "xmlParseComment: invalid xmlChar value %d\n",
4716 goto not_terminated;
4718 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4719 "xmlParseComment: invalid xmlChar value %d\n",
4727 goto not_terminated;
4728 while (IS_CHAR(cur) && /* checked */
4730 (r != '-') || (q != '-'))) {
4731 if ((r == '-') && (q == '-')) {
4732 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4734 if ((len > XML_MAX_TEXT_LENGTH) &&
4735 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4736 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4737 "Comment too big found", NULL);
4741 if (len + 5 >= size) { /* keep headroom for the next COPY_BUF */
4745 new_size = size * 2;
4746 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4747 if (new_buf == NULL) {
4749 xmlErrMemory(ctxt, NULL);
4755 COPY_BUF(ql,buf,len,q); /* the oldest of the three tracked characters is the one stored */
4765 if (ctxt->instate == XML_PARSER_EOF) {
4780 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4781 "Comment not terminated \n<!--%.50s\n", buf);
4782 } else if (!IS_CHAR(cur)) {
4783 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784 "xmlParseComment: invalid xmlChar value %d\n",
4787 if (inputid != ctxt->input->id) {
4788 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4789 "Comment doesn't start and stop in the same"
4793 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4794 (!ctxt->disableSAX))
4795 ctxt->sax->comment(ctxt->userData, buf);
4800 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4801 "Comment not terminated\n", NULL);
4808 * @ctxt: an XML parser context
4810 * Skip an XML (SGML) comment <!-- .... -->
4811 * The spec says that "For compatibility, the string "--" (double-hyphen)
4812 * must not occur within comments. "
4814 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4817 xmlParseComment(xmlParserCtxtPtr ctxt) {
/*
 * Entry point for comment parsing. It returns at once unless the input
 * starts with the four-character comment opener (less-than, exclamation
 * mark, two hyphens). Otherwise it saves ctxt->instate, switches to
 * XML_PARSER_COMMENT and scans ASCII content directly from the input
 * buffer, copying each run into a heap buffer with memcpy. The buffer is
 * created or enlarged as needed, and comments longer than
 * XML_MAX_TEXT_LENGTH are rejected unless XML_PARSE_HUGE is set.
 * When the scan meets a byte outside 0x20..0x7F and tab, the partial
 * buffer is handed to xmlParseCommentComplex() and the saved state is
 * restored afterwards.
 * On the fast path the text is delivered through sax->comment; the other
 * visible call passes an empty string, presumably when nothing was
 * buffered.
 * NOTE(review): this listing elides lines, so declarations, several
 * conditions and the cleanup code are not visible here.
 */
4818 xmlChar *buf = NULL; /* allocated lazily, only in the branch that has a comment handler */
4819 size_t size = XML_PARSER_BUFFER_SIZE;
4821 xmlParserInputState state;
4828 * Check that there is a comment right here.
4830 if ((RAW != '<') || (NXT(1) != '!') ||
4831 (NXT(2) != '-') || (NXT(3) != '-')) return;
4832 state = ctxt->instate; /* restored on the exit paths below unless the parser reached EOF */
4833 ctxt->instate = XML_PARSER_COMMENT;
4834 inputid = ctxt->input->id; /* compared at the end to detect an entity boundary crossing */
4840 * Accelerated common case where input don't need to be
4841 * modified before passing it to the handler.
4843 in = ctxt->input->cur;
4847 ctxt->input->line++; ctxt->input->col = 1;
4849 } while (*in == 0xA);
4852 ccol = ctxt->input->col;
4853 while (((*in > '-') && (*in <= 0x7F)) ||
4854 ((*in >= 0x20) && (*in < '-')) ||
4859 ctxt->input->col = ccol;
4862 ctxt->input->line++; ctxt->input->col = 1;
4864 } while (*in == 0xA);
4867 nbchar = in - ctxt->input->cur;
4869 * save current set of data
4872 if ((ctxt->sax != NULL) &&
4873 (ctxt->sax->comment != NULL)) {
4875 if ((*in == '-') && (in[1] == '-'))
4878 size = XML_PARSER_BUFFER_SIZE + nbchar;
4879 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4881 xmlErrMemory(ctxt, NULL);
4882 ctxt->instate = state;
4886 } else if (len + nbchar + 1 >= size) {
4888 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4889 new_buf = (xmlChar *) xmlRealloc(buf,
4890 size * sizeof(xmlChar));
4891 if (new_buf == NULL) {
4893 xmlErrMemory(ctxt, NULL);
4894 ctxt->instate = state;
4899 memcpy(&buf[len], ctxt->input->cur, nbchar);
4904 if ((len > XML_MAX_TEXT_LENGTH) &&
4905 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4906 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4907 "Comment too big found", NULL);
4911 ctxt->input->cur = in;
4914 ctxt->input->line++; ctxt->input->col = 1;
4919 ctxt->input->cur = in;
4921 ctxt->input->line++; ctxt->input->col = 1;
4922 continue; /* while */
4928 if (ctxt->instate == XML_PARSER_EOF) {
4932 in = ctxt->input->cur;
4936 if (ctxt->input->id != inputid) {
4937 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4938 "comment doesn't start and stop in the"
4942 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4943 (!ctxt->disableSAX)) {
4945 ctxt->sax->comment(ctxt->userData, buf);
4947 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4951 if (ctxt->instate != XML_PARSER_EOF)
4952 ctxt->instate = state;
4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 "Double hyphen within comment: "
4961 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962 "Double hyphen within comment\n", NULL);
4970 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4971 xmlParseCommentComplex(ctxt, buf, len, size);
4972 ctxt->instate = state;
4979 * @ctxt: an XML parser context
4981 * parse the name of a PI
4983 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4985 * Returns the PITarget name or NULL
4989 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
/*
 * Reads a Name with xmlParseName() and checks it as a processing
 * instruction target.
 * Names whose first three letters spell xml in any letter case get
 * special treatment:
 *   - exactly lowercase xml: XML_ERR_RESERVED_XML_NAME with a message
 *     about the XML declaration position;
 *   - any other three-letter case variant: XML_ERR_RESERVED_XML_NAME;
 *   - longer names are compared against the xmlW3CPIs table; the warning
 *     about the xml prefix is presumably reached only when no entry
 *     matches (the intervening lines are elided).
 * Independently, a name containing a colon is reported through xmlNsErr
 * with XML_NS_ERR_COLON.
 * NOTE(review): the return statements are elided from this listing.
 */
4990 const xmlChar *name;
4992 name = xmlParseName(ctxt);
4993 if ((name != NULL) &&
4994 ((name[0] == 'x') || (name[0] == 'X')) &&
4995 ((name[1] == 'm') || (name[1] == 'M')) &&
4996 ((name[2] == 'l') || (name[2] == 'L'))) {
4998 if ((name[0] == 'x') && (name[1] == 'm') &&
4999 (name[2] == 'l') && (name[3] == 0)) {
5000 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5001 "XML declaration allowed only at the start of the document\n");
5003 } else if (name[3] == 0) { /* three letters only, but not all lowercase */
5004 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5008 if (xmlW3CPIs[i] == NULL) break; /* table is NULL-terminated */
5009 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5012 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5013 "xmlParsePITarget: invalid name prefix 'xml'\n",
5016 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5017 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5018 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5023 #ifdef LIBXML_CATALOG_ENABLED
5025 * xmlParseCatalogPI:
5026 * @ctxt: an XML parser context
5027 * @catalog: the PI value string
5029 * parse an XML Catalog Processing Instruction.
5031 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5033 * Occurs only if allowed by the user and if happening in the Misc
5034 * part of the document before any doctype information
5035 * This will add the given catalog to the parsing context in order
5036 * to be used if there is a resolution need further down in the document
5040 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
/*
 * Hand-parses the content of a catalog processing instruction.
 * Visible steps: skip blanks, require the 7-character word catalog, skip
 * blanks again, require a single or double quote marker, scan to the
 * matching marker and duplicate the enclosed text with xmlStrndup, skip
 * trailing blanks, then register the URL through xmlCatalogAddLocal and
 * store the result in ctxt->catalogs.
 * A malformed value produces the XML_WAR_CATALOG_PI warning.
 * NOTE(review): this listing elides lines, so the equals-sign check, the
 * error label and the release of URL are not visible here.
 */
5041 xmlChar *URL = NULL;
5042 const xmlChar *tmp, *base;
5046 while (IS_BLANK_CH(*tmp)) tmp++;
5047 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5050 while (IS_BLANK_CH(*tmp)) tmp++;
5055 while (IS_BLANK_CH(*tmp)) tmp++;
5057 if ((marker != '\'') && (marker != '"'))
5061 while ((*tmp != 0) && (*tmp != marker)) tmp++; /* stops at the closing marker or at the end of the string */
5064 URL = xmlStrndup(base, tmp - base);
5066 while (IS_BLANK_CH(*tmp)) tmp++;
5071 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5077 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5078 "Catalog PI syntax error: %s\n",
5087 * @ctxt: an XML parser context
5089 * parse an XML Processing Instruction.
5091 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5093 * The processing is transferred to SAX once parsed.
5097 xmlParsePI(xmlParserCtxtPtr ctxt) {
/*
 * Parses one processing instruction when the input is positioned on its
 * two-character opener (less-than, question mark).
 * Flow visible in this listing:
 *   1. save ctxt->instate and switch to XML_PARSER_PI;
 *   2. read the target with xmlParsePITarget();
 *   3. if the closing pair follows at once, report the PI through
 *      sax->processingInstruction and restore the state;
 *   4. otherwise allocate a buffer, require a blank after the target and
 *      copy characters with COPY_BUF until the closing pair, doubling the
 *      buffer when len + 5 >= size and rejecting content longer than
 *      XML_MAX_TEXT_LENGTH unless XML_PARSE_HUGE is set;
 *   5. with catalog support, a PI named XML_CATALOG_PI seen while the
 *      saved state was MISC or START is passed to xmlParseCatalogPI() when
 *      the catalog defaults allow it;
 *   6. report the PI through sax->processingInstruction.
 * XML_ERR_PI_NOT_STARTED is reported on a separate path, presumably when
 * the target is NULL. The saved state is restored unless the parser
 * reached XML_PARSER_EOF.
 * NOTE(review): this listing elides lines, so declarations, some guards
 * and the buffer release are not visible here.
 */
5098 xmlChar *buf = NULL;
5100 size_t size = XML_PARSER_BUFFER_SIZE;
5102 const xmlChar *target;
5103 xmlParserInputState state;
5106 if ((RAW == '<') && (NXT(1) == '?')) {
5107 int inputid = ctxt->input->id; /* compared later to detect an entity boundary crossing */
5108 state = ctxt->instate;
5109 ctxt->instate = XML_PARSER_PI;
5111 * this is a Processing Instruction.
5117 * Parse the target name and check for special support like
5120 target = xmlParsePITarget(ctxt);
5121 if (target != NULL) {
5122 if ((RAW == '?') && (NXT(1) == '>')) { /* PI with a target and no content */
5123 if (inputid != ctxt->input->id) {
5124 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5125 "PI declaration doesn't start and stop in"
5126 " the same entity\n");
5133 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5134 (ctxt->sax->processingInstruction != NULL))
5135 ctxt->sax->processingInstruction(ctxt->userData,
5137 if (ctxt->instate != XML_PARSER_EOF)
5138 ctxt->instate = state;
5141 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5143 xmlErrMemory(ctxt, NULL);
5144 ctxt->instate = state;
5147 if (SKIP_BLANKS == 0) {
5148 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "ParsePI: PI %s space expected\n", target);
5152 while (IS_CHAR(cur) && /* checked */
5153 ((cur != '?') || (NXT(1) != '>'))) {
5154 if (len + 5 >= size) { /* keep headroom for the next COPY_BUF */
5156 size_t new_size = size * 2;
5157 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5159 xmlErrMemory(ctxt, NULL);
5161 ctxt->instate = state;
5170 if (ctxt->instate == XML_PARSER_EOF) {
5175 if ((len > XML_MAX_TEXT_LENGTH) &&
5176 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5177 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5178 "PI %s too big found", target);
5180 ctxt->instate = state;
5184 COPY_BUF(l,buf,len,cur);
5193 if ((len > XML_MAX_TEXT_LENGTH) &&
5194 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5195 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5196 "PI %s too big found", target);
5198 ctxt->instate = state;
5203 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5204 "ParsePI: PI %s never end ...\n", target);
5206 if (inputid != ctxt->input->id) {
5207 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5208 "PI declaration doesn't start and stop in"
5209 " the same entity\n");
5213 #ifdef LIBXML_CATALOG_ENABLED
5214 if (((state == XML_PARSER_MISC) ||
5215 (state == XML_PARSER_START)) &&
5216 (xmlStrEqual(target, XML_CATALOG_PI))) {
5217 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5218 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5219 (allow == XML_CATA_ALLOW_ALL))
5220 xmlParseCatalogPI(ctxt, buf);
5228 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5229 (ctxt->sax->processingInstruction != NULL))
5230 ctxt->sax->processingInstruction(ctxt->userData,
5235 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5237 if (ctxt->instate != XML_PARSER_EOF)
5238 ctxt->instate = state;
5243 * xmlParseNotationDecl:
5244 * @ctxt: an XML parser context
5246 * parse a notation declaration
5248 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5250 * Hence there are actually 3 choices:
5251 * 'PUBLIC' S PubidLiteral
5252 * 'PUBLIC' S PubidLiteral S SystemLiteral
5253 * and 'SYSTEM' S SystemLiteral
5255 * See the NOTE on xmlParseExternalID().
5259 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses a notation declaration when the input starts with the NOTATION
 * declaration keyword. Visible steps: require blanks after the keyword,
 * read the notation name (colons are reported through xmlNsErr), require
 * blanks after the name, then read the identifiers with
 * xmlParseExternalID() in non-strict mode (third argument 0) so that a
 * public identifier alone is accepted.
 * The declaration is reported through sax->notationDecl; a declaration
 * ending in a different input than it started in raises
 * XML_ERR_ENTITY_BOUNDARY. Systemid and Pubid are freed before leaving.
 * NOTE(review): this listing elides lines, so the declarations of Pubid
 * and Systemid and the check for the closing character are not visible.
 */
5260 const xmlChar *name;
5264 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5265 int inputid = ctxt->input->id; /* compared later to detect an entity boundary crossing */
5268 if (SKIP_BLANKS == 0) {
5269 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5270 "Space required after '<!NOTATION'\n");
5274 name = xmlParseName(ctxt);
5276 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5279 if (xmlStrchr(name, ':') != NULL) {
5280 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5281 "colons are forbidden from notation names '%s'\n",
5284 if (SKIP_BLANKS == 0) {
5285 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5286 "Space required after the NOTATION name'\n");
5293 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); /* strict == 0: a public identifier alone is allowed */
5297 if (inputid != ctxt->input->id) {
5298 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299 "Notation declaration doesn't start and stop"
5300 " in the same entity\n");
5303 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5304 (ctxt->sax->notationDecl != NULL))
5305 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5307 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5309 if (Systemid != NULL) xmlFree(Systemid);
5310 if (Pubid != NULL) xmlFree(Pubid);
5315 * xmlParseEntityDecl:
5316 * @ctxt: an XML parser context
5318 * parse <!ENTITY declarations
5320 * [70] EntityDecl ::= GEDecl | PEDecl
5322 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5324 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5326 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5328 * [74] PEDef ::= EntityValue | ExternalID
5330 * [76] NDataDecl ::= S 'NDATA' S Name
5332 * [ VC: Notation Declared ]
5333 * The Name must match the declared name of a notation.
5337 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses an entity declaration when the input starts with the ENTITY
 * declaration keyword. Four kinds of definition are handled, each reported
 * with its own type constant:
 *   - quoted value, parameter entity: XML_INTERNAL_PARAMETER_ENTITY;
 *   - external identifier, parameter entity: XML_EXTERNAL_PARAMETER_ENTITY;
 *   - quoted value, general entity: XML_INTERNAL_GENERAL_ENTITY;
 *   - external identifier, general entity: reported through
 *     sax->unparsedEntityDecl when an NDATA name follows, otherwise as
 *     XML_EXTERNAL_GENERAL_PARSED_ENTITY.
 * External identifiers are parsed in strict mode (third argument 1). The
 * URI is checked with xmlParseURI(): an invalid URI is reported with
 * xmlErrMsgStr and a fragment raises XML_ERR_URI_FRAGMENT.
 * In SAX compatibility mode (no document yet, or a document whose version
 * equals SAX_COMPAT_MODE) a document and a DTD named fake are created so
 * that xmlSAX2EntityDecl() can record the entity.
 * After the declaration the entity is looked up again; the orig string is
 * presumably attached to it when its orig field is still NULL (that
 * assignment is elided).
 * value, URI, literal and orig are freed at the end when non-NULL.
 * NOTE(review): this listing elides lines, so several guards, the branch
 * on isParameter and the closing-character check are not visible here.
 */
5338 const xmlChar *name = NULL;
5339 xmlChar *value = NULL;
5340 xmlChar *URI = NULL, *literal = NULL;
5341 const xmlChar *ndata = NULL;
5342 int isParameter = 0; /* presumably set for the percent-sign form; the assignment is elided */
5343 xmlChar *orig = NULL; /* second output of xmlParseEntityValue() */
5345 /* GROW; done in the caller */
5346 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5347 int inputid = ctxt->input->id; /* compared later to detect an entity boundary crossing */
5350 if (SKIP_BLANKS == 0) {
5351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after '<!ENTITY'\n");
5357 if (SKIP_BLANKS == 0) {
5358 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359 "Space required after '%%'\n");
5364 name = xmlParseName(ctxt);
5366 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5367 "xmlParseEntityDecl: no name\n");
5370 if (xmlStrchr(name, ':') != NULL) {
5371 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5372 "colons are forbidden from entities names '%s'\n",
5375 if (SKIP_BLANKS == 0) {
5376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5377 "Space required after the entity name\n");
5380 ctxt->instate = XML_PARSER_ENTITY_DECL;
5382 * handle the various case of definitions...
5385 if ((RAW == '"') || (RAW == '\'')) {
5386 value = xmlParseEntityValue(ctxt, &orig);
5388 if ((ctxt->sax != NULL) &&
5389 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5390 ctxt->sax->entityDecl(ctxt->userData, name,
5391 XML_INTERNAL_PARAMETER_ENTITY,
5395 URI = xmlParseExternalID(ctxt, &literal, 1);
5396 if ((URI == NULL) && (literal == NULL)) {
5397 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5402 uri = xmlParseURI((const char *) URI);
5404 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5405 "Invalid URI: %s\n", URI);
5407 * This really ought to be a well formedness error
5408 * but the XML Core WG decided otherwise c.f. issue
5409 * E26 of the XML erratas.
5412 if (uri->fragment != NULL) {
5414 * Okay this is foolish to block those but not
5417 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5419 if ((ctxt->sax != NULL) &&
5420 (!ctxt->disableSAX) &&
5421 (ctxt->sax->entityDecl != NULL))
5422 ctxt->sax->entityDecl(ctxt->userData, name,
5423 XML_EXTERNAL_PARAMETER_ENTITY,
5424 literal, URI, NULL);
5431 if ((RAW == '"') || (RAW == '\'')) {
5432 value = xmlParseEntityValue(ctxt, &orig);
5433 if ((ctxt->sax != NULL) &&
5434 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5435 ctxt->sax->entityDecl(ctxt->userData, name,
5436 XML_INTERNAL_GENERAL_ENTITY,
5439 * For expat compatibility in SAX mode.
5441 if ((ctxt->myDoc == NULL) ||
5442 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5443 if (ctxt->myDoc == NULL) {
5444 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5445 if (ctxt->myDoc == NULL) {
5446 xmlErrMemory(ctxt, "New Doc failed");
5449 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5451 if (ctxt->myDoc->intSubset == NULL)
5452 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5453 BAD_CAST "fake", NULL, NULL);
5455 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5459 URI = xmlParseExternalID(ctxt, &literal, 1);
5460 if ((URI == NULL) && (literal == NULL)) {
5461 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5466 uri = xmlParseURI((const char *)URI);
5468 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5469 "Invalid URI: %s\n", URI);
5471 * This really ought to be a well formedness error
5472 * but the XML Core WG decided otherwise c.f. issue
5473 * E26 of the XML erratas.
5476 if (uri->fragment != NULL) {
5478 * Okay this is foolish to block those but not
5481 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5486 if ((RAW != '>') && (SKIP_BLANKS == 0)) { /* a blank is required unless the declaration ends here */
5487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5488 "Space required before 'NDATA'\n");
5490 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5492 if (SKIP_BLANKS == 0) {
5493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494 "Space required after 'NDATA'\n");
5496 ndata = xmlParseName(ctxt);
5497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5498 (ctxt->sax->unparsedEntityDecl != NULL))
5499 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5500 literal, URI, ndata);
5502 if ((ctxt->sax != NULL) &&
5503 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504 ctxt->sax->entityDecl(ctxt->userData, name,
5505 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5506 literal, URI, NULL);
5508 * For expat compatibility in SAX mode.
5509 * assuming the entity repalcement was asked for
5511 if ((ctxt->replaceEntities != 0) &&
5512 ((ctxt->myDoc == NULL) ||
5513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5514 if (ctxt->myDoc == NULL) {
5515 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5516 if (ctxt->myDoc == NULL) {
5517 xmlErrMemory(ctxt, "New Doc failed");
5520 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
5526 xmlSAX2EntityDecl(ctxt, name,
5527 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5528 literal, URI, NULL);
5533 if (ctxt->instate == XML_PARSER_EOF)
5537 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5538 "xmlParseEntityDecl: entity %s not terminated\n", name);
5539 xmlHaltParser(ctxt); /* an unterminated declaration stops the parser */
5541 if (inputid != ctxt->input->id) {
5542 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5543 "Entity declaration doesn't start and stop in"
5544 " the same entity\n");
5550 * Ugly mechanism to save the raw entity value.
5552 xmlEntityPtr cur = NULL;
5555 if ((ctxt->sax != NULL) &&
5556 (ctxt->sax->getParameterEntity != NULL))
5557 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5559 if ((ctxt->sax != NULL) &&
5560 (ctxt->sax->getEntity != NULL))
5561 cur = ctxt->sax->getEntity(ctxt->userData, name);
5562 if ((cur == NULL) && (ctxt->userData==ctxt)) { /* fall back to the SAX2 lookup when user data is the context itself */
5563 cur = xmlSAX2GetEntity(ctxt, name);
5566 if ((cur != NULL) && (cur->orig == NULL)) {
5573 if (value != NULL) xmlFree(value);
5574 if (URI != NULL) xmlFree(URI);
5575 if (literal != NULL) xmlFree(literal);
5576 if (orig != NULL) xmlFree(orig);
5581 * xmlParseDefaultDecl:
5582 * @ctxt: an XML parser context
5583 * @value: Receive a possible fixed default value for the attribute
5585 * Parse an attribute default declaration
5587 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5589 * [ VC: Required Attribute ]
5590 * if the default declaration is the keyword #REQUIRED, then the
5591 * attribute must be specified for all elements of the type in the
5592 * attribute-list declaration.
5594 * [ VC: Attribute Default Legal ]
5595 * The declared default value must meet the lexical constraints of
5596 * the declared attribute type c.f. xmlValidateAttributeDecl()
5598 * [ VC: Fixed Attribute Default ]
5599 * if an attribute has a default value declared with the #FIXED
5600 * keyword, instances of that attribute must match the default value.
5602 * [ WFC: No < in Attribute Values ]
5603 * handled in xmlParseAttValue()
5605 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5606 * or XML_ATTRIBUTE_FIXED.
5610 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
 * Parses the default declaration of an attribute definition.
 * The REQUIRED and IMPLIED keywords return XML_ATTRIBUTE_REQUIRED and
 * XML_ATTRIBUTE_IMPLIED directly. Otherwise the result starts as
 * XML_ATTRIBUTE_NONE, becomes XML_ATTRIBUTE_FIXED when the FIXED keyword
 * is present (blanks are required after it), and the default value is
 * read with xmlParseAttValue().
 * ctxt->instate is set to XML_PARSER_DTD after the value is parsed.
 * NOTE(review): this listing elides lines, so the guard around the final
 * error report, the store into *value and the return are not visible.
 */
5615 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5617 return(XML_ATTRIBUTE_REQUIRED);
5619 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5621 return(XML_ATTRIBUTE_IMPLIED);
5623 val = XML_ATTRIBUTE_NONE;
5624 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5626 val = XML_ATTRIBUTE_FIXED;
5627 if (SKIP_BLANKS == 0) {
5628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629 "Space required after '#FIXED'\n");
5632 ret = xmlParseAttValue(ctxt);
5633 ctxt->instate = XML_PARSER_DTD;
5635 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, /* reuses the error code already stored in the context */
5636 "Attribute default value declaration error\n");
5643 * xmlParseNotationType:
5644 * @ctxt: an XML parser context
5646 * parse a Notation attribute type.
5648 * Note: the leading 'NOTATION' S part has already been parsed...
5650 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5652 * [ VC: Notation Attributes ]
5653 * Values of this type must match one of the notation names included
5654 * in the declaration; all notation names in the declaration must be declared.
5656 * Returns: the notation attribute tree built while parsing
5660 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
/*
 * Parses the parenthesised, bar-separated list of names of a NOTATION
 * attribute type and builds a linked xmlEnumeration list (ret is the head,
 * last the tail).
 * Each name is read with xmlParseName(). A name already in the list raises
 * the XML_DTD_DUP_TOKEN validity error and is released with xmlFree unless
 * the dictionary owns it. A new name gets a node from
 * xmlCreateEnumeration().
 * Three visible failure paths free the partial list with
 * xmlFreeEnumeration(): a missing name, a failed node creation and a
 * missing closing parenthesis. The loop repeats while a bar follows.
 * NOTE(review): this listing elides lines, so the parenthesis checks, the
 * list linking and the return statements are not visible here.
 */
5661 const xmlChar *name;
5662 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5665 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5672 name = xmlParseName(ctxt);
5674 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5675 "Name expected in NOTATION declaration\n");
5676 xmlFreeEnumeration(ret);
5680 while (tmp != NULL) { /* linear scan of the names collected so far */
5681 if (xmlStrEqual(name, tmp->name)) {
5682 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5683 "standalone: attribute notation value token %s duplicated\n",
5685 if (!xmlDictOwns(ctxt->dict, name))
5686 xmlFree((xmlChar *) name);
5692 cur = xmlCreateEnumeration(name);
5694 xmlFreeEnumeration(ret);
5697 if (last == NULL) ret = last = cur; /* first node becomes both head and tail */
5704 } while (RAW == '|');
5706 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5707 xmlFreeEnumeration(ret);
5715 * xmlParseEnumerationType:
5716 * @ctxt: an XML parser context
5718 * parse an Enumeration attribute type.
5720 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5722 * [ VC: Enumeration ]
5723 * Values of this type must match one of the Nmtoken tokens in
5726 * Returns: the enumeration attribute tree built while parsing
5730 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
/*
 * Parses the parenthesised, bar-separated list of name tokens of an
 * enumeration attribute type and builds a linked xmlEnumeration list
 * (ret is the head, last the tail).
 * Each token is read with xmlParseNmtoken(). A token already in the list
 * raises the XML_DTD_DUP_TOKEN validity error. A new token gets a node
 * from xmlCreateEnumeration(); if that fails the partial list is freed
 * with xmlFreeEnumeration(). The loop repeats while a bar follows.
 * Both xmlDictOwns() tests presumably guard a release of the token string
 * when the dictionary does not own it (the guarded statements are elided).
 * NOTE(review): this listing elides lines, so the parenthesis checks, the
 * list linking and the return statements are not visible here.
 */
5732 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5735 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5742 name = xmlParseNmtoken(ctxt);
5744 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5748 while (tmp != NULL) { /* linear scan of the tokens collected so far */
5749 if (xmlStrEqual(name, tmp->name)) {
5750 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5751 "standalone: attribute enumeration value token %s duplicated\n",
5753 if (!xmlDictOwns(ctxt->dict, name))
5760 cur = xmlCreateEnumeration(name);
5761 if (!xmlDictOwns(ctxt->dict, name))
5764 xmlFreeEnumeration(ret);
5767 if (last == NULL) ret = last = cur; /* first node becomes both head and tail */
5774 } while (RAW == '|');
5776 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5784 * xmlParseEnumeratedType:
5785 * @ctxt: an XML parser context
5786 * @tree: the enumeration tree built while parsing
5788 * parse an Enumerated attribute type.
5790 * [57] EnumeratedType ::= NotationType | Enumeration
5792 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5795 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5799 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/*
 * Dispatches between the two enumerated attribute types.
 * When the input starts with the NOTATION keyword, blanks are required
 * after it and the list is parsed by xmlParseNotationType(); the result is
 * XML_ATTRIBUTE_NOTATION. Otherwise the list is parsed by
 * xmlParseEnumerationType() and the result is XML_ATTRIBUTE_ENUMERATION.
 * In both cases the list is stored in *tree and 0 is returned when the
 * sub-parser produced NULL.
 * NOTE(review): this listing elides lines, so the keyword skip and the
 * handling after the missing-blank error are not visible here.
 */
5800 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5802 if (SKIP_BLANKS == 0) {
5803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5804 "Space required after 'NOTATION'\n");
5807 *tree = xmlParseNotationType(ctxt);
5808 if (*tree == NULL) return(0);
5809 return(XML_ATTRIBUTE_NOTATION);
5811 *tree = xmlParseEnumerationType(ctxt);
5812 if (*tree == NULL) return(0);
5813 return(XML_ATTRIBUTE_ENUMERATION);
5817 * xmlParseAttributeType:
5818 * @ctxt: an XML parser context
5819 * @tree: the enumeration tree built while parsing
5821 * parse the Attribute list def for an element
5823 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5825 * [55] StringType ::= 'CDATA'
5827 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5828 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5830 * Validity constraints for attribute values syntax are checked in
5831 * xmlValidateAttributeValue()
5834 * Values of type ID must match the Name production. A name must not
5835 * appear more than once in an XML document as a value of this type;
5836 * i.e., ID values must uniquely identify the elements which bear them.
5838 * [ VC: One ID per Element Type ]
5839 * No element type may have more than one ID attribute specified.
5841 * [ VC: ID Attribute Default ]
5842 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5845 * Values of type IDREF must match the Name production, and values
5846 * of type IDREFS must match Names; each IDREF Name must match the value
5847 * of an ID attribute on some element in the XML document; i.e. IDREF
5848 * values must match the value of some ID attribute.
5850 * [ VC: Entity Name ]
5851 * Values of type ENTITY must match the Name production, values
5852 * of type ENTITIES must match Names; each Entity Name must match the
5853 * name of an unparsed entity declared in the DTD.
5855 * [ VC: Name Token ]
5856 * Values of type NMTOKEN must match the Nmtoken production; values
5857 * of type NMTOKENS must match Nmtokens.
5859 * Returns the attribute type
5862 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/*
 * Recognises the attribute type keyword at the current position and
 * returns the matching XML_ATTRIBUTE_* constant.
 * Longer keywords are tested before the keywords they start with (IDREFS
 * before IDREF before ID, NMTOKENS before NMTOKEN), so the longest match
 * wins. When no keyword matches, the type is parsed as an enumerated type
 * by xmlParseEnumeratedType(), which also fills *tree.
 * NOTE(review): the lines that skip past each matched keyword are elided
 * from this listing.
 */
5864 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5866 return(XML_ATTRIBUTE_CDATA);
5867 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5869 return(XML_ATTRIBUTE_IDREFS);
5870 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5872 return(XML_ATTRIBUTE_IDREF);
5873 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5875 return(XML_ATTRIBUTE_ID);
5876 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5878 return(XML_ATTRIBUTE_ENTITY);
5879 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5881 return(XML_ATTRIBUTE_ENTITIES);
5882 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5884 return(XML_ATTRIBUTE_NMTOKENS);
5885 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5887 return(XML_ATTRIBUTE_NMTOKEN);
5889 return(xmlParseEnumeratedType(ctxt, tree)); /* no keyword matched: must be a NOTATION or enumeration list */
5893 * xmlParseAttributeListDecl:
5894 * @ctxt: an XML parser context
5896 * parse the Attribute list def for an element
5898 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5900 * [53] AttDef ::= S Name S AttType S DefaultDecl
5904 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses one '<!ATTLIST' declaration: the element name first, then
 * attribute definitions in a loop until '>' is seen. Each definition
 * is reported through the SAX attributeDecl callback when one is
 * installed and SAX is not disabled.
 */
5905 const xmlChar *elemName;
5906 const xmlChar *attrName;
5907 xmlEnumerationPtr tree;
5909 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Input id at the start of the declaration, compared again at the end. */
5910 int inputid = ctxt->input->id;
5913 if (SKIP_BLANKS == 0) {
5914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5915 "Space required after '<!ATTLIST'\n");
5917 elemName = xmlParseName(ctxt);
5918 if (elemName == NULL) {
5919 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5920 "ATTLIST: no name for Element\n");
/* Loop until '>' is reached or the parser enters XML_PARSER_EOF. */
5925 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5928 xmlChar *defaultValue = NULL;
5932 attrName = xmlParseName(ctxt);
5933 if (attrName == NULL) {
5934 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5935 "ATTLIST: no name for Attribute\n");
5939 if (SKIP_BLANKS == 0) {
5940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5941 "Space required after the attribute name\n");
/* For enumerated types the list of allowed values comes back in tree. */
5945 type = xmlParseAttributeType(ctxt, &tree);
/*
 * From here on the error paths release what has been allocated so
 * far: the enumeration tree and, once parsed, the default value.
 */
5951 if (SKIP_BLANKS == 0) {
5952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5953 "Space required after the attribute type\n");
5955 xmlFreeEnumeration(tree);
5959 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5961 if (defaultValue != NULL)
5962 xmlFree(defaultValue);
5964 xmlFreeEnumeration(tree);
/*
 * Default values of non-CDATA attributes are whitespace-normalized
 * in place (the same buffer is passed as source and destination).
 */
5967 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5968 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5972 if (SKIP_BLANKS == 0) {
5973 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5974 "Space required after the attribute default value\n");
5975 if (defaultValue != NULL)
5976 xmlFree(defaultValue);
5978 xmlFreeEnumeration(tree);
/*
 * Report the definition to the application. The enumeration tree is
 * only freed here when the callback is NOT invoked; presumably the
 * callback takes ownership of it otherwise -- confirm against the
 * SAX handler implementation.
 */
5982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5983 (ctxt->sax->attributeDecl != NULL))
5984 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5985 type, def, defaultValue, tree);
5986 else if (tree != NULL)
5987 xmlFreeEnumeration(tree);
/*
 * In SAX2 mode a default value whose declaration is neither #IMPLIED
 * nor #REQUIRED is recorded through xmlAddDefAttrs().
 */
5989 if ((ctxt->sax2) && (defaultValue != NULL) &&
5990 (def != XML_ATTRIBUTE_IMPLIED) &&
5991 (def != XML_ATTRIBUTE_REQUIRED)) {
5992 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5995 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
/* The default value buffer is released once it has been processed. */
5997 if (defaultValue != NULL)
5998 xmlFree(defaultValue);
/* A different input id than at the start means an entity boundary was crossed. */
6002 if (inputid != ctxt->input->id) {
6003 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6004 "Attribute list declaration doesn't start and"
6005 " stop in the same entity\n");
6013 * xmlParseElementMixedContentDecl:
6014 * @ctxt: an XML parser context
6015 * @inputchk: the input used for the current entity, needed for boundary checks
6017 * parse the declaration for a Mixed Element content
6018 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6020 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6021 * '(' S? '#PCDATA' S? ')'
6023 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6025 * [ VC: No Duplicate Types ]
6026 * The same name must not appear more than once in a single
6027 * mixed-content declaration.
6029 * returns: the list of the xmlElementContentPtr describing the element choices
6031 xmlElementContentPtr
6032 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
/*
 * Builds the content tree of a Mixed declaration from PCDATA, OR and
 * ELEMENT nodes allocated with xmlNewDocElementContent() on
 * ctxt->myDoc. @inputchk is compared with the current input id to
 * detect a declaration that crosses an entity boundary.
 */
6033 xmlElementContentPtr ret = NULL, cur = NULL, n;
6034 const xmlChar *elem = NULL;
6037 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
/* The input id must still equal @inputchk. */
6042 if (ctxt->input->id != inputchk) {
6043 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6044 "Element content declaration doesn't start and"
6045 " stop in the same entity\n");
6048 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6052 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6057 if ((RAW == '(') || (RAW == '|')) {
6058 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6059 if (ret == NULL) return(NULL);
/* Loop while the current character is '|' and the parser has not stopped. */
6061 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6064 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6065 if (ret == NULL) return(NULL);
6071 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6072 if (n == NULL) return(NULL);
6073 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6082 elem = xmlParseName(ctxt);
/* Name error path: report it and free the tree built so far. */
6084 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6085 "xmlParseElementMixedContentDecl : Name expected\n");
6086 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* After the loop the input is tested for the two characters ')' and '*'. */
6092 if ((RAW == ')') && (NXT(1) == '*')) {
/* Attach an element node for elem as cur->c2; link its parent only if the allocation succeeded. */
6094 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6095 XML_ELEMENT_CONTENT_ELEMENT);
6096 if (cur->c2 != NULL)
6097 cur->c2->parent = cur;
6100 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6101 if (ctxt->input->id != inputchk) {
6102 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103 "Element content declaration doesn't start and"
6104 " stop in the same entity\n");
/* Error path: free the tree, then report XML_ERR_MIXED_NOT_STARTED. */
6108 xmlFreeDocElementContent(ctxt->myDoc, ret);
6109 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
/* NOTE(review): presumably reached when '#PCDATA' is not found at the start -- confirm. */
6114 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6120 * xmlParseElementChildrenContentDeclPriv:
6121 * @ctxt: an XML parser context
6122 * @inputchk: the input used for the current entity, needed for boundary checks
6123 * @depth: the level of recursion
6125 * parse the declaration for a Children Element content
6126 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6129 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6131 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6133 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6135 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6137 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6138 * TODO Parameter-entity replacement text must be properly nested
6139 * with parenthesized groups. That is to say, if either of the
6140 * opening or closing parentheses in a choice, seq, or Mixed
6141 * construct is contained in the replacement text for a parameter
6142 * entity, both must be contained in the same replacement text. For
6143 * interoperability, if a parameter-entity reference appears in a
6144 * choice, seq, or Mixed construct, its replacement text should not
6145 * be empty, and neither the first nor last non-blank character of
6146 * the replacement text should be a connector (| or ,).
6148 * Returns the tree of xmlElementContentPtr describing the element
6151 static xmlElementContentPtr
6152 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6154 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6155 const xmlChar *elem;
/*
 * Recursive worker for the children content model. It calls itself
 * for nested groups and builds a tree of ELEMENT, SEQ and OR nodes
 * allocated on ctxt->myDoc.
 *
 * Depth guard: nesting deeper than 128 levels is refused unless the
 * XML_PARSE_HUGE option is set.
 */
6158 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6160 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6161 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
/* The recursive call receives the current input id as its boundary check. */
6168 int inputid = ctxt->input->id;
6170 /* Recurse on first child */
6173 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* The other form of first child is a Name, stored in an ELEMENT node. */
6178 elem = xmlParseName(ctxt);
6180 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6183 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6185 xmlErrMemory(ctxt, NULL);
/* Occurrence of the first child: OPT, MULT ('*'), PLUS ('+') or ONCE. */
6190 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6192 } else if (RAW == '*') {
6193 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6195 } else if (RAW == '+') {
6196 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6199 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
/* Loop until ')' is reached or the parser enters XML_PARSER_EOF. */
6205 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6207 * Each loop we parse one separator and one element.
/* type starts at 0 and is set to the first separator character seen. */
6210 if (type == 0) type = CUR;
6213 * Detect "Name | Name , Name" error
6215 else if (type != CUR) {
6216 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6217 "xmlParseElementChildrenContentDecl : '%c' expected\n",
/* Error cleanup: last is freed on its own only when it is a distinct node from ret. */
6219 if ((last != NULL) && (last != ret))
6220 xmlFreeDocElementContent(ctxt->myDoc, last);
6222 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* This branch allocates a SEQ operator node. */
6227 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6229 if ((last != NULL) && (last != ret))
6230 xmlFreeDocElementContent(ctxt->myDoc, last);
6231 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* '|' branch: same separator check, with an OR operator node. */
6249 } else if (RAW == '|') {
6250 if (type == 0) type = CUR;
6253 * Detect "Name , Name | Name" error
6255 else if (type != CUR) {
6256 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6257 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6259 if ((last != NULL) && (last != ret))
6260 xmlFreeDocElementContent(ctxt->myDoc, last);
6262 xmlFreeDocElementContent(ctxt->myDoc, ret);
6267 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6269 if ((last != NULL) && (last != ret))
6270 xmlFreeDocElementContent(ctxt->myDoc, last);
6272 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Error path: report XML_ERR_ELEMCONTENT_NOT_FINISHED and free the partial tree. */
6291 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6292 if ((last != NULL) && (last != ret))
6293 xmlFreeDocElementContent(ctxt->myDoc, last);
6295 xmlFreeDocElementContent(ctxt->myDoc, ret);
6302 int inputid = ctxt->input->id;
6303 /* Recurse on second child */
6306 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* Name form of the next child; the error paths free the tree rooted at ret. */
6310 elem = xmlParseName(ctxt);
6312 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6314 xmlFreeDocElementContent(ctxt->myDoc, ret);
6317 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6320 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Occurrence of this child, same scheme as for the first one. */
6324 last->ocur = XML_ELEMENT_CONTENT_OPT;
6326 } else if (RAW == '*') {
6327 last->ocur = XML_ELEMENT_CONTENT_MULT;
6329 } else if (RAW == '+') {
6330 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6333 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6339 if ((cur != NULL) && (last != NULL)) {
/* The input id must still equal @inputchk. */
6344 if (ctxt->input->id != inputchk) {
6345 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346 "Element content declaration doesn't start and stop in"
6347 " the same entity\n");
/*
 * Occurrence applied to the tree root ret. In this branch an existing
 * PLUS or MULT becomes MULT, anything else becomes OPT.
 */
6352 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6356 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6359 } else if (RAW == '*') {
6361 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6364 * Some normalization:
6365 * (a | b* | c?)* == (a | b | c)*
/* For each OR node visited, children marked OPT or MULT are reset to ONCE. */
6367 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6368 if ((cur->c1 != NULL) &&
6369 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6370 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6371 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6372 if ((cur->c2 != NULL) &&
6373 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6374 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6375 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
/* '+' branch: an existing OPT or MULT becomes MULT, anything else PLUS. */
6380 } else if (RAW == '+') {
6384 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6385 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6386 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6388 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6390 * Some normalization:
6391 * (a | b*)+ == (a | b)*
6392 * (a | b?)+ == (a | b)*
6394 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6395 if ((cur->c1 != NULL) &&
6396 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6397 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6398 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6401 if ((cur->c2 != NULL) &&
6402 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6403 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6404 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6418 * xmlParseElementChildrenContentDecl:
6419 * @ctxt: an XML parser context
6420 * @inputchk: the input used for the current entity, needed for boundary checks
6422 * parse the declaration for a Children Element content
6423 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6425 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6427 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6429 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6431 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6433 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6434 * TODO Parameter-entity replacement text must be properly nested
6435 * with parenthesized groups. That is to say, if either of the
6436 * opening or closing parentheses in a choice, seq, or Mixed
6437 * construct is contained in the replacement text for a parameter
6438 * entity, both must be contained in the same replacement text. For
6439 * interoperability, if a parameter-entity reference appears in a
6440 * choice, seq, or Mixed construct, its replacement text should not
6441 * be empty, and neither the first nor last non-blank character of
6442 * the replacement text should be a connector (| or ,).
6444 * Returns the tree of xmlElementContentPtr describing the element
6447 xmlElementContentPtr
6448 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
/*
 * Public entry point: forwards @ctxt and @inputchk to
 * xmlParseElementChildrenContentDeclPriv() with 1 as third argument
 * and returns its result unchanged.
 */
6449 /* stub left for API/ABI compat */
6450 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6454 * xmlParseElementContentDecl:
6455 * @ctxt: an XML parser context
6456 * @name: the name of the element being defined.
6457 * @result: the Element Content pointer will be stored here if any
6459 * parse the declaration for an Element content either Mixed or Children,
6460 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6462 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6464 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6468 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6469 xmlElementContentPtr *result) {
/*
 * Chooses between the Mixed and the children content parsers
 * depending on whether the content starts with '#PCDATA'.
 */
6471 xmlElementContentPtr tree = NULL;
/* Input id at entry, handed to the sub-parsers as their boundary check value. */
6472 int inputid = ctxt->input->id;
/* Error report naming the element (@name) when '(' was expected. */
6478 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6479 "xmlParseElementContentDecl : %s '(' expected\n", name);
6484 if (ctxt->instate == XML_PARSER_EOF)
/* '#PCDATA' selects Mixed content; the other branch parses children content with 1 as third argument. */
6487 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6488 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6489 res = XML_ELEMENT_TYPE_MIXED;
6491 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6492 res = XML_ELEMENT_TYPE_ELEMENT;
6500 * xmlParseElementDecl:
6501 * @ctxt: an XML parser context
6503 * parse an Element declaration.
6505 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6507 * [ VC: Unique Element Type Declaration ]
6508 * No element type may be declared more than once
6510 * Returns the type of the element, or -1 in case of error
6513 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses one '<!ELEMENT' declaration: the element name, then a
 * content specification (EMPTY, ANY or a parenthesized content
 * model). The result is reported through the SAX elementDecl
 * callback when one is installed and SAX is not disabled.
 */
6514 const xmlChar *name;
6516 xmlElementContentPtr content = NULL;
6518 /* GROW; done in the caller */
6519 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Input id at the start of the declaration, compared again after it. */
6520 int inputid = ctxt->input->id;
6523 if (SKIP_BLANKS == 0) {
6524 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6525 "Space required after 'ELEMENT'\n");
6528 name = xmlParseName(ctxt);
6530 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6531 "xmlParseElementDecl: no name for Element\n");
6534 if (SKIP_BLANKS == 0) {
6535 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6536 "Space required after the element name\n");
/* Content specification: the keyword EMPTY, the keyword ANY, or '(' starting a content model. */
6538 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6541 * Element must always be empty.
6543 ret = XML_ELEMENT_TYPE_EMPTY;
6544 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6548 * Element is a generic container.
6550 ret = XML_ELEMENT_TYPE_ANY;
6551 } else if (RAW == '(') {
/* The content tree is returned through content. */
6552 ret = xmlParseElementContentDecl(ctxt, name, &content);
6555 * [ WFC: PEs in Internal Subset ] error handling.
/* A '%' here while not in external content and with a single input on the stack gets its own error. */
6557 if ((RAW == '%') && (ctxt->external == 0) &&
6558 (ctxt->inputNr == 1)) {
6559 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6560 "PEReference: forbidden within markup decl in internal subset\n");
6562 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6563 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
/* XML_ERR_GT_REQUIRED path: any content tree already built is freed. */
6571 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6572 if (content != NULL) {
6573 xmlFreeDocElementContent(ctxt->myDoc, content);
6576 if (inputid != ctxt->input->id) {
6577 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6578 "Element declaration doesn't start and stop in"
6579 " the same entity\n");
/*
 * content->parent is cleared before the callback and inspected after
 * it: if it is still NULL the tree is freed here. When the callback
 * is not invoked at all, the tree is freed unconditionally.
 */
6583 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6584 (ctxt->sax->elementDecl != NULL)) {
6585 if (content != NULL)
6586 content->parent = NULL;
6587 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6589 if ((content != NULL) && (content->parent == NULL)) {
6591 * this is a trick: if xmlAddElementDecl is called,
6592 * instead of copying the full tree it is plugged directly
6593 * if called from the parser. Avoid duplicating the
6594 * interfaces or change the API/ABI
6596 xmlFreeDocElementContent(ctxt->myDoc, content);
6598 } else if (content != NULL) {
6599 xmlFreeDocElementContent(ctxt->myDoc, content);
6607 * xmlParseConditionalSections
6608 * @ctxt: an XML parser context
6610 * [61] conditionalSect ::= includeSect | ignoreSect
6611 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6612 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6613 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6614 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6618 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/*
 * Handles an INCLUDE or IGNORE conditional section. INCLUDE content
 * is parsed as declarations (recursing for nested conditional
 * sections); IGNORE content is scanned with the parser state set to
 * XML_PARSER_IGNORE. Any other keyword is an error that halts the
 * parser.
 */
/* Input id at entry; every later mismatch is reported as an entity boundary error. */
6619 int id = ctxt->input->id;
6623 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6627 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6628 xmlHaltParser(ctxt);
6631 if (ctxt->input->id != id) {
6632 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6633 "All markup of the conditional section is not"
6634 " in the same entity\n");
/* Optional tracing, emitted only when xmlParserDebugEntities is set. */
6638 if (xmlParserDebugEntities) {
6639 if ((ctxt->input != NULL) && (ctxt->input->filename))
6640 xmlGenericError(xmlGenericErrorContext,
6641 "%s(%d): ", ctxt->input->filename,
6643 xmlGenericError(xmlGenericErrorContext,
6644 "Entering INCLUDE Conditional Section\n");
/* Loop until "]]>" is at the current position, the input is exhausted, or the parser stops. */
6649 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6650 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot of position and consumed count, used below to detect an iteration that made no progress. */
6651 const xmlChar *check = CUR_PTR;
6652 unsigned int cons = ctxt->input->consumed;
/* "<![" opens a nested conditional section, handled by recursion; the other call parses a markup declaration. */
6654 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6655 xmlParseConditionalSections(ctxt);
6657 xmlParseMarkupDecl(ctxt);
/* Nothing was consumed in this iteration: report it and halt the parser. */
6662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6663 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6664 xmlHaltParser(ctxt);
6668 if (xmlParserDebugEntities) {
6669 if ((ctxt->input != NULL) && (ctxt->input->filename))
6670 xmlGenericError(xmlGenericErrorContext,
6671 "%s(%d): ", ctxt->input->filename,
6673 xmlGenericError(xmlGenericErrorContext,
6674 "Leaving INCLUDE Conditional Section\n");
6677 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6679 xmlParserInputState instate;
6685 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6686 xmlHaltParser(ctxt);
6689 if (ctxt->input->id != id) {
6690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6691 "All markup of the conditional section is not"
6692 " in the same entity\n");
6696 if (xmlParserDebugEntities) {
6697 if ((ctxt->input != NULL) && (ctxt->input->filename))
6698 xmlGenericError(xmlGenericErrorContext,
6699 "%s(%d): ", ctxt->input->filename,
6701 xmlGenericError(xmlGenericErrorContext,
6702 "Entering IGNORE Conditional Section\n");
6706 * Parse up to the end of the conditional section
6707 * But disable SAX event generating DTD building in the meantime
/* Save disableSAX and instate; SAX is disabled unless the parser runs in recovery mode. Both are restored after the scan. */
6709 state = ctxt->disableSAX;
6710 instate = ctxt->instate;
6711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6712 ctxt->instate = XML_PARSER_IGNORE;
/* Scan while depth is non-negative, input remains and the parser has not stopped. */
6714 while (((depth >= 0) && (RAW != 0)) &&
6715 (ctxt->instate != XML_PARSER_EOF)) {
6716 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
/*
 * Each "]]>" decrements depth; its three characters are skipped only
 * while depth stays non-negative, so the "]]>" that drives depth
 * below zero is not consumed inside this loop.
 */
6721 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6722 if (--depth >= 0) SKIP(3);
6729 ctxt->disableSAX = state;
6730 ctxt->instate = instate;
6732 if (xmlParserDebugEntities) {
6733 if ((ctxt->input != NULL) && (ctxt->input->filename))
6734 xmlGenericError(xmlGenericErrorContext,
6735 "%s(%d): ", ctxt->input->filename,
6737 xmlGenericError(xmlGenericErrorContext,
6738 "Leaving IGNORE Conditional Section\n");
/* Neither INCLUDE nor IGNORE: report the invalid keyword and halt the parser. */
6742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6743 xmlHaltParser(ctxt);
6751 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6753 if (ctxt->input->id != id) {
6754 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6755 "All markup of the conditional section is not in"
6756 " the same entity\n");
/* Guard: the parser must still be running and at least three bytes must remain in the input buffer. */
6758 if ((ctxt-> instate != XML_PARSER_EOF) &&
6759 ((ctxt->input->cur + 3) <= ctxt->input->end))
6765 * xmlParseMarkupDecl:
6766 * @ctxt: an XML parser context
6768 * parse Markup declarations
6770 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6771 * NotationDecl | PI | Comment
6773 * [ VC: Proper Declaration/PE Nesting ]
6774 * Parameter-entity replacement text must be properly nested with
6775 * markup declarations. That is to say, if either the first character
6776 * or the last character of a markup declaration (markupdecl above) is
6777 * contained in the replacement text for a parameter-entity reference,
6778 * both must be contained in the same replacement text.
6780 * [ WFC: PEs in Internal Subset ]
6781 * In the internal DTD subset, parameter-entity references can occur
6782 * only where markup declarations can occur, not within markup declarations.
6783 * (This does not apply to references that occur in external parameter
6784 * entities or to the external subset.)
6787 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
/*
 * Dispatches on the characters at the current position to the parser
 * for element, entity, attribute-list or notation declarations, or
 * for comments.
 */
6790 if (NXT(1) == '!') {
6794 xmlParseElementDecl(ctxt);
6795 else if (NXT(3) == 'N')
6796 xmlParseEntityDecl(ctxt);
6799 xmlParseAttributeListDecl(ctxt);
6802 xmlParseNotationDecl(ctxt);
6805 xmlParseComment(ctxt);
6808 /* there is an error but it will be detected later */
/* NOTE(review): '?' as second character presumably introduces a processing instruction -- confirm. */
6811 } else if (NXT(1) == '?') {
6817 * detect requirement to exit there and act accordingly
6818 * and avoid having instate overriden later on
6820 if (ctxt->instate == XML_PARSER_EOF)
6824 * Conditional sections are allowed from entities included
6825 * by PE References in the internal subset.
/* Applies only outside external content (external == 0) and with more than one input on the stack. */
6827 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6828 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6829 xmlParseConditionalSections(ctxt);
/* The parser state is set to XML_PARSER_DTD. */
6833 ctxt->instate = XML_PARSER_DTD;
6838 * @ctxt: an XML parser context
6840 * parse an XML declaration header for external entities
6842 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6846 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses a text declaration: an optional version, an encoding
 * declaration and the closing "?>". The version string is stored on
 * the current input.
 */
6848 const xmlChar *encoding;
6851 * We know that '<?xml' is here.
/* "<?xml" must be followed by a blank character. */
6853 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6856 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6860 if (SKIP_BLANKS == 0) {
6861 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6862 "Space needed after '<?xml'\n");
6866 * We may have the VersionInfo here.
/* When no version is given, a copy of XML_DEFAULT_VERSION is used instead. */
6868 version = xmlParseVersionInfo(ctxt);
6869 if (version == NULL)
6870 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6872 if (SKIP_BLANKS == 0) {
6873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6874 "Space needed here\n");
/* The version string is stored on the input structure. */
6877 ctxt->input->version = version;
6880 * We must have the encoding declaration
6882 encoding = xmlParseEncodingDecl(ctxt);
6883 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6885 * The XML REC instructs us to stop parsing right here
/* A missing encoding is reported only when no earlier error has been recorded (errNo is XML_ERR_OK). */
6889 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6890 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6891 "Missing encoding in text declaration\n");
/* Closing delimiter: "?>" is the expected form; a bare '>' and anything else both report XML_ERR_XMLDECL_NOT_FINISHED. */
6895 if ((RAW == '?') && (NXT(1) == '>')) {
6897 } else if (RAW == '>') {
6898 /* Deprecated old WD ... */
6899 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6902 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6903 MOVETO_ENDTAG(CUR_PTR);
6909 * xmlParseExternalSubset:
6910 * @ctxt: an XML parser context
6911 * @ExternalID: the external identifier
6912 * @SystemID: the system identifier (or URL)
6914 * parse Markup declarations from an external subset
6916 * [30] extSubset ::= textDecl? extSubsetDecl
6918 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6921 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6922 const xmlChar *SystemID) {
/*
 * Parses an external DTD subset: detects the character encoding,
 * handles an optional text declaration, makes sure a document and an
 * internal subset exist, then loops over markup declarations and
 * conditional sections.
 */
6923 xmlDetectSAX2(ctxt);
/* Encoding detection runs only when no encoding is set yet and at least four bytes of input are available. */
6926 if ((ctxt->encoding == NULL) &&
6927 (ctxt->input->end - ctxt->input->cur >= 4)) {
6929 xmlCharEncoding enc;
6935 enc = xmlDetectCharEncoding(start, 4);
6936 if (enc != XML_CHAR_ENCODING_NONE)
6937 xmlSwitchEncoding(ctxt, enc);
6940 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6941 xmlParseTextDecl(ctxt);
6942 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6944 * The XML REC instructs us to stop parsing right here
6946 xmlHaltParser(ctxt);
/* Create a document with version "1.0" when the context has none. */
6950 if (ctxt->myDoc == NULL) {
6951 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6952 if (ctxt->myDoc == NULL) {
6953 xmlErrMemory(ctxt, "New Doc failed");
6956 ctxt->myDoc->properties = XML_DOC_INTERNAL;
/* Create the internal subset from @ExternalID and @SystemID when the document has none. */
6958 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6959 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6961 ctxt->instate = XML_PARSER_DTD;
/* Main loop over input starting with "<?" or "<!". */
6964 while (((RAW == '<') && (NXT(1) == '?')) ||
6965 ((RAW == '<') && (NXT(1) == '!')) ||
/* Snapshot of position and consumed count, used below to detect an iteration that made no progress. */
6967 const xmlChar *check = CUR_PTR;
6968 unsigned int cons = ctxt->input->consumed;
/* "<![" opens a conditional section; the other call parses a markup declaration. */
6971 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6972 xmlParseConditionalSections(ctxt);
6974 xmlParseMarkupDecl(ctxt);
6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6978 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6984 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6990 * xmlParseReference:
6991 * @ctxt: an XML parser context
6993 * parse and handle entity references in content; depending on the SAX
6994 * interface, this may end up in a call to characters() if this is a
6995 * CharRef or a predefined entity, if there is no reference() callback,
6996 * or if the parser was asked to switch to that mode.
6998 * [67] Reference ::= EntityRef | CharRef
7001 xmlParseReference(xmlParserCtxtPtr ctxt) {
7005 xmlNodePtr list = NULL;
7006 xmlParserErrors ret = XML_ERR_OK;
7013 * Simple case of a CharRef
7015 if (NXT(1) == '#') {
7019 int value = xmlParseCharRef(ctxt);
7023 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7025 * So we are using non-UTF-8 buffers
7026 * Check that the char fit on 8bits, if not
7027 * generate a CharRef.
7029 if (value <= 0xFF) {
7032 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7033 (!ctxt->disableSAX))
7034 ctxt->sax->characters(ctxt->userData, out, 1);
7036 if ((hex == 'x') || (hex == 'X'))
7037 snprintf((char *)out, sizeof(out), "#x%X", value);
7039 snprintf((char *)out, sizeof(out), "#%d", value);
7040 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7041 (!ctxt->disableSAX))
7042 ctxt->sax->reference(ctxt->userData, out);
7046 * Just encode the value in UTF-8
7048 COPY_BUF(0 ,out, i, value);
7050 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7051 (!ctxt->disableSAX))
7052 ctxt->sax->characters(ctxt->userData, out, i);
7058 * We are seeing an entity reference
7060 ent = xmlParseEntityRef(ctxt);
7061 if (ent == NULL) return;
7062 if (!ctxt->wellFormed)
7064 was_checked = ent->checked;
7066 /* special case of predefined entities */
7067 if ((ent->name == NULL) ||
7068 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7070 if (val == NULL) return;
7072 * inline the entity.
7074 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7075 (!ctxt->disableSAX))
7076 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7081 * The first reference to the entity trigger a parsing phase
7082 * where the ent->children is filled with the result from
7084 * Note: external parsed entities will not be loaded, it is not
7085 * required for a non-validating parser, unless the parsing option
7086 * of validating, or substituting entities were given. Doing so is
7087 * far more secure as the parser will only process data coming from
7088 * the document entity by default.
7090 if (((ent->checked == 0) ||
7091 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7092 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7093 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7094 unsigned long oldnbent = ctxt->nbentities;
7097 * This is a bit hackish but this seems the best
7098 * way to make sure both SAX and DOM entity support
7102 if (ctxt->userData == ctxt)
7105 user_data = ctxt->userData;
7108 * Check that this entity is well formed
7109 * 4.3.2: An internal general parsed entity is well-formed
7110 * if its replacement text matches the production labeled
7113 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7115 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7119 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7121 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7122 user_data, ctxt->depth, ent->URI,
7123 ent->ExternalID, &list);
7126 ret = XML_ERR_ENTITY_PE_INTERNAL;
7127 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7128 "invalid entity type found\n", NULL);
7132 * Store the number of entities needing parsing for this entity
7133 * content and do checkings
7135 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7136 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7138 if (ret == XML_ERR_ENTITY_LOOP) {
7139 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7140 xmlFreeNodeList(list);
7143 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7144 xmlFreeNodeList(list);
7148 if ((ret == XML_ERR_OK) && (list != NULL)) {
7149 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7150 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7151 (ent->children == NULL)) {
7152 ent->children = list;
7153 if (ctxt->replaceEntities) {
7155 * Prune it directly in the generated document
7156 * except for single text nodes.
7158 if (((list->type == XML_TEXT_NODE) &&
7159 (list->next == NULL)) ||
7160 (ctxt->parseMode == XML_PARSE_READER)) {
7161 list->parent = (xmlNodePtr) ent;
7166 while (list != NULL) {
7167 list->parent = (xmlNodePtr) ctxt->node;
7168 list->doc = ctxt->myDoc;
7169 if (list->next == NULL)
7173 list = ent->children;
7174 #ifdef LIBXML_LEGACY_ENABLED
7175 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7176 xmlAddEntityReference(ent, list, NULL);
7177 #endif /* LIBXML_LEGACY_ENABLED */
7181 while (list != NULL) {
7182 list->parent = (xmlNodePtr) ent;
7183 xmlSetTreeDoc(list, ent->doc);
7184 if (list->next == NULL)
7190 xmlFreeNodeList(list);
7193 } else if ((ret != XML_ERR_OK) &&
7194 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7195 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7196 "Entity '%s' failed to parse\n", ent->name);
7197 xmlParserEntityCheck(ctxt, 0, ent, 0);
7198 } else if (list != NULL) {
7199 xmlFreeNodeList(list);
7202 if (ent->checked == 0)
7205 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7207 } else if (ent->checked != 1) {
7208 ctxt->nbentities += ent->checked / 2;
7212 * Now that the entity content has been gathered
7213 * provide it to the application, this can take different forms based
7214 * on the parsing modes.
7216 if (ent->children == NULL) {
7218 * Probably running in SAX mode and the callbacks don't
7219 * build the entity content. So unless we already went
7220 * though parsing for first checking go though the entity
7221 * content to generate callbacks associated to the entity
7223 if (was_checked != 0) {
7226 * This is a bit hackish but this seems the best
7227 * way to make sure both SAX and DOM entity support
7230 if (ctxt->userData == ctxt)
7233 user_data = ctxt->userData;
7235 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7237 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7238 ent->content, user_data, NULL);
7240 } else if (ent->etype ==
7241 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7243 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7244 ctxt->sax, user_data, ctxt->depth,
7245 ent->URI, ent->ExternalID, NULL);
7248 ret = XML_ERR_ENTITY_PE_INTERNAL;
7249 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7250 "invalid entity type found\n", NULL);
7252 if (ret == XML_ERR_ENTITY_LOOP) {
7253 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7257 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7258 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7260 * Entity reference callback comes second, it's somewhat
7261 * superfluous but a compatibility to historical behaviour
7263 ctxt->sax->reference(ctxt->userData, ent->name);
7269 * If we didn't get any children for the entity being built
7271 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7272 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7276 ctxt->sax->reference(ctxt->userData, ent->name);
7280 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7282 * There is a problem on the handling of _private for entities
7283 * (bug 155816): Should we copy the content of the field from
7284 * the entity (possibly overwriting some value set by the user
7285 * when a copy is created), should we leave it alone, or should
7286 * we try to take care of different situations? The problem
7287 * is exacerbated by the usage of this field by the xmlReader.
7288 * To fix this bug, we look at _private on the created node
7289 * and, if it's NULL, we copy in whatever was in the entity.
7290 * If it's not NULL we leave it alone. This is somewhat of a
7291 * hack - maybe we should have further tests to determine
7294 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7296 * Seems we are generating the DOM content, do
7297 * a simple tree copy for all references except the first
7298 * In the first occurrence list contains the replacement.
7300 if (((list == NULL) && (ent->owner == 0)) ||
7301 (ctxt->parseMode == XML_PARSE_READER)) {
7302 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7305 * We are copying here, make sure there is no abuse
7307 ctxt->sizeentcopy += ent->length + 5;
7308 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7312 * when operating on a reader, the entities definitions
7313 * are always owning the entities subtree.
7314 if (ctxt->parseMode == XML_PARSE_READER)
7318 cur = ent->children;
7319 while (cur != NULL) {
7320 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7322 if (nw->_private == NULL)
7323 nw->_private = cur->_private;
7324 if (firstChild == NULL){
7327 nw = xmlAddChild(ctxt->node, nw);
7329 if (cur == ent->last) {
7331 * needed to detect some strange empty
7332 * node cases in the reader tests
7334 if ((ctxt->parseMode == XML_PARSE_READER) &&
7336 (nw->type == XML_ELEMENT_NODE) &&
7337 (nw->children == NULL))
7344 #ifdef LIBXML_LEGACY_ENABLED
7345 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7346 xmlAddEntityReference(ent, firstChild, nw);
7347 #endif /* LIBXML_LEGACY_ENABLED */
7348 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7349 xmlNodePtr nw = NULL, cur, next, last,
7353 * We are copying here, make sure there is no abuse
7355 ctxt->sizeentcopy += ent->length + 5;
7356 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7360 * Copy the entity child list and make it the new
7361 * entity child list. The goal is to make sure any
7362 * ID or REF referenced will be the one from the
7363 * document content and not the entity copy.
7365 cur = ent->children;
7366 ent->children = NULL;
7369 while (cur != NULL) {
7373 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7375 if (nw->_private == NULL)
7376 nw->_private = cur->_private;
7377 if (firstChild == NULL){
7380 xmlAddChild((xmlNodePtr) ent, nw);
7381 xmlAddChild(ctxt->node, cur);
7387 if (ent->owner == 0)
7389 #ifdef LIBXML_LEGACY_ENABLED
7390 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7391 xmlAddEntityReference(ent, firstChild, nw);
7392 #endif /* LIBXML_LEGACY_ENABLED */
7394 const xmlChar *nbktext;
7397 * the name change is to avoid coalescing of the
7398 * node with a possible previous text one which
7399 * would make ent->children a dangling pointer
7401 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7403 if (ent->children->type == XML_TEXT_NODE)
7404 ent->children->name = nbktext;
7405 if ((ent->last != ent->children) &&
7406 (ent->last->type == XML_TEXT_NODE))
7407 ent->last->name = nbktext;
7408 xmlAddChildList(ctxt->node, ent->children);
7412 * This is to avoid a nasty side effect, see
7413 * characters() in SAX.c
7423 * xmlParseEntityRef:
7424 * @ctxt: an XML parser context
7426 * parse ENTITY references declarations
7428 * [68] EntityRef ::= '&' Name ';'
7430 * [ WFC: Entity Declared ]
7431 * In a document without any DTD, a document with only an internal DTD
7432 * subset which contains no parameter entity references, or a document
7433 * with "standalone='yes'", the Name given in the entity reference
7434 * must match that in an entity declaration, except that well-formed
7435 * documents need not declare any of the following entities: amp, lt,
7436 * gt, apos, quot. The declaration of a parameter entity must precede
7437 * any reference to it. Similarly, the declaration of a general entity
7438 * must precede any reference to it which appears in a default value in an
7439 * attribute-list declaration. Note that if entities are declared in the
7440 * external subset or in external parameter entities, a non-validating
7441 * processor is not obligated to read and process their declarations;
7442 * for such documents, the rule that an entity must be declared is a
7443 * well-formedness constraint only if standalone='yes'.
7445 * [ WFC: Parsed Entity ]
7446 * An entity reference must not contain the name of an unparsed entity
7448 * Returns the xmlEntityPtr if found, or NULL otherwise.
7451 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7452 const xmlChar *name;
7453 xmlEntityPtr ent = NULL;
7456 if (ctxt->instate == XML_PARSER_EOF)
7462 name = xmlParseName(ctxt);
7464 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7465 "xmlParseEntityRef: no name\n");
7469 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7475 * Predefined entities override any extra definition
7477 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7478 ent = xmlGetPredefinedEntity(name);
7484 * Increase the number of entity references parsed
7489 * Ask first SAX for entity resolution, otherwise try the
7490 * entities which may have been stored in the parser context.
7492 if (ctxt->sax != NULL) {
7493 if (ctxt->sax->getEntity != NULL)
7494 ent = ctxt->sax->getEntity(ctxt->userData, name);
7495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 (ctxt->options & XML_PARSE_OLDSAX))
7497 ent = xmlGetPredefinedEntity(name);
7498 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7499 (ctxt->userData==ctxt)) {
7500 ent = xmlSAX2GetEntity(ctxt, name);
7503 if (ctxt->instate == XML_PARSER_EOF)
7506 * [ WFC: Entity Declared ]
7507 * In a document without any DTD, a document with only an
7508 * internal DTD subset which contains no parameter entity
7509 * references, or a document with "standalone='yes'", the
7510 * Name given in the entity reference must match that in an
7511 * entity declaration, except that well-formed documents
7512 * need not declare any of the following entities: amp, lt,
7514 * The declaration of a parameter entity must precede any
7516 * Similarly, the declaration of a general entity must
7517 * precede any reference to it which appears in a default
7518 * value in an attribute-list declaration. Note that if
7519 * entities are declared in the external subset or in
7520 * external parameter entities, a non-validating processor
7521 * is not obligated to read and process their declarations;
7522 * for such documents, the rule that an entity must be
7523 * declared is a well-formedness constraint only if
7527 if ((ctxt->standalone == 1) ||
7528 ((ctxt->hasExternalSubset == 0) &&
7529 (ctxt->hasPErefs == 0))) {
7530 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7533 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7534 "Entity '%s' not defined\n", name);
7535 if ((ctxt->inSubset == 0) &&
7536 (ctxt->sax != NULL) &&
7537 (ctxt->sax->reference != NULL)) {
7538 ctxt->sax->reference(ctxt->userData, name);
7541 xmlParserEntityCheck(ctxt, 0, ent, 0);
7546 * [ WFC: Parsed Entity ]
7547 * An entity reference must not contain the name of an
7550 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7552 "Entity reference to unparsed entity %s\n", name);
7556 * [ WFC: No External Entity References ]
7557 * Attribute values cannot contain direct or indirect
7558 * entity references to external entities.
7560 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7562 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7563 "Attribute references external entity '%s'\n", name);
7566 * [ WFC: No < in Attribute Values ]
7567 * The replacement text of any entity referred to directly or
7568 * indirectly in an attribute value (other than "<") must
7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7573 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7574 if (((ent->checked & 1) || (ent->checked == 0)) &&
7575 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7576 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577 "'<' in entity '%s' is not allowed in attributes values\n", name);
7582 * Internal check, no parameter entities here ...
7585 switch (ent->etype) {
7586 case XML_INTERNAL_PARAMETER_ENTITY:
7587 case XML_EXTERNAL_PARAMETER_ENTITY:
7588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589 "Attempt to reference the parameter entity '%s'\n",
7598 * [ WFC: No Recursion ]
7599 * A parsed entity must not contain a recursive reference
7600 * to itself, either directly or indirectly.
7601 * Done somewhere else
7607 * xmlParseStringEntityRef:
7608 * @ctxt: an XML parser context
7609 * @str: a pointer to an index in the string
7611 * parse ENTITY references declarations, but this version parses it from
7614 * [68] EntityRef ::= '&' Name ';'
7616 * [ WFC: Entity Declared ]
7617 * In a document without any DTD, a document with only an internal DTD
7618 * subset which contains no parameter entity references, or a document
7619 * with "standalone='yes'", the Name given in the entity reference
7620 * must match that in an entity declaration, except that well-formed
7621 * documents need not declare any of the following entities: amp, lt,
7622 * gt, apos, quot. The declaration of a parameter entity must precede
7623 * any reference to it. Similarly, the declaration of a general entity
7624 * must precede any reference to it which appears in a default value in an
7625 * attribute-list declaration. Note that if entities are declared in the
7626 * external subset or in external parameter entities, a non-validating
7627 * processor is not obligated to read and process their declarations;
7628 * for such documents, the rule that an entity must be declared is a
7629 * well-formedness constraint only if standalone='yes'.
7631 * [ WFC: Parsed Entity ]
7632 * An entity reference must not contain the name of an unparsed entity
7634 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7635 * is updated to the current location in the string.
7638 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7642 xmlEntityPtr ent = NULL;
7644 if ((str == NULL) || (*str == NULL))
7652 name = xmlParseStringName(ctxt, &ptr);
7654 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7655 "xmlParseStringEntityRef: no name\n");
7660 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7669 * Predefined entities override any extra definition
7671 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7672 ent = xmlGetPredefinedEntity(name);
7681 * Increase the number of entity references parsed
7686 * Ask first SAX for entity resolution, otherwise try the
7687 * entities which may have been stored in the parser context.
7689 if (ctxt->sax != NULL) {
7690 if (ctxt->sax->getEntity != NULL)
7691 ent = ctxt->sax->getEntity(ctxt->userData, name);
7692 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7693 ent = xmlGetPredefinedEntity(name);
7694 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7695 ent = xmlSAX2GetEntity(ctxt, name);
7698 if (ctxt->instate == XML_PARSER_EOF) {
7704 * [ WFC: Entity Declared ]
7705 * In a document without any DTD, a document with only an
7706 * internal DTD subset which contains no parameter entity
7707 * references, or a document with "standalone='yes'", the
7708 * Name given in the entity reference must match that in an
7709 * entity declaration, except that well-formed documents
7710 * need not declare any of the following entities: amp, lt,
7712 * The declaration of a parameter entity must precede any
7714 * Similarly, the declaration of a general entity must
7715 * precede any reference to it which appears in a default
7716 * value in an attribute-list declaration. Note that if
7717 * entities are declared in the external subset or in
7718 * external parameter entities, a non-validating processor
7719 * is not obligated to read and process their declarations;
7720 * for such documents, the rule that an entity must be
7721 * declared is a well-formedness constraint only if
7725 if ((ctxt->standalone == 1) ||
7726 ((ctxt->hasExternalSubset == 0) &&
7727 (ctxt->hasPErefs == 0))) {
7728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n", name);
7731 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7732 "Entity '%s' not defined\n",
7735 xmlParserEntityCheck(ctxt, 0, ent, 0);
7736 /* TODO ? check regressions ctxt->valid = 0; */
7740 * [ WFC: Parsed Entity ]
7741 * An entity reference must not contain the name of an
7744 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7746 "Entity reference to unparsed entity %s\n", name);
7750 * [ WFC: No External Entity References ]
7751 * Attribute values cannot contain direct or indirect
7752 * entity references to external entities.
7754 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7755 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7756 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7757 "Attribute references external entity '%s'\n", name);
7760 * [ WFC: No < in Attribute Values ]
7761 * The replacement text of any entity referred to directly or
7762 * indirectly in an attribute value (other than "<") must
7765 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7766 (ent != NULL) && (ent->content != NULL) &&
7767 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7768 (xmlStrchr(ent->content, '<'))) {
7769 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7770 "'<' in entity '%s' is not allowed in attributes values\n",
7775 * Internal check, no parameter entities here ...
7778 switch (ent->etype) {
7779 case XML_INTERNAL_PARAMETER_ENTITY:
7780 case XML_EXTERNAL_PARAMETER_ENTITY:
7781 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7782 "Attempt to reference the parameter entity '%s'\n",
7791 * [ WFC: No Recursion ]
7792 * A parsed entity must not contain a recursive reference
7793 * to itself, either directly or indirectly.
7794 * Done somewhere else
7803 * xmlParsePEReference:
7804 * @ctxt: an XML parser context
7806 * parse PEReference declarations
7807 * The entity content is handled directly by pushing its content as
7808 * a new input stream.
7810 * [69] PEReference ::= '%' Name ';'
7812 * [ WFC: No Recursion ]
7813 * A parsed entity must not contain a recursive
7814 * reference to itself, either directly or indirectly.
7816 * [ WFC: Entity Declared ]
7817 * In a document without any DTD, a document with only an internal DTD
7818 * subset which contains no parameter entity references, or a document
7819 * with "standalone='yes'", ... ... The declaration of a parameter
7820 * entity must precede any reference to it...
7822 * [ VC: Entity Declared ]
7823 * In a document with an external subset or external parameter entities
7824 * with "standalone='no'", ... ... The declaration of a parameter entity
7825 * must precede any reference to it...
7828 * Parameter-entity references may only appear in the DTD.
7829 * NOTE: misleading but this is handled.
7832 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7834 const xmlChar *name;
7835 xmlEntityPtr entity = NULL;
7836 xmlParserInputPtr input;
7841 name = xmlParseName(ctxt);
7843 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7846 if (xmlParserDebugEntities)
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PEReference: %s\n", name);
7850 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7857 * Increase the number of entity references parsed
7862 * Request the entity from SAX
7864 if ((ctxt->sax != NULL) &&
7865 (ctxt->sax->getParameterEntity != NULL))
7866 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7867 if (ctxt->instate == XML_PARSER_EOF)
7869 if (entity == NULL) {
7871 * [ WFC: Entity Declared ]
7872 * In a document without any DTD, a document with only an
7873 * internal DTD subset which contains no parameter entity
7874 * references, or a document with "standalone='yes'", ...
7875 * ... The declaration of a parameter entity must precede
7876 * any reference to it...
7878 if ((ctxt->standalone == 1) ||
7879 ((ctxt->hasExternalSubset == 0) &&
7880 (ctxt->hasPErefs == 0))) {
7881 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7882 "PEReference: %%%s; not found\n",
7886 * [ VC: Entity Declared ]
7887 * In a document with an external subset or external
7888 * parameter entities with "standalone='no'", ...
7889 * ... The declaration of a parameter entity must
7890 * precede any reference to it...
7892 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7893 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7894 "PEReference: %%%s; not found\n",
7897 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7898 "PEReference: %%%s; not found\n",
7902 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7905 * Internal checking in case the entity quest barfed
7907 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910 "Internal: %%%s; is not a parameter entity\n",
7914 xmlCharEncoding enc;
7916 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7917 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7918 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7919 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7920 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7921 (ctxt->replaceEntities == 0) &&
7922 (ctxt->validate == 0))
7925 input = xmlNewEntityInputStream(ctxt, entity);
7926 if (xmlPushInput(ctxt, input) < 0) {
7927 xmlFreeInputStream(input);
7931 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7933 * Get the 4 first bytes and decode the charset
7934 * if enc != XML_CHAR_ENCODING_NONE
7935 * plug some encoding conversion routines.
7936 * Note that, since we may have some non-UTF8
7937 * encoding (like UTF16, bug 135229), the 'length'
7938 * is not known, but we can calculate based upon
7939 * the amount of data in the buffer.
7942 if (ctxt->instate == XML_PARSER_EOF)
7944 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7949 enc = xmlDetectCharEncoding(start, 4);
7950 if (enc != XML_CHAR_ENCODING_NONE) {
7951 xmlSwitchEncoding(ctxt, enc);
7955 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7956 (IS_BLANK_CH(NXT(5)))) {
7957 xmlParseTextDecl(ctxt);
7962 ctxt->hasPErefs = 1;
7966 * xmlLoadEntityContent:
7967 * @ctxt: an XML parser context
7968 * @entity: an unloaded system entity
7970 * Load the original content of the given system entity from the
7971 * ExternalID/SystemID given. This is to be used for Included in Literal
7972 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
7974 * Returns 0 in case of success and -1 in case of failure
7977 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7978 xmlParserInputPtr input;
7983 if ((ctxt == NULL) || (entity == NULL) ||
7984 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7985 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7986 (entity->content != NULL)) {
7987 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7988 "xmlLoadEntityContent parameter error");
7992 if (xmlParserDebugEntities)
7993 xmlGenericError(xmlGenericErrorContext,
7994 "Reading %s entity content input\n", entity->name);
7996 buf = xmlBufferCreate();
7998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7999 "xmlLoadEntityContent parameter error");
8003 input = xmlNewEntityInputStream(ctxt, entity);
8004 if (input == NULL) {
8005 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8006 "xmlLoadEntityContent input error");
8012 * Push the entity as the current input, read char by char
8013 * saving to the buffer until the end of the entity or an error
8015 if (xmlPushInput(ctxt, input) < 0) {
8022 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8024 xmlBufferAdd(buf, ctxt->input->cur, l);
8025 if (count++ > XML_PARSER_CHUNK_SIZE) {
8028 if (ctxt->instate == XML_PARSER_EOF) {
8038 if (ctxt->instate == XML_PARSER_EOF) {
8046 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8048 } else if (!IS_CHAR(c)) {
8049 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8050 "xmlLoadEntityContent: invalid char value %d\n",
8055 entity->content = buf->content;
8056 buf->content = NULL;
8063 * xmlParseStringPEReference:
8064 * @ctxt: an XML parser context
8065 * @str: a pointer to an index in the string
8067 * parse PEReference declarations
8069 * [69] PEReference ::= '%' Name ';'
8071 * [ WFC: No Recursion ]
8072 * A parsed entity must not contain a recursive
8073 * reference to itself, either directly or indirectly.
8075 * [ WFC: Entity Declared ]
8076 * In a document without any DTD, a document with only an internal DTD
8077 * subset which contains no parameter entity references, or a document
8078 * with "standalone='yes'", ... ... The declaration of a parameter
8079 * entity must precede any reference to it...
8081 * [ VC: Entity Declared ]
8082 * In a document with an external subset or external parameter entities
8083 * with "standalone='no'", ... ... The declaration of a parameter entity
8084 * must precede any reference to it...
8087 * Parameter-entity references may only appear in the DTD.
8088 * NOTE: misleading but this is handled.
8090 * Returns the string of the entity content.
8091 * str is updated to the current value of the index
8094 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8098 xmlEntityPtr entity = NULL;
8100 if ((str == NULL) || (*str == NULL)) return(NULL);
8106 name = xmlParseStringName(ctxt, &ptr);
8108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8109 "xmlParseStringPEReference: no name\n");
8115 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8123 * Increase the number of entity references parsed
8128 * Request the entity from SAX
8130 if ((ctxt->sax != NULL) &&
8131 (ctxt->sax->getParameterEntity != NULL))
8132 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8133 if (ctxt->instate == XML_PARSER_EOF) {
8138 if (entity == NULL) {
8140 * [ WFC: Entity Declared ]
8141 * In a document without any DTD, a document with only an
8142 * internal DTD subset which contains no parameter entity
8143 * references, or a document with "standalone='yes'", ...
8144 * ... The declaration of a parameter entity must precede
8145 * any reference to it...
8147 if ((ctxt->standalone == 1) ||
8148 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8149 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8150 "PEReference: %%%s; not found\n", name);
8153 * [ VC: Entity Declared ]
8154 * In a document with an external subset or external
8155 * parameter entities with "standalone='no'", ...
8156 * ... The declaration of a parameter entity must
8157 * precede any reference to it...
8159 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8160 "PEReference: %%%s; not found\n",
8164 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8167 * Internal checking in case the entity quest barfed
8169 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8170 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8171 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8172 "%%%s; is not a parameter entity\n",
8176 ctxt->hasPErefs = 1;
8183 * xmlParseDocTypeDecl:
8184 * @ctxt: an XML parser context
8186 * parse a DOCTYPE declaration
8188 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8189 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8191 * [ VC: Root Element Type ]
8192 * The Name in the document type declaration must match the element
8193 * type of the root element.
8197 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8198 const xmlChar *name = NULL;
8199 xmlChar *ExternalID = NULL;
8200 xmlChar *URI = NULL;
8203 * We know that '<!DOCTYPE' has been detected.
8210 * Parse the DOCTYPE name.
8212 name = xmlParseName(ctxt);
8214 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8215 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8217 ctxt->intSubName = name;
8222 * Check for SystemID and ExternalID
8224 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8226 if ((URI != NULL) || (ExternalID != NULL)) {
8227 ctxt->hasExternalSubset = 1;
8229 ctxt->extSubURI = URI;
8230 ctxt->extSubSystem = ExternalID;
8235 * Create and update the internal subset.
8237 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8238 (!ctxt->disableSAX))
8239 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8240 if (ctxt->instate == XML_PARSER_EOF)
8244 * Are there any internal subset declarations ?
8245 * they are handled separately in xmlParseInternalSubset()
8251 * We should be at the end of the DOCTYPE declaration.
8254 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8260 * xmlParseInternalSubset:
8261 * @ctxt: an XML parser context
8263 * parse the internal subset declaration
8265 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8269 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8271 * Is there any DTD definition ?
8274 int baseInputNr = ctxt->inputNr;
8275 ctxt->instate = XML_PARSER_DTD;
8278 * Parse the succession of Markup declarations and
8280 * Subsequence (markupdecl | PEReference | S)*
8282 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8283 (ctxt->instate != XML_PARSER_EOF)) {
8284 const xmlChar *check = CUR_PTR;
8285 unsigned int cons = ctxt->input->consumed;
8288 xmlParseMarkupDecl(ctxt);
8289 xmlParsePEReference(ctxt);
8291 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8292 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8293 "xmlParseInternalSubset: error detected in Markup declaration\n");
8294 if (ctxt->inputNr > baseInputNr)
8307 * We should be at the end of the DOCTYPE declaration.
8310 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8316 #ifdef LIBXML_SAX1_ENABLED
8318 * xmlParseAttribute:
8319 * @ctxt: an XML parser context
8320 * @value: a xmlChar ** used to store the value of the attribute
8322 * parse an attribute
8324 * [41] Attribute ::= Name Eq AttValue
8326 * [ WFC: No External Entity References ]
8327 * Attribute values cannot contain direct or indirect entity references
8328 * to external entities.
8330 * [ WFC: No < in Attribute Values ]
8331 * The replacement text of any entity referred to directly or indirectly in
8332 * an attribute value (other than "<") must not contain a <.
8334 * [ VC: Attribute Value Type ]
8335 * The attribute must have been declared; the value must be of the type
8338 * [25] Eq ::= S? '=' S?
8342 * [NS 11] Attribute ::= QName Eq AttValue
8344 * Also the case QName == xmlns:??? is handled independently as a namespace
8347 * Returns the attribute name, and the value in *value.
8351 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8352 const xmlChar *name;
8357 name = xmlParseName(ctxt);
8359 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8360 "error parsing attribute name\n");
8371 val = xmlParseAttValue(ctxt);
8372 ctxt->instate = XML_PARSER_CONTENT;
8374 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8375 "Specification mandates value for attribute %s\n", name);
8380 * Check that xml:lang conforms to the specification
8381 * No more registered as an error, just generate a warning now
8382 * since this was deprecated in XML second edition
8384 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8385 if (!xmlCheckLanguageID(val)) {
8386 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8387 "Malformed value for xml:lang : %s\n",
8393 * Check that xml:space conforms to the specification
8395 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8396 if (xmlStrEqual(val, BAD_CAST "default"))
8398 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8401 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8402 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8413 * @ctxt: an XML parser context
8415 * parse a start of tag either for rule element or
8416 * EmptyElement. In both cases we don't parse the tag closing chars.
8418 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8420 * [ WFC: Unique Att Spec ]
8421 * No attribute name may appear more than once in the same start-tag or
8422 * empty-element tag.
8424 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8426 * [ WFC: Unique Att Spec ]
8427 * No attribute name may appear more than once in the same start-tag or
8428 * empty-element tag.
8432 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8434 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8436 * Returns the element name parsed
8440 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8441 const xmlChar *name;
8442 const xmlChar *attname;
8444 const xmlChar **atts = ctxt->atts;
8446 int maxatts = ctxt->maxatts;
8449 if (RAW != '<') return(NULL);
8452 name = xmlParseName(ctxt);
8454 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8455 "xmlParseStartTag: invalid element name\n");
8460 * Now parse the attributes, it ends up with the ending
8467 while (((RAW != '>') &&
8468 ((RAW != '/') || (NXT(1) != '>')) &&
8469 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8470 const xmlChar *q = CUR_PTR;
8471 unsigned int cons = ctxt->input->consumed;
8473 attname = xmlParseAttribute(ctxt, &attvalue);
8474 if ((attname != NULL) && (attvalue != NULL)) {
8476 * [ WFC: Unique Att Spec ]
8477 * No attribute name may appear more than once in the same
8478 * start-tag or empty-element tag.
8480 for (i = 0; i < nbatts;i += 2) {
8481 if (xmlStrEqual(atts[i], attname)) {
8482 xmlErrAttributeDup(ctxt, NULL, attname);
8488 * Add the pair to atts
8491 maxatts = 22; /* allow for 10 attrs by default */
8492 atts = (const xmlChar **)
8493 xmlMalloc(maxatts * sizeof(xmlChar *));
8495 xmlErrMemory(ctxt, NULL);
8496 if (attvalue != NULL)
8501 ctxt->maxatts = maxatts;
8502 } else if (nbatts + 4 > maxatts) {
8506 n = (const xmlChar **) xmlRealloc((void *) atts,
8507 maxatts * sizeof(const xmlChar *));
8509 xmlErrMemory(ctxt, NULL);
8510 if (attvalue != NULL)
8516 ctxt->maxatts = maxatts;
8518 atts[nbatts++] = attname;
8519 atts[nbatts++] = attvalue;
8520 atts[nbatts] = NULL;
8521 atts[nbatts + 1] = NULL;
8523 if (attvalue != NULL)
8530 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8532 if (SKIP_BLANKS == 0) {
8533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8534 "attributes construct error\n");
8536 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8537 (attname == NULL) && (attvalue == NULL)) {
8538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8539 "xmlParseStartTag: problem parsing attributes\n");
8547 * SAX: Start of Element !
8549 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8550 (!ctxt->disableSAX)) {
8552 ctxt->sax->startElement(ctxt->userData, name, atts);
8554 ctxt->sax->startElement(ctxt->userData, name, NULL);
8558 /* Free only the content strings */
8559 for (i = 1;i < nbatts;i+=2)
8560 if (atts[i] != NULL)
8561 xmlFree((xmlChar *) atts[i]);
8568 * @ctxt: an XML parser context
8569 * @line: line of the start tag
8570 * @nsNr: number of namespaces on the start tag
8572 * parse an end of tag
8574 * [42] ETag ::= '</' Name S? '>'
8578 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * SAX1 end-tag parser. Visible steps: require "</" at the current
 * position, compare the tag name with ctxt->name, require '>', report a
 * name mismatch, then notify sax->endElement.
 * @line is only used in the mismatch error message.
 * NOTE(review): this listing omits some original lines (see the gaps in
 * the embedded numbering); statements between the visible ones are not
 * described here.
 */
8582 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8583 const xmlChar *name;
8586 if ((RAW != '<') || (NXT(1) != '/')) {
8587 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8588 "xmlParseEndTag: '</' not found\n");
/* Parse the end-tag name and compare it with the open element's name. */
8593 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8596 * We should definitely be at the ending "S? '>'" part
8600 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8601 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8606 * [ WFC: Element Type Match ]
8607 * The Name in an element's end-tag must match the element type in the
/*
 * Any result other than the sentinel (xmlChar*)1 is reported as a
 * mismatch; a NULL result is printed as "unparseable".
 * Presumably (xmlChar*)1 is the success value of xmlParseNameAndCompare;
 * confirm against that helper.
 */
8611 if (name != (xmlChar*)1) {
8612 if (name == NULL) name = BAD_CAST "unparseable";
8613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8614 "Opening and ending tag mismatch: %s line %d and %s\n",
8615 ctxt->name, line, name);
/* Deliver the SAX1 endElement callback unless SAX delivery is disabled. */
8621 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8622 (!ctxt->disableSAX))
8623 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8632 * @ctxt: an XML parser context
8634 * parse an end of tag
8636 * [42] ETag ::= '</' Name S? '>'
8640 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * Thin wrapper: parses an end tag by calling xmlParseEndTag1() with 0 as
 * the start-tag line number.
 */
8644 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8645 xmlParseEndTag1(ctxt, 0);
8647 #endif /* LIBXML_SAX1_ENABLED */
8649 /************************************************************************
8651 * SAX 2 specific operations *
8653 ************************************************************************/
8657 * @ctxt: an XML parser context
8658 * @prefix: the prefix to lookup
8660 * Lookup the namespace name for the @prefix (which can be NULL)
8661 * The prefix must come from the @ctxt->dict dictionary
8663 * Returns the namespace name or NULL if not bound
/*
 * Resolve @prefix to a namespace name using the parser's namespace stack.
 * Prefixes are compared by pointer, not by content.
 * NOTE(review): some original lines are missing from this listing (e.g.
 * the statement guarded by the empty-default-namespace test below).
 */
8665 static const xmlChar *
8666 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
/* The "xml" prefix maps to ctxt->str_xml_ns without consulting nsTab. */
8669 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
/*
 * nsTab is walked as (prefix, name) pairs from the most recently pushed
 * pair backwards, so the innermost binding is found first.
 */
8670 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8671 if (ctxt->nsTab[i] == prefix) {
/* Special case: NULL prefix bound to an empty string. */
8672 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8674 return(ctxt->nsTab[i + 1]);
8681 * @ctxt: an XML parser context
8682 * @prefix: pointer to store the prefix part
8684 * parse an XML Namespace QName
8686 * [6] QName ::= (Prefix ':')? LocalPart
8687 * [7] Prefix ::= NCName
8688 * [8] LocalPart ::= NCName
8690 * Returns the Name parsed or NULL
/*
 * Parse a QName and hand back its prefix through @prefix.
 * The visible code parses NCName parts with xmlParseNCName(); on malformed
 * input it reports XML_NS_ERR_QNAME and builds a fallback name with
 * xmlBuildQName(), which is then interned in ctxt->dict via
 * xmlDictLookup() and the temporary string freed.
 * NOTE(review): the conditions guarding each step are on lines missing
 * from this listing, so the exact branch structure is not described here.
 */
8693 static const xmlChar *
8694 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8695 const xmlChar *l, *p;
8699 l = xmlParseNCName(ctxt);
/* Fallback: try a plain Name and report the QName error. */
8702 l = xmlParseName(ctxt);
8704 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8705 "Failed to parse QName '%s'\n", l, NULL, NULL);
/* Second NCName; p is the part used as prefix in the messages below. */
8715 l = xmlParseNCName(ctxt);
8719 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8720 "Failed to parse QName '%s:'\n", p, NULL, NULL);
/* Recovery: accept an Nmtoken and rebuild a single name from p and l. */
8721 l = xmlParseNmtoken(ctxt);
8723 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8725 tmp = xmlBuildQName(l, p, NULL, 0);
8728 p = xmlDictLookup(ctxt->dict, tmp, -1);
8729 if (tmp != NULL) xmlFree(tmp);
/* A further ':' part: report it and fold the rest into l. */
8736 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8737 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8739 tmp = (xmlChar *) xmlParseName(ctxt);
8741 tmp = xmlBuildQName(tmp, l, NULL, 0);
8742 l = xmlDictLookup(ctxt->dict, tmp, -1);
8743 if (tmp != NULL) xmlFree(tmp);
8747 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8748 l = xmlDictLookup(ctxt->dict, tmp, -1);
8749 if (tmp != NULL) xmlFree(tmp);
8760 * xmlParseQNameAndCompare:
8761 * @ctxt: an XML parser context
8762 * @name: the localname
8763 * @prefix: the prefix, if any.
8765 * parse an XML name and compares for match
8766 * (specialized for endtag parsing)
8768 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8769 * and the name for mismatch
/*
 * End-tag helper: check that the input at the current position spells
 * "prefix:name" and return the sentinel (const xmlChar*) 1 on a match.
 * NOTE(review): several original lines (pointer setup and loop bodies)
 * are missing from this listing.
 */
8772 static const xmlChar *
8773 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8774 xmlChar const *prefix) {
8778 const xmlChar *prefix2;
/* Without a prefix the plain-name comparison is used instead. */
8780 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
/*
 * Fast path: compare raw input bytes directly. The first loop presumably
 * matches the prefix and the second the local name (the assignments to
 * cmp are not visible here); a ':' must separate them.
 */
8783 in = ctxt->input->cur;
8786 while (*in != 0 && *in == *cmp) {
8790 if ((*cmp == 0) && (*in == ':')) {
8793 while (*in != 0 && *in == *cmp) {
/* Full match only if the name is followed by '>' or a blank. */
8797 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8799 ctxt->input->cur = in;
8800 return((const xmlChar*) 1);
8804 * all strings coms from the dictionary, equality can be done directly
/* Slow path: parse a QName and compare name and prefix by pointer. */
8806 ret = xmlParseQName (ctxt, &prefix2);
8807 if ((ret == name) && (prefix == prefix2))
8808 return((const xmlChar*) 1);
8813 * xmlParseAttValueInternal:
8814 * @ctxt: an XML parser context
8815 * @len: attribute len result
8816 * @alloc: whether the attribute was reallocated as a new string
8817 * @normalize: if 1 then further non-CDATA normalization must be done
8819 * parse a value for an attribute.
8820 * NOTE: if no normalization is needed, the routine will return pointers
8821 * directly from the data buffer.
8823 * 3.3.3 Attribute-Value Normalization:
8824 * Before the value of an attribute is passed to the application or
8825 * checked for validity, the XML processor must normalize it as follows:
8826 * - a character reference is processed by appending the referenced
8827 * character to the attribute value
8828 * - an entity reference is processed by recursively processing the
8829 * replacement text of the entity
8830 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8831 * appending #x20 to the normalized value, except that only a single
8832 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8833 * parsed entity or the literal entity value of an internal parsed entity
8834 * - other characters are processed by appending them to the normalized value
8835 * If the declared value is not CDATA, then the XML processor must further
8836 * process the normalized attribute value by discarding any leading and
8837 * trailing space (#x20) characters, and by replacing sequences of space
8838 * (#x20) characters by a single space (#x20) character.
8839 * All attributes for which no declaration has been read should be treated
8840 * by a non-validating parser as if declared CDATA.
8842 * Returns the AttValue parsed or NULL. The value has to be freed by the
8843 * caller if it was copied, this can be detected by val[*len] == 0.
/*
 * Parse a quoted attribute value starting at the current input position.
 * The visible code scans the value in place, tracking `start`, `last` and
 * `in` as pointers into the input buffer; whenever ctxt->input->base
 * changes, those pointers are shifted by the same delta. Values longer
 * than XML_MAX_TEXT_LENGTH are rejected unless XML_PARSE_HUGE is set.
 * Content the in-place scan cannot handle is delegated to
 * xmlParseAttValueComplex() (label need_complex), with *alloc set to 1.
 * On the in-place path *len is set to last - start.
 * NOTE(review): this listing omits some original lines (see the gaps in
 * the embedded numbering); branch conditions on those lines are not
 * described here.
 */
8847 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8851 const xmlChar *in = NULL, *start, *end, *last;
8852 xmlChar *ret = NULL;
8856 in = (xmlChar *) CUR_PTR;
8857 line = ctxt->input->line;
8858 col = ctxt->input->col;
8859 if (*in != '"' && *in != '\'') {
8860 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8866 * try to handle in this routine the most common case where no
8867 * allocation of a new string is required and where content is
8872 end = ctxt->input->end;
8875 const xmlChar *oldbase = ctxt->input->base;
8877 if (oldbase != ctxt->input->base) {
8878 long delta = ctxt->input->base - oldbase;
8879 start = start + delta;
8882 end = ctxt->input->end;
8886 * Skip any leading spaces
8888 while ((in < end) && (*in != limit) &&
8889 ((*in == 0x20) || (*in == 0x9) ||
8890 (*in == 0xA) || (*in == 0xD))) {
8899 const xmlChar *oldbase = ctxt->input->base;
8901 if (ctxt->instate == XML_PARSER_EOF)
8903 if (oldbase != ctxt->input->base) {
8904 long delta = ctxt->input->base - oldbase;
8905 start = start + delta;
8908 end = ctxt->input->end;
8909 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8910 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8911 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8912 "AttValue length too long\n");
8917 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8918 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8920 if ((*in++ == 0x20) && (*in == 0x20)) break;
8922 const xmlChar *oldbase = ctxt->input->base;
8924 if (ctxt->instate == XML_PARSER_EOF)
8926 if (oldbase != ctxt->input->base) {
8927 long delta = ctxt->input->base - oldbase;
8928 start = start + delta;
8931 end = ctxt->input->end;
8932 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8933 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8934 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8935 "AttValue length too long\n");
8942 * skip the trailing blanks
/*
 * Test the bound before dereferencing: the previous operand order read
 * last[-1] even when last == start. The loop result is unchanged.
 */
8944 while ((last > start) && (last[-1] == 0x20)) last--;
8945 while ((in < end) && (*in != limit) &&
8946 ((*in == 0x20) || (*in == 0x9) ||
8947 (*in == 0xA) || (*in == 0xD))) {
8955 const xmlChar *oldbase = ctxt->input->base;
8957 if (ctxt->instate == XML_PARSER_EOF)
8959 if (oldbase != ctxt->input->base) {
8960 long delta = ctxt->input->base - oldbase;
8961 start = start + delta;
8963 last = last + delta;
8965 end = ctxt->input->end;
8966 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8967 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8968 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8969 "AttValue length too long\n");
8974 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8975 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8976 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8977 "AttValue length too long\n");
8980 if (*in != limit) goto need_complex;
8982 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8983 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8987 const xmlChar *oldbase = ctxt->input->base;
8989 if (ctxt->instate == XML_PARSER_EOF)
8991 if (oldbase != ctxt->input->base) {
8992 long delta = ctxt->input->base - oldbase;
8993 start = start + delta;
8996 end = ctxt->input->end;
8997 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9000 "AttValue length too long\n");
9006 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9007 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9008 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9009 "AttValue length too long\n");
9012 if (*in != limit) goto need_complex;
9017 *len = last - start;
9018 ret = (xmlChar *) start;
9020 if (alloc) *alloc = 1;
9021 ret = xmlStrndup(start, last - start);
9024 ctxt->input->line = line;
9025 ctxt->input->col = col;
9026 if (alloc) *alloc = 0;
9029 if (alloc) *alloc = 1;
9030 return xmlParseAttValueComplex(ctxt, len, normalize);
9034 * xmlParseAttribute2:
9035 * @ctxt: an XML parser context
9036 * @pref: the element prefix
9037 * @elem: the element name
9038 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9039 * @value: a xmlChar ** used to store the value of the attribute
9040 * @len: an int * to save the length of the attribute
9041 * @alloc: an int * to indicate if the attribute was allocated
9043 * parse an attribute in the new SAX2 framework.
9045 * Returns the attribute name, and the value in *value.
/*
 * Parse one attribute (QName, then value) for the SAX2 start-tag parser.
 * Visible steps: parse the QName, look up a declared attribute type in
 * ctxt->attsSpecial, parse the value with xmlParseAttValueInternal(),
 * optionally run a second space-normalization pass, then apply the
 * xml:lang and xml:space checks when the prefix is "xml".
 * NOTE(review): this listing omits some original lines (see the gaps in
 * the embedded numbering); branch conditions on those lines are not
 * described here.
 */
9048 static const xmlChar *
9049 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9050 const xmlChar * pref, const xmlChar * elem,
9051 const xmlChar ** prefix, xmlChar ** value,
9052 int *len, int *alloc)
9054 const xmlChar *name;
9055 xmlChar *val, *internal_val = NULL;
9060 name = xmlParseQName(ctxt, prefix);
9062 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9063 "error parsing attribute name\n");
9068 * get the type if needed
9070 if (ctxt->attsSpecial != NULL) {
/* The hash payload is an integer type code stored in a pointer. */
9073 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9074 pref, elem, *prefix, name);
9086 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9089 * Sometimes a second normalisation pass for spaces is needed
9090 * but that only happens if charrefs or entities refernces
9091 * have been used in the attribute value, i.e. the attribute
9092 * value have been extracted in an allocated string already.
9095 const xmlChar *val2;
9097 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
/* A different non-NULL result replaces the parsed value. */
9098 if ((val2 != NULL) && (val2 != val)) {
9100 val = (xmlChar *) val2;
9104 ctxt->instate = XML_PARSER_CONTENT;
9106 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9107 "Specification mandates value for attribute %s\n",
/* Prefix comparison is by pointer against the context's "xml" string. */
9112 if (*prefix == ctxt->str_xml) {
9114 * Check that xml:lang conforms to the specification
9115 * No more registered as an error, just generate a warning now
9116 * since this was deprecated in XML second edition
/* xml:lang is only checked in pedantic mode, on a copy of the value. */
9118 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9119 internal_val = xmlStrndup(val, *len);
9120 if (!xmlCheckLanguageID(internal_val)) {
9121 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9122 "Malformed value for xml:lang : %s\n",
9123 internal_val, NULL);
9128 * Check that xml:space conforms to the specification
/* Values other than "default" and "preserve" only produce a warning. */
9130 if (xmlStrEqual(name, BAD_CAST "space")) {
9131 internal_val = xmlStrndup(val, *len);
9132 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9134 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9137 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9138 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9139 internal_val, NULL);
/* Release the temporary copy made for the checks above. */
9143 xmlFree(internal_val);
9151 * xmlParseStartTag2:
9152 * @ctxt: an XML parser context
9154 * parse a start of tag either for rule element or
9155 * EmptyElement. In both cases we don't parse the tag closing chars.
9156 * This routine is called when running SAX2 parsing
9158 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9160 * [ WFC: Unique Att Spec ]
9161 * No attribute name may appear more than once in the same start-tag or
9162 * empty-element tag.
9164 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9166 * [ WFC: Unique Att Spec ]
9167 * No attribute name may appear more than once in the same start-tag or
9168 * empty-element tag.
9172 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9174 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9176 * Returns the element name parsed
/*
 * SAX2 start-tag parser. Visible steps:
 *  - parse the element QName and store the consumed length in *tlen;
 *  - loop over attributes with xmlParseAttribute2() until '>' or "/>";
 *    xmlns and xmlns:prefix attributes are interned with xmlDictLookup(),
 *    checked, and pushed with nsPush(); every other attribute occupies
 *    five slots in atts (name, prefix, namespace, value, value end);
 *  - rebase the value pointers of non-allocated attributes when the input
 *    buffer base has moved;
 *  - add defaulted attributes and namespaces from ctxt->attsDefault;
 *  - resolve attribute namespaces and report duplicates;
 *  - call sax->startElementNs, then free allocated attribute values.
 * NOTE(review): this listing omits some original lines (see the gaps in
 * the embedded numbering); branch conditions on those lines are not
 * described here.
 */
9179 static const xmlChar *
9180 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9181 const xmlChar **URI, int *tlen) {
9182 const xmlChar *localname;
9183 const xmlChar *prefix;
9184 const xmlChar *attname;
9185 const xmlChar *aprefix;
9186 const xmlChar *nsname;
9188 const xmlChar **atts = ctxt->atts;
9189 int maxatts = ctxt->maxatts;
9190 int nratts, nbatts, nbdef, inputid;
9191 int i, j, nbNs, attval;
9193 int nsNr = ctxt->nsNr;
9195 if (RAW != '<') return(NULL);
9199 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9200 * point since the attribute values may be stored as pointers to
9201 * the buffer and calling SHRINK would destroy them !
9202 * The Shrinking is only possible once the full set of attribute
9203 * callbacks have been done.
9206 cur = ctxt->input->cur - ctxt->input->base;
9207 inputid = ctxt->input->id;
9213 /* Forget any namespaces added during an earlier parse of this element. */
9216 localname = xmlParseQName(ctxt, &prefix);
9217 if (localname == NULL) {
9218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9219 "StartTag: invalid element name\n");
9222 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9225 * Now parse the attributes, it ends up with the ending
9232 while (((RAW != '>') &&
9233 ((RAW != '/') || (NXT(1) != '>')) &&
9234 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9235 const xmlChar *q = CUR_PTR;
9236 unsigned int cons = ctxt->input->consumed;
9237 int len = -1, alloc = 0;
9239 attname = xmlParseAttribute2(ctxt, prefix, localname,
9240 &aprefix, &attvalue, &len, &alloc);
9241 if ((attname == NULL) || (attvalue == NULL))
9243 if (len < 0) len = xmlStrlen(attvalue);
9245 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9246 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9250 xmlErrMemory(ctxt, "dictionary allocation failure");
9251 if ((attvalue != NULL) && (alloc != 0))
9256 uri = xmlParseURI((const char *) URL);
9258 xmlNsErr(ctxt, XML_WAR_NS_URI,
9259 "xmlns: '%s' is not a valid URI\n",
9262 if (uri->scheme == NULL) {
9263 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9264 "xmlns: URI %s is not absolute\n",
9269 if (URL == ctxt->str_xml_ns) {
9270 if (attname != ctxt->str_xml) {
9271 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9272 "xml namespace URI cannot be the default namespace\n",
9279 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9280 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9281 "reuse of the xmlns namespace name is forbidden\n",
9287 * check that it's not a defined namespace
9289 for (j = 1;j <= nbNs;j++)
9290 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9293 xmlErrAttributeDup(ctxt, NULL, attname);
9295 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9297 } else if (aprefix == ctxt->str_xmlns) {
9298 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9301 if (attname == ctxt->str_xml) {
9302 if (URL != ctxt->str_xml_ns) {
9303 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9304 "xml namespace prefix mapped to wrong URI\n",
9308 * Do not keep a namespace definition node
9312 if (URL == ctxt->str_xml_ns) {
9313 if (attname != ctxt->str_xml) {
9314 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9315 "xml namespace URI mapped to wrong prefix\n",
9320 if (attname == ctxt->str_xmlns) {
9321 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9322 "redefinition of the xmlns prefix is forbidden\n",
9328 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9329 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330 "reuse of the xmlns namespace name is forbidden\n",
9334 if ((URL == NULL) || (URL[0] == 0)) {
9335 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9336 "xmlns:%s: Empty XML namespace is not allowed\n",
9337 attname, NULL, NULL);
9340 uri = xmlParseURI((const char *) URL);
9342 xmlNsErr(ctxt, XML_WAR_NS_URI,
9343 "xmlns:%s: '%s' is not a valid URI\n",
9344 attname, URL, NULL);
9346 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9347 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348 "xmlns:%s: URI %s is not absolute\n",
9349 attname, URL, NULL);
9356 * check that it's not a defined namespace
9358 for (j = 1;j <= nbNs;j++)
9359 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9362 xmlErrAttributeDup(ctxt, aprefix, attname);
9364 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9368 * Add the pair to atts
9370 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9371 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9374 maxatts = ctxt->maxatts;
9377 ctxt->attallocs[nratts++] = alloc;
9378 atts[nbatts++] = attname;
9379 atts[nbatts++] = aprefix;
9381 * The namespace URI field is used temporarily to point at the
9382 * base of the current input buffer for non-alloced attributes.
9383 * When the input buffer is reallocated, all the pointers become
9384 * invalid, but they can be reconstructed later.
9387 atts[nbatts++] = NULL;
9389 atts[nbatts++] = ctxt->input->base;
9390 atts[nbatts++] = attvalue;
9392 atts[nbatts++] = attvalue;
9394 * tag if some deallocation is needed
9396 if (alloc != 0) attval = 1;
9397 attvalue = NULL; /* moved into atts */
9401 if ((attvalue != NULL) && (alloc != 0)) {
9407 if (ctxt->instate == XML_PARSER_EOF)
9409 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9411 if (SKIP_BLANKS == 0) {
9412 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9413 "attributes construct error\n");
9416 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9417 (attname == NULL) && (attvalue == NULL)) {
9418 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9419 "xmlParseStartTag: problem parsing attributes\n");
9425 if (ctxt->input->id != inputid) {
9426 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9427 "Unexpected change of input\n");
9432 /* Reconstruct attribute value pointers. */
9433 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9434 if (atts[i+2] != NULL) {
9436 * Arithmetic on dangling pointers is technically undefined
9437 * behavior, but well...
9439 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9440 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9441 atts[i+3] += offset; /* value */
9442 atts[i+4] += offset; /* valuend */
9447 * The attributes defaulting
9449 if (ctxt->attsDefault != NULL) {
9450 xmlDefAttrsPtr defaults;
9452 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9453 if (defaults != NULL) {
9454 for (i = 0;i < defaults->nbAttrs;i++) {
9455 attname = defaults->values[5 * i];
9456 aprefix = defaults->values[5 * i + 1];
9459 * special work for namespaces defaulted defs
9461 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9463 * check that it's not a defined namespace
9465 for (j = 1;j <= nbNs;j++)
9466 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9468 if (j <= nbNs) continue;
9470 nsname = xmlGetNamespace(ctxt, NULL);
9471 if (nsname != defaults->values[5 * i + 2]) {
9472 if (nsPush(ctxt, NULL,
9473 defaults->values[5 * i + 2]) > 0)
9476 } else if (aprefix == ctxt->str_xmlns) {
9478 * check that it's not a defined namespace
9480 for (j = 1;j <= nbNs;j++)
9481 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9483 if (j <= nbNs) continue;
9485 nsname = xmlGetNamespace(ctxt, attname);
/*
 * Compare with the value of the i-th default (slot 5 * i + 2), the same
 * slot pushed just below and tested in the default-namespace branch
 * above. The previous code read slot 2, i.e. always the first default's
 * value, so a defaulted xmlns:prefix binding could be skipped or pushed
 * based on an unrelated attribute.
 */
9486 if (nsname != defaults->values[5 * i + 2]) {
9487 if (nsPush(ctxt, attname,
9488 defaults->values[5 * i + 2]) > 0)
9493 * check that it's not a defined attribute
9495 for (j = 0;j < nbatts;j+=5) {
9496 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9499 if (j < nbatts) continue;
9501 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9502 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9505 maxatts = ctxt->maxatts;
9508 atts[nbatts++] = attname;
9509 atts[nbatts++] = aprefix;
9510 if (aprefix == NULL)
9511 atts[nbatts++] = NULL;
9513 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9514 atts[nbatts++] = defaults->values[5 * i + 2];
9515 atts[nbatts++] = defaults->values[5 * i + 3];
9516 if ((ctxt->standalone == 1) &&
9517 (defaults->values[5 * i + 4] != NULL)) {
9518 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9519 "standalone: attribute %s on %s defaulted from external subset\n",
9520 attname, localname);
9529 * The attributes checkings
9531 for (i = 0; i < nbatts;i += 5) {
9533 * The default namespace does not apply to attribute names.
9535 if (atts[i + 1] != NULL) {
9536 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9537 if (nsname == NULL) {
9538 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9539 "Namespace prefix %s for %s on %s is not defined\n",
9540 atts[i + 1], atts[i], localname);
9542 atts[i + 2] = nsname;
9546 * [ WFC: Unique Att Spec ]
9547 * No attribute name may appear more than once in the same
9548 * start-tag or empty-element tag.
9549 * As extended by the Namespace in XML REC.
9551 for (j = 0; j < i;j += 5) {
9552 if (atts[i] == atts[j]) {
9553 if (atts[i+1] == atts[j+1]) {
9554 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9557 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9558 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9559 "Namespaced Attribute %s in '%s' redefined\n",
9560 atts[i], nsname, NULL);
9567 nsname = xmlGetNamespace(ctxt, prefix);
9568 if ((prefix != NULL) && (nsname == NULL)) {
9569 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9570 "Namespace prefix %s on %s is not defined\n",
9571 prefix, localname, NULL);
9577 * SAX: Start of Element !
9579 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9580 (!ctxt->disableSAX)) {
9582 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9583 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9584 nbatts / 5, nbdef, atts);
9586 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9587 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9592 * Free up attribute allocated strings if needed
9595 for (i = 3,j = 0; j < nratts;i += 5,j++)
9596 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9597 xmlFree((xmlChar *) atts[i]);
9605 * @ctxt: an XML parser context
9606 * @line: line of the start tag
9607 * @nsNr: number of namespaces on the start tag
9609 * parse an end of tag
9611 * [42] ETag ::= '</' Name S? '>'
9615 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * SAX2 end-tag parser. Visible steps: require "</", try a fast byte
 * comparison of the next @tlen bytes with ctxt->name, otherwise parse and
 * compare the name, require '>', report a mismatch, then notify
 * sax->endElementNs with @prefix and @URI.
 * NOTE(review): this listing omits some original lines (see the gaps in
 * the embedded numbering); the use of @nsNr is not visible here.
 */
9619 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9620 const xmlChar *URI, int line, int nsNr, int tlen) {
9621 const xmlChar *name;
9625 if ((RAW != '<') || (NXT(1) != '/')) {
9626 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
/*
 * Fast path: when at least tlen bytes remain and they equal the first
 * tlen bytes of ctxt->name, skip them (plus a directly following '>')
 * and advance the column counter by the same amount.
 */
9631 curLength = ctxt->input->end - ctxt->input->cur;
9632 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9633 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9634 if ((curLength >= (size_t)(tlen + 1)) &&
9635 (ctxt->input->cur[tlen] == '>')) {
9636 ctxt->input->cur += tlen + 1;
9637 ctxt->input->col += tlen + 1;
9640 ctxt->input->cur += tlen;
9641 ctxt->input->col += tlen;
/* Slow path: parse the name and compare it (selection not visible). */
9645 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9647 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9651 * We should definitely be at the ending "S? '>'" part
9654 if (ctxt->instate == XML_PARSER_EOF)
9657 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9658 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9663 * [ WFC: Element Type Match ]
9664 * The Name in an element's end-tag must match the element type in the
/*
 * Any result other than the sentinel (xmlChar*)1 is a mismatch. When no
 * line was supplied, the line of the current node is used in the message.
 */
9668 if (name != (xmlChar*)1) {
9669 if (name == NULL) name = BAD_CAST "unparseable";
9670 if ((line == 0) && (ctxt->node != NULL))
9671 line = ctxt->node->line;
9672 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9673 "Opening and ending tag mismatch: %s line %d and %s\n",
9674 ctxt->name, line, name);
/* Deliver the SAX2 endElementNs callback unless SAX is disabled. */
9681 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9682 (!ctxt->disableSAX))
9683 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9693 * @ctxt: an XML parser context
9695 * Parse escaped pure raw content.
9697 * [18] CDSect ::= CDStart CData CDEnd
9699 * [19] CDStart ::= '<![CDATA['
9701 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9703 * [21] CDEnd ::= ']]>'
/*
 * Parse a CDATA section starting at "<![CDATA[" and hand its content to
 * the SAX layer. Characters are accumulated in a heap buffer that grows
 * on demand; the section ends at "]]>".
 * NOTE(review): this listing omits some original lines (see the gaps in
 * the embedded numbering); branch conditions on those lines are not
 * described here.
 */
9706 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9707 xmlChar *buf = NULL;
9709 int size = XML_PARSER_BUFFER_SIZE;
9715 /* Check 2.6.0 was NXT(0) not RAW */
9716 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9721 ctxt->instate = XML_PARSER_CDATA_SECTION;
9724 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9725 ctxt->instate = XML_PARSER_CONTENT;
9731 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9732 ctxt->instate = XML_PARSER_CONTENT;
9737 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9739 xmlErrMemory(ctxt, NULL);
/* r, s and cur form a sliding window; stop once it reads "]]>". */
9742 while (IS_CHAR(cur) &&
9743 ((r != ']') || (s != ']') || (cur != '>'))) {
/* Grow the buffer before it fills up. */
9744 if (len + 5 >= size) {
/* Size cap applies unless XML_PARSE_HUGE is set. */
9747 if ((size > XML_MAX_TEXT_LENGTH) &&
9748 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9749 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9750 "CData section too big found", NULL);
9754 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9757 xmlErrMemory(ctxt, NULL);
9763 COPY_BUF(rl,buf,len,r);
9771 if (ctxt->instate == XML_PARSER_EOF) {
9781 ctxt->instate = XML_PARSER_CONTENT;
9783 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9784 "CData section not finished\n%.50s\n", buf);
9791 * OK the buffer is to be consumed as cdata.
/* Prefer cdataBlock; fall back to characters when it is not set. */
9793 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9794 if (ctxt->sax->cdataBlock != NULL)
9795 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9796 else if (ctxt->sax->characters != NULL)
9797 ctxt->sax->characters(ctxt->userData, buf, len);
9804 * @ctxt: an XML parser context
9808 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
/*
 * Parse element content: loop until end of input, a "</" sequence, or
 * parser EOF state, dispatching on the upcoming characters to the
 * processing-instruction, CDATA, comment, element, reference or
 * character-data parser.
 * NOTE(review): this listing omits some original lines (e.g. the call in
 * the processing-instruction branch).
 */
9812 xmlParseContent(xmlParserCtxtPtr ctxt) {
9814 while ((RAW != 0) &&
9815 ((RAW != '<') || (NXT(1) != '/')) &&
9816 (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot of the input position, used for the progress check below. */
9817 const xmlChar *test = CUR_PTR;
9818 unsigned int cons = ctxt->input->consumed;
9819 const xmlChar *cur = ctxt->input->cur;
9822 * First case : a Processing Instruction.
9824 if ((*cur == '<') && (cur[1] == '?')) {
9829 * Second case : a CDSection
9831 /* 2.6.0 test was *cur not RAW */
9832 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9833 xmlParseCDSect(ctxt);
9837 * Third case : a comment
9839 else if ((*cur == '<') && (NXT(1) == '!') &&
9840 (NXT(2) == '-') && (NXT(3) == '-')) {
9841 xmlParseComment(ctxt);
9842 ctxt->instate = XML_PARSER_CONTENT;
9846 * Fourth case : a sub-element.
9848 else if (*cur == '<') {
9849 xmlParseElement(ctxt);
9853 * Fifth case : a reference. If if has not been resolved,
9854 * parsing returns it's Name, create the node
9857 else if (*cur == '&') {
9858 xmlParseReference(ctxt);
9862 * Last case, text. Note that References are handled directly.
9865 xmlParseCharData(ctxt, 0);
/*
 * No input was consumed in this iteration: report an internal error and
 * halt the parser instead of looping forever.
 */
9871 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9872 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9873 "detected an error in element content\n");
9874 xmlHaltParser(ctxt);
9882 * @ctxt: an XML parser context
9884 * parse an XML element, this is highly recursive
9886 * [39] element ::= EmptyElemTag | STag content ETag
9888 * [ WFC: Element Type Match ]
9889 * The Name in an element's end-tag must match the element type in the
/*
 * Parse one element: start tag, content, end tag. Visible steps: enforce
 * the nesting-depth limit, optionally record the start position, push an
 * xml:space state, parse the start tag (SAX2 or SAX1 variant), handle the
 * empty-element form, parse the content, then parse the end tag and
 * optionally record the end position. Namespaces pushed since entry are
 * popped on the exit paths shown below.
 * NOTE(review): this listing omits some original lines (see the gaps in
 * the embedded numbering); branch conditions on those lines, including
 * the SAX1/SAX2 selection, are not described here.
 */
9895 xmlParseElement(xmlParserCtxtPtr ctxt) {
9896 const xmlChar *name;
9897 const xmlChar *prefix = NULL;
9898 const xmlChar *URI = NULL;
9899 xmlParserNodeInfo node_info;
/* Namespace stack depth at entry, used to pop what this element adds. */
9902 int nsNr = ctxt->nsNr;
/* Depth guard: applies unless XML_PARSE_HUGE is set; halts the parser. */
9904 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9905 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9906 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9907 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9909 xmlHaltParser(ctxt);
9913 /* Capture start position */
9914 if (ctxt->record_info) {
9915 node_info.begin_pos = ctxt->input->consumed +
9916 (CUR_PTR - ctxt->input->base);
9917 node_info.begin_line = ctxt->input->line;
/*
 * Push the space state for this element: -1 when the stack is empty or
 * its top is -2, otherwise a copy of the current top.
 */
9920 if (ctxt->spaceNr == 0)
9921 spacePush(ctxt, -1);
9922 else if (*ctxt->space == -2)
9923 spacePush(ctxt, -1);
9925 spacePush(ctxt, *ctxt->space);
9927 line = ctxt->input->line;
9928 #ifdef LIBXML_SAX1_ENABLED
9930 #endif /* LIBXML_SAX1_ENABLED */
9931 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9932 #ifdef LIBXML_SAX1_ENABLED
9934 name = xmlParseStartTag(ctxt);
9935 #endif /* LIBXML_SAX1_ENABLED */
9936 if (ctxt->instate == XML_PARSER_EOF)
9942 namePush(ctxt, name);
9945 #ifdef LIBXML_VALID_ENABLED
9947 * [ VC: Root Element Type ]
9948 * The Name in the document type declaration must match the element
9949 * type of the root element.
9951 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9952 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9953 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9954 #endif /* LIBXML_VALID_ENABLED */
9957 * Check for an Empty Element.
/* "/>" form: the end-element callback is fired here directly. */
9959 if ((RAW == '/') && (NXT(1) == '>')) {
9962 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9963 (!ctxt->disableSAX))
9964 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9965 #ifdef LIBXML_SAX1_ENABLED
9967 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9968 (!ctxt->disableSAX))
9969 ctxt->sax->endElement(ctxt->userData, name);
9970 #endif /* LIBXML_SAX1_ENABLED */
9974 if (nsNr != ctxt->nsNr)
9975 nsPop(ctxt, ctxt->nsNr - nsNr);
9976 if ( ret != NULL && ctxt->record_info ) {
9977 node_info.end_pos = ctxt->input->consumed +
9978 (CUR_PTR - ctxt->input->base);
9979 node_info.end_line = ctxt->input->line;
9980 node_info.node = ret;
9981 xmlParserAddNodeInfo(ctxt, &node_info);
/* Start tag not terminated: report it and unwind this element. */
9988 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9989 "Couldn't find end of Start Tag %s line %d\n",
9993 * end of parsing of this node.
9998 if (nsNr != ctxt->nsNr)
9999 nsPop(ctxt, ctxt->nsNr - nsNr);
10002 * Capture end position and add node
10004 if ( ret != NULL && ctxt->record_info ) {
10005 node_info.end_pos = ctxt->input->consumed +
10006 (CUR_PTR - ctxt->input->base);
10007 node_info.end_line = ctxt->input->line;
10008 node_info.node = ret;
10009 xmlParserAddNodeInfo(ctxt, &node_info);
10015 * Parse the content of the element:
10017 xmlParseContent(ctxt);
10018 if (ctxt->instate == XML_PARSER_EOF)
/* Input ended (or hit a non-byte char) before the end tag. */
10020 if (!IS_BYTE_CHAR(RAW)) {
10021 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10022 "Premature end of data in tag %s line %d\n",
10026 * end of parsing of this node.
10031 if (nsNr != ctxt->nsNr)
10032 nsPop(ctxt, ctxt->nsNr - nsNr);
10037 * parse the end of tag: '</' should be here.
/* The number of namespaces pushed by this element is passed along. */
10040 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10043 #ifdef LIBXML_SAX1_ENABLED
10045 xmlParseEndTag1(ctxt, line);
10046 #endif /* LIBXML_SAX1_ENABLED */
10049 * Capture end position and add node
10051 if ( ret != NULL && ctxt->record_info ) {
10052 node_info.end_pos = ctxt->input->consumed +
10053 (CUR_PTR - ctxt->input->base);
10054 node_info.end_line = ctxt->input->line;
10055 node_info.node = ret;
10056 xmlParserAddNodeInfo(ctxt, &node_info);
10061 * xmlParseVersionNum:
10062 * @ctxt: an XML parser context
10064 * parse the XML version value.
10066 * [26] VersionNum ::= '1.' [0-9]+
10068 * In practice allow [0-9].[0-9]+ at that level
10070 * Returns the string giving the XML version number, or NULL
/*
 * Parse a version number into a freshly allocated buffer. The visible
 * code requires a leading decimal digit and then collects further digits,
 * growing the buffer when it is about to fill up.
 * NOTE(review): this listing omits some original lines (e.g. the handling
 * of the '.' separator and the return statements).
 */
10073 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10074 xmlChar *buf = NULL;
10079 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10081 xmlErrMemory(ctxt, NULL);
/* The first character must be a decimal digit. */
10085 if (!((cur >= '0') && (cur <= '9'))) {
10099 while ((cur >= '0') && (cur <= '9')) {
/* Keep room for one more byte beyond the next character. */
10100 if (len + 1 >= size) {
10104 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10107 xmlErrMemory(ctxt, NULL);
10121 * xmlParseVersionInfo:
10122 * @ctxt: an XML parser context
10124 * parse the XML version.
10126 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10128 * [25] Eq ::= S? '=' S?
10130 * Returns the version string, e.g. "1.0"
/*
 * Recognises the "version" keyword and obtains the value itself from
 * xmlParseVersionNum. The result variable starts out as NULL.
 */
10134 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10135 xmlChar *version = NULL;
/* Proceed only when the input is positioned on the literal "version". */
10137 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
/* Reports XML_ERR_EQUAL_REQUIRED: the '=' after the keyword is absent. */
10141 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10148 version = xmlParseVersionNum(ctxt);
/* Reports XML_ERR_STRING_NOT_CLOSED for an unterminated quoted value. */
10150 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Same sequence again for a value delimited by single quotes. */
10153 } else if (RAW == '\''){
10155 version = xmlParseVersionNum(ctxt);
10157 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Reports XML_ERR_STRING_NOT_STARTED (presumably: no opening quote found). */
10161 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10169 * @ctxt: an XML parser context
10171 * parse the XML encoding name
10173 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10175 * Returns the encoding name value or NULL
/*
 * Reads an encoding name into a heap buffer (xmlMallocAtomic, enlarged with
 * xmlRealloc). Allocation failures go through xmlErrMemory and a malformed
 * name is reported as XML_ERR_ENCODING_NAME.
 */
10178 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10179 xmlChar *buf = NULL;
/* The first character has to be an ASCII letter before anything is allocated. */
10185 if (((cur >= 'a') && (cur <= 'z')) ||
10186 ((cur >= 'A') && (cur <= 'Z'))) {
10187 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10189 xmlErrMemory(ctxt, NULL);
/* Following characters may be letters, digits, '.' or '_' (further alternatives continue the condition). */
10196 while (((cur >= 'a') && (cur <= 'z')) ||
10197 ((cur >= 'A') && (cur <= 'Z')) ||
10198 ((cur >= '0') && (cur <= '9')) ||
10199 (cur == '.') || (cur == '_') ||
/* Buffer nearly full: enlarge it. The xmlRealloc result is kept in tmp. */
10201 if (len + 1 >= size) {
10205 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10207 xmlErrMemory(ctxt, NULL);
/* Reports XML_ERR_ENCODING_NAME (presumably when the leading-letter test above fails). */
10224 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10230 * xmlParseEncodingDecl:
10231 * @ctxt: an XML parser context
10233 * parse the XML encoding declaration
10235 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10237 * This sets up the conversion filters.
10239 * Returns the encoding value or NULL
/*
 * Recognises the "encoding" keyword, reads the quoted name with
 * xmlParseEncName and then acts on it: UTF-16 and UTF-8 spellings are stored
 * in ctxt->encoding, any other name is stored in ctxt->input->encoding and a
 * conversion handler is looked up for it.
 */
10243 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10244 xmlChar *encoding = NULL;
/* Proceed only when the input is positioned on the literal "encoding". */
10247 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10251 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10258 encoding = xmlParseEncName(ctxt);
/* Unterminated quoted value: report it and release the name that was read. */
10260 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10261 xmlFree((xmlChar *) encoding);
/* Same sequence again for a value delimited by single quotes. */
10265 } else if (RAW == '\''){
10267 encoding = xmlParseEncName(ctxt);
10269 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10270 xmlFree((xmlChar *) encoding);
10275 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10279 * Non standard parsing, allowing the user to ignore encoding
10281 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10282 xmlFree((xmlChar *) encoding);
10287 * UTF-16 encoding stwich has already taken place at this stage,
10288 * more over the little-endian/big-endian selection is already done
10290 if ((encoding != NULL) &&
10291 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10292 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10294 * If no encoding was passed to the parser, that we are
10295 * using UTF-16 and no decoder is present i.e. the
10296 * document is apparently UTF-8 compatible, then raise an
10297 * encoding mismatch fatal error
10299 if ((ctxt->encoding == NULL) &&
10300 (ctxt->input->buf != NULL) &&
10301 (ctxt->input->buf->encoder == NULL)) {
10302 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10303 "Document labelled UTF-16 but has UTF-8 content\n");
/* Release any earlier ctxt->encoding and store the freshly parsed name there. */
10305 if (ctxt->encoding != NULL)
10306 xmlFree((xmlChar *) ctxt->encoding);
10307 ctxt->encoding = encoding;
10310 * UTF-8 encoding is handled natively
10312 else if ((encoding != NULL) &&
10313 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10314 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10315 if (ctxt->encoding != NULL)
10316 xmlFree((xmlChar *) ctxt->encoding);
10317 ctxt->encoding = encoding;
10319 else if (encoding != NULL) {
10320 xmlCharEncodingHandlerPtr handler;
/* Any other name is stored on the current input, replacing its previous value. */
10322 if (ctxt->input->encoding != NULL)
10323 xmlFree((xmlChar *) ctxt->input->encoding);
10324 ctxt->input->encoding = encoding;
/* Look up a conversion handler for the declared name and try to switch to it. */
10326 handler = xmlFindCharEncodingHandler((const char *) encoding);
10327 if (handler != NULL) {
10328 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10329 /* failed to convert */
10330 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
/* Reports the name as unsupported (presumably the handler == NULL branch). */
10334 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10335 "Unsupported encoding %s\n", encoding);
10345 * @ctxt: an XML parser context
10347 * parse the XML standalone declaration
10349 * [32] SDDecl ::= S 'standalone' Eq
10350 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10352 * [ VC: Standalone Document Declaration ]
10353 * TODO The standalone document declaration must have the value "no"
10354 * if any external markup declarations contain declarations of:
10355 * - attributes with default values, if elements to which these
10356 * attributes apply appear in the document without specifications
10357 * of values for these attributes, or
10358 * - entities (other than amp, lt, gt, apos, quot), if references
10359 * to those entities appear in the document, or
10360 * - attributes with values subject to normalization, where the
10361 * attribute appears in the document with a value which will change
10362 * as a result of normalization, or
10363 * - element types with element content, if white space occurs directly
10364 * within any instance of those types.
10367 * 1 if standalone="yes"
10368 * 0 if standalone="no"
10369 * -2 if standalone attribute is missing or invalid
10370 * (A standalone value of -2 means that the XML declaration was found,
10371 * but no value was specified for the standalone attribute).
/*
 * Parses the "standalone" pseudo-attribute. The result starts at -2 and is
 * what gets returned when the '=' sign is missing.
 */
10375 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10376 int standalone = -2;
/* Proceed only when the input is positioned on the literal "standalone". */
10379 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
/* Missing '=': report it and return the still-unset value. */
10383 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10384 return(standalone);
/* First quoted form: the value is matched character by character against "no", then "ye...". */
10390 if ((RAW == 'n') && (NXT(1) == 'o')) {
10393 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10398 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10401 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Same matching again for a value delimited by double quotes. */
10404 } else if (RAW == '"'){
10406 if ((RAW == 'n') && (NXT(1) == 'o')) {
10409 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10414 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10417 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10421 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10424 return(standalone);
10429 * @ctxt: an XML parser context
10431 * parse an XML declaration header
10433 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
/*
 * Parses the XML declaration in order: version (xmlParseVersionInfo),
 * optional encoding (xmlParseEncodingDecl), optional standalone flag
 * (xmlParseSDDecl), and finally the closing delimiter.
 */
10437 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10441 * This value for standalone indicates that the document has an
10442 * XML declaration but it does not have a standalone attribute.
10443 * It will be overwritten later if a standalone attribute is found.
10445 ctxt->input->standalone = -2;
10448 * We know that '<?xml' is here.
10452 if (!IS_BLANK_CH(RAW)) {
10453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10454 "Blank needed after '<?xml'\n");
10459 * We must have the VersionInfo here.
10461 version = xmlParseVersionInfo(ctxt);
10462 if (version == NULL) {
10463 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
/* A version string other than XML_DEFAULT_VERSION gets extra scrutiny. */
10465 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10467 * Changed here for XML-1.0 5th edition
10469 if (ctxt->options & XML_PARSE_OLD10) {
10470 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10471 "Unsupported version '%s'\n",
/* A version beginning with "1." is only warned about; the other call below is fatal. */
10474 if ((version[0] == '1') && ((version[1] == '.'))) {
10475 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10476 "Unsupported version '%s'\n",
10479 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10480 "Unsupported version '%s'\n",
/* Release any earlier ctxt->version and store the parsed string there. */
10485 if (ctxt->version != NULL)
10486 xmlFree((void *) ctxt->version);
10487 ctxt->version = version;
10491 * We may have the encoding declaration
10493 if (!IS_BLANK_CH(RAW)) {
/* With no blank after the version, the closing delimiter is tested for before the missing blank is reported. */
10494 if ((RAW == '?') && (NXT(1) == '>')) {
10498 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10500 xmlParseEncodingDecl(ctxt);
10501 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10502 (ctxt->instate == XML_PARSER_EOF)) {
10504 * The XML REC instructs us to stop parsing right here
10510 * We may have the standalone status.
10512 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10513 if ((RAW == '?') && (NXT(1) == '>')) {
10517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10521 * We can grow the input buffer freely at that point
/* Overwrites the -2 placeholder set at the top with the parsed flag. */
10526 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10529 if ((RAW == '?') && (NXT(1) == '>')) {
10531 } else if (RAW == '>') {
10532 /* Deprecated old WD ... */
10533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
/* Second report of the same error, followed by MOVETO_ENDTAG on the cursor. */
10536 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10537 MOVETO_ENDTAG(CUR_PTR);
10544 * @ctxt: an XML parser context
10546 * parse an XML Misc* optional field.
10548 * [27] Misc ::= Comment | PI | S
/*
 * Loops while the parser has not reached XML_PARSER_EOF and the input is
 * positioned on a processing-instruction opener, a comment opener, or a
 * blank character, handling one such item per iteration.
 */
10552 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10553 while ((ctxt->instate != XML_PARSER_EOF) &&
10554 (((RAW == '<') && (NXT(1) == '?')) ||
10555 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10556 IS_BLANK_CH(CUR))) {
/* The three cases are told apart in the same order as in the loop condition. */
10557 if ((RAW == '<') && (NXT(1) == '?')) {
10559 } else if (IS_BLANK_CH(CUR)) {
10562 xmlParseComment(ctxt);
10567 * xmlParseDocument:
10568 * @ctxt: an XML parser context
10570 * parse an XML document (and build a tree if using the standard SAX
10573 * [1] document ::= prolog element Misc*
10575 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10577 * Returns 0, or -1 in case of error. The parser context is augmented
10578 * as a result of the parsing.
/*
 * Drives a complete document parse: encoding detection, XML declaration,
 * prolog Misc, optional DOCTYPE with its subsets, the root element, the
 * trailing Misc, and finally the SAX endDocument callback and the
 * document property flags.
 */
10582 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10584 xmlCharEncoding enc;
/* Guard: both the context and its current input are required. */
10588 if ((ctxt == NULL) || (ctxt->input == NULL))
10594 * SAX: detecting the level.
10596 xmlDetectSAX2(ctxt);
10599 * SAX: beginning of the document processing.
10601 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10602 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10603 if (ctxt->instate == XML_PARSER_EOF)
/* Sniff the encoding only when none is recorded on the context and at least four bytes are available. */
10606 if ((ctxt->encoding == NULL) &&
10607 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10609 * Get the 4 first bytes and decode the charset
10610 * if enc != XML_CHAR_ENCODING_NONE
10611 * plug some encoding conversion routines.
10617 enc = xmlDetectCharEncoding(&start[0], 4);
10618 if (enc != XML_CHAR_ENCODING_NONE) {
10619 xmlSwitchEncoding(ctxt, enc);
10625 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10630 * Check for the XMLDecl in the Prolog.
10631 * do not GROW here to avoid the detected encoder to decode more
10632 * than just the first line, unless the amount of data is really
10633 * too small to hold "<?xml version="1.0" encoding="foo"
10635 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10638 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10641 * Note that we will switch encoding on the fly.
10643 xmlParseXMLDecl(ctxt);
10644 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10645 (ctxt->instate == XML_PARSER_EOF)) {
10647 * The XML REC instructs us to stop parsing right here
/* Copy the standalone flag recorded on the input onto the context. */
10651 ctxt->standalone = ctxt->input->standalone;
/* NOTE(review): no visible line checks this xmlCharStrdup result for NULL - confirm. */
10654 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10656 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10657 ctxt->sax->startDocument(ctxt->userData);
10658 if (ctxt->instate == XML_PARSER_EOF)
/* Propagate the input buffer's compression value to the document when it is non-negative. */
10660 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10661 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10662 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10666 * The Misc part of the Prolog
10669 xmlParseMisc(ctxt);
10672 * Then possibly doc type declaration(s) and more Misc
10673 * (doctypedecl Misc*)?
10676 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
/* inSubset is set to 1 here, to 2 before the externalSubset callback, and back to 0 afterwards. */
10678 ctxt->inSubset = 1;
10679 xmlParseDocTypeDecl(ctxt);
10681 ctxt->instate = XML_PARSER_DTD;
10682 xmlParseInternalSubset(ctxt);
10683 if (ctxt->instate == XML_PARSER_EOF)
10688 * Create and update the external subset.
10690 ctxt->inSubset = 2;
10691 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10692 (!ctxt->disableSAX))
10693 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10694 ctxt->extSubSystem, ctxt->extSubURI);
10695 if (ctxt->instate == XML_PARSER_EOF)
10697 ctxt->inSubset = 0;
10699 xmlCleanSpecialAttr(ctxt);
10701 ctxt->instate = XML_PARSER_PROLOG;
10702 xmlParseMisc(ctxt);
10706 * Time to start parsing the tree itself
10710 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10711 "Start tag expected, '<' not found\n");
/* The state is CONTENT while the root element is parsed and EPILOG once it returns. */
10713 ctxt->instate = XML_PARSER_CONTENT;
10714 xmlParseElement(ctxt);
10715 ctxt->instate = XML_PARSER_EPILOG;
10719 * The Misc part at the end
10721 xmlParseMisc(ctxt);
10724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10726 ctxt->instate = XML_PARSER_EOF;
10730 * SAX: end of the document processing.
10732 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10733 ctxt->sax->endDocument(ctxt->userData);
10736 * Remove locally kept entity definitions if the tree was not built
10738 if ((ctxt->myDoc != NULL) &&
10739 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10740 xmlFreeDoc(ctxt->myDoc);
10741 ctxt->myDoc = NULL;
/* For a well-formed parse with a document, record the outcome as property flags. */
10744 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10745 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10747 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10748 if (ctxt->nsWellFormed)
10749 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10750 if (ctxt->options & XML_PARSE_OLD10)
10751 ctxt->myDoc->properties |= XML_DOC_OLD10;
10753 if (! ctxt->wellFormed) {
10761 * xmlParseExtParsedEnt:
10762 * @ctxt: an XML parser context
10764 * parse a general parsed entity
10765 * An external general parsed entity is well-formed if it matches the
10766 * production labeled extParsedEnt.
10768 * [78] extParsedEnt ::= TextDecl? content
10770 * Returns 0, or -1 in case of error. The parser context is augmented
10771 * as a result of the parsing.
/*
 * Parses an external parsed entity: encoding detection, optional leading
 * declaration, then content via xmlParseContent with validation and subset
 * loading switched off. Returns -1 when the result is not well-formed.
 */
10775 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10777 xmlCharEncoding enc;
/* Guard: both the context and its current input are required. */
10779 if ((ctxt == NULL) || (ctxt->input == NULL))
10782 xmlDefaultSAXHandlerInit();
10784 xmlDetectSAX2(ctxt);
10789 * SAX: beginning of the document processing.
10791 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10792 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10795 * Get the 4 first bytes and decode the charset
10796 * if enc != XML_CHAR_ENCODING_NONE
10797 * plug some encoding conversion routines.
10799 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10804 enc = xmlDetectCharEncoding(start, 4);
10805 if (enc != XML_CHAR_ENCODING_NONE) {
10806 xmlSwitchEncoding(ctxt, enc);
10812 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10816 * Check for the XMLDecl in the Prolog.
10819 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10822 * Note that we will switch encoding on the fly.
10824 xmlParseXMLDecl(ctxt);
/* NOTE(review): only errNo is tested here; the parser state is not compared with XML_PARSER_EOF on this line - confirm that is intended. */
10825 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10827 * The XML REC instructs us to stop parsing right here
10833 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10835 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10836 ctxt->sax->startDocument(ctxt->userData);
10837 if (ctxt->instate == XML_PARSER_EOF)
10841 * Doing validity checking on chunk doesn't make sense
10843 ctxt->instate = XML_PARSER_CONTENT;
10844 ctxt->validate = 0;
10845 ctxt->loadsubset = 0;
10848 xmlParseContent(ctxt);
10849 if (ctxt->instate == XML_PARSER_EOF)
/* After the content: a leftover end-tag opener is reported as unbalanced, any other leftover byte as extra content. */
10852 if ((RAW == '<') && (NXT(1) == '/')) {
10853 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10854 } else if (RAW != 0) {
10855 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10859 * SAX: end of the document processing.
10861 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10862 ctxt->sax->endDocument(ctxt->userData);
10864 if (! ctxt->wellFormed) return(-1);
10868 #ifdef LIBXML_PUSH_ENABLED
10869 /************************************************************************
10871 * Progressive parsing interfaces *
10873 ************************************************************************/
10876 * xmlParseLookupSequence:
10877 * @ctxt: an XML parser context
10878 * @first: the first char to lookup
10879 * @next: the next char to lookup or zero
10880 * @third: the next char to lookup or zero
10882 * Try to find if a sequence (first, next, third) or just (first next) or
10883 * (first) is available in the input stream.
10884 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10885 * to avoid rescanning sequences of bytes, it DOES change the state of the
10886 * parser, do not use liberally.
10888 * Returns the index to the current parsing point if the full sequence
10889 * is available, -1 otherwise.
/*
 * Scans the buffered input for `first`, optionally followed by `next` and
 * `third`. On a match it clears ctxt->checkIndex and returns the offset of
 * the match from the current parse position; otherwise it records in
 * ctxt->checkIndex how far the scan got.
 */
10892 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10893 xmlChar next, xmlChar third) {
10895 xmlParserInputPtr in;
10896 const xmlChar *buf;
10899 if (in == NULL) return(-1);
/* Offset of the current parse position from the start of the input. */
10900 base = in->cur - in->base;
10901 if (base < 0) return(-1);
/* Start from the offset saved by an earlier scan when it lies further on. */
10902 if (ctxt->checkIndex > base)
10903 base = ctxt->checkIndex;
10904 if (in->buf == NULL) {
10908 buf = xmlBufContent(in->buf->buffer);
10909 len = xmlBufUse(in->buf->buffer);
10911 /* take into account the sequence length */
10912 if (third) len -= 2;
10913 else if (next) len --;
10914 for (;base < len;base++) {
10915 if (buf[base] == first) {
10917 if ((buf[base + 1] != next) ||
10918 (buf[base + 2] != third)) continue;
10919 } else if (next != 0) {
10920 if (buf[base + 1] != next) continue;
/* Whole sequence matched: clear the saved scan offset. */
10922 ctxt->checkIndex = 0;
10925 xmlGenericError(xmlGenericErrorContext,
10926 "PP: lookup '%c' found at %d\n",
10928 else if (third == 0)
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: lookup '%c%c' found at %d\n",
10931 first, next, base);
10933 xmlGenericError(xmlGenericErrorContext,
10934 "PP: lookup '%c%c%c' found at %d\n",
10935 first, next, third, base);
/* The result is relative to the current parse position, not to the buffer start. */
10937 return(base - (in->cur - in->base));
/* Nothing found in the data scanned: remember the offset reached. */
10940 ctxt->checkIndex = base;
10943 xmlGenericError(xmlGenericErrorContext,
10944 "PP: lookup '%c' failed\n", first);
10945 else if (third == 0)
10946 xmlGenericError(xmlGenericErrorContext,
10947 "PP: lookup '%c%c' failed\n", first, next);
10949 xmlGenericError(xmlGenericErrorContext,
10950 "PP: lookup '%c%c%c' failed\n", first, next, third);
10956 * xmlParseGetLasts:
10957 * @ctxt: an XML parser context
10958 * @lastlt: pointer to store the last '<' from the input
10959 * @lastgt: pointer to store the last '>' from the input
10961 * Lookup the last < and > in the current chunk
/*
 * Searches the current input for '<' and '>' positions by walking a cursor
 * (tmp) between ctxt->input->base and ctxt->input->end. A NULL argument is
 * reported through xmlGenericError as an internal error.
 */
10964 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10965 const xmlChar **lastgt) {
10966 const xmlChar *tmp;
10968 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10969 xmlGenericError(xmlGenericErrorContext,
10970 "Internal error: xmlParseGetLasts\n");
/* The scan is done only in progressive mode with exactly one input on the stack. */
10973 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10974 tmp = ctxt->input->end;
/* Walk backwards towards the start of the input until a '<' is met. */
10976 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10977 if (tmp < ctxt->input->base) {
/* Walk forwards looking for '>', stepping over runs enclosed in single or double quotes. */
10983 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10984 if (*tmp == '\'') {
10986 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10987 if (tmp < ctxt->input->end) tmp++;
10988 } else if (*tmp == '"') {
10990 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10991 if (tmp < ctxt->input->end) tmp++;
10995 if (tmp < ctxt->input->end)
/* Walk backwards until a '>' is met or the start of the input is passed. */
11000 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11001 if (tmp >= ctxt->input->base)
11013 * xmlCheckCdataPush:
11014 * @utf: pointer to the block of characters
11015 * @len: length of the block in bytes
11016 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11018 * Check that the block of characters is okay as CData content [20]
11020 * Returns the number of bytes to pass if okay, a negative index where an
11021 * UTF-8 error occurred otherwise
/*
 * Walks `len` bytes of `utf`, decoding one UTF-8 sequence at a time and
 * checking each decoded code point with xmlIsCharQ. When a multi-byte
 * sequence is cut off by the end of the block the result is -ix if
 * `complete` is set and ix otherwise.
 */
11024 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11029 if ((utf == NULL) || (len <= 0))
11032 for (ix = 0; ix < len;) { /* string is 0-terminated */
11034 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 0 */
11037 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11041 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
/* Sequence would run past the block: an error only when the block is complete. */
11042 if (ix + 2 > len) return(complete ? -ix : ix);
/* The following byte must have the 10xxxxxx continuation pattern. */
11043 if ((utf[ix+1] & 0xc0 ) != 0x80)
/* Reassemble the code point from the payload bits of each byte. */
11045 codepoint = (utf[ix] & 0x1f) << 6;
11046 codepoint |= utf[ix+1] & 0x3f;
11047 if (!xmlIsCharQ(codepoint))
11050 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11051 if (ix + 3 > len) return(complete ? -ix : ix);
11052 if (((utf[ix+1] & 0xc0) != 0x80) ||
11053 ((utf[ix+2] & 0xc0) != 0x80))
11055 codepoint = (utf[ix] & 0xf) << 12;
11056 codepoint |= (utf[ix+1] & 0x3f) << 6;
11057 codepoint |= utf[ix+2] & 0x3f;
11058 if (!xmlIsCharQ(codepoint))
11061 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11062 if (ix + 4 > len) return(complete ? -ix : ix);
11063 if (((utf[ix+1] & 0xc0) != 0x80) ||
11064 ((utf[ix+2] & 0xc0) != 0x80) ||
11065 ((utf[ix+3] & 0xc0) != 0x80))
11067 codepoint = (utf[ix] & 0x7) << 18;
11068 codepoint |= (utf[ix+1] & 0x3f) << 12;
11069 codepoint |= (utf[ix+2] & 0x3f) << 6;
11070 codepoint |= utf[ix+3] & 0x3f;
11071 if (!xmlIsCharQ(codepoint))
11074 } else /* unknown encoding */
11081 * xmlParseTryOrFinish:
11082 * @ctxt: an XML parser context
11083 * @terminate: last chunk indicator
11085 * Try to progress on parsing
11087 * Returns zero if no parsing was possible
11090 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11094 const xmlChar *lastlt, *lastgt;
11096 if (ctxt->input == NULL)
11100 switch (ctxt->instate) {
11101 case XML_PARSER_EOF:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try EOF\n"); break;
11104 case XML_PARSER_START:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try START\n"); break;
11107 case XML_PARSER_MISC:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try MISC\n");break;
11110 case XML_PARSER_COMMENT:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try COMMENT\n");break;
11113 case XML_PARSER_PROLOG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try PROLOG\n");break;
11116 case XML_PARSER_START_TAG:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try START_TAG\n");break;
11119 case XML_PARSER_CONTENT:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try CONTENT\n");break;
11122 case XML_PARSER_CDATA_SECTION:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try CDATA_SECTION\n");break;
11125 case XML_PARSER_END_TAG:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try END_TAG\n");break;
11128 case XML_PARSER_ENTITY_DECL:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try ENTITY_DECL\n");break;
11131 case XML_PARSER_ENTITY_VALUE:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try ENTITY_VALUE\n");break;
11134 case XML_PARSER_ATTRIBUTE_VALUE:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try ATTRIBUTE_VALUE\n");break;
11137 case XML_PARSER_DTD:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try DTD\n");break;
11140 case XML_PARSER_EPILOG:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try EPILOG\n");break;
11143 case XML_PARSER_PI:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try PI\n");break;
11146 case XML_PARSER_IGNORE:
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: try IGNORE\n");break;
11152 if ((ctxt->input != NULL) &&
11153 (ctxt->input->cur - ctxt->input->base > 4096)) {
11155 ctxt->checkIndex = 0;
11157 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11159 while (ctxt->instate != XML_PARSER_EOF) {
11160 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11163 if (ctxt->input == NULL) break;
11164 if (ctxt->input->buf == NULL)
11165 avail = ctxt->input->length -
11166 (ctxt->input->cur - ctxt->input->base);
11169 * If we are operating on converted input, try to flush
11170 * remaining chars to avoid them stalling in the non-converted
11171 * buffer. But do not do this in document start where
11172 * encoding="..." may not have been read and we work on a
11173 * guessed encoding.
11175 if ((ctxt->instate != XML_PARSER_START) &&
11176 (ctxt->input->buf->raw != NULL) &&
11177 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11178 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11180 size_t current = ctxt->input->cur - ctxt->input->base;
11182 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11183 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11186 avail = xmlBufUse(ctxt->input->buf->buffer) -
11187 (ctxt->input->cur - ctxt->input->base);
11191 switch (ctxt->instate) {
11192 case XML_PARSER_EOF:
11194 * Document parsing is done !
11197 case XML_PARSER_START:
11198 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11200 xmlCharEncoding enc;
11203 * Very first chars read from the document flow.
11209 * Get the 4 first bytes and decode the charset
11210 * if enc != XML_CHAR_ENCODING_NONE
11211 * plug some encoding conversion routines,
11212 * else xmlSwitchEncoding will set to (default)
11219 enc = xmlDetectCharEncoding(start, 4);
11220 xmlSwitchEncoding(ctxt, enc);
11226 cur = ctxt->input->cur[0];
11227 next = ctxt->input->cur[1];
11229 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11230 ctxt->sax->setDocumentLocator(ctxt->userData,
11231 &xmlDefaultSAXLocator);
11232 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11233 xmlHaltParser(ctxt);
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: entering EOF\n");
11238 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11239 ctxt->sax->endDocument(ctxt->userData);
11242 if ((cur == '<') && (next == '?')) {
11243 /* PI or XML decl */
11244 if (avail < 5) return(ret);
11245 if ((!terminate) &&
11246 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11248 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11249 ctxt->sax->setDocumentLocator(ctxt->userData,
11250 &xmlDefaultSAXLocator);
11251 if ((ctxt->input->cur[2] == 'x') &&
11252 (ctxt->input->cur[3] == 'm') &&
11253 (ctxt->input->cur[4] == 'l') &&
11254 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11257 xmlGenericError(xmlGenericErrorContext,
11258 "PP: Parsing XML Decl\n");
11260 xmlParseXMLDecl(ctxt);
11261 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11263 * The XML REC instructs us to stop parsing right
11266 xmlHaltParser(ctxt);
11269 ctxt->standalone = ctxt->input->standalone;
11270 if ((ctxt->encoding == NULL) &&
11271 (ctxt->input->encoding != NULL))
11272 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11273 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11274 (!ctxt->disableSAX))
11275 ctxt->sax->startDocument(ctxt->userData);
11276 ctxt->instate = XML_PARSER_MISC;
11278 xmlGenericError(xmlGenericErrorContext,
11279 "PP: entering MISC\n");
11282 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11283 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11284 (!ctxt->disableSAX))
11285 ctxt->sax->startDocument(ctxt->userData);
11286 ctxt->instate = XML_PARSER_MISC;
11288 xmlGenericError(xmlGenericErrorContext,
11289 "PP: entering MISC\n");
11293 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11294 ctxt->sax->setDocumentLocator(ctxt->userData,
11295 &xmlDefaultSAXLocator);
11296 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11297 if (ctxt->version == NULL) {
11298 xmlErrMemory(ctxt, NULL);
11301 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11302 (!ctxt->disableSAX))
11303 ctxt->sax->startDocument(ctxt->userData);
11304 ctxt->instate = XML_PARSER_MISC;
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: entering MISC\n");
11311 case XML_PARSER_START_TAG: {
11312 const xmlChar *name;
11313 const xmlChar *prefix = NULL;
11314 const xmlChar *URI = NULL;
11315 int nsNr = ctxt->nsNr;
11317 if ((avail < 2) && (ctxt->inputNr == 1))
11319 cur = ctxt->input->cur[0];
11321 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11322 xmlHaltParser(ctxt);
11323 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11324 ctxt->sax->endDocument(ctxt->userData);
11328 if (ctxt->progressive) {
11329 /* > can be found unescaped in attribute values */
11330 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11332 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11336 if (ctxt->spaceNr == 0)
11337 spacePush(ctxt, -1);
11338 else if (*ctxt->space == -2)
11339 spacePush(ctxt, -1);
11341 spacePush(ctxt, *ctxt->space);
11342 #ifdef LIBXML_SAX1_ENABLED
11344 #endif /* LIBXML_SAX1_ENABLED */
11345 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11346 #ifdef LIBXML_SAX1_ENABLED
11348 name = xmlParseStartTag(ctxt);
11349 #endif /* LIBXML_SAX1_ENABLED */
11350 if (ctxt->instate == XML_PARSER_EOF)
11352 if (name == NULL) {
11354 xmlHaltParser(ctxt);
11355 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11356 ctxt->sax->endDocument(ctxt->userData);
11359 #ifdef LIBXML_VALID_ENABLED
11361 * [ VC: Root Element Type ]
11362 * The Name in the document type declaration must match
11363 * the element type of the root element.
11365 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11366 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11367 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11368 #endif /* LIBXML_VALID_ENABLED */
11371 * Check for an Empty Element.
11373 if ((RAW == '/') && (NXT(1) == '>')) {
11377 if ((ctxt->sax != NULL) &&
11378 (ctxt->sax->endElementNs != NULL) &&
11379 (!ctxt->disableSAX))
11380 ctxt->sax->endElementNs(ctxt->userData, name,
11382 if (ctxt->nsNr - nsNr > 0)
11383 nsPop(ctxt, ctxt->nsNr - nsNr);
11384 #ifdef LIBXML_SAX1_ENABLED
11386 if ((ctxt->sax != NULL) &&
11387 (ctxt->sax->endElement != NULL) &&
11388 (!ctxt->disableSAX))
11389 ctxt->sax->endElement(ctxt->userData, name);
11390 #endif /* LIBXML_SAX1_ENABLED */
11392 if (ctxt->instate == XML_PARSER_EOF)
11395 if (ctxt->nameNr == 0) {
11396 ctxt->instate = XML_PARSER_EPILOG;
11398 ctxt->instate = XML_PARSER_CONTENT;
11400 ctxt->progressive = 1;
11406 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11407 "Couldn't find end of Start Tag %s\n",
11413 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11414 #ifdef LIBXML_SAX1_ENABLED
11416 namePush(ctxt, name);
11417 #endif /* LIBXML_SAX1_ENABLED */
11419 ctxt->instate = XML_PARSER_CONTENT;
11420 ctxt->progressive = 1;
11423 case XML_PARSER_CONTENT: {
11424 const xmlChar *test;
11426 if ((avail < 2) && (ctxt->inputNr == 1))
11428 cur = ctxt->input->cur[0];
11429 next = ctxt->input->cur[1];
11432 cons = ctxt->input->consumed;
11433 if ((cur == '<') && (next == '/')) {
11434 ctxt->instate = XML_PARSER_END_TAG;
11436 } else if ((cur == '<') && (next == '?')) {
11437 if ((!terminate) &&
11438 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11439 ctxt->progressive = XML_PARSER_PI;
11443 ctxt->instate = XML_PARSER_CONTENT;
11444 ctxt->progressive = 1;
11445 } else if ((cur == '<') && (next != '!')) {
11446 ctxt->instate = XML_PARSER_START_TAG;
11448 } else if ((cur == '<') && (next == '!') &&
11449 (ctxt->input->cur[2] == '-') &&
11450 (ctxt->input->cur[3] == '-')) {
11455 ctxt->input->cur += 4;
11456 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11457 ctxt->input->cur -= 4;
11458 if ((!terminate) && (term < 0)) {
11459 ctxt->progressive = XML_PARSER_COMMENT;
11462 xmlParseComment(ctxt);
11463 ctxt->instate = XML_PARSER_CONTENT;
11464 ctxt->progressive = 1;
11465 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11466 (ctxt->input->cur[2] == '[') &&
11467 (ctxt->input->cur[3] == 'C') &&
11468 (ctxt->input->cur[4] == 'D') &&
11469 (ctxt->input->cur[5] == 'A') &&
11470 (ctxt->input->cur[6] == 'T') &&
11471 (ctxt->input->cur[7] == 'A') &&
11472 (ctxt->input->cur[8] == '[')) {
11474 ctxt->instate = XML_PARSER_CDATA_SECTION;
11476 } else if ((cur == '<') && (next == '!') &&
11479 } else if (cur == '&') {
11480 if ((!terminate) &&
11481 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11483 xmlParseReference(ctxt);
11485 /* TODO Avoid the extra copy, handle directly !!! */
11487 * Goal of the following test is:
11488 * - minimize calls to the SAX 'character' callback
11489 * when they are mergeable
11490 * - handle a problem for isBlank when we only parse
11491 * a sequence of blank chars and the next one is
11492 * not available to check against '<' presence.
11493 * - tries to homogenize the differences in SAX
11494 * callbacks between the push and pull versions
11497 if ((ctxt->inputNr == 1) &&
11498 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11500 if (ctxt->progressive) {
11501 if ((lastlt == NULL) ||
11502 (ctxt->input->cur > lastlt))
11504 } else if (xmlParseLookupSequence(ctxt,
11510 ctxt->checkIndex = 0;
11511 xmlParseCharData(ctxt, 0);
11513 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11514 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11515 "detected an error in element content\n");
11516 xmlHaltParser(ctxt);
11521 case XML_PARSER_END_TAG:
11525 if (ctxt->progressive) {
11526 /* > can be found unescaped in attribute values */
11527 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11529 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11534 xmlParseEndTag2(ctxt,
11535 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11536 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11538 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11541 #ifdef LIBXML_SAX1_ENABLED
11543 xmlParseEndTag1(ctxt, 0);
11544 #endif /* LIBXML_SAX1_ENABLED */
11545 if (ctxt->instate == XML_PARSER_EOF) {
11547 } else if (ctxt->nameNr == 0) {
11548 ctxt->instate = XML_PARSER_EPILOG;
11550 ctxt->instate = XML_PARSER_CONTENT;
11553 case XML_PARSER_CDATA_SECTION: {
11555 * The Push mode need to have the SAX callback for
11556 * cdataBlock merge back contiguous callbacks.
11560 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11562 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11565 tmp = xmlCheckCdataPush(ctxt->input->cur,
11566 XML_PARSER_BIG_BUFFER_SIZE, 0);
11569 ctxt->input->cur += tmp;
11570 goto encoding_error;
11572 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11573 if (ctxt->sax->cdataBlock != NULL)
11574 ctxt->sax->cdataBlock(ctxt->userData,
11575 ctxt->input->cur, tmp);
11576 else if (ctxt->sax->characters != NULL)
11577 ctxt->sax->characters(ctxt->userData,
11578 ctxt->input->cur, tmp);
11580 if (ctxt->instate == XML_PARSER_EOF)
11583 ctxt->checkIndex = 0;
11589 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11590 if ((tmp < 0) || (tmp != base)) {
11592 ctxt->input->cur += tmp;
11593 goto encoding_error;
11595 if ((ctxt->sax != NULL) && (base == 0) &&
11596 (ctxt->sax->cdataBlock != NULL) &&
11597 (!ctxt->disableSAX)) {
11599 * Special case to provide identical behaviour
11600 * between pull and push parsers on empty CDATA
11603 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11604 (!strncmp((const char *)&ctxt->input->cur[-9],
11606 ctxt->sax->cdataBlock(ctxt->userData,
11608 } else if ((ctxt->sax != NULL) && (base > 0) &&
11609 (!ctxt->disableSAX)) {
11610 if (ctxt->sax->cdataBlock != NULL)
11611 ctxt->sax->cdataBlock(ctxt->userData,
11612 ctxt->input->cur, base);
11613 else if (ctxt->sax->characters != NULL)
11614 ctxt->sax->characters(ctxt->userData,
11615 ctxt->input->cur, base);
11617 if (ctxt->instate == XML_PARSER_EOF)
11620 ctxt->checkIndex = 0;
11621 ctxt->instate = XML_PARSER_CONTENT;
11623 xmlGenericError(xmlGenericErrorContext,
11624 "PP: entering CONTENT\n");
11629 case XML_PARSER_MISC:
11631 if (ctxt->input->buf == NULL)
11632 avail = ctxt->input->length -
11633 (ctxt->input->cur - ctxt->input->base);
11635 avail = xmlBufUse(ctxt->input->buf->buffer) -
11636 (ctxt->input->cur - ctxt->input->base);
11639 cur = ctxt->input->cur[0];
11640 next = ctxt->input->cur[1];
11641 if ((cur == '<') && (next == '?')) {
11642 if ((!terminate) &&
11643 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11644 ctxt->progressive = XML_PARSER_PI;
11648 xmlGenericError(xmlGenericErrorContext,
11649 "PP: Parsing PI\n");
11652 if (ctxt->instate == XML_PARSER_EOF)
11654 ctxt->instate = XML_PARSER_MISC;
11655 ctxt->progressive = 1;
11656 ctxt->checkIndex = 0;
11657 } else if ((cur == '<') && (next == '!') &&
11658 (ctxt->input->cur[2] == '-') &&
11659 (ctxt->input->cur[3] == '-')) {
11660 if ((!terminate) &&
11661 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11662 ctxt->progressive = XML_PARSER_COMMENT;
11666 xmlGenericError(xmlGenericErrorContext,
11667 "PP: Parsing Comment\n");
11669 xmlParseComment(ctxt);
11670 if (ctxt->instate == XML_PARSER_EOF)
11672 ctxt->instate = XML_PARSER_MISC;
11673 ctxt->progressive = 1;
11674 ctxt->checkIndex = 0;
11675 } else if ((cur == '<') && (next == '!') &&
11676 (ctxt->input->cur[2] == 'D') &&
11677 (ctxt->input->cur[3] == 'O') &&
11678 (ctxt->input->cur[4] == 'C') &&
11679 (ctxt->input->cur[5] == 'T') &&
11680 (ctxt->input->cur[6] == 'Y') &&
11681 (ctxt->input->cur[7] == 'P') &&
11682 (ctxt->input->cur[8] == 'E')) {
11683 if ((!terminate) &&
11684 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11685 ctxt->progressive = XML_PARSER_DTD;
11689 xmlGenericError(xmlGenericErrorContext,
11690 "PP: Parsing internal subset\n");
11692 ctxt->inSubset = 1;
11693 ctxt->progressive = 0;
11694 ctxt->checkIndex = 0;
11695 xmlParseDocTypeDecl(ctxt);
11696 if (ctxt->instate == XML_PARSER_EOF)
11699 ctxt->instate = XML_PARSER_DTD;
11701 xmlGenericError(xmlGenericErrorContext,
11702 "PP: entering DTD\n");
11706 * Create and update the external subset.
11708 ctxt->inSubset = 2;
11709 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11710 (ctxt->sax->externalSubset != NULL))
11711 ctxt->sax->externalSubset(ctxt->userData,
11712 ctxt->intSubName, ctxt->extSubSystem,
11714 ctxt->inSubset = 0;
11715 xmlCleanSpecialAttr(ctxt);
11716 ctxt->instate = XML_PARSER_PROLOG;
11718 xmlGenericError(xmlGenericErrorContext,
11719 "PP: entering PROLOG\n");
11722 } else if ((cur == '<') && (next == '!') &&
11726 ctxt->instate = XML_PARSER_START_TAG;
11727 ctxt->progressive = XML_PARSER_START_TAG;
11728 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11730 xmlGenericError(xmlGenericErrorContext,
11731 "PP: entering START_TAG\n");
11735 case XML_PARSER_PROLOG:
11737 if (ctxt->input->buf == NULL)
11738 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11740 avail = xmlBufUse(ctxt->input->buf->buffer) -
11741 (ctxt->input->cur - ctxt->input->base);
11744 cur = ctxt->input->cur[0];
11745 next = ctxt->input->cur[1];
11746 if ((cur == '<') && (next == '?')) {
11747 if ((!terminate) &&
11748 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11749 ctxt->progressive = XML_PARSER_PI;
11753 xmlGenericError(xmlGenericErrorContext,
11754 "PP: Parsing PI\n");
11757 if (ctxt->instate == XML_PARSER_EOF)
11759 ctxt->instate = XML_PARSER_PROLOG;
11760 ctxt->progressive = 1;
11761 } else if ((cur == '<') && (next == '!') &&
11762 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11763 if ((!terminate) &&
11764 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11765 ctxt->progressive = XML_PARSER_COMMENT;
11769 xmlGenericError(xmlGenericErrorContext,
11770 "PP: Parsing Comment\n");
11772 xmlParseComment(ctxt);
11773 if (ctxt->instate == XML_PARSER_EOF)
11775 ctxt->instate = XML_PARSER_PROLOG;
11776 ctxt->progressive = 1;
11777 } else if ((cur == '<') && (next == '!') &&
11781 ctxt->instate = XML_PARSER_START_TAG;
11782 if (ctxt->progressive == 0)
11783 ctxt->progressive = XML_PARSER_START_TAG;
11784 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11786 xmlGenericError(xmlGenericErrorContext,
11787 "PP: entering START_TAG\n");
11791 case XML_PARSER_EPILOG:
11793 if (ctxt->input->buf == NULL)
11794 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11796 avail = xmlBufUse(ctxt->input->buf->buffer) -
11797 (ctxt->input->cur - ctxt->input->base);
11800 cur = ctxt->input->cur[0];
11801 next = ctxt->input->cur[1];
11802 if ((cur == '<') && (next == '?')) {
11803 if ((!terminate) &&
11804 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11805 ctxt->progressive = XML_PARSER_PI;
11809 xmlGenericError(xmlGenericErrorContext,
11810 "PP: Parsing PI\n");
11813 if (ctxt->instate == XML_PARSER_EOF)
11815 ctxt->instate = XML_PARSER_EPILOG;
11816 ctxt->progressive = 1;
11817 } else if ((cur == '<') && (next == '!') &&
11818 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11819 if ((!terminate) &&
11820 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11821 ctxt->progressive = XML_PARSER_COMMENT;
11825 xmlGenericError(xmlGenericErrorContext,
11826 "PP: Parsing Comment\n");
11828 xmlParseComment(ctxt);
11829 if (ctxt->instate == XML_PARSER_EOF)
11831 ctxt->instate = XML_PARSER_EPILOG;
11832 ctxt->progressive = 1;
11833 } else if ((cur == '<') && (next == '!') &&
11837 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11838 xmlHaltParser(ctxt);
11840 xmlGenericError(xmlGenericErrorContext,
11841 "PP: entering EOF\n");
11843 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11844 ctxt->sax->endDocument(ctxt->userData);
11848 case XML_PARSER_DTD: {
11850 * Sorry but progressive parsing of the internal subset
11851 * is not expected to be supported. We first check that
11852 * the full content of the internal subset is available and
11853 * the parsing is launched only at that point.
11854 * Internal subset ends up with "']' S? '>'" in an unescaped
11855 * section and not in a ']]>' sequence which are conditional
11856 * sections (whoever argued to keep that crap in XML deserve
11857 * a place in hell !).
11864 base = ctxt->input->cur - ctxt->input->base;
11865 if (base < 0) return(0);
11866 if (ctxt->checkIndex > base)
11867 base = ctxt->checkIndex;
11868 buf = xmlBufContent(ctxt->input->buf->buffer);
11869 use = xmlBufUse(ctxt->input->buf->buffer);
11870 for (;(unsigned int) base < use; base++) {
11872 if (buf[base] == quote)
11876 if ((quote == 0) && (buf[base] == '<')) {
11878 /* special handling of comments */
11879 if (((unsigned int) base + 4 < use) &&
11880 (buf[base + 1] == '!') &&
11881 (buf[base + 2] == '-') &&
11882 (buf[base + 3] == '-')) {
11883 for (;(unsigned int) base + 3 < use; base++) {
11884 if ((buf[base] == '-') &&
11885 (buf[base + 1] == '-') &&
11886 (buf[base + 2] == '>')) {
11894 fprintf(stderr, "unfinished comment\n");
11901 if (buf[base] == '"') {
11905 if (buf[base] == '\'') {
11909 if (buf[base] == ']') {
11911 fprintf(stderr, "%c%c%c%c: ", buf[base],
11912 buf[base + 1], buf[base + 2], buf[base + 3]);
11914 if ((unsigned int) base +1 >= use)
11916 if (buf[base + 1] == ']') {
11917 /* conditional crap, skip both ']' ! */
11921 for (i = 1; (unsigned int) base + i < use; i++) {
11922 if (buf[base + i] == '>') {
11924 fprintf(stderr, "found\n");
11926 goto found_end_int_subset;
11928 if (!IS_BLANK_CH(buf[base + i])) {
11930 fprintf(stderr, "not found\n");
11932 goto not_end_of_int_subset;
11936 fprintf(stderr, "end of stream\n");
11941 not_end_of_int_subset:
11942 continue; /* for */
11945 * We didn't find the end of the internal subset
11948 ctxt->checkIndex = base;
11950 ctxt->checkIndex = 0;
11953 xmlGenericError(xmlGenericErrorContext,
11954 "PP: lookup of int subset end filed\n");
11958 found_end_int_subset:
11959 ctxt->checkIndex = 0;
11960 xmlParseInternalSubset(ctxt);
11961 if (ctxt->instate == XML_PARSER_EOF)
11963 ctxt->inSubset = 2;
11964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11965 (ctxt->sax->externalSubset != NULL))
11966 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11967 ctxt->extSubSystem, ctxt->extSubURI);
11968 ctxt->inSubset = 0;
11969 xmlCleanSpecialAttr(ctxt);
11970 if (ctxt->instate == XML_PARSER_EOF)
11972 ctxt->instate = XML_PARSER_PROLOG;
11973 ctxt->checkIndex = 0;
11975 xmlGenericError(xmlGenericErrorContext,
11976 "PP: entering PROLOG\n");
11980 case XML_PARSER_COMMENT:
11981 xmlGenericError(xmlGenericErrorContext,
11982 "PP: internal error, state == COMMENT\n");
11983 ctxt->instate = XML_PARSER_CONTENT;
11985 xmlGenericError(xmlGenericErrorContext,
11986 "PP: entering CONTENT\n");
11989 case XML_PARSER_IGNORE:
11990 xmlGenericError(xmlGenericErrorContext,
11991 "PP: internal error, state == IGNORE");
11992 ctxt->instate = XML_PARSER_DTD;
11994 xmlGenericError(xmlGenericErrorContext,
11995 "PP: entering DTD\n");
11998 case XML_PARSER_PI:
11999 xmlGenericError(xmlGenericErrorContext,
12000 "PP: internal error, state == PI\n");
12001 ctxt->instate = XML_PARSER_CONTENT;
12003 xmlGenericError(xmlGenericErrorContext,
12004 "PP: entering CONTENT\n");
12007 case XML_PARSER_ENTITY_DECL:
12008 xmlGenericError(xmlGenericErrorContext,
12009 "PP: internal error, state == ENTITY_DECL\n");
12010 ctxt->instate = XML_PARSER_DTD;
12012 xmlGenericError(xmlGenericErrorContext,
12013 "PP: entering DTD\n");
12016 case XML_PARSER_ENTITY_VALUE:
12017 xmlGenericError(xmlGenericErrorContext,
12018 "PP: internal error, state == ENTITY_VALUE\n");
12019 ctxt->instate = XML_PARSER_CONTENT;
12021 xmlGenericError(xmlGenericErrorContext,
12022 "PP: entering DTD\n");
12025 case XML_PARSER_ATTRIBUTE_VALUE:
12026 xmlGenericError(xmlGenericErrorContext,
12027 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12028 ctxt->instate = XML_PARSER_START_TAG;
12030 xmlGenericError(xmlGenericErrorContext,
12031 "PP: entering START_TAG\n");
12034 case XML_PARSER_SYSTEM_LITERAL:
12035 xmlGenericError(xmlGenericErrorContext,
12036 "PP: internal error, state == SYSTEM_LITERAL\n");
12037 ctxt->instate = XML_PARSER_START_TAG;
12039 xmlGenericError(xmlGenericErrorContext,
12040 "PP: entering START_TAG\n");
12043 case XML_PARSER_PUBLIC_LITERAL:
12044 xmlGenericError(xmlGenericErrorContext,
12045 "PP: internal error, state == PUBLIC_LITERAL\n");
12046 ctxt->instate = XML_PARSER_START_TAG;
12048 xmlGenericError(xmlGenericErrorContext,
12049 "PP: entering START_TAG\n");
12056 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12063 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12064 ctxt->input->cur[0], ctxt->input->cur[1],
12065 ctxt->input->cur[2], ctxt->input->cur[3]);
12066 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12067 "Input is not proper UTF-8, indicate encoding !\n%s",
12068 BAD_CAST buffer, NULL);
12074 * xmlParseCheckTransition:
12075 * @ctxt: an XML parser context
12076 * @chunk: a char array
12077 * @size: the size in byte of the chunk
12079 * Check, depending on the current parser state, whether the given chunk must
12080 * be processed immediately or whether more data is needed to advance parsing.
12082 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12085 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
/*
 * Cheap pre-check on freshly pushed bytes: in every state tested below
 * the chunk is scanned with memchr() for a '>' byte.
 * NOTE(review): the embedded line numbers of this listing skip
 * (12087, 12090-12092, ...), so the statements guarded by these tests
 * are not visible here -- presumably each hit returns 1; confirm.
 */
/* Invalid arguments: NULL context, NULL chunk or negative size. */
12086 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
/*
 * Tags, CDATA sections and the DTD are recognised through ctxt->instate;
 * comments and PIs through ctxt->progressive.  Only a single '>' is
 * searched for in every case, so a hit means "worth a parse attempt",
 * not "the construct is complete".
 */
12088 if (ctxt->instate == XML_PARSER_START_TAG) {
12089 if (memchr(chunk, '>', size) != NULL)
12093 if (ctxt->progressive == XML_PARSER_COMMENT) {
12094 if (memchr(chunk, '>', size) != NULL)
12098 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12099 if (memchr(chunk, '>', size) != NULL)
12103 if (ctxt->progressive == XML_PARSER_PI) {
12104 if (memchr(chunk, '>', size) != NULL)
12108 if (ctxt->instate == XML_PARSER_END_TAG) {
12109 if (memchr(chunk, '>', size) != NULL)
/* The DTD case matches on either field. */
12113 if ((ctxt->progressive == XML_PARSER_DTD) ||
12114 (ctxt->instate == XML_PARSER_DTD)) {
12115 if (memchr(chunk, '>', size) != NULL)
12124 * @ctxt: an XML parser context
12125 * @chunk: a char array
12126 * @size: the size in byte of the chunk
12127 * @terminate: last chunk indicator
12129 * Parse a Chunk of memory
12131 * Returns zero if no error, the xmlParserErrors otherwise.
12134 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12138 size_t old_avail = 0;
/*
 * Push-mode entry point: appends @chunk to the input buffer with
 * xmlParserInputBufferPush() and drives xmlParseTryOrFinish() over the
 * buffered data.
 * NOTE(review): this listing omits lines (the embedded numbers skip), so
 * several declarations, guards and closing braces are not visible.
 */
12142 return(XML_ERR_INTERNAL_ERROR);
/* A context already in error with SAX disabled reports that error again. */
12143 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12144 return(ctxt->errNo);
12145 if (ctxt->instate == XML_PARSER_EOF)
/* Still in the START state: run xmlDetectSAX2() before touching any data. */
12147 if (ctxt->instate == XML_PARSER_START)
12148 xmlDetectSAX2(ctxt);
/*
 * Non-final chunk ending in '\r'.  The guarded statements are not shown;
 * presumably the '\r' is held back and pushed separately further down
 * (see the end_in_lf block) -- confirm.
 */
12149 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12150 (chunk[size - 1] == '\r')) {
/* There is data to add and the parser has not reached EOF. */
12157 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12158 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * The read position is recorded as offsets (base, cur) and re-applied
 * with xmlBufSetInputBaseCur() once the push is done.
 */
12159 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12160 size_t cur = ctxt->input->cur - ctxt->input->base;
/* Buffer fill level before this push; used later to locate the new bytes. */
12163 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12165 * Specific handling if we autodetected an encoding, we should not
12166 * push more than the first line ... which depend on the encoding
12167 * And only push the rest once the final encoding was detected
12169 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12170 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
/*
 * len is the byte limit for this first push: 45 unless one of the
 * encoder-name tests below matches (the adjusted values are on lines
 * not shown in this listing).
 */
12171 unsigned int len = 45;
12173 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12174 BAD_CAST "UTF-16")) ||
12175 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176 BAD_CAST "UTF16")))
12178 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12179 BAD_CAST "UCS-4")) ||
12180 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
/* Raw bytes already consumed count against the limit. */
12184 if (ctxt->input->buf->rawconsumed < len)
12185 len -= ctxt->input->buf->rawconsumed;
12188 * Change size for reading the initial declaration only
12189 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12190 * will blindly copy extra bytes from memory.
12192 if ((unsigned int) size > len) {
/* remain: number of bytes of the chunk beyond the limit. */
12193 remain = size - len;
/* Hand the chunk to the input buffer. */
12199 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/*
 * NOTE(review): XML_PARSER_EOF is the value ctxt->instate is compared
 * against elsewhere in this function, i.e. a parser state, yet here it
 * is stored in errNo and returned as the error code -- confirm that
 * this is intended.
 */
12201 ctxt->errNo = XML_PARSER_EOF;
12202 xmlHaltParser(ctxt);
12203 return (XML_PARSER_EOF);
/* Re-apply the saved read position to the input after the push. */
12205 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12207 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/*
 * No data was pushed: when the input has an encoder, a buffer and raw
 * data, run the converter and restore the read position afterwards.
 */
12210 } else if (ctxt->instate != XML_PARSER_EOF) {
12211 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12212 xmlParserInputBufferPtr in = ctxt->input->buf;
12213 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12214 (in->raw != NULL)) {
12216 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12217 size_t current = ctxt->input->cur - ctxt->input->base;
12219 nbchars = xmlCharEncInput(in, terminate);
/* Encoder failure path; its guarding test is on a line not shown here. */
12222 xmlGenericError(xmlGenericErrorContext,
12223 "xmlParseChunk: encoder error\n");
12224 return(XML_ERR_INVALID_ENCODING);
12226 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
/*
 * Parse attempt with terminate forced to 0; the condition selecting this
 * call is on a line not shown here.
 */
12231 xmlParseTryOrFinish(ctxt, 0);
/*
 * avail is the buffer fill level after the push, so the bytes from
 * base[old_avail] up to avail are what this call added; that range is
 * what xmlParseCheckTransition() inspects below.
 */
12233 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12234 avail = xmlBufUse(ctxt->input->buf->buffer);
12236 * Depending on the current state it may not be such
12237 * a good idea to try parsing if there is nothing in the chunk
12238 * which would be worth doing a parser state transition and we
12239 * need to wait for more data
12241 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12242 (old_avail == 0) || (avail == 0) ||
12243 (xmlParseCheckTransition(ctxt,
12244 (const char *)&ctxt->input->base[old_avail],
12245 avail - old_avail)))
12246 xmlParseTryOrFinish(ctxt, terminate);
/* Parser reached the EOF state: hand back the recorded error code. */
12248 if (ctxt->instate == XML_PARSER_EOF)
12249 return(ctxt->errNo);
/*
 * Without XML_PARSE_HUGE, more than XML_MAX_LOOKUP_LIMIT bytes either
 * after cur or between base and cur is a fatal error and halts the parser.
 */
12251 if ((ctxt->input != NULL) &&
12252 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12253 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12254 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12255 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12256 xmlHaltParser(ctxt);
12258 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12259 return(ctxt->errNo);
/*
 * end_in_lf set: a single "\r" is pushed as its own one-byte chunk, with
 * the read position saved before and restored after.
 */
12267 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12268 (ctxt->input->buf != NULL)) {
12269 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12271 size_t current = ctxt->input->cur - ctxt->input->base;
12273 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12275 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12280 * Check for termination
12284 if (ctxt->input != NULL) {
/*
 * cur_avail: bytes not yet read, taken from input->length when there is
 * no buffer object and from xmlBufUse() otherwise.
 */
12285 if (ctxt->input->buf == NULL)
12286 cur_avail = ctxt->input->length -
12287 (ctxt->input->cur - ctxt->input->base);
12289 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12290 (ctxt->input->cur - ctxt->input->base);
/*
 * Ending in a state other than EOF or EPILOG, or in EPILOG with unread
 * bytes left, raises XML_ERR_DOCUMENT_END.
 */
12293 if ((ctxt->instate != XML_PARSER_EOF) &&
12294 (ctxt->instate != XML_PARSER_EPILOG)) {
12295 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12297 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12298 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* endDocument is delivered only if the EOF state was not reached before. */
12300 if (ctxt->instate != XML_PARSER_EOF) {
12301 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12302 ctxt->sax->endDocument(ctxt->userData);
12304 ctxt->instate = XML_PARSER_EOF;
/* Document not well-formed: return the recorded error code. */
12306 if (ctxt->wellFormed == 0)
12307 return((xmlParserErrors) ctxt->errNo);
12312 /************************************************************************
12314 * I/O front end functions to the parser *
12316 ************************************************************************/
12319 * xmlCreatePushParserCtxt:
12320 * @sax: a SAX handler
12321 * @user_data: The user data returned on SAX callbacks
12322 * @chunk: a pointer to an array of chars
12323 * @size: number of chars in the array
12324 * @filename: an optional file name or URI
12326 * Create a parser context for using the XML parser in push mode.
12327 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12328 * the encoding. The remaining characters will be parsed so they
12329 * don't need to be fed in again through xmlParseChunk.
12330 * To allow content encoding detection, @size should be >= 4
12331 * The value of @filename is used for fetching external entities
12332 * and error/warning reports.
12334 * Returns the new parser context or NULL
12338 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12339 const char *chunk, int size, const char *filename) {
/*
 * Builds a parser context for push parsing: allocates an input buffer
 * and a context, installs a private copy of the SAX handler block,
 * creates the input stream, pushes the optional initial chunk and
 * switches to the detected encoding.  Encoding detection is only
 * attempted when at least 4 bytes of @chunk are supplied.
 * NOTE(review): this listing omits lines (the embedded numbers skip),
 * including the return statements of the failure paths.
 */
12340 xmlParserCtxtPtr ctxt;
12341 xmlParserInputPtr inputStream;
12342 xmlParserInputBufferPtr buf;
12343 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12346 * plug some encoding conversion routines
12348 if ((chunk != NULL) && (size >= 4))
12349 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12351 buf = xmlAllocParserInputBuffer(enc);
12352 if (buf == NULL) return(NULL);
12354 ctxt = xmlNewParserCtxt();
/* Context allocation failed: report it and release the buffer. */
12355 if (ctxt == NULL) {
12356 xmlErrMemory(NULL, "creating parser: out of memory\n");
12357 xmlFreeParserInputBuffer(buf);
12360 ctxt->dictNames = 1;
/* pushTab is sized at three pointer slots per name-stack entry (nameMax * 3). */
12361 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12362 if (ctxt->pushTab == NULL) {
12363 xmlErrMemory(ctxt, NULL);
12364 xmlFreeParserInputBuffer(buf);
12365 xmlFreeParserCtxt(ctxt);
/*
 * Replace the context's handler block with a freshly allocated copy of
 * @sax.  In SAX1 builds the existing block is freed only when it is not
 * the static xmlDefaultSAXHandler.  The test that selects this section
 * is on a line not shown here.
 */
12369 #ifdef LIBXML_SAX1_ENABLED
12370 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12371 #endif /* LIBXML_SAX1_ENABLED */
12372 xmlFree(ctxt->sax);
12373 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12374 if (ctxt->sax == NULL) {
12375 xmlErrMemory(ctxt, NULL);
12376 xmlFreeParserInputBuffer(buf);
12377 xmlFreeParserCtxt(ctxt);
/*
 * The new block is zeroed first; a block marked XML_SAX2_MAGIC is copied
 * whole, otherwise only sizeof(xmlSAXHandlerV1) bytes are copied.
 */
12380 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12381 if (sax->initialized == XML_SAX2_MAGIC)
12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12384 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
/* userData is only overridden when the caller supplied a value. */
12385 if (user_data != NULL)
12386 ctxt->userData = user_data;
/* The context directory is derived from @filename when one is given. */
12388 if (filename == NULL) {
12389 ctxt->directory = NULL;
12391 ctxt->directory = xmlParserGetDirectory(filename);
12394 inputStream = xmlNewInputStream(ctxt);
12395 if (inputStream == NULL) {
12396 xmlFreeParserCtxt(ctxt);
12397 xmlFreeParserInputBuffer(buf);
12401 if (filename == NULL)
12402 inputStream->filename = NULL;
12404 inputStream->filename = (char *)
12405 xmlCanonicPath((const xmlChar *) filename);
12406 if (inputStream->filename == NULL) {
/*
 * NOTE(review): inputStream is not attached to ctxt until inputPush()
 * below, and this failure path only frees ctxt and buf in the visible
 * lines -- check whether inputStream is leaked here.
 */
12407 xmlFreeParserCtxt(ctxt);
12408 xmlFreeParserInputBuffer(buf);
/* Attach the buffer to the stream, sync its pointers, make it the current input. */
12412 inputStream->buf = buf;
12413 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12414 inputPush(ctxt, inputStream);
12417 * If the caller didn't provide an initial 'chunk' for determining
12418 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12419 * that it can be automatically determined later
12421 if ((size == 0) || (chunk == NULL)) {
12422 ctxt->charset = XML_CHAR_ENCODING_NONE;
12423 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
/* Push the initial chunk, saving the read position before and restoring it after. */
12424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12425 size_t cur = ctxt->input->cur - ctxt->input->base;
12427 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12429 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12431 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* Switch the context to the encoding detected from the initial bytes, if any. */
12435 if (enc != XML_CHAR_ENCODING_NONE) {
12436 xmlSwitchEncoding(ctxt, enc);
12441 #endif /* LIBXML_PUSH_ENABLED */
12445 * @ctxt: an XML parser context
12447 * Blocks further parser processing without overriding the error
12451 xmlHaltParser(xmlParserCtxtPtr ctxt) {
/*
 * Puts the parser into a terminal state: EOF, SAX callbacks disabled,
 * stacked inputs released and the remaining input replaced by an empty
 * string.  None of the visible lines assigns ctxt->errNo.
 * NOTE(review): lines 12452-12453 are missing from this listing.
 */
12454 ctxt->instate = XML_PARSER_EOF;
12455 ctxt->disableSAX = 1;
/* Pop and free every input stacked above the first one. */
12456 while (ctxt->inputNr > 1)
12457 xmlFreeInputStream(inputPop(ctxt));
/*
 * For the remaining input: its free callback, when set, is invoked on
 * base and then cleared.
 */
12458 if (ctxt->input != NULL) {
12460 * in case there was a specific allocation deallocate before
12463 if (ctxt->input->free != NULL) {
12464 ctxt->input->free((xmlChar *) ctxt->input->base);
12465 ctxt->input->free = NULL;
/* cur, base and end all point at the same empty string: nothing is left to read. */
12467 ctxt->input->cur = BAD_CAST"";
12468 ctxt->input->base = ctxt->input->cur;
12469 ctxt->input->end = ctxt->input->cur;
12475 * @ctxt: an XML parser context
12477 * Blocks further parser processing
12480 xmlStopParser(xmlParserCtxtPtr ctxt) {
/*
 * Halts the parser through xmlHaltParser() and then records
 * XML_ERR_USER_STOP as the context's error code.
 * NOTE(review): lines 12481-12482 are missing from this listing.
 */
12483 xmlHaltParser(ctxt);
12484 ctxt->errNo = XML_ERR_USER_STOP;
12488 * xmlCreateIOParserCtxt:
12489 * @sax: a SAX handler
12490 * @user_data: The user data returned on SAX callbacks
12491 * @ioread: an I/O read function
12492 * @ioclose: an I/O close function
12493 * @ioctx: an I/O handler
12494 * @enc: the charset encoding if known
12496 * Create a parser context for using the XML parser with an existing
12499 * Returns the new parser context or NULL
12502 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12503 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12504 void *ioctx, xmlCharEncoding enc) {
/*
 * Builds a parser context that reads through caller-supplied I/O
 * callbacks: wraps them in an input buffer, allocates a context,
 * installs a private copy of the SAX handler block and pushes an input
 * stream created over the buffer.
 * NOTE(review): this listing omits lines (the embedded numbers skip),
 * including the return statements of the failure paths.
 */
12505 xmlParserCtxtPtr ctxt;
12506 xmlParserInputPtr inputStream;
12507 xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
12509 if (ioread == NULL) return(NULL);
12511 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
/*
 * The statement guarded by this test is not shown; presumably the close
 * callback is invoked when buffer creation failed -- confirm.
 */
12513 if (ioclose != NULL)
12518 ctxt = xmlNewParserCtxt();
12519 if (ctxt == NULL) {
12520 xmlFreeParserInputBuffer(buf);
/*
 * Replace the context's handler block with a freshly allocated copy of
 * @sax.  In SAX1 builds the existing block is freed only when it is not
 * the static xmlDefaultSAXHandler.  The test that selects this section
 * is on a line not shown here.
 */
12524 #ifdef LIBXML_SAX1_ENABLED
12525 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12526 #endif /* LIBXML_SAX1_ENABLED */
12527 xmlFree(ctxt->sax);
12528 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12529 if (ctxt->sax == NULL) {
/*
 * NOTE(review): buf is not released in the visible lines of this failure
 * path, whereas the same path in xmlCreatePushParserCtxt() frees its
 * buffer -- check for a leak.
 */
12530 xmlErrMemory(ctxt, NULL);
12531 xmlFreeParserCtxt(ctxt);
/*
 * The new block is zeroed first; a block marked XML_SAX2_MAGIC is copied
 * whole, otherwise only sizeof(xmlSAXHandlerV1) bytes are copied.
 */
12534 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12535 if (sax->initialized == XML_SAX2_MAGIC)
12536 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12538 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
/* userData is only overridden when the caller supplied a value. */
12539 if (user_data != NULL)
12540 ctxt->userData = user_data;
12543 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12544 if (inputStream == NULL) {
/*
 * NOTE(review): buf is not released in the visible lines of this path;
 * confirm whether xmlNewIOInputStream() disposes of it on failure.
 */
12545 xmlFreeParserCtxt(ctxt);
/* Make the new stream the context's current input. */
12548 inputPush(ctxt, inputStream);
12553 #ifdef LIBXML_VALID_ENABLED
12554 /************************************************************************
12556 * Front ends when parsing a DTD *
12558 ************************************************************************/
12562 * @sax: the SAX handler block or NULL
12563 * @input: an Input Buffer
12564 * @enc: the charset encoding if known
12566 * Load and parse a DTD
12568 * Returns the resulting xmlDtdPtr or NULL in case of error.
12569 * @input will be freed by the function in any case.
12573 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12574 xmlCharEncoding enc) {
/*
 * Parses a DTD read from @input as an external subset: a temporary
 * context and document are created, xmlParseExternalSubset() is run,
 * and on a well-formed result the document's extSubset is detached and
 * kept in ret while the temporary document and context are freed.
 * NOTE(review): this listing omits lines (the embedded numbers skip),
 * including return statements and the body of the final loop.
 */
12575 xmlDtdPtr ret = NULL;
12576 xmlParserCtxtPtr ctxt;
12577 xmlParserInputPtr pinput = NULL;
12583 ctxt = xmlNewParserCtxt();
/* No context: @input is still released here. */
12584 if (ctxt == NULL) {
12585 xmlFreeParserInputBuffer(input);
12589 /* We are loading a DTD */
12590 ctxt->options |= XML_PARSE_DTDLOAD;
12593 * Set-up the SAX context
12596 if (ctxt->sax != NULL)
12597 xmlFree(ctxt->sax);
12599 ctxt->userData = ctxt;
12601 xmlDetectSAX2(ctxt);
12604 * generate a parser input from the I/O handler
12607 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12608 if (pinput == NULL) {
/*
 * When the caller passed a handler block, ctxt->sax is cleared before the
 * context is freed -- presumably so xmlFreeParserCtxt() leaves the
 * caller's block alone; confirm.
 */
12609 if (sax != NULL) ctxt->sax = NULL;
12610 xmlFreeParserInputBuffer(input);
12611 xmlFreeParserCtxt(ctxt);
12616 * plug some encoding conversion routines here.
12618 if (xmlPushInput(ctxt, pinput) < 0) {
12619 if (sax != NULL) ctxt->sax = NULL;
12620 xmlFreeParserCtxt(ctxt);
/* An encoding given by the caller is applied right away. */
12623 if (enc != XML_CHAR_ENCODING_NONE) {
12624 xmlSwitchEncoding(ctxt, enc);
12627 pinput->filename = NULL;
/* The stream's base and cur are set to the current read position; no free callback. */
12630 pinput->base = ctxt->input->cur;
12631 pinput->cur = ctxt->input->cur;
12632 pinput->free = NULL;
12635 * let's parse that entity knowing it's an external subset.
12637 ctxt->inSubset = 2;
12638 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12639 if (ctxt->myDoc == NULL) {
/*
 * NOTE(review): only two lines are missing from the listing between this
 * call and the next visible statement, so this failure path does not
 * appear to reset ctxt->sax or call xmlFreeParserCtxt(), unlike the
 * xmlPushInput() failure path above -- check for a leaked context.
 */
12640 xmlErrMemory(ctxt, "New Doc failed");
12643 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12644 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12645 BAD_CAST "none", BAD_CAST "none");
/*
 * No encoding from the caller and at least 4 bytes available: detect the
 * encoding from the first 4 bytes and switch only when one is recognised.
 */
12647 if ((enc == XML_CHAR_ENCODING_NONE) &&
12648 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12650 * Get the 4 first bytes and decode the charset
12651 * if enc != XML_CHAR_ENCODING_NONE
12652 * plug some encoding conversion routines.
12658 enc = xmlDetectCharEncoding(start, 4);
12659 if (enc != XML_CHAR_ENCODING_NONE) {
12660 xmlSwitchEncoding(ctxt, enc);
12664 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12666 if (ctxt->myDoc != NULL) {
/*
 * Well-formed result: take the external subset out of the temporary
 * document so that xmlFreeDoc() below does not see it.
 */
12667 if (ctxt->wellFormed) {
12668 ret = ctxt->myDoc->extSubset;
12669 ctxt->myDoc->extSubset = NULL;
/* Walk over the children of the detached DTD; the loop body is not shown here. */
12674 tmp = ret->children;
12675 while (tmp != NULL) {
12683 xmlFreeDoc(ctxt->myDoc);
12684 ctxt->myDoc = NULL;
12686 if (sax != NULL) ctxt->sax = NULL;
12687 xmlFreeParserCtxt(ctxt);
12694 * @sax: the SAX handler block
12695 * @ExternalID: a NAME* containing the External ID of the DTD
12696 * @SystemID: a NAME* containing the URL to the DTD
12698 * Load and parse an external subset.
12700 * Returns the resulting xmlDtdPtr or NULL in case of error.
12704 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12705 const xmlChar *SystemID) {
/*
 * Loads a DTD identified by @ExternalID / @SystemID through the SAX
 * resolveEntity callback and parses it as an external subset inside a
 * temporary context and document; on a well-formed result the
 * document's extSubset is detached and kept in ret.
 * NOTE(review): this listing omits lines (the embedded numbers skip),
 * including return statements and the body of the final loop.
 */
12706 xmlDtdPtr ret = NULL;
12707 xmlParserCtxtPtr ctxt;
12708 xmlParserInputPtr input = NULL;
12709 xmlCharEncoding enc;
12710 xmlChar* systemIdCanonic;
/* At least one of the two identifiers is required. */
12712 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12714 ctxt = xmlNewParserCtxt();
12715 if (ctxt == NULL) {
12719 /* We are loading a DTD */
12720 ctxt->options |= XML_PARSE_DTDLOAD;
12723 * Set-up the SAX context
12726 if (ctxt->sax != NULL)
12727 xmlFree(ctxt->sax);
12729 ctxt->userData = ctxt;
12733 * Canonicalise the system ID
12735 systemIdCanonic = xmlCanonicPath(SystemID);
12736 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
/*
 * NOTE(review): this failure path frees the context without the
 * "if (sax != NULL) ctxt->sax = NULL;" step used on the later failure
 * paths -- confirm that a caller-supplied handler block cannot still be
 * attached to ctxt at this point.
 */
12737 xmlFreeParserCtxt(ctxt);
12742 * Ask the Entity resolver to load the damn thing
12745 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12746 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
/* Nothing was resolved: detach the caller's handler, free context and canonical id. */
12748 if (input == NULL) {
12749 if (sax != NULL) ctxt->sax = NULL;
12750 xmlFreeParserCtxt(ctxt);
12751 if (systemIdCanonic != NULL)
12752 xmlFree(systemIdCanonic);
12757 * plug some encoding conversion routines here.
12759 if (xmlPushInput(ctxt, input) < 0) {
12760 if (sax != NULL) ctxt->sax = NULL;
12761 xmlFreeParserCtxt(ctxt);
12762 if (systemIdCanonic != NULL)
12763 xmlFree(systemIdCanonic);
12766 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
/*
 * Detect the encoding from the first 4 bytes; the detected value is
 * passed to xmlSwitchEncoding() without a check against
 * XML_CHAR_ENCODING_NONE.
 */
12767 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12768 xmlSwitchEncoding(ctxt, enc);
/*
 * The canonical system id is assigned to the input's filename when the
 * input has none; the xmlFree() below is presumably the alternative
 * branch (its "else" is on a line not shown) -- confirm.
 */
12771 if (input->filename == NULL)
12772 input->filename = (char *) systemIdCanonic;
12774 xmlFree(systemIdCanonic);
12777 input->base = ctxt->input->cur;
12778 input->cur = ctxt->input->cur;
12779 input->free = NULL;
12782 * let's parse that entity knowing it's an external subset.
12784 ctxt->inSubset = 2;
12785 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
/* Document allocation failed: report, detach the caller's handler, free the context. */
12786 if (ctxt->myDoc == NULL) {
12787 xmlErrMemory(ctxt, "New Doc failed");
12788 if (sax != NULL) ctxt->sax = NULL;
12789 xmlFreeParserCtxt(ctxt);
12792 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12793 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12794 ExternalID, SystemID);
12795 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12797 if (ctxt->myDoc != NULL) {
/*
 * Well-formed result: take the external subset out of the temporary
 * document so that xmlFreeDoc() below does not see it.
 */
12798 if (ctxt->wellFormed) {
12799 ret = ctxt->myDoc->extSubset;
12800 ctxt->myDoc->extSubset = NULL;
/* Walk over the children of the detached DTD; the loop body is not shown here. */
12805 tmp = ret->children;
12806 while (tmp != NULL) {
12814 xmlFreeDoc(ctxt->myDoc);
12815 ctxt->myDoc = NULL;
12817 if (sax != NULL) ctxt->sax = NULL;
12818 xmlFreeParserCtxt(ctxt);
12826 * @ExternalID: a NAME* containing the External ID of the DTD
12827 * @SystemID: a NAME* containing the URL to the DTD
12829 * Load and parse an external subset.
12831 * Returns the resulting xmlDtdPtr or NULL in case of error.
12835 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12836 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12838 #endif /* LIBXML_VALID_ENABLED */
12840 /************************************************************************
12842 * Front ends when parsing an Entity *
12844 ************************************************************************/
12847 * xmlParseCtxtExternalEntity:
12848 * @ctx: the existing parsing context
12849 * @URL: the URL for the entity to load
12850 * @ID: the System ID for the entity to load
12851 * @lst: the return value for the set of parsed nodes
12853 * Parse an external general entity within an existing parsing context
12854 * An external general parsed entity is well-formed if it matches the
12855 * production labeled extParsedEnt.
12857 * [78] extParsedEnt ::= TextDecl? content
12859 * Returns 0 if the entity is well formed, -1 in case of args problem and
12860 * the parser error code otherwise
12864 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12865 const xmlChar *ID, xmlNodePtr *lst) {
12866 xmlParserCtxtPtr ctxt;
12868 xmlNodePtr newRoot;
12869 xmlSAXHandlerPtr oldsax = NULL;
12872 xmlCharEncoding enc;
12874 if (ctx == NULL) return(-1);
12876 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12877 (ctx->depth > 1024)) {
12878 return(XML_ERR_ENTITY_LOOP);
12883 if ((URL == NULL) && (ID == NULL))
12885 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12888 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12889 if (ctxt == NULL) {
12893 oldsax = ctxt->sax;
12894 ctxt->sax = ctx->sax;
12895 xmlDetectSAX2(ctxt);
12896 newDoc = xmlNewDoc(BAD_CAST "1.0");
12897 if (newDoc == NULL) {
12898 xmlFreeParserCtxt(ctxt);
12901 newDoc->properties = XML_DOC_INTERNAL;
12902 if (ctx->myDoc->dict) {
12903 newDoc->dict = ctx->myDoc->dict;
12904 xmlDictReference(newDoc->dict);
12906 if (ctx->myDoc != NULL) {
12907 newDoc->intSubset = ctx->myDoc->intSubset;
12908 newDoc->extSubset = ctx->myDoc->extSubset;
12910 if (ctx->myDoc->URL != NULL) {
12911 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12913 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12914 if (newRoot == NULL) {
12915 ctxt->sax = oldsax;
12916 xmlFreeParserCtxt(ctxt);
12917 newDoc->intSubset = NULL;
12918 newDoc->extSubset = NULL;
12919 xmlFreeDoc(newDoc);
12922 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12923 nodePush(ctxt, newDoc->children);
12924 if (ctx->myDoc == NULL) {
12925 ctxt->myDoc = newDoc;
12927 ctxt->myDoc = ctx->myDoc;
12928 newDoc->children->doc = ctx->myDoc;
12932 * Get the 4 first bytes and decode the charset
12933 * if enc != XML_CHAR_ENCODING_NONE
12934 * plug some encoding conversion routines.
12937 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12942 enc = xmlDetectCharEncoding(start, 4);
12943 if (enc != XML_CHAR_ENCODING_NONE) {
12944 xmlSwitchEncoding(ctxt, enc);
12949 * Parse a possible text declaration first
12951 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12952 xmlParseTextDecl(ctxt);
12954 * An XML-1.0 document can't reference an entity not XML-1.0
12956 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12957 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12958 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12959 "Version mismatch between document and entity\n");
12964 * If the user provided its own SAX callbacks then reuse the
12965 * useData callback field, otherwise the expected setup in a
12966 * DOM builder is to have userData == ctxt
12968 if (ctx->userData == ctx)
12969 ctxt->userData = ctxt;
12971 ctxt->userData = ctx->userData;
12974 * Doing validity checking on chunk doesn't make sense
12976 ctxt->instate = XML_PARSER_CONTENT;
12977 ctxt->validate = ctx->validate;
12978 ctxt->valid = ctx->valid;
12979 ctxt->loadsubset = ctx->loadsubset;
12980 ctxt->depth = ctx->depth + 1;
12981 ctxt->replaceEntities = ctx->replaceEntities;
12982 if (ctxt->validate) {
12983 ctxt->vctxt.error = ctx->vctxt.error;
12984 ctxt->vctxt.warning = ctx->vctxt.warning;
12986 ctxt->vctxt.error = NULL;
12987 ctxt->vctxt.warning = NULL;
12989 ctxt->vctxt.nodeTab = NULL;
12990 ctxt->vctxt.nodeNr = 0;
12991 ctxt->vctxt.nodeMax = 0;
12992 ctxt->vctxt.node = NULL;
12993 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12994 ctxt->dict = ctx->dict;
12995 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12996 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12997 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12998 ctxt->dictNames = ctx->dictNames;
12999 ctxt->attsDefault = ctx->attsDefault;
13000 ctxt->attsSpecial = ctx->attsSpecial;
13001 ctxt->linenumbers = ctx->linenumbers;
13003 xmlParseContent(ctxt);
13005 ctx->validate = ctxt->validate;
13006 ctx->valid = ctxt->valid;
13007 if ((RAW == '<') && (NXT(1) == '/')) {
13008 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13009 } else if (RAW != 0) {
13010 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13012 if (ctxt->node != newDoc->children) {
13013 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13016 if (!ctxt->wellFormed) {
13017 if (ctxt->errNo == 0)
13026 * Return the newly created nodeset after unlinking it from
13027 * they pseudo parent.
13029 cur = newDoc->children->children;
13031 while (cur != NULL) {
13032 cur->parent = NULL;
13035 newDoc->children->children = NULL;
13039 ctxt->sax = oldsax;
13041 ctxt->attsDefault = NULL;
13042 ctxt->attsSpecial = NULL;
13043 xmlFreeParserCtxt(ctxt);
13044 newDoc->intSubset = NULL;
13045 newDoc->extSubset = NULL;
13046 xmlFreeDoc(newDoc);
13052 * xmlParseExternalEntityPrivate:
13053 * @doc: the document the chunk pertains to
13054 * @oldctxt: the previous parser context if available
13055 * @sax: the SAX handler bloc (possibly NULL)
13056 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13057 * @depth: Used for loop detection, use 0
13058 * @URL: the URL for the entity to load
13059 * @ID: the System ID for the entity to load
13060 * @list: the return value for the set of parsed nodes
13062 * Private version of xmlParseExternalEntity()
13064 * Returns 0 if the entity is well formed, -1 in case of args problem and
13065 * the parser error code otherwise
13068 static xmlParserErrors
13069 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13070 xmlSAXHandlerPtr sax,
13071 void *user_data, int depth, const xmlChar *URL,
13072 const xmlChar *ID, xmlNodePtr *list) {
13073 xmlParserCtxtPtr ctxt;
13075 xmlNodePtr newRoot;
13076 xmlSAXHandlerPtr oldsax = NULL;
13077 xmlParserErrors ret = XML_ERR_OK;
13079 xmlCharEncoding enc;
13081 if (((depth > 40) &&
13082 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13084 return(XML_ERR_ENTITY_LOOP);
13089 if ((URL == NULL) && (ID == NULL))
13090 return(XML_ERR_INTERNAL_ERROR);
13092 return(XML_ERR_INTERNAL_ERROR);
13095 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13096 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13097 ctxt->userData = ctxt;
13098 if (oldctxt != NULL) {
13099 ctxt->_private = oldctxt->_private;
13100 ctxt->loadsubset = oldctxt->loadsubset;
13101 ctxt->validate = oldctxt->validate;
13102 ctxt->external = oldctxt->external;
13103 ctxt->record_info = oldctxt->record_info;
13104 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13105 ctxt->node_seq.length = oldctxt->node_seq.length;
13106 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13109 * Doing validity checking on chunk without context
13110 * doesn't make sense
13112 ctxt->_private = NULL;
13113 ctxt->validate = 0;
13114 ctxt->external = 2;
13115 ctxt->loadsubset = 0;
13118 oldsax = ctxt->sax;
13120 if (user_data != NULL)
13121 ctxt->userData = user_data;
13123 xmlDetectSAX2(ctxt);
13124 newDoc = xmlNewDoc(BAD_CAST "1.0");
13125 if (newDoc == NULL) {
13126 ctxt->node_seq.maximum = 0;
13127 ctxt->node_seq.length = 0;
13128 ctxt->node_seq.buffer = NULL;
13129 xmlFreeParserCtxt(ctxt);
13130 return(XML_ERR_INTERNAL_ERROR);
13132 newDoc->properties = XML_DOC_INTERNAL;
13133 newDoc->intSubset = doc->intSubset;
13134 newDoc->extSubset = doc->extSubset;
13135 newDoc->dict = doc->dict;
13136 xmlDictReference(newDoc->dict);
13138 if (doc->URL != NULL) {
13139 newDoc->URL = xmlStrdup(doc->URL);
13141 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13142 if (newRoot == NULL) {
13144 ctxt->sax = oldsax;
13145 ctxt->node_seq.maximum = 0;
13146 ctxt->node_seq.length = 0;
13147 ctxt->node_seq.buffer = NULL;
13148 xmlFreeParserCtxt(ctxt);
13149 newDoc->intSubset = NULL;
13150 newDoc->extSubset = NULL;
13151 xmlFreeDoc(newDoc);
13152 return(XML_ERR_INTERNAL_ERROR);
13154 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13155 nodePush(ctxt, newDoc->children);
13157 newRoot->doc = doc;
13160 * Get the 4 first bytes and decode the charset
13161 * if enc != XML_CHAR_ENCODING_NONE
13162 * plug some encoding conversion routines.
13165 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13170 enc = xmlDetectCharEncoding(start, 4);
13171 if (enc != XML_CHAR_ENCODING_NONE) {
13172 xmlSwitchEncoding(ctxt, enc);
13177 * Parse a possible text declaration first
13179 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13180 xmlParseTextDecl(ctxt);
13183 ctxt->instate = XML_PARSER_CONTENT;
13184 ctxt->depth = depth;
13186 xmlParseContent(ctxt);
13188 if ((RAW == '<') && (NXT(1) == '/')) {
13189 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13190 } else if (RAW != 0) {
13191 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13193 if (ctxt->node != newDoc->children) {
13194 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13197 if (!ctxt->wellFormed) {
13198 if (ctxt->errNo == 0)
13199 ret = XML_ERR_INTERNAL_ERROR;
13201 ret = (xmlParserErrors)ctxt->errNo;
13203 if (list != NULL) {
13207 * Return the newly created nodeset after unlinking it from
13208 * they pseudo parent.
13210 cur = newDoc->children->children;
13212 while (cur != NULL) {
13213 cur->parent = NULL;
13216 newDoc->children->children = NULL;
13222 * Record in the parent context the number of entities replacement
13223 * done when parsing that reference.
13225 if (oldctxt != NULL)
13226 oldctxt->nbentities += ctxt->nbentities;
13229 * Also record the size of the entity parsed
13231 if (ctxt->input != NULL && oldctxt != NULL) {
13232 oldctxt->sizeentities += ctxt->input->consumed;
13233 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13236 * And record the last error if any
13238 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13239 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13242 ctxt->sax = oldsax;
13243 if (oldctxt != NULL) {
13244 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13245 oldctxt->node_seq.length = ctxt->node_seq.length;
13246 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13248 ctxt->node_seq.maximum = 0;
13249 ctxt->node_seq.length = 0;
13250 ctxt->node_seq.buffer = NULL;
13251 xmlFreeParserCtxt(ctxt);
13252 newDoc->intSubset = NULL;
13253 newDoc->extSubset = NULL;
13254 xmlFreeDoc(newDoc);
13259 #ifdef LIBXML_SAX1_ENABLED
13261 * xmlParseExternalEntity:
13262 * @doc: the document the chunk pertains to
13263 * @sax: the SAX handler bloc (possibly NULL)
13264 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13265 * @depth: Used for loop detection, use 0
13266 * @URL: the URL for the entity to load
13267 * @ID: the System ID for the entity to load
13268 * @lst: the return value for the set of parsed nodes
13270 * Parse an external general entity
13271 * An external general parsed entity is well-formed if it matches the
13272 * production labeled extParsedEnt.
13274 * [78] extParsedEnt ::= TextDecl? content
13276 * Returns 0 if the entity is well formed, -1 in case of args problem and
13277 * the parser error code otherwise
13281 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13282 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13283 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13288 * xmlParseBalancedChunkMemory:
13289 * @doc: the document the chunk pertains to
13290 * @sax: the SAX handler bloc (possibly NULL)
13291 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13292 * @depth: Used for loop detection, use 0
13293 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13294 * @lst: the return value for the set of parsed nodes
13296 * Parse a well-balanced chunk of an XML document
13297 * called by the parser
13298 * The allowed sequence for the Well Balanced Chunk is the one defined by
13299 * the content production in the XML grammar:
13301 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13303 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13304 * the parser error code otherwise
13308 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13309 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13310 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13311 depth, string, lst, 0 );
13313 #endif /* LIBXML_SAX1_ENABLED */
13316 * xmlParseBalancedChunkMemoryInternal:
13317 * @oldctxt: the existing parsing context
13318 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13319 * @user_data: the user data field for the parser context
13320 * @lst: the return value for the set of parsed nodes
13323 * Parse a well-balanced chunk of an XML document
13324 * called by the parser
13325 * The allowed sequence for the Well Balanced Chunk is the one defined by
13326 * the content production in the XML grammar:
13328 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13330 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13331 * error code otherwise
13333 * In case recover is set to 1, the nodelist will not be empty even if
13334 * the parsed chunk is not well balanced.
13336 static xmlParserErrors
13337 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13338 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13339 xmlParserCtxtPtr ctxt;
13340 xmlDocPtr newDoc = NULL;
13341 xmlNodePtr newRoot;
13342 xmlSAXHandlerPtr oldsax = NULL;
13343 xmlNodePtr content = NULL;
13344 xmlNodePtr last = NULL;
13346 xmlParserErrors ret = XML_ERR_OK;
13351 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13352 (oldctxt->depth > 1024)) {
13353 return(XML_ERR_ENTITY_LOOP);
13359 if (string == NULL)
13360 return(XML_ERR_INTERNAL_ERROR);
13362 size = xmlStrlen(string);
13364 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13365 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13366 if (user_data != NULL)
13367 ctxt->userData = user_data;
13369 ctxt->userData = ctxt;
13370 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13371 ctxt->dict = oldctxt->dict;
13372 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13373 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13374 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13377 /* propagate namespaces down the entity */
13378 for (i = 0;i < oldctxt->nsNr;i += 2) {
13379 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13383 oldsax = ctxt->sax;
13384 ctxt->sax = oldctxt->sax;
13385 xmlDetectSAX2(ctxt);
13386 ctxt->replaceEntities = oldctxt->replaceEntities;
13387 ctxt->options = oldctxt->options;
13389 ctxt->_private = oldctxt->_private;
13390 if (oldctxt->myDoc == NULL) {
13391 newDoc = xmlNewDoc(BAD_CAST "1.0");
13392 if (newDoc == NULL) {
13393 ctxt->sax = oldsax;
13395 xmlFreeParserCtxt(ctxt);
13396 return(XML_ERR_INTERNAL_ERROR);
13398 newDoc->properties = XML_DOC_INTERNAL;
13399 newDoc->dict = ctxt->dict;
13400 xmlDictReference(newDoc->dict);
13401 ctxt->myDoc = newDoc;
13403 ctxt->myDoc = oldctxt->myDoc;
13404 content = ctxt->myDoc->children;
13405 last = ctxt->myDoc->last;
13407 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13408 if (newRoot == NULL) {
13409 ctxt->sax = oldsax;
13411 xmlFreeParserCtxt(ctxt);
13412 if (newDoc != NULL) {
13413 xmlFreeDoc(newDoc);
13415 return(XML_ERR_INTERNAL_ERROR);
13417 ctxt->myDoc->children = NULL;
13418 ctxt->myDoc->last = NULL;
13419 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13420 nodePush(ctxt, ctxt->myDoc->children);
13421 ctxt->instate = XML_PARSER_CONTENT;
13422 ctxt->depth = oldctxt->depth + 1;
13424 ctxt->validate = 0;
13425 ctxt->loadsubset = oldctxt->loadsubset;
13426 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13428 * ID/IDREF registration will be done in xmlValidateElement below
13430 ctxt->loadsubset |= XML_SKIP_IDS;
13432 ctxt->dictNames = oldctxt->dictNames;
13433 ctxt->attsDefault = oldctxt->attsDefault;
13434 ctxt->attsSpecial = oldctxt->attsSpecial;
13436 xmlParseContent(ctxt);
13437 if ((RAW == '<') && (NXT(1) == '/')) {
13438 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13439 } else if (RAW != 0) {
13440 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13442 if (ctxt->node != ctxt->myDoc->children) {
13443 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13446 if (!ctxt->wellFormed) {
13447 if (ctxt->errNo == 0)
13448 ret = XML_ERR_INTERNAL_ERROR;
13450 ret = (xmlParserErrors)ctxt->errNo;
13455 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13459 * Return the newly created nodeset after unlinking it from
13460 * they pseudo parent.
13462 cur = ctxt->myDoc->children->children;
13464 while (cur != NULL) {
13465 #ifdef LIBXML_VALID_ENABLED
13466 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13467 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13468 (cur->type == XML_ELEMENT_NODE)) {
13469 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13470 oldctxt->myDoc, cur);
13472 #endif /* LIBXML_VALID_ENABLED */
13473 cur->parent = NULL;
13476 ctxt->myDoc->children->children = NULL;
13478 if (ctxt->myDoc != NULL) {
13479 xmlFreeNode(ctxt->myDoc->children);
13480 ctxt->myDoc->children = content;
13481 ctxt->myDoc->last = last;
13485 * Record in the parent context the number of entities replacement
13486 * done when parsing that reference.
13488 if (oldctxt != NULL)
13489 oldctxt->nbentities += ctxt->nbentities;
13492 * Also record the last error if any
13494 if (ctxt->lastError.code != XML_ERR_OK)
13495 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13497 ctxt->sax = oldsax;
13499 ctxt->attsDefault = NULL;
13500 ctxt->attsSpecial = NULL;
13501 xmlFreeParserCtxt(ctxt);
13502 if (newDoc != NULL) {
13503 xmlFreeDoc(newDoc);
13510 * xmlParseInNodeContext:
13511 * @node: the context node
13512 * @data: the input string
13513 * @datalen: the input string length in bytes
13514 * @options: a combination of xmlParserOption
13515 * @lst: the return value for the set of parsed nodes
13517 * Parse a well-balanced chunk of an XML document
13518 * within the context (DTD, namespaces, etc ...) of the given node.
13520 * The allowed sequence for the data is a Well Balanced Chunk defined by
13521 * the content production in the XML grammar:
13523 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13525 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13526 * error code otherwise
13529 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13530 int options, xmlNodePtr *lst) {
13532 xmlParserCtxtPtr ctxt;
13533 xmlDocPtr doc = NULL;
13534 xmlNodePtr fake, cur;
13537 xmlParserErrors ret = XML_ERR_OK;
13540 * check all input parameters, grab the document
13542 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13543 return(XML_ERR_INTERNAL_ERROR);
13544 switch (node->type) {
13545 case XML_ELEMENT_NODE:
13546 case XML_ATTRIBUTE_NODE:
13547 case XML_TEXT_NODE:
13548 case XML_CDATA_SECTION_NODE:
13549 case XML_ENTITY_REF_NODE:
13551 case XML_COMMENT_NODE:
13552 case XML_DOCUMENT_NODE:
13553 case XML_HTML_DOCUMENT_NODE:
13556 return(XML_ERR_INTERNAL_ERROR);
13559 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13560 (node->type != XML_DOCUMENT_NODE) &&
13561 (node->type != XML_HTML_DOCUMENT_NODE))
13562 node = node->parent;
13564 return(XML_ERR_INTERNAL_ERROR);
13565 if (node->type == XML_ELEMENT_NODE)
13568 doc = (xmlDocPtr) node;
13570 return(XML_ERR_INTERNAL_ERROR);
13573 * allocate a context and set-up everything not related to the
13574 * node position in the tree
13576 if (doc->type == XML_DOCUMENT_NODE)
13577 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13578 #ifdef LIBXML_HTML_ENABLED
13579 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13580 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13582 * When parsing in context, it makes no sense to add implied
13583 * elements like html/body/etc...
13585 options |= HTML_PARSE_NOIMPLIED;
13589 return(XML_ERR_INTERNAL_ERROR);
13592 return(XML_ERR_NO_MEMORY);
13595 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13596 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13597 * we must wait until the last moment to free the original one.
13599 if (doc->dict != NULL) {
13600 if (ctxt->dict != NULL)
13601 xmlDictFree(ctxt->dict);
13602 ctxt->dict = doc->dict;
13604 options |= XML_PARSE_NODICT;
13606 if (doc->encoding != NULL) {
13607 xmlCharEncodingHandlerPtr hdlr;
13609 if (ctxt->encoding != NULL)
13610 xmlFree((xmlChar *) ctxt->encoding);
13611 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13613 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13614 if (hdlr != NULL) {
13615 xmlSwitchToEncoding(ctxt, hdlr);
13617 return(XML_ERR_UNSUPPORTED_ENCODING);
13621 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13622 xmlDetectSAX2(ctxt);
13624 /* parsing in context, i.e. as within existing content */
13625 ctxt->instate = XML_PARSER_CONTENT;
13627 fake = xmlNewComment(NULL);
13628 if (fake == NULL) {
13629 xmlFreeParserCtxt(ctxt);
13630 return(XML_ERR_NO_MEMORY);
13632 xmlAddChild(node, fake);
13634 if (node->type == XML_ELEMENT_NODE) {
13635 nodePush(ctxt, node);
13637 * initialize the SAX2 namespaces stack
13640 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13641 xmlNsPtr ns = cur->nsDef;
13642 const xmlChar *iprefix, *ihref;
13644 while (ns != NULL) {
13646 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13647 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13649 iprefix = ns->prefix;
13653 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13654 nsPush(ctxt, iprefix, ihref);
13663 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13665 * ID/IDREF registration will be done in xmlValidateElement below
13667 ctxt->loadsubset |= XML_SKIP_IDS;
13670 #ifdef LIBXML_HTML_ENABLED
13671 if (doc->type == XML_HTML_DOCUMENT_NODE)
13672 __htmlParseContent(ctxt);
13675 xmlParseContent(ctxt);
13678 if ((RAW == '<') && (NXT(1) == '/')) {
13679 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13680 } else if (RAW != 0) {
13681 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13683 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13684 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685 ctxt->wellFormed = 0;
13688 if (!ctxt->wellFormed) {
13689 if (ctxt->errNo == 0)
13690 ret = XML_ERR_INTERNAL_ERROR;
13692 ret = (xmlParserErrors)ctxt->errNo;
13698 * Return the newly created nodeset after unlinking it from
13699 * the pseudo sibling.
13712 while (cur != NULL) {
13713 cur->parent = NULL;
13717 xmlUnlinkNode(fake);
13721 if (ret != XML_ERR_OK) {
13722 xmlFreeNodeList(*lst);
13726 if (doc->dict != NULL)
13728 xmlFreeParserCtxt(ctxt);
13732 return(XML_ERR_INTERNAL_ERROR);
13736 #ifdef LIBXML_SAX1_ENABLED
13738 * xmlParseBalancedChunkMemoryRecover:
13739 * @doc: the document the chunk pertains to
13740 * @sax: the SAX handler bloc (possibly NULL)
13741 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13742 * @depth: Used for loop detection, use 0
13743 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13744 * @lst: the return value for the set of parsed nodes
13745 * @recover: return nodes even if the data is broken (use 0)
13748 * Parse a well-balanced chunk of an XML document
13749 * called by the parser
13750 * The allowed sequence for the Well Balanced Chunk is the one defined by
13751 * the content production in the XML grammar:
13753 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13755 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13756 * the parser error code otherwise
13758 * In case recover is set to 1, the nodelist will not be empty even if
13759 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13763 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13764 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13766 xmlParserCtxtPtr ctxt;
13768 xmlSAXHandlerPtr oldsax = NULL;
13769 xmlNodePtr content, newRoot;
13774 return(XML_ERR_ENTITY_LOOP);
13780 if (string == NULL)
13783 size = xmlStrlen(string);
13785 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13786 if (ctxt == NULL) return(-1);
13787 ctxt->userData = ctxt;
13789 oldsax = ctxt->sax;
13791 if (user_data != NULL)
13792 ctxt->userData = user_data;
13794 newDoc = xmlNewDoc(BAD_CAST "1.0");
13795 if (newDoc == NULL) {
13796 xmlFreeParserCtxt(ctxt);
13799 newDoc->properties = XML_DOC_INTERNAL;
13800 if ((doc != NULL) && (doc->dict != NULL)) {
13801 xmlDictFree(ctxt->dict);
13802 ctxt->dict = doc->dict;
13803 xmlDictReference(ctxt->dict);
13804 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13805 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13806 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13807 ctxt->dictNames = 1;
13809 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13812 newDoc->intSubset = doc->intSubset;
13813 newDoc->extSubset = doc->extSubset;
13815 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13816 if (newRoot == NULL) {
13818 ctxt->sax = oldsax;
13819 xmlFreeParserCtxt(ctxt);
13820 newDoc->intSubset = NULL;
13821 newDoc->extSubset = NULL;
13822 xmlFreeDoc(newDoc);
13825 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13826 nodePush(ctxt, newRoot);
13828 ctxt->myDoc = newDoc;
13830 ctxt->myDoc = newDoc;
13831 newDoc->children->doc = doc;
13832 /* Ensure that doc has XML spec namespace */
13833 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13834 newDoc->oldNs = doc->oldNs;
13836 ctxt->instate = XML_PARSER_CONTENT;
13837 ctxt->depth = depth;
13840 * Doing validity checking on chunk doesn't make sense
13842 ctxt->validate = 0;
13843 ctxt->loadsubset = 0;
13844 xmlDetectSAX2(ctxt);
13846 if ( doc != NULL ){
13847 content = doc->children;
13848 doc->children = NULL;
13849 xmlParseContent(ctxt);
13850 doc->children = content;
13853 xmlParseContent(ctxt);
13855 if ((RAW == '<') && (NXT(1) == '/')) {
13856 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13857 } else if (RAW != 0) {
13858 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13860 if (ctxt->node != newDoc->children) {
13861 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13864 if (!ctxt->wellFormed) {
13865 if (ctxt->errNo == 0)
13873 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13877 * Return the newly created nodeset after unlinking it from
13878 * they pseudo parent.
13880 cur = newDoc->children->children;
13882 while (cur != NULL) {
13883 xmlSetTreeDoc(cur, doc);
13884 cur->parent = NULL;
13887 newDoc->children->children = NULL;
13891 ctxt->sax = oldsax;
13892 xmlFreeParserCtxt(ctxt);
13893 newDoc->intSubset = NULL;
13894 newDoc->extSubset = NULL;
13895 newDoc->oldNs = NULL;
13896 xmlFreeDoc(newDoc);
13902 * xmlSAXParseEntity:
13903 * @sax: the SAX handler block
13904 * @filename: the filename
13906 * parse an XML external entity out of context and build a tree.
13907 * It use the given SAX function block to handle the parsing callback.
13908 * If sax is NULL, fallback to the default DOM tree building routines.
13910 * [78] extParsedEnt ::= TextDecl? content
13912 * This correspond to a "Well Balanced" chunk
13914 * Returns the resulting document tree
13918 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13920 xmlParserCtxtPtr ctxt;
13922 ctxt = xmlCreateFileParserCtxt(filename);
13923 if (ctxt == NULL) {
13927 if (ctxt->sax != NULL)
13928 xmlFree(ctxt->sax);
13930 ctxt->userData = NULL;
13933 xmlParseExtParsedEnt(ctxt);
13935 if (ctxt->wellFormed)
13939 xmlFreeDoc(ctxt->myDoc);
13940 ctxt->myDoc = NULL;
13944 xmlFreeParserCtxt(ctxt);
13951 * @filename: the filename
13953 * parse an XML external entity out of context and build a tree.
13955 * [78] extParsedEnt ::= TextDecl? content
13957 * This correspond to a "Well Balanced" chunk
13959 * Returns the resulting document tree
13963 xmlParseEntity(const char *filename) {
13964 return(xmlSAXParseEntity(NULL, filename));
13966 #endif /* LIBXML_SAX1_ENABLED */
13969 * xmlCreateEntityParserCtxtInternal:
13970 * @URL: the entity URL
13971 * @ID: the entity PUBLIC ID
13972 * @base: a possible base for the target URI
13973 * @pctx: parser context used to set options on new context
13975 * Create a parser context for an external entity
13976 * Automatic support for ZLIB/Compress compressed document is provided
13977 * by default if found at compile-time.
13979 * Returns the new parser context or NULL
13981 static xmlParserCtxtPtr
13982 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13983 const xmlChar *base, xmlParserCtxtPtr pctx) {
13984 xmlParserCtxtPtr ctxt;
13985 xmlParserInputPtr inputStream;
13986 char *directory = NULL;
13989 ctxt = xmlNewParserCtxt();
13990 if (ctxt == NULL) {
13994 if (pctx != NULL) {
13995 ctxt->options = pctx->options;
13996 ctxt->_private = pctx->_private;
13999 uri = xmlBuildURI(URL, base);
14002 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14003 if (inputStream == NULL) {
14004 xmlFreeParserCtxt(ctxt);
14008 inputPush(ctxt, inputStream);
14010 if ((ctxt->directory == NULL) && (directory == NULL))
14011 directory = xmlParserGetDirectory((char *)URL);
14012 if ((ctxt->directory == NULL) && (directory != NULL))
14013 ctxt->directory = directory;
14015 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14016 if (inputStream == NULL) {
14018 xmlFreeParserCtxt(ctxt);
14022 inputPush(ctxt, inputStream);
14024 if ((ctxt->directory == NULL) && (directory == NULL))
14025 directory = xmlParserGetDirectory((char *)uri);
14026 if ((ctxt->directory == NULL) && (directory != NULL))
14027 ctxt->directory = directory;
14034 * xmlCreateEntityParserCtxt:
14035 * @URL: the entity URL
14036 * @ID: the entity PUBLIC ID
14037 * @base: a possible base for the target URI
14039 * Create a parser context for an external entity
14040 * Automatic support for ZLIB/Compress compressed document is provided
14041 * by default if found at compile-time.
14043 * Returns the new parser context or NULL
14046 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14047 const xmlChar *base) {
14048 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14052 /************************************************************************
14054 * Front ends when parsing from a file *
14056 ************************************************************************/
14059 * xmlCreateURLParserCtxt:
14060 * @filename: the filename or URL
14061 * @options: a combination of xmlParserOption
14063 * Create a parser context for a file or URL content.
14064 * Automatic support for ZLIB/Compress compressed document is provided
14065 * by default if found at compile-time and for file accesses
14067 * Returns the new parser context or NULL
/*
 * Create a parser context whose input is loaded from @filename via
 * xmlLoadExternalEntity(), applying @options first.
 */
14070 xmlCreateURLParserCtxt(const char *filename, int options)
14072 xmlParserCtxtPtr ctxt;
14073 xmlParserInputPtr inputStream;
14074 char *directory = NULL;
14076 ctxt = xmlNewParserCtxt();
14077 if (ctxt == NULL) {
14078 xmlErrMemory(NULL, "cannot allocate parser context");
/* Options are applied before loading; no encoding override is passed here. */
14083 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14084 ctxt->linenumbers = 1;
14086 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14087 if (inputStream == NULL) {
14088 xmlFreeParserCtxt(ctxt);
14092 inputPush(ctxt, inputStream);
/* Record the directory of @filename only if the context has none yet. */
14093 if ((ctxt->directory == NULL) && (directory == NULL))
14094 directory = xmlParserGetDirectory(filename);
14095 if ((ctxt->directory == NULL) && (directory != NULL))
14096 ctxt->directory = directory;
14102 * xmlCreateFileParserCtxt:
14103 * @filename: the filename
14105 * Create a parser context for a file content.
14106 * Automatic support for ZLIB/Compress compressed document is provided
14107 * by default if found at compile-time.
14109 * Returns the new parser context or NULL
/* Convenience wrapper: same as xmlCreateURLParserCtxt() with options set to 0. */
14112 xmlCreateFileParserCtxt(const char *filename)
14114 return(xmlCreateURLParserCtxt(filename, 0));
14117 #ifdef LIBXML_SAX1_ENABLED
14119 * xmlSAXParseFileWithData:
14120 * @sax: the SAX handler block
14121 * @filename: the filename
14122 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14124 * @data: the userdata
14126 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14127 * compressed document is provided by default if found at compile-time.
14128 * It uses the given SAX function block to handle the parsing callbacks.
14129 * If sax is NULL, fallback to the default DOM tree building routines.
14131 * User data (void *) is stored within the parser context in the
14132 * context's _private member, so it is available nearly everywhere in libxml
14134 * Returns the resulting document tree
/*
 * Parse @filename with a file parser context, storing @data in the
 * context's _private member, and hand back the document when it is
 * well formed or @recovery is set.
 */
14138 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14139 int recovery, void *data) {
14141 xmlParserCtxtPtr ctxt;
14145 ctxt = xmlCreateFileParserCtxt(filename);
14146 if (ctxt == NULL) {
/* Release the SAX block currently held by the context, if any. */
14150 if (ctxt->sax != NULL)
14151 xmlFree(ctxt->sax);
14154 xmlDetectSAX2(ctxt);
14156 ctxt->_private = data;
14159 if (ctxt->directory == NULL)
14160 ctxt->directory = xmlParserGetDirectory(filename);
14162 ctxt->recovery = recovery;
14164 xmlParseDocument(ctxt);
14166 if ((ctxt->wellFormed) || recovery) {
/* A positive input compression level is recorded as 9 on the document; otherwise the input's value is copied. */
14169 if (ctxt->input->buf->compressed > 0)
14170 ret->compression = 9;
14172 ret->compression = ctxt->input->buf->compressed;
/* Rejected document: free it and detach it from the context. */
14177 xmlFreeDoc(ctxt->myDoc);
14178 ctxt->myDoc = NULL;
14182 xmlFreeParserCtxt(ctxt);
14189 * @sax: the SAX handler block
14190 * @filename: the filename
14191 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14194 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14195 * compressed document is provided by default if found at compile-time.
14196 * It uses the given SAX function block to handle the parsing callbacks.
14197 * If sax is NULL, fallback to the default DOM tree building routines.
14199 * Returns the resulting document tree
/* Wrapper around xmlSAXParseFileWithData() that passes NULL as user data. */
14203 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14205 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14210 * @cur: a pointer to an array of xmlChar
14212 * parse an XML in-memory document and build a tree.
14213 * In the case the document is not Well Formed, an attempt to build a
14214 * tree is tried anyway
14216 * Returns the resulting document tree or NULL in case of failure
/* Parse the in-memory string @cur with the default SAX handler (sax == NULL) and recovery enabled (1). */
14220 xmlRecoverDoc(const xmlChar *cur) {
14221 return(xmlSAXParseDoc(NULL, cur, 1));
14226 * @filename: the filename
14228 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14229 * compressed document is provided by default if found at compile-time.
14231 * Returns the resulting document tree if the file was wellformed,
/* Parse @filename with the default SAX handler (sax == NULL) and recovery disabled (0). */
14236 xmlParseFile(const char *filename) {
14237 return(xmlSAXParseFile(NULL, filename, 0));
14242 * @filename: the filename
14244 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14245 * compressed document is provided by default if found at compile-time.
14246 * In the case the document is not Well Formed, it attempts to build
14249 * Returns the resulting document tree or NULL in case of failure
/* Parse @filename with the default SAX handler (sax == NULL) and recovery enabled (1). */
14253 xmlRecoverFile(const char *filename) {
14254 return(xmlSAXParseFile(NULL, filename, 1));
14259 * xmlSetupParserForBuffer:
14260 * @ctxt: an XML parser context
14261 * @buffer: a xmlChar * buffer
14262 * @filename: a file name
14264 * Setup the parser context to parse a new buffer; Clears any prior
14265 * contents from the parser context. The buffer parameter must not be
14266 * NULL, but the filename parameter can be
/*
 * Point @ctxt at a new input stream that reads directly from @buffer.
 * The stream's base/cur/end pointers reference @buffer itself, and the end
 * is located with xmlStrlen(), so @buffer is treated as zero terminated.
 */
14269 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14270 const char* filename)
14272 xmlParserInputPtr input;
14274 if ((ctxt == NULL) || (buffer == NULL))
14277 input = xmlNewInputStream(ctxt);
14278 if (input == NULL) {
14279 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14280 xmlClearParserCtxt(ctxt);
/* The context is cleared on the success path as well, before the new input is attached. */
14284 xmlClearParserCtxt(ctxt);
14285 if (filename != NULL)
14286 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14287 input->base = buffer;
14288 input->cur = buffer;
14289 input->end = &buffer[xmlStrlen(buffer)];
14290 inputPush(ctxt, input);
14294 * xmlSAXUserParseFile:
14295 * @sax: a SAX handler
14296 * @user_data: The user data returned on SAX callbacks
14297 * @filename: a file name
14299 * parse an XML file and call the given SAX handler routines.
14300 * Automatic support for ZLIB/Compress compressed document is provided
14302 * Returns 0 in case of success or an error number otherwise
/*
 * Parse @filename, driving the caller's SAX handler, and free the parser
 * context (and any document it built) before returning.
 */
14305 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14306 const char *filename) {
14308 xmlParserCtxtPtr ctxt;
14310 ctxt = xmlCreateFileParserCtxt(filename);
14311 if (ctxt == NULL) return -1;
/* The context's SAX block is freed unless it is the shared xmlDefaultSAXHandler. */
14312 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14313 xmlFree(ctxt->sax);
14315 xmlDetectSAX2(ctxt);
/* A NULL user_data leaves the context's existing userData untouched. */
14317 if (user_data != NULL)
14318 ctxt->userData = user_data;
14320 xmlParseDocument(ctxt);
/* The result is chosen from wellFormed first, then errNo. */
14322 if (ctxt->wellFormed)
14325 if (ctxt->errNo != 0)
14332 if (ctxt->myDoc != NULL) {
14333 xmlFreeDoc(ctxt->myDoc);
14334 ctxt->myDoc = NULL;
14336 xmlFreeParserCtxt(ctxt);
14340 #endif /* LIBXML_SAX1_ENABLED */
14342 /************************************************************************
14344 * Front ends when parsing from memory *
14346 ************************************************************************/
14349 * xmlCreateMemoryParserCtxt:
14350 * @buffer: a pointer to a char array
14351 * @size: the size of the array
14353 * Create a parser context for an XML in-memory document.
14355 * Returns the new parser context or NULL
/*
 * Create a parser context reading from the @size bytes at @buffer, wrapped
 * in a memory input buffer created with XML_CHAR_ENCODING_NONE.
 */
14358 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14359 xmlParserCtxtPtr ctxt;
14360 xmlParserInputPtr input;
14361 xmlParserInputBufferPtr buf;
14363 if (buffer == NULL)
14368 ctxt = xmlNewParserCtxt();
14372 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14373 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14375 xmlFreeParserCtxt(ctxt);
14379 input = xmlNewInputStream(ctxt);
/* On stream allocation failure both the input buffer and the context are released. */
14380 if (input == NULL) {
14381 xmlFreeParserInputBuffer(buf);
14382 xmlFreeParserCtxt(ctxt);
14386 input->filename = NULL;
/* Sync the stream's pointers with the underlying buffer before pushing it. */
14388 xmlBufResetInput(input->buf->buffer, input);
14390 inputPush(ctxt, input);
14394 #ifdef LIBXML_SAX1_ENABLED
14396 * xmlSAXParseMemoryWithData:
14397 * @sax: the SAX handler block
14398 * @buffer: a pointer to a char array
14399 * @size: the size of the array
14400 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14402 * @data: the userdata
14404 * parse an XML in-memory block and use the given SAX function block
14405 * to handle the parsing callback. If sax is NULL, fallback to the default
14406 * DOM tree building routines.
14408 * User data (void *) is stored within the parser context in the
14409 * context's _private member, so it is available nearly everywhere in libxml
14411 * Returns the resulting document tree
/*
 * Parse the @size bytes at @buffer through a memory parser context,
 * storing @data in the context's _private member; the document is kept
 * when it is well formed or @recovery is set, otherwise it is freed.
 */
14415 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14416 int size, int recovery, void *data) {
14418 xmlParserCtxtPtr ctxt;
14422 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14423 if (ctxt == NULL) return(NULL);
/* Release the SAX block currently held by the context, if any. */
14425 if (ctxt->sax != NULL)
14426 xmlFree(ctxt->sax);
14429 xmlDetectSAX2(ctxt);
14431 ctxt->_private=data;
14434 ctxt->recovery = recovery;
14436 xmlParseDocument(ctxt);
14438 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14441 xmlFreeDoc(ctxt->myDoc);
14442 ctxt->myDoc = NULL;
14446 xmlFreeParserCtxt(ctxt);
14452 * xmlSAXParseMemory:
14453 * @sax: the SAX handler block
14454 * @buffer: a pointer to a char array
14455 * @size: the size of the array
14456 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14459 * parse an XML in-memory block and use the given SAX function block
14460 * to handle the parsing callback. If sax is NULL, fallback to the default
14461 * DOM tree building routines.
14463 * Returns the resulting document tree
/* Wrapper around xmlSAXParseMemoryWithData() that passes NULL as user data. */
14466 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14467 int size, int recovery) {
14468 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14473 * @buffer: a pointer to a char array
14474 * @size: the size of the array
14476 * parse an XML in-memory block and build a tree.
14478 * Returns the resulting document tree
/* Parse an in-memory block with the default SAX handler (sax == NULL) and recovery disabled (0). */
14481 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14482 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14486 * xmlRecoverMemory:
14487 * @buffer: a pointer to a char array
14488 * @size: the size of the array
14490 * parse an XML in-memory block and build a tree.
14491 * In the case the document is not Well Formed, an attempt to
14492 * build a tree is tried anyway
14494 * Returns the resulting document tree or NULL in case of error
/* Parse an in-memory block with the default SAX handler (sax == NULL) and recovery enabled (1). */
14497 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14498 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14502 * xmlSAXUserParseMemory:
14503 * @sax: a SAX handler
14504 * @user_data: The user data returned on SAX callbacks
14505 * @buffer: an in-memory XML document input
14506 * @size: the length of the XML document in bytes
14508 * A better SAX parsing routine.
14509 * parse an XML in-memory buffer and call the given SAX handler routines.
14511 * Returns 0 in case of success or an error number otherwise
/*
 * Parse the @size bytes at @buffer, driving the caller's SAX handler, and
 * free the parser context (and any document it built) before returning.
 */
14513 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14514 const char *buffer, int size) {
14516 xmlParserCtxtPtr ctxt;
14520 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14521 if (ctxt == NULL) return -1;
/* The context's SAX block is freed unless it is the shared xmlDefaultSAXHandler. */
14522 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14523 xmlFree(ctxt->sax);
14525 xmlDetectSAX2(ctxt);
/* A NULL user_data leaves the context's existing userData untouched. */
14527 if (user_data != NULL)
14528 ctxt->userData = user_data;
14530 xmlParseDocument(ctxt);
/* The result is chosen from wellFormed first, then errNo. */
14532 if (ctxt->wellFormed)
14535 if (ctxt->errNo != 0)
14542 if (ctxt->myDoc != NULL) {
14543 xmlFreeDoc(ctxt->myDoc);
14544 ctxt->myDoc = NULL;
14546 xmlFreeParserCtxt(ctxt);
14550 #endif /* LIBXML_SAX1_ENABLED */
14553 * xmlCreateDocParserCtxt:
14554 * @cur: a pointer to an array of xmlChar
14556 * Creates a parser context for an XML in-memory document.
14558 * Returns the new parser context or NULL
/* Measure the zero-terminated string @cur with xmlStrlen() and delegate to xmlCreateMemoryParserCtxt(). */
14561 xmlCreateDocParserCtxt(const xmlChar *cur) {
14566 len = xmlStrlen(cur);
14567 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14570 #ifdef LIBXML_SAX1_ENABLED
14573 * @sax: the SAX handler block
14574 * @cur: a pointer to an array of xmlChar
14575 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14578 * parse an XML in-memory document and build a tree.
14579 * It uses the given SAX function block to handle the parsing callbacks.
14580 * If sax is NULL, fallback to the default DOM tree building routines.
14582 * Returns the resulting document tree
/*
 * Parse the zero-terminated string @cur; the document is kept when it is
 * well formed or @recovery is set, otherwise it is freed.
 */
14586 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14588 xmlParserCtxtPtr ctxt;
14589 xmlSAXHandlerPtr oldsax = NULL;
14591 if (cur == NULL) return(NULL);
14594 ctxt = xmlCreateDocParserCtxt(cur);
14595 if (ctxt == NULL) return(NULL);
/* Remember the context's own SAX block so it can be put back before the context is freed. */
14597 oldsax = ctxt->sax;
14599 ctxt->userData = NULL;
14601 xmlDetectSAX2(ctxt);
14603 xmlParseDocument(ctxt);
14604 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14607 xmlFreeDoc(ctxt->myDoc);
14608 ctxt->myDoc = NULL;
/* Restore the saved SAX block ahead of xmlFreeParserCtxt(). */
14611 ctxt->sax = oldsax;
14612 xmlFreeParserCtxt(ctxt);
14619 * @cur: a pointer to an array of xmlChar
14621 * parse an XML in-memory document and build a tree.
14623 * Returns the resulting document tree
/* Parse the in-memory string @cur with the default SAX handler (sax == NULL) and recovery disabled (0). */
14627 xmlParseDoc(const xmlChar *cur) {
14628 return(xmlSAXParseDoc(NULL, cur, 0));
14630 #endif /* LIBXML_SAX1_ENABLED */
14632 #ifdef LIBXML_LEGACY_ENABLED
14633 /************************************************************************
14635 * Specific function to keep track of entities references *
14636 * and used by the XSLT debugger *
14638 ************************************************************************/
/* File-local callback slot; NULL until set through xmlSetEntityReferenceFunc(). */
14640 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14643 * xmlAddEntityReference:
14644 * @ent : A valid entity
14645 * @firstNode : A valid first node for children of entity
14646 * @lastNode : A valid last node of children entity
14648 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
/* Forward the entity and its first/last child nodes to the registered callback; does nothing when none is set. */
14651 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14652 xmlNodePtr lastNode)
14654 if (xmlEntityRefFunc != NULL) {
14655 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14661 * xmlSetEntityReferenceFunc:
14662 * @func: A valid function
14664 * Set the function to call back when an XML entity reference has been made
/* Store @func in the file-local xmlEntityRefFunc slot; no NULL check is made, so passing NULL clears it. */
14667 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14669 xmlEntityRefFunc = func;
14671 #endif /* LIBXML_LEGACY_ENABLED */
14673 /************************************************************************
14677 ************************************************************************/
14679 #ifdef LIBXML_XPATH_ENABLED
14680 #include <libxml/xpath.h>
/* Default generic error handler, declared here so xmlInitParser() can compare against it. */
14683 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 by xmlInitParser() and back to 0 by xmlCleanupParser(). */
14684 static int xmlParserInitialized = 0;
14689 * Initialization function for the XML parser.
14690 * This is not reentrant. Call once before processing in case of
14691 * use in multithreaded programs.
/*
 * One-time library initialisation, guarded by the xmlParserInitialized
 * flag: sets up the generic error handler, dictionaries, encoding
 * handlers, default SAX handlers and I/O callbacks.
 */
14695 xmlInitParser(void) {
14696 if (xmlParserInitialized != 0)
/* With threads enabled, the flag is tested again after taking the global init mutex. */
14699 #ifdef LIBXML_THREAD_ENABLED
14700 __xmlGlobalInitMutexLock();
14701 if (xmlParserInitialized == 0) {
/* Install the default generic error function only when none, or the default itself, is current. */
14705 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14706 (xmlGenericError == NULL))
14707 initGenericErrorDefaultFunc(NULL);
14709 xmlInitializeDict();
14710 xmlInitCharEncodingHandlers();
14711 xmlDefaultSAXHandlerInit();
14712 xmlRegisterDefaultInputCallbacks();
14713 #ifdef LIBXML_OUTPUT_ENABLED
14714 xmlRegisterDefaultOutputCallbacks();
14715 #endif /* LIBXML_OUTPUT_ENABLED */
14716 #ifdef LIBXML_HTML_ENABLED
14717 htmlInitAutoClose();
14718 htmlDefaultSAXHandlerInit();
14720 #ifdef LIBXML_XPATH_ENABLED
/* Mark initialisation as done. */
14723 xmlParserInitialized = 1;
14724 #ifdef LIBXML_THREAD_ENABLED
14726 __xmlGlobalInitMutexUnlock();
14731 * xmlCleanupParser:
14733 * This function name is somewhat misleading. It does not clean up
14734 * parser state, it cleans up memory allocated by the library itself.
14735 * It is a cleanup function for the XML library. It tries to reclaim all
14736 * related global memory allocated for the library processing.
14737 * It doesn't deallocate any document related memory. One should
14738 * call xmlCleanupParser() only when the process has finished using
14739 * the library and all XML/HTML documents built with it.
14740 * See also xmlInitParser() which has the opposite function of preparing
14741 * the library for operations.
14743 * WARNING: if your application is multithreaded or has plugin support
14744 * calling this may crash the application if another thread or
14745 * a plugin is still using libxml2. It's sometimes very hard to
14746 * guess if libxml2 is in use in the application, some libraries
14747 * or plugins may use it without notice. In case of doubt abstain
14748 * from calling this function or do it just before calling exit()
14749 * to avoid leak reports from valgrind !
/*
 * Tear down library-wide state in a fixed order and clear the
 * xmlParserInitialized flag; the function tests that flag on entry.
 */
14753 xmlCleanupParser(void) {
14754 if (!xmlParserInitialized)
14757 xmlCleanupCharEncodingHandlers();
14758 #ifdef LIBXML_CATALOG_ENABLED
14759 xmlCatalogCleanup();
14762 xmlCleanupInputCallbacks();
14763 #ifdef LIBXML_OUTPUT_ENABLED
14764 xmlCleanupOutputCallbacks();
14766 #ifdef LIBXML_SCHEMAS_ENABLED
14767 xmlSchemaCleanupTypes();
14768 xmlRelaxNGCleanupTypes();
14770 xmlResetLastError();
14771 xmlCleanupGlobals();
14772 xmlCleanupThreads(); /* must be last if called not from the main thread */
/* NOTE(review): xmlCleanupMemory() runs after the call annotated "must be last" -- confirm the intended ordering. */
14773 xmlCleanupMemory();
14774 xmlParserInitialized = 0;
14777 /************************************************************************
14779 * New set (2.6.0) of simpler and more flexible APIs *
14781 ************************************************************************/
14787 * Free a string if it is not owned by the "dict" dictionary in the
/*
 * Frees @str with xmlFree() unless it is NULL or xmlDictOwns() reports it
 * as owned by `dict`. The macro reads a variable named `dict` from the
 * expansion site rather than taking it as an argument.
 * NOTE(review): the expansion is an unbraced `if` that already ends in
 * ';', so it is unsafe as the body of an if/else; a do { } while (0)
 * wrapper would remove that hazard.
 */
14790 #define DICT_FREE(str) \
14791 if ((str) && ((!dict) || \
14792 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
14793 xmlFree((char *)(str));
14797 * @ctxt: an XML parser context
14799 * Reset a parser context
/*
 * Return @ctxt to a start-of-document state: drop every pushed input,
 * free per-document strings and the document tree, reset the parser
 * flags and counters, and free the attribute default/special tables.
 */
14802 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14804 xmlParserInputPtr input;
/* Pop and free every input stream still on the stack. */
14812 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14813 xmlFreeInputStream(input);
14816 ctxt->input = NULL;
/* Keep an existing space stack but reset its first slot to -1. */
14819 if (ctxt->spaceTab != NULL) {
14820 ctxt->spaceTab[0] = -1;
14821 ctxt->space = &ctxt->spaceTab[0];
14823 ctxt->space = NULL;
/* Per-document strings are freed through DICT_FREE, which skips dictionary-owned ones. */
14833 DICT_FREE(ctxt->version);
14834 ctxt->version = NULL;
14835 DICT_FREE(ctxt->encoding);
14836 ctxt->encoding = NULL;
14837 DICT_FREE(ctxt->directory);
14838 ctxt->directory = NULL;
14839 DICT_FREE(ctxt->extSubURI);
14840 ctxt->extSubURI = NULL;
14841 DICT_FREE(ctxt->extSubSystem);
14842 ctxt->extSubSystem = NULL;
14843 if (ctxt->myDoc != NULL)
14844 xmlFreeDoc(ctxt->myDoc);
14845 ctxt->myDoc = NULL;
/* Scalar parser state goes back to its start values. */
14847 ctxt->standalone = -1;
14848 ctxt->hasExternalSubset = 0;
14849 ctxt->hasPErefs = 0;
14851 ctxt->external = 0;
14852 ctxt->instate = XML_PARSER_START;
14855 ctxt->wellFormed = 1;
14856 ctxt->nsWellFormed = 1;
14857 ctxt->disableSAX = 0;
/* The validation context reports through the parser's validity handlers. */
14860 ctxt->vctxt.userData = ctxt;
14861 ctxt->vctxt.error = xmlParserValidityError;
14862 ctxt->vctxt.warning = xmlParserValidityWarning;
14864 ctxt->record_info = 0;
14866 ctxt->checkIndex = 0;
14867 ctxt->inSubset = 0;
14868 ctxt->errNo = XML_ERR_OK;
14870 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871 ctxt->catalogs = NULL;
14872 ctxt->nbentities = 0;
14873 ctxt->sizeentities = 0;
14874 ctxt->sizeentcopy = 0;
14875 xmlInitNodeInfoSeq(&ctxt->node_seq);
/* attsDefault values are released with xmlFree; attsSpecial is freed without a value deallocator. */
14877 if (ctxt->attsDefault != NULL) {
14878 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14879 ctxt->attsDefault = NULL;
14881 if (ctxt->attsSpecial != NULL) {
14882 xmlHashFree(ctxt->attsSpecial, NULL);
14883 ctxt->attsSpecial = NULL;
/*
 * NOTE(review): ctxt->catalogs was already set to NULL earlier in this
 * function, so unless it is reassigned in between this free can never run
 * and local catalogs attached to the context would leak. The free probably
 * belongs before that NULL assignment -- confirm.
 */
14886 #ifdef LIBXML_CATALOG_ENABLED
14887 if (ctxt->catalogs != NULL)
14888 xmlCatalogFreeLocal(ctxt->catalogs);
14890 if (ctxt->lastError.code != XML_ERR_OK)
14891 xmlResetError(&ctxt->lastError);
14895 * xmlCtxtResetPush:
14896 * @ctxt: an XML parser context
14897 * @chunk: a pointer to an array of chars
14898 * @size: number of chars in the array
14899 * @filename: an optional file name or URI
14900 * @encoding: the document encoding, or NULL
14902 * Reset a push parser context
14904 * Returns 0 in case of success and 1 in case of error
/*
 * Reset @ctxt and prepare it for push parsing: allocate a fresh input
 * buffer, attach it to a new input stream, push the optional first @chunk,
 * then apply either the requested @encoding or the one detected from it.
 */
14907 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908 int size, const char *filename, const char *encoding)
14910 xmlParserInputPtr inputStream;
14911 xmlParserInputBufferPtr buf;
14912 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
/* Auto-detection only runs when no encoding is given and at least 4 bytes are available. */
14917 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14920 buf = xmlAllocParserInputBuffer(enc);
/* The ctxt check comes after the allocation, so buf must be released on this path. */
14924 if (ctxt == NULL) {
14925 xmlFreeParserInputBuffer(buf);
14929 xmlCtxtReset(ctxt);
/* Allocate pushTab on demand, sized as three pointer slots per nameMax entry. */
14931 if (ctxt->pushTab == NULL) {
14932 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14933 sizeof(xmlChar *));
14934 if (ctxt->pushTab == NULL) {
14935 xmlErrMemory(ctxt, NULL);
14936 xmlFreeParserInputBuffer(buf);
14941 if (filename == NULL) {
14942 ctxt->directory = NULL;
14944 ctxt->directory = xmlParserGetDirectory(filename);
14947 inputStream = xmlNewInputStream(ctxt);
14948 if (inputStream == NULL) {
14949 xmlFreeParserInputBuffer(buf);
14953 if (filename == NULL)
14954 inputStream->filename = NULL;
14956 inputStream->filename = (char *)
14957 xmlCanonicPath((const xmlChar *) filename);
/* Hand the buffer to the stream and sync the stream pointers with it. */
14958 inputStream->buf = buf;
14959 xmlBufResetInput(buf->buffer, inputStream);
14961 inputPush(ctxt, inputStream);
14963 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14964 (ctxt->input->buf != NULL)) {
/* Save base and cur as offsets before the push and re-apply them afterwards (presumably because the push may move the buffer -- TODO confirm). */
14965 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14966 size_t cur = ctxt->input->cur - ctxt->input->base;
14968 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14970 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14972 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* An explicit encoding takes precedence over the detected one. */
14976 if (encoding != NULL) {
14977 xmlCharEncodingHandlerPtr hdlr;
14979 if (ctxt->encoding != NULL)
14980 xmlFree((xmlChar *) ctxt->encoding);
14981 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14983 hdlr = xmlFindCharEncodingHandler(encoding);
14984 if (hdlr != NULL) {
14985 xmlSwitchToEncoding(ctxt, hdlr);
14987 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14988 "Unsupported encoding %s\n", BAD_CAST encoding);
14990 } else if (enc != XML_CHAR_ENCODING_NONE) {
14991 xmlSwitchEncoding(ctxt, enc);
14999 * xmlCtxtUseOptionsInternal:
15000 * @ctxt: an XML parser context
15001 * @options: a combination of xmlParserOption
15002 * @encoding: the user provided encoding to use
15004 * Applies the options to the parser context
15006 * Returns 0 in case of success, the set of unknown or unimplemented options
15007 * in case of error.
/*
 * Translate the xmlParserOption bits in @options into context fields and
 * SAX handler tweaks. Each recognised bit is subtracted from the local
 * @options value as it is handled, and most are mirrored into
 * ctxt->options. When @encoding is given it replaces ctxt->encoding.
 */
15010 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
/* Replace any previously stored encoding with a private copy of the new one. */
15014 if (encoding != NULL) {
15015 if (ctxt->encoding != NULL)
15016 xmlFree((xmlChar *) ctxt->encoding);
15017 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15019 if (options & XML_PARSE_RECOVER) {
15020 ctxt->recovery = 1;
15021 options -= XML_PARSE_RECOVER;
15022 ctxt->options |= XML_PARSE_RECOVER;
15024 ctxt->recovery = 0;
15025 if (options & XML_PARSE_DTDLOAD) {
15026 ctxt->loadsubset = XML_DETECT_IDS;
15027 options -= XML_PARSE_DTDLOAD;
15028 ctxt->options |= XML_PARSE_DTDLOAD;
15030 ctxt->loadsubset = 0;
/* DTDATTR adds to loadsubset rather than replacing it. */
15031 if (options & XML_PARSE_DTDATTR) {
15032 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15033 options -= XML_PARSE_DTDATTR;
15034 ctxt->options |= XML_PARSE_DTDATTR;
15036 if (options & XML_PARSE_NOENT) {
15037 ctxt->replaceEntities = 1;
15038 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15039 options -= XML_PARSE_NOENT;
15040 ctxt->options |= XML_PARSE_NOENT;
15042 ctxt->replaceEntities = 0;
15043 if (options & XML_PARSE_PEDANTIC) {
15044 ctxt->pedantic = 1;
15045 options -= XML_PARSE_PEDANTIC;
15046 ctxt->options |= XML_PARSE_PEDANTIC;
15048 ctxt->pedantic = 0;
/* NOBLANKS also swaps the SAX ignorableWhitespace callback. */
15049 if (options & XML_PARSE_NOBLANKS) {
15050 ctxt->keepBlanks = 0;
15051 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15052 options -= XML_PARSE_NOBLANKS;
15053 ctxt->options |= XML_PARSE_NOBLANKS;
15055 ctxt->keepBlanks = 1;
/* With validation on, NOWARNING/NOERROR also silence the validation context; they are tested here before being consumed below. */
15056 if (options & XML_PARSE_DTDVALID) {
15057 ctxt->validate = 1;
15058 if (options & XML_PARSE_NOWARNING)
15059 ctxt->vctxt.warning = NULL;
15060 if (options & XML_PARSE_NOERROR)
15061 ctxt->vctxt.error = NULL;
15062 options -= XML_PARSE_DTDVALID;
15063 ctxt->options |= XML_PARSE_DTDVALID;
15065 ctxt->validate = 0;
/* NOTE(review): in the visible lines NOWARNING and NOERROR clear SAX callbacks but are not mirrored into ctxt->options, unlike the other flags -- confirm this is intended. */
15066 if (options & XML_PARSE_NOWARNING) {
15067 ctxt->sax->warning = NULL;
15068 options -= XML_PARSE_NOWARNING;
15070 if (options & XML_PARSE_NOERROR) {
15071 ctxt->sax->error = NULL;
15072 ctxt->sax->fatalError = NULL;
15073 options -= XML_PARSE_NOERROR;
15075 #ifdef LIBXML_SAX1_ENABLED
/* SAX1 installs the element callbacks without namespaces and clears the Ns variants. */
15076 if (options & XML_PARSE_SAX1) {
15077 ctxt->sax->startElement = xmlSAX2StartElement;
15078 ctxt->sax->endElement = xmlSAX2EndElement;
15079 ctxt->sax->startElementNs = NULL;
15080 ctxt->sax->endElementNs = NULL;
15081 ctxt->sax->initialized = 1;
15082 options -= XML_PARSE_SAX1;
15083 ctxt->options |= XML_PARSE_SAX1;
15085 #endif /* LIBXML_SAX1_ENABLED */
15086 if (options & XML_PARSE_NODICT) {
15087 ctxt->dictNames = 0;
15088 options -= XML_PARSE_NODICT;
15089 ctxt->options |= XML_PARSE_NODICT;
15091 ctxt->dictNames = 1;
15093 if (options & XML_PARSE_NOCDATA) {
15094 ctxt->sax->cdataBlock = NULL;
15095 options -= XML_PARSE_NOCDATA;
15096 ctxt->options |= XML_PARSE_NOCDATA;
/* The remaining flags are only recorded in ctxt->options (HUGE additionally lifts the dictionary limit). */
15098 if (options & XML_PARSE_NSCLEAN) {
15099 ctxt->options |= XML_PARSE_NSCLEAN;
15100 options -= XML_PARSE_NSCLEAN;
15102 if (options & XML_PARSE_NONET) {
15103 ctxt->options |= XML_PARSE_NONET;
15104 options -= XML_PARSE_NONET;
15106 if (options & XML_PARSE_COMPACT) {
15107 ctxt->options |= XML_PARSE_COMPACT;
15108 options -= XML_PARSE_COMPACT;
15110 if (options & XML_PARSE_OLD10) {
15111 ctxt->options |= XML_PARSE_OLD10;
15112 options -= XML_PARSE_OLD10;
15114 if (options & XML_PARSE_NOBASEFIX) {
15115 ctxt->options |= XML_PARSE_NOBASEFIX;
15116 options -= XML_PARSE_NOBASEFIX;
15118 if (options & XML_PARSE_HUGE) {
15119 ctxt->options |= XML_PARSE_HUGE;
15120 options -= XML_PARSE_HUGE;
15121 if (ctxt->dict != NULL)
15122 xmlDictSetLimit(ctxt->dict, 0);
15124 if (options & XML_PARSE_OLDSAX) {
15125 ctxt->options |= XML_PARSE_OLDSAX;
15126 options -= XML_PARSE_OLDSAX;
15128 if (options & XML_PARSE_IGNORE_ENC) {
15129 ctxt->options |= XML_PARSE_IGNORE_ENC;
15130 options -= XML_PARSE_IGNORE_ENC;
15132 if (options & XML_PARSE_BIG_LINES) {
15133 ctxt->options |= XML_PARSE_BIG_LINES;
15134 options -= XML_PARSE_BIG_LINES;
/* Line numbers are always enabled, whatever the options. */
15136 ctxt->linenumbers = 1;
15141 * xmlCtxtUseOptions:
15142 * @ctxt: an XML parser context
15143 * @options: a combination of xmlParserOption
15145 * Applies the options to the parser context
15147 * Returns 0 in case of success, the set of unknown or unimplemented options
15148 * in case of error.
/* Public wrapper: applies @options through xmlCtxtUseOptionsInternal() without an encoding override. */
15151 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15153 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15158 * @ctxt: an XML parser context
15159 * @URL: the base URL to use for the document
15160 * @encoding: the document encoding, or NULL
15161 * @options: a combination of xmlParserOption
15162 * @reuse: keep the context for reuse
15164 * Common front-end for the xmlRead functions
15166 * Returns the resulting document tree or NULL
/*
 * Shared tail of the xmlRead and xmlCtxtRead functions: apply options and
 * encoding to an already-populated context, parse, and keep the document
 * when it is well formed or the context is in recovery mode.
 */
15169 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15170 int options, int reuse)
15174 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
/* Switch the input to the requested encoding via its handler. */
15175 if (encoding != NULL) {
15176 xmlCharEncodingHandlerPtr hdlr;
15178 hdlr = xmlFindCharEncodingHandler(encoding);
15180 xmlSwitchToEncoding(ctxt, hdlr);
/* Use URL as the input's filename only when the input has none of its own. */
15182 if ((URL != NULL) && (ctxt->input != NULL) &&
15183 (ctxt->input->filename == NULL))
15184 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15185 xmlParseDocument(ctxt);
15186 if ((ctxt->wellFormed) || ctxt->recovery)
15190 if (ctxt->myDoc != NULL) {
15191 xmlFreeDoc(ctxt->myDoc);
/* Detach the document from the context. */
15194 ctxt->myDoc = NULL;
15196 xmlFreeParserCtxt(ctxt);
15204 * @cur: a pointer to a zero terminated string
15205 * @URL: the base URL to use for the document
15206 * @encoding: the document encoding, or NULL
15207 * @options: a combination of xmlParserOption
15209 * parse an XML in-memory document and build a tree.
15211 * Returns the resulting document tree
/* Build a one-shot context over the zero-terminated string @cur and run xmlDoRead() with reuse = 0. */
15214 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15216 xmlParserCtxtPtr ctxt;
15222 ctxt = xmlCreateDocParserCtxt(cur);
15225 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15230 * @filename: a file or URL
15231 * @encoding: the document encoding, or NULL
15232 * @options: a combination of xmlParserOption
15234 * parse an XML file from the filesystem or the network.
15236 * Returns the resulting document tree
/* Build a one-shot context for @filename and run xmlDoRead() with reuse = 0; no separate URL is passed. */
15239 xmlReadFile(const char *filename, const char *encoding, int options)
15241 xmlParserCtxtPtr ctxt;
/* @options is applied here at context creation and again inside xmlDoRead(). */
15244 ctxt = xmlCreateURLParserCtxt(filename, options);
15247 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15252 * @buffer: a pointer to a char array
15253 * @size: the size of the array
15254 * @URL: the base URL to use for the document
15255 * @encoding: the document encoding, or NULL
15256 * @options: a combination of xmlParserOption
15258 * parse an XML in-memory document and build a tree.
15260 * Returns the resulting document tree
/* Build a one-shot context over the @size bytes at @buffer and run xmlDoRead() with reuse = 0. */
15263 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15265 xmlParserCtxtPtr ctxt;
15268 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15271 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15276 * @fd: an open file descriptor
15277 * @URL: the base URL to use for the document
15278 * @encoding: the document encoding, or NULL
15279 * @options: a combination of xmlParserOption
15281 * parse an XML from a file descriptor and build a tree.
15282 * NOTE that the file descriptor will not be closed when the
15283 * reader is closed or reset.
15285 * Returns the resulting document tree
/*
 * Parse a document read from the open file descriptor @fd through a
 * one-shot context (xmlDoRead() with reuse = 0).
 */
15288 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15290 xmlParserCtxtPtr ctxt;
15291 xmlParserInputBufferPtr input;
15292 xmlParserInputPtr stream;
15298 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* Clear the close callback on the input buffer (see the NOTE in the function documentation about the descriptor not being closed). */
15301 input->closecallback = NULL;
15302 ctxt = xmlNewParserCtxt();
15303 if (ctxt == NULL) {
15304 xmlFreeParserInputBuffer(input);
15307 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* On stream failure both the input buffer and the context are released. */
15308 if (stream == NULL) {
15309 xmlFreeParserInputBuffer(input);
15310 xmlFreeParserCtxt(ctxt);
15313 inputPush(ctxt, stream);
15314 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15319 * @ioread: an I/O read function
15320 * @ioclose: an I/O close function
15321 * @ioctx: an I/O handler
15322 * @URL: the base URL to use for the document
15323 * @encoding: the document encoding, or NULL
15324 * @options: a combination of xmlParserOption
15326 * parse an XML document from I/O functions and source and build a tree.
15328 * Returns the resulting document tree
/*
 * Parse a document pulled through caller-supplied read/close callbacks
 * using a one-shot context (xmlDoRead() with reuse = 0).
 */
15331 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15332 void *ioctx, const char *URL, const char *encoding, int options)
15334 xmlParserCtxtPtr ctxt;
15335 xmlParserInputBufferPtr input;
15336 xmlParserInputPtr stream;
/* A read callback is mandatory; the close callback is optional. */
15338 if (ioread == NULL)
15342 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15343 XML_CHAR_ENCODING_NONE);
15344 if (input == NULL) {
15345 if (ioclose != NULL)
15349 ctxt = xmlNewParserCtxt();
15350 if (ctxt == NULL) {
15351 xmlFreeParserInputBuffer(input);
15354 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* On stream failure both the input buffer and the context are released. */
15355 if (stream == NULL) {
15356 xmlFreeParserInputBuffer(input);
15357 xmlFreeParserCtxt(ctxt);
15360 inputPush(ctxt, stream);
15361 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15366 * @ctxt: an XML parser context
15367 * @cur: a pointer to a zero terminated string
15368 * @URL: the base URL to use for the document
15369 * @encoding: the document encoding, or NULL
15370 * @options: a combination of xmlParserOption
15372 * parse an XML in-memory document and build a tree.
15373 * This reuses the existing @ctxt parser context
15375 * Returns the resulting document tree
15378 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15379 const char *URL, const char *encoding, int options)
15381 xmlParserInputPtr stream;
15389 xmlCtxtReset(ctxt);
15391 stream = xmlNewStringInputStream(ctxt, cur);
15392 if (stream == NULL) {
15395 inputPush(ctxt, stream);
15396 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15401 * @ctxt: an XML parser context
15402 * @filename: a file or URL
15403 * @encoding: the document encoding, or NULL
15404 * @options: a combination of xmlParserOption
15406 * parse an XML file from the filesystem or the network.
15407 * This reuses the existing @ctxt parser context
15409 * Returns the resulting document tree
15412 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15413 const char *encoding, int options)
15415 xmlParserInputPtr stream;
15417 if (filename == NULL)
15423 xmlCtxtReset(ctxt);
15425 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15426 if (stream == NULL) {
15429 inputPush(ctxt, stream);
15430 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15434 * xmlCtxtReadMemory:
15435 * @ctxt: an XML parser context
15436 * @buffer: a pointer to a char array
15437 * @size: the size of the array
15438 * @URL: the base URL to use for the document
15439 * @encoding: the document encoding, or NULL
15440 * @options: a combination of xmlParserOption
15442 * parse an XML in-memory document and build a tree.
15443 * This reuses the existing @ctxt parser context
15445 * Returns the resulting document tree
15448 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15449 const char *URL, const char *encoding, int options)
15451 xmlParserInputBufferPtr input;
15452 xmlParserInputPtr stream;
15456 if (buffer == NULL)
15460 xmlCtxtReset(ctxt);
15462 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15463 if (input == NULL) {
15467 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15468 if (stream == NULL) {
15469 xmlFreeParserInputBuffer(input);
15473 inputPush(ctxt, stream);
15474 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15479 * @ctxt: an XML parser context
15480 * @fd: an open file descriptor
15481 * @URL: the base URL to use for the document
15482 * @encoding: the document encoding, or NULL
15483 * @options: a combination of xmlParserOption
15485 * parse an XML from a file descriptor and build a tree.
15486 * This reuses the existing @ctxt parser context
15487 * NOTE that the file descriptor will not be closed when the
15488 * reader is closed or reset.
15490 * Returns the resulting document tree
15493 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15494 const char *URL, const char *encoding, int options)
15496 xmlParserInputBufferPtr input;
15497 xmlParserInputPtr stream;
15505 xmlCtxtReset(ctxt);
15508 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15511 input->closecallback = NULL;
15512 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15513 if (stream == NULL) {
15514 xmlFreeParserInputBuffer(input);
15517 inputPush(ctxt, stream);
15518 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15523 * @ctxt: an XML parser context
15524 * @ioread: an I/O read function
15525 * @ioclose: an I/O close function
15526 * @ioctx: an I/O handler
15527 * @URL: the base URL to use for the document
15528 * @encoding: the document encoding, or NULL
15529 * @options: a combination of xmlParserOption
15531 * parse an XML document from I/O functions and source and build a tree.
15532 * This reuses the existing @ctxt parser context
15534 * Returns the resulting document tree
15537 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15538 xmlInputCloseCallback ioclose, void *ioctx,
15540 const char *encoding, int options)
15542 xmlParserInputBufferPtr input;
15543 xmlParserInputPtr stream;
15545 if (ioread == NULL)
15551 xmlCtxtReset(ctxt);
15553 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15554 XML_CHAR_ENCODING_NONE);
15555 if (input == NULL) {
15556 if (ioclose != NULL)
15560 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15561 if (stream == NULL) {
15562 xmlFreeParserInputBuffer(input);
15565 inputPush(ctxt, stream);
15566 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15569 #define bottom_parser
15570 #include "elfgcchack.h"