2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
33 /* To avoid EBCDIC trouble when parsing on zOS */
35 #pragma convert("ISO8859-1")
41 #if defined(WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
44 #define XML_DIR_SEP '/'
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
77 #ifdef HAVE_SYS_STAT_H
97 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99 static xmlParserCtxtPtr
100 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101 const xmlChar *base, xmlParserCtxtPtr pctx);
103 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105 /************************************************************************
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
109 ************************************************************************/
111 #define XML_PARSER_BIG_ENTITY 1000
112 #define XML_PARSER_LOT_ENTITY 5000
115 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116 * replacement over the size in bytes of the input indicates that you have
117 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
118 * replacement per byte of input.
120 #define XML_PARSER_NON_LINEAR 10
123 * xmlParserEntityCheck
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential (non-linear) entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
132 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
133 xmlEntityPtr ent, size_t replacement)
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
143 * This may look absurd but is needed to detect
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
149 unsigned long oldnbent = ctxt->nbentities;
155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
164 if (xmlStrchr(rep, '<'))
170 if (replacement != 0) {
171 if (replacement < XML_MAX_TEXT_LENGTH)
175 * If the volume of entity copy reaches 10 times the
176 * amount of parsed data and over the large text threshold
177 * then that's very likely to be an abuse.
179 if (ctxt->input != NULL) {
180 consumed = ctxt->input->consumed +
181 (ctxt->input->cur - ctxt->input->base);
183 consumed += ctxt->sizeentities;
185 if (replacement < XML_PARSER_NON_LINEAR * consumed)
187 } else if (size != 0) {
189 * Do the check based on the replacement size of the entity
191 if (size < XML_PARSER_BIG_ENTITY)
195 * A limit on the amount of text data reasonably used
197 if (ctxt->input != NULL) {
198 consumed = ctxt->input->consumed +
199 (ctxt->input->cur - ctxt->input->base);
201 consumed += ctxt->sizeentities;
203 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
206 } else if (ent != NULL) {
208 * use the number of parsed entities in the replacement
210 size = ent->checked / 2;
213 * The amount of data parsed counting entities size only once
215 if (ctxt->input != NULL) {
216 consumed = ctxt->input->consumed +
217 (ctxt->input->cur - ctxt->input->base);
219 consumed += ctxt->sizeentities;
222 * Check the density of entities for the amount of data
223 * knowing an entity reference will take at least 3 bytes
225 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
229 * strange we got no data for checking
231 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233 (ctxt->nbentities <= 10000))
236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
243 * arbitrary depth limit for the XML documents that we allow to
244 * process. This is not a limitation of the parser but a safety
245 * boundary feature. It can be disabled with the XML_PARSE_HUGE
248 unsigned int xmlParserMaxDepth = 256;
253 #define XML_PARSER_BIG_BUFFER_SIZE 300
254 #define XML_PARSER_BUFFER_SIZE 100
255 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
258 * XML_PARSER_CHUNK_SIZE
260 * When calling GROW, this is the minimal amount of data
261 * the parser expects to have received. It is not a hard
262 * limit but an optimization when reading strings like Names.
263 * It is not strictly needed as long as the input's available characters
264 * are followed by 0, which should be provided by the I/O level
266 #define XML_PARSER_CHUNK_SIZE 100
269 * List of XML prefixed PI allowed by W3C specs
272 static const char *xmlW3CPIs[] = {
279 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
280 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281 const xmlChar **str);
283 static xmlParserErrors
284 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285 xmlSAXHandlerPtr sax,
286 void *user_data, int depth, const xmlChar *URL,
287 const xmlChar *ID, xmlNodePtr *list);
290 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291 const char *encoding);
292 #ifdef LIBXML_LEGACY_ENABLED
294 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295 xmlNodePtr lastNode);
296 #endif /* LIBXML_LEGACY_ENABLED */
298 static xmlParserErrors
299 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300 const xmlChar *string, void *user_data, xmlNodePtr *lst);
303 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
305 /************************************************************************
307 * Some factorized error routines *
309 ************************************************************************/
312 * xmlErrAttributeDup:
313 * @ctxt: an XML parser context
314 * @prefix: the attribute prefix
315 * @localname: the attribute localname
317 * Handle a redefinition of attribute error
320 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321 const xmlChar * localname)
323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324 (ctxt->instate == XML_PARSER_EOF))
327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
332 (const char *) localname, NULL, NULL, 0, 0,
333 "Attribute %s redefined\n", localname);
335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
337 (const char *) prefix, (const char *) localname,
338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
341 ctxt->wellFormed = 0;
342 if (ctxt->recovery == 0)
343 ctxt->disableSAX = 1;
349 * @ctxt: an XML parser context
350 * @error: the error number
351 * @info: extra information string
353 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
356 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361 (ctxt->instate == XML_PARSER_EOF))
364 case XML_ERR_INVALID_HEX_CHARREF:
365 errmsg = "CharRef: invalid hexadecimal value";
367 case XML_ERR_INVALID_DEC_CHARREF:
368 errmsg = "CharRef: invalid decimal value";
370 case XML_ERR_INVALID_CHARREF:
371 errmsg = "CharRef: invalid value";
373 case XML_ERR_INTERNAL_ERROR:
374 errmsg = "internal error";
376 case XML_ERR_PEREF_AT_EOF:
377 errmsg = "PEReference at end of document";
379 case XML_ERR_PEREF_IN_PROLOG:
380 errmsg = "PEReference in prolog";
382 case XML_ERR_PEREF_IN_EPILOG:
383 errmsg = "PEReference in epilog";
385 case XML_ERR_PEREF_NO_NAME:
386 errmsg = "PEReference: no name";
388 case XML_ERR_PEREF_SEMICOL_MISSING:
389 errmsg = "PEReference: expecting ';'";
391 case XML_ERR_ENTITY_LOOP:
392 errmsg = "Detected an entity reference loop";
394 case XML_ERR_ENTITY_NOT_STARTED:
395 errmsg = "EntityValue: \" or ' expected";
397 case XML_ERR_ENTITY_PE_INTERNAL:
398 errmsg = "PEReferences forbidden in internal subset";
400 case XML_ERR_ENTITY_NOT_FINISHED:
401 errmsg = "EntityValue: \" or ' expected";
403 case XML_ERR_ATTRIBUTE_NOT_STARTED:
404 errmsg = "AttValue: \" or ' expected";
406 case XML_ERR_LT_IN_ATTRIBUTE:
407 errmsg = "Unescaped '<' not allowed in attributes values";
409 case XML_ERR_LITERAL_NOT_STARTED:
410 errmsg = "SystemLiteral \" or ' expected";
412 case XML_ERR_LITERAL_NOT_FINISHED:
413 errmsg = "Unfinished System or Public ID \" or ' expected";
415 case XML_ERR_MISPLACED_CDATA_END:
416 errmsg = "Sequence ']]>' not allowed in content";
418 case XML_ERR_URI_REQUIRED:
419 errmsg = "SYSTEM or PUBLIC, the URI is missing";
421 case XML_ERR_PUBID_REQUIRED:
422 errmsg = "PUBLIC, the Public Identifier is missing";
424 case XML_ERR_HYPHEN_IN_COMMENT:
425 errmsg = "Comment must not contain '--' (double-hyphen)";
427 case XML_ERR_PI_NOT_STARTED:
428 errmsg = "xmlParsePI : no target name";
430 case XML_ERR_RESERVED_XML_NAME:
431 errmsg = "Invalid PI name";
433 case XML_ERR_NOTATION_NOT_STARTED:
434 errmsg = "NOTATION: Name expected here";
436 case XML_ERR_NOTATION_NOT_FINISHED:
437 errmsg = "'>' required to close NOTATION declaration";
439 case XML_ERR_VALUE_REQUIRED:
440 errmsg = "Entity value required";
442 case XML_ERR_URI_FRAGMENT:
443 errmsg = "Fragment not allowed";
445 case XML_ERR_ATTLIST_NOT_STARTED:
446 errmsg = "'(' required to start ATTLIST enumeration";
448 case XML_ERR_NMTOKEN_REQUIRED:
449 errmsg = "NmToken expected in ATTLIST enumeration";
451 case XML_ERR_ATTLIST_NOT_FINISHED:
452 errmsg = "')' required to finish ATTLIST enumeration";
454 case XML_ERR_MIXED_NOT_STARTED:
455 errmsg = "MixedContentDecl : '|' or ')*' expected";
457 case XML_ERR_PCDATA_REQUIRED:
458 errmsg = "MixedContentDecl : '#PCDATA' expected";
460 case XML_ERR_ELEMCONTENT_NOT_STARTED:
461 errmsg = "ContentDecl : Name or '(' expected";
463 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
464 errmsg = "ContentDecl : ',' '|' or ')' expected";
466 case XML_ERR_PEREF_IN_INT_SUBSET:
468 "PEReference: forbidden within markup decl in internal subset";
470 case XML_ERR_GT_REQUIRED:
471 errmsg = "expected '>'";
473 case XML_ERR_CONDSEC_INVALID:
474 errmsg = "XML conditional section '[' expected";
476 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
477 errmsg = "Content error in the external subset";
479 case XML_ERR_CONDSEC_INVALID_KEYWORD:
481 "conditional section INCLUDE or IGNORE keyword expected";
483 case XML_ERR_CONDSEC_NOT_FINISHED:
484 errmsg = "XML conditional section not closed";
486 case XML_ERR_XMLDECL_NOT_STARTED:
487 errmsg = "Text declaration '<?xml' required";
489 case XML_ERR_XMLDECL_NOT_FINISHED:
490 errmsg = "parsing XML declaration: '?>' expected";
492 case XML_ERR_EXT_ENTITY_STANDALONE:
493 errmsg = "external parsed entities cannot be standalone";
495 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
496 errmsg = "EntityRef: expecting ';'";
498 case XML_ERR_DOCTYPE_NOT_FINISHED:
499 errmsg = "DOCTYPE improperly terminated";
501 case XML_ERR_LTSLASH_REQUIRED:
502 errmsg = "EndTag: '</' not found";
504 case XML_ERR_EQUAL_REQUIRED:
505 errmsg = "expected '='";
507 case XML_ERR_STRING_NOT_CLOSED:
508 errmsg = "String not closed expecting \" or '";
510 case XML_ERR_STRING_NOT_STARTED:
511 errmsg = "String not started expecting ' or \"";
513 case XML_ERR_ENCODING_NAME:
514 errmsg = "Invalid XML encoding name";
516 case XML_ERR_STANDALONE_VALUE:
517 errmsg = "standalone accepts only 'yes' or 'no'";
519 case XML_ERR_DOCUMENT_EMPTY:
520 errmsg = "Document is empty";
522 case XML_ERR_DOCUMENT_END:
523 errmsg = "Extra content at the end of the document";
525 case XML_ERR_NOT_WELL_BALANCED:
526 errmsg = "chunk is not well balanced";
528 case XML_ERR_EXTRA_CONTENT:
529 errmsg = "extra content at the end of well balanced chunk";
531 case XML_ERR_VERSION_MISSING:
532 errmsg = "Malformed declaration expecting version";
534 case XML_ERR_NAME_TOO_LONG:
535 errmsg = "Name too long use XML_PARSE_HUGE option";
543 errmsg = "Unregistered error message";
548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
571 static void LIBXML_ATTR_FORMAT(3,0)
572 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576 (ctxt->instate == XML_PARSER_EOF))
580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
583 ctxt->wellFormed = 0;
584 if (ctxt->recovery == 0)
585 ctxt->disableSAX = 1;
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
599 static void LIBXML_ATTR_FORMAT(3,0)
600 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601 const char *msg, const xmlChar *str1, const xmlChar *str2)
603 xmlStructuredErrorFunc schannel = NULL;
605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
608 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609 (ctxt->sax->initialized == XML_SAX2_MAGIC))
610 schannel = ctxt->sax->serror;
612 __xmlRaiseError(schannel,
613 (ctxt->sax) ? ctxt->sax->warning : NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
620 __xmlRaiseError(schannel, NULL, NULL,
621 ctxt, NULL, XML_FROM_PARSER, error,
622 XML_ERR_WARNING, NULL, 0,
623 (const char *) str1, (const char *) str2, NULL, 0, 0,
624 msg, (const char *) str1, (const char *) str2);
630 * @ctxt: an XML parser context
631 * @error: the error number
632 * @msg: the error message
635 * Handle a validity error.
637 static void LIBXML_ATTR_FORMAT(3,0)
638 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
639 const char *msg, const xmlChar *str1, const xmlChar *str2)
641 xmlStructuredErrorFunc schannel = NULL;
643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644 (ctxt->instate == XML_PARSER_EOF))
648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649 schannel = ctxt->sax->serror;
652 __xmlRaiseError(schannel,
653 ctxt->vctxt.error, ctxt->vctxt.userData,
654 ctxt, NULL, XML_FROM_DTD, error,
655 XML_ERR_ERROR, NULL, 0, (const char *) str1,
656 (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
660 __xmlRaiseError(schannel, NULL, NULL,
661 ctxt, NULL, XML_FROM_DTD, error,
662 XML_ERR_ERROR, NULL, 0, (const char *) str1,
663 (const char *) str2, NULL, 0, 0,
664 msg, (const char *) str1, (const char *) str2);
670 * @ctxt: an XML parser context
671 * @error: the error number
672 * @msg: the error message
673 * @val: an integer value
675 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
677 static void LIBXML_ATTR_FORMAT(3,0)
678 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
679 const char *msg, int val)
681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682 (ctxt->instate == XML_PARSER_EOF))
686 __xmlRaiseError(NULL, NULL, NULL,
687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
690 ctxt->wellFormed = 0;
691 if (ctxt->recovery == 0)
692 ctxt->disableSAX = 1;
697 * xmlFatalErrMsgStrIntStr:
698 * @ctxt: an XML parser context
699 * @error: the error number
700 * @msg: the error message
701 * @str1: a string info
702 * @val: an integer value
703 * @str2: a string info
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
707 static void LIBXML_ATTR_FORMAT(3,0)
708 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg, const xmlChar *str1, int val,
712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713 (ctxt->instate == XML_PARSER_EOF))
717 __xmlRaiseError(NULL, NULL, NULL,
718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719 NULL, 0, (const char *) str1, (const char *) str2,
720 NULL, val, 0, msg, str1, val, str2);
722 ctxt->wellFormed = 0;
723 if (ctxt->recovery == 0)
724 ctxt->disableSAX = 1;
730 * @ctxt: an XML parser context
731 * @error: the error number
732 * @msg: the error message
733 * @val: a string value
735 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
737 static void LIBXML_ATTR_FORMAT(3,0)
738 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
739 const char *msg, const xmlChar * val)
741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742 (ctxt->instate == XML_PARSER_EOF))
746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
747 XML_FROM_PARSER, error, XML_ERR_FATAL,
748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
751 ctxt->wellFormed = 0;
752 if (ctxt->recovery == 0)
753 ctxt->disableSAX = 1;
759 * @ctxt: an XML parser context
760 * @error: the error number
761 * @msg: the error message
762 * @val: a string value
764 * Handle a non fatal parser error
766 static void LIBXML_ATTR_FORMAT(3,0)
767 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768 const char *msg, const xmlChar * val)
770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771 (ctxt->instate == XML_PARSER_EOF))
775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
776 XML_FROM_PARSER, error, XML_ERR_ERROR,
777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
783 * @ctxt: an XML parser context
784 * @error: the error number
786 * @info1: extra information string
787 * @info2: extra information string
789 * Handle a non fatal namespace error, i.e. violating namespace Well-Formedness constraints
791 static void LIBXML_ATTR_FORMAT(3,0)
792 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
794 const xmlChar * info1, const xmlChar * info2,
795 const xmlChar * info3)
797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798 (ctxt->instate == XML_PARSER_EOF))
802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
803 XML_ERR_ERROR, NULL, 0, (const char *) info1,
804 (const char *) info2, (const char *) info3, 0, 0, msg,
805 info1, info2, info3);
807 ctxt->nsWellFormed = 0;
812 * @ctxt: an XML parser context
813 * @error: the error number
815 * @info1: extra information string
816 * @info2: extra information string
818 * Handle a namespace warning
820 static void LIBXML_ATTR_FORMAT(3,0)
821 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
823 const xmlChar * info1, const xmlChar * info2,
824 const xmlChar * info3)
826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827 (ctxt->instate == XML_PARSER_EOF))
829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830 XML_ERR_WARNING, NULL, 0, (const char *) info1,
831 (const char *) info2, (const char *) info3, 0, 0, msg,
832 info1, info2, info3);
835 /************************************************************************
837 * Library wide options *
839 ************************************************************************/
843 * @feature: the feature to be examined
845 * Examines if the library has been compiled with a given feature.
847 * Returns a non-zero value if the feature exists, otherwise zero.
848 * Returns zero (0) if the feature does not exist or an
849 * unknown feature is requested, non-zero otherwise.
852 xmlHasFeature(xmlFeature feature)
855 case XML_WITH_THREAD:
856 #ifdef LIBXML_THREAD_ENABLED
862 #ifdef LIBXML_TREE_ENABLED
867 case XML_WITH_OUTPUT:
868 #ifdef LIBXML_OUTPUT_ENABLED
874 #ifdef LIBXML_PUSH_ENABLED
879 case XML_WITH_READER:
880 #ifdef LIBXML_READER_ENABLED
885 case XML_WITH_PATTERN:
886 #ifdef LIBXML_PATTERN_ENABLED
891 case XML_WITH_WRITER:
892 #ifdef LIBXML_WRITER_ENABLED
898 #ifdef LIBXML_SAX1_ENABLED
904 #ifdef LIBXML_FTP_ENABLED
910 #ifdef LIBXML_HTTP_ENABLED
916 #ifdef LIBXML_VALID_ENABLED
922 #ifdef LIBXML_HTML_ENABLED
927 case XML_WITH_LEGACY:
928 #ifdef LIBXML_LEGACY_ENABLED
934 #ifdef LIBXML_C14N_ENABLED
939 case XML_WITH_CATALOG:
940 #ifdef LIBXML_CATALOG_ENABLED
946 #ifdef LIBXML_XPATH_ENABLED
952 #ifdef LIBXML_XPTR_ENABLED
957 case XML_WITH_XINCLUDE:
958 #ifdef LIBXML_XINCLUDE_ENABLED
964 #ifdef LIBXML_ICONV_ENABLED
969 case XML_WITH_ISO8859X:
970 #ifdef LIBXML_ISO8859X_ENABLED
975 case XML_WITH_UNICODE:
976 #ifdef LIBXML_UNICODE_ENABLED
981 case XML_WITH_REGEXP:
982 #ifdef LIBXML_REGEXP_ENABLED
987 case XML_WITH_AUTOMATA:
988 #ifdef LIBXML_AUTOMATA_ENABLED
994 #ifdef LIBXML_EXPR_ENABLED
999 case XML_WITH_SCHEMAS:
1000 #ifdef LIBXML_SCHEMAS_ENABLED
1005 case XML_WITH_SCHEMATRON:
1006 #ifdef LIBXML_SCHEMATRON_ENABLED
1011 case XML_WITH_MODULES:
1012 #ifdef LIBXML_MODULES_ENABLED
1017 case XML_WITH_DEBUG:
1018 #ifdef LIBXML_DEBUG_ENABLED
1023 case XML_WITH_DEBUG_MEM:
1024 #ifdef DEBUG_MEMORY_LOCATION
1029 case XML_WITH_DEBUG_RUN:
1030 #ifdef LIBXML_DEBUG_RUNTIME
1036 #ifdef LIBXML_ZLIB_ENABLED
1042 #ifdef LIBXML_LZMA_ENABLED
1048 #ifdef LIBXML_ICU_ENABLED
1059 /************************************************************************
1061 * SAX2 defaulted attributes handling *
1063 ************************************************************************/
1067 * @ctxt: an XML parser context
1069 * Do the SAX2 detection and specific initialization
1072 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073 if (ctxt == NULL) return;
1074 #ifdef LIBXML_SAX1_ENABLED
1075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076 ((ctxt->sax->startElementNs != NULL) ||
1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1080 #endif /* LIBXML_SAX1_ENABLED */
1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086 (ctxt->str_xml_ns == NULL)) {
1087 xmlErrMemory(ctxt, NULL);
1091 typedef struct _xmlDefAttrs xmlDefAttrs;
1092 typedef xmlDefAttrs *xmlDefAttrsPtr;
1093 struct _xmlDefAttrs {
1094 int nbAttrs; /* number of defaulted attributes on that element */
1095 int maxAttrs; /* the size of the array */
1096 #if __STDC_VERSION__ >= 199901L
1097 /* Using a C99 flexible array member avoids UBSan errors. */
1098 const xmlChar *values[]; /* array of localname/prefix/values/external */
1100 const xmlChar *values[5];
1105 * xmlAttrNormalizeSpace:
1106 * @src: the source string
1107 * @dst: the target string
1109 * Normalize the space in non CDATA attribute values:
1110 * If the attribute type is not CDATA, then the XML processor MUST further
1111 * process the normalized attribute value by discarding any leading and
1112 * trailing space (#x20) characters, and by replacing sequences of space
1113 * (#x20) characters by a single space (#x20) character.
1114 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1115 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116 * passing src as dst is just fine.
1118 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1122 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1124 if ((src == NULL) || (dst == NULL))
1127 while (*src == 0x20) src++;
1130 while (*src == 0x20) src++;
1144 * xmlAttrNormalizeSpace2:
1145 * @src: the source string
1147 * Normalize the space in non CDATA attribute values, a slightly more complex
1148 * front end to avoid allocation problems when running on attribute values
1149 * coming from the input.
1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1154 static const xmlChar *
1155 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1158 int remove_head = 0;
1159 int need_realloc = 0;
1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1169 while (*cur == 0x20) {
1176 if ((*cur == 0x20) || (*cur == 0)) {
1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1188 xmlErrMemory(ctxt, NULL);
1191 xmlAttrNormalizeSpace(ret, ret);
1192 *len = (int) strlen((const char *)ret);
1194 } else if (remove_head) {
1195 *len -= remove_head;
1196 memmove(src, src + remove_head, 1 + *len);
1204 * @ctxt: an XML parser context
1205 * @fullname: the element fullname
1206 * @fullattr: the attribute fullname
1207 * @value: the attribute value
1209 * Add a defaulted attribute for an element
1212 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213 const xmlChar *fullname,
1214 const xmlChar *fullattr,
1215 const xmlChar *value) {
1216 xmlDefAttrsPtr defaults;
1218 const xmlChar *name;
1219 const xmlChar *prefix;
1222 * Allows detecting attribute redefinitions
1224 if (ctxt->attsSpecial != NULL) {
1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1229 if (ctxt->attsDefault == NULL) {
1230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1231 if (ctxt->attsDefault == NULL)
1236 * Split the element name into prefix:localname; the strings found
1237 * are within the DTD and then not associated to namespace names.
1239 name = xmlSplitQName3(fullname, &len);
1241 name = xmlDictLookup(ctxt->dict, fullname, -1);
1244 name = xmlDictLookup(ctxt->dict, name, -1);
1245 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1249 * make sure there is some storage
1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252 if (defaults == NULL) {
1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1254 (4 * 5) * sizeof(const xmlChar *));
1255 if (defaults == NULL)
1257 defaults->nbAttrs = 0;
1258 defaults->maxAttrs = 4;
1259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260 defaults, NULL) < 0) {
1264 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1265 xmlDefAttrsPtr temp;
1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1272 defaults->maxAttrs *= 2;
1273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274 defaults, NULL) < 0) {
1281 * Split the attribute name into prefix:localname; the strings found
1282 * are within the DTD and then not associated to namespace names.
1284 name = xmlSplitQName3(fullattr, &len);
1286 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1289 name = xmlDictLookup(ctxt->dict, name, -1);
1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1293 defaults->values[5 * defaults->nbAttrs] = name;
1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1295 /* intern the string and precompute the end */
1296 len = xmlStrlen(value);
1297 value = xmlDictLookup(ctxt->dict, value, len);
1298 defaults->values[5 * defaults->nbAttrs + 2] = value;
1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1304 defaults->nbAttrs++;
1309 xmlErrMemory(ctxt, NULL);
1314 * xmlAddSpecialAttr:
1315 * @ctxt: an XML parser context
1316 * @fullname: the element fullname
1317 * @fullattr: the attribute fullname
1318 * @type: the attribute type
1320 * Register this attribute type
1323 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324 const xmlChar *fullname,
1325 const xmlChar *fullattr,
1328 if (ctxt->attsSpecial == NULL) {
1329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1330 if (ctxt->attsSpecial == NULL)
1334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338 (void *) (long) type);
1342 xmlErrMemory(ctxt, NULL);
1347 * xmlCleanSpecialAttrCallback:
1349 * Removes CDATA attributes from the special attribute table
1352 xmlCleanSpecialAttrCallback(void *payload, void *data,
1353 const xmlChar *fullname, const xmlChar *fullattr,
1354 const xmlChar *unused ATTRIBUTE_UNUSED) {
1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1357 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1363 * xmlCleanSpecialAttr:
1364 * @ctxt: an XML parser context
1366 * Trim the list of attributes defined to remove all those of type
1367 * CDATA as they are not special. This call should be made after parsing
1368 * the DTD and before starting to parse the document root.
1371 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1373 if (ctxt->attsSpecial == NULL)
1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1378 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379 xmlHashFree(ctxt->attsSpecial, NULL);
1380 ctxt->attsSpecial = NULL;
1386 * xmlCheckLanguageID:
1387 * @lang: pointer to the string value
1389 * Checks that the value conforms to the LanguageID production:
1391 * NOTE: this is somewhat deprecated, those productions were removed from
1392 * the XML Second edition.
1394 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1396 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399 * [38] Subcode ::= ([a-z] | [A-Z])+
1401 * The current REC references the successors of RFC 1766, currently 5646
1403 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404 * langtag = language
1410 * language = 2*3ALPHA ; shortest ISO 639 code
1411 * ["-" extlang] ; sometimes followed by
1412 * ; extended language subtags
1413 * / 4ALPHA ; or reserved for future use
1414 * / 5*8ALPHA ; or registered language subtag
1416 * extlang = 3ALPHA ; selected ISO 639 codes
1417 * *2("-" 3ALPHA) ; permanently reserved
1419 * script = 4ALPHA ; ISO 15924 code
1421 * region = 2ALPHA ; ISO 3166-1 code
1422 * / 3DIGIT ; UN M.49 code
1424 * variant = 5*8alphanum ; registered variants
1425 * / (DIGIT 3alphanum)
1427 * extension = singleton 1*("-" (2*8alphanum))
1429 * ; Single alphanumerics
1430 * ; "x" reserved for private use
1431 * singleton = DIGIT ; 0 - 9
1437 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438 * The parser below doesn't try to cope with extension or privateuse
1439 * that could be added but that's not interoperable anyway
1441 * Returns 1 if correct 0 otherwise
1444 xmlCheckLanguageID(const xmlChar * lang)
1446 const xmlChar *cur = lang, *nxt;
1450 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1451 ((cur[0] == 'I') && (cur[1] == '-')) ||
1452 ((cur[0] == 'x') && (cur[1] == '-')) ||
1453 ((cur[0] == 'X') && (cur[1] == '-'))) {
1455 * Still allow IANA code and user code which were coming
1456 * from the previous version of the XML-1.0 specification
1457 * it's deprecated but we should not fail
1460 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1461 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1463 return(cur[0] == 0);
1466 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1469 if (nxt - cur >= 4) {
1473 if ((nxt - cur > 8) || (nxt[0] != 0))
1479 /* we got an ISO 639 code */
1487 /* now we can have extlang or script or region or variant */
1488 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1491 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 /* we parsed an extlang */
1510 /* now we can have script or region or variant */
1511 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 /* we parsed a script */
1532 /* now we can have region or variant */
1533 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1540 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1544 /* we parsed a region */
1553 /* now we can just have a variant */
1554 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1558 if ((nxt - cur < 5) || (nxt - cur > 8))
1561 /* we parsed a variant */
1567 /* extensions and private use subtags not checked */
1571 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1579 /************************************************************************
1581 * Parser stacks related functions and macros *
1583 ************************************************************************/
1585 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586 const xmlChar ** str);
1591 * @ctxt: an XML parser context
1592 * @prefix: the namespace prefix or NULL
1593 * @URL: the namespace name
1595 * Pushes a new parser namespace on top of the ns stack
1597 * Returns -1 in case of error, -2 if the namespace should be discarded
1598 * and the index in the stack otherwise.
1601 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1603 if (ctxt->options & XML_PARSE_NSCLEAN) {
1605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1606 if (ctxt->nsTab[i] == prefix) {
1608 if (ctxt->nsTab[i + 1] == URL)
1610 /* out of scope keep it */
1615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1618 ctxt->nsTab = (const xmlChar **)
1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620 if (ctxt->nsTab == NULL) {
1621 xmlErrMemory(ctxt, NULL);
1625 } else if (ctxt->nsNr >= ctxt->nsMax) {
1626 const xmlChar ** tmp;
1628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1631 xmlErrMemory(ctxt, NULL);
1637 ctxt->nsTab[ctxt->nsNr++] = prefix;
1638 ctxt->nsTab[ctxt->nsNr++] = URL;
1639 return (ctxt->nsNr);
1643 * @ctxt: an XML parser context
1644 * @nr: the number to pop
1646 * Pops the top @nr parser prefix/namespace from the ns stack
1648 * Returns the number of namespaces removed
1651 nsPop(xmlParserCtxtPtr ctxt, int nr)
1655 if (ctxt->nsTab == NULL) return(0);
1656 if (ctxt->nsNr < nr) {
1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1660 if (ctxt->nsNr <= 0)
1663 for (i = 0;i < nr;i++) {
1665 ctxt->nsTab[ctxt->nsNr] = NULL;
1672 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673 const xmlChar **atts;
1677 if (ctxt->atts == NULL) {
1678 maxatts = 55; /* allow for 10 attrs by default */
1679 atts = (const xmlChar **)
1680 xmlMalloc(maxatts * sizeof(xmlChar *));
1681 if (atts == NULL) goto mem_error;
1683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
1686 ctxt->maxatts = maxatts;
1687 } else if (nr + 5 > ctxt->maxatts) {
1688 maxatts = (nr + 5) * 2;
1689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690 maxatts * sizeof(const xmlChar *));
1691 if (atts == NULL) goto mem_error;
1693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694 (maxatts / 5) * sizeof(int));
1695 if (attallocs == NULL) goto mem_error;
1696 ctxt->attallocs = attallocs;
1697 ctxt->maxatts = maxatts;
1699 return(ctxt->maxatts);
1701 xmlErrMemory(ctxt, NULL);
1707 * @ctxt: an XML parser context
1708 * @value: the parser input
1710 * Pushes a new parser input on top of the input stack
1712 * Returns -1 in case of error, the index in the stack otherwise
1715 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1717 if ((ctxt == NULL) || (value == NULL))
1719 if (ctxt->inputNr >= ctxt->inputMax) {
1720 ctxt->inputMax *= 2;
1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1724 sizeof(ctxt->inputTab[0]));
1725 if (ctxt->inputTab == NULL) {
1726 xmlErrMemory(ctxt, NULL);
1727 xmlFreeInputStream(value);
1728 ctxt->inputMax /= 2;
1733 ctxt->inputTab[ctxt->inputNr] = value;
1734 ctxt->input = value;
1735 return (ctxt->inputNr++);
1739 * @ctxt: an XML parser context
1741 * Pops the top parser input from the input stack
1743 * Returns the input just removed
1746 inputPop(xmlParserCtxtPtr ctxt)
1748 xmlParserInputPtr ret;
1752 if (ctxt->inputNr <= 0)
1755 if (ctxt->inputNr > 0)
1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1759 ret = ctxt->inputTab[ctxt->inputNr];
1760 ctxt->inputTab[ctxt->inputNr] = NULL;
1765 * @ctxt: an XML parser context
1766 * @value: the element node
1768 * Pushes a new element node on top of the node stack
1770 * Returns -1 in case of error, the index in the stack otherwise
1773 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1775 if (ctxt == NULL) return(0);
1776 if (ctxt->nodeNr >= ctxt->nodeMax) {
1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1781 sizeof(ctxt->nodeTab[0]));
1783 xmlErrMemory(ctxt, NULL);
1786 ctxt->nodeTab = tmp;
1789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1794 xmlHaltParser(ctxt);
1797 ctxt->nodeTab[ctxt->nodeNr] = value;
1799 return (ctxt->nodeNr++);
1804 * @ctxt: an XML parser context
1806 * Pops the top element node from the node stack
1808 * Returns the node just removed
1811 nodePop(xmlParserCtxtPtr ctxt)
1815 if (ctxt == NULL) return(NULL);
1816 if (ctxt->nodeNr <= 0)
1819 if (ctxt->nodeNr > 0)
1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1823 ret = ctxt->nodeTab[ctxt->nodeNr];
1824 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1828 #ifdef LIBXML_PUSH_ENABLED
1831 * @ctxt: an XML parser context
1832 * @value: the element name
1833 * @prefix: the element prefix
1834 * @URI: the element namespace name
1836 * Pushes a new element name/prefix/URL on top of the name stack
1838 * Returns -1 in case of error, the index in the stack otherwise
1841 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1844 if (ctxt->nameNr >= ctxt->nameMax) {
1845 const xmlChar * *tmp;
1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1850 sizeof(ctxt->nameTab[0]));
1855 ctxt->nameTab = tmp;
1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1858 sizeof(ctxt->pushTab[0]));
1863 ctxt->pushTab = tmp2;
1865 ctxt->nameTab[ctxt->nameNr] = value;
1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1870 return (ctxt->nameNr++);
1872 xmlErrMemory(ctxt, NULL);
1877 * @ctxt: an XML parser context
1879 * Pops the top element/prefix/URI name from the name stack
1881 * Returns the name just removed
1883 static const xmlChar *
1884 nameNsPop(xmlParserCtxtPtr ctxt)
1888 if (ctxt->nameNr <= 0)
1891 if (ctxt->nameNr > 0)
1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1895 ret = ctxt->nameTab[ctxt->nameNr];
1896 ctxt->nameTab[ctxt->nameNr] = NULL;
1899 #endif /* LIBXML_PUSH_ENABLED */
1903 * @ctxt: an XML parser context
1904 * @value: the element name
1906 * Pushes a new element name on top of the name stack
1908 * Returns -1 in case of error, the index in the stack otherwise
1911 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1913 if (ctxt == NULL) return (-1);
1915 if (ctxt->nameNr >= ctxt->nameMax) {
1916 const xmlChar * *tmp;
1917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1919 sizeof(ctxt->nameTab[0]));
1923 ctxt->nameTab = tmp;
1926 ctxt->nameTab[ctxt->nameNr] = value;
1928 return (ctxt->nameNr++);
1930 xmlErrMemory(ctxt, NULL);
1935 * @ctxt: an XML parser context
1937 * Pops the top element name from the name stack
1939 * Returns the name just removed
1942 namePop(xmlParserCtxtPtr ctxt)
1946 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1949 if (ctxt->nameNr > 0)
1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1953 ret = ctxt->nameTab[ctxt->nameNr];
1954 ctxt->nameTab[ctxt->nameNr] = NULL;
1958 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1959 if (ctxt->spaceNr >= ctxt->spaceMax) {
1962 ctxt->spaceMax *= 2;
1963 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1966 xmlErrMemory(ctxt, NULL);
1970 ctxt->spaceTab = tmp;
1972 ctxt->spaceTab[ctxt->spaceNr] = val;
1973 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1974 return(ctxt->spaceNr++);
1977 static int spacePop(xmlParserCtxtPtr ctxt) {
1979 if (ctxt->spaceNr <= 0) return(0);
1981 if (ctxt->spaceNr > 0)
1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1984 ctxt->space = &ctxt->spaceTab[0];
1985 ret = ctxt->spaceTab[ctxt->spaceNr];
1986 ctxt->spaceTab[ctxt->spaceNr] = -1;
/*
 * Macros for accessing the content. Those should be used only by the
 * parser. All of them expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context
 * to use them:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   BASE_PTR the base pointer of the current input buffer.
 *   CUR      the current xmlChar value. This should be used internally by
 *            the parser only to compare to ASCII values, otherwise it
 *            would break when running with UTF-8 encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done (both expand to the same
 *            expression here).
 *   NXT(n)   the n'th next xmlChar. Same as CUR, it should be used only
 *            to compare on ASCII based substrings.
 *   CMPn(s, c1, ..., cn)
 *            true when the first n bytes at s equal c1 ... cn, compared
 *            left to right with short-circuit evaluation. s is evaluated
 *            once per byte compared.
 *   SKIP(n)  skip n xmlChars; must be used only to skip ASCII defined
 *            strings without newlines within the parser.
 *   NEXT1    skip 1 xmlChar; must be used only to skip 1 non-newline
 *            ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT     skip to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer,
 *            incrementing the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * Every macro argument is parenthesized so that an expression argument
 * (pointer arithmetic, a conditional, ...) is cast and compared as a unit.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * context's nbChars counter and to the input's column. No newline
 * handling is done. If the byte reached is 0, more input is requested.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the current input by val bytes one at a time,
 * keeping line and column up to date across '\n'. If the byte reached is
 * 0, more input is requested. val is parenthesized in the loop bound so
 * that an expression argument is compared as a unit; it is evaluated on
 * every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2068 #define SHRINK if ((ctxt->progressive == 0) && \
2069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2073 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074 xmlParserInputShrink(ctxt->input);
2075 if (*ctxt->input->cur == 0)
2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2079 #define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2083 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2092 xmlHaltParser(ctxt);
2095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/* Skip blanks at the current position; see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping the column and the nbChars
 * counter; no newline handling. If the byte reached is 0, more input is
 * requested. Expands to a braced block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/column when that character is a newline. The nbChars
 * counter is not touched and no input is requested.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Decode the character at the current input position; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same, but decoding from the string s instead of the parser input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, advancing
 * i. A length l of 1 stores a single byte; any other l goes through
 * xmlCopyCharMultiByte().
 * The expansion is a bare if/else statement (kept for compatibility with
 * existing call sites): wrap the call in braces when it is the body of
 * another if.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2133 * xmlSkipBlankChars:
2134 * @ctxt: the XML parser context
2136 * skip all blanks character found at that point in the input streams.
2137 * It pops up finished entities in the process if allowable at that point.
2139 * Returns the number of space chars skipped
2143 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2147 * It's Okay to use CUR/NEXT here since all the blanks are on
2150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2153 * if we are in the document content, go really fast
2155 cur = ctxt->input->cur;
2156 while (IS_BLANK_CH(*cur)) {
2158 ctxt->input->line++; ctxt->input->col = 1;
2165 ctxt->input->cur = cur;
2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167 cur = ctxt->input->cur;
2170 ctxt->input->cur = cur;
2172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2177 } else if (CUR == '%') {
2179 * Need to handle support of entities branching here
2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2183 xmlParsePEReference(ctxt);
2184 } else if (CUR == 0) {
2185 if (ctxt->inputNr <= 1)
2193 * Also increase the counter when entering or exiting a PERef.
2194 * The spec says: "When a parameter-entity reference is recognized
2195 * in the DTD and included, its replacement text MUST be enlarged
2196 * by the attachment of one leading and one following space (#x20)
2205 /************************************************************************
2207 * Commodity functions to handle entities *
2209 ************************************************************************/
2213 * @ctxt: an XML parser context
2215 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216 * pop it and return the next char.
2218 * Returns the current xmlChar in the parser context
2221 xmlPopInput(xmlParserCtxtPtr ctxt) {
2222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2223 if (xmlParserDebugEntities)
2224 xmlGenericError(xmlGenericErrorContext,
2225 "Popping input %d\n", ctxt->inputNr);
2226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227 (ctxt->instate != XML_PARSER_EOF))
2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229 "Unfinished entity outside the DTD");
2230 xmlFreeInputStream(inputPop(ctxt));
2231 if (*ctxt->input->cur == 0)
2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2238 * @ctxt: an XML parser context
2239 * @input: an XML parser input fragment (entity, XML fragment ...).
2241 * xmlPushInput: switch to a new input stream which is stacked on top
2242 * of the previous one(s).
2243 * Returns -1 in case of error or the index in the input stack
2246 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2248 if (input == NULL) return(-1);
2250 if (xmlParserDebugEntities) {
2251 if ((ctxt->input != NULL) && (ctxt->input->filename))
2252 xmlGenericError(xmlGenericErrorContext,
2253 "%s(%d): ", ctxt->input->filename,
2255 xmlGenericError(xmlGenericErrorContext,
2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259 (ctxt->inputNr > 1024)) {
2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261 while (ctxt->inputNr > 1)
2262 xmlFreeInputStream(inputPop(ctxt));
2265 ret = inputPush(ctxt, input);
2266 if (ctxt->instate == XML_PARSER_EOF)
2274 * @ctxt: an XML parser context
2276 * parse Reference declarations
2278 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279 * '&#x' [0-9a-fA-F]+ ';'
2281 * [ WFC: Legal Character ]
2282 * Characters referred to using character references must match the
2283 * production for Char.
2285 * Returns the value parsed (as an int), 0 in case of error
2288 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2289 unsigned int val = 0;
2291 unsigned int outofrange = 0;
2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2296 if ((RAW == '&') && (NXT(1) == '#') &&
2300 while (RAW != ';') { /* loop blocked by count */
2304 if (ctxt->instate == XML_PARSER_EOF)
2307 if ((RAW >= '0') && (RAW <= '9'))
2308 val = val * 16 + (CUR - '0');
2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310 val = val * 16 + (CUR - 'a') + 10;
2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312 val = val * 16 + (CUR - 'A') + 10;
2314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2330 } else if ((RAW == '&') && (NXT(1) == '#')) {
2333 while (RAW != ';') { /* loop blocked by count */
2337 if (ctxt->instate == XML_PARSER_EOF)
2340 if ((RAW >= '0') && (RAW <= '9'))
2341 val = val * 10 + (CUR - '0');
2343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2364 * [ WFC: Legal Character ]
2365 * Characters referred to using character references must match the
2366 * production for Char.
2368 if ((IS_CHAR(val) && (outofrange == 0))) {
2371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372 "xmlParseCharRef: invalid xmlChar value %d\n",
2379 * xmlParseStringCharRef:
2380 * @ctxt: an XML parser context
2381 * @str: a pointer to an index in the string
2383 * parse Reference declarations, variant parsing from a string rather
2384 * than an an input flow.
2386 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387 * '&#x' [0-9a-fA-F]+ ';'
2389 * [ WFC: Legal Character ]
2390 * Characters referred to using character references must match the
2391 * production for Char.
2393 * Returns the value parsed (as an int), 0 in case of error, str will be
2394 * updated to the current value of the index
2397 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2400 unsigned int val = 0;
2401 unsigned int outofrange = 0;
2403 if ((str == NULL) || (*str == NULL)) return(0);
2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2409 while (cur != ';') { /* Non input consuming loop */
2410 if ((cur >= '0') && (cur <= '9'))
2411 val = val * 16 + (cur - '0');
2412 else if ((cur >= 'a') && (cur <= 'f'))
2413 val = val * 16 + (cur - 'a') + 10;
2414 else if ((cur >= 'A') && (cur <= 'F'))
2415 val = val * 16 + (cur - 'A') + 10;
2417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2429 } else if ((cur == '&') && (ptr[1] == '#')){
2432 while (cur != ';') { /* Non input consuming loops */
2433 if ((cur >= '0') && (cur <= '9'))
2434 val = val * 10 + (cur - '0');
2436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2455 * [ WFC: Legal Character ]
2456 * Characters referred to using character references must match the
2457 * production for Char.
2459 if ((IS_CHAR(val) && (outofrange == 0))) {
2462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2470 * xmlParserHandlePEReference:
2471 * @ctxt: the parser context
2473 * [69] PEReference ::= '%' Name ';'
2475 * [ WFC: No Recursion ]
2476 * A parsed entity must not contain a recursive
2477 * reference to itself, either directly or indirectly.
2479 * [ WFC: Entity Declared ]
2480 * In a document without any DTD, a document with only an internal DTD
2481 * subset which contains no parameter entity references, or a document
2482 * with "standalone='yes'", ... ... The declaration of a parameter
2483 * entity must precede any reference to it...
2485 * [ VC: Entity Declared ]
2486 * In a document with an external subset or external parameter entities
2487 * with "standalone='no'", ... ... The declaration of a parameter entity
2488 * must precede any reference to it...
2491 * Parameter-entity references may only appear in the DTD.
2492 * NOTE: misleading but this is handled.
2494 * A PEReference may have been detected in the current input stream
2495 * the handling is done accordingly to
2496 * http://www.w3.org/TR/REC-xml#entproc
2498 * - Included in literal in entity values
2499 * - Included as Parameter Entity reference within DTDs
2502 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2503 switch(ctxt->instate) {
2504 case XML_PARSER_CDATA_SECTION:
2506 case XML_PARSER_COMMENT:
2508 case XML_PARSER_START_TAG:
2510 case XML_PARSER_END_TAG:
2512 case XML_PARSER_EOF:
2513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2515 case XML_PARSER_PROLOG:
2516 case XML_PARSER_START:
2517 case XML_PARSER_MISC:
2518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2520 case XML_PARSER_ENTITY_DECL:
2521 case XML_PARSER_CONTENT:
2522 case XML_PARSER_ATTRIBUTE_VALUE:
2524 case XML_PARSER_SYSTEM_LITERAL:
2525 case XML_PARSER_PUBLIC_LITERAL:
2526 /* we just ignore it there */
2528 case XML_PARSER_EPILOG:
2529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2531 case XML_PARSER_ENTITY_VALUE:
2533 * NOTE: in the case of entity values, we don't do the
2534 * substitution here since we need the literal
2535 * entity value to be able to save the internal
2536 * subset of the document.
2537 * This will be handled by xmlStringDecodeEntities
2540 case XML_PARSER_DTD:
2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543 * In the internal DTD subset, parameter-entity references
2544 * can occur only where markup declarations can occur, not
2545 * within markup declarations.
2546 * In that case this is handled in xmlParseMarkupDecl
2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2553 case XML_PARSER_IGNORE:
2557 xmlParsePEReference(ctxt);
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t variable holding the current
 *   size of buffer; both are updated on success.
 * mem_error: is expected to be a label in the calling function that
 *   handles memory allocation failures; it is jumped to when the new size
 *   wraps around below the old one or when the reallocation fails. In both
 *   cases buffer and buffer##_size are left untouched.
 * n is parenthesized so that an expression argument (for instance a shift
 *   or a conditional) is added as a unit.
 * The expansion is a braced block, not a single statement.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2576 * xmlStringLenDecodeEntities:
2577 * @ctxt: the parser context
2578 * @str: the input string
2579 * @len: the string length
2580 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581 * @end: an end marker xmlChar, 0 if none
2582 * @end2: an end marker xmlChar, 0 if none
2583 * @end3: an end marker xmlChar, 0 if none
2585 * Takes a entity string content and process to do the adequate substitutions.
2587 * [67] Reference ::= EntityRef | CharRef
2589 * [69] PEReference ::= '%' Name ';'
2591 * Returns A newly allocated string with the substitution done. The caller
2592 * must deallocate it !
2595 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2597 xmlChar *buffer = NULL;
2598 size_t buffer_size = 0;
2601 xmlChar *current = NULL;
2602 xmlChar *rep = NULL;
2603 const xmlChar *last;
2607 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2611 if (((ctxt->depth > 40) &&
2612 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613 (ctxt->depth > 1024)) {
2614 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2619 * allocate a translation buffer.
2621 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2622 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2623 if (buffer == NULL) goto mem_error;
2626 * OK loop until we reach one of the ending char or a size limit.
2627 * we are operating on already parsed values.
2630 c = CUR_SCHAR(str, l);
2633 while ((c != 0) && (c != end) && /* non input consuming loop */
2634 (c != end2) && (c != end3)) {
2637 if ((c == '&') && (str[1] == '#')) {
2638 int val = xmlParseStringCharRef(ctxt, &str);
2640 COPY_BUF(0,buffer,nbchars,val);
2642 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2643 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2645 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646 if (xmlParserDebugEntities)
2647 xmlGenericError(xmlGenericErrorContext,
2648 "String decoding Entity Reference: %.30s\n",
2650 ent = xmlParseStringEntityRef(ctxt, &str);
2651 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2652 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2654 xmlParserEntityCheck(ctxt, 0, ent, 0);
2656 ctxt->nbentities += ent->checked / 2;
2657 if ((ent != NULL) &&
2658 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2659 if (ent->content != NULL) {
2660 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2661 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2662 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2665 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2666 "predefined entity has no content\n");
2668 } else if ((ent != NULL) && (ent->content != NULL)) {
2670 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2674 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2675 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2680 while (*current != 0) { /* non input consuming loop */
2681 buffer[nbchars++] = *current++;
2682 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2683 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2685 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2691 } else if (ent != NULL) {
2692 int i = xmlStrlen(ent->name);
2693 const xmlChar *cur = ent->name;
2695 buffer[nbchars++] = '&';
2696 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2697 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2700 buffer[nbchars++] = *cur++;
2701 buffer[nbchars++] = ';';
2703 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2704 if (xmlParserDebugEntities)
2705 xmlGenericError(xmlGenericErrorContext,
2706 "String decoding PE Reference: %.30s\n", str);
2707 ent = xmlParseStringPEReference(ctxt, &str);
2708 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2710 xmlParserEntityCheck(ctxt, 0, ent, 0);
2712 ctxt->nbentities += ent->checked / 2;
2714 if (ent->content == NULL) {
2716 * Note: external parsed entities will not be loaded,
2717 * it is not required for a non-validating parser to
2718 * complete external PEreferences coming from the
2721 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2722 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2723 (ctxt->validate != 0)) {
2724 xmlLoadEntityContent(ctxt, ent);
2726 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2727 "not validating will not read content for PE entity %s\n",
2732 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2737 while (*current != 0) { /* non input consuming loop */
2738 buffer[nbchars++] = *current++;
2739 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2740 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2742 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2750 COPY_BUF(l,buffer,nbchars,c);
2752 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2753 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2757 c = CUR_SCHAR(str, l);
2761 buffer[nbchars] = 0;
2765 xmlErrMemory(ctxt, NULL);
2775 * xmlStringDecodeEntities:
2776 * @ctxt: the parser context
2777 * @str: the input string
2778 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2779 * @end: an end marker xmlChar, 0 if none
2780 * @end2: an end marker xmlChar, 0 if none
2781 * @end3: an end marker xmlChar, 0 if none
2783 * Takes a entity string content and process to do the adequate substitutions.
2785 * [67] Reference ::= EntityRef | CharRef
2787 * [69] PEReference ::= '%' Name ';'
2789 * Returns A newly allocated string with the substitution done. The caller
2790 * must deallocate it !
2793 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2794 xmlChar end, xmlChar end2, xmlChar end3) {
2795 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2796 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2800 /************************************************************************
2802 * Commodity functions, cleanup needed ? *
2804 ************************************************************************/
2808 * @ctxt: an XML parser context
2810 * @len: the size of @str
2811 * @blank_chars: we know the chars are blanks
2813 * Is this a sequence of blank chars that one can ignore ?
2815 * Returns 1 if ignorable 0 otherwise.
2818 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2821 xmlNodePtr lastChild;
2824 * Don't spend time trying to differentiate them, the same callback is
2827 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2831 * Check for xml:space value.
2833 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2834 (*(ctxt->space) == -2))
2838 * Check that the string is made of blanks
2840 if (blank_chars == 0) {
2841 for (i = 0;i < len;i++)
2842 if (!(IS_BLANK_CH(str[i]))) return(0);
2846 * Look if the element is mixed content in the DTD if available
2848 if (ctxt->node == NULL) return(0);
2849 if (ctxt->myDoc != NULL) {
2850 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2851 if (ret == 0) return(1);
2852 if (ret == 1) return(0);
2856 * Otherwise, heuristic :-\
2858 if ((RAW != '<') && (RAW != 0xD)) return(0);
2859 if ((ctxt->node->children == NULL) &&
2860 (RAW == '<') && (NXT(1) == '/')) return(0);
2862 lastChild = xmlGetLastChild(ctxt->node);
2863 if (lastChild == NULL) {
2864 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2865 (ctxt->node->content != NULL)) return(0);
2866 } else if (xmlNodeIsText(lastChild))
2868 else if ((ctxt->node->children != NULL) &&
2869 (xmlNodeIsText(ctxt->node->children)))
2874 /************************************************************************
2876 * Extra stuff for namespace support *
2877 * Relates to http://www.w3.org/TR/WD-xml-names *
2879 ************************************************************************/
2883 * @ctxt: an XML parser context
2884 * @name: a UTF8 encoded XML qualified name string
2885 * @prefix: a xmlChar **
2887 * parse an UTF8 encoded XML qualified name string
2889 * [NS 5] QName ::= (Prefix ':')? LocalPart
2891 * [NS 6] Prefix ::= NCName
2893 * [NS 7] LocalPart ::= NCName
2895 * Returns the local part, and prefix is updated
2896 * to get the Prefix if any.
2900 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2901 xmlChar buf[XML_MAX_NAMELEN + 5];
2902 xmlChar *buffer = NULL;
2904 int max = XML_MAX_NAMELEN;
2905 xmlChar *ret = NULL;
2906 const xmlChar *cur = name;
2909 if (prefix == NULL) return(NULL);
2912 if (cur == NULL) return(NULL);
2914 #ifndef XML_XML_NAMESPACE
2915 /* xml: prefix is not really a namespace */
2916 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2917 (cur[2] == 'l') && (cur[3] == ':'))
2918 return(xmlStrdup(name));
2921 /* nasty but well-formed */
2923 return(xmlStrdup(name));
2926 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2932 * Okay someone managed to make a huge name, so he's ready to pay
2933 * for the processing speed.
2937 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2938 if (buffer == NULL) {
2939 xmlErrMemory(ctxt, NULL);
2942 memcpy(buffer, buf, len);
2943 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2944 if (len + 10 > max) {
2948 tmp = (xmlChar *) xmlRealloc(buffer,
2949 max * sizeof(xmlChar));
2952 xmlErrMemory(ctxt, NULL);
2963 if ((c == ':') && (*cur == 0)) {
2967 return(xmlStrdup(name));
2971 ret = xmlStrndup(buf, len);
2975 max = XML_MAX_NAMELEN;
2983 return(xmlStrndup(BAD_CAST "", 0));
2988 * Check that the first character is proper to start
2991 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2992 ((c >= 0x41) && (c <= 0x5A)) ||
2993 (c == '_') || (c == ':'))) {
2995 int first = CUR_SCHAR(cur, l);
2997 if (!IS_LETTER(first) && (first != '_')) {
2998 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2999 "Name %s is not XML Namespace compliant\n",
3005 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3011 * Okay someone managed to make a huge name, so he's ready to pay
3012 * for the processing speed.
3016 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3017 if (buffer == NULL) {
3018 xmlErrMemory(ctxt, NULL);
3021 memcpy(buffer, buf, len);
3022 while (c != 0) { /* tested bigname2.xml */
3023 if (len + 10 > max) {
3027 tmp = (xmlChar *) xmlRealloc(buffer,
3028 max * sizeof(xmlChar));
3030 xmlErrMemory(ctxt, NULL);
3043 ret = xmlStrndup(buf, len);
3052 /************************************************************************
3054 * The parser itself *
3055 * Relates to http://www.w3.org/TR/REC-xml *
3057 ************************************************************************/
3059 /************************************************************************
3061 * Routines to parse Name, NCName and NmToken *
3063 ************************************************************************/
3065 static unsigned long nbParseName = 0;
3066 static unsigned long nbParseNmToken = 0;
3067 static unsigned long nbParseNCName = 0;
3068 static unsigned long nbParseNCNameComplex = 0;
3069 static unsigned long nbParseNameComplex = 0;
3070 static unsigned long nbParseStringName = 0;
3074 * The two following functions are related to the change of accepted
3075 * characters for Name and NmToken in the Revision 5 of XML-1.0
3076 * They correspond to the modified production [4] and the new production [4a]
3077 * changes in that revision. Also note that the macros used for the
3078 * productions Letter, Digit, CombiningChar and Extender are not needed
3080 * We still keep compatibility to pre-revision5 parsing semantic if the
3081 * new XML_PARSE_OLD10 option is given to the parser.
3084 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3085 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3087 * Use the new checks of production [4] [4a] and [5] of the
3088 * Update 5 of XML-1.0
3090 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3091 (((c >= 'a') && (c <= 'z')) ||
3092 ((c >= 'A') && (c <= 'Z')) ||
3093 (c == '_') || (c == ':') ||
3094 ((c >= 0xC0) && (c <= 0xD6)) ||
3095 ((c >= 0xD8) && (c <= 0xF6)) ||
3096 ((c >= 0xF8) && (c <= 0x2FF)) ||
3097 ((c >= 0x370) && (c <= 0x37D)) ||
3098 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3099 ((c >= 0x200C) && (c <= 0x200D)) ||
3100 ((c >= 0x2070) && (c <= 0x218F)) ||
3101 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3102 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3103 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3104 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3105 ((c >= 0x10000) && (c <= 0xEFFFF))))
3108 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3115 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3116 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3118 * Use the new checks of production [4] [4a] and [5] of the
3119 * Update 5 of XML-1.0
3121 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3122 (((c >= 'a') && (c <= 'z')) ||
3123 ((c >= 'A') && (c <= 'Z')) ||
3124 ((c >= '0') && (c <= '9')) || /* !start */
3125 (c == '_') || (c == ':') ||
3126 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3127 ((c >= 0xC0) && (c <= 0xD6)) ||
3128 ((c >= 0xD8) && (c <= 0xF6)) ||
3129 ((c >= 0xF8) && (c <= 0x2FF)) ||
3130 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3131 ((c >= 0x370) && (c <= 0x37D)) ||
3132 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3133 ((c >= 0x200C) && (c <= 0x200D)) ||
3134 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3135 ((c >= 0x2070) && (c <= 0x218F)) ||
3136 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3137 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3138 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3139 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3140 ((c >= 0x10000) && (c <= 0xEFFFF))))
3143 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3144 (c == '.') || (c == '-') ||
3145 (c == '_') || (c == ':') ||
3146 (IS_COMBINING(c)) ||
3153 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3154 int *len, int *alloc, int normalize);
3156 static const xmlChar *
3157 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3163 nbParseNameComplex++;
3167 * Handler for more complex cases
3170 if (ctxt->instate == XML_PARSER_EOF)
3173 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3175 * Use the new checks of production [4] [4a] and [5] of the
3176 * Update 5 of XML-1.0
3178 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3179 (!(((c >= 'a') && (c <= 'z')) ||
3180 ((c >= 'A') && (c <= 'Z')) ||
3181 (c == '_') || (c == ':') ||
3182 ((c >= 0xC0) && (c <= 0xD6)) ||
3183 ((c >= 0xD8) && (c <= 0xF6)) ||
3184 ((c >= 0xF8) && (c <= 0x2FF)) ||
3185 ((c >= 0x370) && (c <= 0x37D)) ||
3186 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3187 ((c >= 0x200C) && (c <= 0x200D)) ||
3188 ((c >= 0x2070) && (c <= 0x218F)) ||
3189 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3190 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3191 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3192 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3193 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3199 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3200 (((c >= 'a') && (c <= 'z')) ||
3201 ((c >= 'A') && (c <= 'Z')) ||
3202 ((c >= '0') && (c <= '9')) || /* !start */
3203 (c == '_') || (c == ':') ||
3204 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3205 ((c >= 0xC0) && (c <= 0xD6)) ||
3206 ((c >= 0xD8) && (c <= 0xF6)) ||
3207 ((c >= 0xF8) && (c <= 0x2FF)) ||
3208 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3209 ((c >= 0x370) && (c <= 0x37D)) ||
3210 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3211 ((c >= 0x200C) && (c <= 0x200D)) ||
3212 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3213 ((c >= 0x2070) && (c <= 0x218F)) ||
3214 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3215 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3216 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3217 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3218 ((c >= 0x10000) && (c <= 0xEFFFF))
3220 if (count++ > XML_PARSER_CHUNK_SIZE) {
3223 if (ctxt->instate == XML_PARSER_EOF)
3231 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3232 (!IS_LETTER(c) && (c != '_') &&
3240 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3241 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3242 (c == '.') || (c == '-') ||
3243 (c == '_') || (c == ':') ||
3244 (IS_COMBINING(c)) ||
3245 (IS_EXTENDER(c)))) {
3246 if (count++ > XML_PARSER_CHUNK_SIZE) {
3249 if (ctxt->instate == XML_PARSER_EOF)
3257 if ((len > XML_MAX_NAME_LENGTH) &&
3258 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3259 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3262 if (ctxt->input->cur - ctxt->input->base < len) {
3264 * There were a couple of bugs where PERefs led to a change
3265 * of the buffer. Check the buffer size to avoid passing an invalid
3266 * pointer to xmlDictLookup.
3268 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3269 "unexpected change of input buffer");
3272 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3273 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3274 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3279 * @ctxt: an XML parser context
3281 * parse an XML name.
3283 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3284 * CombiningChar | Extender
3286 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3288 * [6] Names ::= Name (#x20 Name)*
3290 * Returns the Name parsed or NULL
3294 xmlParseName(xmlParserCtxtPtr ctxt) {
3306 * Accelerator for simple ASCII names
3308 in = ctxt->input->cur;
3309 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3310 ((*in >= 0x41) && (*in <= 0x5A)) ||
3311 (*in == '_') || (*in == ':')) {
3313 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3314 ((*in >= 0x41) && (*in <= 0x5A)) ||
3315 ((*in >= 0x30) && (*in <= 0x39)) ||
3316 (*in == '_') || (*in == '-') ||
3317 (*in == ':') || (*in == '.'))
3319 if ((*in > 0) && (*in < 0x80)) {
3320 count = in - ctxt->input->cur;
3321 if ((count > XML_MAX_NAME_LENGTH) &&
3322 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3323 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3326 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3327 ctxt->input->cur = in;
3328 ctxt->nbChars += count;
3329 ctxt->input->col += count;
3331 xmlErrMemory(ctxt, NULL);
3335 /* accelerator for special cases */
3336 return(xmlParseNameComplex(ctxt));
3339 static const xmlChar *
3340 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3344 size_t startPosition = 0;
3347 nbParseNCNameComplex++;
3351 * Handler for more complex cases
3354 startPosition = CUR_PTR - BASE_PTR;
3356 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3357 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3361 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3362 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3363 if (count++ > XML_PARSER_CHUNK_SIZE) {
3364 if ((len > XML_MAX_NAME_LENGTH) &&
3365 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3366 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3371 if (ctxt->instate == XML_PARSER_EOF)
3380 * when shrinking to extend the buffer we really need to preserve
3381 * the part of the name we already parsed. Hence rolling back
3382 * by current length.
3384 ctxt->input->cur -= l;
3386 ctxt->input->cur += l;
3387 if (ctxt->instate == XML_PARSER_EOF)
3392 if ((len > XML_MAX_NAME_LENGTH) &&
3393 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3394 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3397 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3402 * @ctxt: an XML parser context
3403 * @len: length of the string parsed
3405 * parse an XML name.
3407 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3408 * CombiningChar | Extender
3410 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3412 * Returns the Name parsed or NULL
3415 static const xmlChar *
3416 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3417 const xmlChar *in, *e;
3426 * Accelerator for simple ASCII names
3428 in = ctxt->input->cur;
3429 e = ctxt->input->end;
3430 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3431 ((*in >= 0x41) && (*in <= 0x5A)) ||
3432 (*in == '_')) && (in < e)) {
3434 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3435 ((*in >= 0x41) && (*in <= 0x5A)) ||
3436 ((*in >= 0x30) && (*in <= 0x39)) ||
3437 (*in == '_') || (*in == '-') ||
3438 (*in == '.')) && (in < e))
3442 if ((*in > 0) && (*in < 0x80)) {
3443 count = in - ctxt->input->cur;
3444 if ((count > XML_MAX_NAME_LENGTH) &&
3445 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3446 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3449 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3450 ctxt->input->cur = in;
3451 ctxt->nbChars += count;
3452 ctxt->input->col += count;
3454 xmlErrMemory(ctxt, NULL);
3460 return(xmlParseNCNameComplex(ctxt));
3464 * xmlParseNameAndCompare:
3465 * @ctxt: an XML parser context
3467 * parse an XML name and compares for match
3468 * (specialized for endtag parsing)
3470 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3471 * and the name for mismatch
3474 static const xmlChar *
3475 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3476 register const xmlChar *cmp = other;
3477 register const xmlChar *in;
3481 if (ctxt->instate == XML_PARSER_EOF)
3484 in = ctxt->input->cur;
3485 while (*in != 0 && *in == *cmp) {
3490 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3492 ctxt->input->cur = in;
3493 return (const xmlChar*) 1;
3495 /* failure (or end of input buffer), check with full function */
3496 ret = xmlParseName (ctxt);
3497 /* strings coming from the dictionary direct compare possible */
3499 return (const xmlChar*) 1;
3505 * xmlParseStringName:
3506 * @ctxt: an XML parser context
3507 * @str: a pointer to the string pointer (IN/OUT)
3509 * parse an XML name.
3511 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3512 * CombiningChar | Extender
3514 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3516 * [6] Names ::= Name (#x20 Name)*
3518 * Returns the Name parsed or NULL. The @str pointer
3519 * is updated to the current location in the string.
3523 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3524 xmlChar buf[XML_MAX_NAMELEN + 5];
3525 const xmlChar *cur = *str;
3530 nbParseStringName++;
3533 c = CUR_SCHAR(cur, l);
3534 if (!xmlIsNameStartChar(ctxt, c)) {
3538 COPY_BUF(l,buf,len,c);
3540 c = CUR_SCHAR(cur, l);
3541 while (xmlIsNameChar(ctxt, c)) {
3542 COPY_BUF(l,buf,len,c);
3544 c = CUR_SCHAR(cur, l);
3545 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3547 * Okay someone managed to make a huge name, so he's ready to pay
3548 * for the processing speed.
3553 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3554 if (buffer == NULL) {
3555 xmlErrMemory(ctxt, NULL);
3558 memcpy(buffer, buf, len);
3559 while (xmlIsNameChar(ctxt, c)) {
3560 if (len + 10 > max) {
3563 if ((len > XML_MAX_NAME_LENGTH) &&
3564 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3565 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3570 tmp = (xmlChar *) xmlRealloc(buffer,
3571 max * sizeof(xmlChar));
3573 xmlErrMemory(ctxt, NULL);
3579 COPY_BUF(l,buffer,len,c);
3581 c = CUR_SCHAR(cur, l);
3588 if ((len > XML_MAX_NAME_LENGTH) &&
3589 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3590 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3594 return(xmlStrndup(buf, len));
3599 * @ctxt: an XML parser context
3601 * parse an XML Nmtoken.
3603 * [7] Nmtoken ::= (NameChar)+
3605 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3607 * Returns the Nmtoken parsed or NULL
3611 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3612 xmlChar buf[XML_MAX_NAMELEN + 5];
3622 if (ctxt->instate == XML_PARSER_EOF)
3626 while (xmlIsNameChar(ctxt, c)) {
3627 if (count++ > XML_PARSER_CHUNK_SIZE) {
3631 COPY_BUF(l,buf,len,c);
3637 if (ctxt->instate == XML_PARSER_EOF)
3641 if (len >= XML_MAX_NAMELEN) {
3643 * Okay someone managed to make a huge token, so he's ready to pay
3644 * for the processing speed.
3649 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3650 if (buffer == NULL) {
3651 xmlErrMemory(ctxt, NULL);
3654 memcpy(buffer, buf, len);
3655 while (xmlIsNameChar(ctxt, c)) {
3656 if (count++ > XML_PARSER_CHUNK_SIZE) {
3659 if (ctxt->instate == XML_PARSER_EOF) {
3664 if (len + 10 > max) {
3667 if ((max > XML_MAX_NAME_LENGTH) &&
3668 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3669 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3674 tmp = (xmlChar *) xmlRealloc(buffer,
3675 max * sizeof(xmlChar));
3677 xmlErrMemory(ctxt, NULL);
3683 COPY_BUF(l,buffer,len,c);
3693 if ((len > XML_MAX_NAME_LENGTH) &&
3694 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3695 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3698 return(xmlStrndup(buf, len));
3702 * xmlParseEntityValue:
3703 * @ctxt: an XML parser context
3704 * @orig: if non-NULL store a copy of the original entity value
3706 * parse a value for ENTITY declarations
3708 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3709 * "'" ([^%&'] | PEReference | Reference)* "'"
3711 * Returns the EntityValue parsed with reference substituted or NULL
3715 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3716 xmlChar *buf = NULL;
3718 int size = XML_PARSER_BUFFER_SIZE;
3721 xmlChar *ret = NULL;
3722 const xmlChar *cur = NULL;
3723 xmlParserInputPtr input;
3725 if (RAW == '"') stop = '"';
3726 else if (RAW == '\'') stop = '\'';
3728 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3731 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3733 xmlErrMemory(ctxt, NULL);
3738 * The content of the entity definition is copied in a buffer.
3741 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3742 input = ctxt->input;
3744 if (ctxt->instate == XML_PARSER_EOF) {
3751 * NOTE: 4.4.5 Included in Literal
3752 * When a parameter entity reference appears in a literal entity
3753 * value, ... a single or double quote character in the replacement
3754 * text is always treated as a normal data character and will not
3755 * terminate the literal.
3756 * In practice it means we stop the loop only when back at parsing
3757 * the initial entity and the quote is found
3759 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3760 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3761 if (len + 5 >= size) {
3765 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3767 xmlErrMemory(ctxt, NULL);
3773 COPY_BUF(l,buf,len,c);
3784 if (ctxt->instate == XML_PARSER_EOF) {
3790 * Raise problem w.r.t. '&' and '%' being used in non-entities
3791 * reference constructs. Note Charref will be handled in
3792 * xmlStringDecodeEntities()
3795 while (*cur != 0) { /* non input consuming */
3796 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3801 name = xmlParseStringName(ctxt, &cur);
3802 if ((name == NULL) || (*cur != ';')) {
3803 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3804 "EntityValue: '%c' forbidden except for entities references\n",
3807 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3808 (ctxt->inputNr == 1)) {
3809 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3820 * Then PEReference entities are substituted.
3823 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3828 * NOTE: 4.4.7 Bypassed
3829 * When a general entity reference appears in the EntityValue in
3830 * an entity declaration, it is bypassed and left as is.
3831 * so XML_SUBSTITUTE_REF is not set here.
3834 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3847 * xmlParseAttValueComplex:
3848 * @ctxt: an XML parser context
3849 * @attlen: the resulting attribute length
3850 * @normalize: whether to apply the inner normalization
3852 * parse a value for an attribute, this is the fallback function
3853 * of xmlParseAttValue() when the attribute parsing requires handling
3854 * of non-ASCII characters, or normalization compaction.
3856 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3859 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3861 xmlChar *buf = NULL;
3862 xmlChar *rep = NULL;
3864 size_t buf_size = 0;
3865 int c, l, in_space = 0;
3866 xmlChar *current = NULL;
3869 if (NXT(0) == '"') {
3870 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3873 } else if (NXT(0) == '\'') {
3875 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3878 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3883 * allocate a translation buffer.
3885 buf_size = XML_PARSER_BUFFER_SIZE;
3886 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3887 if (buf == NULL) goto mem_error;
3890 * OK loop until we reach one of the ending char or a size limit.
3893 while (((NXT(0) != limit) && /* checked */
3894 (IS_CHAR(c)) && (c != '<')) &&
3895 (ctxt->instate != XML_PARSER_EOF)) {
3897 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3898 * special option is given
3900 if ((len > XML_MAX_TEXT_LENGTH) &&
3901 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3902 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3903 "AttValue length too long\n");
3909 if (NXT(1) == '#') {
3910 int val = xmlParseCharRef(ctxt);
3913 if (ctxt->replaceEntities) {
3914 if (len + 10 > buf_size) {
3915 growBuffer(buf, 10);
3920 * The reparsing will be done in xmlStringGetNodeList()
3921 * called by the attribute() function in SAX.c
3923 if (len + 10 > buf_size) {
3924 growBuffer(buf, 10);
3932 } else if (val != 0) {
3933 if (len + 10 > buf_size) {
3934 growBuffer(buf, 10);
3936 len += xmlCopyChar(0, &buf[len], val);
3939 ent = xmlParseEntityRef(ctxt);
3942 ctxt->nbentities += ent->owner;
3943 if ((ent != NULL) &&
3944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3945 if (len + 10 > buf_size) {
3946 growBuffer(buf, 10);
3948 if ((ctxt->replaceEntities == 0) &&
3949 (ent->content[0] == '&')) {
3956 buf[len++] = ent->content[0];
3958 } else if ((ent != NULL) &&
3959 (ctxt->replaceEntities != 0)) {
3960 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3962 rep = xmlStringDecodeEntities(ctxt, ent->content,
3968 while (*current != 0) { /* non input consuming */
3969 if ((*current == 0xD) || (*current == 0xA) ||
3970 (*current == 0x9)) {
3974 buf[len++] = *current++;
3975 if (len + 10 > buf_size) {
3976 growBuffer(buf, 10);
3983 if (len + 10 > buf_size) {
3984 growBuffer(buf, 10);
3986 if (ent->content != NULL)
3987 buf[len++] = ent->content[0];
3989 } else if (ent != NULL) {
3990 int i = xmlStrlen(ent->name);
3991 const xmlChar *cur = ent->name;
3994 * This may look absurd but is needed to detect
3997 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3998 (ent->content != NULL) && (ent->checked == 0)) {
3999 unsigned long oldnbent = ctxt->nbentities;
4002 rep = xmlStringDecodeEntities(ctxt, ent->content,
4003 XML_SUBSTITUTE_REF, 0, 0, 0);
4006 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4008 if (xmlStrchr(rep, '<'))
4016 * Just output the reference
4019 while (len + i + 10 > buf_size) {
4020 growBuffer(buf, i + 10);
4023 buf[len++] = *cur++;
4028 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4029 if ((len != 0) || (!normalize)) {
4030 if ((!normalize) || (!in_space)) {
4031 COPY_BUF(l,buf,len,0x20);
4032 while (len + 10 > buf_size) {
4033 growBuffer(buf, 10);
4040 COPY_BUF(l,buf,len,c);
4041 if (len + 10 > buf_size) {
4042 growBuffer(buf, 10);
4050 if (ctxt->instate == XML_PARSER_EOF)
4053 if ((in_space) && (normalize)) {
4054 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4058 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4059 } else if (RAW != limit) {
4060 if ((c != 0) && (!IS_CHAR(c))) {
4061 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4062 "invalid character in attribute value\n");
4064 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4065 "AttValue: ' expected\n");
4071 * There we potentially risk an overflow; don't allow attribute values of
4072 * length more than INT_MAX, which is a very reasonable assumption!
4074 if (len >= INT_MAX) {
4075 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4076 "AttValue length too long\n");
4080 if (attlen != NULL) *attlen = (int) len;
4084 xmlErrMemory(ctxt, NULL);
4095 * @ctxt: an XML parser context
4097 * parse a value for an attribute
4098 * Note: the parser won't do substitution of entities here, this
4099 * will be handled later in xmlStringGetNodeList
4101 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4102 * "'" ([^<&'] | Reference)* "'"
4104 * 3.3.3 Attribute-Value Normalization:
4105 * Before the value of an attribute is passed to the application or
4106 * checked for validity, the XML processor must normalize it as follows:
4107 * - a character reference is processed by appending the referenced
4108 * character to the attribute value
4109 * - an entity reference is processed by recursively processing the
4110 * replacement text of the entity
4111 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4112 * appending #x20 to the normalized value, except that only a single
4113 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4114 * parsed entity or the literal entity value of an internal parsed entity
4115 * - other characters are processed by appending them to the normalized value
4116 * If the declared value is not CDATA, then the XML processor must further
4117 * process the normalized attribute value by discarding any leading and
4118 * trailing space (#x20) characters, and by replacing sequences of space
4119 * (#x20) characters by a single space (#x20) character.
4120 * All attributes for which no declaration has been read should be treated
4121 * by a non-validating parser as if declared CDATA.
4123 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4128 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4129 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4130 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4134 * xmlParseSystemLiteral:
4135 * @ctxt: an XML parser context
4137 * parse an XML Literal
4139 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4141 * Returns the SystemLiteral parsed or NULL
4145 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4146 xmlChar *buf = NULL;
4148 int size = XML_PARSER_BUFFER_SIZE;
4151 int state = ctxt->instate;
4158 } else if (RAW == '\'') {
4162 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4166 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4168 xmlErrMemory(ctxt, NULL);
4171 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4173 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4174 if (len + 5 >= size) {
4177 if ((size > XML_MAX_NAME_LENGTH) &&
4178 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4179 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4181 ctxt->instate = (xmlParserInputState) state;
4185 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4188 xmlErrMemory(ctxt, NULL);
4189 ctxt->instate = (xmlParserInputState) state;
4198 if (ctxt->instate == XML_PARSER_EOF) {
4203 COPY_BUF(l,buf,len,cur);
4213 ctxt->instate = (xmlParserInputState) state;
4214 if (!IS_CHAR(cur)) {
4215 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4223 * xmlParsePubidLiteral:
4224 * @ctxt: an XML parser context
4226 * parse an XML public literal
4228 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4230 * Returns the PubidLiteral parsed or NULL.
4234 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4235 xmlChar *buf = NULL;
4237 int size = XML_PARSER_BUFFER_SIZE;
4241 xmlParserInputState oldstate = ctxt->instate;
4247 } else if (RAW == '\'') {
4251 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4254 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4256 xmlErrMemory(ctxt, NULL);
4259 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4261 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4262 if (len + 1 >= size) {
4265 if ((size > XML_MAX_NAME_LENGTH) &&
4266 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4267 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4272 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4274 xmlErrMemory(ctxt, NULL);
4285 if (ctxt->instate == XML_PARSER_EOF) {
4300 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4304 ctxt->instate = oldstate;
4308 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4311 * used for the test in the inner loop of the char data testing
4313 static const unsigned char test_char_data[256] = {
4314 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4315 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4316 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4317 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4318 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4319 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4320 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4321 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4322 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4323 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4324 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4325 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4326 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4327 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4328 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4329 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4331 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4332 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4333 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4334 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4335 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4336 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4337 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4338 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4339 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4340 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4341 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4342 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4343 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4344 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4345 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4350 * @ctxt: an XML parser context
4351 * @cdata: int indicating whether we are within a CDATA section
4353 * parse a CharData section.
4354 * if we are within a CDATA section ']]>' marks an end of section.
4356 * The right angle bracket (>) may be represented using the string "&gt;",
4357 * and must, for compatibility, be escaped using "&gt;" or a character
4358 * reference when it appears in the string "]]>" in content, when that
4359 * string is not marking the end of a CDATA section.
4361 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4365 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4368 int line = ctxt->input->line;
4369 int col = ctxt->input->col;
4375 * Accelerated common case where input doesn't need to be
4376 * modified before passing it to the handler.
4379 in = ctxt->input->cur;
4382 while (*in == 0x20) { in++; ctxt->input->col++; }
4385 ctxt->input->line++; ctxt->input->col = 1;
4387 } while (*in == 0xA);
4388 goto get_more_space;
4391 nbchar = in - ctxt->input->cur;
4393 const xmlChar *tmp = ctxt->input->cur;
4394 ctxt->input->cur = in;
4396 if ((ctxt->sax != NULL) &&
4397 (ctxt->sax->ignorableWhitespace !=
4398 ctxt->sax->characters)) {
4399 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4400 if (ctxt->sax->ignorableWhitespace != NULL)
4401 ctxt->sax->ignorableWhitespace(ctxt->userData,
4404 if (ctxt->sax->characters != NULL)
4405 ctxt->sax->characters(ctxt->userData,
4407 if (*ctxt->space == -1)
4410 } else if ((ctxt->sax != NULL) &&
4411 (ctxt->sax->characters != NULL)) {
4412 ctxt->sax->characters(ctxt->userData,
4420 ccol = ctxt->input->col;
4421 while (test_char_data[*in]) {
4425 ctxt->input->col = ccol;
4428 ctxt->input->line++; ctxt->input->col = 1;
4430 } while (*in == 0xA);
4434 if ((in[1] == ']') && (in[2] == '>')) {
4435 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4436 ctxt->input->cur = in + 1;
4443 nbchar = in - ctxt->input->cur;
4445 if ((ctxt->sax != NULL) &&
4446 (ctxt->sax->ignorableWhitespace !=
4447 ctxt->sax->characters) &&
4448 (IS_BLANK_CH(*ctxt->input->cur))) {
4449 const xmlChar *tmp = ctxt->input->cur;
4450 ctxt->input->cur = in;
4452 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4453 if (ctxt->sax->ignorableWhitespace != NULL)
4454 ctxt->sax->ignorableWhitespace(ctxt->userData,
4457 if (ctxt->sax->characters != NULL)
4458 ctxt->sax->characters(ctxt->userData,
4460 if (*ctxt->space == -1)
4463 line = ctxt->input->line;
4464 col = ctxt->input->col;
4465 } else if (ctxt->sax != NULL) {
4466 if (ctxt->sax->characters != NULL)
4467 ctxt->sax->characters(ctxt->userData,
4468 ctxt->input->cur, nbchar);
4469 line = ctxt->input->line;
4470 col = ctxt->input->col;
4472 /* something really bad happened in the SAX callback */
4473 if (ctxt->instate != XML_PARSER_CONTENT)
4476 ctxt->input->cur = in;
4480 ctxt->input->cur = in;
4482 ctxt->input->line++; ctxt->input->col = 1;
4483 continue; /* while */
4495 if (ctxt->instate == XML_PARSER_EOF)
4497 in = ctxt->input->cur;
4498 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4501 ctxt->input->line = line;
4502 ctxt->input->col = col;
4503 xmlParseCharDataComplex(ctxt, cdata);
4507 * xmlParseCharDataComplex:
4508 * @ctxt: an XML parser context
4509 * @cdata: int indicating whether we are within a CDATA section
4511 * parse a CharData section. This is the fallback function
4512 * of xmlParseCharData() when the parsing requires handling
4513 * of non-ASCII characters.
4516 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4517 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4525 while ((cur != '<') && /* checked */
4527 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4528 if ((cur == ']') && (NXT(1) == ']') &&
4532 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4535 COPY_BUF(l,buf,nbchar,cur);
4536 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4540 * OK the segment is to be consumed as chars.
4542 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4543 if (areBlanks(ctxt, buf, nbchar, 0)) {
4544 if (ctxt->sax->ignorableWhitespace != NULL)
4545 ctxt->sax->ignorableWhitespace(ctxt->userData,
4548 if (ctxt->sax->characters != NULL)
4549 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4550 if ((ctxt->sax->characters !=
4551 ctxt->sax->ignorableWhitespace) &&
4552 (*ctxt->space == -1))
4557 /* something really bad happened in the SAX callback */
4558 if (ctxt->instate != XML_PARSER_CONTENT)
4565 if (ctxt->instate == XML_PARSER_EOF)
4574 * OK the segment is to be consumed as chars.
4576 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4577 if (areBlanks(ctxt, buf, nbchar, 0)) {
4578 if (ctxt->sax->ignorableWhitespace != NULL)
4579 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4581 if (ctxt->sax->characters != NULL)
4582 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4583 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4584 (*ctxt->space == -1))
4589 if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4590 /* Generate the error and skip the offending character */
4591 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4592 "PCDATA invalid Char value %d\n",
4599 * xmlParseExternalID:
4600 * @ctxt: an XML parser context
4601 * @publicID: a xmlChar** receiving PubidLiteral
4602 * @strict: indicate whether we should restrict parsing to only
4603 * production [75], see NOTE below
4605 * Parse an External ID or a Public ID
4607 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4608 * 'PUBLIC' S PubidLiteral S SystemLiteral
4610 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4611 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4613 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4615 * Returns the SystemLiteral; in the second
4616 * case publicID receives the PubidLiteral. If strict is off,
4617 * it is possible to return NULL and have publicID set.
4621 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4622 xmlChar *URI = NULL;
4627 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4629 if (SKIP_BLANKS == 0) {
4630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4631 "Space required after 'SYSTEM'\n");
4633 URI = xmlParseSystemLiteral(ctxt);
4635 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4637 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4639 if (SKIP_BLANKS == 0) {
4640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4641 "Space required after 'PUBLIC'\n");
4643 *publicID = xmlParsePubidLiteral(ctxt);
4644 if (*publicID == NULL) {
4645 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4649 * We don't handle [83] so "S SystemLiteral" is required.
4651 if (SKIP_BLANKS == 0) {
4652 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4653 "Space required after the Public Identifier\n");
4657 * We handle [83] so we return immediately, if
4658 * "S SystemLiteral" is not detected. We skip blanks if no
4659 * system literal was found, but this is harmless since we must
4660 * be at the end of a NotationDecl.
4662 if (SKIP_BLANKS == 0) return(NULL);
4663 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4665 URI = xmlParseSystemLiteral(ctxt);
4667 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4674 * xmlParseCommentComplex:
4675 * @ctxt: an XML parser context
4676 * @buf: the already parsed part of the buffer
4677 * @len: number of bytes filled in the buffer
4678 * @size: allocated size of the buffer
4680 * Skip an XML (SGML) comment <!-- .... -->
4681 * The spec says that "For compatibility, the string "--" (double-hyphen)
4682 * must not occur within comments. "
4683 * This is the slow routine in case the accelerator for ascii didn't work
4685 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4688 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4689 size_t len, size_t size) {
4696 inputid = ctxt->input->id;
4700 size = XML_PARSER_BUFFER_SIZE;
4701 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4703 xmlErrMemory(ctxt, NULL);
4707 GROW; /* Assure there's enough input data */
4710 goto not_terminated;
4712 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4713 "xmlParseComment: invalid xmlChar value %d\n",
4721 goto not_terminated;
4723 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4724 "xmlParseComment: invalid xmlChar value %d\n",
4732 goto not_terminated;
4733 while (IS_CHAR(cur) && /* checked */
4735 (r != '-') || (q != '-'))) {
4736 if ((r == '-') && (q == '-')) {
4737 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4739 if ((len > XML_MAX_TEXT_LENGTH) &&
4740 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4741 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4742 "Comment too big found", NULL);
4746 if (len + 5 >= size) {
4750 new_size = size * 2;
4751 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4752 if (new_buf == NULL) {
4754 xmlErrMemory(ctxt, NULL);
4760 COPY_BUF(ql,buf,len,q);
4770 if (ctxt->instate == XML_PARSER_EOF) {
4785 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4786 "Comment not terminated \n<!--%.50s\n", buf);
4787 } else if (!IS_CHAR(cur)) {
4788 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4789 "xmlParseComment: invalid xmlChar value %d\n",
4792 if (inputid != ctxt->input->id) {
4793 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4794 "Comment doesn't start and stop in the same"
4798 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4799 (!ctxt->disableSAX))
4800 ctxt->sax->comment(ctxt->userData, buf);
4805 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4806 "Comment not terminated\n", NULL);
4813 * @ctxt: an XML parser context
4815 * Skip an XML (SGML) comment <!-- .... -->
4816 * The spec says that "For compatibility, the string "--" (double-hyphen)
4817 * must not occur within comments. "
4819 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4822 xmlParseComment(xmlParserCtxtPtr ctxt) {
4823 xmlChar *buf = NULL;
4824 size_t size = XML_PARSER_BUFFER_SIZE;
4826 xmlParserInputState state;
4833 * Check that there is a comment right here.
4835 if ((RAW != '<') || (NXT(1) != '!') ||
4836 (NXT(2) != '-') || (NXT(3) != '-')) return;
4837 state = ctxt->instate;
4838 ctxt->instate = XML_PARSER_COMMENT;
4839 inputid = ctxt->input->id;
4845 * Accelerated common case where input doesn't need to be
4846 * modified before passing it to the handler.
4848 in = ctxt->input->cur;
4852 ctxt->input->line++; ctxt->input->col = 1;
4854 } while (*in == 0xA);
4857 ccol = ctxt->input->col;
4858 while (((*in > '-') && (*in <= 0x7F)) ||
4859 ((*in >= 0x20) && (*in < '-')) ||
4864 ctxt->input->col = ccol;
4867 ctxt->input->line++; ctxt->input->col = 1;
4869 } while (*in == 0xA);
4872 nbchar = in - ctxt->input->cur;
4874 * save current set of data
4877 if ((ctxt->sax != NULL) &&
4878 (ctxt->sax->comment != NULL)) {
4880 if ((*in == '-') && (in[1] == '-'))
4883 size = XML_PARSER_BUFFER_SIZE + nbchar;
4884 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4886 xmlErrMemory(ctxt, NULL);
4887 ctxt->instate = state;
4891 } else if (len + nbchar + 1 >= size) {
4893 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4894 new_buf = (xmlChar *) xmlRealloc(buf,
4895 size * sizeof(xmlChar));
4896 if (new_buf == NULL) {
4898 xmlErrMemory(ctxt, NULL);
4899 ctxt->instate = state;
4904 memcpy(&buf[len], ctxt->input->cur, nbchar);
4909 if ((len > XML_MAX_TEXT_LENGTH) &&
4910 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4911 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4912 "Comment too big found", NULL);
4916 ctxt->input->cur = in;
4919 ctxt->input->line++; ctxt->input->col = 1;
4924 ctxt->input->cur = in;
4926 ctxt->input->line++; ctxt->input->col = 1;
4927 continue; /* while */
4933 if (ctxt->instate == XML_PARSER_EOF) {
4937 in = ctxt->input->cur;
4941 if (ctxt->input->id != inputid) {
4942 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4943 "comment doesn't start and stop in the"
4947 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4948 (!ctxt->disableSAX)) {
4950 ctxt->sax->comment(ctxt->userData, buf);
4952 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4956 if (ctxt->instate != XML_PARSER_EOF)
4957 ctxt->instate = state;
4961 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962 "Double hyphen within comment: "
4966 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4967 "Double hyphen within comment\n", NULL);
4975 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4976 xmlParseCommentComplex(ctxt, buf, len, size);
4977 ctxt->instate = state;
4984 * @ctxt: an XML parser context
4986 * parse the name of a PI
4988 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4990 * Returns the PITarget name or NULL
4994 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4995 const xmlChar *name;
4997 name = xmlParseName(ctxt);
4998 if ((name != NULL) &&
4999 ((name[0] == 'x') || (name[0] == 'X')) &&
5000 ((name[1] == 'm') || (name[1] == 'M')) &&
5001 ((name[2] == 'l') || (name[2] == 'L'))) {
5003 if ((name[0] == 'x') && (name[1] == 'm') &&
5004 (name[2] == 'l') && (name[3] == 0)) {
5005 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5006 "XML declaration allowed only at the start of the document\n");
5008 } else if (name[3] == 0) {
5009 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5013 if (xmlW3CPIs[i] == NULL) break;
5014 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5017 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5018 "xmlParsePITarget: invalid name prefix 'xml'\n",
5021 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5022 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5023 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5028 #ifdef LIBXML_CATALOG_ENABLED
5030 * xmlParseCatalogPI:
5031 * @ctxt: an XML parser context
5032 * @catalog: the PI value string
5034 * parse an XML Catalog Processing Instruction.
5036 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5038 * Occurs only if allowed by the user and if happening in the Misc
5039 * part of the document before any doctype information.
5040 * This will add the given catalog to the parsing context in order
5041 * to be used if there is a resolution need further down in the document
5045 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5046 xmlChar *URL = NULL;
5047 const xmlChar *tmp, *base;
5051 while (IS_BLANK_CH(*tmp)) tmp++;
5052 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5055 while (IS_BLANK_CH(*tmp)) tmp++;
5060 while (IS_BLANK_CH(*tmp)) tmp++;
5062 if ((marker != '\'') && (marker != '"'))
5066 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5069 URL = xmlStrndup(base, tmp - base);
5071 while (IS_BLANK_CH(*tmp)) tmp++;
5076 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5082 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5083 "Catalog PI syntax error: %s\n",
5092 * @ctxt: an XML parser context
5094 * parse an XML Processing Instruction.
5096 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5098 * The processing is transferred to SAX once parsed.
5102 xmlParsePI(xmlParserCtxtPtr ctxt) {
5103 xmlChar *buf = NULL;
5105 size_t size = XML_PARSER_BUFFER_SIZE;
5107 const xmlChar *target;
5108 xmlParserInputState state;
5111 if ((RAW == '<') && (NXT(1) == '?')) {
5112 int inputid = ctxt->input->id;
5113 state = ctxt->instate;
5114 ctxt->instate = XML_PARSER_PI;
5116 * this is a Processing Instruction.
5122 * Parse the target name and check for special support like
5125 target = xmlParsePITarget(ctxt);
5126 if (target != NULL) {
5127 if ((RAW == '?') && (NXT(1) == '>')) {
5128 if (inputid != ctxt->input->id) {
5129 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5130 "PI declaration doesn't start and stop in"
5131 " the same entity\n");
5138 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5139 (ctxt->sax->processingInstruction != NULL))
5140 ctxt->sax->processingInstruction(ctxt->userData,
5142 if (ctxt->instate != XML_PARSER_EOF)
5143 ctxt->instate = state;
5146 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5148 xmlErrMemory(ctxt, NULL);
5149 ctxt->instate = state;
5152 if (SKIP_BLANKS == 0) {
5153 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5154 "ParsePI: PI %s space expected\n", target);
5157 while (IS_CHAR(cur) && /* checked */
5158 ((cur != '?') || (NXT(1) != '>'))) {
5159 if (len + 5 >= size) {
5161 size_t new_size = size * 2;
5162 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5164 xmlErrMemory(ctxt, NULL);
5166 ctxt->instate = state;
5175 if (ctxt->instate == XML_PARSER_EOF) {
5180 if ((len > XML_MAX_TEXT_LENGTH) &&
5181 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5182 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5183 "PI %s too big found", target);
5185 ctxt->instate = state;
5189 COPY_BUF(l,buf,len,cur);
5198 if ((len > XML_MAX_TEXT_LENGTH) &&
5199 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5200 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5201 "PI %s too big found", target);
5203 ctxt->instate = state;
5208 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5209 "ParsePI: PI %s never end ...\n", target);
5211 if (inputid != ctxt->input->id) {
5212 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5213 "PI declaration doesn't start and stop in"
5214 " the same entity\n");
5218 #ifdef LIBXML_CATALOG_ENABLED
5219 if (((state == XML_PARSER_MISC) ||
5220 (state == XML_PARSER_START)) &&
5221 (xmlStrEqual(target, XML_CATALOG_PI))) {
5222 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5223 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5224 (allow == XML_CATA_ALLOW_ALL))
5225 xmlParseCatalogPI(ctxt, buf);
5233 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5234 (ctxt->sax->processingInstruction != NULL))
5235 ctxt->sax->processingInstruction(ctxt->userData,
5240 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5242 if (ctxt->instate != XML_PARSER_EOF)
5243 ctxt->instate = state;
5248 * xmlParseNotationDecl:
5249 * @ctxt: an XML parser context
5251 * parse a notation declaration
5253 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5255 * Hence there are actually 3 choices:
5256 * 'PUBLIC' S PubidLiteral
5257 * 'PUBLIC' S PubidLiteral S SystemLiteral
5258 * and 'SYSTEM' S SystemLiteral
5260 * See the NOTE on xmlParseExternalID().
5264 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5265 const xmlChar *name;
5269 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5270 int inputid = ctxt->input->id;
5273 if (SKIP_BLANKS == 0) {
5274 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5275 "Space required after '<!NOTATION'\n");
5279 name = xmlParseName(ctxt);
5281 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5284 if (xmlStrchr(name, ':') != NULL) {
5285 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5286 "colons are forbidden from notation names '%s'\n",
5289 if (SKIP_BLANKS == 0) {
5290 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5291 "Space required after the NOTATION name'\n");
5298 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5302 if (inputid != ctxt->input->id) {
5303 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5304 "Notation declaration doesn't start and stop"
5305 " in the same entity\n");
5308 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5309 (ctxt->sax->notationDecl != NULL))
5310 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5312 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5314 if (Systemid != NULL) xmlFree(Systemid);
5315 if (Pubid != NULL) xmlFree(Pubid);
5320 * xmlParseEntityDecl:
5321 * @ctxt: an XML parser context
5323 * parse <!ENTITY declarations
5325 * [70] EntityDecl ::= GEDecl | PEDecl
5327 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5329 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5331 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5333 * [74] PEDef ::= EntityValue | ExternalID
5335 * [76] NDataDecl ::= S 'NDATA' S Name
5337 * [ VC: Notation Declared ]
5338 * The Name must match the declared name of a notation.
5342 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5343 const xmlChar *name = NULL;
5344 xmlChar *value = NULL;
5345 xmlChar *URI = NULL, *literal = NULL;
5346 const xmlChar *ndata = NULL;
5347 int isParameter = 0;
5348 xmlChar *orig = NULL;
5350 /* GROW; done in the caller */
5351 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5352 int inputid = ctxt->input->id;
5355 if (SKIP_BLANKS == 0) {
5356 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5357 "Space required after '<!ENTITY'\n");
5362 if (SKIP_BLANKS == 0) {
5363 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5364 "Space required after '%%'\n");
5369 name = xmlParseName(ctxt);
5371 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5372 "xmlParseEntityDecl: no name\n");
5375 if (xmlStrchr(name, ':') != NULL) {
5376 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5377 "colons are forbidden from entities names '%s'\n",
5380 if (SKIP_BLANKS == 0) {
5381 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5382 "Space required after the entity name\n");
5385 ctxt->instate = XML_PARSER_ENTITY_DECL;
5387 * handle the various case of definitions...
5390 if ((RAW == '"') || (RAW == '\'')) {
5391 value = xmlParseEntityValue(ctxt, &orig);
5393 if ((ctxt->sax != NULL) &&
5394 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5395 ctxt->sax->entityDecl(ctxt->userData, name,
5396 XML_INTERNAL_PARAMETER_ENTITY,
5400 URI = xmlParseExternalID(ctxt, &literal, 1);
5401 if ((URI == NULL) && (literal == NULL)) {
5402 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5407 uri = xmlParseURI((const char *) URI);
5409 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5410 "Invalid URI: %s\n", URI);
5412 * This really ought to be a well formedness error
5413 * but the XML Core WG decided otherwise c.f. issue
5414 * E26 of the XML errata.
5417 if (uri->fragment != NULL) {
5419 * Okay this is foolish to block those but not
5422 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5424 if ((ctxt->sax != NULL) &&
5425 (!ctxt->disableSAX) &&
5426 (ctxt->sax->entityDecl != NULL))
5427 ctxt->sax->entityDecl(ctxt->userData, name,
5428 XML_EXTERNAL_PARAMETER_ENTITY,
5429 literal, URI, NULL);
5436 if ((RAW == '"') || (RAW == '\'')) {
5437 value = xmlParseEntityValue(ctxt, &orig);
5438 if ((ctxt->sax != NULL) &&
5439 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5440 ctxt->sax->entityDecl(ctxt->userData, name,
5441 XML_INTERNAL_GENERAL_ENTITY,
5444 * For expat compatibility in SAX mode.
5446 if ((ctxt->myDoc == NULL) ||
5447 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5448 if (ctxt->myDoc == NULL) {
5449 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5450 if (ctxt->myDoc == NULL) {
5451 xmlErrMemory(ctxt, "New Doc failed");
5454 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5456 if (ctxt->myDoc->intSubset == NULL)
5457 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5458 BAD_CAST "fake", NULL, NULL);
5460 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5464 URI = xmlParseExternalID(ctxt, &literal, 1);
5465 if ((URI == NULL) && (literal == NULL)) {
5466 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5471 uri = xmlParseURI((const char *)URI);
5473 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5474 "Invalid URI: %s\n", URI);
5476 * This really ought to be a well formedness error
5477 * but the XML Core WG decided otherwise c.f. issue
5478 * E26 of the XML errata.
5481 if (uri->fragment != NULL) {
5483 * Okay this is foolish to block those but not
5486 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5491 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5492 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5493 "Space required before 'NDATA'\n");
5495 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5497 if (SKIP_BLANKS == 0) {
5498 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5499 "Space required after 'NDATA'\n");
5501 ndata = xmlParseName(ctxt);
5502 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5503 (ctxt->sax->unparsedEntityDecl != NULL))
5504 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5505 literal, URI, ndata);
5507 if ((ctxt->sax != NULL) &&
5508 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5509 ctxt->sax->entityDecl(ctxt->userData, name,
5510 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5511 literal, URI, NULL);
5513 * For expat compatibility in SAX mode.
5514 * assuming the entity replacement was asked for
5516 if ((ctxt->replaceEntities != 0) &&
5517 ((ctxt->myDoc == NULL) ||
5518 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5519 if (ctxt->myDoc == NULL) {
5520 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5521 if (ctxt->myDoc == NULL) {
5522 xmlErrMemory(ctxt, "New Doc failed");
5525 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5528 if (ctxt->myDoc->intSubset == NULL)
5529 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5530 BAD_CAST "fake", NULL, NULL);
5531 xmlSAX2EntityDecl(ctxt, name,
5532 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5533 literal, URI, NULL);
5538 if (ctxt->instate == XML_PARSER_EOF)
5542 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5543 "xmlParseEntityDecl: entity %s not terminated\n", name);
5544 xmlHaltParser(ctxt);
5546 if (inputid != ctxt->input->id) {
5547 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5548 "Entity declaration doesn't start and stop in"
5549 " the same entity\n");
5555 * Ugly mechanism to save the raw entity value.
5557 xmlEntityPtr cur = NULL;
5560 if ((ctxt->sax != NULL) &&
5561 (ctxt->sax->getParameterEntity != NULL))
5562 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5564 if ((ctxt->sax != NULL) &&
5565 (ctxt->sax->getEntity != NULL))
5566 cur = ctxt->sax->getEntity(ctxt->userData, name);
5567 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5568 cur = xmlSAX2GetEntity(ctxt, name);
5571 if ((cur != NULL) && (cur->orig == NULL)) {
5578 if (value != NULL) xmlFree(value);
5579 if (URI != NULL) xmlFree(URI);
5580 if (literal != NULL) xmlFree(literal);
5581 if (orig != NULL) xmlFree(orig);
5586 * xmlParseDefaultDecl:
5587 * @ctxt: an XML parser context
5588 * @value: Receive a possible fixed default value for the attribute
5590 * Parse an attribute default declaration
5592 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5594 * [ VC: Required Attribute ]
5595 * if the default declaration is the keyword #REQUIRED, then the
5596 * attribute must be specified for all elements of the type in the
5597 * attribute-list declaration.
5599 * [ VC: Attribute Default Legal ]
5600 * The declared default value must meet the lexical constraints of
5601 * the declared attribute type c.f. xmlValidateAttributeDecl()
5603 * [ VC: Fixed Attribute Default ]
5604 * if an attribute has a default value declared with the #FIXED
5605 * keyword, instances of that attribute must match the default value.
5607 * [ WFC: No < in Attribute Values ]
5608 * handled in xmlParseAttValue()
5610 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5611 * or XML_ATTRIBUTE_FIXED.
5615 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5620 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5622 return(XML_ATTRIBUTE_REQUIRED);
5624 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5626 return(XML_ATTRIBUTE_IMPLIED);
5628 val = XML_ATTRIBUTE_NONE;
5629 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5631 val = XML_ATTRIBUTE_FIXED;
5632 if (SKIP_BLANKS == 0) {
5633 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634 "Space required after '#FIXED'\n");
5637 ret = xmlParseAttValue(ctxt);
5638 ctxt->instate = XML_PARSER_DTD;
5640 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5641 "Attribute default value declaration error\n");
5648 * xmlParseNotationType:
5649 * @ctxt: an XML parser context
5651 * parse a Notation attribute type.
5653 * Note: the leading 'NOTATION' S part has already been parsed...
5655 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5657 * [ VC: Notation Attributes ]
5658 * Values of this type must match one of the notation names included
5659 * in the declaration; all notation names in the declaration must be declared.
5661 * Returns: the notation attribute tree built while parsing
5665 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5666 const xmlChar *name;
5667 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5670 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5677 name = xmlParseName(ctxt);
5679 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5680 "Name expected in NOTATION declaration\n");
5681 xmlFreeEnumeration(ret);
5685 while (tmp != NULL) {
5686 if (xmlStrEqual(name, tmp->name)) {
5687 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5688 "standalone: attribute notation value token %s duplicated\n",
5690 if (!xmlDictOwns(ctxt->dict, name))
5691 xmlFree((xmlChar *) name);
5697 cur = xmlCreateEnumeration(name);
5699 xmlFreeEnumeration(ret);
5702 if (last == NULL) ret = last = cur;
5709 } while (RAW == '|');
5711 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5712 xmlFreeEnumeration(ret);
5720 * xmlParseEnumerationType:
5721 * @ctxt: an XML parser context
5723 * parse an Enumeration attribute type.
5725 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5727 * [ VC: Enumeration ]
5728 * Values of this type must match one of the Nmtoken tokens in
5731 * Returns: the enumeration attribute tree built while parsing
5735 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5737 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5740 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5747 name = xmlParseNmtoken(ctxt);
5749 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5753 while (tmp != NULL) {
5754 if (xmlStrEqual(name, tmp->name)) {
5755 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5756 "standalone: attribute enumeration value token %s duplicated\n",
5758 if (!xmlDictOwns(ctxt->dict, name))
5765 cur = xmlCreateEnumeration(name);
5766 if (!xmlDictOwns(ctxt->dict, name))
5769 xmlFreeEnumeration(ret);
5772 if (last == NULL) ret = last = cur;
5779 } while (RAW == '|');
5781 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5789 * xmlParseEnumeratedType:
5790 * @ctxt: an XML parser context
5791 * @tree: the enumeration tree built while parsing
5793 * parse an Enumerated attribute type.
5795 * [57] EnumeratedType ::= NotationType | Enumeration
5797 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5800 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5804 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5805 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5807 if (SKIP_BLANKS == 0) {
5808 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5809 "Space required after 'NOTATION'\n");
5812 *tree = xmlParseNotationType(ctxt);
5813 if (*tree == NULL) return(0);
5814 return(XML_ATTRIBUTE_NOTATION);
5816 *tree = xmlParseEnumerationType(ctxt);
5817 if (*tree == NULL) return(0);
5818 return(XML_ATTRIBUTE_ENUMERATION);
5822 * xmlParseAttributeType:
5823 * @ctxt: an XML parser context
5824 * @tree: the enumeration tree built while parsing
5826 * parse the Attribute list def for an element
5828 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5830 * [55] StringType ::= 'CDATA'
5832 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5833 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5835 * Validity constraints for attribute values syntax are checked in
5836 * xmlValidateAttributeValue()
5839 * Values of type ID must match the Name production. A name must not
5840 * appear more than once in an XML document as a value of this type;
5841 * i.e., ID values must uniquely identify the elements which bear them.
5843 * [ VC: One ID per Element Type ]
5844 * No element type may have more than one ID attribute specified.
5846 * [ VC: ID Attribute Default ]
5847 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5850 * Values of type IDREF must match the Name production, and values
5851 * of type IDREFS must match Names; each IDREF Name must match the value
5852 * of an ID attribute on some element in the XML document; i.e. IDREF
5853 * values must match the value of some ID attribute.
5855 * [ VC: Entity Name ]
5856 * Values of type ENTITY must match the Name production, values
5857 * of type ENTITIES must match Names; each Entity Name must match the
5858 * name of an unparsed entity declared in the DTD.
5860 * [ VC: Name Token ]
5861 * Values of type NMTOKEN must match the Nmtoken production; values
5862 * of type NMTOKENS must match Nmtokens.
5864 * Returns the attribute type
/*
 * xmlParseAttributeType: compare the input at the current position with
 * the AttType keywords and return the matching XML_ATTRIBUTE_* constant.
 * Input matching none of the keywords is delegated to
 * xmlParseEnumeratedType(), which receives @tree.
 *
 * NOTE(review): the embedded line numbers have gaps; the statements
 * sitting between each test and its return are not visible here.
 */
5867 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5869 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5871 return(XML_ATTRIBUTE_CDATA);
/* IDREFS is tested before IDREF, and IDREF before the two-character ID
 * test, so a longer keyword is never mistaken for its prefix. */
5872 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5874 return(XML_ATTRIBUTE_IDREFS);
5875 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5877 return(XML_ATTRIBUTE_IDREF);
5878 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5880 return(XML_ATTRIBUTE_ID);
5881 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5883 return(XML_ATTRIBUTE_ENTITY);
5884 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5886 return(XML_ATTRIBUTE_ENTITIES);
/* Same ordering rule: NMTOKENS is tested before NMTOKEN. */
5887 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5889 return(XML_ATTRIBUTE_NMTOKENS);
5890 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5892 return(XML_ATTRIBUTE_NMTOKEN);
/* No keyword matched: let the enumerated-type parser handle the input. */
5894 return(xmlParseEnumeratedType(ctxt, tree));
5898 * xmlParseAttributeListDecl:
5899 * @ctxt: an XML parser context
5901 * parse the Attribute list definition for an element
5903 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5905 * [53] AttDef ::= S Name S AttType S DefaultDecl
/*
 * xmlParseAttributeListDecl: handle an '<!ATTLIST' declaration.
 * Reads the element name, then loops until '>' (or until the parser
 * state becomes XML_PARSER_EOF), reading for each attribute its name,
 * its type and its default declaration.  Each definition is reported
 * through the SAX attributeDecl callback when SAX is enabled.
 *
 * NOTE(review): the embedded line numbers have gaps; several statements
 * (declarations, returns, closing braces) are not visible here.
 */
5909 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5910 const xmlChar *elemName;
5911 const xmlChar *attrName;
5912 xmlEnumerationPtr tree;
5914 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Id of the input the declaration starts in; compared with the current
 * input id near the end of the function. */
5915 int inputid = ctxt->input->id;
5918 if (SKIP_BLANKS == 0) {
5919 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5920 "Space required after '<!ATTLIST'\n");
5922 elemName = xmlParseName(ctxt);
5923 if (elemName == NULL) {
5924 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5925 "ATTLIST: no name for Element\n");
/* One iteration per attribute definition. */
5930 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5933 xmlChar *defaultValue = NULL;
5937 attrName = xmlParseName(ctxt);
5938 if (attrName == NULL) {
5939 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5940 "ATTLIST: no name for Attribute\n");
5944 if (SKIP_BLANKS == 0) {
5945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5946 "Space required after the attribute name\n");
/* tree is passed by address; the error paths below release it with
 * xmlFreeEnumeration(). */
5950 type = xmlParseAttributeType(ctxt, &tree);
5956 if (SKIP_BLANKS == 0) {
5957 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5958 "Space required after the attribute type\n");
5960 xmlFreeEnumeration(tree);
/* defaultValue is filled in through the pointer argument; it is released
 * with xmlFree() on the paths below. */
5964 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5966 if (defaultValue != NULL)
5967 xmlFree(defaultValue);
5969 xmlFreeEnumeration(tree);
/* For every type except CDATA, xmlAttrNormalizeSpace() is applied with
 * the same buffer as both arguments. */
5972 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5973 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5977 if (SKIP_BLANKS == 0) {
5978 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5979 "Space required after the attribute default value\n");
5980 if (defaultValue != NULL)
5981 xmlFree(defaultValue);
5983 xmlFreeEnumeration(tree);
/* When the attributeDecl callback is not invoked, tree is freed here.
 * NOTE(review): presumably the callback takes ownership of tree when it
 * is invoked -- confirm against the SAX handler. */
5987 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5988 (ctxt->sax->attributeDecl != NULL))
5989 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5990 type, def, defaultValue, tree);
5991 else if (tree != NULL)
5992 xmlFreeEnumeration(tree);
/* SAX2 contexts only: defaults that are neither #IMPLIED nor #REQUIRED
 * are registered with xmlAddDefAttrs(). */
5994 if ((ctxt->sax2) && (defaultValue != NULL) &&
5995 (def != XML_ATTRIBUTE_IMPLIED) &&
5996 (def != XML_ATTRIBUTE_REQUIRED)) {
5997 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
/* NOTE(review): the condition guarding this call is not visible here. */
6000 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6002 if (defaultValue != NULL)
6003 xmlFree(defaultValue);
/* The declaration has to end in the input it started in. */
6007 if (inputid != ctxt->input->id) {
6008 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6009 "Attribute list declaration doesn't start and"
6010 " stop in the same entity\n");
6018 * xmlParseElementMixedContentDecl:
6019 * @ctxt: an XML parser context
6020 * @inputchk: the input used for the current entity, needed for boundary checks
6022 * parse the declaration for a Mixed Element content
6023 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6025 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6026 * '(' S? '#PCDATA' S? ')'
6028 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6030 * [ VC: No Duplicate Types ]
6031 * The same name must not appear more than once in a single
6032 * mixed-content declaration.
6034 * returns: the list of the xmlElementContentPtr describing the element choices
/*
 * xmlParseElementMixedContentDecl: handle a Mixed content model, which
 * has to start with '#PCDATA' (otherwise XML_ERR_PCDATA_REQUIRED is
 * raised).  The result is built from xmlNewDocElementContent() nodes:
 * a PCDATA node, OR nodes for each '|' separator and ELEMENT nodes for
 * the parsed names.  @inputchk is compared with the current input id to
 * detect a declaration that spans entity boundaries.
 *
 * NOTE(review): the embedded line numbers have gaps; several statements
 * (tests, returns, link assignments) are not visible here.
 */
6036 xmlElementContentPtr
6037 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6038 xmlElementContentPtr ret = NULL, cur = NULL, n;
6039 const xmlChar *elem = NULL;
6042 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6047 if (ctxt->input->id != inputchk) {
6048 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6049 "Element content declaration doesn't start and"
6050 " stop in the same entity\n");
/* Model made of '#PCDATA' only: a single PCDATA node. */
6053 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6057 ret->ocur = XML_ELEMENT_CONTENT_MULT;
/* Otherwise start the tree with a PCDATA node; cur tracks the node that
 * receives the next name. */
6062 if ((RAW == '(') || (RAW == '|')) {
6063 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6064 if (ret == NULL) return(NULL);
/* One iteration per '|' separator. */
6066 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6069 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6070 if (ret == NULL) return(NULL);
6076 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6077 if (n == NULL) return(NULL);
/* The name parsed on the previous iteration becomes the first child of
 * the new OR node. */
6078 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6087 elem = xmlParseName(ctxt);
6089 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6090 "xmlParseElementMixedContentDecl : Name expected\n");
6091 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* A list of names has to be closed by ')' immediately followed by '*'. */
6097 if ((RAW == ')') && (NXT(1) == '*')) {
/* The last parsed name goes into the second child slot of cur. */
6099 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6100 XML_ELEMENT_CONTENT_ELEMENT);
6101 if (cur->c2 != NULL)
6102 cur->c2->parent = cur;
6105 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6106 if (ctxt->input->id != inputchk) {
6107 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6108 "Element content declaration doesn't start and"
6109 " stop in the same entity\n");
/* Any other terminator: drop the tree built so far and report it. */
6113 xmlFreeDocElementContent(ctxt->myDoc, ret);
6114 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6119 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6125 * xmlParseElementChildrenContentDeclPriv:
6126 * @ctxt: an XML parser context
6127 * @inputchk: the input used for the current entity, needed for boundary checks
6128 * @depth: the level of recursion
6130 * parse the declaration for the children content of an Element
6131 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6136 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6138 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6140 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6142 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6143 * TODO Parameter-entity replacement text must be properly nested
6144 * with parenthesized groups. That is to say, if either of the
6145 * opening or closing parentheses in a choice, seq, or Mixed
6146 * construct is contained in the replacement text for a parameter
6147 * entity, both must be contained in the same replacement text. For
6148 * interoperability, if a parameter-entity reference appears in a
6149 * choice, seq, or Mixed construct, its replacement text should not
6150 * be empty, and neither the first nor last non-blank character of
6151 * the replacement text should be a connector (| or ,).
6153 * Returns the tree of xmlElementContentPtr describing the element
/*
 * xmlParseElementChildrenContentDeclPriv: recursive worker for the
 * children content model (choice and seq groups).
 *
 * It parses a first content particle (a nested group, handled by a
 * recursive call, or a Name), then loops until ')' reading one separator
 * and one further particle per iteration, and finally handles the
 * occurrence indicator that follows the group.  The tree is built from
 * xmlNewDocElementContent() nodes: SEQ nodes for ',' and OR nodes for
 * '|', with ELEMENT leaves.
 *
 * @inputchk is compared with the current input id once the group has
 * been read; @depth is the recursion level.
 *
 * NOTE(review): the embedded line numbers have gaps; many statements
 * (tests, returns, tree-linking assignments) are not visible here.
 */
6156 static xmlElementContentPtr
6157 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6159 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6160 const xmlChar *elem;
/* Recursion guard: a depth above 128 is rejected unless XML_PARSE_HUGE
 * is set.  The second half of the condition is not visible here. */
6163 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6165 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6166 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6173 int inputid = ctxt->input->id;
6175 /* Recurse on first child */
6178 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* First particle given as a Name: it becomes an ELEMENT leaf. */
6183 elem = xmlParseName(ctxt);
6185 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6188 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6190 xmlErrMemory(ctxt, NULL);
/* Occurrence indicator of the first particle; ONCE when none of the
 * tested characters is present. */
6195 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6197 } else if (RAW == '*') {
6198 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6200 } else if (RAW == '+') {
6201 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6204 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6210 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6212 * Each loop we parse one separator and one element.
/* type records the first separator seen in this group; a different
 * separator later on is an error. */
6215 if (type == 0) type = CUR;
6218 * Detect "Name | Name , Name" error
6220 else if (type != CUR) {
6221 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6222 "xmlParseElementChildrenContentDecl : '%c' expected\n",
/* last is freed separately only when it is a different node from ret. */
6224 if ((last != NULL) && (last != ret))
6225 xmlFreeDocElementContent(ctxt->myDoc, last);
6227 xmlFreeDocElementContent(ctxt->myDoc, ret);
6232 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6234 if ((last != NULL) && (last != ret))
6235 xmlFreeDocElementContent(ctxt->myDoc, last);
6236 xmlFreeDocElementContent(ctxt->myDoc, ret);
6254 } else if (RAW == '|') {
6255 if (type == 0) type = CUR;
6258 * Detect "Name , Name | Name" error
6260 else if (type != CUR) {
6261 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6262 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6264 if ((last != NULL) && (last != ret))
6265 xmlFreeDocElementContent(ctxt->myDoc, last);
6267 xmlFreeDocElementContent(ctxt->myDoc, ret);
6272 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6274 if ((last != NULL) && (last != ret))
6275 xmlFreeDocElementContent(ctxt->myDoc, last);
6277 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* Not a recognised separator: report an unfinished content model and
 * release what was built. */
6296 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6297 if ((last != NULL) && (last != ret))
6298 xmlFreeDocElementContent(ctxt->myDoc, last);
6300 xmlFreeDocElementContent(ctxt->myDoc, ret);
6307 int inputid = ctxt->input->id;
6308 /* Recurse on second child */
6311 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
/* Particle after the separator given as a Name. */
6315 elem = xmlParseName(ctxt);
6317 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6319 xmlFreeDocElementContent(ctxt->myDoc, ret);
6322 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6325 xmlFreeDocElementContent(ctxt->myDoc, ret);
6329 last->ocur = XML_ELEMENT_CONTENT_OPT;
6331 } else if (RAW == '*') {
6332 last->ocur = XML_ELEMENT_CONTENT_MULT;
6334 } else if (RAW == '+') {
6335 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6338 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6344 if ((cur != NULL) && (last != NULL)) {
/* The group has to be closed in the input it was opened in. */
6349 if (ctxt->input->id != inputchk) {
6350 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6351 "Element content declaration doesn't start and stop in"
6352 " the same entity\n");
/* NOTE(review): presumably the '?' case (its test is not visible): an
 * occurrence of PLUS or MULT already on ret becomes MULT, anything else
 * becomes OPT. */
6357 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6358 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6359 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6361 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6364 } else if (RAW == '*') {
6366 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6369 * Some normalization:
6370 * (a | b* | c?)* == (a | b | c)*
/* Walk the OR nodes and reset OPT or MULT children to ONCE. */
6372 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6373 if ((cur->c1 != NULL) &&
6374 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6375 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6376 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6377 if ((cur->c2 != NULL) &&
6378 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6379 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6380 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6385 } else if (RAW == '+') {
/* '+' after the group: an occurrence of OPT or MULT already on ret
 * becomes MULT, anything else becomes PLUS. */
6389 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6390 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6391 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6393 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6395 * Some normalization:
6396 * (a | b*)+ == (a | b)*
6397 * (a | b?)+ == (a | b)*
6399 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6400 if ((cur->c1 != NULL) &&
6401 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6402 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6403 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6406 if ((cur->c2 != NULL) &&
6407 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6408 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6409 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
/* NOTE(review): the condition guarding this assignment is not visible;
 * per the normalization comment above it presumably applies when a
 * child was reset in the loop -- confirm. */
6415 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6423 * xmlParseElementChildrenContentDecl:
6424 * @ctxt: an XML parser context
6425 * @inputchk: the input used for the current entity, needed for boundary checks
6427 * parse the declaration for the children content of an Element
6428 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6430 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6432 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6434 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6436 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6438 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6439 * TODO Parameter-entity replacement text must be properly nested
6440 * with parenthesized groups. That is to say, if either of the
6441 * opening or closing parentheses in a choice, seq, or Mixed
6442 * construct is contained in the replacement text for a parameter
6443 * entity, both must be contained in the same replacement text. For
6444 * interoperability, if a parameter-entity reference appears in a
6445 * choice, seq, or Mixed construct, its replacement text should not
6446 * be empty, and neither the first nor last non-blank character of
6447 * the replacement text should be a connector (| or ,).
6449 * Returns the tree of xmlElementContentPtr describing the element
/*
 * xmlParseElementChildrenContentDecl: entry point kept for API/ABI
 * compatibility.  It forwards @ctxt and @inputchk to
 * xmlParseElementChildrenContentDeclPriv() with a starting depth of 1
 * and returns that function's result.
 */
6452 xmlElementContentPtr
6453 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6454 /* stub left for API/ABI compat */
6455 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6459 * xmlParseElementContentDecl:
6460 * @ctxt: an XML parser context
6461 * @name: the name of the element being defined.
6462 * @result: the Element Content pointer will be stored here if any
6464 * parse the declaration for an Element content either Mixed or Children,
6465 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6467 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6469 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/*
 * xmlParseElementContentDecl: choose between the Mixed and the children
 * content parsers for the element called @name.  Input starting with
 * '#PCDATA' is handled by xmlParseElementMixedContentDecl() and yields
 * XML_ELEMENT_TYPE_MIXED; anything else is handled by
 * xmlParseElementChildrenContentDeclPriv() (starting depth 1) and yields
 * XML_ELEMENT_TYPE_ELEMENT.
 *
 * NOTE(review): the embedded line numbers have gaps; the statement that
 * stores the tree through @result and the returns are not visible here.
 */
6473 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6474 xmlElementContentPtr *result) {
6476 xmlElementContentPtr tree = NULL;
/* Input id at entry, handed to the sub-parsers as their inputchk. */
6477 int inputid = ctxt->input->id;
6483 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6484 "xmlParseElementContentDecl : %s '(' expected\n", name);
6489 if (ctxt->instate == XML_PARSER_EOF)
6492 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6493 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6494 res = XML_ELEMENT_TYPE_MIXED;
6496 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6497 res = XML_ELEMENT_TYPE_ELEMENT;
6505 * xmlParseElementDecl:
6506 * @ctxt: an XML parser context
6508 * parse an Element declaration.
6510 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6512 * [ VC: Unique Element Type Declaration ]
6513 * No element type may be declared more than once
6515 * Returns the type of the element, or -1 in case of error
/*
 * xmlParseElementDecl: handle an '<!ELEMENT' declaration.
 * Reads the element name, classifies the content specification (EMPTY,
 * ANY, or a parenthesised model parsed by xmlParseElementContentDecl()),
 * and reports the result through the SAX elementDecl callback when SAX
 * is enabled.  The content tree is freed here on the error paths and
 * whenever it was not taken over during the callback.
 *
 * NOTE(review): the embedded line numbers have gaps; several statements
 * (declarations, tests, returns) are not visible here.
 */
6518 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6519 const xmlChar *name;
6521 xmlElementContentPtr content = NULL;
6523 /* GROW; done in the caller */
6524 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Id of the input the declaration starts in; compared again below. */
6525 int inputid = ctxt->input->id;
6528 if (SKIP_BLANKS == 0) {
6529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6530 "Space required after 'ELEMENT'\n");
6533 name = xmlParseName(ctxt);
6535 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6536 "xmlParseElementDecl: no name for Element\n");
6539 if (SKIP_BLANKS == 0) {
6540 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6541 "Space required after the element name\n");
6543 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6546 * Element must always be empty.
6548 ret = XML_ELEMENT_TYPE_EMPTY;
6549 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6553 * Element is a generic container.
6555 ret = XML_ELEMENT_TYPE_ANY;
6556 } else if (RAW == '(') {
6557 ret = xmlParseElementContentDecl(ctxt, name, &content);
6560 * [ WFC: PEs in Internal Subset ] error handling.
/* A '%' seen here, outside any external entity and with a single input
 * on the stack, gets the dedicated PE-reference error; every other
 * unexpected input gets the generic one. */
6562 if ((RAW == '%') && (ctxt->external == 0) &&
6563 (ctxt->inputNr == 1)) {
6564 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6565 "PEReference: forbidden within markup decl in internal subset\n");
6567 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6568 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
/* Missing '>' : release the content tree that was parsed. */
6576 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6577 if (content != NULL) {
6578 xmlFreeDocElementContent(ctxt->myDoc, content);
6581 if (inputid != ctxt->input->id) {
6582 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6583 "Element declaration doesn't start and stop in"
6584 " the same entity\n");
6588 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6589 (ctxt->sax->elementDecl != NULL)) {
/* content->parent is cleared before the callback and inspected after
 * it: a parent that is still NULL means the tree is freed here. */
6590 if (content != NULL)
6591 content->parent = NULL;
6592 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6594 if ((content != NULL) && (content->parent == NULL)) {
6596 * this is a trick: if xmlAddElementDecl is called,
6597 * instead of copying the full tree it is plugged directly
6598 * if called from the parser. Avoid duplicating the
6599 * interfaces or change the API/ABI
6601 xmlFreeDocElementContent(ctxt->myDoc, content);
/* No callback was invoked: the tree is freed here. */
6603 } else if (content != NULL) {
6604 xmlFreeDocElementContent(ctxt->myDoc, content);
6612 * xmlParseConditionalSections
6613 * @ctxt: an XML parser context
6615 * [61] conditionalSect ::= includeSect | ignoreSect
6616 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6617 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6618 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6619 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * xmlParseConditionalSections: handle an INCLUDE or IGNORE conditional
 * section.
 *
 * INCLUDE: the contained declarations are parsed until the closing
 * "]]>", recursing for nested conditional sections and calling
 * xmlParseMarkupDecl() for everything else.
 * IGNORE: the content is scanned for nested section openers and closers
 * with a depth counter, with ctxt->instate set to XML_PARSER_IGNORE.
 * Any other keyword raises XML_ERR_CONDSEC_INVALID_KEYWORD and halts the
 * parser.
 *
 * The input id recorded at entry is compared with the current one at
 * several points to detect sections crossing entity boundaries.
 *
 * NOTE(review): the embedded line numbers have gaps; several statements
 * (skips, tests, returns, depth updates) are not visible here.
 */
6623 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6624 int id = ctxt->input->id;
6628 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6632 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6633 xmlHaltParser(ctxt);
6636 if (ctxt->input->id != id) {
6637 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6638 "All markup of the conditional section is not"
6639 " in the same entity\n");
/* Optional trace output, enabled by xmlParserDebugEntities. */
6643 if (xmlParserDebugEntities) {
6644 if ((ctxt->input != NULL) && (ctxt->input->filename))
6645 xmlGenericError(xmlGenericErrorContext,
6646 "%s(%d): ", ctxt->input->filename,
6648 xmlGenericError(xmlGenericErrorContext,
6649 "Entering INCLUDE Conditional Section\n");
/* Parse declarations until "]]>", end of data, or parser EOF state. */
6654 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6655 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Position snapshot, used below to detect an iteration that consumed
 * nothing. */
6656 const xmlChar *check = CUR_PTR;
6657 unsigned int cons = ctxt->input->consumed;
6659 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6660 xmlParseConditionalSections(ctxt);
6662 xmlParseMarkupDecl(ctxt);
/* No progress since the snapshot: report and halt rather than loop. */
6667 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6668 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6669 xmlHaltParser(ctxt);
6673 if (xmlParserDebugEntities) {
6674 if ((ctxt->input != NULL) && (ctxt->input->filename))
6675 xmlGenericError(xmlGenericErrorContext,
6676 "%s(%d): ", ctxt->input->filename,
6678 xmlGenericError(xmlGenericErrorContext,
6679 "Leaving INCLUDE Conditional Section\n");
6682 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6684 xmlParserInputState instate;
6690 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6691 xmlHaltParser(ctxt);
6694 if (ctxt->input->id != id) {
6695 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6696 "All markup of the conditional section is not"
6697 " in the same entity\n");
6701 if (xmlParserDebugEntities) {
6702 if ((ctxt->input != NULL) && (ctxt->input->filename))
6703 xmlGenericError(xmlGenericErrorContext,
6704 "%s(%d): ", ctxt->input->filename,
6706 xmlGenericError(xmlGenericErrorContext,
6707 "Entering IGNORE Conditional Section\n");
6711 * Parse up to the end of the conditional section
6712 * But disable SAX event generating DTD building in the meantime
/* Save disableSAX and instate; both are restored after the scan.  SAX
 * stays enabled when ctxt->recovery is set. */
6714 state = ctxt->disableSAX;
6715 instate = ctxt->instate;
6716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6717 ctxt->instate = XML_PARSER_IGNORE;
/* Scan until depth drops below zero, end of data, or parser EOF. */
6719 while (((depth >= 0) && (RAW != 0)) &&
6720 (ctxt->instate != XML_PARSER_EOF)) {
6721 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6726 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
/* A closer is skipped here only while depth stays non-negative after
 * the decrement; the outermost one is left in the input. */
6727 if (--depth >= 0) SKIP(3);
6734 ctxt->disableSAX = state;
6735 ctxt->instate = instate;
6737 if (xmlParserDebugEntities) {
6738 if ((ctxt->input != NULL) && (ctxt->input->filename))
6739 xmlGenericError(xmlGenericErrorContext,
6740 "%s(%d): ", ctxt->input->filename,
6742 xmlGenericError(xmlGenericErrorContext,
6743 "Leaving IGNORE Conditional Section\n");
6747 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6748 xmlHaltParser(ctxt);
6756 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6758 if (ctxt->input->id != id) {
6759 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6760 "All markup of the conditional section is not in"
6761 " the same entity\n");
/* Requires a live parser and at least three bytes left in the input.
 * NOTE(review): the guarded statement is not visible here. */
6763 if ((ctxt-> instate != XML_PARSER_EOF) &&
6764 ((ctxt->input->cur + 3) <= ctxt->input->end))
6770 * xmlParseMarkupDecl:
6771 * @ctxt: an XML parser context
6773 * parse Markup declarations
6775 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6776 * NotationDecl | PI | Comment
6778 * [ VC: Proper Declaration/PE Nesting ]
6779 * Parameter-entity replacement text must be properly nested with
6780 * markup declarations. That is to say, if either the first character
6781 * or the last character of a markup declaration (markupdecl above) is
6782 * contained in the replacement text for a parameter-entity reference,
6783 * both must be contained in the same replacement text.
6785 * [ WFC: PEs in Internal Subset ]
6786 * In the internal DTD subset, parameter-entity references can occur
6787 * only where markup declarations can occur, not within markup declarations.
6788 * (This does not apply to references that occur in external parameter
6789 * entities or to the external subset.)
/*
 * xmlParseMarkupDecl: dispatch on the characters that follow '<' to the
 * parser of the corresponding declaration: xmlParseElementDecl(),
 * xmlParseEntityDecl(), xmlParseAttributeListDecl(),
 * xmlParseNotationDecl() or xmlParseComment().  Afterwards, conditional
 * sections are handled when the input comes from an entity included in
 * the internal subset, and ctxt->instate is set to XML_PARSER_DTD.
 *
 * NOTE(review): the embedded line numbers have gaps; most of the tests
 * that select between the declaration parsers, and the handling of the
 * '?' case, are not visible here.
 */
6792 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6795 if (NXT(1) == '!') {
6799 xmlParseElementDecl(ctxt);
6800 else if (NXT(3) == 'N')
6801 xmlParseEntityDecl(ctxt);
6804 xmlParseAttributeListDecl(ctxt);
6807 xmlParseNotationDecl(ctxt);
6810 xmlParseComment(ctxt);
6813 /* there is an error but it will be detected later */
6816 } else if (NXT(1) == '?') {
6822 * detect requirement to exit there and act accordingly
6823 * and avoid having instate overriden later on
6825 if (ctxt->instate == XML_PARSER_EOF)
6829 * Conditional sections are allowed from entities included
6830 * by PE References in the internal subset.
/* Not in an external entity, but more than one input is on the stack. */
6832 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6833 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6834 xmlParseConditionalSections(ctxt);
6838 ctxt->instate = XML_PARSER_DTD;
6843 * @ctxt: an XML parser context
6845 * parse an XML declaration header for external entities
6847 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
/*
 * xmlParseTextDecl: handle the text declaration of an external entity.
 * The input has to start with '<?xml' followed by a blank, otherwise
 * XML_ERR_XMLDECL_NOT_STARTED is raised.  The optional version is stored
 * on the current input, the encoding declaration is then parsed, and the
 * declaration has to be closed by '?>'.
 *
 * NOTE(review): the embedded line numbers have gaps; several statements
 * (skips, returns, else branches) are not visible here.
 */
6851 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6853 const xmlChar *encoding;
6856 * We know that '<?xml' is here.
6858 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6861 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6865 if (SKIP_BLANKS == 0) {
6866 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6867 "Space needed after '<?xml'\n");
6871 * We may have the VersionInfo here.
/* A missing VersionInfo falls back to a copy of XML_DEFAULT_VERSION. */
6873 version = xmlParseVersionInfo(ctxt);
6874 if (version == NULL)
6875 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6877 if (SKIP_BLANKS == 0) {
6878 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6879 "Space needed here\n");
/* The version string is attached to the current input. */
6882 ctxt->input->version = version;
6885 * We must have the encoding declaration
6887 encoding = xmlParseEncodingDecl(ctxt);
6888 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6890 * The XML REC instructs us to stop parsing right here
/* The missing-encoding error is raised only when no other error has
 * been recorded in ctxt->errNo. */
6894 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6895 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6896 "Missing encoding in text declaration\n");
6900 if ((RAW == '?') && (NXT(1) == '>')) {
6902 } else if (RAW == '>') {
6903 /* Deprecated old WD ... */
6904 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
/* Neither '?>' nor '>': report it, then MOVETO_ENDTAG is applied to the
 * current position. */
6907 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6908 MOVETO_ENDTAG(CUR_PTR);
6914 * xmlParseExternalSubset:
6915 * @ctxt: an XML parser context
6916 * @ExternalID: the external identifier
6917 * @SystemID: the system identifier (or URL)
6919 * parse Markup declarations from an external subset
6921 * [30] extSubset ::= textDecl? extSubsetDecl
6923 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/*
 * xmlParseExternalSubset: parse the declarations of an external subset.
 * Steps visible in this listing: optional encoding detection from the
 * first four bytes, optional text declaration, creation of the document
 * and of its internal subset record when they do not exist yet, then a
 * loop over markup declarations and conditional sections.
 * @ExternalID and @SystemID are passed on to xmlCreateIntSubset().
 *
 * NOTE(review): the embedded line numbers have gaps; several statements
 * (declarations, returns, part of the loop condition) are not visible.
 */
6926 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6927 const xmlChar *SystemID) {
6928 xmlDetectSAX2(ctxt);
/* Encoding detection runs only when no encoding is recorded yet and at
 * least four bytes of input are available. */
6931 if ((ctxt->encoding == NULL) &&
6932 (ctxt->input->end - ctxt->input->cur >= 4)) {
6934 xmlCharEncoding enc;
6940 enc = xmlDetectCharEncoding(start, 4);
6941 if (enc != XML_CHAR_ENCODING_NONE)
6942 xmlSwitchEncoding(ctxt, enc);
6945 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6946 xmlParseTextDecl(ctxt);
6947 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6949 * The XML REC instructs us to stop parsing right here
6951 xmlHaltParser(ctxt);
/* Create a document (version "1.0") when the context has none. */
6955 if (ctxt->myDoc == NULL) {
6956 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6957 if (ctxt->myDoc == NULL) {
6958 xmlErrMemory(ctxt, "New Doc failed");
6961 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6963 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6964 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6966 ctxt->instate = XML_PARSER_DTD;
/* Loop while the input starts a processing instruction or a declaration;
 * the last alternative of the condition is not visible here. */
6969 while (((RAW == '<') && (NXT(1) == '?')) ||
6970 ((RAW == '<') && (NXT(1) == '!')) ||
/* Position snapshot, used below to detect an iteration that consumed
 * nothing. */
6972 const xmlChar *check = CUR_PTR;
6973 unsigned int cons = ctxt->input->consumed;
6976 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6977 xmlParseConditionalSections(ctxt);
6979 xmlParseMarkupDecl(ctxt);
6982 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6983 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
/* NOTE(review): the condition guarding this final error is not visible
 * here. */
6989 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6995 * xmlParseReference:
6996 * @ctxt: an XML parser context
6998 * parse and handle entity references in content. Depending on the SAX
6999 * interface, this may end up in a call to characters() if this is a
7000 * CharRef or a predefined entity, if there is no reference() callback,
7001 * or if the parser was asked to switch to that mode.
7003 * [67] Reference ::= EntityRef | CharRef
7006 xmlParseReference(xmlParserCtxtPtr ctxt) {
7010 xmlNodePtr list = NULL;
7011 xmlParserErrors ret = XML_ERR_OK;
7018 * Simple case of a CharRef
7020 if (NXT(1) == '#') {
7024 int value = xmlParseCharRef(ctxt);
7028 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7030 * So we are using non-UTF-8 buffers
7031 * Check that the char fit on 8bits, if not
7032 * generate a CharRef.
7034 if (value <= 0xFF) {
7037 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7038 (!ctxt->disableSAX))
7039 ctxt->sax->characters(ctxt->userData, out, 1);
7041 if ((hex == 'x') || (hex == 'X'))
7042 snprintf((char *)out, sizeof(out), "#x%X", value);
7044 snprintf((char *)out, sizeof(out), "#%d", value);
7045 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7046 (!ctxt->disableSAX))
7047 ctxt->sax->reference(ctxt->userData, out);
7051 * Just encode the value in UTF-8
7053 COPY_BUF(0 ,out, i, value);
7055 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7056 (!ctxt->disableSAX))
7057 ctxt->sax->characters(ctxt->userData, out, i);
7063 * We are seeing an entity reference
7065 ent = xmlParseEntityRef(ctxt);
7066 if (ent == NULL) return;
7067 if (!ctxt->wellFormed)
7069 was_checked = ent->checked;
7071 /* special case of predefined entities */
7072 if ((ent->name == NULL) ||
7073 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7075 if (val == NULL) return;
7077 * inline the entity.
7079 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7080 (!ctxt->disableSAX))
7081 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7086 * The first reference to the entity triggers a parsing phase
7087 * where the ent->children is filled with the result from
7089 * Note: external parsed entities will not be loaded, it is not
7090 * required for a non-validating parser, unless the parsing option
7091 * of validating, or substituting entities were given. Doing so is
7092 * far more secure as the parser will only process data coming from
7093 * the document entity by default.
7095 if (((ent->checked == 0) ||
7096 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7097 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7098 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7099 unsigned long oldnbent = ctxt->nbentities;
7102 * This is a bit hackish but this seems the best
7103 * way to make sure both SAX and DOM entity support
7107 if (ctxt->userData == ctxt)
7110 user_data = ctxt->userData;
7113 * Check that this entity is well formed
7114 * 4.3.2: An internal general parsed entity is well-formed
7115 * if its replacement text matches the production labeled
7118 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7120 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7124 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7126 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7127 user_data, ctxt->depth, ent->URI,
7128 ent->ExternalID, &list);
7131 ret = XML_ERR_ENTITY_PE_INTERNAL;
7132 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7133 "invalid entity type found\n", NULL);
7137 * Store the number of entities needing parsing for this entity
7138 * content and do checkings
7140 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7141 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7143 if (ret == XML_ERR_ENTITY_LOOP) {
7144 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7145 xmlFreeNodeList(list);
7148 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7149 xmlFreeNodeList(list);
7153 if ((ret == XML_ERR_OK) && (list != NULL)) {
7154 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7155 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7156 (ent->children == NULL)) {
7157 ent->children = list;
7158 if (ctxt->replaceEntities) {
7160 * Prune it directly in the generated document
7161 * except for single text nodes.
7163 if (((list->type == XML_TEXT_NODE) &&
7164 (list->next == NULL)) ||
7165 (ctxt->parseMode == XML_PARSE_READER)) {
7166 list->parent = (xmlNodePtr) ent;
7171 while (list != NULL) {
7172 list->parent = (xmlNodePtr) ctxt->node;
7173 list->doc = ctxt->myDoc;
7174 if (list->next == NULL)
7178 list = ent->children;
7179 #ifdef LIBXML_LEGACY_ENABLED
7180 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7181 xmlAddEntityReference(ent, list, NULL);
7182 #endif /* LIBXML_LEGACY_ENABLED */
7186 while (list != NULL) {
7187 list->parent = (xmlNodePtr) ent;
7188 xmlSetTreeDoc(list, ent->doc);
7189 if (list->next == NULL)
7195 xmlFreeNodeList(list);
7198 } else if ((ret != XML_ERR_OK) &&
7199 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7200 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7201 "Entity '%s' failed to parse\n", ent->name);
7202 xmlParserEntityCheck(ctxt, 0, ent, 0);
7203 } else if (list != NULL) {
7204 xmlFreeNodeList(list);
7207 if (ent->checked == 0)
7210 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7212 } else if (ent->checked != 1) {
7213 ctxt->nbentities += ent->checked / 2;
7217 * Now that the entity content has been gathered
7218 * provide it to the application, this can take different forms based
7219 * on the parsing modes.
7221 if (ent->children == NULL) {
7223 * Probably running in SAX mode and the callbacks don't
7224 * build the entity content. So unless we already went
7225 * through parsing for first checking go through the entity
7226 * content to generate callbacks associated to the entity
7228 if (was_checked != 0) {
7231 * This is a bit hackish but this seems the best
7232 * way to make sure both SAX and DOM entity support
7235 if (ctxt->userData == ctxt)
7238 user_data = ctxt->userData;
7240 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7242 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7243 ent->content, user_data, NULL);
7245 } else if (ent->etype ==
7246 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7248 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7249 ctxt->sax, user_data, ctxt->depth,
7250 ent->URI, ent->ExternalID, NULL);
7253 ret = XML_ERR_ENTITY_PE_INTERNAL;
7254 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7255 "invalid entity type found\n", NULL);
7257 if (ret == XML_ERR_ENTITY_LOOP) {
7258 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7262 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7263 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7265 * Entity reference callback comes second, it's somewhat
7266 * superfluous but a compatibility to historical behaviour
7268 ctxt->sax->reference(ctxt->userData, ent->name);
7274 * If we didn't get any children for the entity being built
7276 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7277 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7281 ctxt->sax->reference(ctxt->userData, ent->name);
7285 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7287 * There is a problem on the handling of _private for entities
7288 * (bug 155816): Should we copy the content of the field from
7289 * the entity (possibly overwriting some value set by the user
7290 * when a copy is created), should we leave it alone, or should
7291 * we try to take care of different situations? The problem
7292 * is exacerbated by the usage of this field by the xmlReader.
7293 * To fix this bug, we look at _private on the created node
7294 * and, if it's NULL, we copy in whatever was in the entity.
7295 * If it's not NULL we leave it alone. This is somewhat of a
7296 * hack - maybe we should have further tests to determine
7299 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7301 * Seems we are generating the DOM content, do
7302 * a simple tree copy for all references except the first
7303 * In the first occurrence list contains the replacement.
7305 if (((list == NULL) && (ent->owner == 0)) ||
7306 (ctxt->parseMode == XML_PARSE_READER)) {
7307 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7310 * We are copying here, make sure there is no abuse
7312 ctxt->sizeentcopy += ent->length + 5;
7313 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7317 * when operating on a reader, the entities definitions
7318 * are always owning the entities subtree.
7319 if (ctxt->parseMode == XML_PARSE_READER)
7323 cur = ent->children;
7324 while (cur != NULL) {
7325 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7327 if (nw->_private == NULL)
7328 nw->_private = cur->_private;
7329 if (firstChild == NULL){
7332 nw = xmlAddChild(ctxt->node, nw);
7334 if (cur == ent->last) {
7336 * needed to detect some strange empty
7337 * node cases in the reader tests
7339 if ((ctxt->parseMode == XML_PARSE_READER) &&
7341 (nw->type == XML_ELEMENT_NODE) &&
7342 (nw->children == NULL))
7349 #ifdef LIBXML_LEGACY_ENABLED
7350 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7351 xmlAddEntityReference(ent, firstChild, nw);
7352 #endif /* LIBXML_LEGACY_ENABLED */
7353 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7354 xmlNodePtr nw = NULL, cur, next, last,
7358 * We are copying here, make sure there is no abuse
7360 ctxt->sizeentcopy += ent->length + 5;
7361 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7365 * Copy the entity child list and make it the new
7366 * entity child list. The goal is to make sure any
7367 * ID or REF referenced will be the one from the
7368 * document content and not the entity copy.
7370 cur = ent->children;
7371 ent->children = NULL;
7374 while (cur != NULL) {
7378 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7380 if (nw->_private == NULL)
7381 nw->_private = cur->_private;
7382 if (firstChild == NULL){
7385 xmlAddChild((xmlNodePtr) ent, nw);
7386 xmlAddChild(ctxt->node, cur);
7392 if (ent->owner == 0)
7394 #ifdef LIBXML_LEGACY_ENABLED
7395 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7396 xmlAddEntityReference(ent, firstChild, nw);
7397 #endif /* LIBXML_LEGACY_ENABLED */
7399 const xmlChar *nbktext;
7402 * the name change is to avoid coalescing of the
7403 * node with a possible previous text one which
7404 * would make ent->children a dangling pointer
7406 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7408 if (ent->children->type == XML_TEXT_NODE)
7409 ent->children->name = nbktext;
7410 if ((ent->last != ent->children) &&
7411 (ent->last->type == XML_TEXT_NODE))
7412 ent->last->name = nbktext;
7413 xmlAddChildList(ctxt->node, ent->children);
7417 * This is to avoid a nasty side effect, see
7418 * characters() in SAX.c
7428 * xmlParseEntityRef:
7429 * @ctxt: an XML parser context
7431 * parse ENTITY references declarations
7433 * [68] EntityRef ::= '&' Name ';'
7435 * [ WFC: Entity Declared ]
7436 * In a document without any DTD, a document with only an internal DTD
7437 * subset which contains no parameter entity references, or a document
7438 * with "standalone='yes'", the Name given in the entity reference
7439 * must match that in an entity declaration, except that well-formed
7440 * documents need not declare any of the following entities: amp, lt,
7441 * gt, apos, quot. The declaration of a parameter entity must precede
7442 * any reference to it. Similarly, the declaration of a general entity
7443 * must precede any reference to it which appears in a default value in an
7444 * attribute-list declaration. Note that if entities are declared in the
7445 * external subset or in external parameter entities, a non-validating
7446 * processor is not obligated to read and process their declarations;
7447 * for such documents, the rule that an entity must be declared is a
7448 * well-formedness constraint only if standalone='yes'.
7450 * [ WFC: Parsed Entity ]
7451 * An entity reference must not contain the name of an unparsed entity
7453 * Returns the xmlEntityPtr if found, or NULL otherwise.
/*
 * Reads an entity name with xmlParseName and resolves it to an entity:
 * the predefined table is consulted first unless XML_PARSE_OLDSAX is set,
 * then the SAX getEntity callback, then xmlSAX2GetEntity when the user
 * data is the parser context itself. The chain of checks that follows
 * reports undeclared, unparsed, external-in-attribute, less-than-in-
 * attribute and parameter entity misuse.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so return statements and some conditions are not visible here.
 */
7456 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7457 const xmlChar *name;
7458 xmlEntityPtr ent = NULL;
/* Parser already in the EOF state; the statement taken is elided here. */
7461 if (ctxt->instate == XML_PARSER_EOF)
7467 name = xmlParseName(ctxt);
7469 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7470 "xmlParseEntityRef: no name\n");
7474 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7480 * Predefined entities override any extra definition
7482 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7483 ent = xmlGetPredefinedEntity(name);
7489 * Increase the number of entity references parsed
7494 * Ask first SAX for entity resolution, otherwise try the
7495 * entities which may have stored in the parser context.
7497 if (ctxt->sax != NULL) {
7498 if (ctxt->sax->getEntity != NULL)
7499 ent = ctxt->sax->getEntity(ctxt->userData, name);
/*
 * With XML_PARSE_OLDSAX the predefined table is only a fallback, used
 * when the document is still well formed and getEntity found nothing.
 */
7500 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7501 (ctxt->options & XML_PARSE_OLDSAX))
7502 ent = xmlGetPredefinedEntity(name);
/* Last resort: the SAX2 lookup, only when userData is the context. */
7503 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7504 (ctxt->userData==ctxt)) {
7505 ent = xmlSAX2GetEntity(ctxt, name);
7508 if (ctxt->instate == XML_PARSER_EOF)
7511 * [ WFC: Entity Declared ]
7512 * In a document without any DTD, a document with only an
7513 * internal DTD subset which contains no parameter entity
7514 * references, or a document with "standalone='yes'", the
7515 * Name given in the entity reference must match that in an
7516 * entity declaration, except that well-formed documents
7517 * need not declare any of the following entities: amp, lt,
7519 * The declaration of a parameter entity must precede any
7521 * Similarly, the declaration of a general entity must
7522 * precede any reference to it which appears in a default
7523 * value in an attribute-list declaration. Note that if
7524 * entities are declared in the external subset or in
7525 * external parameter entities, a non-validating processor
7526 * is not obligated to read and process their declarations;
7527 * for such documents, the rule that an entity must be
7528 * declared is a well-formedness constraint only if
/*
 * Fatal error for standalone documents, or when there is neither an
 * external subset nor any PE reference. The xmlErrMsgStr call with the
 * warning code below is presumably the alternative branch (the
 * separating else is elided in this listing).
 */
7532 if ((ctxt->standalone == 1) ||
7533 ((ctxt->hasExternalSubset == 0) &&
7534 (ctxt->hasPErefs == 0))) {
7535 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7536 "Entity '%s' not defined\n", name);
7538 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7539 "Entity '%s' not defined\n", name);
/* Outside of a DTD subset, still notify the SAX reference callback. */
7540 if ((ctxt->inSubset == 0) &&
7541 (ctxt->sax != NULL) &&
7542 (ctxt->sax->reference != NULL)) {
7543 ctxt->sax->reference(ctxt->userData, name);
7546 xmlParserEntityCheck(ctxt, 0, ent, 0);
7551 * [ WFC: Parsed Entity ]
7552 * An entity reference must not contain the name of an
7555 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7556 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7557 "Entity reference to unparsed entity %s\n", name);
7561 * [ WFC: No External Entity References ]
7562 * Attribute values cannot contain direct or indirect
7563 * entity references to external entities.
7565 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7566 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7567 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7568 "Attribute references external entity '%s'\n", name);
7571 * [ WFC: No < in Attribute Values ]
7572 * The replacement text of any entity referred to directly or
7573 * indirectly in an attribute value (other than "<") must
7576 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7578 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
/*
 * The content is only scanned when ent->checked is 0 or has its low
 * bit set, and only when the entity actually has content.
 */
7579 if (((ent->checked & 1) || (ent->checked == 0)) &&
7580 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7581 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7582 "'<' in entity '%s' is not allowed in attributes values\n", name);
7587 * Internal check, no parameter entities here ...
7590 switch (ent->etype) {
7591 case XML_INTERNAL_PARAMETER_ENTITY:
7592 case XML_EXTERNAL_PARAMETER_ENTITY:
7593 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7594 "Attempt to reference the parameter entity '%s'\n",
7603 * [ WFC: No Recursion ]
7604 * A parsed entity must not contain a recursive reference
7605 * to itself, either directly or indirectly.
7606 * Done somewhere else
7612 * xmlParseStringEntityRef:
7613 * @ctxt: an XML parser context
7614 * @str: a pointer to an index in the string
7616 * parse ENTITY references declarations, but this version parses it from
7619 * [68] EntityRef ::= '&' Name ';'
7621 * [ WFC: Entity Declared ]
7622 * In a document without any DTD, a document with only an internal DTD
7623 * subset which contains no parameter entity references, or a document
7624 * with "standalone='yes'", the Name given in the entity reference
7625 * must match that in an entity declaration, except that well-formed
7626 * documents need not declare any of the following entities: amp, lt,
7627 * gt, apos, quot. The declaration of a parameter entity must precede
7628 * any reference to it. Similarly, the declaration of a general entity
7629 * must precede any reference to it which appears in a default value in an
7630 * attribute-list declaration. Note that if entities are declared in the
7631 * external subset or in external parameter entities, a non-validating
7632 * processor is not obligated to read and process their declarations;
7633 * for such documents, the rule that an entity must be declared is a
7634 * well-formedness constraint only if standalone='yes'.
7636 * [ WFC: Parsed Entity ]
7637 * An entity reference must not contain the name of an unparsed entity
7639 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7640 * is updated to the current location in the string.
/*
 * String-based counterpart of the entity reference parser: the name is
 * read with xmlParseStringName through a local cursor (ptr) instead of
 * from the parser input. Entity lookup order is the same: predefined
 * table unless XML_PARSE_OLDSAX is set, then the SAX getEntity callback,
 * then xmlSAX2GetEntity when userData is the context. Unlike
 * xmlParseEntityRef, the fallback lookups here do not test
 * ctxt->wellFormed.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so declarations of name/ptr and the return statements are not visible.
 */
7643 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7647 xmlEntityPtr ent = NULL;
/* Guard against a NULL str or a NULL *str before touching the input. */
7649 if ((str == NULL) || (*str == NULL))
7657 name = xmlParseStringName(ctxt, &ptr);
7659 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7660 "xmlParseStringEntityRef: no name\n");
7665 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7674 * Predefined entities override any extra definition
7676 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7677 ent = xmlGetPredefinedEntity(name);
7686 * Increate the number of entity references parsed
7691 * Ask first SAX for entity resolution, otherwise try the
7692 * entities which may have stored in the parser context.
7694 if (ctxt->sax != NULL) {
7695 if (ctxt->sax->getEntity != NULL)
7696 ent = ctxt->sax->getEntity(ctxt->userData, name);
/* With XML_PARSE_OLDSAX the predefined table is only a fallback. */
7697 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7698 ent = xmlGetPredefinedEntity(name);
/* Last resort: the SAX2 lookup, only when userData is the context. */
7699 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7700 ent = xmlSAX2GetEntity(ctxt, name);
7703 if (ctxt->instate == XML_PARSER_EOF) {
7709 * [ WFC: Entity Declared ]
7710 * In a document without any DTD, a document with only an
7711 * internal DTD subset which contains no parameter entity
7712 * references, or a document with "standalone='yes'", the
7713 * Name given in the entity reference must match that in an
7714 * entity declaration, except that well-formed documents
7715 * need not declare any of the following entities: amp, lt,
7717 * The declaration of a parameter entity must precede any
7719 * Similarly, the declaration of a general entity must
7720 * precede any reference to it which appears in a default
7721 * value in an attribute-list declaration. Note that if
7722 * entities are declared in the external subset or in
7723 * external parameter entities, a non-validating processor
7724 * is not obligated to read and process their declarations;
7725 * for such documents, the rule that an entity must be
7726 * declared is a well-formedness constraint only if
/*
 * Fatal error for standalone documents, or when there is neither an
 * external subset nor any PE reference. The xmlErrMsgStr call with the
 * warning code below is presumably the alternative branch (the
 * separating else is elided in this listing).
 */
7730 if ((ctxt->standalone == 1) ||
7731 ((ctxt->hasExternalSubset == 0) &&
7732 (ctxt->hasPErefs == 0))) {
7733 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7734 "Entity '%s' not defined\n", name);
7736 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7737 "Entity '%s' not defined\n",
7740 xmlParserEntityCheck(ctxt, 0, ent, 0);
7741 /* TODO ? check regressions ctxt->valid = 0; */
7745 * [ WFC: Parsed Entity ]
7746 * An entity reference must not contain the name of an
7749 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7750 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7751 "Entity reference to unparsed entity %s\n", name);
7755 * [ WFC: No External Entity References ]
7756 * Attribute values cannot contain direct or indirect
7757 * entity references to external entities.
7759 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7760 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7761 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7762 "Attribute references external entity '%s'\n", name);
7765 * [ WFC: No < in Attribute Values ]
7766 * The replacement text of any entity referred to directly or
7767 * indirectly in an attribute value (other than "<") must
/*
 * Here the content of every non-predefined entity with content is
 * scanned; there is no test on ent->checked in this variant.
 */
7770 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7771 (ent != NULL) && (ent->content != NULL) &&
7772 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7773 (xmlStrchr(ent->content, '<'))) {
7774 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7775 "'<' in entity '%s' is not allowed in attributes values\n",
7780 * Internal check, no parameter entities here ...
7783 switch (ent->etype) {
7784 case XML_INTERNAL_PARAMETER_ENTITY:
7785 case XML_EXTERNAL_PARAMETER_ENTITY:
7786 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7787 "Attempt to reference the parameter entity '%s'\n",
7796 * [ WFC: No Recursion ]
7797 * A parsed entity must not contain a recursive reference
7798 * to itself, either directly or indirectly.
7799 * Done somewhere else
7808 * xmlParsePEReference:
7809 * @ctxt: an XML parser context
7811 * parse PEReference declarations
7812 * The entity content is handled directly by pushing its content as
7813 * a new input stream.
7815 * [69] PEReference ::= '%' Name ';'
7817 * [ WFC: No Recursion ]
7818 * A parsed entity must not contain a recursive
7819 * reference to itself, either directly or indirectly.
7821 * [ WFC: Entity Declared ]
7822 * In a document without any DTD, a document with only an internal DTD
7823 * subset which contains no parameter entity references, or a document
7824 * with "standalone='yes'", ... ... The declaration of a parameter
7825 * entity must precede any reference to it...
7827 * [ VC: Entity Declared ]
7828 * In a document with an external subset or external parameter entities
7829 * with "standalone='no'", ... ... The declaration of a parameter entity
7830 * must precede any reference to it...
7833 * Parameter-entity references may only appear in the DTD.
7834 * NOTE: misleading but this is handled.
/*
 * Reads a parameter entity name with xmlParseName and asks the SAX
 * getParameterEntity callback for it. A missing entity is reported as a
 * fatal error, a validity error or a warning depending on the context
 * flags tested below. A real parameter entity is turned into an input
 * stream (xmlNewEntityInputStream) and pushed with xmlPushInput; for an
 * external one the encoding is detected from the first bytes and a
 * leading text declaration is parsed. ctxt->hasPErefs is set to 1.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so several branches and the return statements are not visible here.
 */
7837 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7839 const xmlChar *name;
7840 xmlEntityPtr entity = NULL;
7841 xmlParserInputPtr input;
7846 name = xmlParseName(ctxt);
7848 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
/* Optional trace output, controlled by xmlParserDebugEntities. */
7851 if (xmlParserDebugEntities)
7852 xmlGenericError(xmlGenericErrorContext,
7853 "PEReference: %s\n", name);
7855 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7862 * Increate the number of entity references parsed
7867 * Request the entity from SAX
7869 if ((ctxt->sax != NULL) &&
7870 (ctxt->sax->getParameterEntity != NULL))
7871 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7872 if (ctxt->instate == XML_PARSER_EOF)
7874 if (entity == NULL) {
7876 * [ WFC: Entity Declared ]
7877 * In a document without any DTD, a document with only an
7878 * internal DTD subset which contains no parameter entity
7879 * references, or a document with "standalone='yes'", ...
7880 * ... The declaration of a parameter entity must precede
7881 * any reference to it...
7883 if ((ctxt->standalone == 1) ||
7884 ((ctxt->hasExternalSubset == 0) &&
7885 (ctxt->hasPErefs == 0))) {
7886 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7887 "PEReference: %%%s; not found\n",
7891 * [ VC: Entity Declared ]
7892 * In a document with an external subset or external
7893 * parameter entities with "standalone='no'", ...
7894 * ... The declaration of a parameter entity must
7895 * precede any reference to it...
/*
 * When validating with a validity error handler installed, report
 * through xmlValidityError; the xmlWarningMsg call below is presumably
 * the alternative branch (the separating else is elided here).
 */
7897 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7898 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7899 "PEReference: %%%s; not found\n",
7902 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7903 "PEReference: %%%s; not found\n",
7907 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7910 * Internal checking in case the entity quest barfed
7912 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7913 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7914 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7915 "Internal: %%%s; is not a parameter entity\n",
7919 xmlCharEncoding enc;
/*
 * This test is true for an external parameter entity when none of the
 * NOENT, DTDVALID, DTDLOAD, DTDATTR options, replaceEntities or
 * validate is enabled. The statement it guards is elided in this
 * listing.
 */
7921 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7922 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7923 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7924 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7925 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7926 (ctxt->replaceEntities == 0) &&
7927 (ctxt->validate == 0))
7930 input = xmlNewEntityInputStream(ctxt, entity);
/* Push failed: release the input stream that was just created. */
7931 if (xmlPushInput(ctxt, input) < 0) {
7932 xmlFreeInputStream(input);
7936 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7938 * Get the 4 first bytes and decode the charset
7939 * if enc != XML_CHAR_ENCODING_NONE
7940 * plug some encoding conversion routines.
7941 * Note that, since we may have some non-UTF8
7942 * encoding (like UTF16, bug 135229), the 'length'
7943 * is not known, but we can calculate based upon
7944 * the amount of data in the buffer.
7947 if (ctxt->instate == XML_PARSER_EOF)
/* Detection is only attempted with at least 4 bytes available. */
7949 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7954 enc = xmlDetectCharEncoding(start, 4);
7955 if (enc != XML_CHAR_ENCODING_NONE) {
7956 xmlSwitchEncoding(ctxt, enc);
/* The five-character xml PI opener followed by a blank: text declaration. */
7960 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7961 (IS_BLANK_CH(NXT(5)))) {
7962 xmlParseTextDecl(ctxt);
7967 ctxt->hasPErefs = 1;
7971 * xmlLoadEntityContent:
7972 * @ctxt: an XML parser context
7973 * @entity: an unloaded system entity
7975 * Load the original content of the given system entity from the
7976 * ExternalID/SystemID given. This is to be used for Included in Literal
7977 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7979 * Returns 0 in case of success and -1 in case of failure
/*
 * Loads the raw content of an external entity into entity->content.
 * Visible steps: validate the arguments, create an xmlBuffer, create an
 * input stream for the entity, push it as the current input, append the
 * input bytes to the buffer, then move the buffer content into the
 * entity.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so the declarations of buf, l, c and count, the cleanup code and the
 * return statements are not visible here.
 */
7982 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7983 xmlParserInputPtr input;
/*
 * Only external parameter entities and external general parsed entities
 * are accepted, and only while they have no content yet.
 */
7988 if ((ctxt == NULL) || (entity == NULL) ||
7989 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7990 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7991 (entity->content != NULL)) {
7992 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7993 "xmlLoadEntityContent parameter error");
7997 if (xmlParserDebugEntities)
7998 xmlGenericError(xmlGenericErrorContext,
7999 "Reading %s entity content input\n", entity->name);
8001 buf = xmlBufferCreate();
/*
 * NOTE(review): the condition guarding this error is elided; presumably
 * it reports a failed xmlBufferCreate, reusing the parameter error text.
 */
8003 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8004 "xmlLoadEntityContent parameter error");
8008 input = xmlNewEntityInputStream(ctxt, entity);
8009 if (input == NULL) {
8010 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8011 "xmlLoadEntityContent input error");
8017 * Push the entity as the current input, read char by char
8018 * saving to the buffer until the end of the entity or an error
8020 if (xmlPushInput(ctxt, input) < 0) {
/*
 * Copy while the pushed entity is still the current input and unread
 * bytes remain; the rest of the loop condition is elided here.
 */
8027 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8029 xmlBufferAdd(buf, ctxt->input->cur, l);
/* Every XML_PARSER_CHUNK_SIZE iterations some extra work is done (elided). */
8030 if (count++ > XML_PARSER_CHUNK_SIZE) {
8033 if (ctxt->instate == XML_PARSER_EOF) {
8043 if (ctxt->instate == XML_PARSER_EOF) {
/*
 * After the loop: either the entity input was fully consumed, or the
 * loop stopped on a character rejected by IS_CHAR, which is reported.
 */
8051 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8053 } else if (!IS_CHAR(c)) {
8054 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8055 "xmlLoadEntityContent: invalid char value %d\n",
/*
 * Hand the accumulated bytes to the entity and clear the buffer pointer,
 * presumably so that releasing the buffer does not free them.
 */
8060 entity->content = buf->content;
8061 buf->content = NULL;
8068 * xmlParseStringPEReference:
8069 * @ctxt: an XML parser context
8070 * @str: a pointer to an index in the string
8072 * parse PEReference declarations
8074 * [69] PEReference ::= '%' Name ';'
8076 * [ WFC: No Recursion ]
8077 * A parsed entity must not contain a recursive
8078 * reference to itself, either directly or indirectly.
8080 * [ WFC: Entity Declared ]
8081 * In a document without any DTD, a document with only an internal DTD
8082 * subset which contains no parameter entity references, or a document
8083 * with "standalone='yes'", ... ... The declaration of a parameter
8084 * entity must precede any reference to it...
8086 * [ VC: Entity Declared ]
8087 * In a document with an external subset or external parameter entities
8088 * with "standalone='no'", ... ... The declaration of a parameter entity
8089 * must precede any reference to it...
8092 * Parameter-entity references may only appear in the DTD.
8093 * NOTE: misleading but this is handled.
8095 * Returns the string of the entity content.
8096 * str is updated to the current value of the index
/*
 * String-based parameter entity reference parser: the name is read with
 * xmlParseStringName through a local cursor (ptr), then looked up with
 * the SAX getParameterEntity callback. A missing entity is a fatal error
 * or a warning depending on the context flags tested below; an entity
 * that is not a parameter entity produces a warning. ctxt->hasPErefs is
 * set to 1.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so declarations of name/ptr and the return statements (other than the
 * NULL guard) are not visible here.
 */
8099 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8103 xmlEntityPtr entity = NULL;
/* Nothing to parse without a string: return NULL. */
8105 if ((str == NULL) || (*str == NULL)) return(NULL);
8111 name = xmlParseStringName(ctxt, &ptr);
8113 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8114 "xmlParseStringPEReference: no name\n");
8120 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8128 * Increate the number of entity references parsed
8133 * Request the entity from SAX
8135 if ((ctxt->sax != NULL) &&
8136 (ctxt->sax->getParameterEntity != NULL))
8137 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8138 if (ctxt->instate == XML_PARSER_EOF) {
8143 if (entity == NULL) {
8145 * [ WFC: Entity Declared ]
8146 * In a document without any DTD, a document with only an
8147 * internal DTD subset which contains no parameter entity
8148 * references, or a document with "standalone='yes'", ...
8149 * ... The declaration of a parameter entity must precede
8150 * any reference to it...
/*
 * Fatal error for standalone documents, or when there is neither an
 * external subset nor any PE reference. The xmlWarningMsg call further
 * down is presumably the alternative branch (the else is elided here).
 */
8152 if ((ctxt->standalone == 1) ||
8153 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8154 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8155 "PEReference: %%%s; not found\n", name);
8158 * [ VC: Entity Declared ]
8159 * In a document with an external subset or external
8160 * parameter entities with "standalone='no'", ...
8161 * ... The declaration of a parameter entity must
8162 * precede any reference to it...
8164 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8165 "PEReference: %%%s; not found\n",
8169 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8172 * Internal checking in case the entity quest barfed
8174 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8175 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8176 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8177 "%%%s; is not a parameter entity\n",
8181 ctxt->hasPErefs = 1;
8188 * xmlParseDocTypeDecl:
8189 * @ctxt: an XML parser context
8191 * parse a DOCTYPE declaration
8193 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8194 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8196 * [ VC: Root Element Type ]
8197 * The Name in the document type declaration must match the element
8198 * type of the root element.
/*
 * Parses the head of a DOCTYPE declaration: the name (stored in
 * ctxt->intSubName) and the optional external identifiers returned by
 * xmlParseExternalID (stored in ctxt->extSubURI and ctxt->extSubSystem).
 * The SAX internalSubset callback is then invoked with the three values
 * unless SAX is disabled.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so the cursor movement and the final end-of-declaration test are not
 * visible here.
 */
8202 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8203 const xmlChar *name = NULL;
8204 xmlChar *ExternalID = NULL;
8205 xmlChar *URI = NULL;
8208 * We know that '<!DOCTYPE' has been detected.
8215 * Parse the DOCTYPE name.
8217 name = xmlParseName(ctxt);
8219 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8220 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8222 ctxt->intSubName = name;
8227 * Check for SystemID and ExternalID
8229 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
/* Either identifier being present marks the document as having an external subset. */
8231 if ((URI != NULL) || (ExternalID != NULL)) {
8232 ctxt->hasExternalSubset = 1;
/* Both identifiers are recorded on the context, whether NULL or not. */
8234 ctxt->extSubURI = URI;
8235 ctxt->extSubSystem = ExternalID;
8240 * Create and update the internal subset.
8242 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8243 (!ctxt->disableSAX))
8244 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
/* The parser may have reached the EOF state during the callback; the statement taken is elided here. */
8245 if (ctxt->instate == XML_PARSER_EOF)
8249 * Is there any internal subset declarations ?
8250 * they are handled separately in xmlParseInternalSubset()
8256 * We should be at the end of the DOCTYPE declaration.
8259 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8265 * xmlParseInternalSubset:
8266 * @ctxt: an XML parser context
8268 * parse the internal subset declaration
8270 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/*
 * Parses the internal subset of a DOCTYPE declaration: switches the
 * parser state to XML_PARSER_DTD and loops over xmlParseMarkupDecl and
 * xmlParsePEReference until the closing bracket is reached or the
 * parser reaches the EOF state.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so the opening test, blank skipping and the final checks are only
 * partly visible here.
 */
8274 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8276 * Is there any DTD definition ?
8279 ctxt->instate = XML_PARSER_DTD;
8282 * Parse the succession of Markup declarations and
8284 * Subsequence (markupdecl | PEReference | S)*
/*
 * A closing bracket only ends the loop when ctxt->inputNr is not greater
 * than 1; with a larger inputNr the loop keeps going.
 */
8286 while (((RAW != ']') || (ctxt->inputNr > 1)) &&
8287 (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot of the position, used below to detect an iteration without progress. */
8288 const xmlChar *check = CUR_PTR;
8289 unsigned int cons = ctxt->input->consumed;
8292 xmlParseMarkupDecl(ctxt);
8293 xmlParsePEReference(ctxt);
/* Same cursor and same consumed count: nothing was parsed, report an error. */
8295 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8296 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8297 "xmlParseInternalSubset: error detected in Markup declaration\n");
8298 if (ctxt->inputNr > 1)
8311 * We should be at the end of the DOCTYPE declaration.
8314 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8320 #ifdef LIBXML_SAX1_ENABLED
8322 * xmlParseAttribute:
8323 * @ctxt: an XML parser context
8324 * @value: a xmlChar ** used to store the value of the attribute
8326 * parse an attribute
8328 * [41] Attribute ::= Name Eq AttValue
8330 * [ WFC: No External Entity References ]
8331 * Attribute values cannot contain direct or indirect entity references
8332 * to external entities.
8334 * [ WFC: No < in Attribute Values ]
8335 * The replacement text of any entity referred to directly or indirectly in
8336 * an attribute value (other than "<") must not contain a <.
8338 * [ VC: Attribute Value Type ]
8339 * The attribute must have been declared; the value must be of the type
8342 * [25] Eq ::= S? '=' S?
8346 * [NS 11] Attribute ::= QName Eq AttValue
8348 * Also the case QName == xmlns:??? is handled independently as a namespace
8351 * Returns the attribute name, and the value in *value.
/*
 * Parses one attribute: the name with xmlParseName and the value with
 * xmlParseAttValue, then applies the xml:lang and xml:space checks.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so the declaration of val, the handling of the equal sign, the
 * assignments made for xml:space and the return statements are not
 * visible here.
 */
8355 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8356 const xmlChar *name;
8361 name = xmlParseName(ctxt);
8363 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8364 "error parsing attribute name\n");
8375 val = xmlParseAttValue(ctxt);
/* Once the value has been read the parser state is set back to content. */
8376 ctxt->instate = XML_PARSER_CONTENT;
8378 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8379 "Specification mandates value for attribute %s\n", name);
8384 * Check that xml:lang conforms to the specification
8385 * No more registered as an error, just generate a warning now
8386 * since this was deprecated in XML second edition
/* The language check only runs in pedantic mode and only warns. */
8388 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8389 if (!xmlCheckLanguageID(val)) {
8390 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8391 "Malformed value for xml:lang : %s\n",
8397 * Check that xml:space conforms to the specification
/*
 * Two values are recognised for xml:space; the statements executed for
 * each are elided in this listing. Any other value produces a warning.
 */
8399 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8400 if (xmlStrEqual(val, BAD_CAST "default"))
8402 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8405 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8406 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8417 * @ctxt: an XML parser context
8419 * parse a start of tag either for rule element or
8420 * EmptyElement. In both cases we don't parse the tag closing chars.
8422 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8424 * [ WFC: Unique Att Spec ]
8425 * No attribute name may appear more than once in the same start-tag or
8426 * empty-element tag.
8428 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8430 * [ WFC: Unique Att Spec ]
8431 * No attribute name may appear more than once in the same start-tag or
8432 * empty-element tag.
8436 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8438 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8440 * Returns the element name parsed
/*
 * Parses a start tag: the element name with xmlParseName, then the
 * attributes one by one with xmlParseAttribute. Attributes are collected
 * in the atts array as consecutive name/value entries followed by two
 * NULL entries, and the array is handed to the SAX startElement
 * callback. The value strings are freed afterwards.
 * NOTE(review): this listing elides lines (the embedded numbering skips),
 * so the declarations of attvalue, nbatts and i, several error paths and
 * the return statements (other than the first guard) are not visible.
 */
8444 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8445 const xmlChar *name;
8446 const xmlChar *attname;
/* Work on local copies of the attribute array and its capacity kept on the context. */
8448 const xmlChar **atts = ctxt->atts;
8450 int maxatts = ctxt->maxatts;
8453 if (RAW != '<') return(NULL);
8456 name = xmlParseName(ctxt);
8458 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8459 "xmlParseStartTag: invalid element name\n");
8464 * Now parse the attributes, it ends up with the ending
8471 while (((RAW != '>') &&
8472 ((RAW != '/') || (NXT(1) != '>')) &&
8473 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Snapshot of the position, used at the end of the iteration to detect lack of progress. */
8474 const xmlChar *q = CUR_PTR;
8475 unsigned int cons = ctxt->input->consumed;
8477 attname = xmlParseAttribute(ctxt, &attvalue);
8478 if ((attname != NULL) && (attvalue != NULL)) {
8480 * [ WFC: Unique Att Spec ]
8481 * No attribute name may appear more than once in the same
8482 * start-tag or empty-element tag.
/* Names sit at the even indexes of atts, hence the step of 2. */
8484 for (i = 0; i < nbatts;i += 2) {
8485 if (xmlStrEqual(atts[i], attname)) {
8486 xmlErrAttributeDup(ctxt, NULL, attname);
8492 * Add the pair to atts
8495 maxatts = 22; /* allow for 10 attrs by default */
8496 atts = (const xmlChar **)
8497 xmlMalloc(maxatts * sizeof(xmlChar *));
8499 xmlErrMemory(ctxt, NULL);
8500 if (attvalue != NULL)
8505 ctxt->maxatts = maxatts;
/* Grow when the new pair plus the two trailing NULL entries would not fit. */
8506 } else if (nbatts + 4 > maxatts) {
8510 n = (const xmlChar **) xmlRealloc((void *) atts,
8511 maxatts * sizeof(const xmlChar *));
8513 xmlErrMemory(ctxt, NULL);
8514 if (attvalue != NULL)
8520 ctxt->maxatts = maxatts;
/* Append the pair and keep the array terminated by two NULL entries. */
8522 atts[nbatts++] = attname;
8523 atts[nbatts++] = attvalue;
8524 atts[nbatts] = NULL;
8525 atts[nbatts + 1] = NULL;
8527 if (attvalue != NULL)
8534 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8536 if (SKIP_BLANKS == 0) {
8537 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8538 "attributes construct error\n");
/* No input consumed and no attribute produced: report an internal error. */
8540 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8541 (attname == NULL) && (attvalue == NULL)) {
8542 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8543 "xmlParseStartTag: problem parsing attributes\n");
8551 * SAX: Start of Element !
8553 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8554 (!ctxt->disableSAX)) {
/* Two call forms: with the collected atts or with NULL; the selecting condition is elided here. */
8556 ctxt->sax->startElement(ctxt->userData, name, atts);
8558 ctxt->sax->startElement(ctxt->userData, name, NULL);
8562 /* Free only the content strings */
/* Values sit at the odd indexes of atts; the names are not freed here. */
8563 for (i = 1;i < nbatts;i+=2)
8564 if (atts[i] != NULL)
8565 xmlFree((xmlChar *) atts[i]);
8572 * @ctxt: an XML parser context
8573 * @line: line of the start tag
8574 * @nsNr: number of namespaces on the start tag
8576 * parse an end of tag
8578 * [42] ETag ::= '</' Name S? '>'
8582 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * xmlParseEndTag1: SAX1 end-tag handler for the element named ctxt->name.
 * @ctxt: an XML parser context; the visible code reads ctxt->name,
 *        ctxt->sax, ctxt->userData and ctxt->disableSAX
 * @line: line number printed in the tag-mismatch error message
 *
 * Visible steps: require a leading "</" (XML_ERR_LTSLASH_REQUIRED
 * otherwise), parse the name with xmlParseNameAndCompare() against
 * ctxt->name, require a closing > (XML_ERR_GT_REQUIRED otherwise), raise
 * XML_ERR_TAG_NAME_MISMATCH when the returned pointer is not (xmlChar*)1
 * (a NULL result is printed as "unparseable"), then call the SAX
 * endElement callback when one is set and SAX is not disabled.
 *
 * NOTE(review): the embedded numbering skips values inside this block
 * (for example 8587 to 8590 and 8592 to 8597), so some statements and
 * closing braces are presumably not shown; confirm against the full file.
 */
8586 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8587 const xmlChar *name;
8590 if ((RAW != '<') || (NXT(1) != '/')) {
8591 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8592 "xmlParseEndTag: '</' not found\n");
8597 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8600 * We should definitely be at the ending "S? '>'" part
8604 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8605 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8610 * [ WFC: Element Type Match ]
8611 * The Name in an element's end-tag must match the element type in the
8615 if (name != (xmlChar*)1) {
8616 if (name == NULL) name = BAD_CAST "unparseable";
8617 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8618 "Opening and ending tag mismatch: %s line %d and %s\n",
8619 ctxt->name, line, name);
8625 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8626 (!ctxt->disableSAX))
8627 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8636 * @ctxt: an XML parser context
8638 * parse an end of tag
8640 * [42] ETag ::= '</' Name S? '>'
8644 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * xmlParseEndTag: thin wrapper that forwards to xmlParseEndTag1() with a
 * line argument of 0.
 * @ctxt: an XML parser context, passed through unchanged
 */
8648 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8649 xmlParseEndTag1(ctxt, 0);
8651 #endif /* LIBXML_SAX1_ENABLED */
8653 /************************************************************************
8655 * SAX 2 specific operations *
8657 ************************************************************************/
8661 * @ctxt: an XML parser context
8662 * @prefix: the prefix to lookup
8664 * Lookup the namespace name for the @prefix (which can be NULL)
8665 * The prefix must come from the @ctxt->dict dictionary
8667 * Returns the namespace name or NULL if not bound
/*
 * xmlGetNamespace: look up the namespace name bound to @prefix.
 * @ctxt: an XML parser context; the visible code reads str_xml,
 *        str_xml_ns, nsNr and nsTab
 * @prefix: prefix to look up, compared by pointer identity
 *
 * A prefix equal to ctxt->str_xml maps directly to ctxt->str_xml_ns.
 * Otherwise ctxt->nsTab is scanned from index nsNr - 2 down to 0 in steps
 * of two, with the prefix stored at nsTab[i] and its value at
 * nsTab[i + 1]; entries at higher indexes are therefore checked first.
 * A NULL prefix whose stored value is the empty string is special-cased
 * (see the test on the line numbered 8676).
 *
 * NOTE(review): the statement taken in that special case and the final
 * fall-through return are not visible in this listing; confirm what is
 * returned there against the full file.
 */
8669 static const xmlChar *
8670 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8673 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8674 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8675 if (ctxt->nsTab[i] == prefix) {
8676 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8678 return(ctxt->nsTab[i + 1]);
8685 * @ctxt: an XML parser context
8686 * @prefix: pointer to store the prefix part
8688 * parse an XML Namespace QName
8690 * [6] QName ::= (Prefix ':')? LocalPart
8691 * [7] Prefix ::= NCName
8692 * [8] LocalPart ::= NCName
8694 * Returns the Name parsed or NULL
/*
 * xmlParseQName: parse a namespace-qualified name at the current input
 * position.
 * @ctxt: an XML parser context
 * @prefix: out parameter for the prefix part
 *
 * Visible flow: the first component is read with xmlParseNCName(); a
 * fallback to xmlParseName() is paired with an XML_NS_ERR_QNAME report.
 * A second xmlParseNCName() call follows, presumably for the local part
 * after a colon. In the recovery branches the code reports
 * XML_NS_ERR_QNAME, re-reads with xmlParseNmtoken() or xmlParseName(),
 * joins the pieces with xmlBuildQName(), interns the result in ctxt->dict
 * through xmlDictLookup() and frees the temporary string with xmlFree().
 *
 * NOTE(review): the conditions guarding each branch, the stores through
 * @prefix and the return statements fall on lines that this listing
 * omits (numbering gaps such as 8699 to 8703 and 8709 to 8719); confirm
 * the exact branch structure against the full file.
 */
8697 static const xmlChar *
8698 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8699 const xmlChar *l, *p;
8703 l = xmlParseNCName(ctxt);
8706 l = xmlParseName(ctxt);
8708 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8709 "Failed to parse QName '%s'\n", l, NULL, NULL);
8719 l = xmlParseNCName(ctxt);
8723 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8724 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8725 l = xmlParseNmtoken(ctxt);
8727 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8729 tmp = xmlBuildQName(l, p, NULL, 0);
8732 p = xmlDictLookup(ctxt->dict, tmp, -1);
8733 if (tmp != NULL) xmlFree(tmp);
8740 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8741 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8743 tmp = (xmlChar *) xmlParseName(ctxt);
8745 tmp = xmlBuildQName(tmp, l, NULL, 0);
8746 l = xmlDictLookup(ctxt->dict, tmp, -1);
8747 if (tmp != NULL) xmlFree(tmp);
8751 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8752 l = xmlDictLookup(ctxt->dict, tmp, -1);
8753 if (tmp != NULL) xmlFree(tmp);
8764 * xmlParseQNameAndCompare:
8765 * @ctxt: an XML parser context
8766 * @name: the localname
8767 * @prefix: the prefix, if any.
8769 * parse an XML name and compares for match
8770 * (specialized for endtag parsing)
8772 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8773 * and the name for mismatch
/*
 * xmlParseQNameAndCompare: end-tag helper that checks the upcoming QName
 * against an expected @prefix and @name.
 * @ctxt: an XML parser context
 * @name: expected local name
 * @prefix: expected prefix; when NULL the work is delegated to
 *          xmlParseNameAndCompare()
 *
 * Fast path: bytes starting at ctxt->input->cur are compared in place,
 * first up to a colon and then up to a > or a blank; on a full match
 * ctxt->input->cur is advanced and (const xmlChar*) 1 is returned.
 * Slow path: xmlParseQName() is called and its results are compared with
 * @name and @prefix by pointer; equality again yields (const xmlChar*) 1.
 *
 * NOTE(review): the initialisation of cmp, the pointer increments inside
 * both loops and the final return are not visible in this listing;
 * presumably cmp walks @prefix first and @name second. Confirm against
 * the full file.
 */
8776 static const xmlChar *
8777 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8778 xmlChar const *prefix) {
8782 const xmlChar *prefix2;
8784 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8787 in = ctxt->input->cur;
8790 while (*in != 0 && *in == *cmp) {
8794 if ((*cmp == 0) && (*in == ':')) {
8797 while (*in != 0 && *in == *cmp) {
8801 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8803 ctxt->input->cur = in;
8804 return((const xmlChar*) 1);
8808 * all strings coms from the dictionary, equality can be done directly
8810 ret = xmlParseQName (ctxt, &prefix2);
8811 if ((ret == name) && (prefix == prefix2))
8812 return((const xmlChar*) 1);
8817 * xmlParseAttValueInternal:
8818 * @ctxt: an XML parser context
8819 * @len: attribute len result
8820 * @alloc: whether the attribute was reallocated as a new string
8821 * @normalize: if 1 then further non-CDATA normalization must be done
8823 * parse a value for an attribute.
8824 * NOTE: if no normalization is needed, the routine will return pointers
8825 * directly from the data buffer.
8827 * 3.3.3 Attribute-Value Normalization:
8828 * Before the value of an attribute is passed to the application or
8829 * checked for validity, the XML processor must normalize it as follows:
8830 * - a character reference is processed by appending the referenced
8831 * character to the attribute value
8832 * - an entity reference is processed by recursively processing the
8833 * replacement text of the entity
8834 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8835 * appending #x20 to the normalized value, except that only a single
8836 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8837 * parsed entity or the literal entity value of an internal parsed entity
8838 * - other characters are processed by appending them to the normalized value
8839 * If the declared value is not CDATA, then the XML processor must further
8840 * process the normalized attribute value by discarding any leading and
8841 * trailing space (#x20) characters, and by replacing sequences of space
8842 * (#x20) characters by a single space (#x20) character.
8843 * All attributes for which no declaration has been read should be treated
8844 * by a non-validating parser as if declared CDATA.
8846 * Returns the AttValue parsed or NULL. The value has to be freed by the
8847 * caller if it was copied, this can be detected by val[*len] == 0.
/*
 * xmlParseAttValueInternal: parse a quoted attribute value, avoiding a
 * copy when the value can be used straight from the input buffer.
 * @ctxt: an XML parser context
 * @len: receives the value length (last - start on the fast path); also
 *       forwarded to xmlParseAttValueComplex()
 * @alloc: when non-NULL it is set to 1 next to the xmlStrndup() copy and
 *         before the hand-off to xmlParseAttValueComplex(), and to 0 on
 *         the exit that restores input->line and input->col
 * @normalize: forwarded to xmlParseAttValueComplex(); the visible code
 *             also contains loops that skip leading and trailing blanks
 *
 * Visible flow:
 *  - the current byte must be a double or single quote, otherwise
 *    XML_ERR_ATTRIBUTE_NOT_STARTED is raised;
 *  - ctxt->instate becomes XML_PARSER_ATTRIBUTE_VALUE;
 *  - the scanning loops accept bytes in 0x20..0x7f other than the limit
 *    character, & and <; stopping on anything but the limit character
 *    jumps to need_complex;
 *  - wherever ctxt->input->base may have changed, start (and last in one
 *    place) are shifted by the base delta and end is reloaded from
 *    ctxt->input->end;
 *  - a scanned span longer than XML_MAX_TEXT_LENGTH without
 *    XML_PARSE_HUGE raises XML_ERR_ATTRIBUTE_NOT_FINISHED.
 *
 * NOTE(review): "while ((last[-1] == 0x20) && (last > start))" reads
 * last[-1] before testing last > start; swapping the operands would keep
 * the read inside the scanned span. Confirm whether that matters here.
 * NOTE(review): many short lines (increments, returns, labels, closing
 * braces) are absent from this listing, judging by the gaps in the
 * embedded numbering; confirm details against the full file.
 */
8851 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8855 const xmlChar *in = NULL, *start, *end, *last;
8856 xmlChar *ret = NULL;
8860 in = (xmlChar *) CUR_PTR;
8861 line = ctxt->input->line;
8862 col = ctxt->input->col;
8863 if (*in != '"' && *in != '\'') {
8864 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8867 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8870 * try to handle in this routine the most common case where no
8871 * allocation of a new string is required and where content is
8876 end = ctxt->input->end;
8879 const xmlChar *oldbase = ctxt->input->base;
8881 if (oldbase != ctxt->input->base) {
8882 long delta = ctxt->input->base - oldbase;
8883 start = start + delta;
8886 end = ctxt->input->end;
8890 * Skip any leading spaces
8892 while ((in < end) && (*in != limit) &&
8893 ((*in == 0x20) || (*in == 0x9) ||
8894 (*in == 0xA) || (*in == 0xD))) {
8903 const xmlChar *oldbase = ctxt->input->base;
8905 if (ctxt->instate == XML_PARSER_EOF)
8907 if (oldbase != ctxt->input->base) {
8908 long delta = ctxt->input->base - oldbase;
8909 start = start + delta;
8912 end = ctxt->input->end;
8913 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8914 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8915 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8916 "AttValue length too long\n");
8921 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8922 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8924 if ((*in++ == 0x20) && (*in == 0x20)) break;
8926 const xmlChar *oldbase = ctxt->input->base;
8928 if (ctxt->instate == XML_PARSER_EOF)
8930 if (oldbase != ctxt->input->base) {
8931 long delta = ctxt->input->base - oldbase;
8932 start = start + delta;
8935 end = ctxt->input->end;
8936 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8937 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8938 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8939 "AttValue length too long\n");
8946 * skip the trailing blanks
8948 while ((last[-1] == 0x20) && (last > start)) last--;
8949 while ((in < end) && (*in != limit) &&
8950 ((*in == 0x20) || (*in == 0x9) ||
8951 (*in == 0xA) || (*in == 0xD))) {
8959 const xmlChar *oldbase = ctxt->input->base;
8961 if (ctxt->instate == XML_PARSER_EOF)
8963 if (oldbase != ctxt->input->base) {
8964 long delta = ctxt->input->base - oldbase;
8965 start = start + delta;
8967 last = last + delta;
8969 end = ctxt->input->end;
8970 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8971 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8972 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8973 "AttValue length too long\n");
8978 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8979 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8980 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8981 "AttValue length too long\n");
8984 if (*in != limit) goto need_complex;
8986 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8987 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8991 const xmlChar *oldbase = ctxt->input->base;
8993 if (ctxt->instate == XML_PARSER_EOF)
8995 if (oldbase != ctxt->input->base) {
8996 long delta = ctxt->input->base - oldbase;
8997 start = start + delta;
9000 end = ctxt->input->end;
9001 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9002 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9003 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9004 "AttValue length too long\n");
9010 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9011 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9012 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9013 "AttValue length too long\n");
9016 if (*in != limit) goto need_complex;
9021 *len = last - start;
9022 ret = (xmlChar *) start;
9024 if (alloc) *alloc = 1;
9025 ret = xmlStrndup(start, last - start);
9028 ctxt->input->line = line;
9029 ctxt->input->col = col;
9030 if (alloc) *alloc = 0;
9033 if (alloc) *alloc = 1;
9034 return xmlParseAttValueComplex(ctxt, len, normalize);
9038 * xmlParseAttribute2:
9039 * @ctxt: an XML parser context
9040 * @pref: the element prefix
9041 * @elem: the element name
9042 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9043 * @value: a xmlChar ** used to store the value of the attribute
9044 * @len: an int * to save the length of the attribute
9045 * @alloc: an int * to indicate if the attribute was allocated
9047 * parse an attribute in the new SAX2 framework.
9049 * Returns the attribute name, and the value in *value.
/*
 * xmlParseAttribute2: parse one attribute (name and value) on the SAX2
 * start-tag path.
 * @ctxt: an XML parser context
 * @pref: prefix of the owning element, used for the attsSpecial lookup
 * @elem: name of the owning element, used for the attsSpecial lookup
 * @prefix: receives the attribute prefix from xmlParseQName()
 * @value: presumably receives the attribute value; the store itself is
 *         not visible in this listing
 * @len: forwarded to xmlParseAttValueInternal() and
 *       xmlAttrNormalizeSpace2()
 * @alloc: forwarded to xmlParseAttValueInternal()
 *
 * Visible flow: parse the QName (XML_ERR_NAME_REQUIRED on failure), look
 * up a per-attribute type in ctxt->attsSpecial when that table exists,
 * parse the value, run xmlAttrNormalizeSpace2() as a second
 * space-normalisation pass in the case described by the inline comment,
 * and set ctxt->instate back to XML_PARSER_CONTENT. A missing value is
 * reported as XML_ERR_ATTRIBUTE_WITHOUT_VALUE.
 * For attributes whose prefix is ctxt->str_xml a copy of the value made
 * with xmlStrndup() is checked: lang (pedantic mode only) goes through
 * xmlCheckLanguageID() with XML_WAR_LANG_VALUE on failure, and space is
 * compared with the strings default and preserve, with
 * XML_WAR_SPACE_VALUE for anything else. The copy is released with
 * xmlFree().
 *
 * NOTE(review): gaps in the embedded numbering show that short lines
 * (conditions, assignments, returns, closing braces) are missing from
 * this listing; confirm details against the full file.
 */
9052 static const xmlChar *
9053 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9054 const xmlChar * pref, const xmlChar * elem,
9055 const xmlChar ** prefix, xmlChar ** value,
9056 int *len, int *alloc)
9058 const xmlChar *name;
9059 xmlChar *val, *internal_val = NULL;
9064 name = xmlParseQName(ctxt, prefix);
9066 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9067 "error parsing attribute name\n");
9072 * get the type if needed
9074 if (ctxt->attsSpecial != NULL) {
9077 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9078 pref, elem, *prefix, name);
9090 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9093 * Sometimes a second normalisation pass for spaces is needed
9094 * but that only happens if charrefs or entities refernces
9095 * have been used in the attribute value, i.e. the attribute
9096 * value have been extracted in an allocated string already.
9099 const xmlChar *val2;
9101 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9102 if ((val2 != NULL) && (val2 != val)) {
9104 val = (xmlChar *) val2;
9108 ctxt->instate = XML_PARSER_CONTENT;
9110 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9111 "Specification mandates value for attribute %s\n",
9116 if (*prefix == ctxt->str_xml) {
9118 * Check that xml:lang conforms to the specification
9119 * No more registered as an error, just generate a warning now
9120 * since this was deprecated in XML second edition
9122 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9123 internal_val = xmlStrndup(val, *len);
9124 if (!xmlCheckLanguageID(internal_val)) {
9125 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9126 "Malformed value for xml:lang : %s\n",
9127 internal_val, NULL);
9132 * Check that xml:space conforms to the specification
9134 if (xmlStrEqual(name, BAD_CAST "space")) {
9135 internal_val = xmlStrndup(val, *len);
9136 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9138 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9141 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9142 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9143 internal_val, NULL);
9147 xmlFree(internal_val);
9155 * xmlParseStartTag2:
9156 * @ctxt: an XML parser context
9158 * parse a start of tag either for rule element or
9159 * EmptyElement. In both cases we don't parse the tag closing chars.
9160 * This routine is called when running SAX2 parsing
9162 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9164 * [ WFC: Unique Att Spec ]
9165 * No attribute name may appear more than once in the same start-tag or
9166 * empty-element tag.
9168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9170 * [ WFC: Unique Att Spec ]
9171 * No attribute name may appear more than once in the same start-tag or
9172 * empty-element tag.
9176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9180 * Returns the element name parsed
/*
 * xmlParseStartTag2: SAX2 start-tag parser (element name, attributes and
 * namespace declarations).
 * @ctxt: an XML parser context
 * @pref: out parameter, presumably for the element prefix; the store is
 *        not visible in this listing
 * @URI: out parameter, presumably for the element namespace name; the
 *       store is not visible in this listing
 * @tlen: receives the number of input bytes consumed by the tag name
 *        (input->cur - input->base - cur)
 *
 * Returns NULL at once when the current byte is not <.
 *
 * Phases visible below:
 *  1. Record the start offset and the input id, then parse the element
 *     QName (XML_ERR_NAME_REQUIRED on failure).
 *  2. Attribute loop: each attribute comes from xmlParseAttribute2().
 *     - Default-namespace and prefixed xmlns declarations have their
 *       value interned in ctxt->dict, are checked (URI syntax, reserved
 *       xml and xmlns names, duplicates among the nbNs declarations of
 *       this tag) and are pushed with nsPush().
 *     - Other attributes are appended to atts in groups of five slots:
 *       name, prefix, a third slot, value, and a fifth slot that the
 *       rebasing code labels valuend. The third slot temporarily holds
 *       ctxt->input->base for non-allocated values.
 *  3. If the input id changed, raise XML_ERR_INTERNAL_ERROR.
 *  4. Rebase the value pointers by (ctxt->input->base - saved base) and
 *     reset the third slot to NULL.
 *  5. Merge defaults from ctxt->attsDefault, skipping names already
 *     declared on this tag or already present in atts; defaulted xmlns
 *     declarations are pushed only when they differ from the binding
 *     returned by xmlGetNamespace().
 *  6. Resolve each attribute prefix with xmlGetNamespace() and check
 *     uniqueness by (name, prefix) and by (name, namespace).
 *  7. Resolve the element namespace, call sax->startElementNs, then free
 *     the values flagged in ctxt->attallocs.
 *
 * NOTE(review): in the defaulted prefixed-xmlns branch the comparison is
 * "nsname != defaults->values[2]", while the nsPush() right after it and
 * the default-namespace branch both use defaults->values[5 * i + 2].
 * That looks like a missing "5 * i +"; confirm and fix in the full file.
 * NOTE(review): gaps in the embedded numbering show that short lines
 * (declarations, gotos, labels, closing braces) are missing from this
 * listing; confirm details against the full file.
 */
9183 static const xmlChar *
9184 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9185 const xmlChar **URI, int *tlen) {
9186 const xmlChar *localname;
9187 const xmlChar *prefix;
9188 const xmlChar *attname;
9189 const xmlChar *aprefix;
9190 const xmlChar *nsname;
9192 const xmlChar **atts = ctxt->atts;
9193 int maxatts = ctxt->maxatts;
9194 int nratts, nbatts, nbdef, inputid;
9195 int i, j, nbNs, attval;
9197 int nsNr = ctxt->nsNr;
9199 if (RAW != '<') return(NULL);
9203 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9204 * point since the attribute values may be stored as pointers to
9205 * the buffer and calling SHRINK would destroy them !
9206 * The Shrinking is only possible once the full set of attribute
9207 * callbacks have been done.
9210 cur = ctxt->input->cur - ctxt->input->base;
9211 inputid = ctxt->input->id;
9217 /* Forget any namespaces added during an earlier parse of this element. */
9220 localname = xmlParseQName(ctxt, &prefix);
9221 if (localname == NULL) {
9222 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9223 "StartTag: invalid element name\n");
9226 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9229 * Now parse the attributes, it ends up with the ending
9236 while (((RAW != '>') &&
9237 ((RAW != '/') || (NXT(1) != '>')) &&
9238 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9239 const xmlChar *q = CUR_PTR;
9240 unsigned int cons = ctxt->input->consumed;
9241 int len = -1, alloc = 0;
9243 attname = xmlParseAttribute2(ctxt, prefix, localname,
9244 &aprefix, &attvalue, &len, &alloc);
9245 if ((attname == NULL) || (attvalue == NULL))
9247 if (len < 0) len = xmlStrlen(attvalue);
9249 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9250 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9254 xmlErrMemory(ctxt, "dictionary allocation failure");
9255 if ((attvalue != NULL) && (alloc != 0))
9260 uri = xmlParseURI((const char *) URL);
9262 xmlNsErr(ctxt, XML_WAR_NS_URI,
9263 "xmlns: '%s' is not a valid URI\n",
9266 if (uri->scheme == NULL) {
9267 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9268 "xmlns: URI %s is not absolute\n",
9273 if (URL == ctxt->str_xml_ns) {
9274 if (attname != ctxt->str_xml) {
9275 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9276 "xml namespace URI cannot be the default namespace\n",
9283 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9284 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9285 "reuse of the xmlns namespace name is forbidden\n",
9291 * check that it's not a defined namespace
9293 for (j = 1;j <= nbNs;j++)
9294 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9297 xmlErrAttributeDup(ctxt, NULL, attname);
9299 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9301 } else if (aprefix == ctxt->str_xmlns) {
9302 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9305 if (attname == ctxt->str_xml) {
9306 if (URL != ctxt->str_xml_ns) {
9307 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9308 "xml namespace prefix mapped to wrong URI\n",
9312 * Do not keep a namespace definition node
9316 if (URL == ctxt->str_xml_ns) {
9317 if (attname != ctxt->str_xml) {
9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319 "xml namespace URI mapped to wrong prefix\n",
9324 if (attname == ctxt->str_xmlns) {
9325 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9326 "redefinition of the xmlns prefix is forbidden\n",
9332 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9333 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9334 "reuse of the xmlns namespace name is forbidden\n",
9338 if ((URL == NULL) || (URL[0] == 0)) {
9339 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9340 "xmlns:%s: Empty XML namespace is not allowed\n",
9341 attname, NULL, NULL);
9344 uri = xmlParseURI((const char *) URL);
9346 xmlNsErr(ctxt, XML_WAR_NS_URI,
9347 "xmlns:%s: '%s' is not a valid URI\n",
9348 attname, URL, NULL);
9350 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9351 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9352 "xmlns:%s: URI %s is not absolute\n",
9353 attname, URL, NULL);
9360 * check that it's not a defined namespace
9362 for (j = 1;j <= nbNs;j++)
9363 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9366 xmlErrAttributeDup(ctxt, aprefix, attname);
9368 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9372 * Add the pair to atts
9374 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9375 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9378 maxatts = ctxt->maxatts;
9381 ctxt->attallocs[nratts++] = alloc;
9382 atts[nbatts++] = attname;
9383 atts[nbatts++] = aprefix;
9385 * The namespace URI field is used temporarily to point at the
9386 * base of the current input buffer for non-alloced attributes.
9387 * When the input buffer is reallocated, all the pointers become
9388 * invalid, but they can be reconstructed later.
9391 atts[nbatts++] = NULL;
9393 atts[nbatts++] = ctxt->input->base;
9394 atts[nbatts++] = attvalue;
9396 atts[nbatts++] = attvalue;
9398 * tag if some deallocation is needed
9400 if (alloc != 0) attval = 1;
9401 attvalue = NULL; /* moved into atts */
9405 if ((attvalue != NULL) && (alloc != 0)) {
9411 if (ctxt->instate == XML_PARSER_EOF)
9413 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9415 if (SKIP_BLANKS == 0) {
9416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9417 "attributes construct error\n");
9420 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9421 (attname == NULL) && (attvalue == NULL)) {
9422 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9423 "xmlParseStartTag: problem parsing attributes\n");
9429 if (ctxt->input->id != inputid) {
9430 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9431 "Unexpected change of input\n");
9436 /* Reconstruct attribute value pointers. */
9437 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9438 if (atts[i+2] != NULL) {
9440 * Arithmetic on dangling pointers is technically undefined
9441 * behavior, but well...
9443 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9444 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9445 atts[i+3] += offset; /* value */
9446 atts[i+4] += offset; /* valuend */
9451 * The attributes defaulting
9453 if (ctxt->attsDefault != NULL) {
9454 xmlDefAttrsPtr defaults;
9456 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9457 if (defaults != NULL) {
9458 for (i = 0;i < defaults->nbAttrs;i++) {
9459 attname = defaults->values[5 * i];
9460 aprefix = defaults->values[5 * i + 1];
9463 * special work for namespaces defaulted defs
9465 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9467 * check that it's not a defined namespace
9469 for (j = 1;j <= nbNs;j++)
9470 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9472 if (j <= nbNs) continue;
9474 nsname = xmlGetNamespace(ctxt, NULL);
9475 if (nsname != defaults->values[5 * i + 2]) {
9476 if (nsPush(ctxt, NULL,
9477 defaults->values[5 * i + 2]) > 0)
9480 } else if (aprefix == ctxt->str_xmlns) {
9482 * check that it's not a defined namespace
9484 for (j = 1;j <= nbNs;j++)
9485 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9487 if (j <= nbNs) continue;
9489 nsname = xmlGetNamespace(ctxt, attname);
9490 if (nsname != defaults->values[2]) {
9491 if (nsPush(ctxt, attname,
9492 defaults->values[5 * i + 2]) > 0)
9497 * check that it's not a defined attribute
9499 for (j = 0;j < nbatts;j+=5) {
9500 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9503 if (j < nbatts) continue;
9505 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9506 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9509 maxatts = ctxt->maxatts;
9512 atts[nbatts++] = attname;
9513 atts[nbatts++] = aprefix;
9514 if (aprefix == NULL)
9515 atts[nbatts++] = NULL;
9517 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9518 atts[nbatts++] = defaults->values[5 * i + 2];
9519 atts[nbatts++] = defaults->values[5 * i + 3];
9520 if ((ctxt->standalone == 1) &&
9521 (defaults->values[5 * i + 4] != NULL)) {
9522 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9523 "standalone: attribute %s on %s defaulted from external subset\n",
9524 attname, localname);
9533 * The attributes checkings
9535 for (i = 0; i < nbatts;i += 5) {
9537 * The default namespace does not apply to attribute names.
9539 if (atts[i + 1] != NULL) {
9540 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9541 if (nsname == NULL) {
9542 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9543 "Namespace prefix %s for %s on %s is not defined\n",
9544 atts[i + 1], atts[i], localname);
9546 atts[i + 2] = nsname;
9550 * [ WFC: Unique Att Spec ]
9551 * No attribute name may appear more than once in the same
9552 * start-tag or empty-element tag.
9553 * As extended by the Namespace in XML REC.
9555 for (j = 0; j < i;j += 5) {
9556 if (atts[i] == atts[j]) {
9557 if (atts[i+1] == atts[j+1]) {
9558 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9561 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9562 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9563 "Namespaced Attribute %s in '%s' redefined\n",
9564 atts[i], nsname, NULL);
9571 nsname = xmlGetNamespace(ctxt, prefix);
9572 if ((prefix != NULL) && (nsname == NULL)) {
9573 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9574 "Namespace prefix %s on %s is not defined\n",
9575 prefix, localname, NULL);
9581 * SAX: Start of Element !
9583 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9584 (!ctxt->disableSAX)) {
9586 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9587 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9588 nbatts / 5, nbdef, atts);
9590 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9591 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9596 * Free up attribute allocated strings if needed
9599 for (i = 3,j = 0; j < nratts;i += 5,j++)
9600 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9601 xmlFree((xmlChar *) atts[i]);
9609 * @ctxt: an XML parser context
9610 * @line: line of the start tag
9611 * @nsNr: number of namespaces on the start tag
9613 * parse an end of tag
9615 * [42] ETag ::= '</' Name S? '>'
9619 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * xmlParseEndTag2: SAX2 end-tag handler for the element named ctxt->name.
 * @ctxt: an XML parser context
 * @prefix: element prefix, passed to xmlParseQNameAndCompare() and to the
 *          endElementNs callback
 * @URI: element namespace name, passed to the endElementNs callback
 * @line: start-tag line for the mismatch message; when it is 0 and
 *        ctxt->node is set, ctxt->node->line is used instead
 * @nsNr: not referenced on the lines visible in this listing
 * @tlen: byte length of the start-tag name; a positive value enables the
 *        fast path described below
 *
 * Fast path: if at least tlen bytes remain in the input and they equal
 * ctxt->name (xmlStrncmp), input->cur and input->col are advanced by
 * tlen, or by tlen + 1 when the byte after the name is >. Otherwise the
 * name is parsed with xmlParseNameAndCompare() or
 * xmlParseQNameAndCompare().
 * A missing leading "</" raises XML_ERR_LTSLASH_REQUIRED, a missing >
 * raises XML_ERR_GT_REQUIRED, and a compare result other than
 * (xmlChar*)1 raises XML_ERR_TAG_NAME_MISMATCH. The endElementNs callback
 * runs when it is set and SAX is not disabled.
 *
 * NOTE(review): gaps in the embedded numbering show that short lines
 * (declarations, gotos, labels, closing braces) are missing from this
 * listing; confirm details against the full file.
 */
9623 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9624 const xmlChar *URI, int line, int nsNr, int tlen) {
9625 const xmlChar *name;
9629 if ((RAW != '<') || (NXT(1) != '/')) {
9630 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9635 curLength = ctxt->input->end - ctxt->input->cur;
9636 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9637 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9638 if ((curLength >= (size_t)(tlen + 1)) &&
9639 (ctxt->input->cur[tlen] == '>')) {
9640 ctxt->input->cur += tlen + 1;
9641 ctxt->input->col += tlen + 1;
9644 ctxt->input->cur += tlen;
9645 ctxt->input->col += tlen;
9649 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9651 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9655 * We should definitely be at the ending "S? '>'" part
9658 if (ctxt->instate == XML_PARSER_EOF)
9661 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9662 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9667 * [ WFC: Element Type Match ]
9668 * The Name in an element's end-tag must match the element type in the
9672 if (name != (xmlChar*)1) {
9673 if (name == NULL) name = BAD_CAST "unparseable";
9674 if ((line == 0) && (ctxt->node != NULL))
9675 line = ctxt->node->line;
9676 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9677 "Opening and ending tag mismatch: %s line %d and %s\n",
9678 ctxt->name, line, name);
9685 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9686 (!ctxt->disableSAX))
9687 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9697 * @ctxt: an XML parser context
9699 * Parse escaped pure raw content.
9701 * [18] CDSect ::= CDStart CData CDEnd
9703 * [19] CDStart ::= '<![CDATA['
9705 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9707 * [21] CDEnd ::= ']]>'
/*
 * xmlParseCDSect: parse a CDATA section and hand its text to SAX.
 * @ctxt: an XML parser context
 *
 * Visible flow: the input must start with the nine bytes <![CDATA[ and
 * ctxt->instate becomes XML_PARSER_CDATA_SECTION. Two early exits raise
 * XML_ERR_CDATA_NOT_FINISHED and restore XML_PARSER_CONTENT. Characters
 * are then copied into a heap buffer of initial size
 * XML_PARSER_BUFFER_SIZE while cur is a valid Char and the three tracked
 * characters (r, s, cur) are not ] ] >. The buffer is grown with
 * xmlRealloc() once len + 5 reaches size; a size above
 * XML_MAX_TEXT_LENGTH without XML_PARSE_HUGE raises
 * XML_ERR_CDATA_NOT_FINISHED. On success the text is passed to
 * sax->cdataBlock, or to sax->characters when cdataBlock is NULL,
 * provided SAX is not disabled.
 *
 * NOTE(review): gaps in the embedded numbering show that short lines
 * (reads of r, s and cur, size updates, frees, returns, closing braces)
 * are missing from this listing; confirm details against the full file.
 */
9710 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9711 xmlChar *buf = NULL;
9713 int size = XML_PARSER_BUFFER_SIZE;
9719 /* Check 2.6.0 was NXT(0) not RAW */
9720 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9725 ctxt->instate = XML_PARSER_CDATA_SECTION;
9728 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9729 ctxt->instate = XML_PARSER_CONTENT;
9735 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9736 ctxt->instate = XML_PARSER_CONTENT;
9741 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9743 xmlErrMemory(ctxt, NULL);
9746 while (IS_CHAR(cur) &&
9747 ((r != ']') || (s != ']') || (cur != '>'))) {
9748 if (len + 5 >= size) {
9751 if ((size > XML_MAX_TEXT_LENGTH) &&
9752 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9753 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9754 "CData section too big found", NULL);
9758 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9761 xmlErrMemory(ctxt, NULL);
9767 COPY_BUF(rl,buf,len,r);
9775 if (ctxt->instate == XML_PARSER_EOF) {
9785 ctxt->instate = XML_PARSER_CONTENT;
9787 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9788 "CData section not finished\n%.50s\n", buf);
9795 * OK the buffer is to be consumed as cdata.
9797 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9798 if (ctxt->sax->cdataBlock != NULL)
9799 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9800 else if (ctxt->sax->characters != NULL)
9801 ctxt->sax->characters(ctxt->userData, buf, len);
9808 * @ctxt: an XML parser context
9812 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
/*
 * xmlParseContent: parse element content until an end tag, the end of
 * the input or a parser stop.
 * @ctxt: an XML parser context
 *
 * The loop runs while RAW is non-zero, the input is not positioned on
 * "</" and ctxt->instate is not XML_PARSER_EOF. Each iteration
 * dispatches on the bytes at the cursor:
 *  - <?        processing instruction (handler call not visible in this
 *              listing);
 *  - <![CDATA[ xmlParseCDSect();
 *  - <!--      xmlParseComment(), then instate is set back to
 *              XML_PARSER_CONTENT;
 *  - other <   xmlParseElement();
 *  - &         xmlParseReference();
 *  - otherwise xmlParseCharData(ctxt, 0).
 * If an iteration leaves both input->consumed and CUR_PTR unchanged, no
 * progress was made: XML_ERR_INTERNAL_ERROR is raised and the parser is
 * stopped with xmlHaltParser().
 */
9816 xmlParseContent(xmlParserCtxtPtr ctxt) {
9818 while ((RAW != 0) &&
9819 ((RAW != '<') || (NXT(1) != '/')) &&
9820 (ctxt->instate != XML_PARSER_EOF)) {
9821 const xmlChar *test = CUR_PTR;
9822 unsigned int cons = ctxt->input->consumed;
9823 const xmlChar *cur = ctxt->input->cur;
9826 * First case : a Processing Instruction.
9828 if ((*cur == '<') && (cur[1] == '?')) {
9833 * Second case : a CDSection
9835 /* 2.6.0 test was *cur not RAW */
9836 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9837 xmlParseCDSect(ctxt);
9841 * Third case : a comment
9843 else if ((*cur == '<') && (NXT(1) == '!') &&
9844 (NXT(2) == '-') && (NXT(3) == '-')) {
9845 xmlParseComment(ctxt);
9846 ctxt->instate = XML_PARSER_CONTENT;
9850 * Fourth case : a sub-element.
9852 else if (*cur == '<') {
9853 xmlParseElement(ctxt);
9857 * Fifth case : a reference. If if has not been resolved,
9858 * parsing returns it's Name, create the node
9861 else if (*cur == '&') {
9862 xmlParseReference(ctxt);
9866 * Last case, text. Note that References are handled directly.
9869 xmlParseCharData(ctxt, 0);
9875 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9876 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9877 "detected an error in element content\n");
9878 xmlHaltParser(ctxt);
9886 * @ctxt: an XML parser context
9888 * parse an XML element, this is highly recursive
9890 * [39] element ::= EmptyElemTag | STag content ETag
9892 * [ WFC: Element Type Match ]
9893 * The Name in an element's end-tag must match the element type in the
/*
 * xmlParseElement: parse one element (start tag, content, end tag).
 * @ctxt: an XML parser context
 *
 * Visible flow:
 *  - refuse to continue when ctxt->nameNr exceeds xmlParserMaxDepth
 *    unless XML_PARSE_HUGE is set (XML_ERR_INTERNAL_ERROR, then
 *    xmlHaltParser);
 *  - when ctxt->record_info is set, remember the begin position and line;
 *  - push a space-handling value: -1 when the stack is empty or its top
 *    is -2, otherwise a copy of the current top;
 *  - parse the start tag with xmlParseStartTag2(), or with
 *    xmlParseStartTag() under LIBXML_SAX1_ENABLED, and push the name
 *    with namePush();
 *  - when validating a well-formed document and ctxt->node is
 *    ctxt->myDoc->children, fold xmlValidateRoot() into ctxt->valid;
 *  - an empty-element tag fires endElementNs or endElement directly;
 *  - otherwise parse the content with xmlParseContent() and then the end
 *    tag with xmlParseEndTag2() or xmlParseEndTag1();
 *  - on the early exits shown, namespaces added since entry are removed
 *    with nsPop(ctxt, ctxt->nsNr - nsNr);
 *  - node info is recorded with xmlParserAddNodeInfo() when ret is
 *    non-NULL and record_info is set.
 * Errors raised here: XML_ERR_GT_REQUIRED when the start tag is not
 * closed and XML_ERR_TAG_NOT_FINISHED when the data ends early.
 *
 * NOTE(review): the conditions that choose between the SAX2 and SAX1
 * calls sit on lines missing from this listing, presumably a test on
 * ctxt->sax2; other short lines (returns, pops, closing braces) are
 * missing as well. Confirm against the full file.
 */
9899 xmlParseElement(xmlParserCtxtPtr ctxt) {
9900 const xmlChar *name;
9901 const xmlChar *prefix = NULL;
9902 const xmlChar *URI = NULL;
9903 xmlParserNodeInfo node_info;
9906 int nsNr = ctxt->nsNr;
9908 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9909 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9910 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9911 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9913 xmlHaltParser(ctxt);
9917 /* Capture start position */
9918 if (ctxt->record_info) {
9919 node_info.begin_pos = ctxt->input->consumed +
9920 (CUR_PTR - ctxt->input->base);
9921 node_info.begin_line = ctxt->input->line;
9924 if (ctxt->spaceNr == 0)
9925 spacePush(ctxt, -1);
9926 else if (*ctxt->space == -2)
9927 spacePush(ctxt, -1);
9929 spacePush(ctxt, *ctxt->space);
9931 line = ctxt->input->line;
9932 #ifdef LIBXML_SAX1_ENABLED
9934 #endif /* LIBXML_SAX1_ENABLED */
9935 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9936 #ifdef LIBXML_SAX1_ENABLED
9938 name = xmlParseStartTag(ctxt);
9939 #endif /* LIBXML_SAX1_ENABLED */
9940 if (ctxt->instate == XML_PARSER_EOF)
9946 namePush(ctxt, name);
9949 #ifdef LIBXML_VALID_ENABLED
9951 * [ VC: Root Element Type ]
9952 * The Name in the document type declaration must match the element
9953 * type of the root element.
9955 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9956 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9957 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9958 #endif /* LIBXML_VALID_ENABLED */
9961 * Check for an Empty Element.
9963 if ((RAW == '/') && (NXT(1) == '>')) {
9966 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9967 (!ctxt->disableSAX))
9968 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9969 #ifdef LIBXML_SAX1_ENABLED
9971 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9972 (!ctxt->disableSAX))
9973 ctxt->sax->endElement(ctxt->userData, name);
9974 #endif /* LIBXML_SAX1_ENABLED */
9978 if (nsNr != ctxt->nsNr)
9979 nsPop(ctxt, ctxt->nsNr - nsNr);
9980 if ( ret != NULL && ctxt->record_info ) {
9981 node_info.end_pos = ctxt->input->consumed +
9982 (CUR_PTR - ctxt->input->base);
9983 node_info.end_line = ctxt->input->line;
9984 node_info.node = ret;
9985 xmlParserAddNodeInfo(ctxt, &node_info);
9992 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9993 "Couldn't find end of Start Tag %s line %d\n",
9997 * end of parsing of this node.
10002 if (nsNr != ctxt->nsNr)
10003 nsPop(ctxt, ctxt->nsNr - nsNr);
10006 * Capture end position and add node
10008 if ( ret != NULL && ctxt->record_info ) {
10009 node_info.end_pos = ctxt->input->consumed +
10010 (CUR_PTR - ctxt->input->base);
10011 node_info.end_line = ctxt->input->line;
10012 node_info.node = ret;
10013 xmlParserAddNodeInfo(ctxt, &node_info);
10019 * Parse the content of the element:
10021 xmlParseContent(ctxt);
10022 if (ctxt->instate == XML_PARSER_EOF)
10024 if (!IS_BYTE_CHAR(RAW)) {
10025 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10026 "Premature end of data in tag %s line %d\n",
10030 * end of parsing of this node.
10035 if (nsNr != ctxt->nsNr)
10036 nsPop(ctxt, ctxt->nsNr - nsNr);
10041 * parse the end of tag: '</' should be here.
10044 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10047 #ifdef LIBXML_SAX1_ENABLED
10049 xmlParseEndTag1(ctxt, line);
10050 #endif /* LIBXML_SAX1_ENABLED */
10053 * Capture end position and add node
10055 if ( ret != NULL && ctxt->record_info ) {
10056 node_info.end_pos = ctxt->input->consumed +
10057 (CUR_PTR - ctxt->input->base);
10058 node_info.end_line = ctxt->input->line;
10059 node_info.node = ret;
10060 xmlParserAddNodeInfo(ctxt, &node_info);
10065 * xmlParseVersionNum:
10066 * @ctxt: an XML parser context
10068 * parse the XML version value.
10070 * [26] VersionNum ::= '1.' [0-9]+
10072 * In practice allow [0-9].[0-9]+ at that level
10074 * Returns the string giving the XML version number, or NULL
10077 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
/*
 * Works on a heap buffer (buf) obtained from xmlMallocAtomic() and enlarged
 * with xmlRealloc() when it fills up; the visible tests accept only ASCII
 * digits in cur.
 * NOTE(review): the embedded source line numbers skip (10078 -> 10083,
 * 10089 -> 10103, ...), so the statements that copy characters, the guards
 * around the xmlErrMemory() calls and the return statements are not visible
 * in this listing.
 */
10078 xmlChar *buf = NULL;
10083 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
/* presumably reached only when the allocation above failed - guard not shown */
10085 xmlErrMemory(ctxt, NULL);
/* cur must be an ASCII digit at this point; the failure branch is not shown */
10089 if (!((cur >= '0') && (cur <= '9'))) {
/* loop for as long as cur is an ASCII digit */
10103 while ((cur >= '0') && (cur <= '9')) {
/* grow the buffer once len + 1 reaches its current capacity (size) */
10104 if (len + 1 >= size) {
/* the result is stored in tmp, so buf itself is not overwritten by this call */
10108 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10111 xmlErrMemory(ctxt, NULL);
10125 * xmlParseVersionInfo:
10126 * @ctxt: an XML parser context
10128 * parse the XML version.
10130 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10132 * [25] Eq ::= S? '=' S?
10134 * Returns the version string, e.g. "1.0"
10138 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
/*
 * Recognises the keyword "version" at the current position (CMP7) and then
 * obtains the value from xmlParseVersionNum() in two branches; the second
 * visible branch is taken when RAW is an apostrophe.
 * Errors reported through xmlFatalErr(): XML_ERR_EQUAL_REQUIRED,
 * XML_ERR_STRING_NOT_CLOSED and XML_ERR_STRING_NOT_STARTED.
 * NOTE(review): the conditions guarding each error, the first quote test
 * and the return statement fall on source lines missing from this listing.
 */
10139 xmlChar *version = NULL;
10141 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10145 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
/* first quoting branch - presumably the double-quote case, test not shown */
10152 version = xmlParseVersionNum(ctxt);
10154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* same handling when the value is opened by an apostrophe */
10157 } else if (RAW == '\''){
10159 version = xmlParseVersionNum(ctxt);
10161 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* neither quoting branch applied */
10165 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10173 * @ctxt: an XML parser context
10175 * parse the XML encoding name
10177 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10179 * Returns the encoding name value or NULL
10182 xmlParseEncName(xmlParserCtxtPtr ctxt) {
/*
 * Accepts a name only when cur is an ASCII letter; a buffer is then
 * allocated with xmlMallocAtomic() and the loop continues over ASCII
 * letters, digits, '.' and '_'. When the opening letter test fails,
 * XML_ERR_ENCODING_NAME is presumably what gets raised (the else line is
 * not shown).
 * NOTE(review): the last alternative of the while condition (source line
 * 10204), the character copy, the allocation-failure guards and the return
 * statements are missing from this listing.
 */
10183 xmlChar *buf = NULL;
/* the name must open with an ASCII letter */
10189 if (((cur >= 'a') && (cur <= 'z')) ||
10190 ((cur >= 'A') && (cur <= 'Z'))) {
10191 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10193 xmlErrMemory(ctxt, NULL);
/* continuation characters: letters, digits, '.', '_' (further cases not shown) */
10200 while (((cur >= 'a') && (cur <= 'z')) ||
10201 ((cur >= 'A') && (cur <= 'Z')) ||
10202 ((cur >= '0') && (cur <= '9')) ||
10203 (cur == '.') || (cur == '_') ||
/* grow the buffer once len + 1 reaches its current capacity (size) */
10205 if (len + 1 >= size) {
/* the result is stored in tmp, so buf itself is not overwritten by this call */
10209 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10211 xmlErrMemory(ctxt, NULL);
10228 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10234 * xmlParseEncodingDecl:
10235 * @ctxt: an XML parser context
10237 * parse the XML encoding declaration
10239 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10241 * This sets up the conversion filters.
10243 * Returns the encoding value or NULL
10247 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
/*
 * Recognises the keyword "encoding" (CMP8), reads the quoted name with
 * xmlParseEncName() and then acts on it:
 *   - with XML_PARSE_IGNORE_ENC set the name is freed;
 *   - "UTF-16"/"UTF16" and "UTF-8"/"UTF8" (compared case-insensitively
 *     with xmlStrcasecmp) are stored in ctxt->encoding, replacing and
 *     freeing any previous value;
 *   - any other non-NULL name is stored in ctxt->input->encoding and a
 *     handler is looked up with xmlFindCharEncodingHandler(); a failing
 *     xmlSwitchToEncoding() sets ctxt->errNo to
 *     XML_ERR_UNSUPPORTED_ENCODING.
 * In the storing cases the context keeps the very pointer returned by
 * xmlParseEncName(), i.e. no copy is made.
 * NOTE(review): several source lines (quote tests, returns, else keywords)
 * are missing from this listing, so the exact nesting cannot be confirmed.
 */
10248 xmlChar *encoding = NULL;
10251 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10255 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
/* first quoting branch - presumably the double-quote case, test not shown */
10262 encoding = xmlParseEncName(ctxt);
10264 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* the name is released after the unterminated-string error */
10265 xmlFree((xmlChar *) encoding);
10269 } else if (RAW == '\''){
10271 encoding = xmlParseEncName(ctxt);
10273 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10274 xmlFree((xmlChar *) encoding);
10279 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10283 * Non standard parsing, allowing the user to ignore encoding
10285 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10286 xmlFree((xmlChar *) encoding);
10291 * UTF-16 encoding stwich has already taken place at this stage,
10292 * more over the little-endian/big-endian selection is already done
10294 if ((encoding != NULL) &&
10295 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10296 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10298 * If no encoding was passed to the parser, that we are
10299 * using UTF-16 and no decoder is present i.e. the
10300 * document is apparently UTF-8 compatible, then raise an
10301 * encoding mismatch fatal error
10303 if ((ctxt->encoding == NULL) &&
10304 (ctxt->input->buf != NULL) &&
10305 (ctxt->input->buf->encoder == NULL)) {
10306 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10307 "Document labelled UTF-16 but has UTF-8 content\n");
/* replace the recorded document encoding with the declared name */
10309 if (ctxt->encoding != NULL)
10310 xmlFree((xmlChar *) ctxt->encoding);
10311 ctxt->encoding = encoding;
10314 * UTF-8 encoding is handled natively
10316 else if ((encoding != NULL) &&
10317 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10318 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10319 if (ctxt->encoding != NULL)
10320 xmlFree((xmlChar *) ctxt->encoding);
10321 ctxt->encoding = encoding;
/* every other declared name: recorded on the input, then a converter is sought */
10323 else if (encoding != NULL) {
10324 xmlCharEncodingHandlerPtr handler;
10326 if (ctxt->input->encoding != NULL)
10327 xmlFree((xmlChar *) ctxt->input->encoding);
10328 ctxt->input->encoding = encoding;
10330 handler = xmlFindCharEncodingHandler((const char *) encoding);
10331 if (handler != NULL) {
10332 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10333 /* failed to convert */
10334 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
/* presumably the branch taken when no handler was found - else line not shown */
10338 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10339 "Unsupported encoding %s\n", encoding);
10349 * @ctxt: an XML parser context
10351 * parse the XML standalone declaration
10353 * [32] SDDecl ::= S 'standalone' Eq
10354 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10356 * [ VC: Standalone Document Declaration ]
10357 * TODO The standalone document declaration must have the value "no"
10358 * if any external markup declarations contain declarations of:
10359 * - attributes with default values, if elements to which these
10360 * attributes apply appear in the document without specifications
10361 * of values for these attributes, or
10362 * - entities (other than amp, lt, gt, apos, quot), if references
10363 * to those entities appear in the document, or
10364 * - attributes with values subject to normalization, where the
10365 * attribute appears in the document with a value which will change
10366 * as a result of normalization, or
10367 * - element types with element content, if white space occurs directly
10368 * within any instance of those types.
10371 * 1 if standalone="yes"
10372 * 0 if standalone="no"
10373 * -2 if standalone attribute is missing or invalid
10374 * (A standalone value of -2 means that the XML declaration was found,
10375 * but no value was specified for the standalone attribute).
10379 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
/*
 * Recognises the keyword "standalone" (CMP10) and inspects the quoted value
 * in two parallel branches, testing for "no" and for a value beginning with
 * "ye". The result variable starts at -2 and is what the visible return
 * statements hand back, so -2 is returned after the XML_ERR_EQUAL_REQUIRED
 * error.
 * Other errors reported: XML_ERR_STANDALONE_VALUE,
 * XML_ERR_STRING_NOT_CLOSED, XML_ERR_STRING_NOT_STARTED.
 * NOTE(review): the assignments of 0 / 1 to standalone, the first quote
 * test and the rest of the "yes" comparison fall on source lines missing
 * from this listing.
 */
10380 int standalone = -2;
10383 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10387 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
/* still -2 here: no value has been read yet */
10388 return(standalone);
/* first quoting branch - presumably the apostrophe case, test not shown */
10394 if ((RAW == 'n') && (NXT(1) == 'o')) {
10397 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10402 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10405 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* same value checks when the value is opened by a double quote */
10408 } else if (RAW == '"'){
10410 if ((RAW == 'n') && (NXT(1) == 'o')) {
10413 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10418 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10421 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10425 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10428 return(standalone);
10433 * @ctxt: an XML parser context
10435 * parse an XML declaration header
10437 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10441 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses the XML declaration in four visible stages:
 *   1. VersionInfo via xmlParseVersionInfo(); a NULL result raises
 *      XML_ERR_VERSION_MISSING. A version different from
 *      XML_DEFAULT_VERSION is fatal under XML_PARSE_OLD10, only a warning
 *      when it starts with "1.", and fatal otherwise. The string then
 *      replaces ctxt->version (the old value is freed).
 *   2. EncodingDecl via xmlParseEncodingDecl(); an unsupported encoding or
 *      an EOF parser state triggers the "stop parsing" branch.
 *   3. SDDecl via xmlParseSDDecl(), stored in ctxt->input->standalone
 *      (preset to -2 at entry).
 *   4. The closing "?>"; anything else raises
 *      XML_ERR_XMLDECL_NOT_FINISHED.
 * NOTE(review): skip/advance statements, else keywords and returns fall on
 * source lines missing from this listing.
 */
10445 * This value for standalone indicates that the document has an
10446 * XML declaration but it does not have a standalone attribute.
10447 * It will be overwritten later if a standalone attribute is found.
10449 ctxt->input->standalone = -2;
10452 * We know that '<?xml' is here.
10456 if (!IS_BLANK_CH(RAW)) {
10457 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10458 "Blank needed after '<?xml'\n");
10463 * We must have the VersionInfo here.
10465 version = xmlParseVersionInfo(ctxt);
10466 if (version == NULL) {
10467 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10469 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10471 * Changed here for XML-1.0 5th edition
10473 if (ctxt->options & XML_PARSE_OLD10) {
10474 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10475 "Unsupported version '%s'\n",
/* a "1.x" version is tolerated with a warning, anything else is fatal */
10478 if ((version[0] == '1') && ((version[1] == '.'))) {
10479 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10480 "Unsupported version '%s'\n",
10483 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10484 "Unsupported version '%s'\n",
/* the context keeps the parsed string itself; the previous one is freed */
10489 if (ctxt->version != NULL)
10490 xmlFree((void *) ctxt->version);
10491 ctxt->version = version;
10495 * We may have the encoding declaration
10497 if (!IS_BLANK_CH(RAW)) {
10498 if ((RAW == '?') && (NXT(1) == '>')) {
10502 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10504 xmlParseEncodingDecl(ctxt);
10505 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10506 (ctxt->instate == XML_PARSER_EOF)) {
10508 * The XML REC instructs us to stop parsing right here
10514 * We may have the standalone status.
/*
 * NOTE(review): the blank check below runs only when ctxt->input->encoding
 * is set. xmlParseEncodingDecl() stores UTF-8 / UTF-16 names in
 * ctxt->encoding instead, so confirm the check is not skipped for those
 * declarations.
 */
10516 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10517 if ((RAW == '?') && (NXT(1) == '>')) {
10521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10525 * We can grow the input buffer freely at that point
10530 ctxt->input->standalone = xmlParseSDDecl(ctxt);
/* expect the closing "?>"; a lone '>' and any other input are both errors */
10533 if ((RAW == '?') && (NXT(1) == '>')) {
10535 } else if (RAW == '>') {
10536 /* Deprecated old WD ... */
10537 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10540 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10541 MOVETO_ENDTAG(CUR_PTR);
10548 * @ctxt: an XML parser context
10550 * parse an XML Misc* optional field.
10552 * [27] Misc ::= Comment | PI | S
10556 xmlParseMisc(xmlParserCtxtPtr ctxt) {
/*
 * Loops while the parser is not in the XML_PARSER_EOF state and the input
 * at the current position is one of: "<?" (processing instruction), the
 * four-character comment opener, or a blank character. Comments are handed
 * to xmlParseComment().
 * NOTE(review): the statements executed for the "<?" and blank cases fall
 * on source lines missing from this listing.
 */
10557 while ((ctxt->instate != XML_PARSER_EOF) &&
10558 (((RAW == '<') && (NXT(1) == '?')) ||
10559 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10560 IS_BLANK_CH(CUR))) {
10561 if ((RAW == '<') && (NXT(1) == '?')) {
10563 } else if (IS_BLANK_CH(CUR)) {
/* presumably the remaining (comment) case - else line not shown */
10566 xmlParseComment(ctxt);
10571 * xmlParseDocument:
10572 * @ctxt: an XML parser context
10574 * parse an XML document (and build a tree if using the standard SAX
10577 * [1] document ::= prolog element Misc*
10579 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10581 * Returns 0, or -1 in case of error. The parser context is augmented
10582 * as a result of the parsing.
10586 xmlParseDocument(xmlParserCtxtPtr ctxt) {
/*
 * Parses a whole document in the order of the visible statements:
 *   1. guard against a NULL ctxt or ctxt->input;
 *   2. xmlDetectSAX2(), then the SAX setDocumentLocator callback if set;
 *   3. when ctxt->encoding is NULL and at least 4 bytes are buffered, guess
 *      the encoding with xmlDetectCharEncoding() and switch to it unless
 *      the guess is XML_CHAR_ENCODING_NONE;
 *   4. if the input starts with "<?xml" followed by a blank, run
 *      xmlParseXMLDecl() and copy input->standalone to ctxt->standalone;
 *      an unsupported encoding or EOF state takes the "stop parsing"
 *      branch. ctxt->version is also set to a copy of XML_DEFAULT_VERSION
 *      (presumably in the branch without a declaration - else not shown);
 *   5. SAX startDocument, then copy the input buffer's compressed value to
 *      myDoc->compression when it is >= 0;
 *   6. xmlParseMisc(); on "<!DOCTYPE": inSubset = 1, xmlParseDocTypeDecl(),
 *      state XML_PARSER_DTD and xmlParseInternalSubset(), then
 *      inSubset = 2 and the SAX externalSubset callback, inSubset = 0,
 *      xmlCleanSpecialAttr(), state XML_PARSER_PROLOG and xmlParseMisc();
 *   7. root element: XML_ERR_DOCUMENT_EMPTY ("Start tag expected") or
 *      state XML_PARSER_CONTENT, xmlParseElement(), state
 *      XML_PARSER_EPILOG, xmlParseMisc(), with XML_ERR_DOCUMENT_END as the
 *      visible error, and finally state XML_PARSER_EOF;
 *   8. SAX endDocument; myDoc is freed when its version equals
 *      SAX_COMPAT_MODE; for a well-formed document the XML_DOC_WELLFORMED,
 *      XML_DOC_DTDVALID, XML_DOC_NSVALID and XML_DOC_OLD10 property bits
 *      are set as applicable.
 * NOTE(review): the return statements and many conditions fall on source
 * lines missing from this listing; no NULL check on the xmlCharStrdup()
 * result is visible here.
 */
10588 xmlCharEncoding enc;
10592 if ((ctxt == NULL) || (ctxt->input == NULL))
10598 * SAX: detecting the level.
10600 xmlDetectSAX2(ctxt);
10603 * SAX: beginning of the document processing.
10605 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10606 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10607 if (ctxt->instate == XML_PARSER_EOF)
10610 if ((ctxt->encoding == NULL) &&
10611 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10613 * Get the 4 first bytes and decode the charset
10614 * if enc != XML_CHAR_ENCODING_NONE
10615 * plug some encoding conversion routines.
10621 enc = xmlDetectCharEncoding(&start[0], 4);
10622 if (enc != XML_CHAR_ENCODING_NONE) {
10623 xmlSwitchEncoding(ctxt, enc);
10629 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10634 * Check for the XMLDecl in the Prolog.
10635 * do not GROW here to avoid the detected encoder to decode more
10636 * than just the first line, unless the amount of data is really
10637 * too small to hold "<?xml version="1.0" encoding="foo"
10639 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10642 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10645 * Note that we will switch encoding on the fly.
10647 xmlParseXMLDecl(ctxt);
10648 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10649 (ctxt->instate == XML_PARSER_EOF)) {
10651 * The XML REC instructs us to stop parsing right here
10655 ctxt->standalone = ctxt->input->standalone;
10658 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10660 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10661 ctxt->sax->startDocument(ctxt->userData);
10662 if (ctxt->instate == XML_PARSER_EOF)
10664 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10665 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10666 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10670 * The Misc part of the Prolog
10673 xmlParseMisc(ctxt);
10676 * Then possibly doc type declaration(s) and more Misc
10677 * (doctypedecl Misc*)?
10680 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10682 ctxt->inSubset = 1;
10683 xmlParseDocTypeDecl(ctxt);
10685 ctxt->instate = XML_PARSER_DTD;
10686 xmlParseInternalSubset(ctxt);
10687 if (ctxt->instate == XML_PARSER_EOF)
10692 * Create and update the external subset.
10694 ctxt->inSubset = 2;
10695 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10696 (!ctxt->disableSAX))
10697 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10698 ctxt->extSubSystem, ctxt->extSubURI);
10699 if (ctxt->instate == XML_PARSER_EOF)
10701 ctxt->inSubset = 0;
10703 xmlCleanSpecialAttr(ctxt);
10705 ctxt->instate = XML_PARSER_PROLOG;
10706 xmlParseMisc(ctxt);
10710 * Time to start parsing the tree itself
10714 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10715 "Start tag expected, '<' not found\n");
10717 ctxt->instate = XML_PARSER_CONTENT;
10718 xmlParseElement(ctxt);
10719 ctxt->instate = XML_PARSER_EPILOG;
10723 * The Misc part at the end
10725 xmlParseMisc(ctxt);
10728 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10730 ctxt->instate = XML_PARSER_EOF;
10734 * SAX: end of the document processing.
10736 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10737 ctxt->sax->endDocument(ctxt->userData);
10740 * Remove locally kept entity definitions if the tree was not built
10742 if ((ctxt->myDoc != NULL) &&
10743 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10744 xmlFreeDoc(ctxt->myDoc);
10745 ctxt->myDoc = NULL;
10748 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10749 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10751 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10752 if (ctxt->nsWellFormed)
10753 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10754 if (ctxt->options & XML_PARSE_OLD10)
10755 ctxt->myDoc->properties |= XML_DOC_OLD10;
10757 if (! ctxt->wellFormed) {
10765 * xmlParseExtParsedEnt:
10766 * @ctxt: an XML parser context
10768 * parse a general parsed entity
10769 * An external general parsed entity is well-formed if it matches the
10770 * production labeled extParsedEnt.
10772 * [78] extParsedEnt ::= TextDecl? content
10774 * Returns 0, or -1 in case of error. The parser context is augmented
10775 * as a result of the parsing.
10779 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
/*
 * Parses an external parsed entity in the order of the visible statements:
 *   1. guard against a NULL ctxt or ctxt->input;
 *   2. xmlDefaultSAXHandlerInit(), xmlDetectSAX2(), then the SAX
 *      setDocumentLocator callback if set;
 *   3. when at least 4 bytes are buffered, guess the encoding with
 *      xmlDetectCharEncoding() and switch to it unless the guess is
 *      XML_CHAR_ENCODING_NONE;
 *   4. if the input starts with "<?xml" followed by a blank, run
 *      xmlParseXMLDecl(); only XML_ERR_UNSUPPORTED_ENCODING is tested
 *      afterwards. ctxt->version is also set to a copy of
 *      XML_DEFAULT_VERSION (presumably in the branch without a
 *      declaration - else not shown);
 *   5. SAX startDocument; then state XML_PARSER_CONTENT with validate and
 *      loadsubset forced to 0, and xmlParseContent();
 *   6. leftover input: "</" raises XML_ERR_NOT_WELL_BALANCED, any other
 *      non-zero RAW raises XML_ERR_EXTRA_CONTENT;
 *   7. SAX endDocument; returns -1 when the context is not well-formed.
 * NOTE(review): the other return statements and some conditions fall on
 * source lines missing from this listing.
 */
10781 xmlCharEncoding enc;
10783 if ((ctxt == NULL) || (ctxt->input == NULL))
10786 xmlDefaultSAXHandlerInit();
10788 xmlDetectSAX2(ctxt);
10793 * SAX: beginning of the document processing.
10795 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10796 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10799 * Get the 4 first bytes and decode the charset
10800 * if enc != XML_CHAR_ENCODING_NONE
10801 * plug some encoding conversion routines.
10803 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10808 enc = xmlDetectCharEncoding(start, 4);
10809 if (enc != XML_CHAR_ENCODING_NONE) {
10810 xmlSwitchEncoding(ctxt, enc);
10816 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10820 * Check for the XMLDecl in the Prolog.
10823 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10826 * Note that we will switch encoding on the fly.
10828 xmlParseXMLDecl(ctxt);
10829 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10831 * The XML REC instructs us to stop parsing right here
10837 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10839 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10840 ctxt->sax->startDocument(ctxt->userData);
10841 if (ctxt->instate == XML_PARSER_EOF)
10845 * Doing validity checking on chunk doesn't make sense
10847 ctxt->instate = XML_PARSER_CONTENT;
10848 ctxt->validate = 0;
10849 ctxt->loadsubset = 0;
10852 xmlParseContent(ctxt);
10853 if (ctxt->instate == XML_PARSER_EOF)
10856 if ((RAW == '<') && (NXT(1) == '/')) {
10857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10858 } else if (RAW != 0) {
10859 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10863 * SAX: end of the document processing.
10865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10866 ctxt->sax->endDocument(ctxt->userData);
10868 if (! ctxt->wellFormed) return(-1);
10872 #ifdef LIBXML_PUSH_ENABLED
10873 /************************************************************************
10875 * Progressive parsing interfaces *
10877 ************************************************************************/
10880 * xmlParseLookupSequence:
10881 * @ctxt: an XML parser context
10882 * @first: the first char to lookup
10883 * @next: the next char to lookup or zero
10884 * @third: the next char to lookup or zero
10886 * Try to find if a sequence (first, next, third) or just (first next) or
10887 * (first) is available in the input stream.
10888 * This function has the side effect of updating ctxt->checkIndex to
10889 * avoid rescanning bytes already searched; it DOES change the state of
10890 * the parser, so do not use it liberally.
10892 * Returns the offset of the sequence from the current parsing point if
10893 * the full sequence is available, -1 otherwise.
10896 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10897 xmlChar next, xmlChar third) {
/*
 * Searches the input buffer for first, optionally followed by next and
 * third (a zero value means "not part of the sequence").
 * The scan starts at the offset of in->cur from in->base, or at
 * ctxt->checkIndex when that is larger. On a match checkIndex is reset to
 * 0 and the match position is returned relative to in->cur; when the scan
 * runs out, checkIndex is set to the position reached.
 * Returns -1 when there is no input or the starting offset is negative.
 * NOTE(review): the assignment of in, the in->buf == NULL branch body, the
 * debug guards around the xmlGenericError() calls and the final return
 * fall on source lines missing from this listing.
 */
10899 xmlParserInputPtr in;
10900 const xmlChar *buf;
10903 if (in == NULL) return(-1);
10904 base = in->cur - in->base;
10905 if (base < 0) return(-1);
/* skip ahead to the position recorded by a previous call, if it is further on */
10906 if (ctxt->checkIndex > base)
10907 base = ctxt->checkIndex;
10908 if (in->buf == NULL) {
10912 buf = xmlBufContent(in->buf->buffer);
10913 len = xmlBufUse(in->buf->buffer);
10915 /* take into account the sequence length */
10916 if (third) len -= 2;
10917 else if (next) len --;
/* len was shortened above so that buf[base + 1] / buf[base + 2] stay below the used length */
10918 for (;base < len;base++) {
10919 if (buf[base] == first) {
10921 if ((buf[base + 1] != next) ||
10922 (buf[base + 2] != third)) continue;
10923 } else if (next != 0) {
10924 if (buf[base + 1] != next) continue;
/* full sequence matched: clear the saved scan position */
10926 ctxt->checkIndex = 0;
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: lookup '%c' found at %d\n",
10932 else if (third == 0)
10933 xmlGenericError(xmlGenericErrorContext,
10934 "PP: lookup '%c%c' found at %d\n",
10935 first, next, base);
10937 xmlGenericError(xmlGenericErrorContext,
10938 "PP: lookup '%c%c%c' found at %d\n",
10939 first, next, third, base);
/* convert the buffer index into an offset from the current position */
10941 return(base - (in->cur - in->base));
/* nothing found: remember how far the scan got */
10944 ctxt->checkIndex = base;
10947 xmlGenericError(xmlGenericErrorContext,
10948 "PP: lookup '%c' failed\n", first);
10949 else if (third == 0)
10950 xmlGenericError(xmlGenericErrorContext,
10951 "PP: lookup '%c%c' failed\n", first, next);
10953 xmlGenericError(xmlGenericErrorContext,
10954 "PP: lookup '%c%c%c' failed\n", first, next, third);
10960 * xmlParseGetLasts:
10961 * @ctxt: an XML parser context
10962 * @lastlt: pointer to store the last '<' from the input
10963 * @lastgt: pointer to store the last '>' from the input
10965 * Lookup the last < and > in the current chunk
10968 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10969 const xmlChar **lastgt) {
/*
 * Scans the current input, only when ctxt->progressive is non-zero and
 * exactly one input is stacked (inputNr == 1):
 *   - backwards from input->end for the last '<';
 *   - forwards for a '>', stepping over runs delimited by apostrophes or
 *     double quotes;
 *   - backwards for the last '>'.
 * A NULL ctxt, lastlt or lastgt is reported as an internal error through
 * xmlGenericError().
 * NOTE(review): every store into *lastlt / *lastgt, the initial pointer
 * adjustments and the non-progressive branch fall on source lines missing
 * from this listing.
 */
10970 const xmlChar *tmp;
10972 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10973 xmlGenericError(xmlGenericErrorContext,
10974 "Internal error: xmlParseGetLasts\n");
10977 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10978 tmp = ctxt->input->end;
/* walk backwards until a '<' is found or the start of the buffer is passed */
10980 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10981 if (tmp < ctxt->input->base) {
/* walk forwards looking for '>', skipping over quoted runs */
10987 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10988 if (*tmp == '\'') {
10990 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10991 if (tmp < ctxt->input->end) tmp++;
10992 } else if (*tmp == '"') {
10994 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10995 if (tmp < ctxt->input->end) tmp++;
10999 if (tmp < ctxt->input->end)
/* walk backwards until a '>' is found or the start of the buffer is passed */
11004 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11005 if (tmp >= ctxt->input->base)
11017 * xmlCheckCdataPush:
11018 * @utf: pointer to the block of characters
11019 * @len: length of the block in bytes
11020 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11022 * Check that the block of characters is okay as CData content [20]
11024 * Returns the number of bytes to pass if okay, a negative index where a
11025 * UTF-8 error occurred otherwise
11028 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
/*
 * Walks the first len bytes of utf as UTF-8, classifying each lead byte as
 * the start of a 1-, 2-, 3- or 4-byte sequence. Continuation bytes must
 * match the bit pattern 10xxxxxx, and each decoded multi-byte code point is
 * checked with xmlIsCharQ().
 * When a multi-byte sequence would extend past len, the function returns
 * -ix if complete is non-zero and ix otherwise, ix being the offset at
 * which that sequence starts.
 * NOTE(review): the early-exit value for NULL / empty input, the index
 * advances and the error returns fall on source lines missing from this
 * listing. Also confirm how callers treat a result of 0, since -ix is 0
 * for a problem at offset 0.
 */
11033 if ((utf == NULL) || (len <= 0))
11036 for (ix = 0; ix < len;) { /* the scan is bounded by len */
11038 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 0 */
/* among the low control characters, LF, CR and TAB are singled out */
11041 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11045 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11046 if (ix + 2 > len) return(complete ? -ix : ix);
11047 if ((utf[ix+1] & 0xc0 ) != 0x80)
/* 5 payload bits from the lead byte, 6 from the continuation byte */
11049 codepoint = (utf[ix] & 0x1f) << 6;
11050 codepoint |= utf[ix+1] & 0x3f;
11051 if (!xmlIsCharQ(codepoint))
11054 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11055 if (ix + 3 > len) return(complete ? -ix : ix);
11056 if (((utf[ix+1] & 0xc0) != 0x80) ||
11057 ((utf[ix+2] & 0xc0) != 0x80))
/* 4 payload bits from the lead byte, 6 from each continuation byte */
11059 codepoint = (utf[ix] & 0xf) << 12;
11060 codepoint |= (utf[ix+1] & 0x3f) << 6;
11061 codepoint |= utf[ix+2] & 0x3f;
11062 if (!xmlIsCharQ(codepoint))
11065 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11066 if (ix + 4 > len) return(complete ? -ix : ix);
11067 if (((utf[ix+1] & 0xc0) != 0x80) ||
11068 ((utf[ix+2] & 0xc0) != 0x80) ||
11069 ((utf[ix+3] & 0xc0) != 0x80))
/* 3 payload bits from the lead byte, 6 from each continuation byte */
11071 codepoint = (utf[ix] & 0x7) << 18;
11072 codepoint |= (utf[ix+1] & 0x3f) << 12;
11073 codepoint |= (utf[ix+2] & 0x3f) << 6;
11074 codepoint |= utf[ix+3] & 0x3f;
11075 if (!xmlIsCharQ(codepoint))
11078 } else /* unknown encoding */
11085 * xmlParseTryOrFinish:
11086 * @ctxt: an XML parser context
11087 * @terminate: last chunk indicator
11089 * Try to progress on parsing
11091 * Returns zero if no parsing was possible
11094 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11098 const xmlChar *lastlt, *lastgt;
11100 if (ctxt->input == NULL)
11104 switch (ctxt->instate) {
11105 case XML_PARSER_EOF:
11106 xmlGenericError(xmlGenericErrorContext,
11107 "PP: try EOF\n"); break;
11108 case XML_PARSER_START:
11109 xmlGenericError(xmlGenericErrorContext,
11110 "PP: try START\n"); break;
11111 case XML_PARSER_MISC:
11112 xmlGenericError(xmlGenericErrorContext,
11113 "PP: try MISC\n");break;
11114 case XML_PARSER_COMMENT:
11115 xmlGenericError(xmlGenericErrorContext,
11116 "PP: try COMMENT\n");break;
11117 case XML_PARSER_PROLOG:
11118 xmlGenericError(xmlGenericErrorContext,
11119 "PP: try PROLOG\n");break;
11120 case XML_PARSER_START_TAG:
11121 xmlGenericError(xmlGenericErrorContext,
11122 "PP: try START_TAG\n");break;
11123 case XML_PARSER_CONTENT:
11124 xmlGenericError(xmlGenericErrorContext,
11125 "PP: try CONTENT\n");break;
11126 case XML_PARSER_CDATA_SECTION:
11127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: try CDATA_SECTION\n");break;
11129 case XML_PARSER_END_TAG:
11130 xmlGenericError(xmlGenericErrorContext,
11131 "PP: try END_TAG\n");break;
11132 case XML_PARSER_ENTITY_DECL:
11133 xmlGenericError(xmlGenericErrorContext,
11134 "PP: try ENTITY_DECL\n");break;
11135 case XML_PARSER_ENTITY_VALUE:
11136 xmlGenericError(xmlGenericErrorContext,
11137 "PP: try ENTITY_VALUE\n");break;
11138 case XML_PARSER_ATTRIBUTE_VALUE:
11139 xmlGenericError(xmlGenericErrorContext,
11140 "PP: try ATTRIBUTE_VALUE\n");break;
11141 case XML_PARSER_DTD:
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: try DTD\n");break;
11144 case XML_PARSER_EPILOG:
11145 xmlGenericError(xmlGenericErrorContext,
11146 "PP: try EPILOG\n");break;
11147 case XML_PARSER_PI:
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: try PI\n");break;
11150 case XML_PARSER_IGNORE:
11151 xmlGenericError(xmlGenericErrorContext,
11152 "PP: try IGNORE\n");break;
11156 if ((ctxt->input != NULL) &&
11157 (ctxt->input->cur - ctxt->input->base > 4096)) {
11159 ctxt->checkIndex = 0;
11161 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11163 while (ctxt->instate != XML_PARSER_EOF) {
11164 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11167 if (ctxt->input == NULL) break;
11168 if (ctxt->input->buf == NULL)
11169 avail = ctxt->input->length -
11170 (ctxt->input->cur - ctxt->input->base);
11173 * If we are operating on converted input, try to flush
11174 * remaining chars to avoid them stalling in the non-converted
11175 * buffer. But do not do this in document start where
11176 * encoding="..." may not have been read and we work on a
11177 * guessed encoding.
11179 if ((ctxt->instate != XML_PARSER_START) &&
11180 (ctxt->input->buf->raw != NULL) &&
11181 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11182 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11184 size_t current = ctxt->input->cur - ctxt->input->base;
11186 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11187 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11190 avail = xmlBufUse(ctxt->input->buf->buffer) -
11191 (ctxt->input->cur - ctxt->input->base);
11195 switch (ctxt->instate) {
11196 case XML_PARSER_EOF:
11198 * Document parsing is done !
11201 case XML_PARSER_START:
11202 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11204 xmlCharEncoding enc;
11207 * Very first chars read from the document flow.
11213 * Get the 4 first bytes and decode the charset
11214 * if enc != XML_CHAR_ENCODING_NONE
11215 * plug some encoding conversion routines,
11216 * else xmlSwitchEncoding will set to (default)
11223 enc = xmlDetectCharEncoding(start, 4);
11224 xmlSwitchEncoding(ctxt, enc);
11230 cur = ctxt->input->cur[0];
11231 next = ctxt->input->cur[1];
11233 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11234 ctxt->sax->setDocumentLocator(ctxt->userData,
11235 &xmlDefaultSAXLocator);
11236 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11237 xmlHaltParser(ctxt);
11239 xmlGenericError(xmlGenericErrorContext,
11240 "PP: entering EOF\n");
11242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11243 ctxt->sax->endDocument(ctxt->userData);
11246 if ((cur == '<') && (next == '?')) {
11247 /* PI or XML decl */
11248 if (avail < 5) return(ret);
11249 if ((!terminate) &&
11250 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11252 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11253 ctxt->sax->setDocumentLocator(ctxt->userData,
11254 &xmlDefaultSAXLocator);
11255 if ((ctxt->input->cur[2] == 'x') &&
11256 (ctxt->input->cur[3] == 'm') &&
11257 (ctxt->input->cur[4] == 'l') &&
11258 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: Parsing XML Decl\n");
11264 xmlParseXMLDecl(ctxt);
11265 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11267 * The XML REC instructs us to stop parsing right
11270 xmlHaltParser(ctxt);
11273 ctxt->standalone = ctxt->input->standalone;
11274 if ((ctxt->encoding == NULL) &&
11275 (ctxt->input->encoding != NULL))
11276 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11277 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11278 (!ctxt->disableSAX))
11279 ctxt->sax->startDocument(ctxt->userData);
11280 ctxt->instate = XML_PARSER_MISC;
11282 xmlGenericError(xmlGenericErrorContext,
11283 "PP: entering MISC\n");
11286 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11287 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11288 (!ctxt->disableSAX))
11289 ctxt->sax->startDocument(ctxt->userData);
11290 ctxt->instate = XML_PARSER_MISC;
11292 xmlGenericError(xmlGenericErrorContext,
11293 "PP: entering MISC\n");
11297 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11298 ctxt->sax->setDocumentLocator(ctxt->userData,
11299 &xmlDefaultSAXLocator);
11300 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11301 if (ctxt->version == NULL) {
11302 xmlErrMemory(ctxt, NULL);
11305 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11306 (!ctxt->disableSAX))
11307 ctxt->sax->startDocument(ctxt->userData);
11308 ctxt->instate = XML_PARSER_MISC;
11310 xmlGenericError(xmlGenericErrorContext,
11311 "PP: entering MISC\n");
11315 case XML_PARSER_START_TAG: {
11316 const xmlChar *name;
11317 const xmlChar *prefix = NULL;
11318 const xmlChar *URI = NULL;
11319 int nsNr = ctxt->nsNr;
11321 if ((avail < 2) && (ctxt->inputNr == 1))
11323 cur = ctxt->input->cur[0];
11325 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11326 xmlHaltParser(ctxt);
11327 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11328 ctxt->sax->endDocument(ctxt->userData);
11332 if (ctxt->progressive) {
11333 /* > can be found unescaped in attribute values */
11334 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11336 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11340 if (ctxt->spaceNr == 0)
11341 spacePush(ctxt, -1);
11342 else if (*ctxt->space == -2)
11343 spacePush(ctxt, -1);
11345 spacePush(ctxt, *ctxt->space);
11346 #ifdef LIBXML_SAX1_ENABLED
11348 #endif /* LIBXML_SAX1_ENABLED */
11349 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11350 #ifdef LIBXML_SAX1_ENABLED
11352 name = xmlParseStartTag(ctxt);
11353 #endif /* LIBXML_SAX1_ENABLED */
11354 if (ctxt->instate == XML_PARSER_EOF)
11356 if (name == NULL) {
11358 xmlHaltParser(ctxt);
11359 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11360 ctxt->sax->endDocument(ctxt->userData);
11363 #ifdef LIBXML_VALID_ENABLED
11365 * [ VC: Root Element Type ]
11366 * The Name in the document type declaration must match
11367 * the element type of the root element.
11369 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11370 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11371 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11372 #endif /* LIBXML_VALID_ENABLED */
11375 * Check for an Empty Element.
11377 if ((RAW == '/') && (NXT(1) == '>')) {
11381 if ((ctxt->sax != NULL) &&
11382 (ctxt->sax->endElementNs != NULL) &&
11383 (!ctxt->disableSAX))
11384 ctxt->sax->endElementNs(ctxt->userData, name,
11386 if (ctxt->nsNr - nsNr > 0)
11387 nsPop(ctxt, ctxt->nsNr - nsNr);
11388 #ifdef LIBXML_SAX1_ENABLED
11390 if ((ctxt->sax != NULL) &&
11391 (ctxt->sax->endElement != NULL) &&
11392 (!ctxt->disableSAX))
11393 ctxt->sax->endElement(ctxt->userData, name);
11394 #endif /* LIBXML_SAX1_ENABLED */
11396 if (ctxt->instate == XML_PARSER_EOF)
11399 if (ctxt->nameNr == 0) {
11400 ctxt->instate = XML_PARSER_EPILOG;
11402 ctxt->instate = XML_PARSER_CONTENT;
11404 ctxt->progressive = 1;
11410 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11411 "Couldn't find end of Start Tag %s\n",
11417 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11418 #ifdef LIBXML_SAX1_ENABLED
11420 namePush(ctxt, name);
11421 #endif /* LIBXML_SAX1_ENABLED */
11423 ctxt->instate = XML_PARSER_CONTENT;
11424 ctxt->progressive = 1;
11427 case XML_PARSER_CONTENT: {
11428 const xmlChar *test;
11430 if ((avail < 2) && (ctxt->inputNr == 1))
11432 cur = ctxt->input->cur[0];
11433 next = ctxt->input->cur[1];
11436 cons = ctxt->input->consumed;
11437 if ((cur == '<') && (next == '/')) {
11438 ctxt->instate = XML_PARSER_END_TAG;
11440 } else if ((cur == '<') && (next == '?')) {
11441 if ((!terminate) &&
11442 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11443 ctxt->progressive = XML_PARSER_PI;
11447 ctxt->instate = XML_PARSER_CONTENT;
11448 ctxt->progressive = 1;
11449 } else if ((cur == '<') && (next != '!')) {
11450 ctxt->instate = XML_PARSER_START_TAG;
11452 } else if ((cur == '<') && (next == '!') &&
11453 (ctxt->input->cur[2] == '-') &&
11454 (ctxt->input->cur[3] == '-')) {
11459 ctxt->input->cur += 4;
11460 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11461 ctxt->input->cur -= 4;
11462 if ((!terminate) && (term < 0)) {
11463 ctxt->progressive = XML_PARSER_COMMENT;
11466 xmlParseComment(ctxt);
11467 ctxt->instate = XML_PARSER_CONTENT;
11468 ctxt->progressive = 1;
11469 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11470 (ctxt->input->cur[2] == '[') &&
11471 (ctxt->input->cur[3] == 'C') &&
11472 (ctxt->input->cur[4] == 'D') &&
11473 (ctxt->input->cur[5] == 'A') &&
11474 (ctxt->input->cur[6] == 'T') &&
11475 (ctxt->input->cur[7] == 'A') &&
11476 (ctxt->input->cur[8] == '[')) {
11478 ctxt->instate = XML_PARSER_CDATA_SECTION;
11480 } else if ((cur == '<') && (next == '!') &&
11483 } else if (cur == '&') {
11484 if ((!terminate) &&
11485 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11487 xmlParseReference(ctxt);
11489 /* TODO Avoid the extra copy, handle directly !!! */
11491 * Goal of the following test is:
11492 * - minimize calls to the SAX 'character' callback
11493 * when they are mergeable
11494 * - handle a problem for isBlank when we only parse
11495 * a sequence of blank chars and the next one is
11496 * not available to check against '<' presence.
11497 * - tries to homogenize the differences in SAX
11498 * callbacks between the push and pull versions
11501 if ((ctxt->inputNr == 1) &&
11502 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11504 if (ctxt->progressive) {
11505 if ((lastlt == NULL) ||
11506 (ctxt->input->cur > lastlt))
11508 } else if (xmlParseLookupSequence(ctxt,
11514 ctxt->checkIndex = 0;
11515 xmlParseCharData(ctxt, 0);
11517 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11518 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11519 "detected an error in element content\n");
11520 xmlHaltParser(ctxt);
11525 case XML_PARSER_END_TAG:
11529 if (ctxt->progressive) {
11530 /* > can be found unescaped in attribute values */
11531 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11533 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11538 xmlParseEndTag2(ctxt,
11539 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11540 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11541 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11544 #ifdef LIBXML_SAX1_ENABLED
11546 xmlParseEndTag1(ctxt, 0);
11547 #endif /* LIBXML_SAX1_ENABLED */
11548 if (ctxt->instate == XML_PARSER_EOF) {
11550 } else if (ctxt->nameNr == 0) {
11551 ctxt->instate = XML_PARSER_EPILOG;
11553 ctxt->instate = XML_PARSER_CONTENT;
11556 case XML_PARSER_CDATA_SECTION: {
11558 * The Push mode need to have the SAX callback for
11559 * cdataBlock merge back contiguous callbacks.
11563 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11565 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11568 tmp = xmlCheckCdataPush(ctxt->input->cur,
11569 XML_PARSER_BIG_BUFFER_SIZE, 0);
11572 ctxt->input->cur += tmp;
11573 goto encoding_error;
11575 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11576 if (ctxt->sax->cdataBlock != NULL)
11577 ctxt->sax->cdataBlock(ctxt->userData,
11578 ctxt->input->cur, tmp);
11579 else if (ctxt->sax->characters != NULL)
11580 ctxt->sax->characters(ctxt->userData,
11581 ctxt->input->cur, tmp);
11583 if (ctxt->instate == XML_PARSER_EOF)
11586 ctxt->checkIndex = 0;
11592 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11593 if ((tmp < 0) || (tmp != base)) {
11595 ctxt->input->cur += tmp;
11596 goto encoding_error;
11598 if ((ctxt->sax != NULL) && (base == 0) &&
11599 (ctxt->sax->cdataBlock != NULL) &&
11600 (!ctxt->disableSAX)) {
11602 * Special case to provide identical behaviour
11603 * between pull and push parsers on empty CDATA
11606 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11607 (!strncmp((const char *)&ctxt->input->cur[-9],
11609 ctxt->sax->cdataBlock(ctxt->userData,
11611 } else if ((ctxt->sax != NULL) && (base > 0) &&
11612 (!ctxt->disableSAX)) {
11613 if (ctxt->sax->cdataBlock != NULL)
11614 ctxt->sax->cdataBlock(ctxt->userData,
11615 ctxt->input->cur, base);
11616 else if (ctxt->sax->characters != NULL)
11617 ctxt->sax->characters(ctxt->userData,
11618 ctxt->input->cur, base);
11620 if (ctxt->instate == XML_PARSER_EOF)
11623 ctxt->checkIndex = 0;
11624 ctxt->instate = XML_PARSER_CONTENT;
11626 xmlGenericError(xmlGenericErrorContext,
11627 "PP: entering CONTENT\n");
11632 case XML_PARSER_MISC:
11634 if (ctxt->input->buf == NULL)
11635 avail = ctxt->input->length -
11636 (ctxt->input->cur - ctxt->input->base);
11638 avail = xmlBufUse(ctxt->input->buf->buffer) -
11639 (ctxt->input->cur - ctxt->input->base);
11642 cur = ctxt->input->cur[0];
11643 next = ctxt->input->cur[1];
11644 if ((cur == '<') && (next == '?')) {
11645 if ((!terminate) &&
11646 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11647 ctxt->progressive = XML_PARSER_PI;
11651 xmlGenericError(xmlGenericErrorContext,
11652 "PP: Parsing PI\n");
11655 if (ctxt->instate == XML_PARSER_EOF)
11657 ctxt->instate = XML_PARSER_MISC;
11658 ctxt->progressive = 1;
11659 ctxt->checkIndex = 0;
11660 } else if ((cur == '<') && (next == '!') &&
11661 (ctxt->input->cur[2] == '-') &&
11662 (ctxt->input->cur[3] == '-')) {
11663 if ((!terminate) &&
11664 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11665 ctxt->progressive = XML_PARSER_COMMENT;
11669 xmlGenericError(xmlGenericErrorContext,
11670 "PP: Parsing Comment\n");
11672 xmlParseComment(ctxt);
11673 if (ctxt->instate == XML_PARSER_EOF)
11675 ctxt->instate = XML_PARSER_MISC;
11676 ctxt->progressive = 1;
11677 ctxt->checkIndex = 0;
11678 } else if ((cur == '<') && (next == '!') &&
11679 (ctxt->input->cur[2] == 'D') &&
11680 (ctxt->input->cur[3] == 'O') &&
11681 (ctxt->input->cur[4] == 'C') &&
11682 (ctxt->input->cur[5] == 'T') &&
11683 (ctxt->input->cur[6] == 'Y') &&
11684 (ctxt->input->cur[7] == 'P') &&
11685 (ctxt->input->cur[8] == 'E')) {
11686 if ((!terminate) &&
11687 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11688 ctxt->progressive = XML_PARSER_DTD;
11692 xmlGenericError(xmlGenericErrorContext,
11693 "PP: Parsing internal subset\n");
11695 ctxt->inSubset = 1;
11696 ctxt->progressive = 0;
11697 ctxt->checkIndex = 0;
11698 xmlParseDocTypeDecl(ctxt);
11699 if (ctxt->instate == XML_PARSER_EOF)
11702 ctxt->instate = XML_PARSER_DTD;
11704 xmlGenericError(xmlGenericErrorContext,
11705 "PP: entering DTD\n");
11709 * Create and update the external subset.
11711 ctxt->inSubset = 2;
11712 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11713 (ctxt->sax->externalSubset != NULL))
11714 ctxt->sax->externalSubset(ctxt->userData,
11715 ctxt->intSubName, ctxt->extSubSystem,
11717 ctxt->inSubset = 0;
11718 xmlCleanSpecialAttr(ctxt);
11719 ctxt->instate = XML_PARSER_PROLOG;
11721 xmlGenericError(xmlGenericErrorContext,
11722 "PP: entering PROLOG\n");
11725 } else if ((cur == '<') && (next == '!') &&
11729 ctxt->instate = XML_PARSER_START_TAG;
11730 ctxt->progressive = XML_PARSER_START_TAG;
11731 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11733 xmlGenericError(xmlGenericErrorContext,
11734 "PP: entering START_TAG\n");
11738 case XML_PARSER_PROLOG:
11740 if (ctxt->input->buf == NULL)
11741 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11743 avail = xmlBufUse(ctxt->input->buf->buffer) -
11744 (ctxt->input->cur - ctxt->input->base);
11747 cur = ctxt->input->cur[0];
11748 next = ctxt->input->cur[1];
11749 if ((cur == '<') && (next == '?')) {
11750 if ((!terminate) &&
11751 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11752 ctxt->progressive = XML_PARSER_PI;
11756 xmlGenericError(xmlGenericErrorContext,
11757 "PP: Parsing PI\n");
11760 if (ctxt->instate == XML_PARSER_EOF)
11762 ctxt->instate = XML_PARSER_PROLOG;
11763 ctxt->progressive = 1;
11764 } else if ((cur == '<') && (next == '!') &&
11765 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11766 if ((!terminate) &&
11767 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11768 ctxt->progressive = XML_PARSER_COMMENT;
11772 xmlGenericError(xmlGenericErrorContext,
11773 "PP: Parsing Comment\n");
11775 xmlParseComment(ctxt);
11776 if (ctxt->instate == XML_PARSER_EOF)
11778 ctxt->instate = XML_PARSER_PROLOG;
11779 ctxt->progressive = 1;
11780 } else if ((cur == '<') && (next == '!') &&
11784 ctxt->instate = XML_PARSER_START_TAG;
11785 if (ctxt->progressive == 0)
11786 ctxt->progressive = XML_PARSER_START_TAG;
11787 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11789 xmlGenericError(xmlGenericErrorContext,
11790 "PP: entering START_TAG\n");
11794 case XML_PARSER_EPILOG:
11796 if (ctxt->input->buf == NULL)
11797 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11799 avail = xmlBufUse(ctxt->input->buf->buffer) -
11800 (ctxt->input->cur - ctxt->input->base);
11803 cur = ctxt->input->cur[0];
11804 next = ctxt->input->cur[1];
11805 if ((cur == '<') && (next == '?')) {
11806 if ((!terminate) &&
11807 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11808 ctxt->progressive = XML_PARSER_PI;
11812 xmlGenericError(xmlGenericErrorContext,
11813 "PP: Parsing PI\n");
11816 if (ctxt->instate == XML_PARSER_EOF)
11818 ctxt->instate = XML_PARSER_EPILOG;
11819 ctxt->progressive = 1;
11820 } else if ((cur == '<') && (next == '!') &&
11821 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11822 if ((!terminate) &&
11823 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11824 ctxt->progressive = XML_PARSER_COMMENT;
11828 xmlGenericError(xmlGenericErrorContext,
11829 "PP: Parsing Comment\n");
11831 xmlParseComment(ctxt);
11832 if (ctxt->instate == XML_PARSER_EOF)
11834 ctxt->instate = XML_PARSER_EPILOG;
11835 ctxt->progressive = 1;
11836 } else if ((cur == '<') && (next == '!') &&
11840 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11841 xmlHaltParser(ctxt);
11843 xmlGenericError(xmlGenericErrorContext,
11844 "PP: entering EOF\n");
11846 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11847 ctxt->sax->endDocument(ctxt->userData);
11851 case XML_PARSER_DTD: {
11853 * Sorry but progressive parsing of the internal subset
11854 * is not expected to be supported. We first check that
11855 * the full content of the internal subset is available and
11856 * the parsing is launched only at that point.
11857 * Internal subset ends up with "']' S? '>'" in an unescaped
11858 * section and not in a ']]>' sequence which are conditional
11859 * sections (whoever argued to keep that crap in XML deserve
11860 * a place in hell !).
11867 base = ctxt->input->cur - ctxt->input->base;
11868 if (base < 0) return(0);
11869 if (ctxt->checkIndex > base)
11870 base = ctxt->checkIndex;
11871 buf = xmlBufContent(ctxt->input->buf->buffer);
11872 use = xmlBufUse(ctxt->input->buf->buffer);
11873 for (;(unsigned int) base < use; base++) {
11875 if (buf[base] == quote)
11879 if ((quote == 0) && (buf[base] == '<')) {
11881 /* special handling of comments */
11882 if (((unsigned int) base + 4 < use) &&
11883 (buf[base + 1] == '!') &&
11884 (buf[base + 2] == '-') &&
11885 (buf[base + 3] == '-')) {
11886 for (;(unsigned int) base + 3 < use; base++) {
11887 if ((buf[base] == '-') &&
11888 (buf[base + 1] == '-') &&
11889 (buf[base + 2] == '>')) {
11897 fprintf(stderr, "unfinished comment\n");
11904 if (buf[base] == '"') {
11908 if (buf[base] == '\'') {
11912 if (buf[base] == ']') {
11914 fprintf(stderr, "%c%c%c%c: ", buf[base],
11915 buf[base + 1], buf[base + 2], buf[base + 3]);
11917 if ((unsigned int) base +1 >= use)
11919 if (buf[base + 1] == ']') {
11920 /* conditional crap, skip both ']' ! */
11924 for (i = 1; (unsigned int) base + i < use; i++) {
11925 if (buf[base + i] == '>') {
11927 fprintf(stderr, "found\n");
11929 goto found_end_int_subset;
11931 if (!IS_BLANK_CH(buf[base + i])) {
11933 fprintf(stderr, "not found\n");
11935 goto not_end_of_int_subset;
11939 fprintf(stderr, "end of stream\n");
11944 not_end_of_int_subset:
11945 continue; /* for */
11948 * We didn't find the end of the Internal subset
11951 ctxt->checkIndex = base;
11953 ctxt->checkIndex = 0;
11956 xmlGenericError(xmlGenericErrorContext,
11957 "PP: lookup of int subset end filed\n");
11961 found_end_int_subset:
11962 ctxt->checkIndex = 0;
11963 xmlParseInternalSubset(ctxt);
11964 if (ctxt->instate == XML_PARSER_EOF)
11966 ctxt->inSubset = 2;
11967 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11968 (ctxt->sax->externalSubset != NULL))
11969 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11970 ctxt->extSubSystem, ctxt->extSubURI);
11971 ctxt->inSubset = 0;
11972 xmlCleanSpecialAttr(ctxt);
11973 if (ctxt->instate == XML_PARSER_EOF)
11975 ctxt->instate = XML_PARSER_PROLOG;
11976 ctxt->checkIndex = 0;
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: entering PROLOG\n");
11983 case XML_PARSER_COMMENT:
11984 xmlGenericError(xmlGenericErrorContext,
11985 "PP: internal error, state == COMMENT\n");
11986 ctxt->instate = XML_PARSER_CONTENT;
11988 xmlGenericError(xmlGenericErrorContext,
11989 "PP: entering CONTENT\n");
11992 case XML_PARSER_IGNORE:
11993 xmlGenericError(xmlGenericErrorContext,
11994 "PP: internal error, state == IGNORE");
11995 ctxt->instate = XML_PARSER_DTD;
11997 xmlGenericError(xmlGenericErrorContext,
11998 "PP: entering DTD\n");
12001 case XML_PARSER_PI:
12002 xmlGenericError(xmlGenericErrorContext,
12003 "PP: internal error, state == PI\n");
12004 ctxt->instate = XML_PARSER_CONTENT;
12006 xmlGenericError(xmlGenericErrorContext,
12007 "PP: entering CONTENT\n");
12010 case XML_PARSER_ENTITY_DECL:
12011 xmlGenericError(xmlGenericErrorContext,
12012 "PP: internal error, state == ENTITY_DECL\n");
12013 ctxt->instate = XML_PARSER_DTD;
12015 xmlGenericError(xmlGenericErrorContext,
12016 "PP: entering DTD\n");
12019 case XML_PARSER_ENTITY_VALUE:
12020 xmlGenericError(xmlGenericErrorContext,
12021 "PP: internal error, state == ENTITY_VALUE\n");
12022 ctxt->instate = XML_PARSER_CONTENT;
12024 xmlGenericError(xmlGenericErrorContext,
12025 "PP: entering DTD\n");
12028 case XML_PARSER_ATTRIBUTE_VALUE:
12029 xmlGenericError(xmlGenericErrorContext,
12030 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12031 ctxt->instate = XML_PARSER_START_TAG;
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: entering START_TAG\n");
12037 case XML_PARSER_SYSTEM_LITERAL:
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: internal error, state == SYSTEM_LITERAL\n");
12040 ctxt->instate = XML_PARSER_START_TAG;
12042 xmlGenericError(xmlGenericErrorContext,
12043 "PP: entering START_TAG\n");
12046 case XML_PARSER_PUBLIC_LITERAL:
12047 xmlGenericError(xmlGenericErrorContext,
12048 "PP: internal error, state == PUBLIC_LITERAL\n");
12049 ctxt->instate = XML_PARSER_START_TAG;
12051 xmlGenericError(xmlGenericErrorContext,
12052 "PP: entering START_TAG\n");
12059 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12066 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12067 ctxt->input->cur[0], ctxt->input->cur[1],
12068 ctxt->input->cur[2], ctxt->input->cur[3]);
12069 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12070 "Input is not proper UTF-8, indicate encoding !\n%s",
12071 BAD_CAST buffer, NULL);
12077 * xmlParseCheckTransition:
12078 * @ctxt: an XML parser context
12079 * @chunk: a char array
12080 * @size: the size in byte of the chunk
12082 * Check, depending on the current parser state, whether the given chunk must
12083 * be processed immediately or whether more data is needed to advance parsing.
12085 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12088 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
/*
 * Looks at ctxt->instate and ctxt->progressive to decide whether the newly
 * pushed bytes justify another parsing attempt. All states handled here use
 * the same probe: memchr() searches the first size bytes of chunk for a
 * greater-than character.
 * NOTE(review): the return statements are not part of this excerpt; the
 * function header documents -1 for an error, 0 for "push not needed" and
 * 1 for "needed" - confirm which value each branch yields.
 */
/* Invalid arguments: NULL context, NULL chunk or negative size. */
12089 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
/* The parser state is START_TAG. */
12091 if (ctxt->instate == XML_PARSER_START_TAG) {
12092 if (memchr(chunk, '>', size) != NULL)
/* progressive carries the COMMENT marker (set when a comment lookup ran out of data). */
12096 if (ctxt->progressive == XML_PARSER_COMMENT) {
12097 if (memchr(chunk, '>', size) != NULL)
/* The parser state is CDATA_SECTION. */
12101 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12102 if (memchr(chunk, '>', size) != NULL)
/* progressive carries the PI marker (set when a PI lookup ran out of data). */
12106 if (ctxt->progressive == XML_PARSER_PI) {
12107 if (memchr(chunk, '>', size) != NULL)
/* The parser state is END_TAG. */
12111 if (ctxt->instate == XML_PARSER_END_TAG) {
12112 if (memchr(chunk, '>', size) != NULL)
/* Either the progressive marker or the parser state says DTD. */
12116 if ((ctxt->progressive == XML_PARSER_DTD) ||
12117 (ctxt->instate == XML_PARSER_DTD)) {
12118 if (memchr(chunk, '>', size) != NULL)
12127 * @ctxt: an XML parser context
12128 * @chunk: a char array
12129 * @size: the size in byte of the chunk
12130 * @terminate: last chunk indicator
12132 * Parse a Chunk of memory
12134 * Returns zero if no error, the xmlParserErrors otherwise.
12137 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12141 size_t old_avail = 0;
/*
 * Push-mode driver: appends chunk to the input buffer of ctxt with
 * xmlParserInputBufferPush() and then lets xmlParseTryOrFinish() consume
 * what is available. The failure paths visible below return the error code
 * stored in ctxt->errNo.
 */
12145 return(XML_ERR_INTERNAL_ERROR);
/* An error was already recorded and SAX is disabled: report it again without doing any work. */
12146 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12147 return(ctxt->errNo);
12148 if (ctxt->instate == XML_PARSER_EOF)
/* Still in the START state: run xmlDetectSAX2() on the context before any data is handled. */
12150 if (ctxt->instate == XML_PARSER_START)
12151 xmlDetectSAX2(ctxt);
/*
 * The chunk ends with a carriage return and this is not the final call.
 * NOTE(review): the body of this branch is not visible in this excerpt;
 * presumably it sets end_in_lf and holds the CR back, since a later block
 * pushes "\r" again when end_in_lf == 1 - confirm.
 */
12152 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12153 (chunk[size - 1] == '\r')) {
/* There is new data to append and the parser has not reached EOF. */
12160 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12161 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/* base and cur are remembered as offsets and re-applied with xmlBufSetInputBaseCur() after the push. */
12162 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12163 size_t cur = ctxt->input->cur - ctxt->input->base;
/* Fill level of the buffer before the push; used further down to locate the newly added bytes. */
12166 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12168 * Specific handling if we autodetected an encoding, we should not
12169 * push more than the first line ... which depend on the encoding
12170 * And only push the rest once the final encoding was detected
12172 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12173 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
/*
 * len caps how much of this first push is accepted: 45 by default,
 * presumably changed for UTF-16 / UCS-4 encoder names on lines not shown
 * here (TODO confirm), and reduced below by the raw bytes already consumed.
 */
12174 unsigned int len = 45;
12176 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177 BAD_CAST "UTF-16")) ||
12178 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12179 BAD_CAST "UTF16")))
12181 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182 BAD_CAST "UCS-4")) ||
12183 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12187 if (ctxt->input->buf->rawconsumed < len)
12188 len -= ctxt->input->buf->rawconsumed;
12191 * Change size for reading the initial declaration only
12192 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12193 * will blindly copy extra bytes from memory.
12195 if ((unsigned int) size > len) {
/* The surplus beyond len is recorded in remain. */
12196 remain = size - len;
/* Append the (possibly shortened) chunk to the input buffer. */
12202 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/*
 * NOTE(review): XML_PARSER_EOF is the parser-state constant that instate is
 * compared against elsewhere in this function, yet here it is stored in
 * errNo and returned as the error code - looks like two enumerations are
 * being mixed; confirm the intended error value.
 */
12204 ctxt->errNo = XML_PARSER_EOF;
12205 xmlHaltParser(ctxt);
12206 return (XML_PARSER_EOF);
/* Re-apply the offsets saved before the push. */
12208 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12210 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* Nothing was appended: if an encoder with raw data is attached, run it via xmlCharEncInput(). */
12213 } else if (ctxt->instate != XML_PARSER_EOF) {
12214 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12215 xmlParserInputBufferPtr in = ctxt->input->buf;
12216 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12217 (in->raw != NULL)) {
12219 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12220 size_t current = ctxt->input->cur - ctxt->input->base;
/* Conversion is told whether this is the final call through terminate. */
12222 nbchars = xmlCharEncInput(in, terminate);
12225 xmlGenericError(xmlGenericErrorContext,
12226 "xmlParseChunk: encoder error\n");
12227 return(XML_ERR_INVALID_ENCODING);
12229 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
/*
 * NOTE(review): the condition guarding this call is not visible in this
 * excerpt; presumably it is taken while part of the chunk is still held
 * back in remain - confirm.
 */
12234 xmlParseTryOrFinish(ctxt, 0);
12236 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12237 avail = xmlBufUse(ctxt->input->buf->buffer);
12239 * Depending on the current state it may not be such
12240 * a good idea to try parsing if there is nothing in the chunk
12241 * which would be worth doing a parser state transition and we
12242 * need to wait for more data
12244 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12245 (old_avail == 0) || (avail == 0) ||
12246 (xmlParseCheckTransition(ctxt,
12247 (const char *)&ctxt->input->base[old_avail],
12248 avail - old_avail)))
12249 xmlParseTryOrFinish(ctxt, terminate);
/* The parser reached EOF while parsing: return the recorded error code. */
12251 if (ctxt->instate == XML_PARSER_EOF)
12252 return(ctxt->errNo);
/*
 * Without XML_PARSE_HUGE, halt with an internal error when more than
 * XML_MAX_LOOKUP_LIMIT bytes lie after or before cur in the current input.
 */
12254 if ((ctxt->input != NULL) &&
12255 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12256 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12257 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12258 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12259 xmlHaltParser(ctxt);
12261 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12262 return(ctxt->errNo);
/* end_in_lf is set: append a single CR to the input buffer, re-applying the saved offsets afterwards. */
12270 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12271 (ctxt->input->buf != NULL)) {
12272 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12274 size_t current = ctxt->input->cur - ctxt->input->base;
12276 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12278 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12283 * Check for termination
12287 if (ctxt->input != NULL) {
/* cur_avail: bytes of the current input that have not been read yet. */
12288 if (ctxt->input->buf == NULL)
12289 cur_avail = ctxt->input->length -
12290 (ctxt->input->cur - ctxt->input->base);
12292 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12293 (ctxt->input->cur - ctxt->input->base);
/*
 * Ending in a state other than EOF or EPILOG, or in EPILOG with unread
 * bytes left, raises XML_ERR_DOCUMENT_END.
 */
12296 if ((ctxt->instate != XML_PARSER_EOF) &&
12297 (ctxt->instate != XML_PARSER_EPILOG)) {
12298 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12300 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12301 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* endDocument is still delivered unless the parser had already reached EOF. */
12303 if (ctxt->instate != XML_PARSER_EOF) {
12304 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12305 ctxt->sax->endDocument(ctxt->userData);
12307 ctxt->instate = XML_PARSER_EOF;
/* Document not well formed: hand back the recorded error code. */
12309 if (ctxt->wellFormed == 0)
12310 return((xmlParserErrors) ctxt->errNo);
12315 /************************************************************************
12317 * I/O front end functions to the parser *
12319 ************************************************************************/
12322 * xmlCreatePushParserCtxt:
12323 * @sax: a SAX handler
12324 * @user_data: The user data returned on SAX callbacks
12325 * @chunk: a pointer to an array of chars
12326 * @size: number of chars in the array
12327 * @filename: an optional file name or URI
12329 * Create a parser context for using the XML parser in push mode.
12330 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12331 * the encoding. The remaining characters will be parsed so they
12332 * don't need to be fed in again through xmlParseChunk.
12333 * To allow content encoding detection, @size should be >= 4
12334 * The value of @filename is used for fetching external entities
12335 * and error/warning reports.
12337 * Returns the new parser context or NULL
12341 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12342 const char *chunk, int size, const char *filename) {
/*
 * Builds a parser context for push parsing: allocates an input buffer and
 * a context, installs a private copy of the caller SAX handler, creates
 * the input stream and, when an initial chunk is supplied, pushes it into
 * the buffer. The failure paths visible below free ctxt and buf.
 */
12343 xmlParserCtxtPtr ctxt;
12344 xmlParserInputPtr inputStream;
12345 xmlParserInputBufferPtr buf;
12346 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12349 * plug some encoding conversion routines
12351 if ((chunk != NULL) && (size >= 4))
12352 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
/* Detection is only attempted with at least 4 bytes; otherwise enc stays XML_CHAR_ENCODING_NONE. */
12354 buf = xmlAllocParserInputBuffer(enc);
12355 if (buf == NULL) return(NULL);
12357 ctxt = xmlNewParserCtxt();
12358 if (ctxt == NULL) {
12359 xmlErrMemory(NULL, "creating parser: out of memory\n");
12360 xmlFreeParserInputBuffer(buf);
12363 ctxt->dictNames = 1;
/* pushTab is sized at three pointer slots per entry of the name stack (nameMax entries). */
12364 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12365 if (ctxt->pushTab == NULL) {
12366 xmlErrMemory(ctxt, NULL);
12367 xmlFreeParserInputBuffer(buf);
12368 xmlFreeParserCtxt(ctxt);
/*
 * Replace the handler of the context with a private copy of sax.
 * NOTE(review): sax is dereferenced below (sax->initialized); the guard
 * for a NULL sax is not visible in this excerpt - confirm it exists.
 */
12372 #ifdef LIBXML_SAX1_ENABLED
12373 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12374 #endif /* LIBXML_SAX1_ENABLED */
12375 xmlFree(ctxt->sax);
12376 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12377 if (ctxt->sax == NULL) {
12378 xmlErrMemory(ctxt, NULL);
12379 xmlFreeParserInputBuffer(buf);
12380 xmlFreeParserCtxt(ctxt);
/*
 * Start from a zeroed handler, then copy either the whole xmlSAXHandler
 * (when sax->initialized equals XML_SAX2_MAGIC) or only the leading
 * xmlSAXHandlerV1-sized part.
 */
12383 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12384 if (sax->initialized == XML_SAX2_MAGIC)
12385 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12387 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12388 if (user_data != NULL)
12389 ctxt->userData = user_data;
/* The directory of the context is derived from filename when one is given. */
12391 if (filename == NULL) {
12392 ctxt->directory = NULL;
12394 ctxt->directory = xmlParserGetDirectory(filename);
12397 inputStream = xmlNewInputStream(ctxt);
12398 if (inputStream == NULL) {
12399 xmlFreeParserCtxt(ctxt);
12400 xmlFreeParserInputBuffer(buf);
12404 if (filename == NULL)
12405 inputStream->filename = NULL;
12407 inputStream->filename = (char *)
12408 xmlCanonicPath((const xmlChar *) filename);
/*
 * NOTE(review): on this failure path ctxt and buf are freed but inputStream
 * is not, and it has not yet been attached to ctxt (inputPush happens
 * further down) - this looks like a leak; confirm whether a call to
 * xmlFreeInputStream(inputStream) is missing.
 */
12409 if (inputStream->filename == NULL) {
12410 xmlFreeParserCtxt(ctxt);
12411 xmlFreeParserInputBuffer(buf);
/* Attach buf to the stream, reset the stream pointers from it and push the stream on the input stack. */
12415 inputStream->buf = buf;
12416 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12417 inputPush(ctxt, inputStream);
12420 * If the caller didn't provide an initial 'chunk' for determining
12421 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12422 * that it can be automatically determined later
12424 if ((size == 0) || (chunk == NULL)) {
12425 ctxt->charset = XML_CHAR_ENCODING_NONE;
12426 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12427 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12428 size_t cur = ctxt->input->cur - ctxt->input->base;
12430 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12432 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12434 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12438 if (enc != XML_CHAR_ENCODING_NONE) {
12439 xmlSwitchEncoding(ctxt, enc);
12444 #endif /* LIBXML_PUSH_ENABLED */
12448 * @ctxt: an XML parser context
12450 * Blocks further parser processing; does not override the error
12454 xmlHaltParser(xmlParserCtxtPtr ctxt) {
/*
 * Puts the parser in the EOF state with SAX callbacks disabled, discards
 * all nested inputs and empties the remaining one. ctxt->errNo is not
 * modified by the lines visible here.
 */
12457 ctxt->instate = XML_PARSER_EOF;
12458 ctxt->disableSAX = 1;
/* Pop and free inputs until a single one is left on the stack. */
12459 while (ctxt->inputNr > 1)
12460 xmlFreeInputStream(inputPop(ctxt));
12461 if (ctxt->input != NULL) {
12463 * in case there was a specific allocation deallocate before
12466 if (ctxt->input->free != NULL) {
12467 ctxt->input->free((xmlChar *) ctxt->input->base);
12468 ctxt->input->free = NULL;
/* cur, base and end all point at the same empty string literal, so the input has zero length. */
12470 ctxt->input->cur = BAD_CAST"";
12471 ctxt->input->base = ctxt->input->cur;
12472 ctxt->input->end = ctxt->input->cur;
12478 * @ctxt: an XML parser context
12480 * Blocks further parser processing
12483 xmlStopParser(xmlParserCtxtPtr ctxt) {
/* Halts the parser through xmlHaltParser() and then records XML_ERR_USER_STOP as the error code. */
12486 xmlHaltParser(ctxt);
12487 ctxt->errNo = XML_ERR_USER_STOP;
12491 * xmlCreateIOParserCtxt:
12492 * @sax: a SAX handler
12493 * @user_data: The user data returned on SAX callbacks
12494 * @ioread: an I/O read function
12495 * @ioclose: an I/O close function
12496 * @ioctx: an I/O handler
12497 * @enc: the charset encoding if known
12499 * Create a parser context for using the XML parser with an existing
12502 * Returns the new parser context or NULL
12505 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12506 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12507 void *ioctx, xmlCharEncoding enc) {
/*
 * Builds a parser context that reads through caller-supplied I/O
 * callbacks: wraps them in an input buffer, creates the context, installs
 * a private copy of the caller SAX handler and pushes an input stream
 * built on the buffer.
 */
12508 xmlParserCtxtPtr ctxt;
12509 xmlParserInputPtr inputStream;
12510 xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
12512 if (ioread == NULL) return(NULL);
12514 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
/*
 * NOTE(review): the statements around this test are not visible in this
 * excerpt; presumably this is the buffer-creation failure path, closing
 * ioctx through ioclose before returning - confirm.
 */
12516 if (ioclose != NULL)
12521 ctxt = xmlNewParserCtxt();
12522 if (ctxt == NULL) {
12523 xmlFreeParserInputBuffer(buf);
/* Replace the handler of the context with a private copy of sax (the NULL guard for sax is not visible here). */
12527 #ifdef LIBXML_SAX1_ENABLED
12528 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12529 #endif /* LIBXML_SAX1_ENABLED */
12530 xmlFree(ctxt->sax);
12531 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
/*
 * NOTE(review): buf is not released on this failure path, unlike the
 * ctxt == NULL path above - possible leak; confirm.
 */
12532 if (ctxt->sax == NULL) {
12533 xmlErrMemory(ctxt, NULL);
12534 xmlFreeParserCtxt(ctxt);
/* Zeroed handler first, then the full SAX2 struct or only the V1-sized part depending on sax->initialized. */
12537 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12538 if (sax->initialized == XML_SAX2_MAGIC)
12539 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12542 if (user_data != NULL)
12543 ctxt->userData = user_data;
/* Wrap buf in an input stream using the requested encoding. */
12546 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
/*
 * NOTE(review): only ctxt is freed here; whether xmlNewIOInputStream()
 * disposes of buf when it fails cannot be told from this file excerpt -
 * verify that buf does not leak.
 */
12547 if (inputStream == NULL) {
12548 xmlFreeParserCtxt(ctxt);
12551 inputPush(ctxt, inputStream);
12556 #ifdef LIBXML_VALID_ENABLED
12557 /************************************************************************
12559 * Front ends when parsing a DTD *
12561 ************************************************************************/
12565 * @sax: the SAX handler block or NULL
12566 * @input: an Input Buffer
12567 * @enc: the charset encoding if known
12569 * Load and parse a DTD
12571 * Returns the resulting xmlDtdPtr or NULL in case of error.
12572 * @input will be freed by the function in any case.
12576 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12577 xmlCharEncoding enc) {
/*
 * Parses a DTD read from the input buffer: a temporary context and a
 * temporary document are created, the content is parsed as an external
 * subset, and on a well-formed result the extSubset DTD is detached from
 * the temporary document and kept in ret.
 */
12578 xmlDtdPtr ret = NULL;
12579 xmlParserCtxtPtr ctxt;
12580 xmlParserInputPtr pinput = NULL;
12586 ctxt = xmlNewParserCtxt();
12587 if (ctxt == NULL) {
12588 xmlFreeParserInputBuffer(input);
12592 /* We are loading a DTD */
12593 ctxt->options |= XML_PARSE_DTDLOAD;
12596 * Set-up the SAX context
12599 if (ctxt->sax != NULL)
12600 xmlFree(ctxt->sax);
12602 ctxt->userData = ctxt;
12604 xmlDetectSAX2(ctxt);
12607 * generate a parser input from the I/O handler
12610 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12611 if (pinput == NULL) {
/* ctxt->sax is cleared first when the caller supplied sax; presumably so xmlFreeParserCtxt() leaves the caller handler alone. */
12612 if (sax != NULL) ctxt->sax = NULL;
12613 xmlFreeParserInputBuffer(input);
12614 xmlFreeParserCtxt(ctxt);
12619 * plug some encoding conversion routines here.
12621 if (xmlPushInput(ctxt, pinput) < 0) {
12622 if (sax != NULL) ctxt->sax = NULL;
12623 xmlFreeParserCtxt(ctxt);
/* Switch to the encoding requested by the caller, if any. */
12626 if (enc != XML_CHAR_ENCODING_NONE) {
12627 xmlSwitchEncoding(ctxt, enc);
12630 pinput->filename = NULL;
/* Restart base and cur of pinput at the current read position; no custom deallocator. */
12633 pinput->base = ctxt->input->cur;
12634 pinput->cur = ctxt->input->cur;
12635 pinput->free = NULL;
12638 * let's parse that entity knowing it's an external subset.
12640 ctxt->inSubset = 2;
/* Temporary document that will own the DTD while it is being parsed. */
12641 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12642 if (ctxt->myDoc == NULL) {
/*
 * NOTE(review): no xmlFreeParserCtxt(ctxt) is visible on this failure
 * path, whereas xmlSAXParseDTD frees its context at the same point -
 * possible leak; the lines that follow are not shown, confirm.
 */
12643 xmlErrMemory(ctxt, "New Doc failed");
12646 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12647 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12648 BAD_CAST "none", BAD_CAST "none");
/* No encoding requested and at least four bytes available: try automatic detection. */
12650 if ((enc == XML_CHAR_ENCODING_NONE) &&
12651 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12653 * Get the 4 first bytes and decode the charset
12654 * if enc != XML_CHAR_ENCODING_NONE
12655 * plug some encoding conversion routines.
12661 enc = xmlDetectCharEncoding(start, 4);
12662 if (enc != XML_CHAR_ENCODING_NONE) {
12663 xmlSwitchEncoding(ctxt, enc);
/* Parse the input as an external subset with placeholder identifiers. */
12667 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
/* Well formed: take extSubset out of the temporary document before that document is freed. */
12669 if (ctxt->myDoc != NULL) {
12670 if (ctxt->wellFormed) {
12671 ret = ctxt->myDoc->extSubset;
12672 ctxt->myDoc->extSubset = NULL;
/* Walk the children of the detached DTD (the loop body is not visible in this excerpt). */
12677 tmp = ret->children;
12678 while (tmp != NULL) {
12686 xmlFreeDoc(ctxt->myDoc);
12687 ctxt->myDoc = NULL;
/* Same detach-then-free sequence as on the error paths above. */
12689 if (sax != NULL) ctxt->sax = NULL;
12690 xmlFreeParserCtxt(ctxt);
12697 * @sax: the SAX handler block
12698 * @ExternalID: a NAME* containing the External ID of the DTD
12699 * @SystemID: a NAME* containing the URL to the DTD
12701 * Load and parse an external subset.
12703 * Returns the resulting xmlDtdPtr or NULL in case of error.
12707 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12708 const xmlChar *SystemID) {
/*
 * Loads a DTD identified by ExternalID / SystemID: the input is obtained
 * from the resolveEntity callback of the context handler, parsed as an
 * external subset inside a temporary document, and on a well-formed
 * result the extSubset DTD is detached from that document and kept in ret.
 */
12709 xmlDtdPtr ret = NULL;
12710 xmlParserCtxtPtr ctxt;
12711 xmlParserInputPtr input = NULL;
12712 xmlCharEncoding enc;
12713 xmlChar* systemIdCanonic;
/* At least one of the two identifiers is required. */
12715 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12717 ctxt = xmlNewParserCtxt();
12718 if (ctxt == NULL) {
12722 /* We are loading a DTD */
12723 ctxt->options |= XML_PARSE_DTDLOAD;
12726 * Set-up the SAX context
12729 if (ctxt->sax != NULL)
12730 xmlFree(ctxt->sax);
12732 ctxt->userData = ctxt;
12736 * Canonicalise the system ID
12738 systemIdCanonic = xmlCanonicPath(SystemID);
/*
 * NOTE(review): ctxt is freed here without the "if (sax != NULL)
 * ctxt->sax = NULL;" step that precedes every other xmlFreeParserCtxt()
 * call in this function. If ctxt->sax was pointed at the caller handler
 * above (that line is not visible), this path may free memory owned by
 * the caller - confirm.
 */
12739 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12740 xmlFreeParserCtxt(ctxt);
12745 * Ask the Entity resolver to load the damn thing
12748 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12749 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
/* No input could be resolved: detach sax, free the context and the canonical id. */
12751 if (input == NULL) {
12752 if (sax != NULL) ctxt->sax = NULL;
12753 xmlFreeParserCtxt(ctxt);
12754 if (systemIdCanonic != NULL)
12755 xmlFree(systemIdCanonic);
12760 * plug some encoding conversion routines here.
12762 if (xmlPushInput(ctxt, input) < 0) {
12763 if (sax != NULL) ctxt->sax = NULL;
12764 xmlFreeParserCtxt(ctxt);
12765 if (systemIdCanonic != NULL)
12766 xmlFree(systemIdCanonic);
/* With at least four bytes available, detect the encoding from them and switch to it. */
12769 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12770 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12771 xmlSwitchEncoding(ctxt, enc);
/*
 * systemIdCanonic becomes the filename of input when none is set;
 * the xmlFree() below presumably sits under an else that is not shown.
 */
12774 if (input->filename == NULL)
12775 input->filename = (char *) systemIdCanonic;
12777 xmlFree(systemIdCanonic);
/* Restart base and cur of input at the current read position; no custom deallocator. */
12780 input->base = ctxt->input->cur;
12781 input->cur = ctxt->input->cur;
12782 input->free = NULL;
12785 * let's parse that entity knowing it's an external subset.
12787 ctxt->inSubset = 2;
/* Temporary document that will own the DTD while it is being parsed. */
12788 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12789 if (ctxt->myDoc == NULL) {
12790 xmlErrMemory(ctxt, "New Doc failed");
12791 if (sax != NULL) ctxt->sax = NULL;
12792 xmlFreeParserCtxt(ctxt);
12795 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12796 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12797 ExternalID, SystemID);
12798 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
/* Well formed: take extSubset out of the temporary document before that document is freed. */
12800 if (ctxt->myDoc != NULL) {
12801 if (ctxt->wellFormed) {
12802 ret = ctxt->myDoc->extSubset;
12803 ctxt->myDoc->extSubset = NULL;
/* Walk the children of the detached DTD (the loop body is not visible in this excerpt). */
12808 tmp = ret->children;
12809 while (tmp != NULL) {
12817 xmlFreeDoc(ctxt->myDoc);
12818 ctxt->myDoc = NULL;
/* Detach the caller handler, then free the context. */
12820 if (sax != NULL) ctxt->sax = NULL;
12821 xmlFreeParserCtxt(ctxt);
12829 * @ExternalID: a NAME* containing the External ID of the DTD
12830 * @SystemID: a NAME* containing the URL to the DTD
12832 * Load and parse an external subset.
12834 * Returns the resulting xmlDtdPtr or NULL in case of error.
12838 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
/* Thin wrapper: same as xmlSAXParseDTD() with a NULL SAX handler. */
12839 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12841 #endif /* LIBXML_VALID_ENABLED */
12843 /************************************************************************
12845 * Front ends when parsing an Entity *
12847 ************************************************************************/
12850 * xmlParseCtxtExternalEntity:
12851 * @ctx: the existing parsing context
12852 * @URL: the URL for the entity to load
12853 * @ID: the System ID for the entity to load
12854 * @lst: the return value for the set of parsed nodes
12856 * Parse an external general entity within an existing parsing context
12857 * An external general parsed entity is well-formed if it matches the
12858 * production labeled extParsedEnt.
12860 * [78] extParsedEnt ::= TextDecl? content
12862 * Returns 0 if the entity is well formed, -1 in case of args problem and
12863 * the parser error code otherwise
12867 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12868 const xmlChar *ID, xmlNodePtr *lst) {
/*
 * Parses the entity at URL/ID with a child context (ctxt) that borrows the
 * SAX handler, dictionary and attribute tables of the parent context (ctx).
 * Content is built under a temporary "pseudoroot" element of a scratch
 * document (newDoc).
 */
12869 xmlParserCtxtPtr ctxt;
12871 xmlNodePtr newRoot;
12872 xmlSAXHandlerPtr oldsax = NULL;
12875 xmlCharEncoding enc;
12877 if (ctx == NULL) return(-1);
/*
 * Nesting guard: a depth above 40 is rejected unless XML_PARSE_HUGE is set,
 * and a depth above 1024 is rejected regardless.
 */
12879 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12880 (ctx->depth > 1024)) {
12881 return(XML_ERR_ENTITY_LOOP);
12886 if ((URL == NULL) && (ID == NULL))
12888 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12891 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12892 if (ctxt == NULL) {
/* Borrow the parent's SAX handler; oldsax remembers the child's own one. */
12896 oldsax = ctxt->sax;
12897 ctxt->sax = ctx->sax;
12898 xmlDetectSAX2(ctxt);
12899 newDoc = xmlNewDoc(BAD_CAST "1.0");
12900 if (newDoc == NULL) {
/*
 * NOTE(review): ctxt->sax still points at ctx->sax here and oldsax has not
 * been restored, unlike the newRoot == NULL path below, which sets
 * "ctxt->sax = oldsax" before freeing. Confirm that freeing ctxt in this
 * state cannot release the parent's handler or leak oldsax.
 */
12901 xmlFreeParserCtxt(ctxt);
12904 newDoc->properties = XML_DOC_INTERNAL;
/*
 * NOTE(review): ctx->myDoc is dereferenced here without a test, while the
 * statements below still test it against NULL; given the early check on
 * ctx->myDoc above, those later tests look redundant - confirm.
 */
12905 if (ctx->myDoc->dict) {
12906 newDoc->dict = ctx->myDoc->dict;
12907 xmlDictReference(newDoc->dict);
/* The scratch document shares the parent's DTD subsets by pointer. */
12909 if (ctx->myDoc != NULL) {
12910 newDoc->intSubset = ctx->myDoc->intSubset;
12911 newDoc->extSubset = ctx->myDoc->extSubset;
12913 if (ctx->myDoc->URL != NULL) {
12914 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12916 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12917 if (newRoot == NULL) {
/* Restore the child's own handler and detach the shared subsets before freeing. */
12918 ctxt->sax = oldsax;
12919 xmlFreeParserCtxt(ctxt);
12920 newDoc->intSubset = NULL;
12921 newDoc->extSubset = NULL;
12922 xmlFreeDoc(newDoc);
12925 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12926 nodePush(ctxt, newDoc->children);
12927 if (ctx->myDoc == NULL) {
12928 ctxt->myDoc = newDoc;
12930 ctxt->myDoc = ctx->myDoc;
12931 newDoc->children->doc = ctx->myDoc;
12935 * Get the 4 first bytes and decode the charset
12936 * if enc != XML_CHAR_ENCODING_NONE
12937 * plug some encoding conversion routines.
12940 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12945 enc = xmlDetectCharEncoding(start, 4);
12946 if (enc != XML_CHAR_ENCODING_NONE) {
12947 xmlSwitchEncoding(ctxt, enc);
12952 * Parse a possible text declaration first
12954 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12955 xmlParseTextDecl(ctxt);
12957 * An XML-1.0 document can't reference an entity not XML-1.0
12959 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12960 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12961 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12962 "Version mismatch between document and entity\n");
12967 * If the user provided its own SAX callbacks then reuse the
12968 * useData callback field, otherwise the expected setup in a
12969 * DOM builder is to have userData == ctxt
12971 if (ctx->userData == ctx)
12972 ctxt->userData = ctxt;
12974 ctxt->userData = ctx->userData;
12977 * Doing validity checking on chunk doesn't make sense
/* Mirror the parent's parsing state into the child, one level deeper. */
12979 ctxt->instate = XML_PARSER_CONTENT;
12980 ctxt->validate = ctx->validate;
12981 ctxt->valid = ctx->valid;
12982 ctxt->loadsubset = ctx->loadsubset;
12983 ctxt->depth = ctx->depth + 1;
12984 ctxt->replaceEntities = ctx->replaceEntities;
12985 if (ctxt->validate) {
12986 ctxt->vctxt.error = ctx->vctxt.error;
12987 ctxt->vctxt.warning = ctx->vctxt.warning;
12989 ctxt->vctxt.error = NULL;
12990 ctxt->vctxt.warning = NULL;
12992 ctxt->vctxt.nodeTab = NULL;
12993 ctxt->vctxt.nodeNr = 0;
12994 ctxt->vctxt.nodeMax = 0;
12995 ctxt->vctxt.node = NULL;
/*
 * The child's own dictionary is released and replaced by the parent's.
 * No xmlDictReference() is taken for this assignment here.
 */
12996 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12997 ctxt->dict = ctx->dict;
12998 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12999 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13000 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13001 ctxt->dictNames = ctx->dictNames;
13002 ctxt->attsDefault = ctx->attsDefault;
13003 ctxt->attsSpecial = ctx->attsSpecial;
13004 ctxt->linenumbers = ctx->linenumbers;
13006 xmlParseContent(ctxt);
/* Report the child's validation state back to the parent. */
13008 ctx->validate = ctxt->validate;
13009 ctx->valid = ctxt->valid;
/* Anything left in the input after the content is an error. */
13010 if ((RAW == '<') && (NXT(1) == '/')) {
13011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13012 } else if (RAW != 0) {
13013 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot pushed above. */
13015 if (ctxt->node != newDoc->children) {
13016 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13019 if (!ctxt->wellFormed) {
13020 if (ctxt->errNo == 0)
13029 * Return the newly created nodeset after unlinking it from
13030 * they pseudo parent.
13032 cur = newDoc->children->children;
13034 while (cur != NULL) {
13035 cur->parent = NULL;
13038 newDoc->children->children = NULL;
13042 ctxt->sax = oldsax;
/* attsDefault/attsSpecial were copied from ctx above; clear them before the free. */
13044 ctxt->attsDefault = NULL;
13045 ctxt->attsSpecial = NULL;
13046 xmlFreeParserCtxt(ctxt);
/* The subsets were shared from ctx->myDoc; detach them before freeing newDoc. */
13047 newDoc->intSubset = NULL;
13048 newDoc->extSubset = NULL;
13049 xmlFreeDoc(newDoc);
13055 * xmlParseExternalEntityPrivate:
13056 * @doc: the document the chunk pertains to
13057 * @oldctxt: the previous parser context if available
13058 * @sax: the SAX handler block (possibly NULL)
13059 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13060 * @depth: Used for loop detection, use 0
13061 * @URL: the URL for the entity to load
13062 * @ID: the System ID for the entity to load
13063 * @list: the return value for the set of parsed nodes
13065 * Private version of xmlParseExternalEntity()
13067 * Returns 0 if the entity is well formed, -1 in case of args problem and
13068 * the parser error code otherwise
/*
 * Worker behind xmlParseExternalEntity(): parses the entity at URL/ID with a
 * fresh context, optionally inheriting settings from oldctxt, and builds the
 * content under a temporary "pseudoroot" element of a scratch document.
 */
13071 static xmlParserErrors
13072 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13073 xmlSAXHandlerPtr sax,
13074 void *user_data, int depth, const xmlChar *URL,
13075 const xmlChar *ID, xmlNodePtr *list) {
13076 xmlParserCtxtPtr ctxt;
13078 xmlNodePtr newRoot;
13079 xmlSAXHandlerPtr oldsax = NULL;
13080 xmlParserErrors ret = XML_ERR_OK;
13082 xmlCharEncoding enc;
/* Nesting guard; the depth-40 limit is lifted only when oldctxt has XML_PARSE_HUGE. */
13084 if (((depth > 40) &&
13085 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13087 return(XML_ERR_ENTITY_LOOP);
13092 if ((URL == NULL) && (ID == NULL))
13093 return(XML_ERR_INTERNAL_ERROR);
13095 return(XML_ERR_INTERNAL_ERROR);
13098 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
/* NOTE(review): a context-creation failure is reported as
 * XML_WAR_UNDECLARED_ENTITY rather than an XML_ERR_* code - confirm intent. */
13099 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13100 ctxt->userData = ctxt;
13101 if (oldctxt != NULL) {
/* Inherit the parent's settings; node_seq.buffer is shared by pointer. */
13102 ctxt->_private = oldctxt->_private;
13103 ctxt->loadsubset = oldctxt->loadsubset;
13104 ctxt->validate = oldctxt->validate;
13105 ctxt->external = oldctxt->external;
13106 ctxt->record_info = oldctxt->record_info;
13107 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13108 ctxt->node_seq.length = oldctxt->node_seq.length;
13109 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13112 * Doing validity checking on chunk without context
13113 * doesn't make sense
13115 ctxt->_private = NULL;
13116 ctxt->validate = 0;
13117 ctxt->external = 2;
13118 ctxt->loadsubset = 0;
13121 oldsax = ctxt->sax;
13123 if (user_data != NULL)
13124 ctxt->userData = user_data;
13126 xmlDetectSAX2(ctxt);
13127 newDoc = xmlNewDoc(BAD_CAST "1.0");
13128 if (newDoc == NULL) {
/* node_seq may hold the parent's buffer (copied above); clear it before the free. */
13129 ctxt->node_seq.maximum = 0;
13130 ctxt->node_seq.length = 0;
13131 ctxt->node_seq.buffer = NULL;
13132 xmlFreeParserCtxt(ctxt);
13133 return(XML_ERR_INTERNAL_ERROR);
13135 newDoc->properties = XML_DOC_INTERNAL;
/* The scratch document shares doc's subsets by pointer and takes a reference on its dict. */
13136 newDoc->intSubset = doc->intSubset;
13137 newDoc->extSubset = doc->extSubset;
13138 newDoc->dict = doc->dict;
13139 xmlDictReference(newDoc->dict);
13141 if (doc->URL != NULL) {
13142 newDoc->URL = xmlStrdup(doc->URL);
13144 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13145 if (newRoot == NULL) {
13147 ctxt->sax = oldsax;
13148 ctxt->node_seq.maximum = 0;
13149 ctxt->node_seq.length = 0;
13150 ctxt->node_seq.buffer = NULL;
13151 xmlFreeParserCtxt(ctxt);
13152 newDoc->intSubset = NULL;
13153 newDoc->extSubset = NULL;
13154 xmlFreeDoc(newDoc);
13155 return(XML_ERR_INTERNAL_ERROR);
13157 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13158 nodePush(ctxt, newDoc->children);
13160 newRoot->doc = doc;
13163 * Get the 4 first bytes and decode the charset
13164 * if enc != XML_CHAR_ENCODING_NONE
13165 * plug some encoding conversion routines.
13168 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13173 enc = xmlDetectCharEncoding(start, 4);
13174 if (enc != XML_CHAR_ENCODING_NONE) {
13175 xmlSwitchEncoding(ctxt, enc);
13180 * Parse a possible text declaration first
13182 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13183 xmlParseTextDecl(ctxt);
13186 ctxt->instate = XML_PARSER_CONTENT;
13187 ctxt->depth = depth;
13189 xmlParseContent(ctxt);
/* Anything left in the input after the content is an error. */
13191 if ((RAW == '<') && (NXT(1) == '/')) {
13192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13193 } else if (RAW != 0) {
13194 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot pushed above. */
13196 if (ctxt->node != newDoc->children) {
13197 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13200 if (!ctxt->wellFormed) {
/* Not well formed but no error number recorded: report an internal error. */
13201 if (ctxt->errNo == 0)
13202 ret = XML_ERR_INTERNAL_ERROR;
13204 ret = (xmlParserErrors)ctxt->errNo;
13206 if (list != NULL) {
13210 * Return the newly created nodeset after unlinking it from
13211 * they pseudo parent.
13213 cur = newDoc->children->children;
13215 while (cur != NULL) {
13216 cur->parent = NULL;
13219 newDoc->children->children = NULL;
13225 * Record in the parent context the number of entities replacement
13226 * done when parsing that reference.
13228 if (oldctxt != NULL)
13229 oldctxt->nbentities += ctxt->nbentities;
13232 * Also record the size of the entity parsed
13234 if (ctxt->input != NULL && oldctxt != NULL) {
13235 oldctxt->sizeentities += ctxt->input->consumed;
13236 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13239 * And record the last error if any
13241 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13242 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13245 ctxt->sax = oldsax;
13246 if (oldctxt != NULL) {
/* Hand the node_seq state back to the parent before clearing it in the child. */
13247 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13248 oldctxt->node_seq.length = ctxt->node_seq.length;
13249 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13251 ctxt->node_seq.maximum = 0;
13252 ctxt->node_seq.length = 0;
13253 ctxt->node_seq.buffer = NULL;
13254 xmlFreeParserCtxt(ctxt);
/* The subsets were shared from doc; detach them before freeing newDoc. */
13255 newDoc->intSubset = NULL;
13256 newDoc->extSubset = NULL;
13257 xmlFreeDoc(newDoc);
13262 #ifdef LIBXML_SAX1_ENABLED
13264 * xmlParseExternalEntity:
13265 * @doc: the document the chunk pertains to
13266 * @sax: the SAX handler block (possibly NULL)
13267 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13268 * @depth: Used for loop detection, use 0
13269 * @URL: the URL for the entity to load
13270 * @ID: the System ID for the entity to load
13271 * @lst: the return value for the set of parsed nodes
13273 * Parse an external general entity
13274 * An external general parsed entity is well-formed if it matches the
13275 * production labeled extParsedEnt.
13277 * [78] extParsedEnt ::= TextDecl? content
13279 * Returns 0 if the entity is well formed, -1 in case of args problem and
13280 * the parser error code otherwise
13284 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13285 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
/* Public entry point: forwards to the private worker, passing NULL as its
 * oldctxt (parent parser context) argument. */
13286 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13291 * xmlParseBalancedChunkMemory:
13292 * @doc: the document the chunk pertains to
13293 * @sax: the SAX handler block (possibly NULL)
13294 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13295 * @depth: Used for loop detection, use 0
13296 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13297 * @lst: the return value for the set of parsed nodes
13299 * Parse a well-balanced chunk of an XML document
13300 * called by the parser
13301 * The allowed sequence for the Well Balanced Chunk is the one defined by
13302 * the content production in the XML grammar:
13304 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13306 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13307 * the parser error code otherwise
13311 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13312 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
/* Same as xmlParseBalancedChunkMemoryRecover() with its last argument
 * (recover) set to 0. */
13313 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13314 depth, string, lst, 0 );
13316 #endif /* LIBXML_SAX1_ENABLED */
13319 * xmlParseBalancedChunkMemoryInternal:
13320 * @oldctxt: the existing parsing context
13321 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13322 * @user_data: the user data field for the parser context
13323 * @lst: the return value for the set of parsed nodes
13326 * Parse a well-balanced chunk of an XML document
13327 * called by the parser
13328 * The allowed sequence for the Well Balanced Chunk is the one defined by
13329 * the content production in the XML grammar:
13331 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13333 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13334 * error code otherwise
13336 * In case recover is set to 1, the nodelist will not be empty even if
13337 * the parsed chunk is not well balanced.
/*
 * Parses a zero-terminated chunk with a child context that borrows the
 * dictionary, SAX handler, options and attribute tables of oldctxt. Content
 * is built under a temporary "pseudoroot" element hung on oldctxt's document
 * (or on a scratch document when oldctxt has none).
 */
13339 static xmlParserErrors
13340 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13341 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13342 xmlParserCtxtPtr ctxt;
13343 xmlDocPtr newDoc = NULL;
13344 xmlNodePtr newRoot;
13345 xmlSAXHandlerPtr oldsax = NULL;
13346 xmlNodePtr content = NULL;
13347 xmlNodePtr last = NULL;
13349 xmlParserErrors ret = XML_ERR_OK;
/* Nesting guard; oldctxt is dereferenced here without a NULL test. */
13354 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13355 (oldctxt->depth > 1024)) {
13356 return(XML_ERR_ENTITY_LOOP);
13362 if (string == NULL)
13363 return(XML_ERR_INTERNAL_ERROR);
13365 size = xmlStrlen(string);
13367 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13368 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13369 if (user_data != NULL)
13370 ctxt->userData = user_data;
13372 ctxt->userData = ctxt;
/* Swap the child's own dictionary for the parent's; no reference is taken here. */
13373 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13374 ctxt->dict = oldctxt->dict;
13375 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13376 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13377 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13380 /* propagate namespaces down the entity */
13381 for (i = 0;i < oldctxt->nsNr;i += 2) {
13382 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
/* Borrow the parent's SAX handler; oldsax remembers the child's own one. */
13386 oldsax = ctxt->sax;
13387 ctxt->sax = oldctxt->sax;
13388 xmlDetectSAX2(ctxt);
13389 ctxt->replaceEntities = oldctxt->replaceEntities;
13390 ctxt->options = oldctxt->options;
13392 ctxt->_private = oldctxt->_private;
13393 if (oldctxt->myDoc == NULL) {
13394 newDoc = xmlNewDoc(BAD_CAST "1.0");
13395 if (newDoc == NULL) {
13396 ctxt->sax = oldsax;
13398 xmlFreeParserCtxt(ctxt);
13399 return(XML_ERR_INTERNAL_ERROR);
13401 newDoc->properties = XML_DOC_INTERNAL;
13402 newDoc->dict = ctxt->dict;
13403 xmlDictReference(newDoc->dict);
13404 ctxt->myDoc = newDoc;
13406 ctxt->myDoc = oldctxt->myDoc;
/* Remember the document's current children so they can be put back after parsing. */
13407 content = ctxt->myDoc->children;
13408 last = ctxt->myDoc->last;
13410 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13411 if (newRoot == NULL) {
13412 ctxt->sax = oldsax;
13414 xmlFreeParserCtxt(ctxt);
13415 if (newDoc != NULL) {
13416 xmlFreeDoc(newDoc);
13418 return(XML_ERR_INTERNAL_ERROR);
/* Temporarily make the pseudoroot the document's only child. */
13420 ctxt->myDoc->children = NULL;
13421 ctxt->myDoc->last = NULL;
13422 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13423 nodePush(ctxt, ctxt->myDoc->children);
13424 ctxt->instate = XML_PARSER_CONTENT;
13425 ctxt->depth = oldctxt->depth + 1;
13427 ctxt->validate = 0;
13428 ctxt->loadsubset = oldctxt->loadsubset;
13429 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13431 * ID/IDREF registration will be done in xmlValidateElement below
13433 ctxt->loadsubset |= XML_SKIP_IDS;
13435 ctxt->dictNames = oldctxt->dictNames;
13436 ctxt->attsDefault = oldctxt->attsDefault;
13437 ctxt->attsSpecial = oldctxt->attsSpecial;
13439 xmlParseContent(ctxt);
/* Anything left in the input after the content is an error. */
13440 if ((RAW == '<') && (NXT(1) == '/')) {
13441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13442 } else if (RAW != 0) {
13443 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot pushed above. */
13445 if (ctxt->node != ctxt->myDoc->children) {
13446 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13449 if (!ctxt->wellFormed) {
13450 if (ctxt->errNo == 0)
13451 ret = XML_ERR_INTERNAL_ERROR;
13453 ret = (xmlParserErrors)ctxt->errNo;
/* Nodes are handed back only when the chunk parsed without error. */
13458 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13462 * Return the newly created nodeset after unlinking it from
13463 * they pseudo parent.
13465 cur = ctxt->myDoc->children->children;
13467 while (cur != NULL) {
13468 #ifdef LIBXML_VALID_ENABLED
13469 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13470 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13471 (cur->type == XML_ELEMENT_NODE)) {
13472 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13473 oldctxt->myDoc, cur);
13475 #endif /* LIBXML_VALID_ENABLED */
13476 cur->parent = NULL;
13479 ctxt->myDoc->children->children = NULL;
13481 if (ctxt->myDoc != NULL) {
/* Drop the pseudoroot and restore the children saved before parsing. */
13482 xmlFreeNode(ctxt->myDoc->children);
13483 ctxt->myDoc->children = content;
13484 ctxt->myDoc->last = last;
13488 * Record in the parent context the number of entities replacement
13489 * done when parsing that reference.
/* NOTE(review): oldctxt has already been dereferenced unconditionally
 * throughout this function, so this NULL test looks redundant - confirm. */
13491 if (oldctxt != NULL)
13492 oldctxt->nbentities += ctxt->nbentities;
13495 * Also record the last error if any
13497 if (ctxt->lastError.code != XML_ERR_OK)
13498 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13500 ctxt->sax = oldsax;
/* attsDefault/attsSpecial were copied from oldctxt above; clear them before the free. */
13502 ctxt->attsDefault = NULL;
13503 ctxt->attsSpecial = NULL;
13504 xmlFreeParserCtxt(ctxt);
13505 if (newDoc != NULL) {
13506 xmlFreeDoc(newDoc);
13513 * xmlParseInNodeContext:
13514 * @node: the context node
13515 * @data: the input string
13516 * @datalen: the input string length in bytes
13517 * @options: a combination of xmlParserOption
13518 * @lst: the return value for the set of parsed nodes
13520 * Parse a well-balanced chunk of an XML document
13521 * within the context (DTD, namespaces, etc ...) of the given node.
13523 * The allowed sequence for the data is a Well Balanced Chunk defined by
13524 * the content production in the XML grammar:
13526 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13528 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13529 * error code otherwise
13532 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13533 int options, xmlNodePtr *lst) {
/*
 * Parses datalen bytes of data as content located at node: the nearest
 * enclosing element or document node is used as the insertion context, and
 * an XML or HTML memory parser is chosen from the owning document's type.
 */
13535 xmlParserCtxtPtr ctxt;
13536 xmlDocPtr doc = NULL;
13537 xmlNodePtr fake, cur;
13540 xmlParserErrors ret = XML_ERR_OK;
13543 * check all input parameters, grab the document
13545 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13546 return(XML_ERR_INTERNAL_ERROR);
13547 switch (node->type) {
13548 case XML_ELEMENT_NODE:
13549 case XML_ATTRIBUTE_NODE:
13550 case XML_TEXT_NODE:
13551 case XML_CDATA_SECTION_NODE:
13552 case XML_ENTITY_REF_NODE:
13554 case XML_COMMENT_NODE:
13555 case XML_DOCUMENT_NODE:
13556 case XML_HTML_DOCUMENT_NODE:
13559 return(XML_ERR_INTERNAL_ERROR);
/* Climb to the nearest element, document or HTML document node. */
13562 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13563 (node->type != XML_DOCUMENT_NODE) &&
13564 (node->type != XML_HTML_DOCUMENT_NODE))
13565 node = node->parent;
13567 return(XML_ERR_INTERNAL_ERROR);
13568 if (node->type == XML_ELEMENT_NODE)
13571 doc = (xmlDocPtr) node;
13573 return(XML_ERR_INTERNAL_ERROR);
13576 * allocate a context and set-up everything not related to the
13577 * node position in the tree
13579 if (doc->type == XML_DOCUMENT_NODE)
13580 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13581 #ifdef LIBXML_HTML_ENABLED
13582 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13583 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13585 * When parsing in context, it makes no sense to add implied
13586 * elements like html/body/etc...
13588 options |= HTML_PARSE_NOIMPLIED;
13592 return(XML_ERR_INTERNAL_ERROR);
13595 return(XML_ERR_NO_MEMORY);
13598 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13599 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13600 * we must wait until the last moment to free the original one.
13602 if (doc->dict != NULL) {
/* The context's own dictionary is released and doc->dict is installed
 * instead; no xmlDictReference() is taken for it here. */
13603 if (ctxt->dict != NULL)
13604 xmlDictFree(ctxt->dict);
13605 ctxt->dict = doc->dict;
13607 options |= XML_PARSE_NODICT;
13609 if (doc->encoding != NULL) {
13610 xmlCharEncodingHandlerPtr hdlr;
13612 if (ctxt->encoding != NULL)
13613 xmlFree((xmlChar *) ctxt->encoding);
13614 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13616 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13617 if (hdlr != NULL) {
13618 xmlSwitchToEncoding(ctxt, hdlr);
/*
 * NOTE(review): this return has no xmlFreeParserCtxt(ctxt) in front of it,
 * unlike the fake == NULL failure path below, so the context created above
 * appears to be leaked when no encoding handler is found - confirm. If a
 * free is added, ctxt->dict may need detaching first when it is doc->dict.
 */
13620 return(XML_ERR_UNSUPPORTED_ENCODING);
13624 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13625 xmlDetectSAX2(ctxt);
13627 /* parsing in context, i.e. as within existing content */
13628 ctxt->instate = XML_PARSER_CONTENT;
/* An empty comment node is appended to node before parsing and unlinked
 * again afterwards; presumably it marks where the new content begins. */
13630 fake = xmlNewComment(NULL);
13631 if (fake == NULL) {
/*
 * NOTE(review): ctxt->dict may still be doc->dict here (installed above
 * without taking a reference), whereas the final cleanup tests doc->dict
 * before calling xmlFreeParserCtxt(). Confirm this early free cannot drop a
 * dictionary reference that belongs to doc.
 */
13632 xmlFreeParserCtxt(ctxt);
13633 return(XML_ERR_NO_MEMORY);
13635 xmlAddChild(node, fake);
13637 if (node->type == XML_ELEMENT_NODE) {
13638 nodePush(ctxt, node);
13640 * initialize the SAX2 namespaces stack
13643 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13644 xmlNsPtr ns = cur->nsDef;
13645 const xmlChar *iprefix, *ihref;
13647 while (ns != NULL) {
13649 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13650 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13652 iprefix = ns->prefix;
/* Push the binding only when this prefix is not already known to the context. */
13656 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13657 nsPush(ctxt, iprefix, ihref);
13666 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13668 * ID/IDREF registration will be done in xmlValidateElement below
13670 ctxt->loadsubset |= XML_SKIP_IDS;
13673 #ifdef LIBXML_HTML_ENABLED
13674 if (doc->type == XML_HTML_DOCUMENT_NODE)
13675 __htmlParseContent(ctxt);
13678 xmlParseContent(ctxt);
/* Anything left in the input after the content is an error. */
13681 if ((RAW == '<') && (NXT(1) == '/')) {
13682 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13683 } else if (RAW != 0) {
13684 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the context node (or empty). */
13686 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13687 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13688 ctxt->wellFormed = 0;
13691 if (!ctxt->wellFormed) {
13692 if (ctxt->errNo == 0)
13693 ret = XML_ERR_INTERNAL_ERROR;
13695 ret = (xmlParserErrors)ctxt->errNo;
13701 * Return the newly created nodeset after unlinking it from
13702 * the pseudo sibling.
13715 while (cur != NULL) {
13716 cur->parent = NULL;
13720 xmlUnlinkNode(fake);
/* On failure the nodes collected in *lst are released. */
13724 if (ret != XML_ERR_OK) {
13725 xmlFreeNodeList(*lst);
13729 if (doc->dict != NULL)
13731 xmlFreeParserCtxt(ctxt);
13735 return(XML_ERR_INTERNAL_ERROR);
13739 #ifdef LIBXML_SAX1_ENABLED
13741 * xmlParseBalancedChunkMemoryRecover:
13742 * @doc: the document the chunk pertains to
13743 * @sax: the SAX handler block (possibly NULL)
13744 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13745 * @depth: Used for loop detection, use 0
13746 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13747 * @lst: the return value for the set of parsed nodes
13748 * @recover: return nodes even if the data is broken (use 0)
13751 * Parse a well-balanced chunk of an XML document
13752 * called by the parser
13753 * The allowed sequence for the Well Balanced Chunk is the one defined by
13754 * the content production in the XML grammar:
13756 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13758 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13759 * the parser error code otherwise
13761 * In case recover is set to 1, the nodelist will not be empty even if
13762 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13766 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13767 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
/*
 * Parses a zero-terminated chunk with a fresh memory parser context and
 * builds the content under a temporary "pseudoroot" element of a scratch
 * document (newDoc). The resulting nodes are re-homed to doc via
 * xmlSetTreeDoc() before being unlinked.
 */
13769 xmlParserCtxtPtr ctxt;
13771 xmlSAXHandlerPtr oldsax = NULL;
13772 xmlNodePtr content, newRoot;
13777 return(XML_ERR_ENTITY_LOOP);
13783 if (string == NULL)
13786 size = xmlStrlen(string);
13788 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13789 if (ctxt == NULL) return(-1);
13790 ctxt->userData = ctxt;
13792 oldsax = ctxt->sax;
13794 if (user_data != NULL)
13795 ctxt->userData = user_data;
13797 newDoc = xmlNewDoc(BAD_CAST "1.0");
13798 if (newDoc == NULL) {
13799 xmlFreeParserCtxt(ctxt);
13802 newDoc->properties = XML_DOC_INTERNAL;
13803 if ((doc != NULL) && (doc->dict != NULL)) {
/* Adopt doc's dictionary; a reference is taken for the context's use. */
13804 xmlDictFree(ctxt->dict);
13805 ctxt->dict = doc->dict;
13806 xmlDictReference(ctxt->dict);
13807 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13808 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13809 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13810 ctxt->dictNames = 1;
13812 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
/* The scratch document shares doc's subsets by pointer. */
13815 newDoc->intSubset = doc->intSubset;
13816 newDoc->extSubset = doc->extSubset;
13818 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13819 if (newRoot == NULL) {
13821 ctxt->sax = oldsax;
13822 xmlFreeParserCtxt(ctxt);
13823 newDoc->intSubset = NULL;
13824 newDoc->extSubset = NULL;
13825 xmlFreeDoc(newDoc);
13828 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13829 nodePush(ctxt, newRoot);
13831 ctxt->myDoc = newDoc;
13833 ctxt->myDoc = newDoc;
13834 newDoc->children->doc = doc;
13835 /* Ensure that doc has XML spec namespace */
13836 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
/* oldNs is shared with doc by pointer; it is cleared again before newDoc is freed. */
13837 newDoc->oldNs = doc->oldNs;
13839 ctxt->instate = XML_PARSER_CONTENT;
13840 ctxt->depth = depth;
13843 * Doing validity checking on chunk doesn't make sense
13845 ctxt->validate = 0;
13846 ctxt->loadsubset = 0;
13847 xmlDetectSAX2(ctxt);
13849 if ( doc != NULL ){
/* doc's children are hidden for the duration of the parse and restored right after. */
13850 content = doc->children;
13851 doc->children = NULL;
13852 xmlParseContent(ctxt);
13853 doc->children = content;
13856 xmlParseContent(ctxt);
/* Anything left in the input after the content is an error. */
13858 if ((RAW == '<') && (NXT(1) == '/')) {
13859 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13860 } else if (RAW != 0) {
13861 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot pushed above. */
13863 if (ctxt->node != newDoc->children) {
13864 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13867 if (!ctxt->wellFormed) {
13868 if (ctxt->errNo == 0)
/* Nodes are handed back on success, or on failure when recover is exactly 1. */
13876 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13880 * Return the newly created nodeset after unlinking it from
13881 * they pseudo parent.
13883 cur = newDoc->children->children;
13885 while (cur != NULL) {
13886 xmlSetTreeDoc(cur, doc);
13887 cur->parent = NULL;
13890 newDoc->children->children = NULL;
13894 ctxt->sax = oldsax;
13895 xmlFreeParserCtxt(ctxt);
/* Detach everything shared with doc before freeing the scratch document. */
13896 newDoc->intSubset = NULL;
13897 newDoc->extSubset = NULL;
13898 newDoc->oldNs = NULL;
13899 xmlFreeDoc(newDoc);
13905 * xmlSAXParseEntity:
13906 * @sax: the SAX handler block
13907 * @filename: the filename
13909 * parse an XML external entity out of context and build a tree.
13910 * It uses the given SAX function block to handle the parsing callbacks.
13911 * If sax is NULL, fall back to the default DOM tree building routines.
13913 * [78] extParsedEnt ::= TextDecl? content
13915 * This corresponds to a "Well Balanced" chunk
13917 * Returns the resulting document tree
13921 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
/* Creates a file parser context for filename and runs
 * xmlParseExtParsedEnt() on it. */
13923 xmlParserCtxtPtr ctxt;
13925 ctxt = xmlCreateFileParserCtxt(filename);
13926 if (ctxt == NULL) {
/* The handler allocated with the context is released at this point. */
13930 if (ctxt->sax != NULL)
13931 xmlFree(ctxt->sax);
13933 ctxt->userData = NULL;
13936 xmlParseExtParsedEnt(ctxt);
13938 if (ctxt->wellFormed)
/* NOTE(review): presumably reached only when the entity is not well formed;
 * the document built so far is discarded - confirm. */
13942 xmlFreeDoc(ctxt->myDoc);
13943 ctxt->myDoc = NULL;
13947 xmlFreeParserCtxt(ctxt);
13954 * @filename: the filename
13956 * parse an XML external entity out of context and build a tree.
13958 * [78] extParsedEnt ::= TextDecl? content
13960 * This corresponds to a "Well Balanced" chunk
13962 * Returns the resulting document tree
13966 xmlParseEntity(const char *filename) {
/* Thin wrapper: same as xmlSAXParseEntity() with a NULL SAX handler. */
13967 return(xmlSAXParseEntity(NULL, filename));
13969 #endif /* LIBXML_SAX1_ENABLED */
13972 * xmlCreateEntityParserCtxtInternal:
13973 * @URL: the entity URL
13974 * @ID: the entity PUBLIC ID
13975 * @base: a possible base for the target URI
13976 * @pctx: parser context used to set options on new context
13978 * Create a parser context for an external entity
13979 * Automatic support for ZLIB/Compress compressed document is provided
13980 * by default if found at compile-time.
13982 * Returns the new parser context or NULL
/*
 * Allocates a parser context, copies options and _private from pctx when
 * one is given, loads the entity through xmlLoadExternalEntity() and pushes
 * the resulting input onto the new context.
 */
13984 static xmlParserCtxtPtr
13985 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13986 const xmlChar *base, xmlParserCtxtPtr pctx) {
13987 xmlParserCtxtPtr ctxt;
13988 xmlParserInputPtr inputStream;
13989 char *directory = NULL;
13992 ctxt = xmlNewParserCtxt();
13993 if (ctxt == NULL) {
13997 if (pctx != NULL) {
13998 ctxt->options = pctx->options;
13999 ctxt->_private = pctx->_private;
14002 uri = xmlBuildURI(URL, base);
/* First variant: load using URL as given (presumably taken when no URI
 * could be built - confirm). */
14005 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14006 if (inputStream == NULL) {
14007 xmlFreeParserCtxt(ctxt);
14011 inputPush(ctxt, inputStream);
/* directory is still NULL here (initialised above and not assigned since),
 * so the first test effectively depends on ctxt->directory alone. */
14013 if ((ctxt->directory == NULL) && (directory == NULL))
14014 directory = xmlParserGetDirectory((char *)URL);
14015 if ((ctxt->directory == NULL) && (directory != NULL))
14016 ctxt->directory = directory;
/* Second variant: load using the URI built from URL and base. */
14018 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14019 if (inputStream == NULL) {
14021 xmlFreeParserCtxt(ctxt);
14025 inputPush(ctxt, inputStream);
14027 if ((ctxt->directory == NULL) && (directory == NULL))
14028 directory = xmlParserGetDirectory((char *)uri);
14029 if ((ctxt->directory == NULL) && (directory != NULL))
14030 ctxt->directory = directory;
14037 * xmlCreateEntityParserCtxt:
14038 * @URL: the entity URL
14039 * @ID: the entity PUBLIC ID
14040 * @base: a possible base for the target URI
14042 * Create a parser context for an external entity
14043 * Automatic support for ZLIB/Compress compressed document is provided
14044 * by default if found at compile-time.
14046 * Returns the new parser context or NULL
14049 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14050 const xmlChar *base) {
14051 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14055 /************************************************************************
14057 * Front ends when parsing from a file *
14059 ************************************************************************/
14062 * xmlCreateURLParserCtxt:
14063 * @filename: the filename or URL
14064 * @options: a combination of xmlParserOption
14066 * Create a parser context for a file or URL content.
14067 * Automatic support for ZLIB/Compress compressed document is provided
14068 * by default if found at compile-time and for file accesses
14070 * Returns the new parser context or NULL
14073 xmlCreateURLParserCtxt(const char *filename, int options)
14075 xmlParserCtxtPtr ctxt;
14076 xmlParserInputPtr inputStream;
14077 char *directory = NULL;
14079 ctxt = xmlNewParserCtxt();
14080 if (ctxt == NULL) {
14081 xmlErrMemory(NULL, "cannot allocate parser context");
14086 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14087 ctxt->linenumbers = 1;
14089 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14090 if (inputStream == NULL) {
14091 xmlFreeParserCtxt(ctxt);
14095 inputPush(ctxt, inputStream);
14096 if ((ctxt->directory == NULL) && (directory == NULL))
14097 directory = xmlParserGetDirectory(filename);
14098 if ((ctxt->directory == NULL) && (directory != NULL))
14099 ctxt->directory = directory;
14105 * xmlCreateFileParserCtxt:
14106 * @filename: the filename
14108 * Create a parser context for a file content.
14109 * Automatic support for ZLIB/Compress compressed document is provided
14110 * by default if found at compile-time.
14112 * Returns the new parser context or NULL
14115 xmlCreateFileParserCtxt(const char *filename)
14117 return(xmlCreateURLParserCtxt(filename, 0));
14120 #ifdef LIBXML_SAX1_ENABLED
14122 * xmlSAXParseFileWithData:
14123 * @sax: the SAX handler block
14124 * @filename: the filename
14125 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14127 * @data: the userdata
14129 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14130 * compressed document is provided by default if found at compile-time.
14131 * It use the given SAX function block to handle the parsing callback.
14132 * If sax is NULL, fallback to the default DOM tree building routines.
14134 * User data (void *) is stored within the parser context in the
14135 * context's _private member, so it is available nearly everywhere in libxml
14137 * Returns the resulting document tree
14141 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14142 int recovery, void *data) {
14144 xmlParserCtxtPtr ctxt;
14148 ctxt = xmlCreateFileParserCtxt(filename);
14149 if (ctxt == NULL) {
14153 if (ctxt->sax != NULL)
14154 xmlFree(ctxt->sax);
14157 xmlDetectSAX2(ctxt);
14159 ctxt->_private = data;
14162 if (ctxt->directory == NULL)
14163 ctxt->directory = xmlParserGetDirectory(filename);
14165 ctxt->recovery = recovery;
14167 xmlParseDocument(ctxt);
14169 if ((ctxt->wellFormed) || recovery) {
14172 if (ctxt->input->buf->compressed > 0)
14173 ret->compression = 9;
14175 ret->compression = ctxt->input->buf->compressed;
14180 xmlFreeDoc(ctxt->myDoc);
14181 ctxt->myDoc = NULL;
14185 xmlFreeParserCtxt(ctxt);
14192 * @sax: the SAX handler block
14193 * @filename: the filename
14194 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14197 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14198 * compressed document is provided by default if found at compile-time.
14199 * It use the given SAX function block to handle the parsing callback.
14200 * If sax is NULL, fallback to the default DOM tree building routines.
14202 * Returns the resulting document tree
14206 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14208 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14213 * @cur: a pointer to an array of xmlChar
14215 * parse an XML in-memory document and build a tree.
14216 * In the case the document is not Well Formed, a attempt to build a
14217 * tree is tried anyway
14219 * Returns the resulting document tree or NULL in case of failure
14223 xmlRecoverDoc(const xmlChar *cur) {
14224 return(xmlSAXParseDoc(NULL, cur, 1));
14229 * @filename: the filename
14231 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14232 * compressed document is provided by default if found at compile-time.
14234 * Returns the resulting document tree if the file was wellformed,
14239 xmlParseFile(const char *filename) {
14240 return(xmlSAXParseFile(NULL, filename, 0));
14245 * @filename: the filename
14247 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14248 * compressed document is provided by default if found at compile-time.
14249 * In the case the document is not Well Formed, it attempts to build
14252 * Returns the resulting document tree or NULL in case of failure
14256 xmlRecoverFile(const char *filename) {
14257 return(xmlSAXParseFile(NULL, filename, 1));
14262 * xmlSetupParserForBuffer:
14263 * @ctxt: an XML parser context
14264 * @buffer: a xmlChar * buffer
14265 * @filename: a file name
14267 * Setup the parser context to parse a new buffer; Clears any prior
14268 * contents from the parser context. The buffer parameter must not be
14269 * NULL, but the filename parameter can be
14272 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14273 const char* filename)
14275 xmlParserInputPtr input;
14277 if ((ctxt == NULL) || (buffer == NULL))
14280 input = xmlNewInputStream(ctxt);
14281 if (input == NULL) {
14282 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14283 xmlClearParserCtxt(ctxt);
14287 xmlClearParserCtxt(ctxt);
14288 if (filename != NULL)
14289 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14290 input->base = buffer;
14291 input->cur = buffer;
14292 input->end = &buffer[xmlStrlen(buffer)];
14293 inputPush(ctxt, input);
14297 * xmlSAXUserParseFile:
14298 * @sax: a SAX handler
14299 * @user_data: The user data returned on SAX callbacks
14300 * @filename: a file name
14302 * parse an XML file and call the given SAX handler routines.
14303 * Automatic support for ZLIB/Compress compressed document is provided
14305 * Returns 0 in case of success or a error number otherwise
14308 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14309 const char *filename) {
14311 xmlParserCtxtPtr ctxt;
14313 ctxt = xmlCreateFileParserCtxt(filename);
14314 if (ctxt == NULL) return -1;
14315 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14316 xmlFree(ctxt->sax);
14318 xmlDetectSAX2(ctxt);
14320 if (user_data != NULL)
14321 ctxt->userData = user_data;
14323 xmlParseDocument(ctxt);
14325 if (ctxt->wellFormed)
14328 if (ctxt->errNo != 0)
14335 if (ctxt->myDoc != NULL) {
14336 xmlFreeDoc(ctxt->myDoc);
14337 ctxt->myDoc = NULL;
14339 xmlFreeParserCtxt(ctxt);
14343 #endif /* LIBXML_SAX1_ENABLED */
14345 /************************************************************************
14347 * Front ends when parsing from memory *
14349 ************************************************************************/
14352 * xmlCreateMemoryParserCtxt:
14353 * @buffer: a pointer to a char array
14354 * @size: the size of the array
14356 * Create a parser context for an XML in-memory document.
14358 * Returns the new parser context or NULL
14361 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14362 xmlParserCtxtPtr ctxt;
14363 xmlParserInputPtr input;
14364 xmlParserInputBufferPtr buf;
14366 if (buffer == NULL)
14371 ctxt = xmlNewParserCtxt();
14375 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14376 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14378 xmlFreeParserCtxt(ctxt);
14382 input = xmlNewInputStream(ctxt);
14383 if (input == NULL) {
14384 xmlFreeParserInputBuffer(buf);
14385 xmlFreeParserCtxt(ctxt);
14389 input->filename = NULL;
14391 xmlBufResetInput(input->buf->buffer, input);
14393 inputPush(ctxt, input);
14397 #ifdef LIBXML_SAX1_ENABLED
14399 * xmlSAXParseMemoryWithData:
14400 * @sax: the SAX handler block
14401 * @buffer: an pointer to a char array
14402 * @size: the size of the array
14403 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14405 * @data: the userdata
14407 * parse an XML in-memory block and use the given SAX function block
14408 * to handle the parsing callback. If sax is NULL, fallback to the default
14409 * DOM tree building routines.
14411 * User data (void *) is stored within the parser context in the
14412 * context's _private member, so it is available nearly everywhere in libxml
14414 * Returns the resulting document tree
14418 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14419 int size, int recovery, void *data) {
14421 xmlParserCtxtPtr ctxt;
14425 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14426 if (ctxt == NULL) return(NULL);
14428 if (ctxt->sax != NULL)
14429 xmlFree(ctxt->sax);
14432 xmlDetectSAX2(ctxt);
14434 ctxt->_private=data;
14437 ctxt->recovery = recovery;
14439 xmlParseDocument(ctxt);
14441 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14444 xmlFreeDoc(ctxt->myDoc);
14445 ctxt->myDoc = NULL;
14449 xmlFreeParserCtxt(ctxt);
14455 * xmlSAXParseMemory:
14456 * @sax: the SAX handler block
14457 * @buffer: an pointer to a char array
14458 * @size: the size of the array
14459 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14462 * parse an XML in-memory block and use the given SAX function block
14463 * to handle the parsing callback. If sax is NULL, fallback to the default
14464 * DOM tree building routines.
14466 * Returns the resulting document tree
14469 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14470 int size, int recovery) {
14471 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14476 * @buffer: an pointer to a char array
14477 * @size: the size of the array
14479 * parse an XML in-memory block and build a tree.
14481 * Returns the resulting document tree
14484 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14485 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14489 * xmlRecoverMemory:
14490 * @buffer: an pointer to a char array
14491 * @size: the size of the array
14493 * parse an XML in-memory block and build a tree.
14494 * In the case the document is not Well Formed, an attempt to
14495 * build a tree is tried anyway
14497 * Returns the resulting document tree or NULL in case of error
14500 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14501 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14505 * xmlSAXUserParseMemory:
14506 * @sax: a SAX handler
14507 * @user_data: The user data returned on SAX callbacks
14508 * @buffer: an in-memory XML document input
14509 * @size: the length of the XML document in bytes
14511 * A better SAX parsing routine.
14512 * parse an XML in-memory buffer and call the given SAX handler routines.
14514 * Returns 0 in case of success or a error number otherwise
14516 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14517 const char *buffer, int size) {
14519 xmlParserCtxtPtr ctxt;
14523 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14524 if (ctxt == NULL) return -1;
14525 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14526 xmlFree(ctxt->sax);
14528 xmlDetectSAX2(ctxt);
14530 if (user_data != NULL)
14531 ctxt->userData = user_data;
14533 xmlParseDocument(ctxt);
14535 if (ctxt->wellFormed)
14538 if (ctxt->errNo != 0)
14545 if (ctxt->myDoc != NULL) {
14546 xmlFreeDoc(ctxt->myDoc);
14547 ctxt->myDoc = NULL;
14549 xmlFreeParserCtxt(ctxt);
14553 #endif /* LIBXML_SAX1_ENABLED */
14556 * xmlCreateDocParserCtxt:
14557 * @cur: a pointer to an array of xmlChar
14559 * Creates a parser context for an XML in-memory document.
14561 * Returns the new parser context or NULL
14564 xmlCreateDocParserCtxt(const xmlChar *cur) {
14569 len = xmlStrlen(cur);
14570 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14573 #ifdef LIBXML_SAX1_ENABLED
14576 * @sax: the SAX handler block
14577 * @cur: a pointer to an array of xmlChar
14578 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14581 * parse an XML in-memory document and build a tree.
14582 * It use the given SAX function block to handle the parsing callback.
14583 * If sax is NULL, fallback to the default DOM tree building routines.
14585 * Returns the resulting document tree
14589 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14591 xmlParserCtxtPtr ctxt;
14592 xmlSAXHandlerPtr oldsax = NULL;
14594 if (cur == NULL) return(NULL);
14597 ctxt = xmlCreateDocParserCtxt(cur);
14598 if (ctxt == NULL) return(NULL);
14600 oldsax = ctxt->sax;
14602 ctxt->userData = NULL;
14604 xmlDetectSAX2(ctxt);
14606 xmlParseDocument(ctxt);
14607 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14610 xmlFreeDoc(ctxt->myDoc);
14611 ctxt->myDoc = NULL;
14614 ctxt->sax = oldsax;
14615 xmlFreeParserCtxt(ctxt);
14622 * @cur: a pointer to an array of xmlChar
14624 * parse an XML in-memory document and build a tree.
14626 * Returns the resulting document tree
14630 xmlParseDoc(const xmlChar *cur) {
14631 return(xmlSAXParseDoc(NULL, cur, 0));
14633 #endif /* LIBXML_SAX1_ENABLED */
14635 #ifdef LIBXML_LEGACY_ENABLED
14636 /************************************************************************
14638 * Specific function to keep track of entities references *
14639 * and used by the XSLT debugger *
14641 ************************************************************************/
14643 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14646 * xmlAddEntityReference:
14647 * @ent : A valid entity
14648 * @firstNode : A valid first node for children of entity
14649 * @lastNode : A valid last node of children entity
14651 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14654 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14655 xmlNodePtr lastNode)
14657 if (xmlEntityRefFunc != NULL) {
14658 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14664 * xmlSetEntityReferenceFunc:
14665 * @func: A valid function
14667 * Set the function to call call back when a xml reference has been made
14670 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14672 xmlEntityRefFunc = func;
14674 #endif /* LIBXML_LEGACY_ENABLED */
14676 /************************************************************************
14680 ************************************************************************/
14682 #ifdef LIBXML_XPATH_ENABLED
14683 #include <libxml/xpath.h>
14686 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14687 static int xmlParserInitialized = 0;
14692 * Initialization function for the XML parser.
14693 * This is not reentrant. Call once before processing in case of
14694 * use in multithreaded programs.
14698 xmlInitParser(void) {
14699 if (xmlParserInitialized != 0)
14702 #ifdef LIBXML_THREAD_ENABLED
14703 __xmlGlobalInitMutexLock();
14704 if (xmlParserInitialized == 0) {
14708 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14709 (xmlGenericError == NULL))
14710 initGenericErrorDefaultFunc(NULL);
14712 xmlInitializeDict();
14713 xmlInitCharEncodingHandlers();
14714 xmlDefaultSAXHandlerInit();
14715 xmlRegisterDefaultInputCallbacks();
14716 #ifdef LIBXML_OUTPUT_ENABLED
14717 xmlRegisterDefaultOutputCallbacks();
14718 #endif /* LIBXML_OUTPUT_ENABLED */
14719 #ifdef LIBXML_HTML_ENABLED
14720 htmlInitAutoClose();
14721 htmlDefaultSAXHandlerInit();
14723 #ifdef LIBXML_XPATH_ENABLED
14726 xmlParserInitialized = 1;
14727 #ifdef LIBXML_THREAD_ENABLED
14729 __xmlGlobalInitMutexUnlock();
14734 * xmlCleanupParser:
14736 * This function name is somewhat misleading. It does not clean up
14737 * parser state, it cleans up memory allocated by the library itself.
14738 * It is a cleanup function for the XML library. It tries to reclaim all
14739 * related global memory allocated for the library processing.
14740 * It doesn't deallocate any document related memory. One should
14741 * call xmlCleanupParser() only when the process has finished using
14742 * the library and all XML/HTML documents built with it.
14743 * See also xmlInitParser() which has the opposite function of preparing
14744 * the library for operations.
14746 * WARNING: if your application is multithreaded or has plugin support
14747 * calling this may crash the application if another thread or
14748 * a plugin is still using libxml2. It's sometimes very hard to
14749 * guess if libxml2 is in use in the application, some libraries
14750 * or plugins may use it without notice. In case of doubt abstain
14751 * from calling this function or do it just before calling exit()
14752 * to avoid leak reports from valgrind !
14756 xmlCleanupParser(void) {
14757 if (!xmlParserInitialized)
14760 xmlCleanupCharEncodingHandlers();
14761 #ifdef LIBXML_CATALOG_ENABLED
14762 xmlCatalogCleanup();
14765 xmlCleanupInputCallbacks();
14766 #ifdef LIBXML_OUTPUT_ENABLED
14767 xmlCleanupOutputCallbacks();
14769 #ifdef LIBXML_SCHEMAS_ENABLED
14770 xmlSchemaCleanupTypes();
14771 xmlRelaxNGCleanupTypes();
14773 xmlResetLastError();
14774 xmlCleanupGlobals();
14775 xmlCleanupThreads(); /* must be last if called not from the main thread */
14776 xmlCleanupMemory();
14777 xmlParserInitialized = 0;
14780 /************************************************************************
14782 * New set (2.6.0) of simpler and more flexible APIs *
14784 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 * A variable named "dict" must be visible where the macro is used; the
 * string is freed when it is not NULL and either "dict" is NULL or
 * xmlDictOwns() reports that the dictionary does not own it.
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement: write the usual trailing semicolon at the call site,
 * and it is safe as the body of an if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14800 * @ctxt: an XML parser context
14802 * Reset a parser context
14805 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14807 xmlParserInputPtr input;
14815 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14816 xmlFreeInputStream(input);
14819 ctxt->input = NULL;
14822 if (ctxt->spaceTab != NULL) {
14823 ctxt->spaceTab[0] = -1;
14824 ctxt->space = &ctxt->spaceTab[0];
14826 ctxt->space = NULL;
14836 DICT_FREE(ctxt->version);
14837 ctxt->version = NULL;
14838 DICT_FREE(ctxt->encoding);
14839 ctxt->encoding = NULL;
14840 DICT_FREE(ctxt->directory);
14841 ctxt->directory = NULL;
14842 DICT_FREE(ctxt->extSubURI);
14843 ctxt->extSubURI = NULL;
14844 DICT_FREE(ctxt->extSubSystem);
14845 ctxt->extSubSystem = NULL;
14846 if (ctxt->myDoc != NULL)
14847 xmlFreeDoc(ctxt->myDoc);
14848 ctxt->myDoc = NULL;
14850 ctxt->standalone = -1;
14851 ctxt->hasExternalSubset = 0;
14852 ctxt->hasPErefs = 0;
14854 ctxt->external = 0;
14855 ctxt->instate = XML_PARSER_START;
14858 ctxt->wellFormed = 1;
14859 ctxt->nsWellFormed = 1;
14860 ctxt->disableSAX = 0;
14863 ctxt->vctxt.userData = ctxt;
14864 ctxt->vctxt.error = xmlParserValidityError;
14865 ctxt->vctxt.warning = xmlParserValidityWarning;
14867 ctxt->record_info = 0;
14869 ctxt->checkIndex = 0;
14870 ctxt->inSubset = 0;
14871 ctxt->errNo = XML_ERR_OK;
14873 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14874 ctxt->catalogs = NULL;
14875 ctxt->nbentities = 0;
14876 ctxt->sizeentities = 0;
14877 ctxt->sizeentcopy = 0;
14878 xmlInitNodeInfoSeq(&ctxt->node_seq);
14880 if (ctxt->attsDefault != NULL) {
14881 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14882 ctxt->attsDefault = NULL;
14884 if (ctxt->attsSpecial != NULL) {
14885 xmlHashFree(ctxt->attsSpecial, NULL);
14886 ctxt->attsSpecial = NULL;
14889 #ifdef LIBXML_CATALOG_ENABLED
14890 if (ctxt->catalogs != NULL)
14891 xmlCatalogFreeLocal(ctxt->catalogs);
14893 if (ctxt->lastError.code != XML_ERR_OK)
14894 xmlResetError(&ctxt->lastError);
14898 * xmlCtxtResetPush:
14899 * @ctxt: an XML parser context
14900 * @chunk: a pointer to an array of chars
14901 * @size: number of chars in the array
14902 * @filename: an optional file name or URI
14903 * @encoding: the document encoding, or NULL
14905 * Reset a push parser context
14907 * Returns 0 in case of success and 1 in case of error
14910 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14911 int size, const char *filename, const char *encoding)
14913 xmlParserInputPtr inputStream;
14914 xmlParserInputBufferPtr buf;
14915 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14920 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14921 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14923 buf = xmlAllocParserInputBuffer(enc);
14927 if (ctxt == NULL) {
14928 xmlFreeParserInputBuffer(buf);
14932 xmlCtxtReset(ctxt);
14934 if (ctxt->pushTab == NULL) {
14935 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14936 sizeof(xmlChar *));
14937 if (ctxt->pushTab == NULL) {
14938 xmlErrMemory(ctxt, NULL);
14939 xmlFreeParserInputBuffer(buf);
14944 if (filename == NULL) {
14945 ctxt->directory = NULL;
14947 ctxt->directory = xmlParserGetDirectory(filename);
14950 inputStream = xmlNewInputStream(ctxt);
14951 if (inputStream == NULL) {
14952 xmlFreeParserInputBuffer(buf);
14956 if (filename == NULL)
14957 inputStream->filename = NULL;
14959 inputStream->filename = (char *)
14960 xmlCanonicPath((const xmlChar *) filename);
14961 inputStream->buf = buf;
14962 xmlBufResetInput(buf->buffer, inputStream);
14964 inputPush(ctxt, inputStream);
14966 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14967 (ctxt->input->buf != NULL)) {
14968 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14969 size_t cur = ctxt->input->cur - ctxt->input->base;
14971 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14973 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14975 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14979 if (encoding != NULL) {
14980 xmlCharEncodingHandlerPtr hdlr;
14982 if (ctxt->encoding != NULL)
14983 xmlFree((xmlChar *) ctxt->encoding);
14984 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14986 hdlr = xmlFindCharEncodingHandler(encoding);
14987 if (hdlr != NULL) {
14988 xmlSwitchToEncoding(ctxt, hdlr);
14990 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14991 "Unsupported encoding %s\n", BAD_CAST encoding);
14993 } else if (enc != XML_CHAR_ENCODING_NONE) {
14994 xmlSwitchEncoding(ctxt, enc);
15002 * xmlCtxtUseOptionsInternal:
15003 * @ctxt: an XML parser context
15004 * @options: a combination of xmlParserOption
15005 * @encoding: the user provided encoding to use
15007 * Applies the options to the parser context
15009 * Returns 0 in case of success, the set of unknown or unimplemented options
15010 * in case of error.
15013 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15017 if (encoding != NULL) {
15018 if (ctxt->encoding != NULL)
15019 xmlFree((xmlChar *) ctxt->encoding);
15020 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15022 if (options & XML_PARSE_RECOVER) {
15023 ctxt->recovery = 1;
15024 options -= XML_PARSE_RECOVER;
15025 ctxt->options |= XML_PARSE_RECOVER;
15027 ctxt->recovery = 0;
15028 if (options & XML_PARSE_DTDLOAD) {
15029 ctxt->loadsubset = XML_DETECT_IDS;
15030 options -= XML_PARSE_DTDLOAD;
15031 ctxt->options |= XML_PARSE_DTDLOAD;
15033 ctxt->loadsubset = 0;
15034 if (options & XML_PARSE_DTDATTR) {
15035 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15036 options -= XML_PARSE_DTDATTR;
15037 ctxt->options |= XML_PARSE_DTDATTR;
15039 if (options & XML_PARSE_NOENT) {
15040 ctxt->replaceEntities = 1;
15041 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15042 options -= XML_PARSE_NOENT;
15043 ctxt->options |= XML_PARSE_NOENT;
15045 ctxt->replaceEntities = 0;
15046 if (options & XML_PARSE_PEDANTIC) {
15047 ctxt->pedantic = 1;
15048 options -= XML_PARSE_PEDANTIC;
15049 ctxt->options |= XML_PARSE_PEDANTIC;
15051 ctxt->pedantic = 0;
15052 if (options & XML_PARSE_NOBLANKS) {
15053 ctxt->keepBlanks = 0;
15054 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15055 options -= XML_PARSE_NOBLANKS;
15056 ctxt->options |= XML_PARSE_NOBLANKS;
15058 ctxt->keepBlanks = 1;
15059 if (options & XML_PARSE_DTDVALID) {
15060 ctxt->validate = 1;
15061 if (options & XML_PARSE_NOWARNING)
15062 ctxt->vctxt.warning = NULL;
15063 if (options & XML_PARSE_NOERROR)
15064 ctxt->vctxt.error = NULL;
15065 options -= XML_PARSE_DTDVALID;
15066 ctxt->options |= XML_PARSE_DTDVALID;
15068 ctxt->validate = 0;
15069 if (options & XML_PARSE_NOWARNING) {
15070 ctxt->sax->warning = NULL;
15071 options -= XML_PARSE_NOWARNING;
15073 if (options & XML_PARSE_NOERROR) {
15074 ctxt->sax->error = NULL;
15075 ctxt->sax->fatalError = NULL;
15076 options -= XML_PARSE_NOERROR;
15078 #ifdef LIBXML_SAX1_ENABLED
15079 if (options & XML_PARSE_SAX1) {
15080 ctxt->sax->startElement = xmlSAX2StartElement;
15081 ctxt->sax->endElement = xmlSAX2EndElement;
15082 ctxt->sax->startElementNs = NULL;
15083 ctxt->sax->endElementNs = NULL;
15084 ctxt->sax->initialized = 1;
15085 options -= XML_PARSE_SAX1;
15086 ctxt->options |= XML_PARSE_SAX1;
15088 #endif /* LIBXML_SAX1_ENABLED */
15089 if (options & XML_PARSE_NODICT) {
15090 ctxt->dictNames = 0;
15091 options -= XML_PARSE_NODICT;
15092 ctxt->options |= XML_PARSE_NODICT;
15094 ctxt->dictNames = 1;
15096 if (options & XML_PARSE_NOCDATA) {
15097 ctxt->sax->cdataBlock = NULL;
15098 options -= XML_PARSE_NOCDATA;
15099 ctxt->options |= XML_PARSE_NOCDATA;
15101 if (options & XML_PARSE_NSCLEAN) {
15102 ctxt->options |= XML_PARSE_NSCLEAN;
15103 options -= XML_PARSE_NSCLEAN;
15105 if (options & XML_PARSE_NONET) {
15106 ctxt->options |= XML_PARSE_NONET;
15107 options -= XML_PARSE_NONET;
15109 if (options & XML_PARSE_COMPACT) {
15110 ctxt->options |= XML_PARSE_COMPACT;
15111 options -= XML_PARSE_COMPACT;
15113 if (options & XML_PARSE_OLD10) {
15114 ctxt->options |= XML_PARSE_OLD10;
15115 options -= XML_PARSE_OLD10;
15117 if (options & XML_PARSE_NOBASEFIX) {
15118 ctxt->options |= XML_PARSE_NOBASEFIX;
15119 options -= XML_PARSE_NOBASEFIX;
15121 if (options & XML_PARSE_HUGE) {
15122 ctxt->options |= XML_PARSE_HUGE;
15123 options -= XML_PARSE_HUGE;
15124 if (ctxt->dict != NULL)
15125 xmlDictSetLimit(ctxt->dict, 0);
15127 if (options & XML_PARSE_OLDSAX) {
15128 ctxt->options |= XML_PARSE_OLDSAX;
15129 options -= XML_PARSE_OLDSAX;
15131 if (options & XML_PARSE_IGNORE_ENC) {
15132 ctxt->options |= XML_PARSE_IGNORE_ENC;
15133 options -= XML_PARSE_IGNORE_ENC;
15135 if (options & XML_PARSE_BIG_LINES) {
15136 ctxt->options |= XML_PARSE_BIG_LINES;
15137 options -= XML_PARSE_BIG_LINES;
15139 ctxt->linenumbers = 1;
15144 * xmlCtxtUseOptions:
15145 * @ctxt: an XML parser context
15146 * @options: a combination of xmlParserOption
15148 * Applies the options to the parser context
15150 * Returns 0 in case of success, the set of unknown or unimplemented options
15151 * in case of error.
15154 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15156 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15161 * @ctxt: an XML parser context
15162 * @URL: the base URL to use for the document
15163 * @encoding: the document encoding, or NULL
15164 * @options: a combination of xmlParserOption
15165 * @reuse: keep the context for reuse
15167 * Common front-end for the xmlRead functions
15169 * Returns the resulting document tree or NULL
15172 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15173 int options, int reuse)
15177 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15178 if (encoding != NULL) {
15179 xmlCharEncodingHandlerPtr hdlr;
15181 hdlr = xmlFindCharEncodingHandler(encoding);
15183 xmlSwitchToEncoding(ctxt, hdlr);
15185 if ((URL != NULL) && (ctxt->input != NULL) &&
15186 (ctxt->input->filename == NULL))
15187 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15188 xmlParseDocument(ctxt);
15189 if ((ctxt->wellFormed) || ctxt->recovery)
15193 if (ctxt->myDoc != NULL) {
15194 xmlFreeDoc(ctxt->myDoc);
15197 ctxt->myDoc = NULL;
15199 xmlFreeParserCtxt(ctxt);
15207 * @cur: a pointer to a zero terminated string
15208 * @URL: the base URL to use for the document
15209 * @encoding: the document encoding, or NULL
15210 * @options: a combination of xmlParserOption
15212 * parse an XML in-memory document and build a tree.
15214 * Returns the resulting document tree
15217 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15219 xmlParserCtxtPtr ctxt;
15225 ctxt = xmlCreateDocParserCtxt(cur);
15228 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15233 * @filename: a file or URL
15234 * @encoding: the document encoding, or NULL
15235 * @options: a combination of xmlParserOption
15237 * parse an XML file from the filesystem or the network.
15239 * Returns the resulting document tree
15242 xmlReadFile(const char *filename, const char *encoding, int options)
15244 xmlParserCtxtPtr ctxt;
15247 ctxt = xmlCreateURLParserCtxt(filename, options);
15250 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15255 * @buffer: a pointer to a char array
15256 * @size: the size of the array
15257 * @URL: the base URL to use for the document
15258 * @encoding: the document encoding, or NULL
15259 * @options: a combination of xmlParserOption
15261 * parse an XML in-memory document and build a tree.
15263 * Returns the resulting document tree
15266 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15268 xmlParserCtxtPtr ctxt;
15271 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15274 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15279 * @fd: an open file descriptor
15280 * @URL: the base URL to use for the document
15281 * @encoding: the document encoding, or NULL
15282 * @options: a combination of xmlParserOption
15284 * parse an XML from a file descriptor and build a tree.
15285 * NOTE that the file descriptor will not be closed when the
15286 * reader is closed or reset.
15288 * Returns the resulting document tree
15291 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15293 xmlParserCtxtPtr ctxt;
15294 xmlParserInputBufferPtr input;
15295 xmlParserInputPtr stream;
15301 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15304 input->closecallback = NULL;
15305 ctxt = xmlNewParserCtxt();
15306 if (ctxt == NULL) {
15307 xmlFreeParserInputBuffer(input);
15310 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15311 if (stream == NULL) {
15312 xmlFreeParserInputBuffer(input);
15313 xmlFreeParserCtxt(ctxt);
15316 inputPush(ctxt, stream);
15317 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15322 * @ioread: an I/O read function
15323 * @ioclose: an I/O close function
15324 * @ioctx: an I/O handler
15325 * @URL: the base URL to use for the document
15326 * @encoding: the document encoding, or NULL
15327 * @options: a combination of xmlParserOption
15329 * parse an XML document from I/O functions and source and build a tree.
15331 * Returns the resulting document tree
/* Build a document tree from caller-supplied read/close callbacks, using a
 * parser context obtained here from xmlNewParserCtxt().
 * NOTE(review): the embedded line numbers jump inside this function, so
 * some statements (returns, braces) are not shown -- confirm against the
 * full file. */
15334 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15335 void *ioctx, const char *URL, const char *encoding, int options)
15337 xmlParserCtxtPtr ctxt;
15338 xmlParserInputBufferPtr input;
15339 xmlParserInputPtr stream;
/* A read callback is mandatory; ioclose is optional and is NULL-tested
 * before the (elided) statement that depends on it. */
15341 if (ioread == NULL)
15345 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15346 XML_CHAR_ENCODING_NONE);
15347 if (input == NULL) {
/* NOTE(review): the statement guarded by this test (15349) is not shown;
 * presumably it invokes ioclose(ioctx) so the caller's handle is not left
 * open when buffer creation fails -- confirm. */
15348 if (ioclose != NULL)
15352 ctxt = xmlNewParserCtxt();
15353 if (ctxt == NULL) {
/* No context available: release the buffer created above. */
15354 xmlFreeParserInputBuffer(input);
15357 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15358 if (stream == NULL) {
/* Stream creation failed: release both the buffer and the context. */
15359 xmlFreeParserInputBuffer(input);
15360 xmlFreeParserCtxt(ctxt);
/* Install the stream as the context's input and parse; the final xmlDoRead
 * argument is 0, as in xmlReadFd. */
15363 inputPush(ctxt, stream);
15364 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15369 * @ctxt: an XML parser context
15370 * @cur: a pointer to a zero terminated string
15371 * @URL: the base URL to use for the document
15372 * @encoding: the document encoding, or NULL
15373 * @options: a combination of xmlParserOption
15375 * parse an XML in-memory document and build a tree.
15376 * This reuses the existing @ctxt parser context
15378 * Returns the resulting document tree
/* Parse the zero-terminated string @cur into a tree using the caller's
 * @ctxt instead of allocating a new parser context. */
15381 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15382 const char *URL, const char *encoding, int options)
15384 xmlParserInputPtr stream;
/* NOTE(review): 15385-15391 are not shown in this listing; presumably they
 * hold argument checks and parser initialisation -- confirm. */
/* Reset the supplied context before new input is attached to it. */
15392 xmlCtxtReset(ctxt);
15394 stream = xmlNewStringInputStream(ctxt, cur);
/* NOTE(review): the body of this failure branch is not shown. */
15395 if (stream == NULL) {
/* Install the stream and parse. The final xmlDoRead argument is 1 here,
 * whereas xmlReadFd/xmlReadIO, which create their own context, pass 0. */
15398 inputPush(ctxt, stream);
15399 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15404 * @ctxt: an XML parser context
15405 * @filename: a file or URL
15406 * @encoding: the document encoding, or NULL
15407 * @options: a combination of xmlParserOption
15409 * parse an XML file from the filesystem or the network.
15410 * This reuses the existing @ctxt parser context
15412 * Returns the resulting document tree
/* Parse the resource named by @filename into a tree using the caller's
 * @ctxt. Unlike its siblings this function has no separate URL parameter. */
15415 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15416 const char *encoding, int options)
15418 xmlParserInputPtr stream;
/* NOTE(review): the statement guarded by this test, and lines 15422-15425,
 * are not shown in this listing -- confirm against the full file. */
15420 if (filename == NULL)
/* Reset the supplied context before new input is attached to it. */
15426 xmlCtxtReset(ctxt);
/* The input stream comes from xmlLoadExternalEntity, called with @filename
 * as first argument and NULL as second argument. */
15428 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15429 if (stream == NULL) {
/* NULL is passed in the URL position of xmlDoRead, since no URL parameter
 * exists here; the final argument is 1 as in the other xmlCtxtRead* calls. */
15432 inputPush(ctxt, stream);
15433 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15437 * xmlCtxtReadMemory:
15438 * @ctxt: an XML parser context
15439 * @buffer: a pointer to a char array
15440 * @size: the size of the array
15441 * @URL: the base URL to use for the document
15442 * @encoding: the document encoding, or NULL
15443 * @options: a combination of xmlParserOption
15445 * parse an XML in-memory document and build a tree.
15446 * This reuses the existing @ctxt parser context
15448 * Returns the resulting document tree
/* Parse @size bytes starting at @buffer into a tree using the caller's
 * @ctxt. */
15451 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15452 const char *URL, const char *encoding, int options)
15454 xmlParserInputBufferPtr input;
15455 xmlParserInputPtr stream;
/* NOTE(review): the statement guarded by this test and the neighbouring
 * lines (15456-15458, 15460-15462) are not shown -- confirm. */
15459 if (buffer == NULL)
/* Reset the supplied context before new input is attached to it. */
15463 xmlCtxtReset(ctxt);
/* Wrap the memory block in an input buffer with no declared encoding. */
15465 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15466 if (input == NULL) {
15470 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15471 if (stream == NULL) {
/* Only the buffer is released on this path; no xmlFreeParserCtxt call
 * appears, consistent with @ctxt being supplied by the caller. */
15472 xmlFreeParserInputBuffer(input);
/* Install the stream and parse; the final xmlDoRead argument is 1. */
15476 inputPush(ctxt, stream);
15477 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15482 * @ctxt: an XML parser context
15483 * @fd: an open file descriptor
15484 * @URL: the base URL to use for the document
15485 * @encoding: the document encoding, or NULL
15486 * @options: a combination of xmlParserOption
15488 * parse an XML document from a file descriptor and build a tree.
15489 * This reuses the existing @ctxt parser context
15490 * NOTE that the file descriptor will not be closed when the
15491 * reader is closed or reset.
15493 * Returns the resulting document tree
/* Parse the contents of descriptor @fd into a tree using the caller's
 * @ctxt. */
15496 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15497 const char *URL, const char *encoding, int options)
15499 xmlParserInputBufferPtr input;
15500 xmlParserInputPtr stream;
/* NOTE(review): 15501-15507 are not shown in this listing; presumably they
 * validate @fd and @ctxt -- confirm against the full file. */
/* Reset the supplied context before new input is attached to it. */
15508 xmlCtxtReset(ctxt);
15511 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* NOTE(review): 15512-15513 are not shown; presumably a NULL check on
 * input precedes the dereference below -- confirm. */
/* Clear the buffer's close callback; presumably this is what implements the
 * documented "descriptor will not be closed" behaviour. */
15514 input->closecallback = NULL;
15515 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15516 if (stream == NULL) {
/* Only the buffer is released on this path; @ctxt is not freed here. */
15517 xmlFreeParserInputBuffer(input);
/* Install the stream and parse; the final xmlDoRead argument is 1. */
15520 inputPush(ctxt, stream);
15521 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15526 * @ctxt: an XML parser context
15527 * @ioread: an I/O read function
15528 * @ioclose: an I/O close function
15529 * @ioctx: an I/O handler
15530 * @URL: the base URL to use for the document
15531 * @encoding: the document encoding, or NULL
15532 * @options: a combination of xmlParserOption
15534 * parse an XML document from I/O functions and source and build a tree.
15535 * This reuses the existing @ctxt parser context
15537 * Returns the resulting document tree
/* Parse a document delivered through caller-supplied read/close callbacks
 * into a tree, using the caller's @ctxt.
 * NOTE(review): parameter line 15542 is not shown in this listing; the URL
 * used in the final xmlDoRead call is presumably declared there -- confirm. */
15540 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15541 xmlInputCloseCallback ioclose, void *ioctx,
15543 const char *encoding, int options)
15545 xmlParserInputBufferPtr input;
15546 xmlParserInputPtr stream;
/* A read callback is mandatory; ioclose is optional and is NULL-tested
 * before the (elided) statement that depends on it. */
15548 if (ioread == NULL)
/* Reset the supplied context before new input is attached to it. */
15554 xmlCtxtReset(ctxt);
15556 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15557 XML_CHAR_ENCODING_NONE);
15558 if (input == NULL) {
/* NOTE(review): the statement guarded by this test (15560) is not shown;
 * presumably it invokes ioclose(ioctx) when buffer creation fails --
 * confirm. */
15559 if (ioclose != NULL)
15563 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15564 if (stream == NULL) {
/* Only the buffer is released on this path; @ctxt is not freed here. */
15565 xmlFreeParserInputBuffer(input);
/* Install the stream and parse; the final xmlDoRead argument is 1. */
15568 inputPush(ctxt, stream);
15569 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15572 #define bottom_parser
15573 #include "elfgcchack.h"