2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
71 #ifdef HAVE_SYS_STAT_H
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
97 /************************************************************************
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
101 ************************************************************************/
103 #define XML_PARSER_BIG_ENTITY 1000
104 #define XML_PARSER_LOT_ENTITY 5000
107 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108 * replacement over the size in byte of the input indicates that you have
109 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
110 * replacement per byte of input.
112 #define XML_PARSER_NON_LINEAR 10
115 * xmlParserEntityCheck
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125 xmlEntityPtr ent, size_t replacement)
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
146 consumed += ctxt->sizeentities;
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
150 } else if (size != 0) {
152 * Do the check based on the replacement size of the entity
154 if (size < XML_PARSER_BIG_ENTITY)
158 * A limit on the amount of text data reasonably used
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
164 consumed += ctxt->sizeentities;
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
169 } else if (ent != NULL) {
171 * use the number of parsed entities in the replacement
173 size = ent->checked / 2;
176 * The amount of data parsed counting entities size only once
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
182 consumed += ctxt->sizeentities;
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
192 * strange we got no data for checking just return
196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
203 * arbitrary depth limit for the XML documents that we allow to
204 * process. This is not a limitation of the parser but a safety
205 * boundary feature. It can be disabled with the XML_PARSE_HUGE
208 unsigned int xmlParserMaxDepth = 256;
213 #define XML_PARSER_BIG_BUFFER_SIZE 300
214 #define XML_PARSER_BUFFER_SIZE 100
215 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
218 * XML_PARSER_CHUNK_SIZE
220 * When calling GROW that's the minimal amount of data
221 * the parser expects to have received. It is not a hard
222 * limit but an optimization when reading strings like Names.
223 * It is not strictly needed as long as the input's available characters
224 * are followed by 0, which should be provided by the I/O level
226 #define XML_PARSER_CHUNK_SIZE 100
229 * List of XML prefixed PI allowed by W3C specs
232 static const char *xmlW3CPIs[] = {
239 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
240 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
243 static xmlParserErrors
244 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
246 void *user_data, int depth, const xmlChar *URL,
247 const xmlChar *ID, xmlNodePtr *list);
250 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
252 #ifdef LIBXML_LEGACY_ENABLED
254 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
256 #endif /* LIBXML_LEGACY_ENABLED */
258 static xmlParserErrors
259 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
263 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
265 /************************************************************************
267 * Some factorized error routines *
269 ************************************************************************/
272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
277 * Handle a redefinition of attribute error
280 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @info: extra information string
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
316 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
319 char errstr[129] = "";
321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
325 case XML_ERR_INVALID_HEX_CHARREF:
326 errmsg = "CharRef: invalid hexadecimal value";
328 case XML_ERR_INVALID_DEC_CHARREF:
329 errmsg = "CharRef: invalid decimal value";
331 case XML_ERR_INVALID_CHARREF:
332 errmsg = "CharRef: invalid value";
334 case XML_ERR_INTERNAL_ERROR:
335 errmsg = "internal error";
337 case XML_ERR_PEREF_AT_EOF:
338 errmsg = "PEReference at end of document";
340 case XML_ERR_PEREF_IN_PROLOG:
341 errmsg = "PEReference in prolog";
343 case XML_ERR_PEREF_IN_EPILOG:
344 errmsg = "PEReference in epilog";
346 case XML_ERR_PEREF_NO_NAME:
347 errmsg = "PEReference: no name";
349 case XML_ERR_PEREF_SEMICOL_MISSING:
350 errmsg = "PEReference: expecting ';'";
352 case XML_ERR_ENTITY_LOOP:
353 errmsg = "Detected an entity reference loop";
355 case XML_ERR_ENTITY_NOT_STARTED:
356 errmsg = "EntityValue: \" or ' expected";
358 case XML_ERR_ENTITY_PE_INTERNAL:
359 errmsg = "PEReferences forbidden in internal subset";
361 case XML_ERR_ENTITY_NOT_FINISHED:
362 errmsg = "EntityValue: \" or ' expected";
364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
365 errmsg = "AttValue: \" or ' expected";
367 case XML_ERR_LT_IN_ATTRIBUTE:
368 errmsg = "Unescaped '<' not allowed in attributes values";
370 case XML_ERR_LITERAL_NOT_STARTED:
371 errmsg = "SystemLiteral \" or ' expected";
373 case XML_ERR_LITERAL_NOT_FINISHED:
374 errmsg = "Unfinished System or Public ID \" or ' expected";
376 case XML_ERR_MISPLACED_CDATA_END:
377 errmsg = "Sequence ']]>' not allowed in content";
379 case XML_ERR_URI_REQUIRED:
380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
382 case XML_ERR_PUBID_REQUIRED:
383 errmsg = "PUBLIC, the Public Identifier is missing";
385 case XML_ERR_HYPHEN_IN_COMMENT:
386 errmsg = "Comment must not contain '--' (double-hyphen)";
388 case XML_ERR_PI_NOT_STARTED:
389 errmsg = "xmlParsePI : no target name";
391 case XML_ERR_RESERVED_XML_NAME:
392 errmsg = "Invalid PI name";
394 case XML_ERR_NOTATION_NOT_STARTED:
395 errmsg = "NOTATION: Name expected here";
397 case XML_ERR_NOTATION_NOT_FINISHED:
398 errmsg = "'>' required to close NOTATION declaration";
400 case XML_ERR_VALUE_REQUIRED:
401 errmsg = "Entity value required";
403 case XML_ERR_URI_FRAGMENT:
404 errmsg = "Fragment not allowed";
406 case XML_ERR_ATTLIST_NOT_STARTED:
407 errmsg = "'(' required to start ATTLIST enumeration";
409 case XML_ERR_NMTOKEN_REQUIRED:
410 errmsg = "NmToken expected in ATTLIST enumeration";
412 case XML_ERR_ATTLIST_NOT_FINISHED:
413 errmsg = "')' required to finish ATTLIST enumeration";
415 case XML_ERR_MIXED_NOT_STARTED:
416 errmsg = "MixedContentDecl : '|' or ')*' expected";
418 case XML_ERR_PCDATA_REQUIRED:
419 errmsg = "MixedContentDecl : '#PCDATA' expected";
421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
422 errmsg = "ContentDecl : Name or '(' expected";
424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
425 errmsg = "ContentDecl : ',' '|' or ')' expected";
427 case XML_ERR_PEREF_IN_INT_SUBSET:
429 "PEReference: forbidden within markup decl in internal subset";
431 case XML_ERR_GT_REQUIRED:
432 errmsg = "expected '>'";
434 case XML_ERR_CONDSEC_INVALID:
435 errmsg = "XML conditional section '[' expected";
437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
438 errmsg = "Content error in the external subset";
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
442 "conditional section INCLUDE or IGNORE keyword expected";
444 case XML_ERR_CONDSEC_NOT_FINISHED:
445 errmsg = "XML conditional section not closed";
447 case XML_ERR_XMLDECL_NOT_STARTED:
448 errmsg = "Text declaration '<?xml' required";
450 case XML_ERR_XMLDECL_NOT_FINISHED:
451 errmsg = "parsing XML declaration: '?>' expected";
453 case XML_ERR_EXT_ENTITY_STANDALONE:
454 errmsg = "external parsed entities cannot be standalone";
456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
457 errmsg = "EntityRef: expecting ';'";
459 case XML_ERR_DOCTYPE_NOT_FINISHED:
460 errmsg = "DOCTYPE improperly terminated";
462 case XML_ERR_LTSLASH_REQUIRED:
463 errmsg = "EndTag: '</' not found";
465 case XML_ERR_EQUAL_REQUIRED:
466 errmsg = "expected '='";
468 case XML_ERR_STRING_NOT_CLOSED:
469 errmsg = "String not closed expecting \" or '";
471 case XML_ERR_STRING_NOT_STARTED:
472 errmsg = "String not started expecting ' or \"";
474 case XML_ERR_ENCODING_NAME:
475 errmsg = "Invalid XML encoding name";
477 case XML_ERR_STANDALONE_VALUE:
478 errmsg = "standalone accepts only 'yes' or 'no'";
480 case XML_ERR_DOCUMENT_EMPTY:
481 errmsg = "Document is empty";
483 case XML_ERR_DOCUMENT_END:
484 errmsg = "Extra content at the end of the document";
486 case XML_ERR_NOT_WELL_BALANCED:
487 errmsg = "chunk is not well balanced";
489 case XML_ERR_EXTRA_CONTENT:
490 errmsg = "extra content at the end of well balanced chunk";
492 case XML_ERR_VERSION_MISSING:
493 errmsg = "Malformed declaration expecting version";
495 case XML_ERR_NAME_TOO_LONG:
496 errmsg = "Name too long use XML_PARSE_HUGE option";
504 errmsg = "Unregistered error message";
507 snprintf(errstr, 128, "%s\n", errmsg);
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
531 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
559 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
562 xmlStructuredErrorFunc schannel = NULL;
564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
569 schannel = ctxt->sax->serror;
571 __xmlRaiseError(schannel,
572 (ctxt->sax) ? ctxt->sax->warning : NULL,
574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
594 * Handle a validity error.
597 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598 const char *msg, const xmlChar *str1, const xmlChar *str2)
600 xmlStructuredErrorFunc schannel = NULL;
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
611 __xmlRaiseError(schannel,
612 ctxt->vctxt.error, ctxt->vctxt.userData,
613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
638 const char *msg, int val)
640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
645 __xmlRaiseError(NULL, NULL, NULL,
646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: a string info
661 * @val: an integer value
662 * @str2: a string info
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
667 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
668 const char *msg, const xmlChar *str1, int val,
671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
676 __xmlRaiseError(NULL, NULL, NULL,
677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
697 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
698 const char *msg, const xmlChar * val)
700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
723 * Handle a non fatal parser error
726 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
742 * @ctxt: an XML parser context
743 * @error: the error number
745 * @info1: extra information string
746 * @info2: extra information string
748 * Handle a namespace error, i.e. violating namespace well-formedness constraints
751 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
766 ctxt->nsWellFormed = 0;
771 * @ctxt: an XML parser context
772 * @error: the error number
774 * @info1: extra information string
775 * @info2: extra information string
777 * Handle a namespace warning
780 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
794 /************************************************************************
796 * Library wide options *
798 ************************************************************************/
802 * @feature: the feature to be examined
804 * Examines if the library has been compiled with a given feature.
806 * Returns a non-zero value if the feature exists, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an
808 * unknown feature is requested, non-zero otherwise.
811 xmlHasFeature(xmlFeature feature)
814 case XML_WITH_THREAD:
815 #ifdef LIBXML_THREAD_ENABLED
821 #ifdef LIBXML_TREE_ENABLED
826 case XML_WITH_OUTPUT:
827 #ifdef LIBXML_OUTPUT_ENABLED
833 #ifdef LIBXML_PUSH_ENABLED
838 case XML_WITH_READER:
839 #ifdef LIBXML_READER_ENABLED
844 case XML_WITH_PATTERN:
845 #ifdef LIBXML_PATTERN_ENABLED
850 case XML_WITH_WRITER:
851 #ifdef LIBXML_WRITER_ENABLED
857 #ifdef LIBXML_SAX1_ENABLED
863 #ifdef LIBXML_FTP_ENABLED
869 #ifdef LIBXML_HTTP_ENABLED
875 #ifdef LIBXML_VALID_ENABLED
881 #ifdef LIBXML_HTML_ENABLED
886 case XML_WITH_LEGACY:
887 #ifdef LIBXML_LEGACY_ENABLED
893 #ifdef LIBXML_C14N_ENABLED
898 case XML_WITH_CATALOG:
899 #ifdef LIBXML_CATALOG_ENABLED
905 #ifdef LIBXML_XPATH_ENABLED
911 #ifdef LIBXML_XPTR_ENABLED
916 case XML_WITH_XINCLUDE:
917 #ifdef LIBXML_XINCLUDE_ENABLED
923 #ifdef LIBXML_ICONV_ENABLED
928 case XML_WITH_ISO8859X:
929 #ifdef LIBXML_ISO8859X_ENABLED
934 case XML_WITH_UNICODE:
935 #ifdef LIBXML_UNICODE_ENABLED
940 case XML_WITH_REGEXP:
941 #ifdef LIBXML_REGEXP_ENABLED
946 case XML_WITH_AUTOMATA:
947 #ifdef LIBXML_AUTOMATA_ENABLED
953 #ifdef LIBXML_EXPR_ENABLED
958 case XML_WITH_SCHEMAS:
959 #ifdef LIBXML_SCHEMAS_ENABLED
964 case XML_WITH_SCHEMATRON:
965 #ifdef LIBXML_SCHEMATRON_ENABLED
970 case XML_WITH_MODULES:
971 #ifdef LIBXML_MODULES_ENABLED
977 #ifdef LIBXML_DEBUG_ENABLED
982 case XML_WITH_DEBUG_MEM:
983 #ifdef DEBUG_MEMORY_LOCATION
988 case XML_WITH_DEBUG_RUN:
989 #ifdef LIBXML_DEBUG_RUNTIME
995 #ifdef LIBXML_ZLIB_ENABLED
1001 #ifdef LIBXML_LZMA_ENABLED
1007 #ifdef LIBXML_ICU_ENABLED
1018 /************************************************************************
1020 * SAX2 defaulted attributes handling *
1022 ************************************************************************/
1026 * @ctxt: an XML parser context
1028 * Do the SAX2 detection and specific initialization
1031 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
1033 #ifdef LIBXML_SAX1_ENABLED
1034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1039 #endif /* LIBXML_SAX1_ENABLED */
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
1046 xmlErrMemory(ctxt, NULL);
1050 typedef struct _xmlDefAttrs xmlDefAttrs;
1051 typedef xmlDefAttrs *xmlDefAttrsPtr;
1052 struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
1055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1076 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1078 if ((src == NULL) || (dst == NULL))
1081 while (*src == 0x20) src++;
1084 while (*src == 0x20) src++;
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1108 static const xmlChar *
1109 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1112 int remove_head = 0;
1113 int need_realloc = 0;
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1123 while (*cur == 0x20) {
1130 if ((*cur == 0x20) || (*cur == 0)) {
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1142 xmlErrMemory(ctxt, NULL);
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
1150 memmove(src, src + remove_head, 1 + *len);
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1163 * Add a defaulted attribute for an element
1166 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1176 * Allows to detect attribute redefinitions
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1183 if (ctxt->attsDefault == NULL) {
1184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1185 if (ctxt->attsDefault == NULL)
1190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
1193 name = xmlSplitQName3(fullname, &len);
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1203 * make sure there is some storage
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1208 (4 * 5) * sizeof(const xmlChar *));
1209 if (defaults == NULL)
1211 defaults->nbAttrs = 0;
1212 defaults->maxAttrs = 4;
1213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1219 xmlDefAttrsPtr temp;
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1226 defaults->maxAttrs *= 2;
1227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1235 * Split the attribute name into prefix:localname , the string found
1236 * are within the DTD and then not associated to namespace names.
1238 name = xmlSplitQName3(fullattr, &len);
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
1252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1258 defaults->nbAttrs++;
1263 xmlErrMemory(ctxt, NULL);
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1274 * Register this attribute type
1277 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1282 if (ctxt->attsSpecial == NULL) {
1283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1284 if (ctxt->attsSpecial == NULL)
1288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
1296 xmlErrMemory(ctxt, NULL);
1301 * xmlCleanSpecialAttrCallback:
1303 * Removes CDATA attributes from the special attribute table
1306 xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1325 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1327 if (ctxt->attsSpecial == NULL)
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1343 * Checks that the value conforms to the LanguageID production:
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1355 * The current REC references the successors of RFC 1766, currently 5646
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1373 * script = 4ALPHA ; ISO 15924 code
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1381 * extension = singleton 1*("-" (2*8alphanum))
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1395 * Returns 1 if correct 0 otherwise
1398 xmlCheckLanguageID(const xmlChar * lang)
1400 const xmlChar *cur = lang, *nxt;
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
1409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
1414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1417 return(cur[0] == 0);
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1423 if (nxt - cur >= 4) {
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1433 /* we got an ISO 639 code */
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456 /* we parsed an extlang */
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1477 /* we parsed a script */
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1498 /* we parsed a region */
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1515 /* we parsed a variant */
1521 /* extensions and private use subtags not checked */
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1533 /************************************************************************
1535 * Parser stacks related functions and macros *
1537 ************************************************************************/
1539 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1549 * Pushes a new parser namespace on top of the ns stack
1551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
1555 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1560 if (ctxt->nsTab[i] == prefix) {
1562 if (ctxt->nsTab[i + 1] == URL)
1564 /* out of scope keep it */
1569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
1575 xmlErrMemory(ctxt, NULL);
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
1580 const xmlChar ** tmp;
1582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1585 xmlErrMemory(ctxt, NULL);
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1602 * Returns the number of namespaces removed
1605 nsPop(xmlParserCtxtPtr ctxt, int nr)
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1614 if (ctxt->nsNr <= 0)
1617 for (i = 0;i < nr;i++) {
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1626 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
1631 if (ctxt->atts == NULL) {
1632 maxatts = 55; /* allow for 10 attrs by default */
1633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
1635 if (atts == NULL) goto mem_error;
1637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
1640 ctxt->maxatts = maxatts;
1641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
1643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
1645 if (atts == NULL) goto mem_error;
1647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
1651 ctxt->maxatts = maxatts;
1653 return(ctxt->maxatts);
1655 xmlErrMemory(ctxt, NULL);
1661 * @ctxt: an XML parser context
1662 * @value: the parser input
1664 * Pushes a new parser input on top of the input stack
1666 * Returns -1 in case of error, the index in the stack otherwise
1669 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1671 if ((ctxt == NULL) || (value == NULL))
1673 if (ctxt->inputNr >= ctxt->inputMax) {
/* Every slot is in use: the capacity counter is doubled before the table
 * is reallocated. */
1674 ctxt->inputMax *= 2;
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1678 sizeof(ctxt->inputTab[0]));
/* NOTE(review): the failure test below reads ctxt->inputTab itself, so the
 * xmlRealloc() result appears to be stored straight into that field. If
 * so, a failed reallocation overwrites the only pointer to the old table:
 * the old block leaks and inputTab is NULL while inputNr may be non-zero.
 * nodePush() reallocates into a temporary before committing; the same
 * pattern would avoid this -- TODO confirm against the complete source. */
1679 if (ctxt->inputTab == NULL) {
/* Failure path: report the memory error, free @value (the caller's input
 * stream does not survive a failed push) and undo the capacity doubling. */
1680 xmlErrMemory(ctxt, NULL);
1681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
/* Store the new input in the first free slot and make it the current
 * input; the value returned is the slot index (post-increment). */
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1693 * @ctxt: an XML parser context
1695 * Pops the top parser input from the input stack
1697 * Returns the input just removed
1700 inputPop(xmlParserCtxtPtr ctxt)
1702 xmlParserInputPtr ret;
1706 if (ctxt->inputNr <= 0)
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1713 ret = ctxt->inputTab[ctxt->inputNr];
1714 ctxt->inputTab[ctxt->inputNr] = NULL;
1719 * @ctxt: an XML parser context
1720 * @value: the element node
1722 * Pushes a new element node on top of the node stack
1724 * Returns -1 in case of error, the index in the stack otherwise
1727 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
/* NOTE(review): a NULL context yields 0 here. The function header documents
 * -1 for errors, and 0 is also what a successful push onto an empty stack
 * returns (see the final return), so callers cannot tell the two apart. */
1729 if (ctxt == NULL) return(0);
1730 if (ctxt->nodeNr >= ctxt->nodeMax) {
/* The reallocation result goes into a temporary and is only committed to
 * ctxt->nodeTab further down, so the old table pointer is not overwritten
 * by the xmlRealloc() call itself. */
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1735 sizeof(ctxt->nodeTab[0]));
1737 xmlErrMemory(ctxt, NULL);
1740 ctxt->nodeTab = tmp;
/* Depth guard: unless XML_PARSE_HUGE is set, a node stack deeper than
 * xmlParserMaxDepth raises a fatal error and switches the parser state
 * to XML_PARSER_EOF. */
1743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1748 ctxt->instate = XML_PARSER_EOF;
/* Store the node in the first free slot; the value returned is that slot's
 * index (post-increment). */
1751 ctxt->nodeTab[ctxt->nodeNr] = value;
1753 return (ctxt->nodeNr++);
1758 * @ctxt: an XML parser context
1760 * Pops the top element node from the node stack
1762 * Returns the node just removed
1765 nodePop(xmlParserCtxtPtr ctxt)
1769 if (ctxt == NULL) return(NULL);
1770 if (ctxt->nodeNr <= 0)
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
1778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1782 #ifdef LIBXML_PUSH_ENABLED
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1790 * Pushes a new element name/prefix/URL on top of the name stack
1792 * Returns -1 in case of error, the index in the stack otherwise
1795 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1804 sizeof(ctxt->nameTab[0]));
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1812 sizeof(ctxt->pushTab[0]));
1817 ctxt->pushTab = tmp2;
1819 ctxt->nameTab[ctxt->nameNr] = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1824 return (ctxt->nameNr++);
1826 xmlErrMemory(ctxt, NULL);
1831 * @ctxt: an XML parser context
1833 * Pops the top element/prefix/URI name from the name stack
1835 * Returns the name just removed
1837 static const xmlChar *
1838 nameNsPop(xmlParserCtxtPtr ctxt)
1842 if (ctxt->nameNr <= 0)
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1853 #endif /* LIBXML_PUSH_ENABLED */
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1860 * Pushes a new element name on top of the name stack
1862 * Returns -1 in case of error, the index in the stack otherwise
1865 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1867 if (ctxt == NULL) return (-1);
1869 if (ctxt->nameNr >= ctxt->nameMax) {
1870 const xmlChar * *tmp;
1871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1873 sizeof(ctxt->nameTab[0]));
1877 ctxt->nameTab = tmp;
1880 ctxt->nameTab[ctxt->nameNr] = value;
1882 return (ctxt->nameNr++);
1884 xmlErrMemory(ctxt, NULL);
1889 * @ctxt: an XML parser context
1891 * Pops the top element name from the name stack
1893 * Returns the name just removed
1896 namePop(xmlParserCtxtPtr ctxt)
1900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1907 ret = ctxt->nameTab[ctxt->nameNr];
1908 ctxt->nameTab[ctxt->nameNr] = NULL;
1912 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1913 if (ctxt->spaceNr >= ctxt->spaceMax) {
1916 ctxt->spaceMax *= 2;
1917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1920 xmlErrMemory(ctxt, NULL);
1924 ctxt->spaceTab = tmp;
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1931 static int spacePop(xmlParserCtxtPtr ctxt) {
1933 if (ctxt->spaceNr <= 0) return(0);
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1938 ctxt->space = &ctxt->spaceTab[0];
1939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1945 * Macros for accessing the content. Those should be used only by the parser,
1948 * Dirty macros, i.e. one often needs to make assumptions about the context to
1951 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1952 * To be used with extreme caution since operations consuming
1953 * characters may move the input buffer to a different location !
1954 * CUR returns the current xmlChar value, i.e. an 8 bit value if compiled
1955 * This should be used internally by the parser
1956 * only to compare to ASCII values otherwise it would break when
1957 * running with UTF-8 encoding.
1958 * RAW same as CUR but in the input buffer, bypass any token
1959 * extraction that may have been done
1960 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1961 * to compare on ASCII based substring.
1962 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1963 * strings without newlines within the parser.
1964 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1965 * defined char within the parser.
1966 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1968 * NEXT Skip to the next character, this does the proper decoding
1969 * in UTF-8 mode. It also pops up unfinished entities on the fly.
1970 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1971 * CUR_CHAR(l) returns the current unicode character (int), set l
1972 * to the number of xmlChars used for the encoding [0-5].
1973 * CUR_SCHAR same but operate on a string instead of the context
1974 * COPY_BUF copy the current unicode char to the target buffer, increment
1976 * GROW, SHRINK handling of input buffers
/* Byte at the current input position. Like the other accessors below, the
 * expansion refers to a variable named ctxt that must be in scope. */
1979 #define RAW (*ctxt->input->cur)
/* Expands to exactly the same expression as RAW. */
1980 #define CUR (*ctxt->input->cur)
/* Byte at offset val from the current input position; the macro itself
 * performs no bounds check. */
1981 #define NXT(val) ctxt->input->cur[(val)]
/* The current input pointer itself (ctxt->input->cur). */
1982 #define CUR_PTR ctxt->input->cur
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s are exactly
 * c1..cn, compared as unsigned char values.
 *
 * The comparison short-circuits at the first mismatch, so bytes past a
 * mismatch are never read. s is evaluated once per compared byte and must
 * therefore be free of side effects.
 *
 * Every argument is parenthesized so that an expression such as "p + 1"
 * is converted as a whole; without the parentheses the cast would bind to
 * "p" alone and the offset would be applied in bytes rather than in
 * elements. The cast targets a const-qualified type so that read-only
 * buffers can be inspected without discarding their qualifier.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2002 #define SKIP(val) do { \
2003 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2004 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2005 if ((*ctxt->input->cur == 0) && \
2006 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2007 xmlPopInput(ctxt); \
2010 #define SKIPL(val) do { \
2012 for(skipl=0; skipl<val; skipl++) { \
2013 if (*(ctxt->input->cur) == '\n') { \
2014 ctxt->input->line++; ctxt->input->col = 1; \
2015 } else ctxt->input->col++; \
2017 ctxt->input->cur++; \
2019 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2020 if ((*ctxt->input->cur == 0) && \
2021 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2022 xmlPopInput(ctxt); \
2025 #define SHRINK if ((ctxt->progressive == 0) && \
2026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2030 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2037 #define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2041 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2042 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2044 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2047 ctxt->instate = XML_PARSER_EOF;
2049 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2050 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2051 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
/* Shorthand for calling xmlSkipBlankChars() on the ctxt variable in scope. */
2055 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
/* Shorthand for calling xmlNextChar() on the ctxt variable in scope. */
2057 #define NEXT xmlNextChar(ctxt)
2060 ctxt->input->col++; \
2061 ctxt->input->cur++; \
2063 if (*ctxt->input->cur == 0) \
2064 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2067 #define NEXTL(l) do { \
2068 if (*(ctxt->input->cur) == '\n') { \
2069 ctxt->input->line++; ctxt->input->col = 1; \
2070 } else ctxt->input->col++; \
2071 ctxt->input->cur += l; \
2072 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
/* Calls xmlCurrentChar() on the ctxt variable in scope. l is passed by
 * address, so the argument must be an lvalue. */
2075 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* String variant: calls xmlStringCurrentChar() on s, again passing &l. */
2076 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
/*
 * Appends the character v to buffer b at index i. When l == 1 the value is
 * stored as a single xmlChar and i is incremented; otherwise the value is
 * handed to xmlCopyCharMultiByte() and its return value is added to i.
 *
 * NOTE(review): the expansion is a bare if/else without a terminating
 * semicolon and with unparenthesized arguments. It is only safe when used
 * as a complete statement with simple arguments; wrapping it in
 * do { ... } while (0) would remove the hazard -- check all call sites
 * before changing it.
 */
2078 #define COPY_BUF(l,b,i,v) \
2079 if (l == 1) b[i++] = (xmlChar) v; \
2080 else i += xmlCopyCharMultiByte(&b[i],v)
2083 * xmlSkipBlankChars:
2084 * @ctxt: the XML parser context
2086 * Skip all blank characters found at that point in the input stream.
2087 * It pops up finished entities in the process if allowable at that point.
2089 * Returns the number of space chars skipped
2093 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2097 * It's Okay to use CUR/NEXT here since all the blanks are on
2100 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2103 * if we are in the document content, go really fast
2105 cur = ctxt->input->cur;
2106 while (IS_BLANK_CH(*cur)) {
2108 ctxt->input->line++; ctxt->input->col = 1;
2113 ctxt->input->cur = cur;
2114 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115 cur = ctxt->input->cur;
2118 ctxt->input->cur = cur;
2123 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2128 while ((cur == 0) && (ctxt->inputNr > 1) &&
2129 (ctxt->instate != XML_PARSER_COMMENT)) {
2134 * Need to handle support of entities branching here
2136 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2142 /************************************************************************
2144 * Commodity functions to handle entities *
2146 ************************************************************************/
2150 * @ctxt: an XML parser context
2152 * xmlPopInput: the current input pointed by ctxt->input came to an end
2153 * pop it and return the next char.
2155 * Returns the current xmlChar in the parser context
2158 xmlPopInput(xmlParserCtxtPtr ctxt) {
2159 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2160 if (xmlParserDebugEntities)
2161 xmlGenericError(xmlGenericErrorContext,
2162 "Popping input %d\n", ctxt->inputNr);
2163 xmlFreeInputStream(inputPop(ctxt));
2164 if ((*ctxt->input->cur == 0) &&
2165 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166 return(xmlPopInput(ctxt));
2172 * @ctxt: an XML parser context
2173 * @input: an XML parser input fragment (entity, XML fragment ...).
2175 * xmlPushInput: switch to a new input stream which is stacked on top
2176 * of the previous one(s).
2177 * Returns -1 in case of error or the index in the input stack
2180 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2182 if (input == NULL) return(-1);
2184 if (xmlParserDebugEntities) {
2185 if ((ctxt->input != NULL) && (ctxt->input->filename))
2186 xmlGenericError(xmlGenericErrorContext,
2187 "%s(%d): ", ctxt->input->filename,
2189 xmlGenericError(xmlGenericErrorContext,
2190 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2192 ret = inputPush(ctxt, input);
2193 if (ctxt->instate == XML_PARSER_EOF)
2201 * @ctxt: an XML parser context
2203 * parse Reference declarations
2205 * [66] CharRef ::= '&#' [0-9]+ ';' |
2206 * '&#x' [0-9a-fA-F]+ ';'
2208 * [ WFC: Legal Character ]
2209 * Characters referred to using character references must match the
2210 * production for Char.
2212 * Returns the value parsed (as an int), 0 in case of error
2215 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2216 unsigned int val = 0;
2218 unsigned int outofrange = 0;
2221 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2223 if ((RAW == '&') && (NXT(1) == '#') &&
2227 while (RAW != ';') { /* loop blocked by count */
2231 if (ctxt->instate == XML_PARSER_EOF)
2234 if ((RAW >= '0') && (RAW <= '9'))
2235 val = val * 16 + (CUR - '0');
2236 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237 val = val * 16 + (CUR - 'a') + 10;
2238 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239 val = val * 16 + (CUR - 'A') + 10;
2241 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2252 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2257 } else if ((RAW == '&') && (NXT(1) == '#')) {
2260 while (RAW != ';') { /* loop blocked by count */
2264 if (ctxt->instate == XML_PARSER_EOF)
2267 if ((RAW >= '0') && (RAW <= '9'))
2268 val = val * 10 + (CUR - '0');
2270 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2281 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2287 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
2293 * production for Char.
2295 if ((IS_CHAR(val) && (outofrange == 0))) {
2298 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299 "xmlParseCharRef: invalid xmlChar value %d\n",
2306 * xmlParseStringCharRef:
2307 * @ctxt: an XML parser context
2308 * @str: a pointer to an index in the string
2310 * parse Reference declarations, variant parsing from a string rather
2311 * than an input flow.
2313 * [66] CharRef ::= '&#' [0-9]+ ';' |
2314 * '&#x' [0-9a-fA-F]+ ';'
2316 * [ WFC: Legal Character ]
2317 * Characters referred to using character references must match the
2318 * production for Char.
2320 * Returns the value parsed (as an int), 0 in case of error, str will be
2321 * updated to the current value of the index
2324 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2327 unsigned int val = 0;
2328 unsigned int outofrange = 0;
2330 if ((str == NULL) || (*str == NULL)) return(0);
2333 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2336 while (cur != ';') { /* Non input consuming loop */
2337 if ((cur >= '0') && (cur <= '9'))
2338 val = val * 16 + (cur - '0');
2339 else if ((cur >= 'a') && (cur <= 'f'))
2340 val = val * 16 + (cur - 'a') + 10;
2341 else if ((cur >= 'A') && (cur <= 'F'))
2342 val = val * 16 + (cur - 'A') + 10;
2344 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2356 } else if ((cur == '&') && (ptr[1] == '#')){
2359 while (cur != ';') { /* Non input consuming loops */
2360 if ((cur >= '0') && (cur <= '9'))
2361 val = val * 10 + (cur - '0');
2363 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2376 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2382 * [ WFC: Legal Character ]
2383 * Characters referred to using character references must match the
2384 * production for Char.
2386 if ((IS_CHAR(val) && (outofrange == 0))) {
2389 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2397 * xmlNewBlanksWrapperInputStream:
2398 * @ctxt: an XML parser context
2399 * @entity: an Entity pointer
2401 * Create a new input stream for wrapping
2402 * blanks around a PEReference
2404 * Returns the new input stream or NULL
2407 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2409 static xmlParserInputPtr
2410 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411 xmlParserInputPtr input;
2414 if (entity == NULL) {
2415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416 "xmlNewBlanksWrapperInputStream entity\n");
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "new blanks wrapper for entity: %s\n", entity->name);
2422 input = xmlNewInputStream(ctxt);
2423 if (input == NULL) {
2426 length = xmlStrlen(entity->name) + 5;
2427 buffer = xmlMallocAtomic(length);
2428 if (buffer == NULL) {
2429 xmlErrMemory(ctxt, NULL);
2435 buffer [length-3] = ';';
2436 buffer [length-2] = ' ';
2437 buffer [length-1] = 0;
2438 memcpy(buffer + 2, entity->name, length - 5);
2439 input->free = deallocblankswrapper;
2440 input->base = buffer;
2441 input->cur = buffer;
2442 input->length = length;
2443 input->end = &buffer[length];
2448 * xmlParserHandlePEReference:
2449 * @ctxt: the parser context
2451 * [69] PEReference ::= '%' Name ';'
2453 * [ WFC: No Recursion ]
2454 * A parsed entity must not contain a recursive
2455 * reference to itself, either directly or indirectly.
2457 * [ WFC: Entity Declared ]
2458 * In a document without any DTD, a document with only an internal DTD
2459 * subset which contains no parameter entity references, or a document
2460 * with "standalone='yes'", ... ... The declaration of a parameter
2461 * entity must precede any reference to it...
2463 * [ VC: Entity Declared ]
2464 * In a document with an external subset or external parameter entities
2465 * with "standalone='no'", ... ... The declaration of a parameter entity
2466 * must precede any reference to it...
2469 * Parameter-entity references may only appear in the DTD.
2470 * NOTE: misleading but this is handled.
2472 * A PEReference may have been detected in the current input stream
2473 * the handling is done according to
2474 * http://www.w3.org/TR/REC-xml#entproc
2476 * - Included in literal in entity values
2477 * - Included as Parameter Entity reference within DTDs
2480 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2481 const xmlChar *name;
2482 xmlEntityPtr entity = NULL;
2483 xmlParserInputPtr input;
2485 if (RAW != '%') return;
2486 switch(ctxt->instate) {
2487 case XML_PARSER_CDATA_SECTION:
2489 case XML_PARSER_COMMENT:
2491 case XML_PARSER_START_TAG:
2493 case XML_PARSER_END_TAG:
2495 case XML_PARSER_EOF:
2496 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2498 case XML_PARSER_PROLOG:
2499 case XML_PARSER_START:
2500 case XML_PARSER_MISC:
2501 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2503 case XML_PARSER_ENTITY_DECL:
2504 case XML_PARSER_CONTENT:
2505 case XML_PARSER_ATTRIBUTE_VALUE:
2507 case XML_PARSER_SYSTEM_LITERAL:
2508 case XML_PARSER_PUBLIC_LITERAL:
2509 /* we just ignore it there */
2511 case XML_PARSER_EPILOG:
2512 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2514 case XML_PARSER_ENTITY_VALUE:
2516 * NOTE: in the case of entity values, we don't do the
2517 * substitution here since we need the literal
2518 * entity value to be able to save the internal
2519 * subset of the document.
2520 * This will be handled by xmlStringDecodeEntities
2523 case XML_PARSER_DTD:
2525 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526 * In the internal DTD subset, parameter-entity references
2527 * can occur only where markup declarations can occur, not
2528 * within markup declarations.
2529 * In that case this is handled in xmlParseMarkupDecl
2531 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2533 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2536 case XML_PARSER_IGNORE:
2541 name = xmlParseName(ctxt);
2542 if (xmlParserDebugEntities)
2543 xmlGenericError(xmlGenericErrorContext,
2544 "PEReference: %s\n", name);
2546 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2550 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2552 if (ctxt->instate == XML_PARSER_EOF)
2554 if (entity == NULL) {
2557 * [ WFC: Entity Declared ]
2558 * In a document without any DTD, a document with only an
2559 * internal DTD subset which contains no parameter entity
2560 * references, or a document with "standalone='yes'", ...
2561 * ... The declaration of a parameter entity must precede
2562 * any reference to it...
2564 if ((ctxt->standalone == 1) ||
2565 ((ctxt->hasExternalSubset == 0) &&
2566 (ctxt->hasPErefs == 0))) {
2567 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2568 "PEReference: %%%s; not found\n", name);
2571 * [ VC: Entity Declared ]
2572 * In a document with an external subset or external
2573 * parameter entities with "standalone='no'", ...
2574 * ... The declaration of a parameter entity must precede
2575 * any reference to it...
2577 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579 "PEReference: %%%s; not found\n",
2582 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583 "PEReference: %%%s; not found\n",
2587 } else if (ctxt->input->free != deallocblankswrapper) {
2588 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2589 if (xmlPushInput(ctxt, input) < 0)
2592 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2595 xmlCharEncoding enc;
2598 * handle the extra spaces added before and after
2599 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2600 * this is done independently.
2602 input = xmlNewEntityInputStream(ctxt, entity);
2603 if (xmlPushInput(ctxt, input) < 0)
2607 * Get the 4 first bytes and decode the charset
2608 * if enc != XML_CHAR_ENCODING_NONE
2609 * plug some encoding conversion routines.
2610 * Note that, since we may have some non-UTF8
2611 * encoding (like UTF16, bug 135229), the 'length'
2612 * is not known, but we can calculate based upon
2613 * the amount of data in the buffer.
2616 if (ctxt->instate == XML_PARSER_EOF)
2618 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2623 enc = xmlDetectCharEncoding(start, 4);
2624 if (enc != XML_CHAR_ENCODING_NONE) {
2625 xmlSwitchEncoding(ctxt, enc);
2629 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2630 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2631 (IS_BLANK_CH(NXT(5)))) {
2632 xmlParseTextDecl(ctxt);
2635 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2636 "PEReference: %s is not a parameter entity\n",
2641 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2647 * Macro used to grow the current buffer.
2648 * buffer##_size is expected to be a size_t
2649 * mem_error: is expected to handle memory allocation failures
2651 #define growBuffer(buffer, n) { \
2653 size_t new_size = buffer##_size * 2 + n; \
2654 if (new_size < buffer##_size) goto mem_error; \
2655 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2656 if (tmp == NULL) goto mem_error; \
2658 buffer##_size = new_size; \
2662 * xmlStringLenDecodeEntities:
2663 * @ctxt: the parser context
2664 * @str: the input string
2665 * @len: the string length
2666 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2667 * @end: an end marker xmlChar, 0 if none
2668 * @end2: an end marker xmlChar, 0 if none
2669 * @end3: an end marker xmlChar, 0 if none
2671 * Takes an entity string content and processes it to perform the adequate substitutions.
2673 * [67] Reference ::= EntityRef | CharRef
2675 * [69] PEReference ::= '%' Name ';'
2677 * Returns A newly allocated string with the substitution done. The caller
2678 * must deallocate it !
2681 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2682 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2683 xmlChar *buffer = NULL;
2684 size_t buffer_size = 0;
2687 xmlChar *current = NULL;
2688 xmlChar *rep = NULL;
2689 const xmlChar *last;
2693 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2697 if (((ctxt->depth > 40) &&
2698 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2699 (ctxt->depth > 1024)) {
2700 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2705 * allocate a translation buffer.
2707 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2708 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2709 if (buffer == NULL) goto mem_error;
2712 * OK loop until we reach one of the ending char or a size limit.
2713 * we are operating on already parsed values.
2716 c = CUR_SCHAR(str, l);
2719 while ((c != 0) && (c != end) && /* non input consuming loop */
2720 (c != end2) && (c != end3)) {
2723 if ((c == '&') && (str[1] == '#')) {
2724 int val = xmlParseStringCharRef(ctxt, &str);
2726 COPY_BUF(0,buffer,nbchars,val);
2728 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2731 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2732 if (xmlParserDebugEntities)
2733 xmlGenericError(xmlGenericErrorContext,
2734 "String decoding Entity Reference: %.30s\n",
2736 ent = xmlParseStringEntityRef(ctxt, &str);
2737 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2738 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2741 ctxt->nbentities += ent->checked / 2;
2742 if ((ent != NULL) &&
2743 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2744 if (ent->content != NULL) {
2745 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2746 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2747 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2750 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2751 "predefined entity has no content\n");
2753 } else if ((ent != NULL) && (ent->content != NULL)) {
2755 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2761 while (*current != 0) { /* non input consuming loop */
2762 buffer[nbchars++] = *current++;
2763 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2764 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2766 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2772 } else if (ent != NULL) {
2773 int i = xmlStrlen(ent->name);
2774 const xmlChar *cur = ent->name;
2776 buffer[nbchars++] = '&';
2777 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2781 buffer[nbchars++] = *cur++;
2782 buffer[nbchars++] = ';';
2784 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2785 if (xmlParserDebugEntities)
2786 xmlGenericError(xmlGenericErrorContext,
2787 "String decoding PE Reference: %.30s\n", str);
2788 ent = xmlParseStringPEReference(ctxt, &str);
2789 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2792 ctxt->nbentities += ent->checked / 2;
2794 if (ent->content == NULL) {
2795 xmlLoadEntityContent(ctxt, ent);
2798 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2803 while (*current != 0) { /* non input consuming loop */
2804 buffer[nbchars++] = *current++;
2805 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2806 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2808 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2816 COPY_BUF(l,buffer,nbchars,c);
2818 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2819 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2823 c = CUR_SCHAR(str, l);
2827 buffer[nbchars] = 0;
2831 xmlErrMemory(ctxt, NULL);
2841 * xmlStringDecodeEntities:
2842 * @ctxt: the parser context
2843 * @str: the input string
2844 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2845 * @end: an end marker xmlChar, 0 if none
2846 * @end2: an end marker xmlChar, 0 if none
2847 * @end3: an end marker xmlChar, 0 if none
2849 * Takes an entity string content and processes it to do the adequate substitutions.
2851 * [67] Reference ::= EntityRef | CharRef
2853 * [69] PEReference ::= '%' Name ';'
2855 * Returns A newly allocated string with the substitution done. The caller
2856 * must deallocate it !
/*
 * Convenience wrapper for NUL terminated input: a NULL context or a NULL
 * input string yields NULL; otherwise the work is delegated to
 * xmlStringLenDecodeEntities() with the length of str measured by
 * xmlStrlen() and the same substitution flags in what.
 */
2859 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2860 xmlChar end, xmlChar end2, xmlChar end3) {
2861 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2862 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2866 /************************************************************************
2868 * Commodity functions, cleanup needed ? *
2870 ************************************************************************/
2874 * @ctxt: an XML parser context
2876 * @len: the size of @str
2877 * @blank_chars: we know the chars are blanks
2879 * Is this a sequence of blank chars that one can ignore ?
2881 * Returns 1 if ignorable 0 otherwise.
/*
 * Decides whether a run of characters can be reported as ignorable
 * whitespace.  The visible checks run in this order:
 *   - a shortcut when the SAX ignorableWhitespace and characters callbacks
 *     are the same function;
 *   - the xml:space state in ctxt->space (NULL, 1 and -2 are special-cased);
 *   - when blank_chars is 0 the caller has not pre-verified the content, so
 *     every one of the len bytes must satisfy IS_BLANK_CH, else 0;
 *   - no current node gives 0;
 *   - when a document is attached, xmlIsMixedElement() on the current node
 *     name is decisive for results 0 (ignorable, returns 1) and 1 (returns 0);
 *   - otherwise a heuristic based on the next input character (RAW / NXT)
 *     and on whether the current node already has text children.
 */
2884 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2887 xmlNodePtr lastChild;
2890 * Don't spend time trying to differentiate them, the same callback is
2893 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2897 * Check for xml:space value.
2899 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2900 (*(ctxt->space) == -2))
2904 * Check that the string is made of blanks
2906 if (blank_chars == 0) {
2907 for (i = 0;i < len;i++)
2908 if (!(IS_BLANK_CH(str[i]))) return(0);
2912 * Look if the element is mixed content in the DTD if available
2914 if (ctxt->node == NULL) return(0);
2915 if (ctxt->myDoc != NULL) {
2916 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2917 if (ret == 0) return(1);
2918 if (ret == 1) return(0);
2922 * Otherwise, heuristic :-\
2924 if ((RAW != '<') && (RAW != 0xD)) return(0);
2925 if ((ctxt->node->children == NULL) &&
2926 (RAW == '<') && (NXT(1) == '/')) return(0);
2928 lastChild = xmlGetLastChild(ctxt->node);
2929 if (lastChild == NULL) {
2930 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2931 (ctxt->node->content != NULL)) return(0);
2932 } else if (xmlNodeIsText(lastChild))
2934 else if ((ctxt->node->children != NULL) &&
2935 (xmlNodeIsText(ctxt->node->children)))
2940 /************************************************************************
2942 * Extra stuff for namespace support *
2943 * Relates to http://www.w3.org/TR/WD-xml-names *
2945 ************************************************************************/
2949 * @ctxt: an XML parser context
2950 * @name: the UTF8 encoded qualified name to split
2951 * @prefix: a xmlChar **
2953 * parse an UTF8 encoded XML qualified name string
2955 * [NS 5] QName ::= (Prefix ':')? LocalPart
2957 * [NS 6] Prefix ::= NCName
2959 * [NS 7] LocalPart ::= NCName
2961 * Returns the local part, and prefix is updated
2962 * to get the Prefix if any.
/*
 * Splits a qualified name at its first colon into a prefix and a local part.
 *
 * Behaviour visible in this block:
 *   - a NULL prefix pointer or a NULL name gives NULL;
 *   - when XML_XML_NAMESPACE is not defined, a name starting with the four
 *     bytes x, m, l, colon is returned whole as a copy made by xmlStrdup();
 *   - characters are first collected in the on-stack array buf
 *     (XML_MAX_NAMELEN + 5 bytes); once len reaches max the data is moved
 *     with memcpy() into a heap buffer obtained from xmlMallocAtomic() and
 *     grown with xmlRealloc(), allocation failures being reported through
 *     xmlErrMemory();
 *   - a name whose colon is its last character is returned whole via
 *     xmlStrdup();
 *   - if the first character after the colon is not an ASCII letter,
 *     underscore or colon, it is decoded with CUR_SCHAR and, when it is
 *     neither IS_LETTER nor underscore, XML_NS_ERR_QNAME is raised;
 *   - results that fit in buf are duplicated with xmlStrndup(buf, len).
 * The same collect-then-grow sequence appears twice, once for the part
 * before the colon and once for the part after it.
 */
2966 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2967 xmlChar buf[XML_MAX_NAMELEN + 5];
2968 xmlChar *buffer = NULL;
2970 int max = XML_MAX_NAMELEN;
2971 xmlChar *ret = NULL;
2972 const xmlChar *cur = name;
2975 if (prefix == NULL) return(NULL);
2978 if (cur == NULL) return(NULL);
2980 #ifndef XML_XML_NAMESPACE
2981 /* xml: prefix is not really a namespace */
2982 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2983 (cur[2] == 'l') && (cur[3] == ':'))
2984 return(xmlStrdup(name));
2987 /* nasty but well=formed */
2989 return(xmlStrdup(name));
2992 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2998 * Okay someone managed to make a huge name, so he's ready to pay
2999 * for the processing speed.
3003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004 if (buffer == NULL) {
3005 xmlErrMemory(ctxt, NULL);
3008 memcpy(buffer, buf, len);
3009 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3010 if (len + 10 > max) {
3014 tmp = (xmlChar *) xmlRealloc(buffer,
3015 max * sizeof(xmlChar));
3018 xmlErrMemory(ctxt, NULL);
3029 if ((c == ':') && (*cur == 0)) {
3033 return(xmlStrdup(name));
3037 ret = xmlStrndup(buf, len);
3041 max = XML_MAX_NAMELEN;
3049 return(xmlStrndup(BAD_CAST "", 0));
3054 * Check that the first character is proper to start
3057 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3058 ((c >= 0x41) && (c <= 0x5A)) ||
3059 (c == '_') || (c == ':'))) {
3061 int first = CUR_SCHAR(cur, l);
3063 if (!IS_LETTER(first) && (first != '_')) {
3064 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3065 "Name %s is not XML Namespace compliant\n",
3071 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3077 * Okay someone managed to make a huge name, so he's ready to pay
3078 * for the processing speed.
3082 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3083 if (buffer == NULL) {
3084 xmlErrMemory(ctxt, NULL);
3087 memcpy(buffer, buf, len);
3088 while (c != 0) { /* tested bigname2.xml */
3089 if (len + 10 > max) {
3093 tmp = (xmlChar *) xmlRealloc(buffer,
3094 max * sizeof(xmlChar));
3096 xmlErrMemory(ctxt, NULL);
3109 ret = xmlStrndup(buf, len);
3118 /************************************************************************
3120 * The parser itself *
3121 * Relates to http://www.w3.org/TR/REC-xml *
3123 ************************************************************************/
3125 /************************************************************************
3127 * Routines to parse Name, NCName and NmToken *
3129 ************************************************************************/
/*
 * File-local call counters for the name parsing routines.  Three of them are
 * incremented further down in this file: nbParseNameComplex in
 * xmlParseNameComplex(), nbParseNCNameComplex in xmlParseNCNameComplex() and
 * nbParseStringName in xmlParseStringName().
 * NOTE(review): presumably debugging statistics only; confirm whether the
 * increments are guarded by a conditional compilation switch.
 */
3131 static unsigned long nbParseName = 0;
3132 static unsigned long nbParseNmToken = 0;
3133 static unsigned long nbParseNCName = 0;
3134 static unsigned long nbParseNCNameComplex = 0;
3135 static unsigned long nbParseNameComplex = 0;
3136 static unsigned long nbParseStringName = 0;
3140 * The two following functions are related to the change of accepted
3141 * characters for Name and NmToken in the Revision 5 of XML-1.0
3142 * They correspond to the modified production [4] and the new production [4a]
3143 * changes in that revision. Also note that the macros used for the
3144 * productions Letter, Digit, CombiningChar and Extender are not needed
3146 * We still keep compatibility to pre-revision5 parsing semantic if the
3147 * new XML_PARSE_OLD10 option is given to the parser.
/*
 * Tests whether the code point c is allowed as the first character of a Name.
 *
 * When XML_PARSE_OLD10 is not set in ctxt->options the explicit code point
 * ranges listed below are used; space, the > character and the slash are
 * tested first as quick rejections.  When XML_PARSE_OLD10 is set the test is
 * IS_LETTER(c), underscore or colon.
 * Callers in this file use the result as a truth value.
 */
3150 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3151 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3153 * Use the new checks of production [4] [4a] amd [5] of the
3154 * Update 5 of XML-1.0
3156 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3157 (((c >= 'a') && (c <= 'z')) ||
3158 ((c >= 'A') && (c <= 'Z')) ||
3159 (c == '_') || (c == ':') ||
3160 ((c >= 0xC0) && (c <= 0xD6)) ||
3161 ((c >= 0xD8) && (c <= 0xF6)) ||
3162 ((c >= 0xF8) && (c <= 0x2FF)) ||
3163 ((c >= 0x370) && (c <= 0x37D)) ||
3164 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3165 ((c >= 0x200C) && (c <= 0x200D)) ||
3166 ((c >= 0x2070) && (c <= 0x218F)) ||
3167 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3168 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3169 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3170 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3171 ((c >= 0x10000) && (c <= 0xEFFFF))))
3174 if (IS_LETTER(c) || (c == '_') || (c == ':'))
/*
 * Tests whether the code point c is allowed inside a Name after its first
 * character.
 *
 * Without XML_PARSE_OLD10 the accepted set is the name start set plus the
 * entries tagged with the !start marker below: ASCII digits, hyphen, dot,
 * 0xB7, 0x300 to 0x36F and 0x203F to 0x2040.  Space, the > character and the
 * slash are rejected up front.  With XML_PARSE_OLD10 the test is built from
 * IS_LETTER, IS_DIGIT, dot, hyphen, underscore, colon and IS_COMBINING (the
 * remainder of that condition lies on lines not shown here).
 * Callers in this file use the result as a truth value.
 */
3181 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3182 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3184 * Use the new checks of production [4] [4a] amd [5] of the
3185 * Update 5 of XML-1.0
3187 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3188 (((c >= 'a') && (c <= 'z')) ||
3189 ((c >= 'A') && (c <= 'Z')) ||
3190 ((c >= '0') && (c <= '9')) || /* !start */
3191 (c == '_') || (c == ':') ||
3192 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3193 ((c >= 0xC0) && (c <= 0xD6)) ||
3194 ((c >= 0xD8) && (c <= 0xF6)) ||
3195 ((c >= 0xF8) && (c <= 0x2FF)) ||
3196 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3197 ((c >= 0x370) && (c <= 0x37D)) ||
3198 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3199 ((c >= 0x200C) && (c <= 0x200D)) ||
3200 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3201 ((c >= 0x2070) && (c <= 0x218F)) ||
3202 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3204 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3205 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206 ((c >= 0x10000) && (c <= 0xEFFFF))))
3209 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3210 (c == '.') || (c == '-') ||
3211 (c == '_') || (c == ':') ||
3212 (IS_COMBINING(c)) ||
/*
 * Forward declaration: the definition is not in this part of the file.
 * xmlParseAttValue() calls it with NULL for len and alloc and 0 for
 * normalize.
 */
3219 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3220 int *len, int *alloc, int normalize);
/*
 * Slow path of Name parsing; xmlParseName() falls back to it when its ASCII
 * fast path does not apply.
 *
 * Two parallel branches are selected by XML_PARSE_OLD10 in ctxt->options:
 * the first spells out the code point ranges for the start character and
 * for the following characters, the second relies on IS_LETTER, IS_DIGIT,
 * IS_COMBINING and IS_EXTENDER.  Both loops compare a running count with
 * XML_PARSER_CHUNK_SIZE and test for XML_PARSER_EOF.
 *
 * A length above XML_MAX_NAME_LENGTH raises XML_ERR_NAME_TOO_LONG unless
 * XML_PARSE_HUGE is set.  The result is taken from the dictionary with
 * xmlDictLookup() over the len bytes that precede ctxt->input->cur; when cur
 * sits on a line feed that directly follows a carriage return the window is
 * shifted back by one extra byte.
 */
3222 static const xmlChar *
3223 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3229 nbParseNameComplex++;
3233 * Handler for more complex cases
3236 if (ctxt->instate == XML_PARSER_EOF)
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3244 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3245 (!(((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3266 (((c >= 'a') && (c <= 'z')) ||
3267 ((c >= 'A') && (c <= 'Z')) ||
3268 ((c >= '0') && (c <= '9')) || /* !start */
3269 (c == '_') || (c == ':') ||
3270 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3271 ((c >= 0xC0) && (c <= 0xD6)) ||
3272 ((c >= 0xD8) && (c <= 0xF6)) ||
3273 ((c >= 0xF8) && (c <= 0x2FF)) ||
3274 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3275 ((c >= 0x370) && (c <= 0x37D)) ||
3276 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3277 ((c >= 0x200C) && (c <= 0x200D)) ||
3278 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3279 ((c >= 0x2070) && (c <= 0x218F)) ||
3280 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3281 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3282 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3283 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3284 ((c >= 0x10000) && (c <= 0xEFFFF))
3286 if (count++ > XML_PARSER_CHUNK_SIZE) {
3289 if (ctxt->instate == XML_PARSER_EOF)
3297 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3298 (!IS_LETTER(c) && (c != '_') &&
3306 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3307 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3308 (c == '.') || (c == '-') ||
3309 (c == '_') || (c == ':') ||
3310 (IS_COMBINING(c)) ||
3311 (IS_EXTENDER(c)))) {
3312 if (count++ > XML_PARSER_CHUNK_SIZE) {
3315 if (ctxt->instate == XML_PARSER_EOF)
3324 if (ctxt->instate == XML_PARSER_EOF)
3330 if ((len > XML_MAX_NAME_LENGTH) &&
3331 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3332 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3335 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3336 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3337 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3342 * @ctxt: an XML parser context
3344 * parse an XML name.
3346 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3347 * CombiningChar | Extender
3349 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351 * [6] Names ::= Name (#x20 Name)*
3353 * Returns the Name parsed or NULL
/*
 * Parses a Name at the current input position.
 *
 * Fast path: the name is scanned directly in the input buffer when it starts
 * with an ASCII letter, underscore or colon and continues with ASCII
 * letters, digits, underscore, hyphen, colon or dot.  If the byte that stops
 * the scan is a non-zero ASCII byte (below 0x80) the name is complete: its
 * length is checked against XML_MAX_NAME_LENGTH (unless XML_PARSE_HUGE is
 * set), the string is looked up with xmlDictLookup(), and input->cur,
 * ctxt->nbChars and input->col are advanced by the name length.
 *
 * In every other case the work is handed to xmlParseNameComplex().
 */
3357 xmlParseName(xmlParserCtxtPtr ctxt) {
3369 * Accelerator for simple ASCII names
3371 in = ctxt->input->cur;
3372 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 (*in == '_') || (*in == ':')) {
3376 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 ((*in >= 0x30) && (*in <= 0x39)) ||
3379 (*in == '_') || (*in == '-') ||
3380 (*in == ':') || (*in == '.'))
3382 if ((*in > 0) && (*in < 0x80)) {
3383 count = in - ctxt->input->cur;
3384 if ((count > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3389 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3390 ctxt->input->cur = in;
3391 ctxt->nbChars += count;
3392 ctxt->input->col += count;
3394 xmlErrMemory(ctxt, NULL);
3398 /* accelerator for special cases */
3399 return(xmlParseNameComplex(ctxt));
/*
 * Slow path of NCName parsing; xmlParseNCName() falls back to it.
 *
 * Character classes come from xmlIsNameStartChar() and xmlIsNameChar(), with
 * the colon additionally excluded in both positions.  The length limit
 * XML_MAX_NAME_LENGTH (waived by XML_PARSE_HUGE) is enforced inside the loop
 * each time count passes XML_PARSER_CHUNK_SIZE and once more after the loop;
 * a violation raises XML_ERR_NAME_TOO_LONG.  The result is the dictionary
 * entry for the len bytes preceding ctxt->input->cur.
 */
3402 static const xmlChar *
3403 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3409 nbParseNCNameComplex++;
3413 * Handler for more complex cases
3417 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3418 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3422 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3423 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3424 if (count++ > XML_PARSER_CHUNK_SIZE) {
3425 if ((len > XML_MAX_NAME_LENGTH) &&
3426 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3427 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432 if (ctxt->instate == XML_PARSER_EOF)
3441 if (ctxt->instate == XML_PARSER_EOF)
3446 if ((len > XML_MAX_NAME_LENGTH) &&
3447 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3448 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3451 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3456 * @ctxt: an XML parser context
3457 * @len: length of the string parsed
3459 * parse an XML name.
3461 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3462 * CombiningChar | Extender
3464 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3466 * Returns the Name parsed or NULL
/*
 * Parses an NCName at the current input position.
 *
 * Fast path: an ASCII scan performed directly in the input buffer.  When the
 * byte that stops the scan is a non-zero ASCII byte (below 0x80), the length
 * is checked against XML_MAX_NAME_LENGTH (unless XML_PARSE_HUGE is set), the
 * name is looked up with xmlDictLookup(), and input->cur, ctxt->nbChars and
 * input->col are advanced by the name length.
 *
 * Anything else is delegated to xmlParseNCNameComplex().
 */
3469 static const xmlChar *
3470 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3480 * Accelerator for simple ASCII names
3482 in = ctxt->input->cur;
3483 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3484 ((*in >= 0x41) && (*in <= 0x5A)) ||
3487 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3488 ((*in >= 0x41) && (*in <= 0x5A)) ||
3489 ((*in >= 0x30) && (*in <= 0x39)) ||
3490 (*in == '_') || (*in == '-') ||
3493 if ((*in > 0) && (*in < 0x80)) {
3494 count = in - ctxt->input->cur;
3495 if ((count > XML_MAX_NAME_LENGTH) &&
3496 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501 ctxt->input->cur = in;
3502 ctxt->nbChars += count;
3503 ctxt->input->col += count;
3505 xmlErrMemory(ctxt, NULL);
3510 return(xmlParseNCNameComplex(ctxt));
3514 * xmlParseNameAndCompare:
3515 * @ctxt: an XML parser context
3517 * parse an XML name and compares for match
3518 * (specialized for endtag parsing)
3520 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521 * and the name for mismatch
/*
 * Checks whether the input at the current position spells the name other.
 *
 * The input bytes are compared with other one by one.  If other is consumed
 * completely and the next input byte is the > character or a blank, the
 * input cursor is committed to that position and the sentinel value
 * (const xmlChar*) 1 is returned without building a string.
 *
 * Otherwise xmlParseName() is run from the unchanged cursor and its result
 * is examined; the sentinel is returned again on a match found that way.
 */
3524 static const xmlChar *
3525 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526 register const xmlChar *cmp = other;
3527 register const xmlChar *in;
3531 if (ctxt->instate == XML_PARSER_EOF)
3534 in = ctxt->input->cur;
3535 while (*in != 0 && *in == *cmp) {
3540 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3542 ctxt->input->cur = in;
3543 return (const xmlChar*) 1;
3545 /* failure (or end of input buffer), check with full function */
3546 ret = xmlParseName (ctxt);
3547 /* strings coming from the dictionnary direct compare possible */
3549 return (const xmlChar*) 1;
3555 * xmlParseStringName:
3556 * @ctxt: an XML parser context
3557 * @str: a pointer to the string pointer (IN/OUT)
3559 * parse an XML name.
3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562 * CombiningChar | Extender
3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3566 * [6] Names ::= Name (#x20 Name)*
3568 * Returns the Name parsed or NULL. The @str pointer
3569 * is updated to the current location in the string.
/*
 * Parses a Name out of an in-memory string (the pointer at *str) instead of
 * the parser input stream.
 *
 * Characters are decoded with CUR_SCHAR, validated with xmlIsNameStartChar()
 * and xmlIsNameChar(), and appended with COPY_BUF to the on-stack array buf
 * (XML_MAX_NAMELEN + 5 bytes).  When len reaches XML_MAX_NAMELEN the data is
 * moved into a heap buffer from xmlMallocAtomic() that is grown with
 * xmlRealloc(); allocation failures are reported through xmlErrMemory().
 * A length above XML_MAX_NAME_LENGTH raises XML_ERR_NAME_TOO_LONG unless
 * XML_PARSE_HUGE is set.  Names that fit in buf are returned as a copy made
 * by xmlStrndup(buf, len).
 *
 * NOTE(review): both XML_ERR_NAME_TOO_LONG reports pass the label NCName
 * although this routine parses a Name; looks like a copy and paste
 * leftover, confirm before relying on the message text.
 */
3573 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574 xmlChar buf[XML_MAX_NAMELEN + 5];
3575 const xmlChar *cur = *str;
3580 nbParseStringName++;
3583 c = CUR_SCHAR(cur, l);
3584 if (!xmlIsNameStartChar(ctxt, c)) {
3588 COPY_BUF(l,buf,len,c);
3590 c = CUR_SCHAR(cur, l);
3591 while (xmlIsNameChar(ctxt, c)) {
3592 COPY_BUF(l,buf,len,c);
3594 c = CUR_SCHAR(cur, l);
3595 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3597 * Okay someone managed to make a huge name, so he's ready to pay
3598 * for the processing speed.
3603 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3604 if (buffer == NULL) {
3605 xmlErrMemory(ctxt, NULL);
3608 memcpy(buffer, buf, len);
3609 while (xmlIsNameChar(ctxt, c)) {
3610 if (len + 10 > max) {
3613 if ((len > XML_MAX_NAME_LENGTH) &&
3614 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3615 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3620 tmp = (xmlChar *) xmlRealloc(buffer,
3621 max * sizeof(xmlChar));
3623 xmlErrMemory(ctxt, NULL);
3629 COPY_BUF(l,buffer,len,c);
3631 c = CUR_SCHAR(cur, l);
3638 if ((len > XML_MAX_NAME_LENGTH) &&
3639 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3640 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3644 return(xmlStrndup(buf, len));
3649 * @ctxt: an XML parser context
3651 * parse an XML Nmtoken.
3653 * [7] Nmtoken ::= (NameChar)+
3655 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3657 * Returns the Nmtoken parsed or NULL
/*
 * Parses an Nmtoken, i.e. one or more characters accepted by
 * xmlIsNameChar(), from the parser input.
 *
 * Characters are appended with COPY_BUF to the on-stack array buf
 * (XML_MAX_NAMELEN + 5 bytes); when len reaches XML_MAX_NAMELEN the data is
 * moved into a heap buffer from xmlMallocAtomic() that is grown with
 * xmlRealloc().  Both loops compare a running count with
 * XML_PARSER_CHUNK_SIZE and test for XML_PARSER_EOF.  Tokens that fit in buf
 * are returned as a copy made by xmlStrndup(buf, len).
 *
 * NOTE(review): the size guard inside the growth loop compares max (the
 * buffer capacity) with XML_MAX_NAME_LENGTH, whereas the final guard
 * compares len; confirm that the difference is intended.
 */
3661 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3662 xmlChar buf[XML_MAX_NAMELEN + 5];
3672 if (ctxt->instate == XML_PARSER_EOF)
3676 while (xmlIsNameChar(ctxt, c)) {
3677 if (count++ > XML_PARSER_CHUNK_SIZE) {
3681 COPY_BUF(l,buf,len,c);
3687 if (ctxt->instate == XML_PARSER_EOF)
3691 if (len >= XML_MAX_NAMELEN) {
3693 * Okay someone managed to make a huge token, so he's ready to pay
3694 * for the processing speed.
3699 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3700 if (buffer == NULL) {
3701 xmlErrMemory(ctxt, NULL);
3704 memcpy(buffer, buf, len);
3705 while (xmlIsNameChar(ctxt, c)) {
3706 if (count++ > XML_PARSER_CHUNK_SIZE) {
3709 if (ctxt->instate == XML_PARSER_EOF) {
3714 if (len + 10 > max) {
3717 if ((max > XML_MAX_NAME_LENGTH) &&
3718 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3719 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3724 tmp = (xmlChar *) xmlRealloc(buffer,
3725 max * sizeof(xmlChar));
3727 xmlErrMemory(ctxt, NULL);
3733 COPY_BUF(l,buffer,len,c);
3743 if ((len > XML_MAX_NAME_LENGTH) &&
3744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748 return(xmlStrndup(buf, len));
3752 * xmlParseEntityValue:
3753 * @ctxt: an XML parser context
3754 * @orig: if non-NULL store a copy of the original entity value
3756 * parse a value for ENTITY declarations
3758 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3759 * "'" ([^%&'] | PEReference | Reference)* "'"
3761 * Returns the EntityValue parsed with reference substituted or NULL
/*
 * Parses the quoted value of an ENTITY declaration.
 *
 * Steps visible in this block:
 *   1. The delimiter is taken from RAW (double or single quote); anything
 *      else raises XML_ERR_ENTITY_NOT_STARTED.
 *   2. A buffer of XML_PARSER_BUFFER_SIZE bytes is allocated with
 *      xmlMallocAtomic() and enlarged with xmlRealloc() whenever
 *      len + 5 >= size; failures are reported through xmlErrMemory().
 *   3. The parser state becomes XML_PARSER_ENTITY_VALUE and characters are
 *      copied with COPY_BUF while they satisfy IS_CHAR, the parser is not in
 *      XML_PARSER_EOF, and the delimiter has not been met in the input that
 *      was current on entry (a delimiter coming from another input does not
 *      terminate the literal).
 *   4. The collected text is scanned: every percent sign, and every
 *      ampersand not followed by a hash sign, must introduce a Name (parsed
 *      by xmlParseStringName) followed by a semicolon, otherwise
 *      XML_ERR_ENTITY_CHAR_ERROR is raised.  A percent reference met while
 *      ctxt->inSubset is 1 and ctxt->inputNr is 1 raises
 *      XML_ERR_ENTITY_PE_INTERNAL.
 *   5. XML_ERR_ENTITY_NOT_FINISHED is raised on an unterminated literal;
 *      otherwise the value is produced by xmlStringDecodeEntities() with
 *      XML_SUBSTITUTE_PEREF only, so general entity references stay as is.
 */
3765 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3766 xmlChar *buf = NULL;
3768 int size = XML_PARSER_BUFFER_SIZE;
3771 xmlChar *ret = NULL;
3772 const xmlChar *cur = NULL;
3773 xmlParserInputPtr input;
3775 if (RAW == '"') stop = '"';
3776 else if (RAW == '\'') stop = '\'';
3778 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3781 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3783 xmlErrMemory(ctxt, NULL);
3788 * The content of the entity definition is copied in a buffer.
3791 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3792 input = ctxt->input;
3794 if (ctxt->instate == XML_PARSER_EOF) {
3801 * NOTE: 4.4.5 Included in Literal
3802 * When a parameter entity reference appears in a literal entity
3803 * value, ... a single or double quote character in the replacement
3804 * text is always treated as a normal data character and will not
3805 * terminate the literal.
3806 * In practice it means we stop the loop only when back at parsing
3807 * the initial entity and the quote is found
3809 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3810 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3811 if (len + 5 >= size) {
3815 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3817 xmlErrMemory(ctxt, NULL);
3823 COPY_BUF(l,buf,len,c);
3826 * Pop-up of finished entities.
3828 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3839 if (ctxt->instate == XML_PARSER_EOF) {
3845 * Raise problem w.r.t. '&' and '%' being used in non-entities
3846 * reference constructs. Note Charref will be handled in
3847 * xmlStringDecodeEntities()
3850 while (*cur != 0) { /* non input consuming */
3851 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3856 name = xmlParseStringName(ctxt, &cur);
3857 if ((name == NULL) || (*cur != ';')) {
3858 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3859 "EntityValue: '%c' forbidden except for entities references\n",
3862 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3863 (ctxt->inputNr == 1)) {
3864 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3875 * Then PEReference entities are substituted.
3878 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3883 * NOTE: 4.4.7 Bypassed
3884 * When a general entity reference appears in the EntityValue in
3885 * an entity declaration, it is bypassed and left as is.
3886 * so XML_SUBSTITUTE_REF is not set here.
3888 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3900 * xmlParseAttValueComplex:
3901 * @ctxt: an XML parser context
3902 * @attlen: the resulting attribute len
3903 * @normalize: whether to apply the inner normalization
3905 * parse a value for an attribute, this is the fallback function
3906 * of xmlParseAttValue() when the attribute parsing requires handling
3907 * of non-ASCII characters, or normalization compaction.
3909 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * General purpose attribute value parser.
 *
 * Outline of what this block shows:
 *   - The opening delimiter is read with NXT(0) (double or single quote) and
 *     the state becomes XML_PARSER_ATTRIBUTE_VALUE; any other character
 *     raises XML_ERR_ATTRIBUTE_NOT_STARTED.
 *   - The output buffer starts at XML_PARSER_BUFFER_SIZE bytes
 *     (xmlMallocAtomic) and is enlarged through the growBuffer macro before
 *     writes; an allocation failure jumps to mem_error, and xmlErrMemory()
 *     is reported near the end of the function.
 *   - The main loop runs until the closing delimiter, a character failing
 *     IS_CHAR, the < character, or the XML_PARSER_EOF state.  Unless
 *     XML_PARSE_HUGE is set, len above XML_MAX_TEXT_LENGTH is an error.
 *   - Character references go through xmlParseCharRef() and the resulting
 *     code point is written with xmlCopyChar().
 *   - Entity references go through xmlParseEntityRef().  For predefined
 *     entities the first byte of the content is emitted.  With
 *     ctxt->replaceEntities set, the replacement text obtained from
 *     xmlStringDecodeEntities() is copied in, with CR, LF and TAB treated
 *     specially.  Otherwise the reference name itself is emitted; before
 *     that, an entity not yet marked in ent->checked is expanded once, the
 *     result is searched for a < character, and the number of entities seen
 *     is recorded in ent->checked.
 *   - The whitespace characters 0x20, 0xD, 0xA and 0x9 are stored as 0x20;
 *     when normalize is set, leading blanks and repeated blanks are skipped,
 *     and trailing 0x20 bytes are trimmed after the loop.
 *   - After the loop a < character gives XML_ERR_LT_IN_ATTRIBUTE; a missing
 *     closing delimiter gives XML_ERR_INVALID_CHAR or
 *     XML_ERR_ATTRIBUTE_NOT_FINISHED.
 *   - len is rejected when it is INT_MAX or more, since it is narrowed to
 *     int when stored through attlen (only if attlen is not NULL).
 */
3912 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3914 xmlChar *buf = NULL;
3915 xmlChar *rep = NULL;
3917 size_t buf_size = 0;
3918 int c, l, in_space = 0;
3919 xmlChar *current = NULL;
3922 if (NXT(0) == '"') {
3923 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3926 } else if (NXT(0) == '\'') {
3928 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3931 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3936 * allocate a translation buffer.
3938 buf_size = XML_PARSER_BUFFER_SIZE;
3939 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3940 if (buf == NULL) goto mem_error;
3943 * OK loop until we reach one of the ending char or a size limit.
3946 while (((NXT(0) != limit) && /* checked */
3947 (IS_CHAR(c)) && (c != '<')) &&
3948 (ctxt->instate != XML_PARSER_EOF)) {
3950 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3951 * special option is given
3953 if ((len > XML_MAX_TEXT_LENGTH) &&
3954 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3955 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3956 "AttValue length too long\n");
3962 if (NXT(1) == '#') {
3963 int val = xmlParseCharRef(ctxt);
3966 if (ctxt->replaceEntities) {
3967 if (len + 10 > buf_size) {
3968 growBuffer(buf, 10);
3973 * The reparsing will be done in xmlStringGetNodeList()
3974 * called by the attribute() function in SAX.c
3976 if (len + 10 > buf_size) {
3977 growBuffer(buf, 10);
3985 } else if (val != 0) {
3986 if (len + 10 > buf_size) {
3987 growBuffer(buf, 10);
3989 len += xmlCopyChar(0, &buf[len], val);
3992 ent = xmlParseEntityRef(ctxt);
3995 ctxt->nbentities += ent->owner;
3996 if ((ent != NULL) &&
3997 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3998 if (len + 10 > buf_size) {
3999 growBuffer(buf, 10);
4001 if ((ctxt->replaceEntities == 0) &&
4002 (ent->content[0] == '&')) {
4009 buf[len++] = ent->content[0];
4011 } else if ((ent != NULL) &&
4012 (ctxt->replaceEntities != 0)) {
4013 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4014 rep = xmlStringDecodeEntities(ctxt, ent->content,
4019 while (*current != 0) { /* non input consuming */
4020 if ((*current == 0xD) || (*current == 0xA) ||
4021 (*current == 0x9)) {
4025 buf[len++] = *current++;
4026 if (len + 10 > buf_size) {
4027 growBuffer(buf, 10);
4034 if (len + 10 > buf_size) {
4035 growBuffer(buf, 10);
4037 if (ent->content != NULL)
4038 buf[len++] = ent->content[0];
4040 } else if (ent != NULL) {
4041 int i = xmlStrlen(ent->name);
4042 const xmlChar *cur = ent->name;
4045 * This may look absurd but is needed to detect
4048 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4049 (ent->content != NULL) && (ent->checked == 0)) {
4050 unsigned long oldnbent = ctxt->nbentities;
4052 rep = xmlStringDecodeEntities(ctxt, ent->content,
4053 XML_SUBSTITUTE_REF, 0, 0, 0);
4055 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4057 if (xmlStrchr(rep, '<'))
4065 * Just output the reference
4068 while (len + i + 10 > buf_size) {
4069 growBuffer(buf, i + 10);
4072 buf[len++] = *cur++;
4077 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4078 if ((len != 0) || (!normalize)) {
4079 if ((!normalize) || (!in_space)) {
4080 COPY_BUF(l,buf,len,0x20);
4081 while (len + 10 > buf_size) {
4082 growBuffer(buf, 10);
4089 COPY_BUF(l,buf,len,c);
4090 if (len + 10 > buf_size) {
4091 growBuffer(buf, 10);
4099 if (ctxt->instate == XML_PARSER_EOF)
4102 if ((in_space) && (normalize)) {
4103 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4107 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4108 } else if (RAW != limit) {
4109 if ((c != 0) && (!IS_CHAR(c))) {
4110 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4111 "invalid character in attribute value\n");
4113 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4114 "AttValue: ' expected\n");
4120 * There we potentially risk an overflow, don't allow attribute value of
4121 * length more than INT_MAX it is a very reasonnable assumption !
4123 if (len >= INT_MAX) {
4124 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4125 "AttValue length too long\n");
4129 if (attlen != NULL) *attlen = (int) len;
4133 xmlErrMemory(ctxt, NULL);
4144 * @ctxt: an XML parser context
4146 * parse a value for an attribute
4147 * Note: the parser won't do substitution of entities here, this
4148 * will be handled later in xmlStringGetNodeList
4150 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4151 * "'" ([^<&'] | Reference)* "'"
4153 * 3.3.3 Attribute-Value Normalization:
4154 * Before the value of an attribute is passed to the application or
4155 * checked for validity, the XML processor must normalize it as follows:
4156 * - a character reference is processed by appending the referenced
4157 * character to the attribute value
4158 * - an entity reference is processed by recursively processing the
4159 * replacement text of the entity
4160 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4161 * appending #x20 to the normalized value, except that only a single
4162 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4163 * parsed entity or the literal entity value of an internal parsed entity
4164 * - other characters are processed by appending them to the normalized value
4165 * If the declared value is not CDATA, then the XML processor must further
4166 * process the normalized attribute value by discarding any leading and
4167 * trailing space (#x20) characters, and by replacing sequences of space
4168 * (#x20) characters by a single space (#x20) character.
4169 * All attributes for which no declaration has been read should be treated
4170 * by a non-validating parser as if declared CDATA.
4172 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * Public entry point for attribute value parsing: returns NULL when the
 * context or its current input is NULL, otherwise calls
 * xmlParseAttValueInternal() with no length or allocation out-parameters
 * and with normalization switched off.
 */
4177 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4178 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4179 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4183 * xmlParseSystemLiteral:
4184 * @ctxt: an XML parser context
4186 * parse an XML Literal
4188 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4190 * Returns the SystemLiteral parsed or NULL
/*
 * Parses a quoted SystemLiteral.
 *
 * The parser state on entry is saved in state and restored on the exits
 * shown in this block; while scanning, the state is
 * XML_PARSER_SYSTEM_LITERAL.  A start character that is not a quote raises
 * XML_ERR_LITERAL_NOT_STARTED.  Characters are copied with COPY_BUF while
 * they satisfy IS_CHAR and differ from the closing delimiter.  The buffer
 * begins at XML_PARSER_BUFFER_SIZE bytes and is enlarged with xmlRealloc()
 * when len + 5 >= size; once size exceeds XML_MAX_NAME_LENGTH without
 * XML_PARSE_HUGE, XML_ERR_NAME_TOO_LONG is raised.  If scanning stopped on
 * a character failing IS_CHAR, XML_ERR_LITERAL_NOT_FINISHED is raised.
 */
4194 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4195 xmlChar *buf = NULL;
4197 int size = XML_PARSER_BUFFER_SIZE;
4200 int state = ctxt->instate;
4207 } else if (RAW == '\'') {
4211 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4215 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4217 xmlErrMemory(ctxt, NULL);
4220 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4222 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4223 if (len + 5 >= size) {
4226 if ((size > XML_MAX_NAME_LENGTH) &&
4227 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4228 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4230 ctxt->instate = (xmlParserInputState) state;
4234 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4237 xmlErrMemory(ctxt, NULL);
4238 ctxt->instate = (xmlParserInputState) state;
4247 if (ctxt->instate == XML_PARSER_EOF) {
4252 COPY_BUF(l,buf,len,cur);
4262 ctxt->instate = (xmlParserInputState) state;
4263 if (!IS_CHAR(cur)) {
4264 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4272 * xmlParsePubidLiteral:
4273 * @ctxt: an XML parser context
4275 * parse an XML public literal
4277 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4279 * Returns the PubidLiteral parsed or NULL.
/*
 * Parses a quoted PubidLiteral.
 *
 * The parser state on entry is kept in oldstate and restored before
 * returning; while scanning, the state is XML_PARSER_PUBLIC_LITERAL.  A
 * start character that is not a quote raises XML_ERR_LITERAL_NOT_STARTED.
 * Bytes are accepted while they satisfy IS_PUBIDCHAR_CH and differ from the
 * closing delimiter.  The buffer begins at XML_PARSER_BUFFER_SIZE bytes and
 * is enlarged with xmlRealloc() when len + 1 >= size; once size exceeds
 * XML_MAX_NAME_LENGTH without XML_PARSE_HUGE, XML_ERR_NAME_TOO_LONG is
 * raised.  An unterminated literal raises XML_ERR_LITERAL_NOT_FINISHED.
 */
4283 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4284 xmlChar *buf = NULL;
4286 int size = XML_PARSER_BUFFER_SIZE;
4290 xmlParserInputState oldstate = ctxt->instate;
4296 } else if (RAW == '\'') {
4300 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4303 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4305 xmlErrMemory(ctxt, NULL);
4308 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4310 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4311 if (len + 1 >= size) {
4314 if ((size > XML_MAX_NAME_LENGTH) &&
4315 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4316 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4321 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4323 xmlErrMemory(ctxt, NULL);
4334 if (ctxt->instate == XML_PARSER_EOF) {
4349 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4353 ctxt->instate = oldstate;
/*
 * Forward declaration: the definition is not in this part of the file.  It
 * takes the parser context and the same cdata flag as xmlParseCharData().
 */
4357 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4360 * used for the test in the inner loop of the char data testing
/*
 * Lookup table with 256 entries, indexed by a raw input byte.  An entry is
 * non-zero (equal to its own index) for the bytes the fast scanning loop of
 * xmlParseCharData() may step over, and zero for every byte that stops it:
 *   - non-zero: 0x09 and 0x20 through 0x7F, except 0x26 (ampersand),
 *     0x3C (less-than) and 0x5D (right square bracket);
 *   - zero: all other control bytes, including 0x0A and 0x0D, the three
 *     exceptions above, and every byte from 0x80 upwards.
 */
4362 static const unsigned char test_char_data[256] = {
4363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4364 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4367 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4368 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4369 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4370 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4371 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4372 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4373 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4374 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4375 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4376 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4378 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4379 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4380 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4381 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4382 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4383 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4384 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4385 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4386 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4389 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4399 * @ctxt: an XML parser context
4400 * @cdata: int indicating whether we are within a CDATA section
4402 * parse a CharData section.
4403 * if we are within a CDATA section ']]>' marks an end of section.
4405 * The right angle bracket (>) may be represented using the string ">",
4406 * and must, for compatibility, be escaped using ">" or a character
4407 * reference when it appears in the string "]]>" in content, when that
4408 * string is not marking the end of a CDATA section.
4410 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
/*
 * Fast path for character data.  Scans directly in the input buffer
 * (starting at ctxt->input->cur), keeps ctxt->input->line/col up to date,
 * and hands each scanned run to the SAX characters() or
 * ignorableWhitespace() callback.  When the scan loop ends on a byte it
 * does not handle, the saved line/col are put back and the work is
 * delegated to xmlParseCharDataComplex().
 */
4414 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4417 int line = ctxt->input->line;
4418 int col = ctxt->input->col;
4424 * Accelerated common case where input don't need to be
4425 * modified before passing it to the handler.
4428 in = ctxt->input->cur;
4431 while (*in == 0x20) { in++; ctxt->input->col++; }
4434 ctxt->input->line++; ctxt->input->col = 1;
4436 } while (*in == 0xA);
4437 goto get_more_space;
4440 nbchar = in - ctxt->input->cur;
4442 const xmlChar *tmp = ctxt->input->cur;
4443 ctxt->input->cur = in;
4445 if ((ctxt->sax != NULL) &&
4446 (ctxt->sax->ignorableWhitespace !=
4447 ctxt->sax->characters)) {
4448 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4449 if (ctxt->sax->ignorableWhitespace != NULL)
4450 ctxt->sax->ignorableWhitespace(ctxt->userData,
4453 if (ctxt->sax->characters != NULL)
4454 ctxt->sax->characters(ctxt->userData,
4456 if (*ctxt->space == -1)
4459 } else if ((ctxt->sax != NULL) &&
4460 (ctxt->sax->characters != NULL)) {
4461 ctxt->sax->characters(ctxt->userData,
4469 ccol = ctxt->input->col;
/* Continue while the lookup table marks the current byte as plain data. */
4470 while (test_char_data[*in]) {
4474 ctxt->input->col = ccol;
4477 ctxt->input->line++; ctxt->input->col = 1;
4479 } while (*in == 0xA);
/* "]>" right after the current byte: report a misplaced CDATA end. */
4483 if ((in[1] == ']') && (in[2] == '>')) {
4484 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4485 ctxt->input->cur = in;
4492 nbchar = in - ctxt->input->cur;
4494 if ((ctxt->sax != NULL) &&
4495 (ctxt->sax->ignorableWhitespace !=
4496 ctxt->sax->characters) &&
4497 (IS_BLANK_CH(*ctxt->input->cur))) {
4498 const xmlChar *tmp = ctxt->input->cur;
4499 ctxt->input->cur = in;
4501 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4502 if (ctxt->sax->ignorableWhitespace != NULL)
4503 ctxt->sax->ignorableWhitespace(ctxt->userData,
4506 if (ctxt->sax->characters != NULL)
4507 ctxt->sax->characters(ctxt->userData,
4509 if (*ctxt->space == -1)
4512 line = ctxt->input->line;
4513 col = ctxt->input->col;
4514 } else if (ctxt->sax != NULL) {
4515 if (ctxt->sax->characters != NULL)
4516 ctxt->sax->characters(ctxt->userData,
4517 ctxt->input->cur, nbchar);
4518 line = ctxt->input->line;
4519 col = ctxt->input->col;
4521 /* something really bad happened in the SAX callback */
4522 if (ctxt->instate != XML_PARSER_CONTENT)
4525 ctxt->input->cur = in;
4529 ctxt->input->cur = in;
4531 ctxt->input->line++; ctxt->input->col = 1;
4532 continue; /* while */
4544 if (ctxt->instate == XML_PARSER_EOF)
4546 in = ctxt->input->cur;
4547 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/*
 * Slow path: reset the position counters to the values last saved in
 * line/col, then let xmlParseCharDataComplex() take over.
 */
4550 ctxt->input->line = line;
4551 ctxt->input->col = col;
4552 xmlParseCharDataComplex(ctxt, cdata);
4556 * xmlParseCharDataComplex:
4557 * @ctxt: an XML parser context
4558 * @cdata: int indicating whether we are within a CDATA section
4560 * parse a CharData section. This is the fallback function
4561 * of xmlParseCharData() when the parsing requires handling
4562 * of non-ASCII characters.
/*
 * Slow path for character data.  Copies characters one at a time into a
 * fixed-size local buffer (COPY_BUF) and passes the buffer to the SAX
 * characters() / ignorableWhitespace() callbacks, once whenever nbchar
 * reaches XML_PARSER_BIG_BUFFER_SIZE and again after the loop.  A "]]>"
 * sequence raises XML_ERR_MISPLACED_CDATA_END, and a non-zero character
 * that fails IS_CHAR() raises XML_ERR_INVALID_CHAR.
 */
4565 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4566 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4574 while ((cur != '<') && /* checked */
4576 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4577 if ((cur == ']') && (NXT(1) == ']') &&
4581 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4584 COPY_BUF(l,buf,nbchar,cur);
/* Buffer threshold reached: hand the accumulated chunk to SAX. */
4585 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4589 * OK the segment is to be consumed as chars.
4591 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4592 if (areBlanks(ctxt, buf, nbchar, 0)) {
4593 if (ctxt->sax->ignorableWhitespace != NULL)
4594 ctxt->sax->ignorableWhitespace(ctxt->userData,
4597 if (ctxt->sax->characters != NULL)
4598 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4599 if ((ctxt->sax->characters !=
4600 ctxt->sax->ignorableWhitespace) &&
4601 (*ctxt->space == -1))
4606 /* something really bad happened in the SAX callback */
4607 if (ctxt->instate != XML_PARSER_CONTENT)
4614 if (ctxt->instate == XML_PARSER_EOF)
4623 * OK the segment is to be consumed as chars.
4625 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4626 if (areBlanks(ctxt, buf, nbchar, 0)) {
4627 if (ctxt->sax->ignorableWhitespace != NULL)
4628 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4630 if (ctxt->sax->characters != NULL)
4631 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4632 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4633 (*ctxt->space == -1))
/* The loop stopped on a non-zero character that fails IS_CHAR(). */
4638 if ((cur != 0) && (!IS_CHAR(cur))) {
4639 /* Generate the error and skip the offending character */
4640 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4641 "PCDATA invalid Char value %d\n",
4648 * xmlParseExternalID:
4649 * @ctxt: an XML parser context
4650 * @publicID: a xmlChar** receiving PubidLiteral
4651 * @strict: indicate whether we should restrict parsing to only
4652 * production [75], see NOTE below
4654 * Parse an External ID or a Public ID
4656 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4657 * 'PUBLIC' S PubidLiteral S SystemLiteral
4659 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4660 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4662 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4664 * Returns the SystemLiteral; in the second
4665 * case publicID receives PubidLiteral. If strict is off
4666 * it is possible to return NULL and have publicID set.
/*
 * Recognises the 'SYSTEM' or 'PUBLIC' keyword at the current position.
 *  - SYSTEM: a blank must follow, then xmlParseSystemLiteral() fills URI.
 *  - PUBLIC: a blank must follow, xmlParsePubidLiteral() fills *publicID,
 *    and a system literal is then looked for.  In the branch that handles
 *    production [83], NULL is returned when the public identifier is not
 *    followed by a blank and then a quote character.
 * Missing blanks or literals are reported through xmlFatalErr() /
 * xmlFatalErrMsg().
 */
4670 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4671 xmlChar *URI = NULL;
4676 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4678 if (!IS_BLANK_CH(CUR)) {
4679 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4680 "Space required after 'SYSTEM'\n");
4683 URI = xmlParseSystemLiteral(ctxt);
4685 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4687 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4689 if (!IS_BLANK_CH(CUR)) {
4690 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4691 "Space required after 'PUBLIC'\n");
4694 *publicID = xmlParsePubidLiteral(ctxt);
4695 if (*publicID == NULL) {
4696 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4700 * We don't handle [83] so "S SystemLiteral" is required.
4702 if (!IS_BLANK_CH(CUR)) {
4703 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4704 "Space required after the Public Identifier\n");
4708 * We handle [83] so we return immediately, if
4709 * "S SystemLiteral" is not detected. From a purely parsing
4710 * point of view that's a nice mess.
4716 if (!IS_BLANK_CH(*ptr)) return(NULL);
4718 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4719 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4722 URI = xmlParseSystemLiteral(ctxt);
4724 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4731 * xmlParseCommentComplex:
4732 * @ctxt: an XML parser context
4733 * @buf: the already parsed part of the buffer
4734 * @len: number of bytes filled in the buffer
4735 * @size: allocated size of the buffer
4737 * Skip an XML (SGML) comment <!-- .... -->
4738 * The spec says that "For compatibility, the string "--" (double-hyphen)
4739 * must not occur within comments. "
4740 * This is the slow routine in case the accelerator for ascii didn't work
4742 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Slow-path comment parser, continuing from whatever the caller already
 * stored in buf (len bytes used, size bytes allocated).
 *  - A buffer of XML_PARSER_BUFFER_SIZE bytes is allocated here in one
 *    branch (presumably when buf is NULL; the test is not visible in
 *    this excerpt).
 *  - The buffer is doubled with xmlRealloc() once len + 5 reaches size.
 *  - Comments longer than XML_MAX_TEXT_LENGTH are rejected unless
 *    XML_PARSE_HUGE is set in ctxt->options.
 *  - A "--" pair inside the comment raises XML_ERR_HYPHEN_IN_COMMENT.
 *  - The comment must end in the input it started in (inputid check).
 * On success buf is passed to sax->comment() when SAX is enabled.
 */
4745 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4746 size_t len, size_t size) {
4753 inputid = ctxt->input->id;
4757 size = XML_PARSER_BUFFER_SIZE;
4758 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4760 xmlErrMemory(ctxt, NULL);
4764 GROW; /* Assure there's enough input data */
4767 goto not_terminated;
4769 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4770 "xmlParseComment: invalid xmlChar value %d\n",
4778 goto not_terminated;
4780 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4781 "xmlParseComment: invalid xmlChar value %d\n",
4789 goto not_terminated;
4790 while (IS_CHAR(cur) && /* checked */
4792 (r != '-') || (q != '-'))) {
4793 if ((r == '-') && (q == '-')) {
4794 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4796 if ((len > XML_MAX_TEXT_LENGTH) &&
4797 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4798 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4799 "Comment too big found", NULL);
/* Keep headroom in buf: double the allocation once len + 5 reaches size. */
4803 if (len + 5 >= size) {
4807 new_size = size * 2;
4808 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4809 if (new_buf == NULL) {
4811 xmlErrMemory(ctxt, NULL);
4817 COPY_BUF(ql,buf,len,q);
4827 if (ctxt->instate == XML_PARSER_EOF) {
4842 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4843 "Comment not terminated \n<!--%.50s\n", buf);
4844 } else if (!IS_CHAR(cur)) {
4845 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4846 "xmlParseComment: invalid xmlChar value %d\n",
4849 if (inputid != ctxt->input->id) {
4850 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4851 "Comment doesn't start and stop in the same entity\n");
4854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855 (!ctxt->disableSAX))
4856 ctxt->sax->comment(ctxt->userData, buf);
4861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862 "Comment not terminated\n", NULL);
4869 * @ctxt: an XML parser context
4871 * Skip an XML (SGML) comment <!-- .... -->
4872 * The spec says that "For compatibility, the string "--" (double-hyphen)
4873 * must not occur within comments. "
4875 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Parses a comment starting at "<!--" (returns at once if the input does
 * not start with those four bytes).  The parser state is saved in 'state'
 * and set to XML_PARSER_COMMENT while parsing.
 * Fast path: scans ASCII bytes directly in the input buffer, maintaining
 * line/col, and appends each scanned run to a heap buffer with memcpy().
 * The comment is passed to sax->comment(), or an empty string is passed
 * in the other branch; this happens only when SAX is enabled.
 * When the fast loop cannot continue, xmlParseCommentComplex() finishes
 * the job and the saved state is restored.
 */
4878 xmlParseComment(xmlParserCtxtPtr ctxt) {
4879 xmlChar *buf = NULL;
4880 size_t size = XML_PARSER_BUFFER_SIZE;
4882 xmlParserInputState state;
4889 * Check that there is a comment right here.
4891 if ((RAW != '<') || (NXT(1) != '!') ||
4892 (NXT(2) != '-') || (NXT(3) != '-')) return;
4893 state = ctxt->instate;
4894 ctxt->instate = XML_PARSER_COMMENT;
4895 inputid = ctxt->input->id;
4901 * Accelerated common case where input don't need to be
4902 * modified before passing it to the handler.
4904 in = ctxt->input->cur;
4908 ctxt->input->line++; ctxt->input->col = 1;
4910 } while (*in == 0xA);
4913 ccol = ctxt->input->col;
4914 while (((*in > '-') && (*in <= 0x7F)) ||
4915 ((*in >= 0x20) && (*in < '-')) ||
4920 ctxt->input->col = ccol;
4923 ctxt->input->line++; ctxt->input->col = 1;
4925 } while (*in == 0xA);
4928 nbchar = in - ctxt->input->cur;
4930 * save current set of data
4933 if ((ctxt->sax != NULL) &&
4934 (ctxt->sax->comment != NULL)) {
4936 if ((*in == '-') && (in[1] == '-'))
4939 size = XML_PARSER_BUFFER_SIZE + nbchar;
4940 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4942 xmlErrMemory(ctxt, NULL);
4943 ctxt->instate = state;
4947 } else if (len + nbchar + 1 >= size) {
4949 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4950 new_buf = (xmlChar *) xmlRealloc(buf,
4951 size * sizeof(xmlChar));
4952 if (new_buf == NULL) {
4954 xmlErrMemory(ctxt, NULL);
4955 ctxt->instate = state;
/* Append the run just scanned (nbchar bytes) to the comment buffer. */
4960 memcpy(&buf[len], ctxt->input->cur, nbchar);
4965 if ((len > XML_MAX_TEXT_LENGTH) &&
4966 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4967 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4968 "Comment too big found", NULL);
4972 ctxt->input->cur = in;
4975 ctxt->input->line++; ctxt->input->col = 1;
4980 ctxt->input->cur = in;
4982 ctxt->input->line++; ctxt->input->col = 1;
4983 continue; /* while */
4989 if (ctxt->instate == XML_PARSER_EOF) {
4993 in = ctxt->input->cur;
4997 if (ctxt->input->id != inputid) {
4998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999 "comment doesn't start and stop in the same entity\n");
5002 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5003 (!ctxt->disableSAX)) {
5005 ctxt->sax->comment(ctxt->userData, buf);
5007 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5011 if (ctxt->instate != XML_PARSER_EOF)
5012 ctxt->instate = state;
5016 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5017 "Double hyphen within comment: "
5021 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5022 "Double hyphen within comment\n", NULL);
5030 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* Fast path gave up: finish with the slow parser, then restore the state. */
5031 xmlParseCommentComplex(ctxt, buf, len, size);
5032 ctxt->instate = state;
5039 * @ctxt: an XML parser context
5041 * parse the name of a PI
5043 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5045 * Returns the PITarget name or NULL
/*
 * Parses the PI target with xmlParseName() and screens names whose first
 * three characters are "xml" in any letter case:
 *  - exactly "xml" (lower case): fatal XML_ERR_RESERVED_XML_NAME, "XML
 *    declaration allowed only at the start of the document";
 *  - any other three-letter case variant: fatal XML_ERR_RESERVED_XML_NAME;
 *  - longer names are compared against the xmlW3CPIs list (what happens
 *    on a match is not visible in this excerpt; presumably the name is
 *    accepted), otherwise a warning about the 'xml' prefix is issued.
 * A name containing ':' additionally raises the XML_NS_ERR_COLON
 * namespace error.
 */
5049 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5050 const xmlChar *name;
5052 name = xmlParseName(ctxt);
5053 if ((name != NULL) &&
5054 ((name[0] == 'x') || (name[0] == 'X')) &&
5055 ((name[1] == 'm') || (name[1] == 'M')) &&
5056 ((name[2] == 'l') || (name[2] == 'L'))) {
5058 if ((name[0] == 'x') && (name[1] == 'm') &&
5059 (name[2] == 'l') && (name[3] == 0)) {
5060 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5061 "XML declaration allowed only at the start of the document\n");
5063 } else if (name[3] == 0) {
5064 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5068 if (xmlW3CPIs[i] == NULL) break;
5069 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5072 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5073 "xmlParsePITarget: invalid name prefix 'xml'\n",
5076 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5077 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5078 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5083 #ifdef LIBXML_CATALOG_ENABLED
5085 * xmlParseCatalogPI:
5086 * @ctxt: an XML parser context
5087 * @catalog: the PI value string
5089 * parse an XML Catalog Processing Instruction.
5091 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5093 * Occurs only if allowed by the user and if happening in the Misc
5094 * part of the document before any doctype information
5095 * This will add the given catalog to the parsing context in order
5096 * to be used if there is a resolution need further down in the document
/*
 * Extracts the URL from a PI value of the form  catalog = "URL"  (single
 * or double quotes), skipping blanks between the tokens, duplicates it
 * with xmlStrndup() and adds it to ctxt->catalogs through
 * xmlCatalogAddLocal().  Malformed input is reported with an
 * XML_WAR_CATALOG_PI warning.
 */
5100 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5101 xmlChar *URL = NULL;
5102 const xmlChar *tmp, *base;
5106 while (IS_BLANK_CH(*tmp)) tmp++;
5107 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5110 while (IS_BLANK_CH(*tmp)) tmp++;
5115 while (IS_BLANK_CH(*tmp)) tmp++;
5117 if ((marker != '\'') && (marker != '"'))
5121 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5124 URL = xmlStrndup(base, tmp - base);
5126 while (IS_BLANK_CH(*tmp)) tmp++;
/* Add the extracted URL to this parser context's catalogs. */
5131 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5137 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5138 "Catalog PI syntax error: %s\n",
5147 * @ctxt: an XML parser context
5149 * parse an XML Processing Instruction.
5151 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5153 * The processing is transferred to SAX once parsed.
/*
 * Parses a processing instruction starting at "<?".
 * The parser state is saved in 'state' and set to XML_PARSER_PI while the
 * PI is read; it is put back afterwards unless the parser reached
 * XML_PARSER_EOF.
 *  - The target comes from xmlParsePITarget().
 *  - An immediately following "?>" yields a PI without data.
 *  - Otherwise a blank is required after the target and the data is
 *    collected into a heap buffer that doubles once len + 5 reaches
 *    size; data longer than XML_MAX_TEXT_LENGTH is rejected unless
 *    XML_PARSE_HUGE is set.
 *  - With catalogs enabled, a PI whose target equals XML_CATALOG_PI and
 *    which is seen in the MISC or START state is passed to
 *    xmlParseCatalogPI() when the catalog defaults allow it.
 *  - The PI is passed to sax->processingInstruction() when SAX is enabled.
 * A PI that does not end in the input it started in is reported as
 * XML_ERR_ENTITY_BOUNDARY in both the empty and the non-empty case.
 */
5157 xmlParsePI(xmlParserCtxtPtr ctxt) {
5158 xmlChar *buf = NULL;
5160 size_t size = XML_PARSER_BUFFER_SIZE;
5162 const xmlChar *target;
5163 xmlParserInputState state;
5166 if ((RAW == '<') && (NXT(1) == '?')) {
5167 xmlParserInputPtr input = ctxt->input;
5168 state = ctxt->instate;
5169 ctxt->instate = XML_PARSER_PI;
5171 * this is a Processing Instruction.
5177 * Parse the target name and check for special support like
5180 target = xmlParsePITarget(ctxt);
5181 if (target != NULL) {
5182 if ((RAW == '?') && (NXT(1) == '>')) {
5183 if (input != ctxt->input) {
5184 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5185 "PI declaration doesn't start and stop in the same entity\n");
5192 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5193 (ctxt->sax->processingInstruction != NULL))
5194 ctxt->sax->processingInstruction(ctxt->userData,
5196 if (ctxt->instate != XML_PARSER_EOF)
5197 ctxt->instate = state;
5200 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5202 xmlErrMemory(ctxt, NULL);
5203 ctxt->instate = state;
5207 if (!IS_BLANK(cur)) {
5208 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5209 "ParsePI: PI %s space expected\n", target);
5213 while (IS_CHAR(cur) && /* checked */
5214 ((cur != '?') || (NXT(1) != '>'))) {
5215 if (len + 5 >= size) {
5217 size_t new_size = size * 2;
5218 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5220 xmlErrMemory(ctxt, NULL);
5222 ctxt->instate = state;
5231 if (ctxt->instate == XML_PARSER_EOF) {
5236 if ((len > XML_MAX_TEXT_LENGTH) &&
5237 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5238 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5239 "PI %s too big found", target);
5241 ctxt->instate = state;
5245 COPY_BUF(l,buf,len,cur);
5254 if ((len > XML_MAX_TEXT_LENGTH) &&
5255 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5256 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5257 "PI %s too big found", target);
5259 ctxt->instate = state;
5264 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5265 "ParsePI: PI %s never end ...\n", target);
/*
 * The PI must end in the input it started in.  This is an entity
 * boundary violation, not a missing blank, so it uses the same error
 * code as the empty-PI branch.
 */
5267 if (input != ctxt->input) {
5268 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5269 "PI declaration doesn't start and stop in the same entity\n");
5273 #ifdef LIBXML_CATALOG_ENABLED
5274 if (((state == XML_PARSER_MISC) ||
5275 (state == XML_PARSER_START)) &&
5276 (xmlStrEqual(target, XML_CATALOG_PI))) {
5277 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5278 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5279 (allow == XML_CATA_ALLOW_ALL))
5280 xmlParseCatalogPI(ctxt, buf);
5288 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5289 (ctxt->sax->processingInstruction != NULL))
5290 ctxt->sax->processingInstruction(ctxt->userData,
5295 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5297 if (ctxt->instate != XML_PARSER_EOF)
5298 ctxt->instate = state;
5303 * xmlParseNotationDecl:
5304 * @ctxt: an XML parser context
5306 * parse a notation declaration
5308 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5310 * Hence there are actually 3 choices:
5311 * 'PUBLIC' S PubidLiteral
5312 * 'PUBLIC' S PubidLiteral S SystemLiteral
5313 * and 'SYSTEM' S SystemLiteral
5315 * See the NOTE on xmlParseExternalID().
/*
 * Parses a "<!NOTATION" declaration.
 *  - A blank is required after the keyword and after the notation name.
 *  - The name comes from xmlParseName(); a ':' in it raises the
 *    XML_NS_ERR_COLON namespace error.
 *  - The identifiers are read with xmlParseExternalID() in non-strict
 *    mode (strict == 0), so a PUBLIC id without a system literal is
 *    accepted.
 *  - A declaration that does not end in the input it started in is
 *    reported as XML_ERR_ENTITY_BOUNDARY.
 *  - The result is passed to sax->notationDecl() when SAX is enabled;
 *    otherwise XML_ERR_NOTATION_NOT_FINISHED is raised in the failing
 *    branch.
 * Systemid and Pubid are released with xmlFree() before leaving.
 */
5319 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5320 const xmlChar *name;
5324 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5325 xmlParserInputPtr input = ctxt->input;
5328 if (!IS_BLANK_CH(CUR)) {
5329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5330 "Space required after '<!NOTATION'\n");
5335 name = xmlParseName(ctxt);
5337 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5340 if (!IS_BLANK_CH(CUR)) {
5341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342 "Space required after the NOTATION name'\n");
5345 if (xmlStrchr(name, ':') != NULL) {
5346 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5347 "colon are forbidden from notation names '%s'\n",
5355 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
/*
 * The declaration must end in the input it started in.  This is an
 * entity boundary violation, not a missing blank.
 */
5359 if (input != ctxt->input) {
5360 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5361 "Notation declaration doesn't start and stop in the same entity\n");
5364 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5365 (ctxt->sax->notationDecl != NULL))
5366 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5368 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5370 if (Systemid != NULL) xmlFree(Systemid);
5371 if (Pubid != NULL) xmlFree(Pubid);
5376 * xmlParseEntityDecl:
5377 * @ctxt: an XML parser context
5379 * parse <!ENTITY declarations
5381 * [70] EntityDecl ::= GEDecl | PEDecl
5383 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5385 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5387 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5389 * [74] PEDef ::= EntityValue | ExternalID
5391 * [76] NDataDecl ::= S 'NDATA' S Name
5393 * [ VC: Notation Declared ]
5394 * The Name must match the declared name of a notation.
/*
 * Parses a "<!ENTITY" declaration and reports it through SAX.
 * Blanks are required after the keyword, after '%' and after the name;
 * a ':' in the name raises XML_NS_ERR_COLON.  While the definition is
 * read, ctxt->instate is XML_PARSER_ENTITY_DECL.
 * Four kinds of definition are handled:
 *  - quoted value, reported as XML_INTERNAL_PARAMETER_ENTITY;
 *  - external id, reported as XML_EXTERNAL_PARAMETER_ENTITY;
 *  - quoted value, reported as XML_INTERNAL_GENERAL_ENTITY;
 *  - external id, reported through unparsedEntityDecl() when an NDATA
 *    name follows, else as XML_EXTERNAL_GENERAL_PARSED_ENTITY.
 * External URIs are checked with xmlParseURI(): an unparsable URI gives
 * XML_ERR_INVALID_URI, a URI with a fragment gives XML_ERR_URI_FRAGMENT.
 * For general entities, when ctxt->myDoc is NULL or its version equals
 * SAX_COMPAT_MODE, a document and a "fake" internal subset are created
 * as needed and the entity is also recorded with xmlSAX2EntityDecl().
 * For external general entities this is done only when
 * ctxt->replaceEntities is set.
 * value, URI and literal are released with xmlFree() before leaving.
 */
5398 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5399 const xmlChar *name = NULL;
5400 xmlChar *value = NULL;
5401 xmlChar *URI = NULL, *literal = NULL;
5402 const xmlChar *ndata = NULL;
5403 int isParameter = 0;
5404 xmlChar *orig = NULL;
5407 /* GROW; done in the caller */
5408 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5409 xmlParserInputPtr input = ctxt->input;
5412 skipped = SKIP_BLANKS;
5414 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5415 "Space required after '<!ENTITY'\n");
5420 skipped = SKIP_BLANKS;
5422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423 "Space required after '%'\n");
5428 name = xmlParseName(ctxt);
5430 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5431 "xmlParseEntityDecl: no name\n");
5434 if (xmlStrchr(name, ':') != NULL) {
5435 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5436 "colon are forbidden from entities names '%s'\n",
5439 skipped = SKIP_BLANKS;
5441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442 "Space required after the entity name\n");
5445 ctxt->instate = XML_PARSER_ENTITY_DECL;
5447 * handle the various case of definitions...
5450 if ((RAW == '"') || (RAW == '\'')) {
5451 value = xmlParseEntityValue(ctxt, &orig);
5453 if ((ctxt->sax != NULL) &&
5454 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5455 ctxt->sax->entityDecl(ctxt->userData, name,
5456 XML_INTERNAL_PARAMETER_ENTITY,
5460 URI = xmlParseExternalID(ctxt, &literal, 1);
5461 if ((URI == NULL) && (literal == NULL)) {
5462 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5467 uri = xmlParseURI((const char *) URI);
5469 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5470 "Invalid URI: %s\n", URI);
5472 * This really ought to be a well formedness error
5473 * but the XML Core WG decided otherwise c.f. issue
5474 * E26 of the XML erratas.
5477 if (uri->fragment != NULL) {
5479 * Okay this is foolish to block those but not
5482 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484 if ((ctxt->sax != NULL) &&
5485 (!ctxt->disableSAX) &&
5486 (ctxt->sax->entityDecl != NULL))
5487 ctxt->sax->entityDecl(ctxt->userData, name,
5488 XML_EXTERNAL_PARAMETER_ENTITY,
5489 literal, URI, NULL);
5496 if ((RAW == '"') || (RAW == '\'')) {
5497 value = xmlParseEntityValue(ctxt, &orig);
5498 if ((ctxt->sax != NULL) &&
5499 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5500 ctxt->sax->entityDecl(ctxt->userData, name,
5501 XML_INTERNAL_GENERAL_ENTITY,
5504 * For expat compatibility in SAX mode.
5506 if ((ctxt->myDoc == NULL) ||
5507 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5508 if (ctxt->myDoc == NULL) {
5509 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5510 if (ctxt->myDoc == NULL) {
5511 xmlErrMemory(ctxt, "New Doc failed");
5514 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516 if (ctxt->myDoc->intSubset == NULL)
5517 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5518 BAD_CAST "fake", NULL, NULL);
5520 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5524 URI = xmlParseExternalID(ctxt, &literal, 1);
5525 if ((URI == NULL) && (literal == NULL)) {
5526 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5531 uri = xmlParseURI((const char *)URI);
5533 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5534 "Invalid URI: %s\n", URI);
5536 * This really ought to be a well formedness error
5537 * but the XML Core WG decided otherwise c.f. issue
5538 * E26 of the XML erratas.
5541 if (uri->fragment != NULL) {
5543 * Okay this is foolish to block those but not
5546 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
/* Anything other than '>' at this point must start with a blank. */
5551 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5552 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5553 "Space required before 'NDATA'\n");
5556 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5558 if (!IS_BLANK_CH(CUR)) {
5559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required after 'NDATA'\n");
5563 ndata = xmlParseName(ctxt);
5564 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5565 (ctxt->sax->unparsedEntityDecl != NULL))
5566 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5567 literal, URI, ndata);
5569 if ((ctxt->sax != NULL) &&
5570 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5571 ctxt->sax->entityDecl(ctxt->userData, name,
5572 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5573 literal, URI, NULL);
5575 * For expat compatibility in SAX mode.
5576 * assuming the entity repalcement was asked for
5578 if ((ctxt->replaceEntities != 0) &&
5579 ((ctxt->myDoc == NULL) ||
5580 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5581 if (ctxt->myDoc == NULL) {
5582 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5583 if (ctxt->myDoc == NULL) {
5584 xmlErrMemory(ctxt, "New Doc failed");
5587 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5590 if (ctxt->myDoc->intSubset == NULL)
5591 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5592 BAD_CAST "fake", NULL, NULL);
5593 xmlSAX2EntityDecl(ctxt, name,
5594 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5595 literal, URI, NULL);
5600 if (ctxt->instate == XML_PARSER_EOF)
5604 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5605 "xmlParseEntityDecl: entity %s not terminated\n", name);
5607 if (input != ctxt->input) {
5608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5609 "Entity declaration doesn't start and stop in the same entity\n");
5615 * Ugly mechanism to save the raw entity value.
5617 xmlEntityPtr cur = NULL;
5620 if ((ctxt->sax != NULL) &&
5621 (ctxt->sax->getParameterEntity != NULL))
5622 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5624 if ((ctxt->sax != NULL) &&
5625 (ctxt->sax->getEntity != NULL))
5626 cur = ctxt->sax->getEntity(ctxt->userData, name);
5627 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5628 cur = xmlSAX2GetEntity(ctxt, name);
5632 if (cur->orig != NULL)
5639 if (value != NULL) xmlFree(value);
5640 if (URI != NULL) xmlFree(URI);
5641 if (literal != NULL) xmlFree(literal);
5646 * xmlParseDefaultDecl:
5647 * @ctxt: an XML parser context
5648 * @value: Receive a possible fixed default value for the attribute
5650 * Parse an attribute default declaration
5652 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5654 * [ VC: Required Attribute ]
5655 * if the default declaration is the keyword #REQUIRED, then the
5656 * attribute must be specified for all elements of the type in the
5657 * attribute-list declaration.
5659 * [ VC: Attribute Default Legal ]
5660 * The declared default value must meet the lexical constraints of
5661 * the declared attribute type c.f. xmlValidateAttributeDecl()
5663 * [ VC: Fixed Attribute Default ]
5664 * if an attribute has a default value declared with the #FIXED
5665 * keyword, instances of that attribute must match the default value.
5667 * [ WFC: No < in Attribute Values ]
5668 * handled in xmlParseAttValue()
5670 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5671 * or XML_ATTRIBUTE_FIXED.
/*
 * Parses an attribute default declaration.
 * "#REQUIRED" and "#IMPLIED" return XML_ATTRIBUTE_REQUIRED and
 * XML_ATTRIBUTE_IMPLIED directly.  Otherwise the kind starts as
 * XML_ATTRIBUTE_NONE and becomes XML_ATTRIBUTE_FIXED after "#FIXED"
 * (which must be followed by a blank); the default value is then read
 * with xmlParseAttValue() and ctxt->instate is set to XML_PARSER_DTD.
 * A failure is reported with the error code currently in ctxt->errNo
 * (presumably when no value was parsed; the test is not visible in this
 * excerpt).
 */
5675 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5680 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5682 return(XML_ATTRIBUTE_REQUIRED);
5684 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5686 return(XML_ATTRIBUTE_IMPLIED);
5688 val = XML_ATTRIBUTE_NONE;
5689 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5691 val = XML_ATTRIBUTE_FIXED;
5692 if (!IS_BLANK_CH(CUR)) {
5693 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5694 "Space required after '#FIXED'\n");
5698 ret = xmlParseAttValue(ctxt);
5699 ctxt->instate = XML_PARSER_DTD;
5701 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5702 "Attribute default value declaration error\n");
5709 * xmlParseNotationType:
5710 * @ctxt: an XML parser context
5712 * parse an Notation attribute type.
5714 * Note: the leading 'NOTATION' S part has already been parsed...
5716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5718 * [ VC: Notation Attributes ]
5719 * Values of this type must match one of the notation names included
5720 * in the declaration; all notation names in the declaration must be declared.
5722 * Returns: the notation attribute tree built while parsing
/*
 * Parses the parenthesised, '|'-separated list of notation names of a
 * NOTATION attribute type and builds an xmlEnumeration list from it.
 * Each name is read with xmlParseName().  A name already present in the
 * list is reported as the XML_DTD_DUP_TOKEN validity error; otherwise a
 * node is created with xmlCreateEnumeration() and linked in (ret is the
 * head, last the tail).  On the error paths shown here the partial list
 * is released with xmlFreeEnumeration().
 */
5726 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5727 const xmlChar *name;
5728 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5731 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5738 name = xmlParseName(ctxt);
5740 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5741 "Name expected in NOTATION declaration\n");
5742 xmlFreeEnumeration(ret);
/* Walk the list through tmp looking for an entry equal to name. */
5746 while (tmp != NULL) {
5747 if (xmlStrEqual(name, tmp->name)) {
5748 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5749 "standalone: attribute notation value token %s duplicated\n",
5751 if (!xmlDictOwns(ctxt->dict, name))
5752 xmlFree((xmlChar *) name);
5758 cur = xmlCreateEnumeration(name);
5760 xmlFreeEnumeration(ret);
5763 if (last == NULL) ret = last = cur;
5770 } while (RAW == '|');
5772 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5773 xmlFreeEnumeration(ret);
5781 * xmlParseEnumerationType:
5782 * @ctxt: an XML parser context
5784 * parse an Enumeration attribute type.
5786 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5788 * [ VC: Enumeration ]
5789 * Values of this type must match one of the Nmtoken tokens in
5792 * Returns: the enumeration attribute tree built while parsing
/*
 * Parses the parenthesised, '|'-separated list of Nmtokens of an
 * Enumeration attribute type and builds an xmlEnumeration list from it.
 * Each token is read with xmlParseNmtoken().  A token already present in
 * the list is reported as the XML_DTD_DUP_TOKEN validity error;
 * otherwise a node is created with xmlCreateEnumeration() and linked in
 * (ret is the head, last the tail).  Token strings are tested with
 * xmlDictOwns() before the statements that follow, which are not visible
 * in this excerpt (presumably an xmlFree of the token).
 */
5796 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5798 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5801 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5808 name = xmlParseNmtoken(ctxt);
5810 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
/* Walk the list through tmp looking for an entry equal to name. */
5814 while (tmp != NULL) {
5815 if (xmlStrEqual(name, tmp->name)) {
5816 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817 "standalone: attribute enumeration value token %s duplicated\n",
5819 if (!xmlDictOwns(ctxt->dict, name))
5826 cur = xmlCreateEnumeration(name);
5827 if (!xmlDictOwns(ctxt->dict, name))
5830 xmlFreeEnumeration(ret);
5833 if (last == NULL) ret = last = cur;
5840 } while (RAW == '|');
5842 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5850 * xmlParseEnumeratedType:
5851 * @ctxt: an XML parser context
5852 * @tree: the enumeration tree built while parsing
5854 * parse an Enumerated attribute type.
5856 * [57] EnumeratedType ::= NotationType | Enumeration
5858 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5861 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
/*
 * Dispatches between the two enumerated attribute types.
 * If the input starts with "NOTATION", a blank must follow and the list
 * is parsed by xmlParseNotationType(); the result is
 * XML_ATTRIBUTE_NOTATION.  Otherwise the list is parsed by
 * xmlParseEnumerationType() and the result is XML_ATTRIBUTE_ENUMERATION.
 * In both cases the list is stored in *tree and 0 is returned when it is
 * NULL.
 */
5865 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5866 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5868 if (!IS_BLANK_CH(CUR)) {
5869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5870 "Space required after 'NOTATION'\n");
5874 *tree = xmlParseNotationType(ctxt);
5875 if (*tree == NULL) return(0);
5876 return(XML_ATTRIBUTE_NOTATION);
5878 *tree = xmlParseEnumerationType(ctxt);
5879 if (*tree == NULL) return(0);
5880 return(XML_ATTRIBUTE_ENUMERATION);
5884 * xmlParseAttributeType:
5885 * @ctxt: an XML parser context
5886 * @tree: the enumeration tree built while parsing
5888 * parse the Attribute list def for an element
5890 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5892 * [55] StringType ::= 'CDATA'
5894 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5895 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5897 * Validity constraints for attribute values syntax are checked in
5898 * xmlValidateAttributeValue()
5901 * Values of type ID must match the Name production. A name must not
5902 * appear more than once in an XML document as a value of this type;
5903 * i.e., ID values must uniquely identify the elements which bear them.
5905 * [ VC: One ID per Element Type ]
5906 * No element type may have more than one ID attribute specified.
5908 * [ VC: ID Attribute Default ]
5909 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5912 * Values of type IDREF must match the Name production, and values
5913 * of type IDREFS must match Names; each IDREF Name must match the value
5914 * of an ID attribute on some element in the XML document; i.e. IDREF
5915 * values must match the value of some ID attribute.
5917 * [ VC: Entity Name ]
5918 * Values of type ENTITY must match the Name production, values
5919 * of type ENTITIES must match Names; each Entity Name must match the
5920 * name of an unparsed entity declared in the DTD.
5922 * [ VC: Name Token ]
5923 * Values of type NMTOKEN must match the Nmtoken production; values
5924 * of type NMTOKENS must match Nmtokens.
5926 * Returns the attribute type
/*
 * Maps the attribute type keyword at the current position to its
 * XML_ATTRIBUTE_* constant.  Longer keywords are tested before their
 * prefixes (IDREFS, then IDREF, then ID; NMTOKENS, then NMTOKEN) so the
 * longest match wins.  When no keyword matches, the type is parsed by
 * xmlParseEnumeratedType(), which may fill *tree.
 */
5929 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5931 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5933 return(XML_ATTRIBUTE_CDATA);
5934 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5936 return(XML_ATTRIBUTE_IDREFS);
5937 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5939 return(XML_ATTRIBUTE_IDREF);
5940 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5942 return(XML_ATTRIBUTE_ID);
5943 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5945 return(XML_ATTRIBUTE_ENTITY);
5946 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5948 return(XML_ATTRIBUTE_ENTITIES);
5949 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5951 return(XML_ATTRIBUTE_NMTOKENS);
5952 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5954 return(XML_ATTRIBUTE_NMTOKEN);
5956 return(xmlParseEnumeratedType(ctxt, tree));
5960 * xmlParseAttributeListDecl:
5961 * @ctxt: an XML parser context
5963 * parse the Attribute list declaration for an element
5965 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5967 * [53] AttDef ::= S Name S AttType S DefaultDecl
5971 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parse one '<!ATTLIST' declaration: the element name, then a loop over
 * attribute definitions (name, type, default). Each definition is handed
 * to the SAX attributeDecl callback when SAX is enabled.
 */
5972 const xmlChar *elemName;
5973 const xmlChar *attrName;
5974 xmlEnumerationPtr tree;
5976 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* remember the starting input; it is compared with ctxt->input at the end */
5977 xmlParserInputPtr input = ctxt->input;
5980 if (!IS_BLANK_CH(CUR)) {
5981 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982 "Space required after '<!ATTLIST'\n");
5985 elemName = xmlParseName(ctxt);
5986 if (elemName == NULL) {
5987 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988 "ATTLIST: no name for Element\n");
/* one attribute definition per iteration, until '>' or parser EOF */
5993 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
/* position at loop entry, used below to detect an iteration that consumed nothing */
5994 const xmlChar *check = CUR_PTR;
5997 xmlChar *defaultValue = NULL;
6001 attrName = xmlParseName(ctxt);
6002 if (attrName == NULL) {
6003 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6004 "ATTLIST: no name for Attribute\n");
6008 if (!IS_BLANK_CH(CUR)) {
6009 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6010 "Space required after the attribute name\n");
/* for enumerated types the value list is returned through tree */
6015 type = xmlParseAttributeType(ctxt, &tree);
6021 if (!IS_BLANK_CH(CUR)) {
6022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023 "Space required after the attribute type\n");
6025 xmlFreeEnumeration(tree);
6030 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032 if (defaultValue != NULL)
6033 xmlFree(defaultValue);
6035 xmlFreeEnumeration(tree);
/* default values of every type except CDATA are normalized in place */
6038 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6039 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6043 if (!IS_BLANK_CH(CUR)) {
6044 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6045 "Space required after the attribute default value\n");
6046 if (defaultValue != NULL)
6047 xmlFree(defaultValue);
6049 xmlFreeEnumeration(tree);
/* no input consumed since loop entry: report an internal error and release what was allocated */
6054 if (check == CUR_PTR) {
6055 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6056 "in xmlParseAttributeListDecl\n");
6057 if (defaultValue != NULL)
6058 xmlFree(defaultValue);
6060 xmlFreeEnumeration(tree);
/* deliver the definition to the SAX callback; if no callback is invoked the enumeration is freed here */
6063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6064 (ctxt->sax->attributeDecl != NULL))
6065 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6066 type, def, defaultValue, tree);
6067 else if (tree != NULL)
6068 xmlFreeEnumeration(tree);
/* SAX2 only: register the default value unless the default kind is #IMPLIED or #REQUIRED */
6070 if ((ctxt->sax2) && (defaultValue != NULL) &&
6071 (def != XML_ATTRIBUTE_IMPLIED) &&
6072 (def != XML_ATTRIBUTE_REQUIRED)) {
6073 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6076 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6078 if (defaultValue != NULL)
6079 xmlFree(defaultValue);
/* the declaration must end in the same input it started in */
6083 if (input != ctxt->input) {
6084 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6085 "Attribute list declaration doesn't start and stop in the same entity\n",
6094 * xmlParseElementMixedContentDecl:
6095 * @ctxt: an XML parser context
6096 * @inputchk: the input used for the current entity, needed for boundary checks
6098 * parse the declaration for a Mixed Element content
6099 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6101 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6102 * '(' S? '#PCDATA' S? ')'
6104 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6106 * [ VC: No Duplicate Types ]
6107 * The same name must not appear more than once in a single
6108 * mixed-content declaration.
6110 * returns: the list of the xmlElementContentPtr describing the element choices
6112 xmlElementContentPtr
6113 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
/*
 * Build the content tree for a Mixed declaration starting at '#PCDATA'.
 * Malformed input is reported through xmlFatalErr()/xmlFatalErrMsg()
 * (XML_ERR_NAME_REQUIRED, XML_ERR_MIXED_NOT_STARTED, XML_ERR_PCDATA_REQUIRED).
 */
6114 xmlElementContentPtr ret = NULL, cur = NULL, n;
6115 const xmlChar *elem = NULL;
6118 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
/* when validating, the current input id must still equal inputchk */
6123 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6124 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6125 "Element content declaration doesn't start and stop in the same entity\n",
6129 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6133 ret->ocur = XML_ELEMENT_CONTENT_MULT;
/* alternatives follow '#PCDATA': start the tree with a PCDATA node */
6138 if ((RAW == '(') || (RAW == '|')) {
6139 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6140 if (ret == NULL) return(NULL);
/* one pass per '|' separator, stopping early at parser EOF */
6142 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6145 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6146 if (ret == NULL) return(NULL);
/* further alternatives: a new OR node whose c1 is the previously parsed name */
6152 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6153 if (n == NULL) return(NULL);
6154 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6163 elem = xmlParseName(ctxt);
6165 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6166 "xmlParseElementMixedContentDecl : Name expected\n");
6167 xmlFreeDocElementContent(ctxt->myDoc, cur);
/* the group has to be closed by ')*' */
6173 if ((RAW == ')') && (NXT(1) == '*')) {
/* the last parsed name becomes the c2 child of the current node */
6175 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6176 XML_ELEMENT_CONTENT_ELEMENT);
6177 if (cur->c2 != NULL)
6178 cur->c2->parent = cur;
6181 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6182 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6183 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6184 "Element content declaration doesn't start and stop in the same entity\n",
6189 xmlFreeDocElementContent(ctxt->myDoc, ret);
6190 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6195 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6201 * xmlParseElementChildrenContentDeclPriv:
6202 * @ctxt: an XML parser context
6203 * @inputchk: the input used for the current entity, needed for boundary checks
6204 * @depth: the level of recursion
6206 * parse the declaration for the children content of an Element (choice or seq)
6207 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6210 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6212 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6214 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6216 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6218 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6219 * TODO Parameter-entity replacement text must be properly nested
6220 * with parenthesized groups. That is to say, if either of the
6221 * opening or closing parentheses in a choice, seq, or Mixed
6222 * construct is contained in the replacement text for a parameter
6223 * entity, both must be contained in the same replacement text. For
6224 * interoperability, if a parameter-entity reference appears in a
6225 * choice, seq, or Mixed construct, its replacement text should not
6226 * be empty, and neither the first nor last non-blank character of
6227 * the replacement text should be a connector (| or ,).
6229 * Returns the tree of xmlElementContentPtr describing the element
6232 static xmlElementContentPtr
6233 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6235 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6236 const xmlChar *elem;
/*
 * Recursive parser for a children content model. ret is the tree built so
 * far, cur the node new operators are attached to, last the most recently
 * parsed content particle and op a freshly created SEQ/OR operator node.
 * On the error paths below the partial tree is released with
 * xmlFreeDocElementContent().
 */
/* nesting guard: deeper than 128 levels is refused unless XML_PARSE_HUGE is set */
6239 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6241 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6242 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
/* the nested group is checked against the id of the input it starts in */
6249 int inputid = ctxt->input->id;
6251 /* Recurse on first child */
6254 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6259 elem = xmlParseName(ctxt);
6261 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6264 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6266 xmlErrMemory(ctxt, NULL);
/* occurrence of the first particle: OPT, MULT for '*', PLUS for '+', otherwise ONCE */
6271 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6273 } else if (RAW == '*') {
6274 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6276 } else if (RAW == '+') {
6277 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6280 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
/* main loop: runs until the closing ')' or parser EOF */
6286 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6288 * Each loop we parse one separator and one element.
6291 if (type == 0) type = CUR;
6294 * Detect "Name | Name , Name" error
6296 else if (type != CUR) {
6297 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6298 "xmlParseElementChildrenContentDecl : '%c' expected\n",
/* last is freed separately only when it is a distinct node from ret */
6300 if ((last != NULL) && (last != ret))
6301 xmlFreeDocElementContent(ctxt->myDoc, last);
6303 xmlFreeDocElementContent(ctxt->myDoc, ret);
6308 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6310 if ((last != NULL) && (last != ret))
6311 xmlFreeDocElementContent(ctxt->myDoc, last);
6312 xmlFreeDocElementContent(ctxt->myDoc, ret);
6330 } else if (RAW == '|') {
6331 if (type == 0) type = CUR;
6334 * Detect "Name , Name | Name" error
6336 else if (type != CUR) {
6337 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6338 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6340 if ((last != NULL) && (last != ret))
6341 xmlFreeDocElementContent(ctxt->myDoc, last);
6343 xmlFreeDocElementContent(ctxt->myDoc, ret);
6348 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6350 if ((last != NULL) && (last != ret))
6351 xmlFreeDocElementContent(ctxt->myDoc, last);
6353 xmlFreeDocElementContent(ctxt->myDoc, ret);
6372 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6373 if ((last != NULL) && (last != ret))
6374 xmlFreeDocElementContent(ctxt->myDoc, last);
6376 xmlFreeDocElementContent(ctxt->myDoc, ret);
6383 int inputid = ctxt->input->id;
6384 /* Recurse on second child */
6387 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6391 elem = xmlParseName(ctxt);
6393 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6395 xmlFreeDocElementContent(ctxt->myDoc, ret);
6398 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6401 xmlFreeDocElementContent(ctxt->myDoc, ret);
/* occurrence of the particle just parsed, same mapping as for the first one */
6405 last->ocur = XML_ELEMENT_CONTENT_OPT;
6407 } else if (RAW == '*') {
6408 last->ocur = XML_ELEMENT_CONTENT_MULT;
6410 } else if (RAW == '+') {
6411 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6414 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6420 if ((cur != NULL) && (last != NULL)) {
/* when validating, the group must end in the input identified by inputchk */
6425 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6426 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6427 "Element content declaration doesn't start and stop in the same entity\n",
/* merge the group suffix with an occurrence already set on ret: PLUS or MULT becomes MULT, anything else OPT */
6433 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6434 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6435 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6437 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6440 } else if (RAW == '*') {
6442 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6445 * Some normalization:
6446 * (a | b* | c?)* == (a | b | c)*
6448 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6449 if ((cur->c1 != NULL) &&
6450 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6451 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6452 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6453 if ((cur->c2 != NULL) &&
6454 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6456 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461 } else if (RAW == '+') {
6465 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6466 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6467 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6469 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6471 * Some normalization:
6472 * (a | b*)+ == (a | b)*
6473 * (a | b?)+ == (a | b)*
6475 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6476 if ((cur->c1 != NULL) &&
6477 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6478 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6479 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6482 if ((cur->c2 != NULL) &&
6483 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6484 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6485 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6491 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6499 * xmlParseElementChildrenContentDecl:
6500 * @ctxt: an XML parser context
6501 * @inputchk: the input used for the current entity, needed for boundary checks
6503 * parse the declaration for the children content of an Element (choice or seq)
6504 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6506 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6508 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6510 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6512 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6514 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6515 * TODO Parameter-entity replacement text must be properly nested
6516 * with parenthesized groups. That is to say, if either of the
6517 * opening or closing parentheses in a choice, seq, or Mixed
6518 * construct is contained in the replacement text for a parameter
6519 * entity, both must be contained in the same replacement text. For
6520 * interoperability, if a parameter-entity reference appears in a
6521 * choice, seq, or Mixed construct, its replacement text should not
6522 * be empty, and neither the first nor last non-blank character of
6523 * the replacement text should be a connector (| or ,).
6525 * Returns the tree of xmlElementContentPtr describing the element
6528 xmlElementContentPtr
6529 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6530 /* public entry point kept for API/ABI compatibility: forwards to the Priv variant with a starting depth of 1 */
6531 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6535 * xmlParseElementContentDecl:
6536 * @ctxt: an XML parser context
6537 * @name: the name of the element being defined.
6538 * @result: the Element Content pointer will be stored here if any
6540 * parse the declaration for an Element content either Mixed or Children,
6541 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6543 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6545 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6549 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6550 xmlElementContentPtr *result) {
/*
 * Choose between the Mixed and the children content parser depending on
 * whether '#PCDATA' is found at the current position. @name is only used
 * in the error message when '(' is missing.
 */
6552 xmlElementContentPtr tree = NULL;
/* id of the input the declaration starts in; passed down for the entity-boundary checks */
6553 int inputid = ctxt->input->id;
6559 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6560 "xmlParseElementContentDecl : %s '(' expected\n", name);
6565 if (ctxt->instate == XML_PARSER_EOF)
6568 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6569 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6570 res = XML_ELEMENT_TYPE_MIXED;
/* anything else is parsed as a children content model, starting at depth 1 */
6572 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6573 res = XML_ELEMENT_TYPE_ELEMENT;
6581 * xmlParseElementDecl:
6582 * @ctxt: an XML parser context
6584 * parse an Element declaration.
6586 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6588 * [ VC: Unique Element Type Declaration ]
6589 * No element type may be declared more than once
6591 * Returns the type of the element, or -1 in case of error
6594 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parse one '<!ELEMENT' declaration: the element name, then the content
 * specification (EMPTY, ANY or a parenthesized model), and report it
 * through the SAX elementDecl callback when SAX is enabled.
 */
6595 const xmlChar *name;
6597 xmlElementContentPtr content = NULL;
6599 /* GROW; done in the caller */
6600 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* remember the starting input; it is compared with ctxt->input once the declaration is closed */
6601 xmlParserInputPtr input = ctxt->input;
6604 if (!IS_BLANK_CH(CUR)) {
6605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6606 "Space required after 'ELEMENT'\n");
6609 name = xmlParseName(ctxt);
6611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612 "xmlParseElementDecl: no name for Element\n");
6615 while ((RAW == 0) && (ctxt->inputNr > 1))
6617 if (!IS_BLANK_CH(CUR)) {
6618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6619 "Space required after the element name\n");
6622 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6625 * Element must always be empty.
6627 ret = XML_ELEMENT_TYPE_EMPTY;
6628 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6632 * Element is a generic container.
6634 ret = XML_ELEMENT_TYPE_ANY;
6635 } else if (RAW == '(') {
6636 ret = xmlParseElementContentDecl(ctxt, name, &content);
6639 * [ WFC: PEs in Internal Subset ] error handling.
6641 if ((RAW == '%') && (ctxt->external == 0) &&
6642 (ctxt->inputNr == 1)) {
6643 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6644 "PEReference: forbidden within markup decl in internal subset\n");
6646 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6647 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6654 * Pop-up of finished entities.
6656 while ((RAW == 0) && (ctxt->inputNr > 1))
6661 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6662 if (content != NULL) {
6663 xmlFreeDocElementContent(ctxt->myDoc, content);
6666 if (input != ctxt->input) {
6667 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6668 "Element declaration doesn't start and stop in the same entity\n");
6672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6673 (ctxt->sax->elementDecl != NULL)) {
/* parent is cleared before the callback and tested afterwards to decide whether content is freed here */
6674 if (content != NULL)
6675 content->parent = NULL;
6676 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6678 if ((content != NULL) && (content->parent == NULL)) {
6680 * this is a trick: if xmlAddElementDecl is called,
6681 * instead of copying the full tree it is plugged directly
6682 * if called from the parser. Avoid duplicating the
6683 * interfaces or change the API/ABI
6685 xmlFreeDocElementContent(ctxt->myDoc, content);
6687 } else if (content != NULL) {
6688 xmlFreeDocElementContent(ctxt->myDoc, content);
6696 * xmlParseConditionalSections
6697 * @ctxt: an XML parser context
6699 * [61] conditionalSect ::= includeSect | ignoreSect
6700 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6701 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6702 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6703 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6707 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/*
 * Handle a conditional section. An INCLUDE section is parsed like external
 * subset content (nested conditional sections, blanks, PE references and
 * markup declarations); an IGNORE section is skipped while tracking the
 * nesting depth of '<![' / ']]>' pairs.
 */
/* id of the input the section starts in; each later boundary check compares against it */
6708 int id = ctxt->input->id;
6712 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6716 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6718 if (ctxt->input->id != id) {
6719 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6720 "All markup of the conditional section is not in the same entity\n",
6725 if (xmlParserDebugEntities) {
6726 if ((ctxt->input != NULL) && (ctxt->input->filename))
6727 xmlGenericError(xmlGenericErrorContext,
6728 "%s(%d): ", ctxt->input->filename,
6730 xmlGenericError(xmlGenericErrorContext,
6731 "Entering INCLUDE Conditional Section\n");
/* parse the section content until ']]>' is seen, the input ends, or the parser reaches EOF */
6734 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6735 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
/* position and consumed count at loop entry, compared below to detect lack of progress */
6736 const xmlChar *check = CUR_PTR;
6737 unsigned int cons = ctxt->input->consumed;
6739 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6740 xmlParseConditionalSections(ctxt);
6741 } else if (IS_BLANK_CH(CUR)) {
6743 } else if (RAW == '%') {
6744 xmlParsePEReference(ctxt);
6746 xmlParseMarkupDecl(ctxt);
6749 * Pop-up of finished entities.
6751 while ((RAW == 0) && (ctxt->inputNr > 1))
6754 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6755 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6759 if (xmlParserDebugEntities) {
6760 if ((ctxt->input != NULL) && (ctxt->input->filename))
6761 xmlGenericError(xmlGenericErrorContext,
6762 "%s(%d): ", ctxt->input->filename,
6764 xmlGenericError(xmlGenericErrorContext,
6765 "Leaving INCLUDE Conditional Section\n");
6768 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6770 xmlParserInputState instate;
6776 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6778 if (ctxt->input->id != id) {
6779 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6780 "All markup of the conditional section is not in the same entity\n",
6785 if (xmlParserDebugEntities) {
6786 if ((ctxt->input != NULL) && (ctxt->input->filename))
6787 xmlGenericError(xmlGenericErrorContext,
6788 "%s(%d): ", ctxt->input->filename,
6790 xmlGenericError(xmlGenericErrorContext,
6791 "Entering IGNORE Conditional Section\n");
6795 * Parse up to the end of the conditional section
6796 * But disable SAX event generating DTD building in the meantime
6798 state = ctxt->disableSAX;
6799 instate = ctxt->instate;
6800 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6801 ctxt->instate = XML_PARSER_IGNORE;
/* skip input while tracking nesting; the loop ends once depth goes negative */
6803 while (((depth >= 0) && (RAW != 0)) &&
6804 (ctxt->instate != XML_PARSER_EOF)) {
6805 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6810 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6811 if (--depth >= 0) SKIP(3);
/* restore the SAX and parser state saved before skipping */
6818 ctxt->disableSAX = state;
6819 ctxt->instate = instate;
6821 if (xmlParserDebugEntities) {
6822 if ((ctxt->input != NULL) && (ctxt->input->filename))
6823 xmlGenericError(xmlGenericErrorContext,
6824 "%s(%d): ", ctxt->input->filename,
6826 xmlGenericError(xmlGenericErrorContext,
6827 "Leaving IGNORE Conditional Section\n");
6831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6838 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6840 if (ctxt->input->id != id) {
6841 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6842 "All markup of the conditional section is not in the same entity\n",
6850 * xmlParseMarkupDecl:
6851 * @ctxt: an XML parser context
6853 * parse Markup declarations
6855 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6856 * NotationDecl | PI | Comment
6858 * [ VC: Proper Declaration/PE Nesting ]
6859 * Parameter-entity replacement text must be properly nested with
6860 * markup declarations. That is to say, if either the first character
6861 * or the last character of a markup declaration (markupdecl above) is
6862 * contained in the replacement text for a parameter-entity reference,
6863 * both must be contained in the same replacement text.
6865 * [ WFC: PEs in Internal Subset ]
6866 * In the internal DTD subset, parameter-entity references can occur
6867 * only where markup declarations can occur, not within markup declarations.
6868 * (This does not apply to references that occur in external parameter
6869 * entities or to the external subset.)
6872 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
/*
 * Dispatch on the characters following the current position to the
 * element, entity, attribute-list, notation or comment parser, then handle
 * PE references and conditional sections and leave the parser in
 * XML_PARSER_DTD state.
 */
6875 if (NXT(1) == '!') {
6879 xmlParseElementDecl(ctxt);
6880 else if (NXT(3) == 'N')
6881 xmlParseEntityDecl(ctxt);
6884 xmlParseAttributeListDecl(ctxt);
6887 xmlParseNotationDecl(ctxt);
6890 xmlParseComment(ctxt);
6893 /* there is an error but it will be detected later */
6896 } else if (NXT(1) == '?') {
6901 * This is only for internal subset. On external entities,
6902 * the replacement is done before parsing stage
6904 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6905 xmlParsePEReference(ctxt);
6908 * Conditional sections are allowed from entities included
6909 * by PE References in the internal subset.
6911 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6912 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6913 xmlParseConditionalSections(ctxt);
6917 ctxt->instate = XML_PARSER_DTD;
6922 * @ctxt: an XML parser context
6924 * parse an XML declaration header for external entities
6926 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6930 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parse a text declaration: '<?xml', an optional VersionInfo, the
 * encoding declaration and the closing '?>'. The version string is stored
 * in ctxt->input->version.
 */
6932 const xmlChar *encoding;
6935 * We know that '<?xml' is here.
6937 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6940 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6944 if (!IS_BLANK_CH(CUR)) {
6945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946 "Space needed after '<?xml'\n");
6951 * We may have the VersionInfo here.
6953 version = xmlParseVersionInfo(ctxt);
/* no VersionInfo present: fall back to a copy of XML_DEFAULT_VERSION */
6954 if (version == NULL)
6955 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6957 if (!IS_BLANK_CH(CUR)) {
6958 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6959 "Space needed here\n");
6962 ctxt->input->version = version;
6965 * We must have the encoding declaration
6967 encoding = xmlParseEncodingDecl(ctxt);
6968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6970 * The XML REC instructs us to stop parsing right here
/* a missing encoding is reported only when no other error has been recorded */
6974 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6975 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6976 "Missing encoding in text declaration\n");
6980 if ((RAW == '?') && (NXT(1) == '>')) {
6982 } else if (RAW == '>') {
6983 /* Deprecated old WD ... */
6984 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6987 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6988 MOVETO_ENDTAG(CUR_PTR);
6994 * xmlParseExternalSubset:
6995 * @ctxt: an XML parser context
6996 * @ExternalID: the external identifier
6997 * @SystemID: the system identifier (or URL)
6999 * parse Markup declarations from an external subset
7001 * [30] extSubset ::= textDecl? extSubsetDecl
7003 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7006 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7007 const xmlChar *SystemID) {
/*
 * Parse an external DTD subset: detect the encoding when none is set,
 * handle an optional text declaration, make sure a document with an
 * internal subset exists, then loop over the subset content.
 */
7008 xmlDetectSAX2(ctxt);
/* encoding detection is attempted only when at least 4 bytes of input are available */
7011 if ((ctxt->encoding == NULL) &&
7012 (ctxt->input->end - ctxt->input->cur >= 4)) {
7014 xmlCharEncoding enc;
7020 enc = xmlDetectCharEncoding(start, 4);
7021 if (enc != XML_CHAR_ENCODING_NONE)
7022 xmlSwitchEncoding(ctxt, enc);
7025 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7026 xmlParseTextDecl(ctxt);
7027 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7029 * The XML REC instructs us to stop parsing right here
7031 ctxt->instate = XML_PARSER_EOF;
/* create the document on demand and flag it XML_DOC_INTERNAL */
7035 if (ctxt->myDoc == NULL) {
7036 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7037 if (ctxt->myDoc == NULL) {
7038 xmlErrMemory(ctxt, "New Doc failed");
7041 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7043 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7044 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7046 ctxt->instate = XML_PARSER_DTD;
/* subset content: '<?', '<!', '%' or blanks start another iteration */
7048 while (((RAW == '<') && (NXT(1) == '?')) ||
7049 ((RAW == '<') && (NXT(1) == '!')) ||
7050 (RAW == '%') || IS_BLANK_CH(CUR)) {
/* position and consumed count at loop entry, compared below to detect lack of progress */
7051 const xmlChar *check = CUR_PTR;
7052 unsigned int cons = ctxt->input->consumed;
7055 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7056 xmlParseConditionalSections(ctxt);
7057 } else if (IS_BLANK_CH(CUR)) {
7059 } else if (RAW == '%') {
7060 xmlParsePEReference(ctxt);
7062 xmlParseMarkupDecl(ctxt);
7065 * Pop-up of finished entities.
7067 while ((RAW == 0) && (ctxt->inputNr > 1))
7070 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7071 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7077 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7083 * xmlParseReference:
7084 * @ctxt: an XML parser context
7086 * parse and handle entity references in content, depending on the SAX
7087 * interface, this may end up in a call to characters() if this is a
7088 * CharRef, a predefined entity, if there is no reference() callback,
7089 * or if the parser was asked to switch to that mode.
7091 * [67] Reference ::= EntityRef | CharRef
7094 xmlParseReference(xmlParserCtxtPtr ctxt) {
7098 xmlNodePtr list = NULL;
7099 xmlParserErrors ret = XML_ERR_OK;
7106 * Simple case of a CharRef
7108 if (NXT(1) == '#') {
7112 int value = xmlParseCharRef(ctxt);
7116 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7118 * So we are using non-UTF-8 buffers.
7119 * Check that the char fits on 8 bits; if not,
7120 * generate a CharRef.
7122 if (value <= 0xFF) {
7125 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7126 (!ctxt->disableSAX))
7127 ctxt->sax->characters(ctxt->userData, out, 1);
7129 if ((hex == 'x') || (hex == 'X'))
7130 snprintf((char *)out, sizeof(out), "#x%X", value);
7132 snprintf((char *)out, sizeof(out), "#%d", value);
7133 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7134 (!ctxt->disableSAX))
7135 ctxt->sax->reference(ctxt->userData, out);
7139 * Just encode the value in UTF-8
7141 COPY_BUF(0 ,out, i, value);
7143 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144 (!ctxt->disableSAX))
7145 ctxt->sax->characters(ctxt->userData, out, i);
7151 * We are seeing an entity reference
7153 ent = xmlParseEntityRef(ctxt);
7154 if (ent == NULL) return;
7155 if (!ctxt->wellFormed)
7157 was_checked = ent->checked;
7159 /* special case of predefined entities */
7160 if ((ent->name == NULL) ||
7161 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7163 if (val == NULL) return;
7165 * inline the entity.
7167 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7168 (!ctxt->disableSAX))
7169 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7174 * The first reference to the entity triggers a parsing phase
7175 * where the ent->children is filled with the result from
7177 * Note: external parsed entities will not be loaded, it is not
7178 * required for a non-validating parser, unless the parsing option
7179 * of validating, or substituting entities were given. Doing so is
7180 * far more secure as the parser will only process data coming from
7181 * the document entity by default.
7183 if ((ent->checked == 0) &&
7184 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7185 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7186 unsigned long oldnbent = ctxt->nbentities;
7189 * This is a bit hackish but this seems the best
7190 * way to make sure both SAX and DOM entity support
7194 if (ctxt->userData == ctxt)
7197 user_data = ctxt->userData;
7200 * Check that this entity is well formed
7201 * 4.3.2: An internal general parsed entity is well-formed
7202 * if its replacement text matches the production labeled
7205 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7207 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7211 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7213 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7214 user_data, ctxt->depth, ent->URI,
7215 ent->ExternalID, &list);
7218 ret = XML_ERR_ENTITY_PE_INTERNAL;
7219 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7220 "invalid entity type found\n", NULL);
7224 * Store the number of entities needing parsing for this entity
7225 * content and do the checks
7227 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7228 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7230 if (ret == XML_ERR_ENTITY_LOOP) {
7231 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7232 xmlFreeNodeList(list);
7235 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7236 xmlFreeNodeList(list);
7240 if ((ret == XML_ERR_OK) && (list != NULL)) {
7241 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7242 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7243 (ent->children == NULL)) {
7244 ent->children = list;
7245 if (ctxt->replaceEntities) {
7247 * Prune it directly in the generated document
7248 * except for single text nodes.
7250 if (((list->type == XML_TEXT_NODE) &&
7251 (list->next == NULL)) ||
7252 (ctxt->parseMode == XML_PARSE_READER)) {
7253 list->parent = (xmlNodePtr) ent;
7258 while (list != NULL) {
7259 list->parent = (xmlNodePtr) ctxt->node;
7260 list->doc = ctxt->myDoc;
7261 if (list->next == NULL)
7265 list = ent->children;
7266 #ifdef LIBXML_LEGACY_ENABLED
7267 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7268 xmlAddEntityReference(ent, list, NULL);
7269 #endif /* LIBXML_LEGACY_ENABLED */
7273 while (list != NULL) {
7274 list->parent = (xmlNodePtr) ent;
7275 xmlSetTreeDoc(list, ent->doc);
7276 if (list->next == NULL)
7282 xmlFreeNodeList(list);
7285 } else if ((ret != XML_ERR_OK) &&
7286 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7287 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7288 "Entity '%s' failed to parse\n", ent->name);
7289 } else if (list != NULL) {
7290 xmlFreeNodeList(list);
7293 if (ent->checked == 0)
7295 } else if (ent->checked != 1) {
7296 ctxt->nbentities += ent->checked / 2;
7300 * Now that the entity content has been gathered
7301 * provide it to the application, this can take different forms based
7302 * on the parsing modes.
7304 if (ent->children == NULL) {
7306 * Probably running in SAX mode and the callbacks don't
7307 * build the entity content. So unless we already went
7308 * through parsing for first checking go through the entity
7309 * content to generate callbacks associated to the entity
7311 if (was_checked != 0) {
7314 * This is a bit hackish but this seems the best
7315 * way to make sure both SAX and DOM entity support
7318 if (ctxt->userData == ctxt)
7321 user_data = ctxt->userData;
7323 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7325 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7326 ent->content, user_data, NULL);
7328 } else if (ent->etype ==
7329 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7331 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7332 ctxt->sax, user_data, ctxt->depth,
7333 ent->URI, ent->ExternalID, NULL);
7336 ret = XML_ERR_ENTITY_PE_INTERNAL;
7337 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7338 "invalid entity type found\n", NULL);
7340 if (ret == XML_ERR_ENTITY_LOOP) {
7341 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7345 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7346 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7348 * Entity reference callback comes second, it's somewhat
7349 * superfluous but a compatibility to historical behaviour
7351 ctxt->sax->reference(ctxt->userData, ent->name);
7357 * If we didn't get any children for the entity being built
7359 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7360 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7364 ctxt->sax->reference(ctxt->userData, ent->name);
7368 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7370 * There is a problem on the handling of _private for entities
7371 * (bug 155816): Should we copy the content of the field from
7372 * the entity (possibly overwriting some value set by the user
7373 * when a copy is created), should we leave it alone, or should
7374 * we try to take care of different situations? The problem
7375 * is exacerbated by the usage of this field by the xmlReader.
7376 * To fix this bug, we look at _private on the created node
7377 * and, if it's NULL, we copy in whatever was in the entity.
7378 * If it's not NULL we leave it alone. This is somewhat of a
7379 * hack - maybe we should have further tests to determine
7382 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7384 * Seems we are generating the DOM content, do
7385 * a simple tree copy for all references except the first
7386 * In the first occurrence list contains the replacement.
7388 if (((list == NULL) && (ent->owner == 0)) ||
7389 (ctxt->parseMode == XML_PARSE_READER)) {
7390 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7393 * We are copying here, make sure there is no abuse
7395 ctxt->sizeentcopy += ent->length;
7396 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7400 * when operating on a reader, the entities definitions
7401 * are always owning the entities subtree.
7402 if (ctxt->parseMode == XML_PARSE_READER)
7406 cur = ent->children;
7407 while (cur != NULL) {
7408 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7410 if (nw->_private == NULL)
7411 nw->_private = cur->_private;
7412 if (firstChild == NULL){
7415 nw = xmlAddChild(ctxt->node, nw);
7417 if (cur == ent->last) {
7419 * needed to detect some strange empty
7420 * node cases in the reader tests
7422 if ((ctxt->parseMode == XML_PARSE_READER) &&
7424 (nw->type == XML_ELEMENT_NODE) &&
7425 (nw->children == NULL))
7432 #ifdef LIBXML_LEGACY_ENABLED
7433 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7434 xmlAddEntityReference(ent, firstChild, nw);
7435 #endif /* LIBXML_LEGACY_ENABLED */
7436 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7437 xmlNodePtr nw = NULL, cur, next, last,
7441 * We are copying here, make sure there is no abuse
7443 ctxt->sizeentcopy += ent->length;
7444 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7448 * Copy the entity child list and make it the new
7449 * entity child list. The goal is to make sure any
7450 * ID or REF referenced will be the one from the
7451 * document content and not the entity copy.
7453 cur = ent->children;
7454 ent->children = NULL;
7457 while (cur != NULL) {
7461 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7463 if (nw->_private == NULL)
7464 nw->_private = cur->_private;
7465 if (firstChild == NULL){
7468 xmlAddChild((xmlNodePtr) ent, nw);
7469 xmlAddChild(ctxt->node, cur);
7475 if (ent->owner == 0)
7477 #ifdef LIBXML_LEGACY_ENABLED
7478 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7479 xmlAddEntityReference(ent, firstChild, nw);
7480 #endif /* LIBXML_LEGACY_ENABLED */
7482 const xmlChar *nbktext;
7485 * the name change is to avoid coalescing of the
7486 * node with a possible previous text one which
7487 * would make ent->children a dangling pointer
7489 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7491 if (ent->children->type == XML_TEXT_NODE)
7492 ent->children->name = nbktext;
7493 if ((ent->last != ent->children) &&
7494 (ent->last->type == XML_TEXT_NODE))
7495 ent->last->name = nbktext;
7496 xmlAddChildList(ctxt->node, ent->children);
7500 * This is to avoid a nasty side effect, see
7501 * characters() in SAX.c
7511 * xmlParseEntityRef:
7512 * @ctxt: an XML parser context
7514 * parse ENTITY references declarations
7516 * [68] EntityRef ::= '&' Name ';'
7518 * [ WFC: Entity Declared ]
7519 * In a document without any DTD, a document with only an internal DTD
7520 * subset which contains no parameter entity references, or a document
7521 * with "standalone='yes'", the Name given in the entity reference
7522 * must match that in an entity declaration, except that well-formed
7523 * documents need not declare any of the following entities: amp, lt,
7524 * gt, apos, quot. The declaration of a parameter entity must precede
7525 * any reference to it. Similarly, the declaration of a general entity
7526 * must precede any reference to it which appears in a default value in an
7527 * attribute-list declaration. Note that if entities are declared in the
7528 * external subset or in external parameter entities, a non-validating
7529 * processor is not obligated to read and process their declarations;
7530 * for such documents, the rule that an entity must be declared is a
7531 * well-formedness constraint only if standalone='yes'.
7533 * [ WFC: Parsed Entity ]
7534 * An entity reference must not contain the name of an unparsed entity
7536 * Returns the xmlEntityPtr if found, or NULL otherwise.
7539 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7540 const xmlChar *name;
7541 xmlEntityPtr ent = NULL;
7544 if (ctxt->instate == XML_PARSER_EOF)
7550 name = xmlParseName(ctxt);
7552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7553 "xmlParseEntityRef: no name\n");
7557 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7563 * Predefined entities override any extra definition
7565 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7566 ent = xmlGetPredefinedEntity(name);
7572 * Increase the number of entity references parsed
7577 * Ask first SAX for entity resolution, otherwise try the
7578 * entities which may have been stored in the parser context.
7580 if (ctxt->sax != NULL) {
7581 if (ctxt->sax->getEntity != NULL)
7582 ent = ctxt->sax->getEntity(ctxt->userData, name);
7583 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7584 (ctxt->options & XML_PARSE_OLDSAX))
7585 ent = xmlGetPredefinedEntity(name);
7586 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7587 (ctxt->userData==ctxt)) {
7588 ent = xmlSAX2GetEntity(ctxt, name);
7591 if (ctxt->instate == XML_PARSER_EOF)
7594 * [ WFC: Entity Declared ]
7595 * In a document without any DTD, a document with only an
7596 * internal DTD subset which contains no parameter entity
7597 * references, or a document with "standalone='yes'", the
7598 * Name given in the entity reference must match that in an
7599 * entity declaration, except that well-formed documents
7600 * need not declare any of the following entities: amp, lt,
7602 * The declaration of a parameter entity must precede any
7604 * Similarly, the declaration of a general entity must
7605 * precede any reference to it which appears in a default
7606 * value in an attribute-list declaration. Note that if
7607 * entities are declared in the external subset or in
7608 * external parameter entities, a non-validating processor
7609 * is not obligated to read and process their declarations;
7610 * for such documents, the rule that an entity must be
7611 * declared is a well-formedness constraint only if
7615 if ((ctxt->standalone == 1) ||
7616 ((ctxt->hasExternalSubset == 0) &&
7617 (ctxt->hasPErefs == 0))) {
7618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7619 "Entity '%s' not defined\n", name);
7621 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7622 "Entity '%s' not defined\n", name);
7623 if ((ctxt->inSubset == 0) &&
7624 (ctxt->sax != NULL) &&
7625 (ctxt->sax->reference != NULL)) {
7626 ctxt->sax->reference(ctxt->userData, name);
7633 * [ WFC: Parsed Entity ]
7634 * An entity reference must not contain the name of an
7637 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7638 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7639 "Entity reference to unparsed entity %s\n", name);
7643 * [ WFC: No External Entity References ]
7644 * Attribute values cannot contain direct or indirect
7645 * entity references to external entities.
7647 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7648 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7649 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7650 "Attribute references external entity '%s'\n", name);
7653 * [ WFC: No < in Attribute Values ]
7654 * The replacement text of any entity referred to directly or
7655 * indirectly in an attribute value (other than "&lt;") must
7658 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7660 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7661 if ((ent->checked & 1) || ((ent->checked == 0) &&
7662 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7663 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7664 "'<' in entity '%s' is not allowed in attributes values\n", name);
7669 * Internal check, no parameter entities here ...
7672 switch (ent->etype) {
7673 case XML_INTERNAL_PARAMETER_ENTITY:
7674 case XML_EXTERNAL_PARAMETER_ENTITY:
7675 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7676 "Attempt to reference the parameter entity '%s'\n",
7685 * [ WFC: No Recursion ]
7686 * A parsed entity must not contain a recursive reference
7687 * to itself, either directly or indirectly.
7688 * Done somewhere else
7694 * xmlParseStringEntityRef:
7695 * @ctxt: an XML parser context
7696 * @str: a pointer to an index in the string
7698 * parse ENTITY references declarations, but this version parses it from
7701 * [68] EntityRef ::= '&' Name ';'
7703 * [ WFC: Entity Declared ]
7704 * In a document without any DTD, a document with only an internal DTD
7705 * subset which contains no parameter entity references, or a document
7706 * with "standalone='yes'", the Name given in the entity reference
7707 * must match that in an entity declaration, except that well-formed
7708 * documents need not declare any of the following entities: amp, lt,
7709 * gt, apos, quot. The declaration of a parameter entity must precede
7710 * any reference to it. Similarly, the declaration of a general entity
7711 * must precede any reference to it which appears in a default value in an
7712 * attribute-list declaration. Note that if entities are declared in the
7713 * external subset or in external parameter entities, a non-validating
7714 * processor is not obligated to read and process their declarations;
7715 * for such documents, the rule that an entity must be declared is a
7716 * well-formedness constraint only if standalone='yes'.
7718 * [ WFC: Parsed Entity ]
7719 * An entity reference must not contain the name of an unparsed entity
7721 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7722 * is updated to the current location in the string.
7725 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7729 xmlEntityPtr ent = NULL;
7731 if ((str == NULL) || (*str == NULL))
7739 name = xmlParseStringName(ctxt, &ptr);
7741 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7742 "xmlParseStringEntityRef: no name\n");
7747 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7756 * Predefined entities override any extra definition
7758 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7759 ent = xmlGetPredefinedEntity(name);
7768 * Increase the number of entity references parsed
7773 * Ask first SAX for entity resolution, otherwise try the
7774 * entities which may have been stored in the parser context.
7776 if (ctxt->sax != NULL) {
7777 if (ctxt->sax->getEntity != NULL)
7778 ent = ctxt->sax->getEntity(ctxt->userData, name);
7779 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7780 ent = xmlGetPredefinedEntity(name);
7781 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7782 ent = xmlSAX2GetEntity(ctxt, name);
7785 if (ctxt->instate == XML_PARSER_EOF) {
7791 * [ WFC: Entity Declared ]
7792 * In a document without any DTD, a document with only an
7793 * internal DTD subset which contains no parameter entity
7794 * references, or a document with "standalone='yes'", the
7795 * Name given in the entity reference must match that in an
7796 * entity declaration, except that well-formed documents
7797 * need not declare any of the following entities: amp, lt,
7799 * The declaration of a parameter entity must precede any
7801 * Similarly, the declaration of a general entity must
7802 * precede any reference to it which appears in a default
7803 * value in an attribute-list declaration. Note that if
7804 * entities are declared in the external subset or in
7805 * external parameter entities, a non-validating processor
7806 * is not obligated to read and process their declarations;
7807 * for such documents, the rule that an entity must be
7808 * declared is a well-formedness constraint only if
7812 if ((ctxt->standalone == 1) ||
7813 ((ctxt->hasExternalSubset == 0) &&
7814 (ctxt->hasPErefs == 0))) {
7815 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7816 "Entity '%s' not defined\n", name);
7818 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7819 "Entity '%s' not defined\n",
7822 /* TODO ? check regressions ctxt->valid = 0; */
7826 * [ WFC: Parsed Entity ]
7827 * An entity reference must not contain the name of an
7830 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7831 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7832 "Entity reference to unparsed entity %s\n", name);
7836 * [ WFC: No External Entity References ]
7837 * Attribute values cannot contain direct or indirect
7838 * entity references to external entities.
7840 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7841 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7842 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7843 "Attribute references external entity '%s'\n", name);
7846 * [ WFC: No < in Attribute Values ]
7847 * The replacement text of any entity referred to directly or
7848 * indirectly in an attribute value (other than "&lt;") must
7851 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7852 (ent != NULL) && (ent->content != NULL) &&
7853 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7854 (xmlStrchr(ent->content, '<'))) {
7855 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7856 "'<' in entity '%s' is not allowed in attributes values\n",
7861 * Internal check, no parameter entities here ...
7864 switch (ent->etype) {
7865 case XML_INTERNAL_PARAMETER_ENTITY:
7866 case XML_EXTERNAL_PARAMETER_ENTITY:
7867 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7868 "Attempt to reference the parameter entity '%s'\n",
7877 * [ WFC: No Recursion ]
7878 * A parsed entity must not contain a recursive reference
7879 * to itself, either directly or indirectly.
7880 * Done somewhere else
7889 * xmlParsePEReference:
7890 * @ctxt: an XML parser context
7892 * parse PEReference declarations
7893 * The entity content is handled directly by pushing its content as
7894 * a new input stream.
7896 * [69] PEReference ::= '%' Name ';'
7898 * [ WFC: No Recursion ]
7899 * A parsed entity must not contain a recursive
7900 * reference to itself, either directly or indirectly.
7902 * [ WFC: Entity Declared ]
7903 * In a document without any DTD, a document with only an internal DTD
7904 * subset which contains no parameter entity references, or a document
7905 * with "standalone='yes'", ... ... The declaration of a parameter
7906 * entity must precede any reference to it...
7908 * [ VC: Entity Declared ]
7909 * In a document with an external subset or external parameter entities
7910 * with "standalone='no'", ... ... The declaration of a parameter entity
7911 * must precede any reference to it...
7914 * Parameter-entity references may only appear in the DTD.
7915 * NOTE: misleading but this is handled.
7918 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7920 const xmlChar *name;
7921 xmlEntityPtr entity = NULL;
7922 xmlParserInputPtr input;
7927 name = xmlParseName(ctxt);
7929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7930 "xmlParsePEReference: no name\n");
7934 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7941 * Increase the number of entity references parsed
7946 * Request the entity from SAX
7948 if ((ctxt->sax != NULL) &&
7949 (ctxt->sax->getParameterEntity != NULL))
7950 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7951 if (ctxt->instate == XML_PARSER_EOF)
7953 if (entity == NULL) {
7955 * [ WFC: Entity Declared ]
7956 * In a document without any DTD, a document with only an
7957 * internal DTD subset which contains no parameter entity
7958 * references, or a document with "standalone='yes'", ...
7959 * ... The declaration of a parameter entity must precede
7960 * any reference to it...
7962 if ((ctxt->standalone == 1) ||
7963 ((ctxt->hasExternalSubset == 0) &&
7964 (ctxt->hasPErefs == 0))) {
7965 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7966 "PEReference: %%%s; not found\n",
7970 * [ VC: Entity Declared ]
7971 * In a document with an external subset or external
7972 * parameter entities with "standalone='no'", ...
7973 * ... The declaration of a parameter entity must
7974 * precede any reference to it...
7976 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7977 "PEReference: %%%s; not found\n",
7983 * Internal checking in case the entity quest barfed
7985 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7986 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7987 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7988 "Internal: %%%s; is not a parameter entity\n",
7990 } else if (ctxt->input->free != deallocblankswrapper) {
7991 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7992 if (xmlPushInput(ctxt, input) < 0)
7997 * handle the extra spaces added before and after
7998 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8000 input = xmlNewEntityInputStream(ctxt, entity);
8001 if (xmlPushInput(ctxt, input) < 0)
8003 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8004 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8005 (IS_BLANK_CH(NXT(5)))) {
8006 xmlParseTextDecl(ctxt);
8008 XML_ERR_UNSUPPORTED_ENCODING) {
8010 * The XML REC instructs us to stop parsing
8013 ctxt->instate = XML_PARSER_EOF;
8019 ctxt->hasPErefs = 1;
8023 * xmlLoadEntityContent:
8024 * @ctxt: an XML parser context
8025 * @entity: an unloaded system entity
8027 * Load the original content of the given system entity from the
8028 * ExternalID/SystemID given. This is to be used for Included in Literal
8029 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8031 * Returns 0 in case of success and -1 in case of failure
8034 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8035 xmlParserInputPtr input;
8040 if ((ctxt == NULL) || (entity == NULL) ||
8041 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8042 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8043 (entity->content != NULL)) {
8044 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8045 "xmlLoadEntityContent parameter error");
8049 if (xmlParserDebugEntities)
8050 xmlGenericError(xmlGenericErrorContext,
8051 "Reading %s entity content input\n", entity->name);
8053 buf = xmlBufferCreate();
8055 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8056 "xmlLoadEntityContent parameter error");
8060 input = xmlNewEntityInputStream(ctxt, entity);
8061 if (input == NULL) {
8062 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8063 "xmlLoadEntityContent input error");
8069 * Push the entity as the current input, read char by char
8070 * saving to the buffer until the end of the entity or an error
8072 if (xmlPushInput(ctxt, input) < 0) {
8079 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8081 xmlBufferAdd(buf, ctxt->input->cur, l);
8082 if (count++ > XML_PARSER_CHUNK_SIZE) {
8085 if (ctxt->instate == XML_PARSER_EOF) {
8095 if (ctxt->instate == XML_PARSER_EOF) {
8103 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8105 } else if (!IS_CHAR(c)) {
8106 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8107 "xmlLoadEntityContent: invalid char value %d\n",
8112 entity->content = buf->content;
8113 buf->content = NULL;
8120 * xmlParseStringPEReference:
8121 * @ctxt: an XML parser context
8122 * @str: a pointer to an index in the string
8124 * parse PEReference declarations
8126 * [69] PEReference ::= '%' Name ';'
8128 * [ WFC: No Recursion ]
8129 * A parsed entity must not contain a recursive
8130 * reference to itself, either directly or indirectly.
8132 * [ WFC: Entity Declared ]
8133 * In a document without any DTD, a document with only an internal DTD
8134 * subset which contains no parameter entity references, or a document
8135 * with "standalone='yes'", ... ... The declaration of a parameter
8136 * entity must precede any reference to it...
8138 * [ VC: Entity Declared ]
8139 * In a document with an external subset or external parameter entities
8140 * with "standalone='no'", ... ... The declaration of a parameter entity
8141 * must precede any reference to it...
8144 * Parameter-entity references may only appear in the DTD.
8145 * NOTE: misleading but this is handled.
8147 * Returns the string of the entity content.
8148 * str is updated to the current value of the index
8151 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8155 xmlEntityPtr entity = NULL;
8157 if ((str == NULL) || (*str == NULL)) return(NULL);
8163 name = xmlParseStringName(ctxt, &ptr);
8165 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8166 "xmlParseStringPEReference: no name\n");
8172 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8180 * Increase the number of entity references parsed
8185 * Request the entity from SAX
8187 if ((ctxt->sax != NULL) &&
8188 (ctxt->sax->getParameterEntity != NULL))
8189 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8190 if (ctxt->instate == XML_PARSER_EOF) {
8194 if (entity == NULL) {
8196 * [ WFC: Entity Declared ]
8197 * In a document without any DTD, a document with only an
8198 * internal DTD subset which contains no parameter entity
8199 * references, or a document with "standalone='yes'", ...
8200 * ... The declaration of a parameter entity must precede
8201 * any reference to it...
8203 if ((ctxt->standalone == 1) ||
8204 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8205 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8206 "PEReference: %%%s; not found\n", name);
8209 * [ VC: Entity Declared ]
8210 * In a document with an external subset or external
8211 * parameter entities with "standalone='no'", ...
8212 * ... The declaration of a parameter entity must
8213 * precede any reference to it...
8215 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8216 "PEReference: %%%s; not found\n",
8222 * Internal checking in case the entity quest barfed
8224 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8225 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8226 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8227 "%%%s; is not a parameter entity\n",
8231 ctxt->hasPErefs = 1;
8238 * xmlParseDocTypeDecl:
8239 * @ctxt: an XML parser context
8241 * parse a DOCTYPE declaration
8243 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8244 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8246 * [ VC: Root Element Type ]
8247 * The Name in the document type declaration must match the element
8248 * type of the root element.
8252 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8253 const xmlChar *name = NULL;
8254 xmlChar *ExternalID = NULL;
8255 xmlChar *URI = NULL;
8258 * We know that '<!DOCTYPE' has been detected.
8265 * Parse the DOCTYPE name.
8267 name = xmlParseName(ctxt);
8269 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8270 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8272 ctxt->intSubName = name;
8277 * Check for SystemID and ExternalID
8279 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8281 if ((URI != NULL) || (ExternalID != NULL)) {
8282 ctxt->hasExternalSubset = 1;
8284 ctxt->extSubURI = URI;
8285 ctxt->extSubSystem = ExternalID;
8290 * Create and update the internal subset.
8292 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8293 (!ctxt->disableSAX))
8294 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8295 if (ctxt->instate == XML_PARSER_EOF)
8299 * Is there any internal subset declarations ?
8300 * they are handled separately in xmlParseInternalSubset()
8306 * We should be at the end of the DOCTYPE declaration.
8309 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8315 * xmlParseInternalSubset:
8316 * @ctxt: an XML parser context
8318 * parse the internal subset declaration
8320 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8324 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8326 * Is there any DTD definition ?
8329 ctxt->instate = XML_PARSER_DTD;
8332 * Parse the succession of Markup declarations and
8334 * Subsequence (markupdecl | PEReference | S)*
8336 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8337 const xmlChar *check = CUR_PTR;
8338 unsigned int cons = ctxt->input->consumed;
8341 xmlParseMarkupDecl(ctxt);
8342 xmlParsePEReference(ctxt);
8345 * Pop-up of finished entities.
8347 while ((RAW == 0) && (ctxt->inputNr > 1))
8350 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8351 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8352 "xmlParseInternalSubset: error detected in Markup declaration\n");
8363 * We should be at the end of the DOCTYPE declaration.
8366 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8371 #ifdef LIBXML_SAX1_ENABLED
8373 * xmlParseAttribute:
8374 * @ctxt: an XML parser context
8375 * @value: a xmlChar ** used to store the value of the attribute
8377 * parse an attribute
8379 * [41] Attribute ::= Name Eq AttValue
8381 * [ WFC: No External Entity References ]
8382 * Attribute values cannot contain direct or indirect entity references
8383 * to external entities.
8385 * [ WFC: No < in Attribute Values ]
8386 * The replacement text of any entity referred to directly or indirectly in
8387 * an attribute value (other than "&lt;") must not contain a <.
8389 * [ VC: Attribute Value Type ]
8390 * The attribute must have been declared; the value must be of the type
8393 * [25] Eq ::= S? '=' S?
8397 * [NS 11] Attribute ::= QName Eq AttValue
8399 * Also the case QName == xmlns:??? is handled independently as a namespace
8402 * Returns the attribute name, and the value in *value.
8406 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8407 const xmlChar *name;
8412 name = xmlParseName(ctxt);
8414 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8415 "error parsing attribute name\n");
8426 val = xmlParseAttValue(ctxt);
8427 ctxt->instate = XML_PARSER_CONTENT;
8429 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8430 "Specification mandate value for attribute %s\n", name);
8435 * Check that xml:lang conforms to the specification
8436 * No longer registered as an error, just generate a warning now
8437 * since this was deprecated in XML second edition
8439 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8440 if (!xmlCheckLanguageID(val)) {
8441 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8442 "Malformed value for xml:lang : %s\n",
8448 * Check that xml:space conforms to the specification
8450 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8451 if (xmlStrEqual(val, BAD_CAST "default"))
8453 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8456 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8457 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8468 * @ctxt: an XML parser context
8470 * parse a start of tag either for rule element or
8471 * EmptyElement. In both cases we don't parse the tag closing chars.
8473 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8475 * [ WFC: Unique Att Spec ]
8476 * No attribute name may appear more than once in the same start-tag or
8477 * empty-element tag.
8479 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8481 * [ WFC: Unique Att Spec ]
8482 * No attribute name may appear more than once in the same start-tag or
8483 * empty-element tag.
8487 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8489 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8491 * Returns the element name parsed
8495 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8496 const xmlChar *name;
8497 const xmlChar *attname;
8499 const xmlChar **atts = ctxt->atts;
8501 int maxatts = ctxt->maxatts;
8504 if (RAW != '<') return(NULL);
8507 name = xmlParseName(ctxt);
8509 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8510 "xmlParseStartTag: invalid element name\n");
8515 * Now parse the attributes, it ends up with the ending
8522 while (((RAW != '>') &&
8523 ((RAW != '/') || (NXT(1) != '>')) &&
8524 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8525 const xmlChar *q = CUR_PTR;
8526 unsigned int cons = ctxt->input->consumed;
8528 attname = xmlParseAttribute(ctxt, &attvalue);
8529 if ((attname != NULL) && (attvalue != NULL)) {
8531 * [ WFC: Unique Att Spec ]
8532 * No attribute name may appear more than once in the same
8533 * start-tag or empty-element tag.
8535 for (i = 0; i < nbatts;i += 2) {
8536 if (xmlStrEqual(atts[i], attname)) {
8537 xmlErrAttributeDup(ctxt, NULL, attname);
8543 * Add the pair to atts
8546 maxatts = 22; /* allow for 10 attrs by default */
8547 atts = (const xmlChar **)
8548 xmlMalloc(maxatts * sizeof(xmlChar *));
8550 xmlErrMemory(ctxt, NULL);
8551 if (attvalue != NULL)
8556 ctxt->maxatts = maxatts;
8557 } else if (nbatts + 4 > maxatts) {
8561 n = (const xmlChar **) xmlRealloc((void *) atts,
8562 maxatts * sizeof(const xmlChar *));
8564 xmlErrMemory(ctxt, NULL);
8565 if (attvalue != NULL)
8571 ctxt->maxatts = maxatts;
8573 atts[nbatts++] = attname;
8574 atts[nbatts++] = attvalue;
8575 atts[nbatts] = NULL;
8576 atts[nbatts + 1] = NULL;
8578 if (attvalue != NULL)
8585 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8587 if (!IS_BLANK_CH(RAW)) {
8588 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8589 "attributes construct error\n");
8592 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8593 (attname == NULL) && (attvalue == NULL)) {
8594 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8595 "xmlParseStartTag: problem parsing attributes\n");
8603 * SAX: Start of Element !
8605 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8606 (!ctxt->disableSAX)) {
8608 ctxt->sax->startElement(ctxt->userData, name, atts);
8610 ctxt->sax->startElement(ctxt->userData, name, NULL);
8614 /* Free only the content strings */
8615 for (i = 1;i < nbatts;i+=2)
8616 if (atts[i] != NULL)
8617 xmlFree((xmlChar *) atts[i]);
8624 * @ctxt: an XML parser context
8625 * @line: line of the start tag
8626 * @nsNr: number of namespaces on the start tag
8628 * parse an end of tag
8630 * [42] ETag ::= '</' Name S? '>'
8634 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * End-tag handler on the LIBXML_SAX1_ENABLED path; the line argument is
 * only used in the mismatch message.  Visible steps:
 *  - requires the input to sit on the two characters < and /, otherwise
 *    reports XML_ERR_LTSLASH_REQUIRED;
 *  - matches the tag name against ctxt->name via xmlParseNameAndCompare;
 *  - reports XML_ERR_GT_REQUIRED when the current character is not >;
 *  - any result other than the (xmlChar*)1 value (presumably the match
 *    sentinel of xmlParseNameAndCompare; confirm there) is reported as
 *    XML_ERR_TAG_NAME_MISMATCH, printing ctxt->name, line and the parsed
 *    name, or the literal unparseable when the result is NULL;
 *  - calls sax->endElement(ctxt->userData, ctxt->name) when a handler is
 *    installed and ctxt->disableSAX is clear.
 * NOTE(review): several original lines are absent from this listing (the
 * embedded numbering skips), so the skip and return statements are not
 * shown here.
 */
8638 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8639 const xmlChar *name;
8642 if ((RAW != '<') || (NXT(1) != '/')) {
8643 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8644 "xmlParseEndTag: '</' not found\n");
8649 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8652 * We should definitely be at the ending "S? '>'" part
8656 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8657 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8662 * [ WFC: Element Type Match ]
8663 * The Name in an element's end-tag must match the element type in the
8667 if (name != (xmlChar*)1) {
8668 if (name == NULL) name = BAD_CAST "unparseable";
8669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8670 "Opening and ending tag mismatch: %s line %d and %s\n",
8671 ctxt->name, line, name);
8677 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8678 (!ctxt->disableSAX))
8679 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8688 * @ctxt: an XML parser context
8690 * parse an end of tag
8692 * [42] ETag ::= '</' Name S? '>'
8696 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * Convenience entry point: forwards to xmlParseEndTag1 with a start-tag
 * line of 0.
 */
8700 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8701 xmlParseEndTag1(ctxt, 0);
8703 #endif /* LIBXML_SAX1_ENABLED */
8705 /************************************************************************
8707 * SAX 2 specific operations *
8709 ************************************************************************/
8713 * @ctxt: an XML parser context
8714 * @prefix: the prefix to lookup
8716 * Lookup the namespace name for the @prefix (which can be NULL)
8717 * The prefix must come from the @ctxt->dict dictionary
8719 * Returns the namespace name or NULL if not bound
/*
 * Resolve a namespace prefix against the bindings held in ctxt->nsTab.
 * The table is read as pairs: slot i holds a prefix and slot i + 1 the
 * value returned for it.  It is walked from the most recently added pair
 * (index ctxt->nsNr - 2) down to index 0, so the latest binding for a
 * prefix is found first.  Prefixes are compared by pointer, not by
 * content.  A prefix equal to ctxt->str_xml is answered directly with
 * ctxt->str_xml_ns without consulting the table.
 * NOTE(review): the statement taken when prefix is NULL and the stored
 * value is the empty string, and the final fall-through return, are on
 * lines missing from this listing.
 */
8721 static const xmlChar *
8722 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8725 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8726 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8727 if (ctxt->nsTab[i] == prefix) {
8728 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8730 return(ctxt->nsTab[i + 1]);
8737 * @ctxt: an XML parser context
8738 * @prefix: pointer to store the prefix part
8740 * parse an XML Namespace QName
8742 * [6] QName ::= (Prefix ':')? LocalPart
8743 * [7] Prefix ::= NCName
8744 * [8] LocalPart ::= NCName
8746 * Returns the Name parsed or NULL
/*
 * Parse a possibly prefixed name.  The first component is read with
 * xmlParseNCName; the visible fallbacks re-read the input with xmlParseName
 * or xmlParseNmtoken and report XML_NS_ERR_QNAME.  On the recovery paths
 * the pieces are glued back together with xmlBuildQName, the result is
 * interned with xmlDictLookup on ctxt->dict and the temporary string is
 * released with xmlFree.
 * NOTE(review): the conditions selecting each branch, the stores through
 * the prefix argument and the return statements are on lines missing from
 * this listing; confirm against the complete file before relying on
 * details.
 */
8749 static const xmlChar *
8750 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8751 const xmlChar *l, *p;
8755 l = xmlParseNCName(ctxt);
8758 l = xmlParseName(ctxt);
8760 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8761 "Failed to parse QName '%s'\n", l, NULL, NULL);
8771 l = xmlParseNCName(ctxt);
8775 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8776 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8777 l = xmlParseNmtoken(ctxt);
8779 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8781 tmp = xmlBuildQName(l, p, NULL, 0);
8784 p = xmlDictLookup(ctxt->dict, tmp, -1);
8785 if (tmp != NULL) xmlFree(tmp);
8792 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8793 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8795 tmp = (xmlChar *) xmlParseName(ctxt);
8797 tmp = xmlBuildQName(tmp, l, NULL, 0);
8798 l = xmlDictLookup(ctxt->dict, tmp, -1);
8799 if (tmp != NULL) xmlFree(tmp);
8803 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8804 l = xmlDictLookup(ctxt->dict, tmp, -1);
8805 if (tmp != NULL) xmlFree(tmp);
8816 * xmlParseQNameAndCompare:
8817 * @ctxt: an XML parser context
8818 * @name: the localname
8819 * @prefix: the prefix, if any.
8821 * parse an XML name and compares for match
8822 * (specialized for endtag parsing)
8824 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8825 * and the name for mismatch
/*
 * End-tag helper: check that the input holds the expected prefix:name.
 *  - A NULL prefix is handed to xmlParseNameAndCompare unchanged.
 *  - Fast path: the raw bytes starting at ctxt->input->cur are compared
 *    character by character; when the comparison reaches the end of the
 *    expected string and the next input byte is > or a blank,
 *    ctxt->input->cur is set to that byte and the (const xmlChar*) 1
 *    sentinel is returned.
 *  - Slow path: xmlParseQName is called and its name and prefix results
 *    are compared by pointer with the expected ones; equality again
 *    yields the sentinel.
 * NOTE(review): the initialisation of cmp, the pointer increments and the
 * final return are on lines missing from this listing.
 */
8828 static const xmlChar *
8829 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8830 xmlChar const *prefix) {
8834 const xmlChar *prefix2;
8836 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8839 in = ctxt->input->cur;
8842 while (*in != 0 && *in == *cmp) {
8846 if ((*cmp == 0) && (*in == ':')) {
8849 while (*in != 0 && *in == *cmp) {
8853 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8855 ctxt->input->cur = in;
8856 return((const xmlChar*) 1);
8860 * all strings coms from the dictionary, equality can be done directly
8862 ret = xmlParseQName (ctxt, &prefix2);
8863 if ((ret == name) && (prefix == prefix2))
8864 return((const xmlChar*) 1);
8869 * xmlParseAttValueInternal:
8870 * @ctxt: an XML parser context
8871 * @len: attribute len result
8872 * @alloc: whether the attribute was reallocated as a new string
8873 * @normalize: if 1 then further non-CDATA normalization must be done
8875 * parse a value for an attribute.
8876 * NOTE: if no normalization is needed, the routine will return pointers
8877 * directly from the data buffer.
8879 * 3.3.3 Attribute-Value Normalization:
8880 * Before the value of an attribute is passed to the application or
8881 * checked for validity, the XML processor must normalize it as follows:
8882 * - a character reference is processed by appending the referenced
8883 * character to the attribute value
8884 * - an entity reference is processed by recursively processing the
8885 * replacement text of the entity
8886 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8887 * appending #x20 to the normalized value, except that only a single
8888 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8889 * parsed entity or the literal entity value of an internal parsed entity
8890 * - other characters are processed by appending them to the normalized value
8891 * If the declared value is not CDATA, then the XML processor must further
8892 * process the normalized attribute value by discarding any leading and
8893 * trailing space (#x20) characters, and by replacing sequences of space
8894 * (#x20) characters by a single space (#x20) character.
8895 * All attributes for which no declaration has been read should be treated
8896 * by a non-validating parser as if declared CDATA.
8898 * Returns the AttValue parsed or NULL. The value has to be freed by the
8899 * caller if it was copied, this can be detected by val[*len] == 0.
/*
 * Fast-path attribute value scanner working directly on the input buffer.
 *  - The value must start with a double or single quote, otherwise
 *    XML_ERR_ATTRIBUTE_NOT_STARTED is reported.
 *  - ctxt->instate is set to XML_PARSER_ATTRIBUTE_VALUE while scanning.
 *  - Whenever ctxt->input->base is found to differ from a saved oldbase,
 *    start (and, in the trailing-blank loop, last) are shifted by the same
 *    delta and end is reloaded from ctxt->input->end.
 *  - The simple scans stop on the closing quote (limit), on a byte below
 *    0x20 or above 0x7f, and on & or <; if the stop position is not the
 *    closing quote, control goes to need_complex.
 *  - A span longer than XML_MAX_TEXT_LENGTH is rejected with
 *    XML_ERR_ATTRIBUTE_NOT_FINISHED unless XML_PARSE_HUGE is set.
 *  - The visible tail stores last - start in *len, produces either start
 *    itself or an xmlStrndup copy, and writes 1 or 0 through alloc when it
 *    is non-NULL; the guards choosing between these are not visible.
 *  - The last two visible lines (presumably the need_complex target) set
 *    *alloc to 1 and return xmlParseAttValueComplex(ctxt, len, normalize).
 * NOTE(review): in the trailing-blank loop last[-1] is read before the
 * last > start test is evaluated; swapping the two operands would perform
 * the bounds test first.
 * NOTE(review): many lines (buffer refills, increments, labels) are
 * missing from this listing.
 */
8903 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8907 const xmlChar *in = NULL, *start, *end, *last;
8908 xmlChar *ret = NULL;
8911 in = (xmlChar *) CUR_PTR;
8912 if (*in != '"' && *in != '\'') {
8913 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8916 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8919 * try to handle in this routine the most common case where no
8920 * allocation of a new string is required and where content is
8924 end = ctxt->input->end;
8927 const xmlChar *oldbase = ctxt->input->base;
8929 if (oldbase != ctxt->input->base) {
8930 long delta = ctxt->input->base - oldbase;
8931 start = start + delta;
8934 end = ctxt->input->end;
8938 * Skip any leading spaces
8940 while ((in < end) && (*in != limit) &&
8941 ((*in == 0x20) || (*in == 0x9) ||
8942 (*in == 0xA) || (*in == 0xD))) {
8946 const xmlChar *oldbase = ctxt->input->base;
8948 if (ctxt->instate == XML_PARSER_EOF)
8950 if (oldbase != ctxt->input->base) {
8951 long delta = ctxt->input->base - oldbase;
8952 start = start + delta;
8955 end = ctxt->input->end;
8956 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8957 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8958 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8959 "AttValue length too long\n");
8964 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8965 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8966 if ((*in++ == 0x20) && (*in == 0x20)) break;
8968 const xmlChar *oldbase = ctxt->input->base;
8970 if (ctxt->instate == XML_PARSER_EOF)
8972 if (oldbase != ctxt->input->base) {
8973 long delta = ctxt->input->base - oldbase;
8974 start = start + delta;
8977 end = ctxt->input->end;
8978 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8979 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8980 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8981 "AttValue length too long\n");
8988 * skip the trailing blanks
8990 while ((last[-1] == 0x20) && (last > start)) last--;
8991 while ((in < end) && (*in != limit) &&
8992 ((*in == 0x20) || (*in == 0x9) ||
8993 (*in == 0xA) || (*in == 0xD))) {
8996 const xmlChar *oldbase = ctxt->input->base;
8998 if (ctxt->instate == XML_PARSER_EOF)
9000 if (oldbase != ctxt->input->base) {
9001 long delta = ctxt->input->base - oldbase;
9002 start = start + delta;
9004 last = last + delta;
9006 end = ctxt->input->end;
9007 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9008 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9009 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9010 "AttValue length too long\n");
9015 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9016 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9017 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9018 "AttValue length too long\n");
9021 if (*in != limit) goto need_complex;
9023 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9024 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9027 const xmlChar *oldbase = ctxt->input->base;
9029 if (ctxt->instate == XML_PARSER_EOF)
9031 if (oldbase != ctxt->input->base) {
9032 long delta = ctxt->input->base - oldbase;
9033 start = start + delta;
9036 end = ctxt->input->end;
9037 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9038 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9039 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9040 "AttValue length too long\n");
9046 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9047 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9048 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9049 "AttValue length too long\n");
9052 if (*in != limit) goto need_complex;
9056 *len = last - start;
9057 ret = (xmlChar *) start;
9059 if (alloc) *alloc = 1;
9060 ret = xmlStrndup(start, last - start);
9063 if (alloc) *alloc = 0;
9066 if (alloc) *alloc = 1;
9067 return xmlParseAttValueComplex(ctxt, len, normalize);
9071 * xmlParseAttribute2:
9072 * @ctxt: an XML parser context
9073 * @pref: the element prefix
9074 * @elem: the element name
9075 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9076 * @value: a xmlChar ** used to store the value of the attribute
9077 * @len: an int * to save the length of the attribute
9078 * @alloc: an int * to indicate if the attribute was allocated
9080 * parse an attribute in the new SAX2 framework.
9082 * Returns the attribute name, and the value in *value.
/*
 * Parse one attribute for the SAX2 start-tag parser.
 *  - The attribute QName is read with xmlParseQName, which receives the
 *    prefix argument; failure reports XML_ERR_NAME_REQUIRED.
 *  - When ctxt->attsSpecial exists, a type is looked up with
 *    xmlHashQLookup2 using pref, elem, *prefix and name as keys.
 *  - The value is read with xmlParseAttValueInternal; a second space
 *    normalisation pass may replace it with the string returned by
 *    xmlAttrNormalizeSpace2.
 *  - ctxt->instate is put back to XML_PARSER_CONTENT.
 *  - XML_ERR_ATTRIBUTE_WITHOUT_VALUE is the error raised on the visible
 *    missing-value path.
 *  - For attributes whose prefix is ctxt->str_xml: in pedantic mode
 *    xml:lang is checked with xmlCheckLanguageID (warning
 *    XML_WAR_LANG_VALUE), and xml:space is compared with default and
 *    preserve (warning XML_WAR_SPACE_VALUE otherwise).  Both checks work
 *    on an xmlStrndup copy of the value, released with xmlFree.
 * NOTE(review): the guards around several of these steps, the stores to
 * the value argument and the return are on lines missing from this
 * listing.
 */
9085 static const xmlChar *
9086 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9087 const xmlChar * pref, const xmlChar * elem,
9088 const xmlChar ** prefix, xmlChar ** value,
9089 int *len, int *alloc)
9091 const xmlChar *name;
9092 xmlChar *val, *internal_val = NULL;
9097 name = xmlParseQName(ctxt, prefix);
9099 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9100 "error parsing attribute name\n");
9105 * get the type if needed
9107 if (ctxt->attsSpecial != NULL) {
9110 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9111 pref, elem, *prefix, name);
9123 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9126 * Sometimes a second normalisation pass for spaces is needed
9127 * but that only happens if charrefs or entities refernces
9128 * have been used in the attribute value, i.e. the attribute
9129 * value have been extracted in an allocated string already.
9132 const xmlChar *val2;
9134 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9135 if ((val2 != NULL) && (val2 != val)) {
9137 val = (xmlChar *) val2;
9141 ctxt->instate = XML_PARSER_CONTENT;
9143 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9144 "Specification mandate value for attribute %s\n",
9149 if (*prefix == ctxt->str_xml) {
9151 * Check that xml:lang conforms to the specification
9152 * No more registered as an error, just generate a warning now
9153 * since this was deprecated in XML second edition
9155 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9156 internal_val = xmlStrndup(val, *len);
9157 if (!xmlCheckLanguageID(internal_val)) {
9158 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9159 "Malformed value for xml:lang : %s\n",
9160 internal_val, NULL);
9165 * Check that xml:space conforms to the specification
9167 if (xmlStrEqual(name, BAD_CAST "space")) {
9168 internal_val = xmlStrndup(val, *len);
9169 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9171 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9174 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9175 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9176 internal_val, NULL);
9180 xmlFree(internal_val);
9188 * xmlParseStartTag2:
9189 * @ctxt: an XML parser context
9191 * Parse a start tag, either for a regular element or for an
9192 * EmptyElement. In both cases the tag closing chars are not parsed.
9193 * This routine is called when running SAX2 parsing
9195 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9197 * [ WFC: Unique Att Spec ]
9198 * No attribute name may appear more than once in the same start-tag or
9199 * empty-element tag.
9201 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9203 * [ WFC: Unique Att Spec ]
9204 * No attribute name may appear more than once in the same start-tag or
9205 * empty-element tag.
9209 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9211 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9213 * Returns the element name parsed
/*
 * SAX2 start-tag parser (namespace aware).
 *  - Entry state (ctxt->input->base, the cursor offset cur, line and col)
 *    is saved first.  After most steps ctxt->input->base is compared with
 *    the saved base and, if it moved, control jumps to base_changed; the
 *    visible tail of the function frees the allocated attribute values and
 *    restores cursor, line and col from the saved state.
 *  - The element QName is read with xmlParseQName; *tlen receives the
 *    number of input bytes between the saved offset and the cursor after
 *    the name.
 *  - Attribute loop: each attribute comes from xmlParseAttribute2.
 *      xmlns=...      value interned with xmlDictLookup, sanity-checked
 *                     (xmlParseURI, reserved xml and xmlns namespace
 *                     names), then pushed with nsPush(ctxt, NULL, URL).
 *      xmlns:p=...    same idea, pushed with nsPush(ctxt, attname, URL);
 *                     an empty URL is reported as an error.
 *      anything else  appended to atts, five slots per attribute: name,
 *                     prefix, namespace URI (NULL here, filled in later),
 *                     and two value slots; ctxt->attallocs records whether
 *                     the value was allocated.
 *    nbNs counts successful nsPush calls.
 *  - Defaulting: entries of ctxt->attsDefault for (localname, prefix) are
 *    read in groups of five from defaults->values and added unless the
 *    namespace or attribute is already present.
 *  - Checks: every prefixed attribute has its prefix resolved with
 *    xmlGetNamespace (XML_NS_ERR_UNDEFINED_NAMESPACE if unbound) and
 *    duplicates are detected by pointer comparison of name plus prefix or
 *    name plus namespace.
 *  - sax->startElementNs is called with nbatts / 5 attributes.
 *  - Finally, values flagged in ctxt->attallocs are freed (slot 3 of each
 *    group of five).
 * NOTE(review): in the defaulted xmlns:prefix branch the comparison reads
 * defaults->values[2], whereas the nsPush right below it and the defaulted
 * xmlns branch use defaults->values[5 * i + 2].  This looks like a missing
 * 5 * i offset; confirm before changing.
 * NOTE(review): many lines are missing from this listing (the embedded
 * numbering skips), including labels and several guards.
 */
9216 static const xmlChar *
9217 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9218 const xmlChar **URI, int *tlen) {
9219 const xmlChar *localname;
9220 const xmlChar *prefix;
9221 const xmlChar *attname;
9222 const xmlChar *aprefix;
9223 const xmlChar *nsname;
9225 const xmlChar **atts = ctxt->atts;
9226 int maxatts = ctxt->maxatts;
9227 int nratts, nbatts, nbdef;
9228 int i, j, nbNs, attval, oldline, oldcol;
9229 const xmlChar *base;
9231 int nsNr = ctxt->nsNr;
9233 if (RAW != '<') return(NULL);
9237 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9238 * point since the attribute values may be stored as pointers to
9239 * the buffer and calling SHRINK would destroy them !
9240 * The Shrinking is only possible once the full set of attribute
9241 * callbacks have been done.
9245 base = ctxt->input->base;
9246 cur = ctxt->input->cur - ctxt->input->base;
9247 oldline = ctxt->input->line;
9248 oldcol = ctxt->input->col;
9254 /* Forget any namespaces added during an earlier parse of this element. */
9257 localname = xmlParseQName(ctxt, &prefix);
9258 if (localname == NULL) {
9259 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9260 "StartTag: invalid element name\n");
9263 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9266 * Now parse the attributes, it ends up with the ending
9272 if (ctxt->input->base != base) goto base_changed;
9274 while (((RAW != '>') &&
9275 ((RAW != '/') || (NXT(1) != '>')) &&
9276 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9277 const xmlChar *q = CUR_PTR;
9278 unsigned int cons = ctxt->input->consumed;
9279 int len = -1, alloc = 0;
9281 attname = xmlParseAttribute2(ctxt, prefix, localname,
9282 &aprefix, &attvalue, &len, &alloc);
9283 if (ctxt->input->base != base) {
9284 if ((attvalue != NULL) && (alloc != 0))
9289 if ((attname != NULL) && (attvalue != NULL)) {
9290 if (len < 0) len = xmlStrlen(attvalue);
9291 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9292 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296 uri = xmlParseURI((const char *) URL);
9298 xmlNsErr(ctxt, XML_WAR_NS_URI,
9299 "xmlns: '%s' is not a valid URI\n",
9302 if (uri->scheme == NULL) {
9303 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9304 "xmlns: URI %s is not absolute\n",
9309 if (URL == ctxt->str_xml_ns) {
9310 if (attname != ctxt->str_xml) {
9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312 "xml namespace URI cannot be the default namespace\n",
9315 goto skip_default_ns;
9319 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9320 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321 "reuse of the xmlns namespace name is forbidden\n",
9323 goto skip_default_ns;
9327 * check that it's not a defined namespace
9329 for (j = 1;j <= nbNs;j++)
9330 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9333 xmlErrAttributeDup(ctxt, NULL, attname);
9335 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9337 if (alloc != 0) xmlFree(attvalue);
9341 if (aprefix == ctxt->str_xmlns) {
9342 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9345 if (attname == ctxt->str_xml) {
9346 if (URL != ctxt->str_xml_ns) {
9347 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9348 "xml namespace prefix mapped to wrong URI\n",
9352 * Do not keep a namespace definition node
9356 if (URL == ctxt->str_xml_ns) {
9357 if (attname != ctxt->str_xml) {
9358 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9359 "xml namespace URI mapped to wrong prefix\n",
9364 if (attname == ctxt->str_xmlns) {
9365 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9366 "redefinition of the xmlns prefix is forbidden\n",
9372 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9373 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374 "reuse of the xmlns namespace name is forbidden\n",
9378 if ((URL == NULL) || (URL[0] == 0)) {
9379 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9380 "xmlns:%s: Empty XML namespace is not allowed\n",
9381 attname, NULL, NULL);
9384 uri = xmlParseURI((const char *) URL);
9386 xmlNsErr(ctxt, XML_WAR_NS_URI,
9387 "xmlns:%s: '%s' is not a valid URI\n",
9388 attname, URL, NULL);
9390 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9391 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9392 "xmlns:%s: URI %s is not absolute\n",
9393 attname, URL, NULL);
9400 * check that it's not a defined namespace
9402 for (j = 1;j <= nbNs;j++)
9403 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9406 xmlErrAttributeDup(ctxt, aprefix, attname);
9408 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9410 if (alloc != 0) xmlFree(attvalue);
9412 if (ctxt->input->base != base) goto base_changed;
9417 * Add the pair to atts
9419 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9420 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9421 if (attvalue[len] == 0)
9425 maxatts = ctxt->maxatts;
9428 ctxt->attallocs[nratts++] = alloc;
9429 atts[nbatts++] = attname;
9430 atts[nbatts++] = aprefix;
9431 atts[nbatts++] = NULL; /* the URI will be fetched later */
9432 atts[nbatts++] = attvalue;
9434 atts[nbatts++] = attvalue;
9436 * tag if some deallocation is needed
9438 if (alloc != 0) attval = 1;
9440 if ((attvalue != NULL) && (attvalue[len] == 0))
9447 if (ctxt->instate == XML_PARSER_EOF)
9449 if (ctxt->input->base != base) goto base_changed;
9450 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9452 if (!IS_BLANK_CH(RAW)) {
9453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9454 "attributes construct error\n");
9458 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9459 (attname == NULL) && (attvalue == NULL)) {
9460 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9461 "xmlParseStartTag: problem parsing attributes\n");
9465 if (ctxt->input->base != base) goto base_changed;
9469 * The attributes defaulting
9471 if (ctxt->attsDefault != NULL) {
9472 xmlDefAttrsPtr defaults;
9474 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9475 if (defaults != NULL) {
9476 for (i = 0;i < defaults->nbAttrs;i++) {
9477 attname = defaults->values[5 * i];
9478 aprefix = defaults->values[5 * i + 1];
9481 * special work for namespaces defaulted defs
9483 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9485 * check that it's not a defined namespace
9487 for (j = 1;j <= nbNs;j++)
9488 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9490 if (j <= nbNs) continue;
9492 nsname = xmlGetNamespace(ctxt, NULL);
9493 if (nsname != defaults->values[5 * i + 2]) {
9494 if (nsPush(ctxt, NULL,
9495 defaults->values[5 * i + 2]) > 0)
9498 } else if (aprefix == ctxt->str_xmlns) {
9500 * check that it's not a defined namespace
9502 for (j = 1;j <= nbNs;j++)
9503 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9505 if (j <= nbNs) continue;
9507 nsname = xmlGetNamespace(ctxt, attname);
9508 if (nsname != defaults->values[2]) {
9509 if (nsPush(ctxt, attname,
9510 defaults->values[5 * i + 2]) > 0)
9515 * check that it's not a defined attribute
9517 for (j = 0;j < nbatts;j+=5) {
9518 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9521 if (j < nbatts) continue;
9523 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9524 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9527 maxatts = ctxt->maxatts;
9530 atts[nbatts++] = attname;
9531 atts[nbatts++] = aprefix;
9532 if (aprefix == NULL)
9533 atts[nbatts++] = NULL;
9535 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9536 atts[nbatts++] = defaults->values[5 * i + 2];
9537 atts[nbatts++] = defaults->values[5 * i + 3];
9538 if ((ctxt->standalone == 1) &&
9539 (defaults->values[5 * i + 4] != NULL)) {
9540 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9541 "standalone: attribute %s on %s defaulted from external subset\n",
9542 attname, localname);
9551 * The attributes checkings
9553 for (i = 0; i < nbatts;i += 5) {
9555 * The default namespace does not apply to attribute names.
9557 if (atts[i + 1] != NULL) {
9558 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9559 if (nsname == NULL) {
9560 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9561 "Namespace prefix %s for %s on %s is not defined\n",
9562 atts[i + 1], atts[i], localname);
9564 atts[i + 2] = nsname;
9568 * [ WFC: Unique Att Spec ]
9569 * No attribute name may appear more than once in the same
9570 * start-tag or empty-element tag.
9571 * As extended by the Namespace in XML REC.
9573 for (j = 0; j < i;j += 5) {
9574 if (atts[i] == atts[j]) {
9575 if (atts[i+1] == atts[j+1]) {
9576 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9579 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9580 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9581 "Namespaced Attribute %s in '%s' redefined\n",
9582 atts[i], nsname, NULL);
9589 nsname = xmlGetNamespace(ctxt, prefix);
9590 if ((prefix != NULL) && (nsname == NULL)) {
9591 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9592 "Namespace prefix %s on %s is not defined\n",
9593 prefix, localname, NULL);
9599 * SAX: Start of Element !
9601 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9602 (!ctxt->disableSAX)) {
9604 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9605 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9606 nbatts / 5, nbdef, atts);
9608 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9613 * Free up attribute allocated strings if needed
9616 for (i = 3,j = 0; j < nratts;i += 5,j++)
9617 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9618 xmlFree((xmlChar *) atts[i]);
9625 * the attribute strings are valid iif the base didn't changed
9628 for (i = 3,j = 0; j < nratts;i += 5,j++)
9629 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9630 xmlFree((xmlChar *) atts[i]);
9632 ctxt->input->cur = ctxt->input->base + cur;
9633 ctxt->input->line = oldline;
9634 ctxt->input->col = oldcol;
9635 if (ctxt->wellFormed == 1) {
9643 * @ctxt: an XML parser context
9644 * @line: line of the start tag
9645 * @nsNr: number of namespaces on the start tag
9647 * parse an end of tag
9649 * [42] ETag ::= '</' Name S? '>'
9653 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * SAX2 end-tag parser.
 *  - Requires < followed by /, otherwise XML_ERR_LTSLASH_REQUIRED.
 *  - Shortcut: when tlen > 0 and the next tlen input bytes equal the first
 *    tlen bytes of ctxt->name (xmlStrncmp), the cursor is advanced by
 *    tlen, or by tlen + 1 when the byte right after is >.
 *  - Otherwise the name is matched with xmlParseNameAndCompare or, using
 *    the prefix, xmlParseQNameAndCompare.
 *  - ctxt->instate is tested against XML_PARSER_EOF (the action taken is
 *    on a line missing from this listing).
 *  - Reports XML_ERR_GT_REQUIRED when the current character is not >.
 *  - A result other than the (xmlChar*)1 sentinel is reported as
 *    XML_ERR_TAG_NAME_MISMATCH; if line is 0 and ctxt->node is set, the
 *    line number is taken from ctxt->node->line.
 *  - Calls sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI) when
 *    a handler is installed and ctxt->disableSAX is clear.
 * NOTE(review): nsNr is not referenced on any visible line; lines are
 * missing from this listing, so confirm how it is used.
 */
9657 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9658 const xmlChar *URI, int line, int nsNr, int tlen) {
9659 const xmlChar *name;
9662 if ((RAW != '<') || (NXT(1) != '/')) {
9663 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9668 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9669 if (ctxt->input->cur[tlen] == '>') {
9670 ctxt->input->cur += tlen + 1;
9673 ctxt->input->cur += tlen;
9677 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9679 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9683 * We should definitely be at the ending "S? '>'" part
9686 if (ctxt->instate == XML_PARSER_EOF)
9689 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9690 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9695 * [ WFC: Element Type Match ]
9696 * The Name in an element's end-tag must match the element type in the
9700 if (name != (xmlChar*)1) {
9701 if (name == NULL) name = BAD_CAST "unparseable";
9702 if ((line == 0) && (ctxt->node != NULL))
9703 line = ctxt->node->line;
9704 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9705 "Opening and ending tag mismatch: %s line %d and %s\n",
9706 ctxt->name, line, name);
9713 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9714 (!ctxt->disableSAX))
9715 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9725 * @ctxt: an XML parser context
9727 * Parse escaped pure raw content.
9729 * [18] CDSect ::= CDStart CData CDEnd
9731 * [19] CDStart ::= '<![CDATA['
9733 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9735 * [21] CDEnd ::= ']]>'
/*
 * Parse a CDATA section and hand its text to the SAX layer.
 *  - Entered when the input matches the nine characters of the CDATA
 *    opening marker (CMP9 test); ctxt->instate becomes
 *    XML_PARSER_CDATA_SECTION.
 *  - Early failures report XML_ERR_CDATA_NOT_FINISHED and restore
 *    ctxt->instate to XML_PARSER_CONTENT.
 *  - Characters are accumulated in buf (initial capacity
 *    XML_PARSER_BUFFER_SIZE, allocated with xmlMallocAtomic) until the
 *    three tracked characters r, s and cur read ]]>.
 *  - When len + 5 reaches size the buffer is reallocated to twice the
 *    size; a size above XML_MAX_TEXT_LENGTH is refused unless
 *    XML_PARSE_HUGE is set.  Allocation failures go to xmlErrMemory.
 *  - The text is delivered through sax->cdataBlock when set, else through
 *    sax->characters, provided ctxt->sax exists and disableSAX is clear.
 * NOTE(review): the character-advance statements and the release of buf
 * are on lines missing from this listing.
 */
9738 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9739 xmlChar *buf = NULL;
9741 int size = XML_PARSER_BUFFER_SIZE;
9747 /* Check 2.6.0 was NXT(0) not RAW */
9748 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9753 ctxt->instate = XML_PARSER_CDATA_SECTION;
9756 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9757 ctxt->instate = XML_PARSER_CONTENT;
9763 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9764 ctxt->instate = XML_PARSER_CONTENT;
9769 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9771 xmlErrMemory(ctxt, NULL);
9774 while (IS_CHAR(cur) &&
9775 ((r != ']') || (s != ']') || (cur != '>'))) {
9776 if (len + 5 >= size) {
9779 if ((size > XML_MAX_TEXT_LENGTH) &&
9780 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9781 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9782 "CData section too big found", NULL);
9786 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9789 xmlErrMemory(ctxt, NULL);
9795 COPY_BUF(rl,buf,len,r);
9803 if (ctxt->instate == XML_PARSER_EOF) {
9813 ctxt->instate = XML_PARSER_CONTENT;
9815 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9816 "CData section not finished\n%.50s\n", buf);
9823 * OK the buffer is to be consumed as cdata.
9825 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9826 if (ctxt->sax->cdataBlock != NULL)
9827 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9828 else if (ctxt->sax->characters != NULL)
9829 ctxt->sax->characters(ctxt->userData, buf, len);
9836 * @ctxt: an XML parser context
9840 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
/*
 * Parse element content until the end of input, the start of an end tag
 * (< followed by /) or parser state XML_PARSER_EOF.  Each iteration
 * dispatches on the upcoming characters: processing instruction (< then
 * ?), CDATA section (xmlParseCDSect), comment (xmlParseComment), child
 * element (any other <, xmlParseElement), reference (&, xmlParseReference)
 * and otherwise character data via xmlParseCharData(ctxt, 0).
 * After a comment ctxt->instate is reset to XML_PARSER_CONTENT.
 * Progress guard: if neither ctxt->input->consumed nor CUR_PTR moved during
 * an iteration, XML_ERR_INTERNAL_ERROR is reported and the state is forced
 * to XML_PARSER_EOF.
 * NOTE(review): the processing-instruction call and the body of the
 * finished-entity loop are on lines missing from this listing.
 */
9844 xmlParseContent(xmlParserCtxtPtr ctxt) {
9846 while ((RAW != 0) &&
9847 ((RAW != '<') || (NXT(1) != '/')) &&
9848 (ctxt->instate != XML_PARSER_EOF)) {
9849 const xmlChar *test = CUR_PTR;
9850 unsigned int cons = ctxt->input->consumed;
9851 const xmlChar *cur = ctxt->input->cur;
9854 * First case : a Processing Instruction.
9856 if ((*cur == '<') && (cur[1] == '?')) {
9861 * Second case : a CDSection
9863 /* 2.6.0 test was *cur not RAW */
9864 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9865 xmlParseCDSect(ctxt);
9869 * Third case : a comment
9871 else if ((*cur == '<') && (NXT(1) == '!') &&
9872 (NXT(2) == '-') && (NXT(3) == '-')) {
9873 xmlParseComment(ctxt);
9874 ctxt->instate = XML_PARSER_CONTENT;
9878 * Fourth case : a sub-element.
9880 else if (*cur == '<') {
9881 xmlParseElement(ctxt);
9885 * Fifth case : a reference. If if has not been resolved,
9886 * parsing returns it's Name, create the node
9889 else if (*cur == '&') {
9890 xmlParseReference(ctxt);
9894 * Last case, text. Note that References are handled directly.
9897 xmlParseCharData(ctxt, 0);
9902 * Pop-up of finished entities.
9904 while ((RAW == 0) && (ctxt->inputNr > 1))
9908 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9909 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9910 "detected an error in element content\n");
9911 ctxt->instate = XML_PARSER_EOF;
9919 * @ctxt: an XML parser context
9921 * parse an XML element, this is highly recursive
9923 * [39] element ::= EmptyElemTag | STag content ETag
9925 * [ WFC: Element Type Match ]
9926 * The Name in an element's end-tag must match the element type in the
/*
 * Parse one element: start tag, content, end tag.
 *  - Depth guard: ctxt->nameNr above xmlParserMaxDepth without
 *    XML_PARSE_HUGE reports XML_ERR_INTERNAL_ERROR and forces
 *    XML_PARSER_EOF.
 *  - With ctxt->record_info set, begin and end positions and lines are
 *    stored in node_info and registered through xmlParserAddNodeInfo.
 *  - A value is pushed with spacePush: -1 when ctxt->spaceNr is 0 or the
 *    current *ctxt->space is -2, otherwise a copy of *ctxt->space.
 *  - The start tag is parsed by xmlParseStartTag2 or, inside the
 *    LIBXML_SAX1_ENABLED conditional, xmlParseStartTag; the name is pushed
 *    with namePush.
 *  - With ctxt->validate set and the document still well formed, the root
 *    element (ctxt->node equal to ctxt->myDoc->children) is checked by
 *    xmlValidateRoot and the result folded into ctxt->valid.
 *  - Empty element (/ followed by >): the end callback (endElementNs or
 *    endElement) is invoked here and namespaces added since entry are
 *    popped.
 *  - Otherwise content is parsed by xmlParseContent and the end tag by
 *    xmlParseEndTag2 or xmlParseEndTag1.
 *  - Error exits report XML_ERR_GT_REQUIRED or XML_ERR_TAG_NOT_FINISHED
 *    and pop the namespaces pushed since entry (nsPop of
 *    ctxt->nsNr - nsNr).
 * NOTE(review): the declarations of line, tlen and ret, the conditions
 * choosing between the SAX1 and SAX2 calls, and the returns are on lines
 * missing from this listing.
 */
9932 xmlParseElement(xmlParserCtxtPtr ctxt) {
9933 const xmlChar *name;
9934 const xmlChar *prefix = NULL;
9935 const xmlChar *URI = NULL;
9936 xmlParserNodeInfo node_info;
9939 int nsNr = ctxt->nsNr;
9941 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9942 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9943 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9944 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9946 ctxt->instate = XML_PARSER_EOF;
9950 /* Capture start position */
9951 if (ctxt->record_info) {
9952 node_info.begin_pos = ctxt->input->consumed +
9953 (CUR_PTR - ctxt->input->base);
9954 node_info.begin_line = ctxt->input->line;
9957 if (ctxt->spaceNr == 0)
9958 spacePush(ctxt, -1);
9959 else if (*ctxt->space == -2)
9960 spacePush(ctxt, -1);
9962 spacePush(ctxt, *ctxt->space);
9964 line = ctxt->input->line;
9965 #ifdef LIBXML_SAX1_ENABLED
9967 #endif /* LIBXML_SAX1_ENABLED */
9968 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9969 #ifdef LIBXML_SAX1_ENABLED
9971 name = xmlParseStartTag(ctxt);
9972 #endif /* LIBXML_SAX1_ENABLED */
9973 if (ctxt->instate == XML_PARSER_EOF)
9979 namePush(ctxt, name);
9982 #ifdef LIBXML_VALID_ENABLED
9984 * [ VC: Root Element Type ]
9985 * The Name in the document type declaration must match the element
9986 * type of the root element.
9988 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9989 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9990 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9991 #endif /* LIBXML_VALID_ENABLED */
9994 * Check for an Empty Element.
9996 if ((RAW == '/') && (NXT(1) == '>')) {
9999 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10000 (!ctxt->disableSAX))
10001 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10002 #ifdef LIBXML_SAX1_ENABLED
10004 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10005 (!ctxt->disableSAX))
10006 ctxt->sax->endElement(ctxt->userData, name);
10007 #endif /* LIBXML_SAX1_ENABLED */
10011 if (nsNr != ctxt->nsNr)
10012 nsPop(ctxt, ctxt->nsNr - nsNr);
10013 if ( ret != NULL && ctxt->record_info ) {
10014 node_info.end_pos = ctxt->input->consumed +
10015 (CUR_PTR - ctxt->input->base);
10016 node_info.end_line = ctxt->input->line;
10017 node_info.node = ret;
10018 xmlParserAddNodeInfo(ctxt, &node_info);
10025 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10026 "Couldn't find end of Start Tag %s line %d\n",
10030 * end of parsing of this node.
10035 if (nsNr != ctxt->nsNr)
10036 nsPop(ctxt, ctxt->nsNr - nsNr);
10039 * Capture end position and add node
10041 if ( ret != NULL && ctxt->record_info ) {
10042 node_info.end_pos = ctxt->input->consumed +
10043 (CUR_PTR - ctxt->input->base);
10044 node_info.end_line = ctxt->input->line;
10045 node_info.node = ret;
10046 xmlParserAddNodeInfo(ctxt, &node_info);
10052 * Parse the content of the element:
10054 xmlParseContent(ctxt);
10055 if (ctxt->instate == XML_PARSER_EOF)
10057 if (!IS_BYTE_CHAR(RAW)) {
10058 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10059 "Premature end of data in tag %s line %d\n",
10063 * end of parsing of this node.
10068 if (nsNr != ctxt->nsNr)
10069 nsPop(ctxt, ctxt->nsNr - nsNr);
10074 * parse the end of tag: '</' should be here.
10077 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10080 #ifdef LIBXML_SAX1_ENABLED
10082 xmlParseEndTag1(ctxt, line);
10083 #endif /* LIBXML_SAX1_ENABLED */
10086 * Capture end position and add node
10088 if ( ret != NULL && ctxt->record_info ) {
10089 node_info.end_pos = ctxt->input->consumed +
10090 (CUR_PTR - ctxt->input->base);
10091 node_info.end_line = ctxt->input->line;
10092 node_info.node = ret;
10093 xmlParserAddNodeInfo(ctxt, &node_info);
10098 * xmlParseVersionNum:
10099 * @ctxt: an XML parser context
10101 * parse the XML version value.
10103 * [26] VersionNum ::= '1.' [0-9]+
10105 * In practice allow [0-9].[0-9]+ at that level
10107 * Returns the string giving the XML version number, or NULL
10110 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
      /* Result buffer; stays NULL until it is assigned the allocation below. */
10111 xmlChar *buf = NULL;
      /* Heap buffer of `size` xmlChar elements that receives the version text. */
10116 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10118 xmlErrMemory(ctxt, NULL);
      /* Test that the first character is an ASCII digit. */
10122 if (!((cur >= '0') && (cur <= '9'))) {
      /* Loop for as long as the current character is an ASCII digit. */
10136 while ((cur >= '0') && (cur <= '9')) {
      /*
       * Running out of room: the buffer is resized. The xmlRealloc result
       * is taken in tmp first instead of being written straight into buf.
       */
10137 if (len + 1 >= size) {
10141 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10144 xmlErrMemory(ctxt, NULL);
10158 * xmlParseVersionInfo:
10159 * @ctxt: an XML parser context
10161 * parse the XML version.
10163 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10165 * [25] Eq ::= S? '=' S?
10167 * Returns the version string, e.g. "1.0"
10171 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
      /* Returned value; remains NULL unless xmlParseVersionNum supplies one. */
10172 xmlChar *version = NULL;
      /* Test whether the current input starts with the literal "version". */
10174 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10178 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
      /*
       * The version number is parsed by xmlParseVersionNum in both quoting
       * styles; XML_ERR_STRING_NOT_CLOSED is the error raised next to each
       * call (presumably when the closing quote is missing).
       */
10185 version = xmlParseVersionNum(ctxt);
10187 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10190 } else if (RAW == '\''){
10192 version = xmlParseVersionNum(ctxt);
10194 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10206 * @ctxt: an XML parser context
10208 * parse the XML encoding name
10210 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10212 * Returns the encoding name value or NULL
10215 xmlParseEncName(xmlParserCtxtPtr ctxt) {
      /* Result buffer; only allocated once a valid first character is seen. */
10216 xmlChar *buf = NULL;
      /* Production [81]: the name has to begin with an ASCII letter. */
10222 if (((cur >= 'a') && (cur <= 'z')) ||
10223 ((cur >= 'A') && (cur <= 'Z'))) {
10224 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10226 xmlErrMemory(ctxt, NULL);
      /* Continuation characters tested here include letters, digits, '.' and '_'. */
10233 while (((cur >= 'a') && (cur <= 'z')) ||
10234 ((cur >= 'A') && (cur <= 'Z')) ||
10235 ((cur >= '0') && (cur <= '9')) ||
10236 (cur == '.') || (cur == '_') ||
10238 if (len + 1 >= size) {
      /* Resize through tmp so buf is not overwritten by the xmlRealloc result. */
10242 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10244 xmlErrMemory(ctxt, NULL);
      /* NOTE(review): presumably the path taken when the first character is not a letter. */
10261 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10267 * xmlParseEncodingDecl:
10268 * @ctxt: an XML parser context
10270 * parse the XML encoding declaration
10272 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10274 * this sets up the conversion filters.
10276 * Returns the encoding value or NULL
10280 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
      /*
       * Parsed encoding name. Further down it is stored either in
       * ctxt->encoding (UTF-16 / UTF-8 names) or in ctxt->input->encoding
       * (any other name).
       */
10281 xmlChar *encoding = NULL;
      /* Test whether the current input starts with the literal "encoding". */
10284 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10288 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
      /* The name itself is parsed by xmlParseEncName in both quoting styles. */
10295 encoding = xmlParseEncName(ctxt);
10297 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10300 } else if (RAW == '\''){
10302 encoding = xmlParseEncName(ctxt);
10304 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10308 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10312 * Non standard parsing, allowing the user to ignore encoding
10314 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10318 * UTF-16 encoding stwich has already taken place at this stage,
10319 * more over the little-endian/big-endian selection is already done
10321 if ((encoding != NULL) &&
10322 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10323 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10325 * If no encoding was passed to the parser, that we are
10326 * using UTF-16 and no decoder is present i.e. the
10327 * document is apparently UTF-8 compatible, then raise an
10328 * encoding mismatch fatal error
10330 if ((ctxt->encoding == NULL) &&
10331 (ctxt->input->buf != NULL) &&
10332 (ctxt->input->buf->encoder == NULL)) {
10333 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10334 "Document labelled UTF-16 but has UTF-8 content\n");
      /* Release any name stored earlier; ctxt->encoding now holds the parsed string. */
10336 if (ctxt->encoding != NULL)
10337 xmlFree((xmlChar *) ctxt->encoding);
10338 ctxt->encoding = encoding;
10341 * UTF-8 encoding is handled natively
10343 else if ((encoding != NULL) &&
10344 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10345 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
      /* Same replacement of ctxt->encoding as in the UTF-16 branch. */
10346 if (ctxt->encoding != NULL)
10347 xmlFree((xmlChar *) ctxt->encoding);
10348 ctxt->encoding = encoding;
10350 else if (encoding != NULL) {
10351 xmlCharEncodingHandlerPtr handler;
      /* Any other name is recorded on the current input, not on ctxt->encoding. */
10353 if (ctxt->input->encoding != NULL)
10354 xmlFree((xmlChar *) ctxt->input->encoding);
10355 ctxt->input->encoding = encoding;
      /*
       * Look up a conversion handler by name and switch the parser to it;
       * the "Unsupported encoding" fatal error below is the other outcome
       * of the handler test.
       */
10357 handler = xmlFindCharEncodingHandler((const char *) encoding);
10358 if (handler != NULL) {
10359 xmlSwitchToEncoding(ctxt, handler);
10361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10362 "Unsupported encoding %s\n", encoding);
10372 * @ctxt: an XML parser context
10374 * parse the XML standalone declaration
10376 * [32] SDDecl ::= S 'standalone' Eq
10377 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10379 * [ VC: Standalone Document Declaration ]
10380 * TODO The standalone document declaration must have the value "no"
10381 * if any external markup declarations contain declarations of:
10382 * - attributes with default values, if elements to which these
10383 * attributes apply appear in the document without specifications
10384 * of values for these attributes, or
10385 * - entities (other than amp, lt, gt, apos, quot), if references
10386 * to those entities appear in the document, or
10387 * - attributes with values subject to normalization, where the
10388 * attribute appears in the document with a value which will change
10389 * as a result of normalization, or
10390 * - element types with element content, if white space occurs directly
10391 * within any instance of those types.
10394 * 1 if standalone="yes"
10395 * 0 if standalone="no"
10396 * -2 if standalone attribute is missing or invalid
10397 * (A standalone value of -2 means that the XML declaration was found,
10398 * but no value was specified for the standalone attribute).
10402 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
      /* Starts out as -2, the documented "missing or invalid" result. */
10403 int standalone = -2;
      /* Test whether the current input starts with the literal "standalone". */
10406 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10410 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10411 return(standalone);
      /* First quoting style: the value is recognised by its leading "no" / "ye..." characters. */
10417 if ((RAW == 'n') && (NXT(1) == 'o')) {
10420 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10425 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10428 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10431 } else if (RAW == '"'){
      /* Double-quoted value: same "no" / "ye..." recognition as above. */
10433 if ((RAW == 'n') && (NXT(1) == 'o')) {
10436 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10441 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10444 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10448 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10451 return(standalone);
10456 * @ctxt: an XML parser context
10458 * parse an XML declaration header
10460 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10464 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10468 * This value for standalone indicates that the document has an
10469 * XML declaration but it does not have a standalone attribute.
10470 * It will be overwritten later if a standalone attribute is found.
10472 ctxt->input->standalone = -2;
10475 * We know that '<?xml' is here.
10479 if (!IS_BLANK_CH(RAW)) {
10480 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10481 "Blank needed after '<?xml'\n");
10486 * We must have the VersionInfo here.
10488 version = xmlParseVersionInfo(ctxt);
10489 if (version == NULL) {
10490 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
      /* A version string different from XML_DEFAULT_VERSION gets extra checks. */
10492 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10494 * Changed here for XML-1.0 5th edition
10496 if (ctxt->options & XML_PARSE_OLD10) {
      /* With XML_PARSE_OLD10 set, a non-default version is a fatal error. */
10497 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10498 "Unsupported version '%s'\n",
      /*
       * A version beginning with "1." draws a warning; the fatal error
       * below is the other outcome of this test.
       */
10501 if ((version[0] == '1') && ((version[1] == '.'))) {
10502 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10503 "Unsupported version '%s'\n",
10506 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10507 "Unsupported version '%s'\n",
      /* Release any earlier version string; ctxt->version now holds the parsed one. */
10512 if (ctxt->version != NULL)
10513 xmlFree((void *) ctxt->version);
10514 ctxt->version = version;
10518 * We may have the encoding declaration
10520 if (!IS_BLANK_CH(RAW)) {
10521 if ((RAW == '?') && (NXT(1) == '>')) {
10525 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
      /*
       * The return value of xmlParseEncodingDecl is not used here; an
       * unsupported encoding is detected through ctxt->errNo instead.
       */
10527 xmlParseEncodingDecl(ctxt);
10528 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10530 * The XML REC instructs us to stop parsing right here
10536 * We may have the standalone status.
10538 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10539 if ((RAW == '?') && (NXT(1) == '>')) {
10543 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10547 * We can grow the input buffer freely at that point
10552 ctxt->input->standalone = xmlParseSDDecl(ctxt);
      /*
       * The declaration has to be closed by "?>". A lone '>' and any other
       * character both raise XML_ERR_XMLDECL_NOT_FINISHED.
       */
10555 if ((RAW == '?') && (NXT(1) == '>')) {
10557 } else if (RAW == '>') {
10558 /* Deprecated old WD ... */
10559 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10562 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
      /* NOTE(review): MOVETO_ENDTAG presumably advances to the next '>' - confirm against the macro. */
10563 MOVETO_ENDTAG(CUR_PTR);
10570 * @ctxt: an XML parser context
10572 * parse an XML Misc* optional field.
10574 * [27] Misc ::= Comment | PI | S
10578 xmlParseMisc(xmlParserCtxtPtr ctxt) {
      /*
       * Keep going while the parser is not in the EOF state and the input
       * starts with "<?", with "<!--", or with a blank character.
       */
10579 while ((ctxt->instate != XML_PARSER_EOF) &&
10580 (((RAW == '<') && (NXT(1) == '?')) ||
10581 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10582 IS_BLANK_CH(CUR))) {
      /* Dispatch on which of the three alternatives matched. */
10583 if ((RAW == '<') && (NXT(1) == '?')) {
10585 } else if (IS_BLANK_CH(CUR)) {
      /* Remaining alternative of the loop condition: "<!--". */
10588 xmlParseComment(ctxt);
10593 * xmlParseDocument:
10594 * @ctxt: an XML parser context
10596 * parse an XML document (and build a tree if using the standard SAX
10599 * [1] document ::= prolog element Misc*
10601 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10603 * Returns 0, or -1 in case of error. The parser context is augmented
10604 * as a result of the parsing.
10608 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10610 xmlCharEncoding enc;
      /* Both a parser context and a current input are required. */
10614 if ((ctxt == NULL) || (ctxt->input == NULL))
10620 * SAX: detecting the level.
10622 xmlDetectSAX2(ctxt);
10625 * SAX: beginning of the document processing.
10627 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10628 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
      /* Re-check the parser state after the SAX callback above. */
10629 if (ctxt->instate == XML_PARSER_EOF)
      /*
       * Encoding detection from the leading bytes is only attempted when
       * no encoding is set on the context and at least 4 bytes are available.
       */
10632 if ((ctxt->encoding == NULL) &&
10633 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10635 * Get the 4 first bytes and decode the charset
10636 * if enc != XML_CHAR_ENCODING_NONE
10637 * plug some encoding conversion routines.
10643 enc = xmlDetectCharEncoding(&start[0], 4);
10644 if (enc != XML_CHAR_ENCODING_NONE) {
10645 xmlSwitchEncoding(ctxt, enc);
10651 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10655 * Check for the XMLDecl in the Prolog.
10656 * do not GROW here to avoid the detected encoder to decode more
10657 * than just the first line, unless the amount of data is really
10658 * too small to hold "<?xml version="1.0" encoding="foo"
10660 if ((ctxt->input->end - ctxt->input->cur) < 35) {
      /* An XML declaration is recognised as "<?xml" followed by a blank. */
10663 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10666 * Note that we will switch encoding on the fly.
10668 xmlParseXMLDecl(ctxt);
10669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10671 * The XML REC instructs us to stop parsing right here
10675 ctxt->standalone = ctxt->input->standalone;
      /* NOTE(review): presumably the branch without an XML declaration - default version string. */
10678 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
      /* startDocument is only delivered while SAX is not disabled. */
10680 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10681 ctxt->sax->startDocument(ctxt->userData);
10682 if (ctxt->instate == XML_PARSER_EOF)
10686 * The Misc part of the Prolog
10689 xmlParseMisc(ctxt);
10692 * Then possibly doc type declaration(s) and more Misc
10693 * (doctypedecl Misc*)?
10696 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
      /* inSubset is 1 around the DOCTYPE / internal subset and 2 around the external subset callback. */
10698 ctxt->inSubset = 1;
10699 xmlParseDocTypeDecl(ctxt);
10701 ctxt->instate = XML_PARSER_DTD;
10702 xmlParseInternalSubset(ctxt);
10703 if (ctxt->instate == XML_PARSER_EOF)
10708 * Create and update the external subset.
10710 ctxt->inSubset = 2;
10711 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10712 (!ctxt->disableSAX))
10713 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10714 ctxt->extSubSystem, ctxt->extSubURI);
10715 if (ctxt->instate == XML_PARSER_EOF)
10717 ctxt->inSubset = 0;
10719 xmlCleanSpecialAttr(ctxt);
      /* Back to the prolog state for the Misc items that may follow the DOCTYPE. */
10721 ctxt->instate = XML_PARSER_PROLOG;
10722 xmlParseMisc(ctxt);
10726 * Time to start parsing the tree itself
10730 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10731 "Start tag expected, '<' not found\n");
      /* Root element: CONTENT state while it is parsed, EPILOG state afterwards. */
10733 ctxt->instate = XML_PARSER_CONTENT;
10734 xmlParseElement(ctxt);
10735 ctxt->instate = XML_PARSER_EPILOG;
10739 * The Misc part at the end
10741 xmlParseMisc(ctxt);
10744 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10746 ctxt->instate = XML_PARSER_EOF;
10750 * SAX: end of the document processing.
10752 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10753 ctxt->sax->endDocument(ctxt->userData);
10756 * Remove locally kept entity definitions if the tree was not built
10758 if ((ctxt->myDoc != NULL) &&
10759 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10760 xmlFreeDoc(ctxt->myDoc);
10761 ctxt->myDoc = NULL;
      /* Record the outcome of the parse as property flags on the resulting document. */
10764 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10765 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10767 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10768 if (ctxt->nsWellFormed)
10769 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10770 if (ctxt->options & XML_PARSE_OLD10)
10771 ctxt->myDoc->properties |= XML_DOC_OLD10;
10773 if (! ctxt->wellFormed) {
10781 * xmlParseExtParsedEnt:
10782 * @ctxt: an XML parser context
10784 * parse a general parsed entity
10785 * An external general parsed entity is well-formed if it matches the
10786 * production labeled extParsedEnt.
10788 * [78] extParsedEnt ::= TextDecl? content
10790 * Returns 0, or -1 in case of error. The parser context is augmented
10791 * as a result of the parsing.
10795 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10797 xmlCharEncoding enc;
      /* Both a parser context and a current input are required. */
10799 if ((ctxt == NULL) || (ctxt->input == NULL))
10802 xmlDefaultSAXHandlerInit();
10804 xmlDetectSAX2(ctxt);
10809 * SAX: beginning of the document processing.
10811 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10815 * Get the 4 first bytes and decode the charset
10816 * if enc != XML_CHAR_ENCODING_NONE
10817 * plug some encoding conversion routines.
10819 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10824 enc = xmlDetectCharEncoding(start, 4);
10825 if (enc != XML_CHAR_ENCODING_NONE) {
10826 xmlSwitchEncoding(ctxt, enc);
10832 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836 * Check for the XMLDecl in the Prolog.
10839 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10842 * Note that we will switch encoding on the fly.
10844 xmlParseXMLDecl(ctxt);
10845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10847 * The XML REC instructs us to stop parsing right here
10853 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
      /* startDocument is only delivered while SAX is not disabled. */
10855 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10856 ctxt->sax->startDocument(ctxt->userData);
10857 if (ctxt->instate == XML_PARSER_EOF)
10861 * Doing validity checking on chunk doesn't make sense
10863 ctxt->instate = XML_PARSER_CONTENT;
10864 ctxt->validate = 0;
10865 ctxt->loadsubset = 0;
      /* The entity body is parsed as element content. */
10868 xmlParseContent(ctxt);
10869 if (ctxt->instate == XML_PARSER_EOF)
      /*
       * After the content, "</" means unbalanced markup, and any other
       * non-zero character is reported as extra content.
       */
10872 if ((RAW == '<') && (NXT(1) == '/')) {
10873 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10874 } else if (RAW != 0) {
10875 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10879 * SAX: end of the document processing.
10881 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10882 ctxt->sax->endDocument(ctxt->userData);
      /* -1 signals an entity that is not well-formed. */
10884 if (! ctxt->wellFormed) return(-1);
10888 #ifdef LIBXML_PUSH_ENABLED
10889 /************************************************************************
10891 * Progressive parsing interfaces *
10893 ************************************************************************/
10896 * xmlParseLookupSequence:
10897 * @ctxt: an XML parser context
10898 * @first: the first char to lookup
10899 * @next: the next char to lookup or zero
10900 * @third: the next char to lookup or zero
10902 * Try to find if a sequence (first, next, third) or just (first next) or
10903 * (first) is available in the input stream.
10904 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10905 * to avoid rescanning sequences of bytes, it DOES change the state of the
10906 * parser, do not use liberally.
10908 * Returns the index to the current parsing point if the full sequence
10909 * is available, -1 otherwise.
10912 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10913 xmlChar next, xmlChar third) {
10915 xmlParserInputPtr in;
10916 const xmlChar *buf;
10919 if (in == NULL) return(-1);
      /* Scan start: offset of the current position from in->base ... */
10920 base = in->cur - in->base;
10921 if (base < 0) return(-1);
      /* ... or the saved checkIndex when that lies further along. */
10922 if (ctxt->checkIndex > base)
10923 base = ctxt->checkIndex;
10924 if (in->buf == NULL) {
      /* Here the bytes and the length to scan come from the input buffer's xmlBuf. */
10928 buf = xmlBufContent(in->buf->buffer);
10929 len = xmlBufUse(in->buf->buffer);
10931 /* take into account the sequence length */
10932 if (third) len -= 2;
10933 else if (next) len --;
10934 for (;base < len;base++) {
10935 if (buf[base] == first) {
      /* `first` matched: also compare the following byte(s) for longer sequences. */
10937 if ((buf[base + 1] != next) ||
10938 (buf[base + 2] != third)) continue;
10939 } else if (next != 0) {
10940 if (buf[base + 1] != next) continue;
      /* Match: the saved scan position is reset. */
10942 ctxt->checkIndex = 0;
10945 xmlGenericError(xmlGenericErrorContext,
10946 "PP: lookup '%c' found at %d\n",
10948 else if (third == 0)
10949 xmlGenericError(xmlGenericErrorContext,
10950 "PP: lookup '%c%c' found at %d\n",
10951 first, next, base);
10953 xmlGenericError(xmlGenericErrorContext,
10954 "PP: lookup '%c%c%c' found at %d\n",
10955 first, next, third, base);
      /* The result is relative to in->cur, not to the start of the buffer. */
10957 return(base - (in->cur - in->base));
      /* No match: remember how far the scan got so a later call can resume there. */
10960 ctxt->checkIndex = base;
10963 xmlGenericError(xmlGenericErrorContext,
10964 "PP: lookup '%c' failed\n", first);
10965 else if (third == 0)
10966 xmlGenericError(xmlGenericErrorContext,
10967 "PP: lookup '%c%c' failed\n", first, next);
10969 xmlGenericError(xmlGenericErrorContext,
10970 "PP: lookup '%c%c%c' failed\n", first, next, third);
10976 * xmlParseGetLasts:
10977 * @ctxt: an XML parser context
10978 * @lastlt: pointer to store the last '<' from the input
10979 * @lastgt: pointer to store the last '>' from the input
10981 * Lookup the last < and > in the current chunk
10984 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10985 const xmlChar **lastgt) {
10986 const xmlChar *tmp;
      /* All three pointers are required; otherwise an internal error is logged. */
10988 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10989 xmlGenericError(xmlGenericErrorContext,
10990 "Internal error: xmlParseGetLasts\n");
      /* The search is only done in progressive mode with a single input on the stack. */
10993 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10994 tmp = ctxt->input->end;
      /* Walk backwards from the end of the input looking for '<'. */
10996 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10997 if (tmp < ctxt->input->base) {
      /*
       * Forward scan for '>' that steps over runs delimited by ' or ",
       * so a '>' inside a quoted run is not taken as the match.
       */
11003 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11004 if (*tmp == '\'') {
11006 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11007 if (tmp < ctxt->input->end) tmp++;
11008 } else if (*tmp == '"') {
11010 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11011 if (tmp < ctxt->input->end) tmp++;
11015 if (tmp < ctxt->input->end)
      /* Backward scan for '>', bounded by the start of the input. */
11020 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11021 if (tmp >= ctxt->input->base)
11033 * xmlCheckCdataPush:
11034 * @utf: pointer to the block of characters
11035 * @len: length of the block in bytes
11037 * Check that the block of characters is okay as CData content [20]
11039 * Returns the number of bytes to pass if okay, a negative index where a
11040 * UTF-8 error occurred otherwise
11043 xmlCheckCdataPush(const xmlChar *utf, int len) {
      /* A NULL pointer or a non-positive length is handled up front. */
11048 if ((utf == NULL) || (len <= 0))
11051 for (ix = 0; ix < len;) { /* string is 0-terminated */
11053 if ((c & 0x80) == 0x00) { /* 1-byte code, high bit clear (0xxxxxxx) */
11056 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11060 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
      /* Sequence cut off by the end of the block: return the index reached so far. */
11061 if (ix + 2 > len) return(ix);
      /* The trailing byte must be a continuation byte (10xxxxxx). */
11062 if ((utf[ix+1] & 0xc0 ) != 0x80)
      /* Rebuild the code point from the payload bits and test it with xmlIsCharQ. */
11064 codepoint = (utf[ix] & 0x1f) << 6;
11065 codepoint |= utf[ix+1] & 0x3f;
11066 if (!xmlIsCharQ(codepoint))
11069 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
      /* Same three steps as the 2-byte case, with two continuation bytes. */
11070 if (ix + 3 > len) return(ix);
11071 if (((utf[ix+1] & 0xc0) != 0x80) ||
11072 ((utf[ix+2] & 0xc0) != 0x80))
11074 codepoint = (utf[ix] & 0xf) << 12;
11075 codepoint |= (utf[ix+1] & 0x3f) << 6;
11076 codepoint |= utf[ix+2] & 0x3f;
11077 if (!xmlIsCharQ(codepoint))
11080 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
      /* Same three steps again, with three continuation bytes. */
11081 if (ix + 4 > len) return(ix);
11082 if (((utf[ix+1] & 0xc0) != 0x80) ||
11083 ((utf[ix+2] & 0xc0) != 0x80) ||
11084 ((utf[ix+3] & 0xc0) != 0x80))
11086 codepoint = (utf[ix] & 0x7) << 18;
11087 codepoint |= (utf[ix+1] & 0x3f) << 12;
11088 codepoint |= (utf[ix+2] & 0x3f) << 6;
11089 codepoint |= utf[ix+3] & 0x3f;
11090 if (!xmlIsCharQ(codepoint))
11093 } else /* unknown encoding */
11100 * xmlParseTryOrFinish:
11101 * @ctxt: an XML parser context
11102 * @terminate: last chunk indicator
11104 * Try to progress on parsing
11106 * Returns zero if no parsing was possible
11109 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11113 const xmlChar *lastlt, *lastgt;
11115 if (ctxt->input == NULL)
11119 switch (ctxt->instate) {
11120 case XML_PARSER_EOF:
11121 xmlGenericError(xmlGenericErrorContext,
11122 "PP: try EOF\n"); break;
11123 case XML_PARSER_START:
11124 xmlGenericError(xmlGenericErrorContext,
11125 "PP: try START\n"); break;
11126 case XML_PARSER_MISC:
11127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: try MISC\n");break;
11129 case XML_PARSER_COMMENT:
11130 xmlGenericError(xmlGenericErrorContext,
11131 "PP: try COMMENT\n");break;
11132 case XML_PARSER_PROLOG:
11133 xmlGenericError(xmlGenericErrorContext,
11134 "PP: try PROLOG\n");break;
11135 case XML_PARSER_START_TAG:
11136 xmlGenericError(xmlGenericErrorContext,
11137 "PP: try START_TAG\n");break;
11138 case XML_PARSER_CONTENT:
11139 xmlGenericError(xmlGenericErrorContext,
11140 "PP: try CONTENT\n");break;
11141 case XML_PARSER_CDATA_SECTION:
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: try CDATA_SECTION\n");break;
11144 case XML_PARSER_END_TAG:
11145 xmlGenericError(xmlGenericErrorContext,
11146 "PP: try END_TAG\n");break;
11147 case XML_PARSER_ENTITY_DECL:
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: try ENTITY_DECL\n");break;
11150 case XML_PARSER_ENTITY_VALUE:
11151 xmlGenericError(xmlGenericErrorContext,
11152 "PP: try ENTITY_VALUE\n");break;
11153 case XML_PARSER_ATTRIBUTE_VALUE:
11154 xmlGenericError(xmlGenericErrorContext,
11155 "PP: try ATTRIBUTE_VALUE\n");break;
11156 case XML_PARSER_DTD:
11157 xmlGenericError(xmlGenericErrorContext,
11158 "PP: try DTD\n");break;
11159 case XML_PARSER_EPILOG:
11160 xmlGenericError(xmlGenericErrorContext,
11161 "PP: try EPILOG\n");break;
11162 case XML_PARSER_PI:
11163 xmlGenericError(xmlGenericErrorContext,
11164 "PP: try PI\n");break;
11165 case XML_PARSER_IGNORE:
11166 xmlGenericError(xmlGenericErrorContext,
11167 "PP: try IGNORE\n");break;
11171 if ((ctxt->input != NULL) &&
11172 (ctxt->input->cur - ctxt->input->base > 4096)) {
11174 ctxt->checkIndex = 0;
11176 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11178 while (ctxt->instate != XML_PARSER_EOF) {
11179 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11184 * Pop-up of finished entities.
11186 while ((RAW == 0) && (ctxt->inputNr > 1))
11189 if (ctxt->input == NULL) break;
11190 if (ctxt->input->buf == NULL)
11191 avail = ctxt->input->length -
11192 (ctxt->input->cur - ctxt->input->base);
11195 * If we are operating on converted input, try to flush
11196 * remaining chars to avoid them stalling in the non-converted
11197 * buffer. But do not do this in document start where
11198 * encoding="..." may not have been read and we work on a
11199 * guessed encoding.
11201 if ((ctxt->instate != XML_PARSER_START) &&
11202 (ctxt->input->buf->raw != NULL) &&
11203 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11204 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11206 size_t current = ctxt->input->cur - ctxt->input->base;
11208 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11209 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11212 avail = xmlBufUse(ctxt->input->buf->buffer) -
11213 (ctxt->input->cur - ctxt->input->base);
11217 switch (ctxt->instate) {
11218 case XML_PARSER_EOF:
11220 * Document parsing is done !
11223 case XML_PARSER_START:
11224 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11226 xmlCharEncoding enc;
11229 * Very first chars read from the document flow.
11235 * Get the 4 first bytes and decode the charset
11236 * if enc != XML_CHAR_ENCODING_NONE
11237 * plug some encoding conversion routines,
11238 * else xmlSwitchEncoding will set to (default)
11245 enc = xmlDetectCharEncoding(start, 4);
11246 xmlSwitchEncoding(ctxt, enc);
11252 cur = ctxt->input->cur[0];
11253 next = ctxt->input->cur[1];
11255 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11256 ctxt->sax->setDocumentLocator(ctxt->userData,
11257 &xmlDefaultSAXLocator);
11258 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11259 ctxt->instate = XML_PARSER_EOF;
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: entering EOF\n");
11264 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11265 ctxt->sax->endDocument(ctxt->userData);
11268 if ((cur == '<') && (next == '?')) {
11269 /* PI or XML decl */
11270 if (avail < 5) return(ret);
11271 if ((!terminate) &&
11272 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11275 ctxt->sax->setDocumentLocator(ctxt->userData,
11276 &xmlDefaultSAXLocator);
11277 if ((ctxt->input->cur[2] == 'x') &&
11278 (ctxt->input->cur[3] == 'm') &&
11279 (ctxt->input->cur[4] == 'l') &&
11280 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11283 xmlGenericError(xmlGenericErrorContext,
11284 "PP: Parsing XML Decl\n");
11286 xmlParseXMLDecl(ctxt);
11287 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11289 * The XML REC instructs us to stop parsing right
11292 ctxt->instate = XML_PARSER_EOF;
11295 ctxt->standalone = ctxt->input->standalone;
11296 if ((ctxt->encoding == NULL) &&
11297 (ctxt->input->encoding != NULL))
11298 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11299 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11300 (!ctxt->disableSAX))
11301 ctxt->sax->startDocument(ctxt->userData);
11302 ctxt->instate = XML_PARSER_MISC;
11304 xmlGenericError(xmlGenericErrorContext,
11305 "PP: entering MISC\n");
11308 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11309 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11310 (!ctxt->disableSAX))
11311 ctxt->sax->startDocument(ctxt->userData);
11312 ctxt->instate = XML_PARSER_MISC;
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: entering MISC\n");
11319 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11320 ctxt->sax->setDocumentLocator(ctxt->userData,
11321 &xmlDefaultSAXLocator);
11322 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11323 if (ctxt->version == NULL) {
11324 xmlErrMemory(ctxt, NULL);
11327 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11328 (!ctxt->disableSAX))
11329 ctxt->sax->startDocument(ctxt->userData);
11330 ctxt->instate = XML_PARSER_MISC;
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: entering MISC\n");
11337 case XML_PARSER_START_TAG: {
11338 const xmlChar *name;
11339 const xmlChar *prefix = NULL;
11340 const xmlChar *URI = NULL;
11341 int nsNr = ctxt->nsNr;
11343 if ((avail < 2) && (ctxt->inputNr == 1))
11345 cur = ctxt->input->cur[0];
11347 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11348 ctxt->instate = XML_PARSER_EOF;
11349 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11350 ctxt->sax->endDocument(ctxt->userData);
11354 if (ctxt->progressive) {
11355 /* > can be found unescaped in attribute values */
11356 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11358 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11362 if (ctxt->spaceNr == 0)
11363 spacePush(ctxt, -1);
11364 else if (*ctxt->space == -2)
11365 spacePush(ctxt, -1);
11367 spacePush(ctxt, *ctxt->space);
11368 #ifdef LIBXML_SAX1_ENABLED
11370 #endif /* LIBXML_SAX1_ENABLED */
11371 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11372 #ifdef LIBXML_SAX1_ENABLED
11374 name = xmlParseStartTag(ctxt);
11375 #endif /* LIBXML_SAX1_ENABLED */
11376 if (ctxt->instate == XML_PARSER_EOF)
11378 if (name == NULL) {
11380 ctxt->instate = XML_PARSER_EOF;
11381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11382 ctxt->sax->endDocument(ctxt->userData);
11385 #ifdef LIBXML_VALID_ENABLED
11387 * [ VC: Root Element Type ]
11388 * The Name in the document type declaration must match
11389 * the element type of the root element.
11391 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11392 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11393 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11394 #endif /* LIBXML_VALID_ENABLED */
11397 * Check for an Empty Element.
11399 if ((RAW == '/') && (NXT(1) == '>')) {
11403 if ((ctxt->sax != NULL) &&
11404 (ctxt->sax->endElementNs != NULL) &&
11405 (!ctxt->disableSAX))
11406 ctxt->sax->endElementNs(ctxt->userData, name,
11408 if (ctxt->nsNr - nsNr > 0)
11409 nsPop(ctxt, ctxt->nsNr - nsNr);
11410 #ifdef LIBXML_SAX1_ENABLED
11412 if ((ctxt->sax != NULL) &&
11413 (ctxt->sax->endElement != NULL) &&
11414 (!ctxt->disableSAX))
11415 ctxt->sax->endElement(ctxt->userData, name);
11416 #endif /* LIBXML_SAX1_ENABLED */
11418 if (ctxt->instate == XML_PARSER_EOF)
11421 if (ctxt->nameNr == 0) {
11422 ctxt->instate = XML_PARSER_EPILOG;
11424 ctxt->instate = XML_PARSER_CONTENT;
11426 ctxt->progressive = 1;
11432 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11433 "Couldn't find end of Start Tag %s\n",
11439 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11440 #ifdef LIBXML_SAX1_ENABLED
11442 namePush(ctxt, name);
11443 #endif /* LIBXML_SAX1_ENABLED */
11445 ctxt->instate = XML_PARSER_CONTENT;
11446 ctxt->progressive = 1;
11449 case XML_PARSER_CONTENT: {
11450 const xmlChar *test;
11452 if ((avail < 2) && (ctxt->inputNr == 1))
11454 cur = ctxt->input->cur[0];
11455 next = ctxt->input->cur[1];
11458 cons = ctxt->input->consumed;
11459 if ((cur == '<') && (next == '/')) {
11460 ctxt->instate = XML_PARSER_END_TAG;
11462 } else if ((cur == '<') && (next == '?')) {
11463 if ((!terminate) &&
11464 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11465 ctxt->progressive = XML_PARSER_PI;
11469 ctxt->instate = XML_PARSER_CONTENT;
11470 ctxt->progressive = 1;
11471 } else if ((cur == '<') && (next != '!')) {
11472 ctxt->instate = XML_PARSER_START_TAG;
11474 } else if ((cur == '<') && (next == '!') &&
11475 (ctxt->input->cur[2] == '-') &&
11476 (ctxt->input->cur[3] == '-')) {
11481 ctxt->input->cur += 4;
11482 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11483 ctxt->input->cur -= 4;
11484 if ((!terminate) && (term < 0)) {
11485 ctxt->progressive = XML_PARSER_COMMENT;
11488 xmlParseComment(ctxt);
11489 ctxt->instate = XML_PARSER_CONTENT;
11490 ctxt->progressive = 1;
11491 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11492 (ctxt->input->cur[2] == '[') &&
11493 (ctxt->input->cur[3] == 'C') &&
11494 (ctxt->input->cur[4] == 'D') &&
11495 (ctxt->input->cur[5] == 'A') &&
11496 (ctxt->input->cur[6] == 'T') &&
11497 (ctxt->input->cur[7] == 'A') &&
11498 (ctxt->input->cur[8] == '[')) {
11500 ctxt->instate = XML_PARSER_CDATA_SECTION;
11502 } else if ((cur == '<') && (next == '!') &&
11505 } else if (cur == '&') {
11506 if ((!terminate) &&
11507 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11509 xmlParseReference(ctxt);
11511 /* TODO Avoid the extra copy, handle directly !!! */
11513 * Goal of the following test is:
11514 * - minimize calls to the SAX 'character' callback
11515 * when they are mergeable
11516 * - handle a problem for isBlank when we only parse
11517 * a sequence of blank chars and the next one is
11518 * not available to check against '<' presence.
11519 * - tries to homogenize the differences in SAX
11520 * callbacks between the push and pull versions
11523 if ((ctxt->inputNr == 1) &&
11524 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11526 if (ctxt->progressive) {
11527 if ((lastlt == NULL) ||
11528 (ctxt->input->cur > lastlt))
11530 } else if (xmlParseLookupSequence(ctxt,
11536 ctxt->checkIndex = 0;
11537 xmlParseCharData(ctxt, 0);
11540 * Pop-up of finished entities.
11542 while ((RAW == 0) && (ctxt->inputNr > 1))
11544 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11545 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11546 "detected an error in element content\n");
11547 ctxt->instate = XML_PARSER_EOF;
11552 case XML_PARSER_END_TAG:
11556 if (ctxt->progressive) {
11557 /* > can be found unescaped in attribute values */
11558 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11560 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11565 xmlParseEndTag2(ctxt,
11566 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11567 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11568 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11571 #ifdef LIBXML_SAX1_ENABLED
11573 xmlParseEndTag1(ctxt, 0);
11574 #endif /* LIBXML_SAX1_ENABLED */
11575 if (ctxt->instate == XML_PARSER_EOF) {
11577 } else if (ctxt->nameNr == 0) {
11578 ctxt->instate = XML_PARSER_EPILOG;
11580 ctxt->instate = XML_PARSER_CONTENT;
11583 case XML_PARSER_CDATA_SECTION: {
11585 * The Push mode need to have the SAX callback for
11586 * cdataBlock merge back contiguous callbacks.
11590 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11592 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11595 tmp = xmlCheckCdataPush(ctxt->input->cur,
11596 XML_PARSER_BIG_BUFFER_SIZE);
11599 ctxt->input->cur += tmp;
11600 goto encoding_error;
11602 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11603 if (ctxt->sax->cdataBlock != NULL)
11604 ctxt->sax->cdataBlock(ctxt->userData,
11605 ctxt->input->cur, tmp);
11606 else if (ctxt->sax->characters != NULL)
11607 ctxt->sax->characters(ctxt->userData,
11608 ctxt->input->cur, tmp);
11610 if (ctxt->instate == XML_PARSER_EOF)
11613 ctxt->checkIndex = 0;
11619 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11620 if ((tmp < 0) || (tmp != base)) {
11622 ctxt->input->cur += tmp;
11623 goto encoding_error;
11625 if ((ctxt->sax != NULL) && (base == 0) &&
11626 (ctxt->sax->cdataBlock != NULL) &&
11627 (!ctxt->disableSAX)) {
11629 * Special case to provide identical behaviour
11630 * between pull and push parsers on empty CDATA
11633 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11634 (!strncmp((const char *)&ctxt->input->cur[-9],
11636 ctxt->sax->cdataBlock(ctxt->userData,
11638 } else if ((ctxt->sax != NULL) && (base > 0) &&
11639 (!ctxt->disableSAX)) {
11640 if (ctxt->sax->cdataBlock != NULL)
11641 ctxt->sax->cdataBlock(ctxt->userData,
11642 ctxt->input->cur, base);
11643 else if (ctxt->sax->characters != NULL)
11644 ctxt->sax->characters(ctxt->userData,
11645 ctxt->input->cur, base);
11647 if (ctxt->instate == XML_PARSER_EOF)
11650 ctxt->checkIndex = 0;
11651 ctxt->instate = XML_PARSER_CONTENT;
11653 xmlGenericError(xmlGenericErrorContext,
11654 "PP: entering CONTENT\n");
11659 case XML_PARSER_MISC:
11661 if (ctxt->input->buf == NULL)
11662 avail = ctxt->input->length -
11663 (ctxt->input->cur - ctxt->input->base);
11665 avail = xmlBufUse(ctxt->input->buf->buffer) -
11666 (ctxt->input->cur - ctxt->input->base);
11669 cur = ctxt->input->cur[0];
11670 next = ctxt->input->cur[1];
11671 if ((cur == '<') && (next == '?')) {
11672 if ((!terminate) &&
11673 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11674 ctxt->progressive = XML_PARSER_PI;
11678 xmlGenericError(xmlGenericErrorContext,
11679 "PP: Parsing PI\n");
11682 if (ctxt->instate == XML_PARSER_EOF)
11684 ctxt->instate = XML_PARSER_MISC;
11685 ctxt->progressive = 1;
11686 ctxt->checkIndex = 0;
11687 } else if ((cur == '<') && (next == '!') &&
11688 (ctxt->input->cur[2] == '-') &&
11689 (ctxt->input->cur[3] == '-')) {
11690 if ((!terminate) &&
11691 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11692 ctxt->progressive = XML_PARSER_COMMENT;
11696 xmlGenericError(xmlGenericErrorContext,
11697 "PP: Parsing Comment\n");
11699 xmlParseComment(ctxt);
11700 if (ctxt->instate == XML_PARSER_EOF)
11702 ctxt->instate = XML_PARSER_MISC;
11703 ctxt->progressive = 1;
11704 ctxt->checkIndex = 0;
11705 } else if ((cur == '<') && (next == '!') &&
11706 (ctxt->input->cur[2] == 'D') &&
11707 (ctxt->input->cur[3] == 'O') &&
11708 (ctxt->input->cur[4] == 'C') &&
11709 (ctxt->input->cur[5] == 'T') &&
11710 (ctxt->input->cur[6] == 'Y') &&
11711 (ctxt->input->cur[7] == 'P') &&
11712 (ctxt->input->cur[8] == 'E')) {
11713 if ((!terminate) &&
11714 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11715 ctxt->progressive = XML_PARSER_DTD;
11719 xmlGenericError(xmlGenericErrorContext,
11720 "PP: Parsing internal subset\n");
11722 ctxt->inSubset = 1;
11723 ctxt->progressive = 0;
11724 ctxt->checkIndex = 0;
11725 xmlParseDocTypeDecl(ctxt);
11726 if (ctxt->instate == XML_PARSER_EOF)
11729 ctxt->instate = XML_PARSER_DTD;
11731 xmlGenericError(xmlGenericErrorContext,
11732 "PP: entering DTD\n");
11736 * Create and update the external subset.
11738 ctxt->inSubset = 2;
11739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11740 (ctxt->sax->externalSubset != NULL))
11741 ctxt->sax->externalSubset(ctxt->userData,
11742 ctxt->intSubName, ctxt->extSubSystem,
11744 ctxt->inSubset = 0;
11745 xmlCleanSpecialAttr(ctxt);
11746 ctxt->instate = XML_PARSER_PROLOG;
11748 xmlGenericError(xmlGenericErrorContext,
11749 "PP: entering PROLOG\n");
11752 } else if ((cur == '<') && (next == '!') &&
11756 ctxt->instate = XML_PARSER_START_TAG;
11757 ctxt->progressive = XML_PARSER_START_TAG;
11758 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11760 xmlGenericError(xmlGenericErrorContext,
11761 "PP: entering START_TAG\n");
11765 case XML_PARSER_PROLOG:
11767 if (ctxt->input->buf == NULL)
11768 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11770 avail = xmlBufUse(ctxt->input->buf->buffer) -
11771 (ctxt->input->cur - ctxt->input->base);
11774 cur = ctxt->input->cur[0];
11775 next = ctxt->input->cur[1];
11776 if ((cur == '<') && (next == '?')) {
11777 if ((!terminate) &&
11778 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11779 ctxt->progressive = XML_PARSER_PI;
11783 xmlGenericError(xmlGenericErrorContext,
11784 "PP: Parsing PI\n");
11787 if (ctxt->instate == XML_PARSER_EOF)
11789 ctxt->instate = XML_PARSER_PROLOG;
11790 ctxt->progressive = 1;
11791 } else if ((cur == '<') && (next == '!') &&
11792 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11793 if ((!terminate) &&
11794 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11795 ctxt->progressive = XML_PARSER_COMMENT;
11799 xmlGenericError(xmlGenericErrorContext,
11800 "PP: Parsing Comment\n");
11802 xmlParseComment(ctxt);
11803 if (ctxt->instate == XML_PARSER_EOF)
11805 ctxt->instate = XML_PARSER_PROLOG;
11806 ctxt->progressive = 1;
11807 } else if ((cur == '<') && (next == '!') &&
11811 ctxt->instate = XML_PARSER_START_TAG;
11812 if (ctxt->progressive == 0)
11813 ctxt->progressive = XML_PARSER_START_TAG;
11814 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11816 xmlGenericError(xmlGenericErrorContext,
11817 "PP: entering START_TAG\n");
11821 case XML_PARSER_EPILOG:
11823 if (ctxt->input->buf == NULL)
11824 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11826 avail = xmlBufUse(ctxt->input->buf->buffer) -
11827 (ctxt->input->cur - ctxt->input->base);
11830 cur = ctxt->input->cur[0];
11831 next = ctxt->input->cur[1];
11832 if ((cur == '<') && (next == '?')) {
11833 if ((!terminate) &&
11834 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11835 ctxt->progressive = XML_PARSER_PI;
11839 xmlGenericError(xmlGenericErrorContext,
11840 "PP: Parsing PI\n");
11843 if (ctxt->instate == XML_PARSER_EOF)
11845 ctxt->instate = XML_PARSER_EPILOG;
11846 ctxt->progressive = 1;
11847 } else if ((cur == '<') && (next == '!') &&
11848 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11849 if ((!terminate) &&
11850 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11851 ctxt->progressive = XML_PARSER_COMMENT;
11855 xmlGenericError(xmlGenericErrorContext,
11856 "PP: Parsing Comment\n");
11858 xmlParseComment(ctxt);
11859 if (ctxt->instate == XML_PARSER_EOF)
11861 ctxt->instate = XML_PARSER_EPILOG;
11862 ctxt->progressive = 1;
11863 } else if ((cur == '<') && (next == '!') &&
11867 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11868 ctxt->instate = XML_PARSER_EOF;
11870 xmlGenericError(xmlGenericErrorContext,
11871 "PP: entering EOF\n");
11873 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11874 ctxt->sax->endDocument(ctxt->userData);
11878 case XML_PARSER_DTD: {
11880 * Sorry but progressive parsing of the internal subset
11881 * is not expected to be supported. We first check that
11882 * the full content of the internal subset is available and
11883 * the parsing is launched only at that point.
11884 * Internal subset ends up with "']' S? '>'" in an unescaped
11885 * section and not in a ']]>' sequence which are conditional
11886 * sections (whoever argued to keep that crap in XML deserve
11887 * a place in hell !).
11894 base = ctxt->input->cur - ctxt->input->base;
11895 if (base < 0) return(0);
11896 if (ctxt->checkIndex > base)
11897 base = ctxt->checkIndex;
11898 buf = xmlBufContent(ctxt->input->buf->buffer);
11899 use = xmlBufUse(ctxt->input->buf->buffer);
11900 for (;(unsigned int) base < use; base++) {
11902 if (buf[base] == quote)
11906 if ((quote == 0) && (buf[base] == '<')) {
11908 /* special handling of comments */
11909 if (((unsigned int) base + 4 < use) &&
11910 (buf[base + 1] == '!') &&
11911 (buf[base + 2] == '-') &&
11912 (buf[base + 3] == '-')) {
11913 for (;(unsigned int) base + 3 < use; base++) {
11914 if ((buf[base] == '-') &&
11915 (buf[base + 1] == '-') &&
11916 (buf[base + 2] == '>')) {
11924 fprintf(stderr, "unfinished comment\n");
11931 if (buf[base] == '"') {
11935 if (buf[base] == '\'') {
11939 if (buf[base] == ']') {
11941 fprintf(stderr, "%c%c%c%c: ", buf[base],
11942 buf[base + 1], buf[base + 2], buf[base + 3]);
11944 if ((unsigned int) base +1 >= use)
11946 if (buf[base + 1] == ']') {
11947 /* conditional crap, skip both ']' ! */
11951 for (i = 1; (unsigned int) base + i < use; i++) {
11952 if (buf[base + i] == '>') {
11954 fprintf(stderr, "found\n");
11956 goto found_end_int_subset;
11958 if (!IS_BLANK_CH(buf[base + i])) {
11960 fprintf(stderr, "not found\n");
11962 goto not_end_of_int_subset;
11966 fprintf(stderr, "end of stream\n");
11971 not_end_of_int_subset:
11972 continue; /* for */
11975 * We didn't found the end of the Internal subset
11978 ctxt->checkIndex = base;
11980 ctxt->checkIndex = 0;
11983 xmlGenericError(xmlGenericErrorContext,
11984 "PP: lookup of int subset end filed\n");
11988 found_end_int_subset:
11989 ctxt->checkIndex = 0;
11990 xmlParseInternalSubset(ctxt);
11991 if (ctxt->instate == XML_PARSER_EOF)
11993 ctxt->inSubset = 2;
11994 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11995 (ctxt->sax->externalSubset != NULL))
11996 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11997 ctxt->extSubSystem, ctxt->extSubURI);
11998 ctxt->inSubset = 0;
11999 xmlCleanSpecialAttr(ctxt);
12000 if (ctxt->instate == XML_PARSER_EOF)
12002 ctxt->instate = XML_PARSER_PROLOG;
12003 ctxt->checkIndex = 0;
12005 xmlGenericError(xmlGenericErrorContext,
12006 "PP: entering PROLOG\n");
12010 case XML_PARSER_COMMENT:
12011 xmlGenericError(xmlGenericErrorContext,
12012 "PP: internal error, state == COMMENT\n");
12013 ctxt->instate = XML_PARSER_CONTENT;
12015 xmlGenericError(xmlGenericErrorContext,
12016 "PP: entering CONTENT\n");
12019 case XML_PARSER_IGNORE:
12020 xmlGenericError(xmlGenericErrorContext,
12021 "PP: internal error, state == IGNORE");
12022 ctxt->instate = XML_PARSER_DTD;
12024 xmlGenericError(xmlGenericErrorContext,
12025 "PP: entering DTD\n");
12028 case XML_PARSER_PI:
12029 xmlGenericError(xmlGenericErrorContext,
12030 "PP: internal error, state == PI\n");
12031 ctxt->instate = XML_PARSER_CONTENT;
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: entering CONTENT\n");
12037 case XML_PARSER_ENTITY_DECL:
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: internal error, state == ENTITY_DECL\n");
12040 ctxt->instate = XML_PARSER_DTD;
12042 xmlGenericError(xmlGenericErrorContext,
12043 "PP: entering DTD\n");
12046 case XML_PARSER_ENTITY_VALUE:
12047 xmlGenericError(xmlGenericErrorContext,
12048 "PP: internal error, state == ENTITY_VALUE\n");
12049 ctxt->instate = XML_PARSER_CONTENT;
12051 xmlGenericError(xmlGenericErrorContext,
12052 "PP: entering DTD\n");
12055 case XML_PARSER_ATTRIBUTE_VALUE:
12056 xmlGenericError(xmlGenericErrorContext,
12057 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12058 ctxt->instate = XML_PARSER_START_TAG;
12060 xmlGenericError(xmlGenericErrorContext,
12061 "PP: entering START_TAG\n");
12064 case XML_PARSER_SYSTEM_LITERAL:
12065 xmlGenericError(xmlGenericErrorContext,
12066 "PP: internal error, state == SYSTEM_LITERAL\n");
12067 ctxt->instate = XML_PARSER_START_TAG;
12069 xmlGenericError(xmlGenericErrorContext,
12070 "PP: entering START_TAG\n");
12073 case XML_PARSER_PUBLIC_LITERAL:
12074 xmlGenericError(xmlGenericErrorContext,
12075 "PP: internal error, state == PUBLIC_LITERAL\n");
12076 ctxt->instate = XML_PARSER_START_TAG;
12078 xmlGenericError(xmlGenericErrorContext,
12079 "PP: entering START_TAG\n");
12086 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12093 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12094 ctxt->input->cur[0], ctxt->input->cur[1],
12095 ctxt->input->cur[2], ctxt->input->cur[3]);
12096 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12097 "Input is not proper UTF-8, indicate encoding !\n%s",
12098 BAD_CAST buffer, NULL);
12104 * xmlParseCheckTransition:
12105 * @ctxt: an XML parser context
12106 * @chunk: a char array
12107 * @size: the size in byte of the chunk
12109 * Check depending on the current parser state if the chunk given must be
12110 * processed immediately or one needs more data to advance on parsing.
12112 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
/*
 * Heuristic used by the push parser: each construct tested below (start
 * tag, comment, CDATA section, PI, end tag, DTD) is closed by a '>', so the
 * freshly pushed bytes are scanned for that character with memchr().
 * The result statements of each test are not visible in this excerpt.
 */
12115 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
/* Argument sanity check: missing context or chunk, or a negative size. */
12116 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
/* Waiting inside a start tag. */
12118 if (ctxt->instate == XML_PARSER_START_TAG) {
12119 if (memchr(chunk, '>', size) != NULL)
/* progressive holds XML_PARSER_COMMENT while a comment terminator is still being awaited. */
12123 if (ctxt->progressive == XML_PARSER_COMMENT) {
12124 if (memchr(chunk, '>', size) != NULL)
/* Waiting inside a CDATA section. */
12128 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12129 if (memchr(chunk, '>', size) != NULL)
/* progressive holds XML_PARSER_PI while the end of a processing instruction is awaited. */
12133 if (ctxt->progressive == XML_PARSER_PI) {
12134 if (memchr(chunk, '>', size) != NULL)
/* Waiting inside an end tag. */
12138 if (ctxt->instate == XML_PARSER_END_TAG) {
12139 if (memchr(chunk, '>', size) != NULL)
/* DTD pending, flagged either through progressive or through the parser state. */
12143 if ((ctxt->progressive == XML_PARSER_DTD) ||
12144 (ctxt->instate == XML_PARSER_DTD)) {
12145 if (memchr(chunk, '>', size) != NULL)
12154 * @ctxt: an XML parser context
12155 * @chunk: a char array
12156 * @size: the size in byte of the chunk
12157 * @terminate: last chunk indicator
12159 * Parse a Chunk of memory
12161 * Returns zero if no error, the xmlParserErrors otherwise.
/*
 * Push-mode entry point: appends @chunk to the current input buffer and
 * calls xmlParseTryOrFinish() to consume what can be parsed so far. The
 * block near the end applies the end-of-document checks.
 */
12164 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12168 size_t old_avail = 0;
12172 return(XML_ERR_INTERNAL_ERROR);
/* An earlier error that disabled SAX is sticky: the stored code is reported again. */
12173 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12174 return(ctxt->errNo);
12175 if (ctxt->instate == XML_PARSER_EOF)
/* First call on a fresh context: settle the SAX flavour before parsing. */
12177 if (ctxt->instate == XML_PARSER_START)
12178 xmlDetectSAX2(ctxt);
/*
 * A non-final chunk that ends in '\r' is special-cased. The handling
 * statements are not visible in this excerpt; see the end_in_lf test
 * further down, which pushes a lone "\r" after parsing.
 */
12179 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12180 (chunk[size - 1] == '\r')) {
/* Data to add and a live input buffer: take the push path. */
12187 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12188 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/* base and cur are saved as offsets and handed back to xmlBufSetInputBaseCur() after the push. */
12189 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12190 size_t cur = ctxt->input->cur - ctxt->input->base;
/* Amount buffered before this push; used below as the offset of the new data. */
12193 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12195 * Specific handling if we autodetected an encoding, we should not
12196 * push more than the first line ... which depend on the encoding
12197 * And only push the rest once the final encoding was detected
12199 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12200 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
/* Byte budget for the first push. NOTE(review): presumably adjusted for UTF-16 / UCS-4 below; those assignments are not visible here. */
12201 unsigned int len = 45;
12203 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12204 BAD_CAST "UTF-16")) ||
12205 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12206 BAD_CAST "UTF16")))
12208 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12209 BAD_CAST "UCS-4")) ||
12210 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
/* Raw bytes already consumed count against the budget. */
12214 if (ctxt->input->buf->rawconsumed < len)
12215 len -= ctxt->input->buf->rawconsumed;
12218 * Change size for reading the initial declaration only
12219 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12220 * will blindly copy extra bytes from memory.
12222 if ((unsigned int) size > len) {
12223 remain = size - len;
12229 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/*
 * NOTE(review): XML_PARSER_EOF is used elsewhere in this function as a
 * parser state (compared against ctxt->instate), yet here it is stored in
 * errNo and returned as the error code. Confirm this is intended.
 */
12231 ctxt->errNo = XML_PARSER_EOF;
12232 ctxt->disableSAX = 1;
12233 return (XML_PARSER_EOF);
/* Re-establish the input pointers from the saved offsets. */
12235 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12237 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* No new data to push: run any pending raw input through the encoder instead. */
12240 } else if (ctxt->instate != XML_PARSER_EOF) {
12241 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12242 xmlParserInputBufferPtr in = ctxt->input->buf;
12243 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12244 (in->raw != NULL)) {
12246 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12247 size_t current = ctxt->input->cur - ctxt->input->base;
12249 nbchars = xmlCharEncInput(in, terminate);
12252 xmlGenericError(xmlGenericErrorContext,
12253 "xmlParseChunk: encoder error\n");
12254 return(XML_ERR_INVALID_ENCODING);
12256 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
/* Non-terminating parse attempt; the condition guarding it is not visible in this excerpt. */
12261 xmlParseTryOrFinish(ctxt, 0);
12263 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12264 avail = xmlBufUse(ctxt->input->buf->buffer);
12266 * Depending on the current state it may not be such
12267 * a good idea to try parsing if there is nothing in the chunk
12268 * which would be worth doing a parser state transition and we
12269 * need to wait for more data
12271 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12272 (old_avail == 0) || (avail == 0) ||
12273 (xmlParseCheckTransition(ctxt,
12274 (const char *)&ctxt->input->base[old_avail],
12275 avail - old_avail)))
12276 xmlParseTryOrFinish(ctxt, terminate);
12278 if (ctxt->instate == XML_PARSER_EOF)
12279 return(ctxt->errNo);
/*
 * Without XML_PARSE_HUGE, stop with an internal error once either the
 * unparsed part or the already scanned part of the input exceeds
 * XML_MAX_LOOKUP_LIMIT.
 */
12281 if ((ctxt->input != NULL) &&
12282 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12283 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12284 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12285 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12286 ctxt->instate = XML_PARSER_EOF;
12288 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12289 return(ctxt->errNo);
/* When end_in_lf is set, a single "\r" is pushed, again preserving base/cur across the push. */
12297 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12298 (ctxt->input->buf != NULL)) {
12299 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12301 size_t current = ctxt->input->cur - ctxt->input->base;
12303 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12305 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12310 * Check for termination
/* cur_avail: bytes still unread, from the fixed length or from the buffer's use count. */
12314 if (ctxt->input != NULL) {
12315 if (ctxt->input->buf == NULL)
12316 cur_avail = ctxt->input->length -
12317 (ctxt->input->cur - ctxt->input->base);
12319 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12320 (ctxt->input->cur - ctxt->input->base);
/* Ending in any state other than EOF or EPILOG is reported as XML_ERR_DOCUMENT_END. */
12323 if ((ctxt->instate != XML_PARSER_EOF) &&
12324 (ctxt->instate != XML_PARSER_EPILOG)) {
12325 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* So is ending in EPILOG with unread bytes left over. */
12327 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12328 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* endDocument is fired only if the parser had not already reached EOF. */
12330 if (ctxt->instate != XML_PARSER_EOF) {
12331 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12332 ctxt->sax->endDocument(ctxt->userData);
12334 ctxt->instate = XML_PARSER_EOF;
/* A document flagged as not well-formed yields the stored error code. */
12336 if (ctxt->wellFormed == 0)
12337 return((xmlParserErrors) ctxt->errNo);
12342 /************************************************************************
12344 * I/O front end functions to the parser *
12346 ************************************************************************/
12349 * xmlCreatePushParserCtxt:
12350 * @sax: a SAX handler
12351 * @user_data: The user data returned on SAX callbacks
12352 * @chunk: a pointer to an array of chars
12353 * @size: number of chars in the array
12354 * @filename: an optional file name or URI
12356 * Create a parser context for using the XML parser in push mode.
12357 * If @chunk and @size are non-NULL, the data is used to detect
12358 * the encoding. The remaining characters will be parsed so they
12359 * don't need to be fed in again through xmlParseChunk.
12360 * To allow content encoding detection, @size should be >= 4
12361 * The value of @filename is used for fetching external entities
12362 * and error/warning reports.
12364 * Returns the new parser context or NULL
/*
 * Builds a push-mode parser context: allocates the input buffer and the
 * context, installs a private copy of the caller's SAX handler, attaches an
 * input stream backed by the buffer and pushes the optional first @chunk.
 */
12368 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12369 const char *chunk, int size, const char *filename) {
12370 xmlParserCtxtPtr ctxt;
12371 xmlParserInputPtr inputStream;
12372 xmlParserInputBufferPtr buf;
12373 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12376 * plug some encoding conversion routines
12378 if ((chunk != NULL) && (size >= 4))
12379 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
/* enc is still XML_CHAR_ENCODING_NONE here when no chunk or fewer than 4 bytes were supplied. */
12381 buf = xmlAllocParserInputBuffer(enc);
12382 if (buf == NULL) return(NULL);
12384 ctxt = xmlNewParserCtxt();
12385 if (ctxt == NULL) {
12386 xmlErrMemory(NULL, "creating parser: out of memory\n");
12387 xmlFreeParserInputBuffer(buf);
12390 ctxt->dictNames = 1;
/* Three slots per name-stack entry; the END_TAG state reads pushTab[nameNr * 3 - 3 .. nameNr * 3 - 1]. */
12391 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12392 if (ctxt->pushTab == NULL) {
12393 xmlErrMemory(ctxt, NULL);
12394 xmlFreeParserInputBuffer(buf);
12395 xmlFreeParserCtxt(ctxt);
/* With SAX1 compiled in, the context's handler is freed only when it is not the shared default handler. */
12399 #ifdef LIBXML_SAX1_ENABLED
12400 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12401 #endif /* LIBXML_SAX1_ENABLED */
12402 xmlFree(ctxt->sax);
12403 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12404 if (ctxt->sax == NULL) {
12405 xmlErrMemory(ctxt, NULL);
12406 xmlFreeParserInputBuffer(buf);
12407 xmlFreeParserCtxt(ctxt);
/*
 * Start from a zeroed handler, then copy the caller's: the whole struct
 * for a SAX2 handler (XML_SAX2_MAGIC), otherwise only sizeof(xmlSAXHandlerV1)
 * bytes. NOTE(review): sax is dereferenced here; the NULL guard is
 * presumably on a line not visible in this excerpt.
 */
12410 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12411 if (sax->initialized == XML_SAX2_MAGIC)
12412 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12414 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12415 if (user_data != NULL)
12416 ctxt->userData = user_data;
/* The base directory is derived from @filename when one is given. */
12418 if (filename == NULL) {
12419 ctxt->directory = NULL;
12421 ctxt->directory = xmlParserGetDirectory(filename);
12424 inputStream = xmlNewInputStream(ctxt);
12425 if (inputStream == NULL) {
12426 xmlFreeParserCtxt(ctxt);
12427 xmlFreeParserInputBuffer(buf);
/* The stream's filename is the canonic form of @filename; a failed canonicalisation aborts creation. */
12431 if (filename == NULL)
12432 inputStream->filename = NULL;
12434 inputStream->filename = (char *)
12435 xmlCanonicPath((const xmlChar *) filename);
12436 if (inputStream->filename == NULL) {
12437 xmlFreeParserCtxt(ctxt);
12438 xmlFreeParserInputBuffer(buf);
/* Attach the buffer to the stream, reset the stream's pointers onto it, and make it the current input. */
12442 inputStream->buf = buf;
12443 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12444 inputPush(ctxt, inputStream);
12447 * If the caller didn't provide an initial 'chunk' for determining
12448 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12449 * that it can be automatically determined later
12451 if ((size == 0) || (chunk == NULL)) {
12452 ctxt->charset = XML_CHAR_ENCODING_NONE;
12453 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
/*
 * base and cur are saved as offsets and restored after the push.
 * NOTE(review): the result of xmlParserInputBufferPush() is not checked
 * here, whereas xmlParseChunk() stores it in res. Confirm a failed push
 * can be ignored at this point.
 */
12454 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12455 size_t cur = ctxt->input->cur - ctxt->input->base;
12457 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12459 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12461 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* Apply the encoding detected from the first bytes, if any. */
12465 if (enc != XML_CHAR_ENCODING_NONE) {
12466 xmlSwitchEncoding(ctxt, enc);
12471 #endif /* LIBXML_PUSH_ENABLED */
12475 * @ctxt: an XML parser context
12477 * Blocks further parser processing
/*
 * Puts the parser into its terminal state on behalf of the user: state
 * becomes EOF, the error code records a user stop and SAX callbacks are
 * disabled.
 */
12480 xmlStopParser(xmlParserCtxtPtr ctxt) {
12483 ctxt->instate = XML_PARSER_EOF;
12484 ctxt->errNo = XML_ERR_USER_STOP;
12485 ctxt->disableSAX = 1;
/* Point the current input at an empty string literal so cur and base coincide on a terminating 0 byte. */
12486 if (ctxt->input != NULL) {
12487 ctxt->input->cur = BAD_CAST"";
12488 ctxt->input->base = ctxt->input->cur;
12493 * xmlCreateIOParserCtxt:
12494 * @sax: a SAX handler
12495 * @user_data: The user data returned on SAX callbacks
12496 * @ioread: an I/O read function
12497 * @ioclose: an I/O close function
12498 * @ioctx: an I/O handler
12499 * @enc: the charset encoding if known
12501 * Create a parser context for using the XML parser with an existing
12504 * Returns the new parser context or NULL
/*
 * Builds a parser context that reads through the caller's I/O callbacks:
 * wraps them in an input buffer, creates the context, installs a private
 * copy of the caller's SAX handler and pushes an input stream made from
 * the buffer.
 */
12507 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12508 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12509 void *ioctx, xmlCharEncoding enc) {
12510 xmlParserCtxtPtr ctxt;
12511 xmlParserInputPtr inputStream;
12512 xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
12514 if (ioread == NULL) return(NULL);
12516 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
/* Buffer creation failure path: presumably the close callback is invoked on @ioctx (the call itself is not visible here). */
12518 if (ioclose != NULL)
12523 ctxt = xmlNewParserCtxt();
12524 if (ctxt == NULL) {
12525 xmlFreeParserInputBuffer(buf);
/* With SAX1 compiled in, the context's handler is freed only when it is not the shared default handler. */
12529 #ifdef LIBXML_SAX1_ENABLED
12530 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12531 #endif /* LIBXML_SAX1_ENABLED */
12532 xmlFree(ctxt->sax);
12533 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
/*
 * NOTE(review): no xmlFreeParserInputBuffer(buf) is visible on this failure
 * path, nor on the inputStream failure path below, while the ctxt == NULL
 * path above does release buf. buf is only handed to xmlNewIOInputStream()
 * later, so this looks like a leak. Confirm.
 */
12534 if (ctxt->sax == NULL) {
12535 xmlErrMemory(ctxt, NULL);
12536 xmlFreeParserCtxt(ctxt);
/* Zeroed handler first, then the caller's: full struct for SAX2 (XML_SAX2_MAGIC), else only sizeof(xmlSAXHandlerV1) bytes. */
12539 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12540 if (sax->initialized == XML_SAX2_MAGIC)
12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12543 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12544 if (user_data != NULL)
12545 ctxt->userData = user_data;
/* Wrap the buffer in an input stream and make it the current input. */
12548 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12549 if (inputStream == NULL) {
12550 xmlFreeParserCtxt(ctxt);
12553 inputPush(ctxt, inputStream);
12558 #ifdef LIBXML_VALID_ENABLED
12559 /************************************************************************
12561 * Front ends when parsing a DTD *
12563 ************************************************************************/
12567 * @sax: the SAX handler block or NULL
12568 * @input: an Input Buffer
12569 * @enc: the charset encoding if known
12571 * Load and parse a DTD
12573 * Returns the resulting xmlDtdPtr or NULL in case of error.
12574 * @input will be freed by the function in any case.
/*
 * Parses a DTD read from @input as an external subset: a temporary parser
 * context and a scratch document are created, the subset is parsed into
 * the document's extSubset, and that DTD is detached and returned when the
 * parse was well-formed.
 */
12578 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12579 xmlCharEncoding enc) {
12580 xmlDtdPtr ret = NULL;
12581 xmlParserCtxtPtr ctxt;
12582 xmlParserInputPtr pinput = NULL;
12588 ctxt = xmlNewParserCtxt();
12589 if (ctxt == NULL) {
12590 xmlFreeParserInputBuffer(input);
12595 * Set-up the SAX context
12598 if (ctxt->sax != NULL)
12599 xmlFree(ctxt->sax);
/* The context itself is handed to the callbacks as user data. */
12601 ctxt->userData = ctxt;
12603 xmlDetectSAX2(ctxt);
12606 * generate a parser input from the I/O handler
12609 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12610 if (pinput == NULL) {
/* ctxt->sax is cleared before the context is freed whenever the caller supplied @sax, presumably so xmlFreeParserCtxt() does not release the caller's handler. */
12611 if (sax != NULL) ctxt->sax = NULL;
12612 xmlFreeParserInputBuffer(input);
12613 xmlFreeParserCtxt(ctxt);
12618 * plug some encoding conversion routines here.
12620 if (xmlPushInput(ctxt, pinput) < 0) {
12621 if (sax != NULL) ctxt->sax = NULL;
12622 xmlFreeParserCtxt(ctxt);
/* An explicitly requested encoding is applied right away. */
12625 if (enc != XML_CHAR_ENCODING_NONE) {
12626 xmlSwitchEncoding(ctxt, enc);
12629 pinput->filename = NULL;
/* Restart the input at the current read position, with no free callback. */
12632 pinput->base = ctxt->input->cur;
12633 pinput->cur = ctxt->input->cur;
12634 pinput->free = NULL;
12637 * let's parse that entity knowing it's an external subset.
12639 ctxt->inSubset = 2;
12640 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
/*
 * NOTE(review): on this failure path no xmlFreeParserCtxt(ctxt) (and no
 * reset of ctxt->sax) is visible, whereas the equivalent path in
 * xmlSAXParseDTD() does both. Looks like the context leaks here. Confirm.
 */
12641 if (ctxt->myDoc == NULL) {
12642 xmlErrMemory(ctxt, "New Doc failed");
12645 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12646 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12647 BAD_CAST "none", BAD_CAST "none");
/* No encoding given by the caller: try to detect one from the first 4 bytes when that many are available. */
12649 if ((enc == XML_CHAR_ENCODING_NONE) &&
12650 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12652 * Get the 4 first bytes and decode the charset
12653 * if enc != XML_CHAR_ENCODING_NONE
12654 * plug some encoding conversion routines.
12660 enc = xmlDetectCharEncoding(start, 4);
12661 if (enc != XML_CHAR_ENCODING_NONE) {
12662 xmlSwitchEncoding(ctxt, enc);
12666 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12668 if (ctxt->myDoc != NULL) {
/* On a well-formed parse the external subset is unhooked from the scratch document so it is not reachable through myDoc->extSubset when the document is freed. */
12669 if (ctxt->wellFormed) {
12670 ret = ctxt->myDoc->extSubset;
12671 ctxt->myDoc->extSubset = NULL;
/* Walk over the DTD's children; the per-node statements are not visible in this excerpt. */
12676 tmp = ret->children;
12677 while (tmp != NULL) {
12685 xmlFreeDoc(ctxt->myDoc);
12686 ctxt->myDoc = NULL;
12688 if (sax != NULL) ctxt->sax = NULL;
12689 xmlFreeParserCtxt(ctxt);
12696 * @sax: the SAX handler block
12697 * @ExternalID: a NAME* containing the External ID of the DTD
12698 * @SystemID: a NAME* containing the URL to the DTD
12700 * Load and parse an external subset.
12702 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * Loads the DTD identified by @ExternalID / @SystemID through the SAX
 * entity resolver and parses it as an external subset of a scratch
 * document; the resulting DTD is detached and returned when the parse was
 * well-formed.
 */
12706 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12707 const xmlChar *SystemID) {
12708 xmlDtdPtr ret = NULL;
12709 xmlParserCtxtPtr ctxt;
12710 xmlParserInputPtr input = NULL;
12711 xmlCharEncoding enc;
12712 xmlChar* systemIdCanonic;
/* At least one identifier is required. */
12714 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12716 ctxt = xmlNewParserCtxt();
12717 if (ctxt == NULL) {
12722 * Set-up the SAX context
12725 if (ctxt->sax != NULL)
12726 xmlFree(ctxt->sax);
/* The context itself is handed to the callbacks as user data. */
12728 ctxt->userData = ctxt;
12732 * Canonicalise the system ID
12734 systemIdCanonic = xmlCanonicPath(SystemID);
/*
 * NOTE(review): every other error exit below resets ctxt->sax to NULL
 * (when @sax was supplied) before xmlFreeParserCtxt(); this one does not.
 * If ctxt->sax points at the caller's handler at this point, the caller's
 * handler would be freed with the context. Confirm.
 */
12735 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12736 xmlFreeParserCtxt(ctxt);
12741 * Ask the Entity resolver to load the damn thing
12744 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12745 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12747 if (input == NULL) {
12748 if (sax != NULL) ctxt->sax = NULL;
12749 xmlFreeParserCtxt(ctxt);
12750 if (systemIdCanonic != NULL)
12751 xmlFree(systemIdCanonic);
12756 * plug some encoding conversion routines here.
12758 if (xmlPushInput(ctxt, input) < 0) {
12759 if (sax != NULL) ctxt->sax = NULL;
12760 xmlFreeParserCtxt(ctxt);
12761 if (systemIdCanonic != NULL)
12762 xmlFree(systemIdCanonic);
/*
 * Encoding detection from the first 4 bytes. NOTE(review): the result is
 * passed to xmlSwitchEncoding() without the XML_CHAR_ENCODING_NONE check
 * that xmlIOParseDTD() performs. Confirm this is harmless.
 */
12765 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12766 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12767 xmlSwitchEncoding(ctxt, enc);
/* The canonic system ID becomes the input's filename when it has none; the xmlFree() below is presumably the alternative branch (its else line is not visible). */
12770 if (input->filename == NULL)
12771 input->filename = (char *) systemIdCanonic;
12773 xmlFree(systemIdCanonic);
/* Restart the input at the current read position, with no free callback. */
12776 input->base = ctxt->input->cur;
12777 input->cur = ctxt->input->cur;
12778 input->free = NULL;
12781 * let's parse that entity knowing it's an external subset.
12783 ctxt->inSubset = 2;
12784 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12785 if (ctxt->myDoc == NULL) {
12786 xmlErrMemory(ctxt, "New Doc failed");
12787 if (sax != NULL) ctxt->sax = NULL;
12788 xmlFreeParserCtxt(ctxt);
12791 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793 ExternalID, SystemID);
12794 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12796 if (ctxt->myDoc != NULL) {
/* On a well-formed parse the external subset is unhooked from the scratch document so it is not reachable through myDoc->extSubset when the document is freed. */
12797 if (ctxt->wellFormed) {
12798 ret = ctxt->myDoc->extSubset;
12799 ctxt->myDoc->extSubset = NULL;
/* Walk over the DTD's children; the per-node statements are not visible in this excerpt. */
12804 tmp = ret->children;
12805 while (tmp != NULL) {
12813 xmlFreeDoc(ctxt->myDoc);
12814 ctxt->myDoc = NULL;
12816 if (sax != NULL) ctxt->sax = NULL;
12817 xmlFreeParserCtxt(ctxt);
12825 * @ExternalID: a NAME* containing the External ID of the DTD
12826 * @SystemID: a NAME* containing the URL to the DTD
12828 * Load and parse an external subset.
12830 * Returns the resulting xmlDtdPtr or NULL in case of error.
12834 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
/* Convenience entry point: delegates to xmlSAXParseDTD() with a NULL SAX handler. */
12835 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12837 #endif /* LIBXML_VALID_ENABLED */
12839 /************************************************************************
12841 * Front ends when parsing an Entity *
12843 ************************************************************************/
12846 * xmlParseCtxtExternalEntity:
12847 * @ctx: the existing parsing context
12848 * @URL: the URL for the entity to load
12849 * @ID: the System ID for the entity to load
12850 * @lst: the return value for the set of parsed nodes
12852 * Parse an external general entity within an existing parsing context
12853 * An external general parsed entity is well-formed if it matches the
12854 * production labeled extParsedEnt.
12856 * [78] extParsedEnt ::= TextDecl? content
12858 * Returns 0 if the entity is well formed, -1 in case of args problem and
12859 * the parser error code otherwise
12863 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12864 const xmlChar *ID, xmlNodePtr *lst) {
/*
 * Overview: builds a child parser context for the entity named by URL/ID,
 * runs xmlParseContent() on it under a temporary "pseudoroot" node, copies
 * the validation flags back to ctx and frees the temporary objects.
 * NOTE(review): some short lines of this function (declarations, returns,
 * closing braces) are not present in this view; the notes below only cover
 * the statements that are shown.
 */
12865 xmlParserCtxtPtr ctxt;
12867 xmlNodePtr newRoot;
12868 xmlSAXHandlerPtr oldsax = NULL;
12871 xmlCharEncoding enc;
12873 if (ctx == NULL) return(-1);
/* Recursion guard: a depth above 40 is refused unless XML_PARSE_HUGE is set; a depth above 1024 is always refused. */
12875 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12876 (ctx->depth > 1024)) {
12877 return(XML_ERR_ENTITY_LOOP);
12882 if ((URL == NULL) && (ID == NULL))
12884 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12887 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12888 if (ctxt == NULL) {
12892 oldsax = ctxt->sax;
/* From here ctxt borrows the parent's SAX handler; oldsax keeps the child's own handler so it can be put back before ctxt is freed. */
12893 ctxt->sax = ctx->sax;
12894 xmlDetectSAX2(ctxt);
12895 newDoc = xmlNewDoc(BAD_CAST "1.0");
12896 if (newDoc == NULL) {
/* NOTE(review): unlike the newRoot failure path below, ctxt->sax is not reset to oldsax before the context is freed here; confirm xmlFreeParserCtxt() cannot release the handler borrowed from ctx. */
12897 xmlFreeParserCtxt(ctxt);
12900 newDoc->properties = XML_DOC_INTERNAL;
12901 if (ctx->myDoc->dict) {
/* Share the parent document's dictionary and register the additional user through xmlDictReference(). */
12902 newDoc->dict = ctx->myDoc->dict;
12903 xmlDictReference(newDoc->dict);
12905 if (ctx->myDoc != NULL) {
/* The DTD subsets are only borrowed: both pointers are reset to NULL on newDoc before every xmlFreeDoc(newDoc) in this function. */
12906 newDoc->intSubset = ctx->myDoc->intSubset;
12907 newDoc->extSubset = ctx->myDoc->extSubset;
12909 if (ctx->myDoc->URL != NULL) {
12910 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12912 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12913 if (newRoot == NULL) {
12914 ctxt->sax = oldsax;
12915 xmlFreeParserCtxt(ctxt);
12916 newDoc->intSubset = NULL;
12917 newDoc->extSubset = NULL;
12918 xmlFreeDoc(newDoc);
12921 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12922 nodePush(ctxt, newDoc->children);
12923 if (ctx->myDoc == NULL) {
12924 ctxt->myDoc = newDoc;
12926 ctxt->myDoc = ctx->myDoc;
12927 newDoc->children->doc = ctx->myDoc;
12931 * Get the 4 first bytes and decode the charset
12932 * if enc != XML_CHAR_ENCODING_NONE
12933 * plug some encoding conversion routines.
12936 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12941 enc = xmlDetectCharEncoding(start, 4);
12942 if (enc != XML_CHAR_ENCODING_NONE) {
12943 xmlSwitchEncoding(ctxt, enc);
12948 * Parse a possible text declaration first
12950 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12951 xmlParseTextDecl(ctxt);
12953 * An XML-1.0 document can't reference an entity not XML-1.0
12955 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12956 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12957 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12958 "Version mismatch between document and entity\n");
12963 * If the user provided its own SAX callbacks then reuse the
12964 * useData callback field, otherwise the expected setup in a
12965 * DOM builder is to have userData == ctxt
12967 if (ctx->userData == ctx)
12968 ctxt->userData = ctxt;
12970 ctxt->userData = ctx->userData;
12973 * Doing validity checking on chunk doesn't make sense
12975 ctxt->instate = XML_PARSER_CONTENT;
12976 ctxt->validate = ctx->validate;
12977 ctxt->valid = ctx->valid;
12978 ctxt->loadsubset = ctx->loadsubset;
12979 ctxt->depth = ctx->depth + 1;
12980 ctxt->replaceEntities = ctx->replaceEntities;
12981 if (ctxt->validate) {
12982 ctxt->vctxt.error = ctx->vctxt.error;
12983 ctxt->vctxt.warning = ctx->vctxt.warning;
12985 ctxt->vctxt.error = NULL;
12986 ctxt->vctxt.warning = NULL;
12988 ctxt->vctxt.nodeTab = NULL;
12989 ctxt->vctxt.nodeNr = 0;
12990 ctxt->vctxt.nodeMax = 0;
12991 ctxt->vctxt.node = NULL;
/* Swap the child's own dictionary for the parent's and look up the three well-known strings in it. */
12992 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12993 ctxt->dict = ctx->dict;
12994 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12995 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12996 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12997 ctxt->dictNames = ctx->dictNames;
/* The attribute default/special tables are borrowed from ctx; they are set back to NULL on ctxt before it is freed. */
12998 ctxt->attsDefault = ctx->attsDefault;
12999 ctxt->attsSpecial = ctx->attsSpecial;
13000 ctxt->linenumbers = ctx->linenumbers;
13002 xmlParseContent(ctxt);
/* Hand the validation flags resulting from the child parse back to the parent context. */
13004 ctx->validate = ctxt->validate;
13005 ctx->valid = ctxt->valid;
/* Input left over after the content production is an error: a pending end tag means unbalanced markup, anything else is extra content. */
13006 if ((RAW == '<') && (NXT(1) == '/')) {
13007 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13008 } else if (RAW != 0) {
13009 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13011 if (ctxt->node != newDoc->children) {
13012 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13015 if (!ctxt->wellFormed) {
13016 if (ctxt->errNo == 0)
13025 * Return the newly created nodeset after unlinking it from
13026 * they pseudo parent.
13028 cur = newDoc->children->children;
13030 while (cur != NULL) {
13031 cur->parent = NULL;
13034 newDoc->children->children = NULL;
13038 ctxt->sax = oldsax;
/* Cleanup: with the child's own SAX handler restored, the borrowed attribute tables and DTD subsets are detached, presumably so that freeing ctxt and newDoc leaves the parent's data alone. */
13040 ctxt->attsDefault = NULL;
13041 ctxt->attsSpecial = NULL;
13042 xmlFreeParserCtxt(ctxt);
13043 newDoc->intSubset = NULL;
13044 newDoc->extSubset = NULL;
13045 xmlFreeDoc(newDoc);
13051 * xmlParseExternalEntityPrivate:
13052 * @doc: the document the chunk pertains to
13053 * @oldctxt: the previous parser context if available
13054 * @sax: the SAX handler block (possibly NULL)
13055 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13056 * @depth: Used for loop detection, use 0
13057 * @URL: the URL for the entity to load
13058 * @ID: the System ID for the entity to load
13059 * @list: the return value for the set of parsed nodes
13061 * Private version of xmlParseExternalEntity()
13063 * Returns 0 if the entity is well formed, -1 in case of args problem and
13064 * the parser error code otherwise
13067 static xmlParserErrors
13068 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13069 xmlSAXHandlerPtr sax,
13070 void *user_data, int depth, const xmlChar *URL,
13071 const xmlChar *ID, xmlNodePtr *list) {
/*
 * Overview: parses the entity named by URL/ID in a freshly created parser
 * context, inheriting settings from oldctxt when one is supplied, with the
 * parsed nodes attached to a temporary "pseudoroot" of a scratch document.
 * The result is XML_ERR_OK or an error code taken from the child context.
 * NOTE(review): some short lines of this function (declarations, returns,
 * closing braces, a few conditions) are not present in this view; the notes
 * below only cover the statements that are shown.
 */
13072 xmlParserCtxtPtr ctxt;
13074 xmlNodePtr newRoot;
13075 xmlSAXHandlerPtr oldsax = NULL;
13076 xmlParserErrors ret = XML_ERR_OK;
13078 xmlCharEncoding enc;
/* Recursion guard: without a parent context, or without XML_PARSE_HUGE on it, a depth above 40 is treated as an entity loop. */
13080 if (((depth > 40) &&
13081 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13083 return(XML_ERR_ENTITY_LOOP);
13088 if ((URL == NULL) && (ID == NULL))
13089 return(XML_ERR_INTERNAL_ERROR);
13091 return(XML_ERR_INTERNAL_ERROR);
13094 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13095 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13096 ctxt->userData = ctxt;
13097 if (oldctxt != NULL) {
/* Inherit parser settings from the enclosing context; node_seq is copied field by field, so both contexts reference the same buffer for now. */
13098 ctxt->_private = oldctxt->_private;
13099 ctxt->loadsubset = oldctxt->loadsubset;
13100 ctxt->validate = oldctxt->validate;
13101 ctxt->external = oldctxt->external;
13102 ctxt->record_info = oldctxt->record_info;
13103 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13104 ctxt->node_seq.length = oldctxt->node_seq.length;
13105 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13108 * Doing validity checking on chunk without context
13109 * doesn't make sense
13111 ctxt->_private = NULL;
13112 ctxt->validate = 0;
13113 ctxt->external = 2;
13114 ctxt->loadsubset = 0;
13117 oldsax = ctxt->sax;
13119 if (user_data != NULL)
13120 ctxt->userData = user_data;
13122 xmlDetectSAX2(ctxt);
13123 newDoc = xmlNewDoc(BAD_CAST "1.0");
13124 if (newDoc == NULL) {
/* Clear the node_seq copy first, presumably so that freeing ctxt cannot release the buffer still referenced by oldctxt. */
13125 ctxt->node_seq.maximum = 0;
13126 ctxt->node_seq.length = 0;
13127 ctxt->node_seq.buffer = NULL;
13128 xmlFreeParserCtxt(ctxt);
13129 return(XML_ERR_INTERNAL_ERROR);
13131 newDoc->properties = XML_DOC_INTERNAL;
/* The DTD subsets are borrowed from doc and reset to NULL before each xmlFreeDoc(newDoc); the dictionary is shared and registered through xmlDictReference(). */
13132 newDoc->intSubset = doc->intSubset;
13133 newDoc->extSubset = doc->extSubset;
13134 newDoc->dict = doc->dict;
13135 xmlDictReference(newDoc->dict);
13137 if (doc->URL != NULL) {
13138 newDoc->URL = xmlStrdup(doc->URL);
13140 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13141 if (newRoot == NULL) {
13143 ctxt->sax = oldsax;
13144 ctxt->node_seq.maximum = 0;
13145 ctxt->node_seq.length = 0;
13146 ctxt->node_seq.buffer = NULL;
13147 xmlFreeParserCtxt(ctxt);
13148 newDoc->intSubset = NULL;
13149 newDoc->extSubset = NULL;
13150 xmlFreeDoc(newDoc);
13151 return(XML_ERR_INTERNAL_ERROR);
13153 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13154 nodePush(ctxt, newDoc->children);
13156 newRoot->doc = doc;
13159 * Get the 4 first bytes and decode the charset
13160 * if enc != XML_CHAR_ENCODING_NONE
13161 * plug some encoding conversion routines.
13164 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13169 enc = xmlDetectCharEncoding(start, 4);
13170 if (enc != XML_CHAR_ENCODING_NONE) {
13171 xmlSwitchEncoding(ctxt, enc);
13176 * Parse a possible text declaration first
13178 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13179 xmlParseTextDecl(ctxt);
13182 ctxt->instate = XML_PARSER_CONTENT;
13183 ctxt->depth = depth;
13185 xmlParseContent(ctxt);
/* Input left over after the content production is an error: a pending end tag means unbalanced markup, anything else is extra content. */
13187 if ((RAW == '<') && (NXT(1) == '/')) {
13188 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13189 } else if (RAW != 0) {
13190 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13192 if (ctxt->node != newDoc->children) {
13193 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13196 if (!ctxt->wellFormed) {
/* A malformed parse that recorded no error number is reported as an internal error. */
13197 if (ctxt->errNo == 0)
13198 ret = XML_ERR_INTERNAL_ERROR;
13200 ret = (xmlParserErrors)ctxt->errNo;
13202 if (list != NULL) {
13206 * Return the newly created nodeset after unlinking it from
13207 * they pseudo parent.
13209 cur = newDoc->children->children;
13211 while (cur != NULL) {
13212 cur->parent = NULL;
13215 newDoc->children->children = NULL;
13221 * Record in the parent context the number of entities replacement
13222 * done when parsing that reference.
13224 if (oldctxt != NULL)
13225 oldctxt->nbentities += ctxt->nbentities;
13228 * Also record the size of the entity parsed
13230 if (ctxt->input != NULL) {
/* NOTE(review): oldctxt is dereferenced here and in the statements below (lastError, node_seq) without the NULL test applied to nbentities just above, yet xmlParseExternalEntity() passes NULL for oldctxt; confirm that no guard is missing. */
13231 oldctxt->sizeentities += ctxt->input->consumed;
13232 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13235 * And record the last error if any
13237 if (ctxt->lastError.code != XML_ERR_OK)
13238 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241 ctxt->sax = oldsax;
/* Give the node_seq record back to the parent, then clear the child's copy before the child context is freed. */
13242 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243 oldctxt->node_seq.length = ctxt->node_seq.length;
13244 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13245 ctxt->node_seq.maximum = 0;
13246 ctxt->node_seq.length = 0;
13247 ctxt->node_seq.buffer = NULL;
13248 xmlFreeParserCtxt(ctxt);
13249 newDoc->intSubset = NULL;
13250 newDoc->extSubset = NULL;
13251 xmlFreeDoc(newDoc);
13256 #ifdef LIBXML_SAX1_ENABLED
13258 * xmlParseExternalEntity:
13259 * @doc: the document the chunk pertains to
13260 * @sax: the SAX handler block (possibly NULL)
13261 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13262 * @depth: Used for loop detection, use 0
13263 * @URL: the URL for the entity to load
13264 * @ID: the System ID for the entity to load
13265 * @lst: the return value for the set of parsed nodes
13267 * Parse an external general entity
13268 * An external general parsed entity is well-formed if it matches the
13269 * production labeled extParsedEnt.
13271 * [78] extParsedEnt ::= TextDecl? content
13273 * Returns 0 if the entity is well formed, -1 in case of args problem and
13274 * the parser error code otherwise
13278 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13279 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
/* Public wrapper: forwards to xmlParseExternalEntityPrivate() with a NULL parent parser context. */
13280 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13285 * xmlParseBalancedChunkMemory:
13286 * @doc: the document the chunk pertains to
13287 * @sax: the SAX handler block (possibly NULL)
13288 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13289 * @depth: Used for loop detection, use 0
13290 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13291 * @lst: the return value for the set of parsed nodes
13293 * Parse a well-balanced chunk of an XML document
13294 * called by the parser
13295 * The allowed sequence for the Well Balanced Chunk is the one defined by
13296 * the content production in the XML grammar:
13298 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13300 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13301 * the parser error code otherwise
13305 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13306 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
/* Same as xmlParseBalancedChunkMemoryRecover() with the recover argument fixed to 0. */
13307 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13308 depth, string, lst, 0 );
13310 #endif /* LIBXML_SAX1_ENABLED */
13313 * xmlParseBalancedChunkMemoryInternal:
13314 * @oldctxt: the existing parsing context
13315 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13316 * @user_data: the user data field for the parser context
13317 * @lst: the return value for the set of parsed nodes
13320 * Parse a well-balanced chunk of an XML document
13321 * called by the parser
13322 * The allowed sequence for the Well Balanced Chunk is the one defined by
13323 * the content production in the XML grammar:
13325 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13327 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13328 * error code otherwise
13330 * In case recover is set to 1, the nodelist will not be empty even if
13331 * the parsed chunk is not well balanced.
13333 static xmlParserErrors
13334 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13335 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
/*
 * Overview: parses the zero-terminated string as element content on behalf
 * of oldctxt. A temporary memory parser context borrows oldctxt's
 * dictionary, SAX handler, namespaces, attribute tables and, when there is
 * one, its document; parsed nodes hang under a temporary "pseudoroot".
 * oldctxt is dereferenced from the first statement on, so callers must pass
 * a non-NULL context.
 * NOTE(review): some short lines of this function (declarations, returns,
 * closing braces) are not present in this view; the notes below only cover
 * the statements that are shown.
 */
13336 xmlParserCtxtPtr ctxt;
13337 xmlDocPtr newDoc = NULL;
13338 xmlNodePtr newRoot;
13339 xmlSAXHandlerPtr oldsax = NULL;
13340 xmlNodePtr content = NULL;
13341 xmlNodePtr last = NULL;
13343 xmlParserErrors ret = XML_ERR_OK;
/* Recursion guard: a depth above 40 is refused unless XML_PARSE_HUGE is set; a depth above 1024 is always refused. */
13348 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13349 (oldctxt->depth > 1024)) {
13350 return(XML_ERR_ENTITY_LOOP);
13356 if (string == NULL)
13357 return(XML_ERR_INTERNAL_ERROR);
13359 size = xmlStrlen(string);
13361 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13362 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13363 if (user_data != NULL)
13364 ctxt->userData = user_data;
13366 ctxt->userData = ctxt;
/* Swap the temporary context's own dictionary for the parent's and look up the three well-known strings in it. */
13367 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13368 ctxt->dict = oldctxt->dict;
13369 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13370 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13371 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13374 /* propagate namespaces down the entity */
13375 for (i = 0;i < oldctxt->nsNr;i += 2) {
/* nsTab is read two slots at a time: entries i and i+1 are handed to nsPush() together. */
13376 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13380 oldsax = ctxt->sax;
/* Borrow the parent's SAX handler for the parse; oldsax is put back before the context is freed. */
13381 ctxt->sax = oldctxt->sax;
13382 xmlDetectSAX2(ctxt);
13383 ctxt->replaceEntities = oldctxt->replaceEntities;
13384 ctxt->options = oldctxt->options;
13386 ctxt->_private = oldctxt->_private;
13387 if (oldctxt->myDoc == NULL) {
/* No parent document: create a private scratch document sharing the dictionary. When a parent document exists it is reused instead, and its current children/last pointers are saved in content/last. */
13388 newDoc = xmlNewDoc(BAD_CAST "1.0");
13389 if (newDoc == NULL) {
13390 ctxt->sax = oldsax;
13392 xmlFreeParserCtxt(ctxt);
13393 return(XML_ERR_INTERNAL_ERROR);
13395 newDoc->properties = XML_DOC_INTERNAL;
13396 newDoc->dict = ctxt->dict;
13397 xmlDictReference(newDoc->dict);
13398 ctxt->myDoc = newDoc;
13400 ctxt->myDoc = oldctxt->myDoc;
13401 content = ctxt->myDoc->children;
13402 last = ctxt->myDoc->last;
13404 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13405 if (newRoot == NULL) {
13406 ctxt->sax = oldsax;
13408 xmlFreeParserCtxt(ctxt);
13409 if (newDoc != NULL) {
13410 xmlFreeDoc(newDoc);
13412 return(XML_ERR_INTERNAL_ERROR);
/* Make the pseudoroot the document's only child for the duration of the parse; the saved child list is put back afterwards. */
13414 ctxt->myDoc->children = NULL;
13415 ctxt->myDoc->last = NULL;
13416 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13417 nodePush(ctxt, ctxt->myDoc->children);
13418 ctxt->instate = XML_PARSER_CONTENT;
13419 ctxt->depth = oldctxt->depth + 1;
/* Validation is switched off in the temporary context; element validation against oldctxt happens later, in the unlink loop. */
13421 ctxt->validate = 0;
13422 ctxt->loadsubset = oldctxt->loadsubset;
13423 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13425 * ID/IDREF registration will be done in xmlValidateElement below
13427 ctxt->loadsubset |= XML_SKIP_IDS;
13429 ctxt->dictNames = oldctxt->dictNames;
13430 ctxt->attsDefault = oldctxt->attsDefault;
13431 ctxt->attsSpecial = oldctxt->attsSpecial;
13433 xmlParseContent(ctxt);
/* Input left over after the content production is an error: a pending end tag means unbalanced markup, anything else is extra content. */
13434 if ((RAW == '<') && (NXT(1) == '/')) {
13435 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13436 } else if (RAW != 0) {
13437 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13439 if (ctxt->node != ctxt->myDoc->children) {
13440 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13443 if (!ctxt->wellFormed) {
/* A malformed parse that recorded no error number is reported as an internal error. */
13444 if (ctxt->errNo == 0)
13445 ret = XML_ERR_INTERNAL_ERROR;
13447 ret = (xmlParserErrors)ctxt->errNo;
13452 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13456 * Return the newly created nodeset after unlinking it from
13457 * they pseudo parent.
13459 cur = ctxt->myDoc->children->children;
13461 while (cur != NULL) {
13462 #ifdef LIBXML_VALID_ENABLED
13463 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13464 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13465 (cur->type == XML_ELEMENT_NODE)) {
13466 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13467 oldctxt->myDoc, cur);
13469 #endif /* LIBXML_VALID_ENABLED */
13470 cur->parent = NULL;
13473 ctxt->myDoc->children->children = NULL;
13475 if (ctxt->myDoc != NULL) {
/* Drop the pseudoroot and restore the document's original child list. */
13476 xmlFreeNode(ctxt->myDoc->children);
13477 ctxt->myDoc->children = content;
13478 ctxt->myDoc->last = last;
13482 * Record in the parent context the number of entities replacement
13483 * done when parsing that reference.
13485 if (oldctxt != NULL)
/* NOTE(review): oldctxt has already been dereferenced unconditionally since the top of the function, so the NULL test above looks redundant. */
13486 oldctxt->nbentities += ctxt->nbentities;
13489 * Also record the last error if any
13491 if (ctxt->lastError.code != XML_ERR_OK)
13492 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13494 ctxt->sax = oldsax;
/* Detach the attribute tables borrowed from oldctxt before the temporary context is freed. */
13496 ctxt->attsDefault = NULL;
13497 ctxt->attsSpecial = NULL;
13498 xmlFreeParserCtxt(ctxt);
13499 if (newDoc != NULL) {
13500 xmlFreeDoc(newDoc);
13507 * xmlParseInNodeContext:
13508 * @node: the context node
13509 * @data: the input string
13510 * @datalen: the input string length in bytes
13511 * @options: a combination of xmlParserOption
13512 * @lst: the return value for the set of parsed nodes
13514 * Parse a well-balanced chunk of an XML document
13515 * within the context (DTD, namespaces, etc ...) of the given node.
13517 * The allowed sequence for the data is a Well Balanced Chunk defined by
13518 * the content production in the XML grammar:
13520 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13522 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13523 * error code otherwise
13526 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13527 int options, xmlNodePtr *lst) {
/*
 * Overview: parses datalen bytes of data as content in the context of node.
 * The nearest enclosing element or document node is located, a memory
 * parser context (XML or HTML, depending on the document type) is set up
 * with the document's dictionary and encoding, and the content is parsed
 * with that node pushed as the current node.
 * NOTE(review): some short lines of this function (declarations, returns,
 * closing braces, preprocessor lines) are not present in this view; the
 * notes below only cover the statements that are shown.
 */
13529 xmlParserCtxtPtr ctxt;
13530 xmlDocPtr doc = NULL;
13531 xmlNodePtr fake, cur;
13534 xmlParserErrors ret = XML_ERR_OK;
13537 * check all input parameters, grab the document
13539 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13540 return(XML_ERR_INTERNAL_ERROR);
13541 switch (node->type) {
13542 case XML_ELEMENT_NODE:
13543 case XML_ATTRIBUTE_NODE:
13544 case XML_TEXT_NODE:
13545 case XML_CDATA_SECTION_NODE:
13546 case XML_ENTITY_REF_NODE:
13548 case XML_COMMENT_NODE:
13549 case XML_DOCUMENT_NODE:
13550 case XML_HTML_DOCUMENT_NODE:
13553 return(XML_ERR_INTERNAL_ERROR);
/* Climb through the parents until an element or a (HTML) document node is reached; that node is the parsing context. */
13556 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13557 (node->type != XML_DOCUMENT_NODE) &&
13558 (node->type != XML_HTML_DOCUMENT_NODE))
13559 node = node->parent;
13561 return(XML_ERR_INTERNAL_ERROR);
13562 if (node->type == XML_ELEMENT_NODE)
13565 doc = (xmlDocPtr) node;
13567 return(XML_ERR_INTERNAL_ERROR);
13570 * allocate a context and set-up everything not related to the
13571 * node position in the tree
13573 if (doc->type == XML_DOCUMENT_NODE)
13574 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13575 #ifdef LIBXML_HTML_ENABLED
13576 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13577 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13579 * When parsing in context, it makes no sense to add implied
13580 * elements like html/body/etc...
13582 options |= HTML_PARSE_NOIMPLIED;
13586 return(XML_ERR_INTERNAL_ERROR);
13589 return(XML_ERR_NO_MEMORY);
13592 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13593 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13594 * we must wait until the last moment to free the original one.
13596 if (doc->dict != NULL) {
/* Replace the context's own dictionary with the document's. */
13597 if (ctxt->dict != NULL)
13598 xmlDictFree(ctxt->dict);
13599 ctxt->dict = doc->dict;
13601 options |= XML_PARSE_NODICT;
13603 if (doc->encoding != NULL) {
13604 xmlCharEncodingHandlerPtr hdlr;
13606 if (ctxt->encoding != NULL)
13607 xmlFree((xmlChar *) ctxt->encoding);
13608 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13610 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13611 if (hdlr != NULL) {
13612 xmlSwitchToEncoding(ctxt, hdlr);
/* NOTE(review): no xmlFreeParserCtxt(ctxt) is visible before this early return, unlike the xmlNewComment() failure path below; check whether the context is leaked here. */
13614 return(XML_ERR_UNSUPPORTED_ENCODING);
13618 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13619 xmlDetectSAX2(ctxt);
/* An empty comment node is appended to node as a temporary marker; it is unlinked again after parsing (see xmlUnlinkNode(fake) below). */
13622 fake = xmlNewComment(NULL);
13623 if (fake == NULL) {
13624 xmlFreeParserCtxt(ctxt);
13625 return(XML_ERR_NO_MEMORY);
13627 xmlAddChild(node, fake);
13629 if (node->type == XML_ELEMENT_NODE) {
13630 nodePush(ctxt, node);
13632 * initialize the SAX2 namespaces stack
13635 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13636 xmlNsPtr ns = cur->nsDef;
13637 const xmlChar *iprefix, *ihref;
13639 while (ns != NULL) {
13641 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13642 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13644 iprefix = ns->prefix;
13648 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
/* Push the declaration only when xmlGetNamespace() finds nothing for this prefix in the context yet. */
13649 nsPush(ctxt, iprefix, ihref);
13656 ctxt->instate = XML_PARSER_CONTENT;
13659 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13661 * ID/IDREF registration will be done in xmlValidateElement below
13663 ctxt->loadsubset |= XML_SKIP_IDS;
13666 #ifdef LIBXML_HTML_ENABLED
13667 if (doc->type == XML_HTML_DOCUMENT_NODE)
13668 __htmlParseContent(ctxt);
13671 xmlParseContent(ctxt);
13674 if ((RAW == '<') && (NXT(1) == '/')) {
13675 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13676 } else if (RAW != 0) {
13677 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13679 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13680 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13681 ctxt->wellFormed = 0;
13684 if (!ctxt->wellFormed) {
/* A malformed parse that recorded no error number is reported as an internal error. */
13685 if (ctxt->errNo == 0)
13686 ret = XML_ERR_INTERNAL_ERROR;
13688 ret = (xmlParserErrors)ctxt->errNo;
13694 * Return the newly created nodeset after unlinking it from
13695 * the pseudo sibling.
13708 while (cur != NULL) {
13709 cur->parent = NULL;
13713 xmlUnlinkNode(fake);
13717 if (ret != XML_ERR_OK) {
/* On failure the collected node list is freed instead of being handed to the caller. */
13718 xmlFreeNodeList(*lst);
13722 if (doc->dict != NULL)
13724 xmlFreeParserCtxt(ctxt);
13728 return(XML_ERR_INTERNAL_ERROR);
13732 #ifdef LIBXML_SAX1_ENABLED
13734 * xmlParseBalancedChunkMemoryRecover:
13735 * @doc: the document the chunk pertains to
13736 * @sax: the SAX handler block (possibly NULL)
13737 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13738 * @depth: Used for loop detection, use 0
13739 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13740 * @lst: the return value for the set of parsed nodes
13741 * @recover: return nodes even if the data is broken (use 0)
13744 * Parse a well-balanced chunk of an XML document
13745 * called by the parser
13746 * The allowed sequence for the Well Balanced Chunk is the one defined by
13747 * the content production in the XML grammar:
13749 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13751 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13752 * the parser error code otherwise
13754 * In case recover is set to 1, the nodelist will not be empty even if
13755 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13759 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13760 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13762 xmlParserCtxtPtr ctxt;
/*
 * Overview: parses the zero-terminated string as element content inside a
 * scratch document whose only child is a temporary "pseudoroot". When lst
 * is non-NULL and either the parse succeeded or recover is 1, the parsed
 * nodes are moved to doc with xmlSetTreeDoc() and detached from the
 * pseudoroot.
 * NOTE(review): some short lines of this function (declarations, returns,
 * closing braces, a few conditions) are not present in this view; the notes
 * below only cover the statements that are shown.
 */
13764 xmlSAXHandlerPtr oldsax = NULL;
13765 xmlNodePtr content, newRoot;
13770 return(XML_ERR_ENTITY_LOOP);
13776 if (string == NULL)
13779 size = xmlStrlen(string);
13781 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13782 if (ctxt == NULL) return(-1);
13783 ctxt->userData = ctxt;
13785 oldsax = ctxt->sax;
13787 if (user_data != NULL)
13788 ctxt->userData = user_data;
13790 newDoc = xmlNewDoc(BAD_CAST "1.0");
13791 if (newDoc == NULL) {
/* NOTE(review): oldsax was saved above, which suggests ctxt->sax may point at the caller's handler here (the assignment is on a line not shown); unlike the newRoot failure path below it is not restored before the free. Confirm the caller's handler cannot be released. */
13792 xmlFreeParserCtxt(ctxt);
13795 newDoc->properties = XML_DOC_INTERNAL;
13796 if ((doc != NULL) && (doc->dict != NULL)) {
/* Use the document's dictionary (registered through xmlDictReference()) instead of the context's own, and look up the three well-known strings in it. */
13797 xmlDictFree(ctxt->dict);
13798 ctxt->dict = doc->dict;
13799 xmlDictReference(ctxt->dict);
13800 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13801 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13802 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13803 ctxt->dictNames = 1;
13805 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13808 newDoc->intSubset = doc->intSubset;
13809 newDoc->extSubset = doc->extSubset;
13811 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13812 if (newRoot == NULL) {
13814 ctxt->sax = oldsax;
13815 xmlFreeParserCtxt(ctxt);
13816 newDoc->intSubset = NULL;
13817 newDoc->extSubset = NULL;
13818 xmlFreeDoc(newDoc);
13821 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13822 nodePush(ctxt, newRoot);
13824 ctxt->myDoc = newDoc;
13826 ctxt->myDoc = newDoc;
/* The pseudoroot is re-homed into the caller's document and newDoc borrows doc's oldNs list; the borrowed pointer is cleared again before newDoc is freed. */
13827 newDoc->children->doc = doc;
13828 /* Ensure that doc has XML spec namespace */
13829 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13830 newDoc->oldNs = doc->oldNs;
13832 ctxt->instate = XML_PARSER_CONTENT;
13833 ctxt->depth = depth;
13836 * Doing validity checking on chunk doesn't make sense
13838 ctxt->validate = 0;
13839 ctxt->loadsubset = 0;
13840 xmlDetectSAX2(ctxt);
13842 if ( doc != NULL ){
/* doc's child list is hidden for the duration of the parse and restored right after it. */
13843 content = doc->children;
13844 doc->children = NULL;
13845 xmlParseContent(ctxt);
13846 doc->children = content;
13849 xmlParseContent(ctxt);
13851 if ((RAW == '<') && (NXT(1) == '/')) {
13852 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13853 } else if (RAW != 0) {
13854 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13856 if (ctxt->node != newDoc->children) {
13857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13860 if (!ctxt->wellFormed) {
13861 if (ctxt->errNo == 0)
13869 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13873 * Return the newly created nodeset after unlinking it from
13874 * they pseudo parent.
13876 cur = newDoc->children->children;
13878 while (cur != NULL) {
/* Move each returned node into the caller's document before detaching it from the pseudoroot. */
13879 xmlSetTreeDoc(cur, doc);
13880 cur->parent = NULL;
13883 newDoc->children->children = NULL;
13887 ctxt->sax = oldsax;
13888 xmlFreeParserCtxt(ctxt);
13889 newDoc->intSubset = NULL;
13890 newDoc->extSubset = NULL;
/* Detach the borrowed oldNs list as well, presumably so that xmlFreeDoc(newDoc) leaves doc's namespaces alone. */
13891 newDoc->oldNs = NULL;
13892 xmlFreeDoc(newDoc);
13898 * xmlSAXParseEntity:
13899 * @sax: the SAX handler block
13900 * @filename: the filename
13902 * parse an XML external entity out of context and build a tree.
13903 * It uses the given SAX function block to handle the parsing callbacks.
13904 * If sax is NULL, fallback to the default DOM tree building routines.
13906 * [78] extParsedEnt ::= TextDecl? content
13908 * This corresponds to a "Well Balanced" chunk
13910 * Returns the resulting document tree
13914 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
/*
 * Overview: creates a file parser context for filename, runs
 * xmlParseExtParsedEnt() on it and frees the context afterwards.
 * NOTE(review): the lines that install `sax`, pick the return value and
 * close the branches are not present in this view.
 */
13916 xmlParserCtxtPtr ctxt;
13918 ctxt = xmlCreateFileParserCtxt(filename);
13919 if (ctxt == NULL) {
/* The handler allocated with the context is released here; presumably the caller's `sax` is installed in its place on a line not shown. */
13923 if (ctxt->sax != NULL)
13924 xmlFree(ctxt->sax);
13926 ctxt->userData = NULL;
13929 xmlParseExtParsedEnt(ctxt);
13931 if (ctxt->wellFormed)
/* One branch of the wellFormed test frees the built document and clears the pointer. */
13935 xmlFreeDoc(ctxt->myDoc);
13936 ctxt->myDoc = NULL;
13940 xmlFreeParserCtxt(ctxt);
13947 * @filename: the filename
13949 * parse an XML external entity out of context and build a tree.
13951 * [78] extParsedEnt ::= TextDecl? content
13953 * This corresponds to a "Well Balanced" chunk
13955 * Returns the resulting document tree
13959 xmlParseEntity(const char *filename) {
/* Convenience entry point: delegates to xmlSAXParseEntity() with a NULL SAX handler. */
13960 return(xmlSAXParseEntity(NULL, filename));
13962 #endif /* LIBXML_SAX1_ENABLED */
13965 * xmlCreateEntityParserCtxtInternal:
13966 * @URL: the entity URL
13967 * @ID: the entity PUBLIC ID
13968 * @base: a possible base for the target URI
13969 * @pctx: parser context used to set options on new context
13971 * Create a parser context for an external entity
13972 * Automatic support for ZLIB/Compress compressed document is provided
13973 * by default if found at compile-time.
13975 * Returns the new parser context or NULL
13977 static xmlParserCtxtPtr
13978 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13979 const xmlChar *base, xmlParserCtxtPtr pctx) {
13980 xmlParserCtxtPtr ctxt;
13981 xmlParserInputPtr inputStream;
13982 char *directory = NULL;
13985 ctxt = xmlNewParserCtxt();
13986 if (ctxt == NULL) {
13990 if (pctx != NULL) {
13991 ctxt->options = pctx->options;
13992 ctxt->_private = pctx->_private;
13995 uri = xmlBuildURI(URL, base);
13998 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13999 if (inputStream == NULL) {
14000 xmlFreeParserCtxt(ctxt);
14004 inputPush(ctxt, inputStream);
14006 if ((ctxt->directory == NULL) && (directory == NULL))
14007 directory = xmlParserGetDirectory((char *)URL);
14008 if ((ctxt->directory == NULL) && (directory != NULL))
14009 ctxt->directory = directory;
14011 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14012 if (inputStream == NULL) {
14014 xmlFreeParserCtxt(ctxt);
14018 inputPush(ctxt, inputStream);
14020 if ((ctxt->directory == NULL) && (directory == NULL))
14021 directory = xmlParserGetDirectory((char *)uri);
14022 if ((ctxt->directory == NULL) && (directory != NULL))
14023 ctxt->directory = directory;
14030 * xmlCreateEntityParserCtxt:
14031 * @URL: the entity URL
14032 * @ID: the entity PUBLIC ID
14033 * @base: a possible base for the target URI
14035 * Create a parser context for an external entity
14036 * Automatic support for ZLIB/Compress compressed document is provided
14037 * by default if found at compile-time.
14039 * Returns the new parser context or NULL
14042 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14043 const xmlChar *base) {
/* Public wrapper: forwards to xmlCreateEntityParserCtxtInternal() with no parent context to inherit from. */
14044 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14048 /************************************************************************
14050 * Front ends when parsing from a file *
14052 ************************************************************************/
14055 * xmlCreateURLParserCtxt:
14056 * @filename: the filename or URL
14057 * @options: a combination of xmlParserOption
14059 * Create a parser context for a file or URL content.
14060 * Automatic support for ZLIB/Compress compressed document is provided
14061 * by default if found at compile-time and for file accesses
14063 * Returns the new parser context or NULL
14066 xmlCreateURLParserCtxt(const char *filename, int options)
/*
 * Overview: allocates a parser context, applies options, turns line
 * numbering on, loads filename through xmlLoadExternalEntity() as the
 * input and records the file's directory when the context has none yet.
 * NOTE(review): the braces and return statements of this function are not
 * present in this view.
 */
14068 xmlParserCtxtPtr ctxt;
14069 xmlParserInputPtr inputStream;
14070 char *directory = NULL;
14072 ctxt = xmlNewParserCtxt();
14073 if (ctxt == NULL) {
14074 xmlErrMemory(NULL, "cannot allocate parser context");
14079 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14080 ctxt->linenumbers = 1;
14082 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14083 if (inputStream == NULL) {
/* Loading failed: release the context that was just created. */
14084 xmlFreeParserCtxt(ctxt);
14088 inputPush(ctxt, inputStream);
/* No earlier assignment to `directory` is visible, so it is still NULL at the test below. */
14089 if ((ctxt->directory == NULL) && (directory == NULL))
14090 directory = xmlParserGetDirectory(filename);
14091 if ((ctxt->directory == NULL) && (directory != NULL))
14092 ctxt->directory = directory;
14098 * xmlCreateFileParserCtxt:
14099 * @filename: the filename
14101 * Create a parser context for a file content.
14102 * Automatic support for ZLIB/Compress compressed document is provided
14103 * by default if found at compile-time.
14105 * Returns the new parser context or NULL
/* Convenience wrapper: calls xmlCreateURLParserCtxt() with options set to 0. */
14108 xmlCreateFileParserCtxt(const char *filename)
14110 return(xmlCreateURLParserCtxt(filename, 0));
14113 #ifdef LIBXML_SAX1_ENABLED
14115 * xmlSAXParseFileWithData:
14116 * @sax: the SAX handler block
14117 * @filename: the filename
14118 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14120 * @data: the userdata
14122 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14123 * compressed document is provided by default if found at compile-time.
14124 * It uses the given SAX function block to handle the parsing callbacks.
14125 * If sax is NULL, fallback to the default DOM tree building routines.
14127 * User data (void *) is stored within the parser context in the
14128 * context's _private member, so it is available nearly everywhere in libxml
14130 * Returns the resulting document tree
/*
 * Parse @filename with a context obtained from xmlCreateFileParserCtxt(),
 * storing @data in the context's _private field and @recovery in its
 * recovery field, then run xmlParseDocument() and free the context.
 * NOTE(review): the declaration/assignment of `ret`, the installation of
 * @sax and the return statements are on lines omitted from this listing.
 */
14134 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14135 int recovery, void *data) {
14137 xmlParserCtxtPtr ctxt;
14141 ctxt = xmlCreateFileParserCtxt(filename);
14142 if (ctxt == NULL) {
/*
 * The context's current SAX block is released here.
 * NOTE(review): presumably this only happens when a caller-supplied @sax
 * replaces it; the enclosing condition is not visible, confirm.
 */
14146 if (ctxt->sax != NULL)
14147 xmlFree(ctxt->sax);
14150 xmlDetectSAX2(ctxt);
14152 ctxt->_private = data;
/* Fill in the directory from @filename only if the context has none. */
14155 if (ctxt->directory == NULL)
14156 ctxt->directory = xmlParserGetDirectory(filename);
14158 ctxt->recovery = recovery;
14160 xmlParseDocument(ctxt);
/* The document is kept when parsing was well formed or recovery was requested. */
14162 if ((ctxt->wellFormed) || recovery) {
/*
 * Any positive `compressed` value on the input buffer is mapped to
 * compression level 9 on the document; otherwise the value is copied as-is.
 * NOTE(review): `ret` and ctxt->input->buf are dereferenced here; no NULL
 * guard is visible in this listing, verify one exists on the omitted lines.
 */
14165 if (ctxt->input->buf->compressed > 0)
14166 ret->compression = 9;
14168 ret->compression = ctxt->input->buf->compressed;
/* Rejected document: free it and clear the context's pointer to it. */
14173 xmlFreeDoc(ctxt->myDoc);
14174 ctxt->myDoc = NULL;
14178 xmlFreeParserCtxt(ctxt);
14185 * @sax: the SAX handler block
14186 * @filename: the filename
14187 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14190 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14191 * compressed document is provided by default if found at compile-time.
14192 * It uses the given SAX function block to handle the parsing callbacks.
14193 * If sax is NULL, fallback to the default DOM tree building routines.
14195 * Returns the resulting document tree
/* Thin wrapper: delegates to xmlSAXParseFileWithData() with NULL user data. */
14199 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14201 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14206 * @cur: a pointer to an array of xmlChar
14208 * parse an XML in-memory document and build a tree.
14209 * In the case the document is not Well Formed, an attempt to build a
14210 * tree is tried anyway
14212 * Returns the resulting document tree or NULL in case of failure
/* Calls xmlSAXParseDoc() with no SAX handler (NULL) and the recovery flag set to 1. */
14216 xmlRecoverDoc(const xmlChar *cur) {
14217 return(xmlSAXParseDoc(NULL, cur, 1));
14222 * @filename: the filename
14224 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14225 * compressed document is provided by default if found at compile-time.
14227 * Returns the resulting document tree if the file was wellformed,
/* Calls xmlSAXParseFile() with no SAX handler (NULL) and the recovery flag set to 0. */
14232 xmlParseFile(const char *filename) {
14233 return(xmlSAXParseFile(NULL, filename, 0));
14238 * @filename: the filename
14240 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14241 * compressed document is provided by default if found at compile-time.
14242 * In the case the document is not Well Formed, it attempts to build
14245 * Returns the resulting document tree or NULL in case of failure
/* Calls xmlSAXParseFile() with no SAX handler (NULL) and the recovery flag set to 1. */
14249 xmlRecoverFile(const char *filename) {
14250 return(xmlSAXParseFile(NULL, filename, 1));
14255 * xmlSetupParserForBuffer:
14256 * @ctxt: an XML parser context
14257 * @buffer: a xmlChar * buffer
14258 * @filename: a file name
14260 * Setup the parser context to parse a new buffer; Clears any prior
14261 * contents from the parser context. The buffer parameter must not be
14262 * NULL, but the filename parameter can be
/*
 * Point @ctxt at a NUL-terminated in-memory @buffer: the context is cleared
 * with xmlClearParserCtxt() and a new input stream referencing the buffer is
 * pushed. @filename, when given, is stored in canonical form on the input.
 * NOTE(review): the statements executed by the guard branches (presumably
 * plain returns) are on lines omitted from this listing.
 */
14265 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14266 const char* filename)
14268 xmlParserInputPtr input;
14270 if ((ctxt == NULL) || (buffer == NULL))
14273 input = xmlNewInputStream(ctxt);
14274 if (input == NULL) {
/* Allocation failure: report it and still clear the context. */
14275 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14276 xmlClearParserCtxt(ctxt);
14280 xmlClearParserCtxt(ctxt);
14281 if (filename != NULL)
14282 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
/*
 * The input's base/cur/end pointers are set directly into the caller's
 * buffer; no copy is made in the visible code. The end is located with
 * xmlStrlen(), so the buffer length is taken from its terminator.
 */
14283 input->base = buffer;
14284 input->cur = buffer;
14285 input->end = &buffer[xmlStrlen(buffer)];
14286 inputPush(ctxt, input);
14290 * xmlSAXUserParseFile:
14291 * @sax: a SAX handler
14292 * @user_data: The user data returned on SAX callbacks
14293 * @filename: a file name
14295 * parse an XML file and call the given SAX handler routines.
14296 * Automatic support for ZLIB/Compress compressed document is provided
14298 * Returns 0 in case of success or an error number otherwise
/*
 * Parse @filename for its SAX callbacks: a context is created with
 * xmlCreateFileParserCtxt(), @user_data (when non-NULL) becomes the
 * context's userData, xmlParseDocument() is run, and any document tree
 * that was built plus the context itself are freed afterwards.
 * Returns -1 when the context cannot be created.
 * NOTE(review): the assignment of @sax to the context and the computation
 * of the final return value are on lines omitted from this listing.
 */
14301 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14302 const char *filename) {
14304 xmlParserCtxtPtr ctxt;
14306 ctxt = xmlCreateFileParserCtxt(filename);
14307 if (ctxt == NULL) return -1;
/* The context's SAX block is freed unless it is the global default handler. */
14308 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14309 xmlFree(ctxt->sax);
14311 xmlDetectSAX2(ctxt);
14313 if (user_data != NULL)
14314 ctxt->userData = user_data;
14316 xmlParseDocument(ctxt);
/* The result depends on wellFormed and errNo; the assigned values are not visible here. */
14318 if (ctxt->wellFormed)
14321 if (ctxt->errNo != 0)
/* A tree built during parsing is not handed to the caller; it is released. */
14328 if (ctxt->myDoc != NULL) {
14329 xmlFreeDoc(ctxt->myDoc);
14330 ctxt->myDoc = NULL;
14332 xmlFreeParserCtxt(ctxt);
14336 #endif /* LIBXML_SAX1_ENABLED */
14338 /************************************************************************
14340 * Front ends when parsing from memory *
14342 ************************************************************************/
14345 * xmlCreateMemoryParserCtxt:
14346 * @buffer: a pointer to a char array
14347 * @size: the size of the array
14349 * Create a parser context for an XML in-memory document.
14351 * Returns the new parser context or NULL
/*
 * Create a parser context reading from @size bytes at @buffer: an input
 * buffer is created with xmlParserInputBufferCreateMem() (no encoding
 * specified), wrapped in a new input stream and pushed on the context.
 * NOTE(review): the early-return statements, the remaining guard
 * conditions and the line attaching `buf` to `input` are omitted from
 * this listing.
 */
14354 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14355 xmlParserCtxtPtr ctxt;
14356 xmlParserInputPtr input;
14357 xmlParserInputBufferPtr buf;
14359 if (buffer == NULL)
14364 ctxt = xmlNewParserCtxt();
14368 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14369 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
/* Cleanup for a failed step; presumably the buffer-creation failure path. */
14371 xmlFreeParserCtxt(ctxt);
14375 input = xmlNewInputStream(ctxt);
14376 if (input == NULL) {
/* Stream allocation failed: release both the input buffer and the context. */
14377 xmlFreeParserInputBuffer(buf);
14378 xmlFreeParserCtxt(ctxt);
/* In-memory input has no associated file name. */
14382 input->filename = NULL;
14384 xmlBufResetInput(input->buf->buffer, input);
14386 inputPush(ctxt, input);
14390 #ifdef LIBXML_SAX1_ENABLED
14392 * xmlSAXParseMemoryWithData:
14393 * @sax: the SAX handler block
14394 * @buffer: a pointer to a char array
14395 * @size: the size of the array
14396 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14398 * @data: the userdata
14400 * parse an XML in-memory block and use the given SAX function block
14401 * to handle the parsing callback. If sax is NULL, fallback to the default
14402 * DOM tree building routines.
14404 * User data (void *) is stored within the parser context in the
14405 * context's _private member, so it is available nearly everywhere in libxml
14407 * Returns the resulting document tree
/*
 * Parse @size bytes at @buffer using a context from
 * xmlCreateMemoryParserCtxt(); @data is stored in the context's _private
 * field and @recovery in its recovery field. Returns NULL when the context
 * cannot be created.
 * NOTE(review): the declaration of `ret`, the installation of @sax and the
 * final return are on lines omitted from this listing.
 */
14411 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14412 int size, int recovery, void *data) {
14414 xmlParserCtxtPtr ctxt;
14418 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14419 if (ctxt == NULL) return(NULL);
/* NOTE(review): presumably guarded by a check on @sax that is not visible here. */
14421 if (ctxt->sax != NULL)
14422 xmlFree(ctxt->sax);
14425 xmlDetectSAX2(ctxt);
14427 ctxt->_private=data;
14430 ctxt->recovery = recovery;
14432 xmlParseDocument(ctxt);
/* The tree is the result when parsing was well formed or recovery was requested. */
14434 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
/* Otherwise the tree is discarded. */
14437 xmlFreeDoc(ctxt->myDoc);
14438 ctxt->myDoc = NULL;
14442 xmlFreeParserCtxt(ctxt);
14448 * xmlSAXParseMemory:
14449 * @sax: the SAX handler block
14450 * @buffer: a pointer to a char array
14451 * @size: the size of the array
14452 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14455 * parse an XML in-memory block and use the given SAX function block
14456 * to handle the parsing callback. If sax is NULL, fallback to the default
14457 * DOM tree building routines.
14459 * Returns the resulting document tree
/* Thin wrapper: delegates to xmlSAXParseMemoryWithData() with NULL user data. */
14462 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14463 int size, int recovery) {
14464 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14469 * @buffer: a pointer to a char array
14470 * @size: the size of the array
14472 * parse an XML in-memory block and build a tree.
14474 * Returns the resulting document tree
/* Calls xmlSAXParseMemory() with no SAX handler (NULL) and the recovery flag set to 0. */
14477 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14478 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14482 * xmlRecoverMemory:
14483 * @buffer: a pointer to a char array
14484 * @size: the size of the array
14486 * parse an XML in-memory block and build a tree.
14487 * In the case the document is not Well Formed, an attempt to
14488 * build a tree is tried anyway
14490 * Returns the resulting document tree or NULL in case of error
/* Calls xmlSAXParseMemory() with no SAX handler (NULL) and the recovery flag set to 1. */
14493 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14494 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14498 * xmlSAXUserParseMemory:
14499 * @sax: a SAX handler
14500 * @user_data: The user data returned on SAX callbacks
14501 * @buffer: an in-memory XML document input
14502 * @size: the length of the XML document in bytes
14504 * A better SAX parsing routine.
14505 * parse an XML in-memory buffer and call the given SAX handler routines.
14507 * Returns 0 in case of success or an error number otherwise
/*
 * Parse @size bytes at @buffer for its SAX callbacks: a context is created
 * with xmlCreateMemoryParserCtxt(), @user_data (when non-NULL) becomes the
 * context's userData, xmlParseDocument() is run, and any document tree that
 * was built plus the context itself are freed afterwards.
 * Returns -1 when the context cannot be created.
 * NOTE(review): the assignment of @sax to the context and the computation
 * of the final return value are on lines omitted from this listing.
 */
14509 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14510 const char *buffer, int size) {
14512 xmlParserCtxtPtr ctxt;
14516 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14517 if (ctxt == NULL) return -1;
/* The context's SAX block is freed unless it is the global default handler. */
14518 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14519 xmlFree(ctxt->sax);
14521 xmlDetectSAX2(ctxt);
14523 if (user_data != NULL)
14524 ctxt->userData = user_data;
14526 xmlParseDocument(ctxt);
/* The result depends on wellFormed and errNo; the assigned values are not visible here. */
14528 if (ctxt->wellFormed)
14531 if (ctxt->errNo != 0)
/* A tree built during parsing is not handed to the caller; it is released. */
14538 if (ctxt->myDoc != NULL) {
14539 xmlFreeDoc(ctxt->myDoc);
14540 ctxt->myDoc = NULL;
14542 xmlFreeParserCtxt(ctxt);
14546 #endif /* LIBXML_SAX1_ENABLED */
14549 * xmlCreateDocParserCtxt:
14550 * @cur: a pointer to an array of xmlChar
14552 * Creates a parser context for an XML in-memory document.
14554 * Returns the new parser context or NULL
/*
 * Measure the NUL-terminated string @cur with xmlStrlen() and hand it to
 * xmlCreateMemoryParserCtxt() as a byte buffer of that length.
 * NOTE(review): the declaration of `len` and any NULL check on @cur are on
 * lines omitted from this listing.
 */
14557 xmlCreateDocParserCtxt(const xmlChar *cur) {
14562 len = xmlStrlen(cur);
14563 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14566 #ifdef LIBXML_SAX1_ENABLED
14569 * @sax: the SAX handler block
14570 * @cur: a pointer to an array of xmlChar
14571 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14574 * parse an XML in-memory document and build a tree.
14575 * It uses the given SAX function block to handle the parsing callbacks.
14576 * If sax is NULL, fallback to the default DOM tree building routines.
14578 * Returns the resulting document tree
/*
 * Parse the NUL-terminated string @cur with a context from
 * xmlCreateDocParserCtxt(). Returns NULL when @cur is NULL or the context
 * cannot be created.
 * NOTE(review): the declaration of `ret`, the line installing @sax on the
 * context and the final return are omitted from this listing.
 */
14582 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14584 xmlParserCtxtPtr ctxt;
14585 xmlSAXHandlerPtr oldsax = NULL;
14587 if (cur == NULL) return(NULL);
14590 ctxt = xmlCreateDocParserCtxt(cur);
14591 if (ctxt == NULL) return(NULL);
/*
 * The context's own SAX block is remembered so it can be put back before
 * the context is freed (see the restore below).
 */
14593 oldsax = ctxt->sax;
14595 ctxt->userData = NULL;
14597 xmlDetectSAX2(ctxt);
14599 xmlParseDocument(ctxt);
/* The tree is the result when parsing was well formed or recovery was requested. */
14600 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
/* Otherwise the tree is discarded. */
14603 xmlFreeDoc(ctxt->myDoc);
14604 ctxt->myDoc = NULL;
/* Restore the saved SAX pointer before xmlFreeParserCtxt() runs. */
14607 ctxt->sax = oldsax;
14608 xmlFreeParserCtxt(ctxt);
14615 * @cur: a pointer to an array of xmlChar
14617 * parse an XML in-memory document and build a tree.
14619 * Returns the resulting document tree
/* Calls xmlSAXParseDoc() with no SAX handler (NULL) and the recovery flag set to 0. */
14623 xmlParseDoc(const xmlChar *cur) {
14624 return(xmlSAXParseDoc(NULL, cur, 0));
14626 #endif /* LIBXML_SAX1_ENABLED */
14628 #ifdef LIBXML_LEGACY_ENABLED
14629 /************************************************************************
14631 * Specific function to keep track of entities references *
14632 * and used by the XSLT debugger *
14634 ************************************************************************/
/*
 * File-local callback pointer, initially NULL. It is assigned by
 * xmlSetEntityReferenceFunc() and invoked by xmlAddEntityReference() when
 * non-NULL.
 */
14636 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14639 * xmlAddEntityReference:
14640 * @ent : A valid entity
14641 * @firstNode : A valid first node for children of entity
14642 * @lastNode : A valid last node of children entity
14644 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
/*
 * Forward (ent, firstNode, lastNode) to the registered xmlEntityRefFunc
 * callback; nothing is called when no callback is registered.
 */
14647 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14648 xmlNodePtr lastNode)
14650 if (xmlEntityRefFunc != NULL) {
14651 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14657 * xmlSetEntityReferenceFunc:
14658 * @func: A valid function
14660 * Set the function to call back when an XML entity reference has been made
/*
 * Store @func in the file-local xmlEntityRefFunc pointer, replacing any
 * previously stored value. Storing NULL disables the callback, since
 * xmlAddEntityReference() only calls a non-NULL pointer.
 */
14663 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14665 xmlEntityRefFunc = func;
14667 #endif /* LIBXML_LEGACY_ENABLED */
14669 /************************************************************************
14673 ************************************************************************/
14675 #ifdef LIBXML_XPATH_ENABLED
14676 #include <libxml/xpath.h>
/* Declared here so xmlInitParser() can compare xmlGenericError against it. */
14679 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/*
 * File-local flag: set to 1 at the end of xmlInitParser() and reset to 0 at
 * the end of xmlCleanupParser(); both functions test it on entry.
 */
14680 static int xmlParserInitialized = 0;
14685 * Initialization function for the XML parser.
14686 * This is not reentrant. Call once before processing in case of
14687 * use in multithreaded programs.
/*
 * One-time library initialisation: sets up the dictionary, character
 * encoding handlers, default SAX handler and default I/O callbacks, plus
 * the HTML and XPath pieces when those features are compiled in, and
 * finally sets xmlParserInitialized to 1.
 * NOTE(review): several short lines (early return, #endif lines, the XPath
 * call) are omitted from this listing.
 */
14691 xmlInitParser(void) {
/* Cheap test first: nothing to do when initialisation already happened. */
14692 if (xmlParserInitialized != 0)
14695 #ifdef LIBXML_THREAD_ENABLED
/*
 * With thread support the flag is tested a second time after taking the
 * global init mutex.
 */
14696 __xmlGlobalInitMutexLock();
14697 if (xmlParserInitialized == 0) {
/*
 * Install the default generic error function only when the current one is
 * the default or unset.
 */
14701 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14702 (xmlGenericError == NULL))
14703 initGenericErrorDefaultFunc(NULL);
14705 xmlInitializeDict();
14706 xmlInitCharEncodingHandlers();
14707 xmlDefaultSAXHandlerInit();
14708 xmlRegisterDefaultInputCallbacks();
14709 #ifdef LIBXML_OUTPUT_ENABLED
14710 xmlRegisterDefaultOutputCallbacks();
14711 #endif /* LIBXML_OUTPUT_ENABLED */
14712 #ifdef LIBXML_HTML_ENABLED
14713 htmlInitAutoClose();
14714 htmlDefaultSAXHandlerInit();
14716 #ifdef LIBXML_XPATH_ENABLED
14719 xmlParserInitialized = 1;
14720 #ifdef LIBXML_THREAD_ENABLED
14722 __xmlGlobalInitMutexUnlock();
14727 * xmlCleanupParser:
14729 * This function name is somewhat misleading. It does not clean up
14730 * parser state, it cleans up memory allocated by the library itself.
14731 * It is a cleanup function for the XML library. It tries to reclaim all
14732 * related global memory allocated for the library processing.
14733 * It doesn't deallocate any document related memory. One should
14734 * call xmlCleanupParser() only when the process has finished using
14735 * the library and all XML/HTML documents built with it.
14736 * See also xmlInitParser() which has the opposite function of preparing
14737 * the library for operations.
14739 * WARNING: if your application is multithreaded or has plugin support
14740 * calling this may crash the application if another thread or
14741 * a plugin is still using libxml2. It's sometimes very hard to
14742 * guess if libxml2 is in use in the application, some libraries
14743 * or plugins may use it without notice. In case of doubt abstain
14744 * from calling this function or do it just before calling exit()
14745 * to avoid leak reports from valgrind !
/*
 * Release library-wide state in a fixed order (encoding handlers, catalogs,
 * I/O callbacks, schema types, globals, last error, threads, memory) and
 * reset xmlParserInitialized to 0. Does nothing further when the library
 * was never initialised (the guard's statement is on an omitted line,
 * presumably a return).
 */
14749 xmlCleanupParser(void) {
14750 if (!xmlParserInitialized)
14753 xmlCleanupCharEncodingHandlers();
14754 #ifdef LIBXML_CATALOG_ENABLED
14755 xmlCatalogCleanup();
14758 xmlCleanupInputCallbacks();
14759 #ifdef LIBXML_OUTPUT_ENABLED
14760 xmlCleanupOutputCallbacks();
14762 #ifdef LIBXML_SCHEMAS_ENABLED
14763 xmlSchemaCleanupTypes();
14764 xmlRelaxNGCleanupTypes();
14766 xmlCleanupGlobals();
14767 xmlResetLastError();
14768 xmlCleanupThreads(); /* must be last if called not from the main thread */
14769 xmlCleanupMemory();
/* Allows a later xmlInitParser() call to run the initialisation again. */
14770 xmlParserInitialized = 0;
14773 /************************************************************************
14775 * New set (2.6.0) of simpler and more flexible APIs *
14777 ************************************************************************/
14783 * Free a string if it is not owned by the "dict" dictionary in the
/*
 * DICT_FREE(str): call xmlFree() on @str when it is non-NULL and either no
 * dictionary is in use (`dict` is NULL) or xmlDictOwns() reports that the
 * dictionary does not own it. The macro reads a variable named `dict` that
 * must be in scope at the point of use.
 * NOTE(review): the expansion is a bare `if` statement ending in ';' (not
 * wrapped in do { } while (0)), so it is only safe as a stand-alone
 * statement, not directly before an `else`.
 */
14786 #define DICT_FREE(str) \
14787 if ((str) && ((!dict) || \
14788 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
14789 xmlFree((char *)(str));
14793 * @ctxt: an XML parser context
14795 * Reset a parser context
/*
 * Return @ctxt to a pristine pre-parse state so it can be reused: all
 * inputs are popped and freed, per-document strings and the document tree
 * are released, state flags and counters are reset, and the default /
 * special attribute hash tables are destroyed.
 * NOTE(review): a number of short lines (the `dict` local used by
 * DICT_FREE, the NULL check on @ctxt, several counter resets) are omitted
 * from this listing.
 */
14798 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14800 xmlParserInputPtr input;
/* Drain the input stack, freeing every stream. */
14808 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14809 xmlFreeInputStream(input);
14812 ctxt->input = NULL;
/* Reset the space stack to a single -1 entry when a table exists. */
14815 if (ctxt->spaceTab != NULL) {
14816 ctxt->spaceTab[0] = -1;
14817 ctxt->space = &ctxt->spaceTab[0];
14819 ctxt->space = NULL;
/*
 * Per-document strings: DICT_FREE() frees each one unless the dictionary
 * owns it, then the field is cleared.
 */
14829 DICT_FREE(ctxt->version);
14830 ctxt->version = NULL;
14831 DICT_FREE(ctxt->encoding);
14832 ctxt->encoding = NULL;
14833 DICT_FREE(ctxt->directory);
14834 ctxt->directory = NULL;
14835 DICT_FREE(ctxt->extSubURI);
14836 ctxt->extSubURI = NULL;
14837 DICT_FREE(ctxt->extSubSystem);
14838 ctxt->extSubSystem = NULL;
/* Any tree still attached to the context is destroyed. */
14839 if (ctxt->myDoc != NULL)
14840 xmlFreeDoc(ctxt->myDoc);
14841 ctxt->myDoc = NULL;
14843 ctxt->standalone = -1;
14844 ctxt->hasExternalSubset = 0;
14845 ctxt->hasPErefs = 0;
14847 ctxt->external = 0;
14848 ctxt->instate = XML_PARSER_START;
14851 ctxt->wellFormed = 1;
14852 ctxt->nsWellFormed = 1;
14853 ctxt->disableSAX = 0;
/*
 * NOTE(review): whether the three validation-context assignments below are
 * compiled in cannot be told from this listing (adjacent lines are omitted).
 */
14856 ctxt->vctxt.userData = ctxt;
14857 ctxt->vctxt.error = xmlParserValidityError;
14858 ctxt->vctxt.warning = xmlParserValidityWarning;
14860 ctxt->record_info = 0;
14862 ctxt->checkIndex = 0;
14863 ctxt->inSubset = 0;
14864 ctxt->errNo = XML_ERR_OK;
14866 ctxt->charset = XML_CHAR_ENCODING_UTF8;
/*
 * NOTE(review): catalogs is cleared here, before the
 * `if (ctxt->catalogs != NULL) xmlCatalogFreeLocal(...)` guard further
 * down. In the visible code that guard can therefore never be true, so a
 * local catalog list attached to the context would not be freed. Looks
 * like a leak; consider clearing the pointer after the free instead.
 */
14867 ctxt->catalogs = NULL;
14868 ctxt->nbentities = 0;
14869 ctxt->sizeentities = 0;
14870 ctxt->sizeentcopy = 0;
14871 xmlInitNodeInfoSeq(&ctxt->node_seq);
/* attsDefault entries are released with xmlFree; attsSpecial passes no deallocator. */
14873 if (ctxt->attsDefault != NULL) {
14874 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14875 ctxt->attsDefault = NULL;
14877 if (ctxt->attsSpecial != NULL) {
14878 xmlHashFree(ctxt->attsSpecial, NULL);
14879 ctxt->attsSpecial = NULL;
14882 #ifdef LIBXML_CATALOG_ENABLED
14883 if (ctxt->catalogs != NULL)
14884 xmlCatalogFreeLocal(ctxt->catalogs);
/* Clear the recorded last error only when one is set. */
14886 if (ctxt->lastError.code != XML_ERR_OK)
14887 xmlResetError(&ctxt->lastError);
14891 * xmlCtxtResetPush:
14892 * @ctxt: an XML parser context
14893 * @chunk: a pointer to an array of chars
14894 * @size: number of chars in the array
14895 * @filename: an optional file name or URI
14896 * @encoding: the document encoding, or NULL
14898 * Reset a push parser context
14900 * Returns 0 in case of success and 1 in case of error
/*
 * Reset @ctxt and prepare it for push parsing: a new empty input buffer is
 * allocated, the context is reset with xmlCtxtReset(), an input stream
 * wrapping the buffer is pushed, the optional first @chunk is fed in, and
 * the encoding is set either from @encoding or from auto-detection on the
 * chunk.
 * NOTE(review): the return statements of every branch are on lines omitted
 * from this listing.
 */
14903 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14904 int size, const char *filename, const char *encoding)
14906 xmlParserInputPtr inputStream;
14907 xmlParserInputBufferPtr buf;
14908 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
/* Auto-detect only when no encoding was given and at least 4 bytes are available. */
14913 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14914 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14916 buf = xmlAllocParserInputBuffer(enc);
/* The buffer was allocated before this check, so it is released here. */
14920 if (ctxt == NULL) {
14921 xmlFreeParserInputBuffer(buf);
14925 xmlCtxtReset(ctxt);
/* Allocate the push table on first use: three pointer-sized slots per name-stack entry. */
14927 if (ctxt->pushTab == NULL) {
14928 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14929 sizeof(xmlChar *));
14930 if (ctxt->pushTab == NULL) {
14931 xmlErrMemory(ctxt, NULL);
14932 xmlFreeParserInputBuffer(buf);
/* xmlCtxtReset() cleared directory above; it is recomputed from @filename. */
14937 if (filename == NULL) {
14938 ctxt->directory = NULL;
14940 ctxt->directory = xmlParserGetDirectory(filename);
14943 inputStream = xmlNewInputStream(ctxt);
14944 if (inputStream == NULL) {
14945 xmlFreeParserInputBuffer(buf);
14949 if (filename == NULL)
14950 inputStream->filename = NULL;
14952 inputStream->filename = (char *)
14953 xmlCanonicPath((const xmlChar *) filename);
/* The stream takes the buffer and its base/cur/end are synchronised with it. */
14954 inputStream->buf = buf;
14955 xmlBufResetInput(buf->buffer, inputStream);
14957 inputPush(ctxt, inputStream);
/*
 * Feed the initial chunk. The input's base and cur offsets are captured
 * before the push and re-applied afterwards through
 * xmlBufSetInputBaseCur().
 */
14959 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14960 (ctxt->input->buf != NULL)) {
14961 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14962 size_t cur = ctxt->input->cur - ctxt->input->base;
14964 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14966 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
/* NOTE(review): presumably inside a debug-only #ifdef whose lines are omitted; confirm. */
14968 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* An explicit encoding replaces any stored name and switches the input decoder. */
14972 if (encoding != NULL) {
14973 xmlCharEncodingHandlerPtr hdlr;
14975 if (ctxt->encoding != NULL)
14976 xmlFree((xmlChar *) ctxt->encoding);
14977 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14979 hdlr = xmlFindCharEncodingHandler(encoding);
14980 if (hdlr != NULL) {
14981 xmlSwitchToEncoding(ctxt, hdlr);
14983 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14984 "Unsupported encoding %s\n", BAD_CAST encoding);
/* No explicit encoding: fall back to whatever was auto-detected, if anything. */
14986 } else if (enc != XML_CHAR_ENCODING_NONE) {
14987 xmlSwitchEncoding(ctxt, enc);
14995 * xmlCtxtUseOptionsInternal:
14996 * @ctxt: an XML parser context
14997 * @options: a combination of xmlParserOption
14998 * @encoding: the user provided encoding to use
15000 * Applies the options to the parser context
15002 * Returns 0 in case of success, the set of unknown or unimplemented options
15003 * in case of error.
/*
 * Translate the xmlParserOption bit mask @options into individual context
 * fields and SAX handler tweaks. Each recognised flag is subtracted from the
 * local @options value as it is handled; most are also OR-ed into
 * ctxt->options. When @encoding is non-NULL it replaces ctxt->encoding with
 * a fresh copy.
 * NOTE(review): the `else` keywords, closing braces, the NULL check on
 * @ctxt and the final return are on lines omitted from this listing; the
 * assignments to 0/1 that directly follow each flag block are presumably
 * the else-branches.
 */
15006 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15010 if (encoding != NULL) {
15011 if (ctxt->encoding != NULL)
15012 xmlFree((xmlChar *) ctxt->encoding);
15013 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015 if (options & XML_PARSE_RECOVER) {
15016 ctxt->recovery = 1;
15017 options -= XML_PARSE_RECOVER;
15018 ctxt->options |= XML_PARSE_RECOVER;
15020 ctxt->recovery = 0;
15021 if (options & XML_PARSE_DTDLOAD) {
15022 ctxt->loadsubset = XML_DETECT_IDS;
15023 options -= XML_PARSE_DTDLOAD;
15024 ctxt->options |= XML_PARSE_DTDLOAD;
15026 ctxt->loadsubset = 0;
/* DTDATTR adds to loadsubset rather than overwriting it. */
15027 if (options & XML_PARSE_DTDATTR) {
15028 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15029 options -= XML_PARSE_DTDATTR;
15030 ctxt->options |= XML_PARSE_DTDATTR;
15032 if (options & XML_PARSE_NOENT) {
15033 ctxt->replaceEntities = 1;
15034 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15035 options -= XML_PARSE_NOENT;
15036 ctxt->options |= XML_PARSE_NOENT;
15038 ctxt->replaceEntities = 0;
15039 if (options & XML_PARSE_PEDANTIC) {
15040 ctxt->pedantic = 1;
15041 options -= XML_PARSE_PEDANTIC;
15042 ctxt->options |= XML_PARSE_PEDANTIC;
15044 ctxt->pedantic = 0;
/* NOBLANKS also routes ignorable whitespace to xmlSAX2IgnorableWhitespace. */
15045 if (options & XML_PARSE_NOBLANKS) {
15046 ctxt->keepBlanks = 0;
15047 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15048 options -= XML_PARSE_NOBLANKS;
15049 ctxt->options |= XML_PARSE_NOBLANKS;
15051 ctxt->keepBlanks = 1;
/*
 * Validation: NOWARNING / NOERROR are consulted here, before they are
 * subtracted below, to silence the validation context's callbacks too.
 */
15052 if (options & XML_PARSE_DTDVALID) {
15053 ctxt->validate = 1;
15054 if (options & XML_PARSE_NOWARNING)
15055 ctxt->vctxt.warning = NULL;
15056 if (options & XML_PARSE_NOERROR)
15057 ctxt->vctxt.error = NULL;
15058 options -= XML_PARSE_DTDVALID;
15059 ctxt->options |= XML_PARSE_DTDVALID;
15061 ctxt->validate = 0;
/*
 * NOWARNING / NOERROR clear SAX callbacks; in the visible lines they are
 * not OR-ed into ctxt->options.
 */
15062 if (options & XML_PARSE_NOWARNING) {
15063 ctxt->sax->warning = NULL;
15064 options -= XML_PARSE_NOWARNING;
15066 if (options & XML_PARSE_NOERROR) {
15067 ctxt->sax->error = NULL;
15068 ctxt->sax->fatalError = NULL;
15069 options -= XML_PARSE_NOERROR;
15071 #ifdef LIBXML_SAX1_ENABLED
/* SAX1: use the element callbacks without namespace support and drop the Ns variants. */
15072 if (options & XML_PARSE_SAX1) {
15073 ctxt->sax->startElement = xmlSAX2StartElement;
15074 ctxt->sax->endElement = xmlSAX2EndElement;
15075 ctxt->sax->startElementNs = NULL;
15076 ctxt->sax->endElementNs = NULL;
15077 ctxt->sax->initialized = 1;
15078 options -= XML_PARSE_SAX1;
15079 ctxt->options |= XML_PARSE_SAX1;
15081 #endif /* LIBXML_SAX1_ENABLED */
15082 if (options & XML_PARSE_NODICT) {
15083 ctxt->dictNames = 0;
15084 options -= XML_PARSE_NODICT;
15085 ctxt->options |= XML_PARSE_NODICT;
15087 ctxt->dictNames = 1;
15089 if (options & XML_PARSE_NOCDATA) {
15090 ctxt->sax->cdataBlock = NULL;
15091 options -= XML_PARSE_NOCDATA;
15092 ctxt->options |= XML_PARSE_NOCDATA;
/* The remaining flags are only recorded in ctxt->options. */
15094 if (options & XML_PARSE_NSCLEAN) {
15095 ctxt->options |= XML_PARSE_NSCLEAN;
15096 options -= XML_PARSE_NSCLEAN;
15098 if (options & XML_PARSE_NONET) {
15099 ctxt->options |= XML_PARSE_NONET;
15100 options -= XML_PARSE_NONET;
15102 if (options & XML_PARSE_COMPACT) {
15103 ctxt->options |= XML_PARSE_COMPACT;
15104 options -= XML_PARSE_COMPACT;
15106 if (options & XML_PARSE_OLD10) {
15107 ctxt->options |= XML_PARSE_OLD10;
15108 options -= XML_PARSE_OLD10;
15110 if (options & XML_PARSE_NOBASEFIX) {
15111 ctxt->options |= XML_PARSE_NOBASEFIX;
15112 options -= XML_PARSE_NOBASEFIX;
/* HUGE additionally calls xmlDictSetLimit(dict, 0) when a dictionary exists. */
15114 if (options & XML_PARSE_HUGE) {
15115 ctxt->options |= XML_PARSE_HUGE;
15116 options -= XML_PARSE_HUGE;
15117 if (ctxt->dict != NULL)
15118 xmlDictSetLimit(ctxt->dict, 0);
15120 if (options & XML_PARSE_OLDSAX) {
15121 ctxt->options |= XML_PARSE_OLDSAX;
15122 options -= XML_PARSE_OLDSAX;
15124 if (options & XML_PARSE_IGNORE_ENC) {
15125 ctxt->options |= XML_PARSE_IGNORE_ENC;
15126 options -= XML_PARSE_IGNORE_ENC;
15128 if (options & XML_PARSE_BIG_LINES) {
15129 ctxt->options |= XML_PARSE_BIG_LINES;
15130 options -= XML_PARSE_BIG_LINES;
15132 ctxt->linenumbers = 1;
15137 * xmlCtxtUseOptions:
15138 * @ctxt: an XML parser context
15139 * @options: a combination of xmlParserOption
15141 * Applies the options to the parser context
15143 * Returns 0 in case of success, the set of unknown or unimplemented options
15144 * in case of error.
/* Public wrapper: calls xmlCtxtUseOptionsInternal() with no encoding (NULL). */
15147 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15149 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15154 * @ctxt: an XML parser context
15155 * @URL: the base URL to use for the document
15156 * @encoding: the document encoding, or NULL
15157 * @options: a combination of xmlParserOption
15158 * @reuse: keep the context for reuse
15160 * Common front-end for the xmlRead functions
15162 * Returns the resulting document tree or NULL
/*
 * Shared back end of the xmlRead* / xmlCtxtRead* functions: applies
 * @options and @encoding to @ctxt, fills in the input's filename from @URL
 * when it has none, runs xmlParseDocument() and then detaches the document
 * from the context.
 * NOTE(review): the declaration and assignment of the result, the test on
 * @reuse and the final return are on lines omitted from this listing.
 */
15165 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15166 int options, int reuse)
15170 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
/* An explicit encoding also switches the input decoder when a handler is found. */
15171 if (encoding != NULL) {
15172 xmlCharEncodingHandlerPtr hdlr;
15174 hdlr = xmlFindCharEncodingHandler(encoding);
15176 xmlSwitchToEncoding(ctxt, hdlr);
/* @URL is copied in only when the current input has no filename yet. */
15178 if ((URL != NULL) && (ctxt->input != NULL) &&
15179 (ctxt->input->filename == NULL))
15180 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15181 xmlParseDocument(ctxt);
/* Uses the context's recovery field, which xmlCtxtUseOptionsInternal() set from @options. */
15182 if ((ctxt->wellFormed) || ctxt->recovery)
15186 if (ctxt->myDoc != NULL) {
15187 xmlFreeDoc(ctxt->myDoc);
/* The context never keeps a pointer to the document after this call. */
15190 ctxt->myDoc = NULL;
/* NOTE(review): presumably executed only when @reuse is 0; the guard is not visible. */
15192 xmlFreeParserCtxt(ctxt);
15200 * @cur: a pointer to a zero terminated string
15201 * @URL: the base URL to use for the document
15202 * @encoding: the document encoding, or NULL
15203 * @options: a combination of xmlParserOption
15205 * parse an XML in-memory document and build a tree.
15207 * Returns the resulting document tree
/*
 * Create a one-shot context over the string @cur and hand it to xmlDoRead()
 * with reuse set to 0.
 * NOTE(review): the NULL checks are on lines omitted from this listing.
 */
15210 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15212 xmlParserCtxtPtr ctxt;
15217 ctxt = xmlCreateDocParserCtxt(cur);
15220 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15225 * @filename: a file or URL
15226 * @encoding: the document encoding, or NULL
15227 * @options: a combination of xmlParserOption
15229 * parse an XML file from the filesystem or the network.
15231 * Returns the resulting document tree
/*
 * Create a one-shot context for @filename (options are passed both to
 * xmlCreateURLParserCtxt() and to xmlDoRead()) and parse it with reuse set
 * to 0. No separate URL is given to xmlDoRead() (NULL).
 */
15234 xmlReadFile(const char *filename, const char *encoding, int options)
15236 xmlParserCtxtPtr ctxt;
15238 ctxt = xmlCreateURLParserCtxt(filename, options);
15241 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15246 * @buffer: a pointer to a char array
15247 * @size: the size of the array
15248 * @URL: the base URL to use for the document
15249 * @encoding: the document encoding, or NULL
15250 * @options: a combination of xmlParserOption
15252 * parse an XML in-memory document and build a tree.
15254 * Returns the resulting document tree
/*
 * Create a one-shot context over @size bytes at @buffer and hand it to
 * xmlDoRead() with reuse set to 0.
 */
15257 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15259 xmlParserCtxtPtr ctxt;
15261 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15264 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15269 * @fd: an open file descriptor
15270 * @URL: the base URL to use for the document
15271 * @encoding: the document encoding, or NULL
15272 * @options: a combination of xmlParserOption
15274 * parse an XML from a file descriptor and build a tree.
15275 * NOTE that the file descriptor will not be closed when the
15276 * reader is closed or reset.
15278 * Returns the resulting document tree
/*
 * Parse a document read from file descriptor @fd using a one-shot context
 * (xmlDoRead() is called with reuse set to 0).
 * NOTE(review): the argument checks and the return statements of the
 * failure branches are on lines omitted from this listing.
 */
15281 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15283 xmlParserCtxtPtr ctxt;
15284 xmlParserInputBufferPtr input;
15285 xmlParserInputPtr stream;
15290 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* Clearing the close callback leaves closing @fd to the caller. */
15293 input->closecallback = NULL;
15294 ctxt = xmlNewParserCtxt();
15295 if (ctxt == NULL) {
15296 xmlFreeParserInputBuffer(input);
15299 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15300 if (stream == NULL) {
/* Stream creation failed: release both the input buffer and the context. */
15301 xmlFreeParserInputBuffer(input);
15302 xmlFreeParserCtxt(ctxt);
15305 inputPush(ctxt, stream);
15306 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15311 * @ioread: an I/O read function
15312 * @ioclose: an I/O close function
15313 * @ioctx: an I/O handler
15314 * @URL: the base URL to use for the document
15315 * @encoding: the document encoding, or NULL
15316 * @options: a combination of xmlParserOption
15318 * parse an XML document from I/O functions and source and build a tree.
15320 * Returns the resulting document tree
/*
 * Parse a document obtained through the caller's @ioread / @ioclose
 * callbacks on @ioctx, using a one-shot context (xmlDoRead() is called with
 * reuse set to 0).
 * NOTE(review): the return statements of the failure branches are on lines
 * omitted from this listing.
 */
15323 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15324 void *ioctx, const char *URL, const char *encoding, int options)
15326 xmlParserCtxtPtr ctxt;
15327 xmlParserInputBufferPtr input;
15328 xmlParserInputPtr stream;
/* A read callback is mandatory. */
15330 if (ioread == NULL)
15333 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15334 XML_CHAR_ENCODING_NONE);
15335 if (input == NULL) {
/* NOTE(review): presumably invokes ioclose(ioctx) on the omitted next line; confirm. */
15336 if (ioclose != NULL)
15340 ctxt = xmlNewParserCtxt();
15341 if (ctxt == NULL) {
15342 xmlFreeParserInputBuffer(input);
15345 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15346 if (stream == NULL) {
/* Stream creation failed: release both the input buffer and the context. */
15347 xmlFreeParserInputBuffer(input);
15348 xmlFreeParserCtxt(ctxt);
15351 inputPush(ctxt, stream);
15352 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15357 * @ctxt: an XML parser context
15358 * @cur: a pointer to a zero terminated string
15359 * @URL: the base URL to use for the document
15360 * @encoding: the document encoding, or NULL
15361 * @options: a combination of xmlParserOption
15363 * parse an XML in-memory document and build a tree.
15364 * This reuses the existing @ctxt parser context
15366 * Returns the resulting document tree
/*
 * Parse the string @cur with the caller's existing @ctxt: the context is
 * reset, a string input stream over @cur is pushed, and xmlDoRead() is
 * called with reuse set to 1.
 * NOTE(review): the argument checks and the failure-branch return are on
 * lines omitted from this listing.
 */
15369 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15370 const char *URL, const char *encoding, int options)
15372 xmlParserInputPtr stream;
15379 xmlCtxtReset(ctxt);
15381 stream = xmlNewStringInputStream(ctxt, cur);
15382 if (stream == NULL) {
15385 inputPush(ctxt, stream);
15386 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15391 * @ctxt: an XML parser context
15392 * @filename: a file or URL
15393 * @encoding: the document encoding, or NULL
15394 * @options: a combination of xmlParserOption
15396 * parse an XML file from the filesystem or the network.
15397 * This reuses the existing @ctxt parser context
15399 * Returns the resulting document tree
/*
 * Parse @filename with the caller's existing @ctxt: the context is reset,
 * the file is loaded through xmlLoadExternalEntity() and pushed, and
 * xmlDoRead() is called with a NULL URL and reuse set to 1.
 * NOTE(review): the statements executed by the guards (presumably returns)
 * are on lines omitted from this listing.
 */
15402 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15403 const char *encoding, int options)
15405 xmlParserInputPtr stream;
15407 if (filename == NULL)
15412 xmlCtxtReset(ctxt);
15414 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15415 if (stream == NULL) {
15418 inputPush(ctxt, stream);
15419 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15423 * xmlCtxtReadMemory:
15424 * @ctxt: an XML parser context
15425 * @buffer: a pointer to a char array
15426 * @size: the size of the array
15427 * @URL: the base URL to use for the document
15428 * @encoding: the document encoding, or NULL
15429 * @options: a combination of xmlParserOption
15431 * parse an XML in-memory document and build a tree.
15432 * This reuses the existing @ctxt parser context
15434 * Returns the resulting document tree
15437 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15438 const char *URL, const char *encoding, int options)
15440 xmlParserInputBufferPtr input;
15441 xmlParserInputPtr stream;
15445 if (buffer == NULL)
15448 xmlCtxtReset(ctxt);
15450 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15451 if (input == NULL) {
15455 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15456 if (stream == NULL) {
15457 xmlFreeParserInputBuffer(input);
15461 inputPush(ctxt, stream);
15462 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15467 * @ctxt: an XML parser context
15468 * @fd: an open file descriptor
15469 * @URL: the base URL to use for the document
15470 * @encoding: the document encoding, or NULL
15471 * @options: a combination of xmlParserOption
15473 * parse an XML from a file descriptor and build a tree.
15474 * This reuses the existing @ctxt parser context
15475 * NOTE that the file descriptor will not be closed when the
15476 * reader is closed or reset.
15478 * Returns the resulting document tree
15481 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15482 const char *URL, const char *encoding, int options)
15484 xmlParserInputBufferPtr input;
15485 xmlParserInputPtr stream;
15492 xmlCtxtReset(ctxt);
15495 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15498 input->closecallback = NULL;
15499 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15500 if (stream == NULL) {
15501 xmlFreeParserInputBuffer(input);
15504 inputPush(ctxt, stream);
15505 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15510 * @ctxt: an XML parser context
15511 * @ioread: an I/O read function
15512 * @ioclose: an I/O close function
15513 * @ioctx: an I/O handler
15514 * @URL: the base URL to use for the document
15515 * @encoding: the document encoding, or NULL
15516 * @options: a combination of xmlParserOption
15518 * parse an XML document from I/O functions and source and build a tree.
15519 * This reuses the existing @ctxt parser context
15521 * Returns the resulting document tree
15524 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15525 xmlInputCloseCallback ioclose, void *ioctx,
15527 const char *encoding, int options)
15529 xmlParserInputBufferPtr input;
15530 xmlParserInputPtr stream;
15532 if (ioread == NULL)
15537 xmlCtxtReset(ctxt);
15539 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15540 XML_CHAR_ENCODING_NONE);
15541 if (input == NULL) {
15542 if (ioclose != NULL)
15546 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15547 if (stream == NULL) {
15548 xmlFreeParserInputBuffer(input);
15551 inputPush(ctxt, stream);
15552 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15555 #define bottom_parser
15556 #include "elfgcchack.h"