2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
70 #ifdef HAVE_SYS_STAT_H
87 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
89 static xmlParserCtxtPtr
90 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
91 const xmlChar *base, xmlParserCtxtPtr pctx);
93 /************************************************************************
95 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
97 ************************************************************************/
99 #define XML_PARSER_BIG_ENTITY 1000
100 #define XML_PARSER_LOT_ENTITY 5000
103 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
104 * replacement over the size in byte of the input indicates that you have
105 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
106 * replacement per byte of input.
108 #define XML_PARSER_NON_LINEAR 10
111 * xmlParserEntityCheck
113 * Function to check non-linear entity expansion behaviour
114 * This is here to detect and stop exponential linear entity expansion
115 * This is not a limitation of the parser but a safety
116 * boundary feature. It can be disabled with the XML_PARSE_HUGE
120 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
123 unsigned long consumed = 0;
125 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
127 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
131 * Do the check based on the replacement size of the entity
133 if (size < XML_PARSER_BIG_ENTITY)
137 * A limit on the amount of text data reasonably used
139 if (ctxt->input != NULL) {
140 consumed = ctxt->input->consumed +
141 (ctxt->input->cur - ctxt->input->base);
143 consumed += ctxt->sizeentities;
145 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
146 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
148 } else if (ent != NULL) {
150 * use the number of parsed entities in the replacement
155 * The amount of data parsed counting entities size only once
157 if (ctxt->input != NULL) {
158 consumed = ctxt->input->consumed +
159 (ctxt->input->cur - ctxt->input->base);
161 consumed += ctxt->sizeentities;
164 * Check the density of entities for the amount of data
165 * knowing an entity reference will take at least 3 bytes
167 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
171 * strange we got no data for checking just return
176 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
183 * arbitrary depth limit for the XML documents that we allow to
184 * process. This is not a limitation of the parser but a safety
185 * boundary feature. It can be disabled with the XML_PARSE_HUGE
188 unsigned int xmlParserMaxDepth = 256;
193 #define XML_PARSER_BIG_BUFFER_SIZE 300
194 #define XML_PARSER_BUFFER_SIZE 100
195 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
198 * List of XML prefixed PI allowed by W3C specs
201 static const char *xmlW3CPIs[] = {
208 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
209 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
210 const xmlChar **str);
212 static xmlParserErrors
213 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
214 xmlSAXHandlerPtr sax,
215 void *user_data, int depth, const xmlChar *URL,
216 const xmlChar *ID, xmlNodePtr *list);
219 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
220 const char *encoding);
221 #ifdef LIBXML_LEGACY_ENABLED
223 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
224 xmlNodePtr lastNode);
225 #endif /* LIBXML_LEGACY_ENABLED */
227 static xmlParserErrors
228 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
229 const xmlChar *string, void *user_data, xmlNodePtr *lst);
232 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
234 /************************************************************************
236 * Some factorized error routines *
238 ************************************************************************/
241 * xmlErrAttributeDup:
242 * @ctxt: an XML parser context
243 * @prefix: the attribute prefix
244 * @localname: the attribute localname
246 * Handle a redefinition of attribute error
249 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
250 const xmlChar * localname)
252 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253 (ctxt->instate == XML_PARSER_EOF))
256 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
260 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
261 (const char *) localname, NULL, NULL, 0, 0,
262 "Attribute %s redefined\n", localname);
264 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
265 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
266 (const char *) prefix, (const char *) localname,
267 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
270 ctxt->wellFormed = 0;
271 if (ctxt->recovery == 0)
272 ctxt->disableSAX = 1;
278 * @ctxt: an XML parser context
279 * @error: the error number
280 * @extra: extra information string
282 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
285 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
289 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
290 (ctxt->instate == XML_PARSER_EOF))
293 case XML_ERR_INVALID_HEX_CHARREF:
294 errmsg = "CharRef: invalid hexadecimal value\n";
296 case XML_ERR_INVALID_DEC_CHARREF:
297 errmsg = "CharRef: invalid decimal value\n";
299 case XML_ERR_INVALID_CHARREF:
300 errmsg = "CharRef: invalid value\n";
302 case XML_ERR_INTERNAL_ERROR:
303 errmsg = "internal error";
305 case XML_ERR_PEREF_AT_EOF:
306 errmsg = "PEReference at end of document\n";
308 case XML_ERR_PEREF_IN_PROLOG:
309 errmsg = "PEReference in prolog\n";
311 case XML_ERR_PEREF_IN_EPILOG:
312 errmsg = "PEReference in epilog\n";
314 case XML_ERR_PEREF_NO_NAME:
315 errmsg = "PEReference: no name\n";
317 case XML_ERR_PEREF_SEMICOL_MISSING:
318 errmsg = "PEReference: expecting ';'\n";
320 case XML_ERR_ENTITY_LOOP:
321 errmsg = "Detected an entity reference loop\n";
323 case XML_ERR_ENTITY_NOT_STARTED:
324 errmsg = "EntityValue: \" or ' expected\n";
326 case XML_ERR_ENTITY_PE_INTERNAL:
327 errmsg = "PEReferences forbidden in internal subset\n";
329 case XML_ERR_ENTITY_NOT_FINISHED:
330 errmsg = "EntityValue: \" or ' expected\n";
332 case XML_ERR_ATTRIBUTE_NOT_STARTED:
333 errmsg = "AttValue: \" or ' expected\n";
335 case XML_ERR_LT_IN_ATTRIBUTE:
336 errmsg = "Unescaped '<' not allowed in attributes values\n";
338 case XML_ERR_LITERAL_NOT_STARTED:
339 errmsg = "SystemLiteral \" or ' expected\n";
341 case XML_ERR_LITERAL_NOT_FINISHED:
342 errmsg = "Unfinished System or Public ID \" or ' expected\n";
344 case XML_ERR_MISPLACED_CDATA_END:
345 errmsg = "Sequence ']]>' not allowed in content\n";
347 case XML_ERR_URI_REQUIRED:
348 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
350 case XML_ERR_PUBID_REQUIRED:
351 errmsg = "PUBLIC, the Public Identifier is missing\n";
353 case XML_ERR_HYPHEN_IN_COMMENT:
354 errmsg = "Comment must not contain '--' (double-hyphen)\n";
356 case XML_ERR_PI_NOT_STARTED:
357 errmsg = "xmlParsePI : no target name\n";
359 case XML_ERR_RESERVED_XML_NAME:
360 errmsg = "Invalid PI name\n";
362 case XML_ERR_NOTATION_NOT_STARTED:
363 errmsg = "NOTATION: Name expected here\n";
365 case XML_ERR_NOTATION_NOT_FINISHED:
366 errmsg = "'>' required to close NOTATION declaration\n";
368 case XML_ERR_VALUE_REQUIRED:
369 errmsg = "Entity value required\n";
371 case XML_ERR_URI_FRAGMENT:
372 errmsg = "Fragment not allowed";
374 case XML_ERR_ATTLIST_NOT_STARTED:
375 errmsg = "'(' required to start ATTLIST enumeration\n";
377 case XML_ERR_NMTOKEN_REQUIRED:
378 errmsg = "NmToken expected in ATTLIST enumeration\n";
380 case XML_ERR_ATTLIST_NOT_FINISHED:
381 errmsg = "')' required to finish ATTLIST enumeration\n";
383 case XML_ERR_MIXED_NOT_STARTED:
384 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
386 case XML_ERR_PCDATA_REQUIRED:
387 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
389 case XML_ERR_ELEMCONTENT_NOT_STARTED:
390 errmsg = "ContentDecl : Name or '(' expected\n";
392 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
393 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
395 case XML_ERR_PEREF_IN_INT_SUBSET:
397 "PEReference: forbidden within markup decl in internal subset\n";
399 case XML_ERR_GT_REQUIRED:
400 errmsg = "expected '>'\n";
402 case XML_ERR_CONDSEC_INVALID:
403 errmsg = "XML conditional section '[' expected\n";
405 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
406 errmsg = "Content error in the external subset\n";
408 case XML_ERR_CONDSEC_INVALID_KEYWORD:
410 "conditional section INCLUDE or IGNORE keyword expected\n";
412 case XML_ERR_CONDSEC_NOT_FINISHED:
413 errmsg = "XML conditional section not closed\n";
415 case XML_ERR_XMLDECL_NOT_STARTED:
416 errmsg = "Text declaration '<?xml' required\n";
418 case XML_ERR_XMLDECL_NOT_FINISHED:
419 errmsg = "parsing XML declaration: '?>' expected\n";
421 case XML_ERR_EXT_ENTITY_STANDALONE:
422 errmsg = "external parsed entities cannot be standalone\n";
424 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
425 errmsg = "EntityRef: expecting ';'\n";
427 case XML_ERR_DOCTYPE_NOT_FINISHED:
428 errmsg = "DOCTYPE improperly terminated\n";
430 case XML_ERR_LTSLASH_REQUIRED:
431 errmsg = "EndTag: '</' not found\n";
433 case XML_ERR_EQUAL_REQUIRED:
434 errmsg = "expected '='\n";
436 case XML_ERR_STRING_NOT_CLOSED:
437 errmsg = "String not closed expecting \" or '\n";
439 case XML_ERR_STRING_NOT_STARTED:
440 errmsg = "String not started expecting ' or \"\n";
442 case XML_ERR_ENCODING_NAME:
443 errmsg = "Invalid XML encoding name\n";
445 case XML_ERR_STANDALONE_VALUE:
446 errmsg = "standalone accepts only 'yes' or 'no'\n";
448 case XML_ERR_DOCUMENT_EMPTY:
449 errmsg = "Document is empty\n";
451 case XML_ERR_DOCUMENT_END:
452 errmsg = "Extra content at the end of the document\n";
454 case XML_ERR_NOT_WELL_BALANCED:
455 errmsg = "chunk is not well balanced\n";
457 case XML_ERR_EXTRA_CONTENT:
458 errmsg = "extra content at the end of well balanced chunk\n";
460 case XML_ERR_VERSION_MISSING:
461 errmsg = "Malformed declaration expecting version\n";
469 errmsg = "Unregistered error message\n";
473 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
474 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
477 ctxt->wellFormed = 0;
478 if (ctxt->recovery == 0)
479 ctxt->disableSAX = 1;
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
492 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
500 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
501 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
520 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
521 const char *msg, const xmlChar *str1, const xmlChar *str2)
523 xmlStructuredErrorFunc schannel = NULL;
525 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
526 (ctxt->instate == XML_PARSER_EOF))
528 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
529 (ctxt->sax->initialized == XML_SAX2_MAGIC))
530 schannel = ctxt->sax->serror;
532 __xmlRaiseError(schannel,
533 (ctxt->sax) ? ctxt->sax->warning : NULL,
535 ctxt, NULL, XML_FROM_PARSER, error,
536 XML_ERR_WARNING, NULL, 0,
537 (const char *) str1, (const char *) str2, NULL, 0, 0,
538 msg, (const char *) str1, (const char *) str2);
540 __xmlRaiseError(schannel, NULL, NULL,
541 ctxt, NULL, XML_FROM_PARSER, error,
542 XML_ERR_WARNING, NULL, 0,
543 (const char *) str1, (const char *) str2, NULL, 0, 0,
544 msg, (const char *) str1, (const char *) str2);
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
555 * Handle a validity error.
558 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
559 const char *msg, const xmlChar *str1, const xmlChar *str2)
561 xmlStructuredErrorFunc schannel = NULL;
563 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
564 (ctxt->instate == XML_PARSER_EOF))
568 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
569 schannel = ctxt->sax->serror;
572 __xmlRaiseError(schannel,
573 ctxt->vctxt.error, ctxt->vctxt.userData,
574 ctxt, NULL, XML_FROM_DTD, error,
575 XML_ERR_ERROR, NULL, 0, (const char *) str1,
576 (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
580 __xmlRaiseError(schannel, NULL, NULL,
581 ctxt, NULL, XML_FROM_DTD, error,
582 XML_ERR_ERROR, NULL, 0, (const char *) str1,
583 (const char *) str2, NULL, 0, 0,
584 msg, (const char *) str1, (const char *) str2);
590 * @ctxt: an XML parser context
591 * @error: the error number
592 * @msg: the error message
593 * @val: an integer value
595 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
599 const char *msg, int val)
601 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
602 (ctxt->instate == XML_PARSER_EOF))
606 __xmlRaiseError(NULL, NULL, NULL,
607 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
608 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
610 ctxt->wellFormed = 0;
611 if (ctxt->recovery == 0)
612 ctxt->disableSAX = 1;
617 * xmlFatalErrMsgStrIntStr:
618 * @ctxt: an XML parser context
619 * @error: the error number
620 * @msg: the error message
621 * @str1: an string info
622 * @val: an integer value
623 * @str2: an string info
625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629 const char *msg, const xmlChar *str1, int val,
632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633 (ctxt->instate == XML_PARSER_EOF))
637 __xmlRaiseError(NULL, NULL, NULL,
638 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
639 NULL, 0, (const char *) str1, (const char *) str2,
640 NULL, val, 0, msg, str1, val, str2);
642 ctxt->wellFormed = 0;
643 if (ctxt->recovery == 0)
644 ctxt->disableSAX = 1;
650 * @ctxt: an XML parser context
651 * @error: the error number
652 * @msg: the error message
653 * @val: a string value
655 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
658 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
659 const char *msg, const xmlChar * val)
661 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
662 (ctxt->instate == XML_PARSER_EOF))
666 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
667 XML_FROM_PARSER, error, XML_ERR_FATAL,
668 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
671 ctxt->wellFormed = 0;
672 if (ctxt->recovery == 0)
673 ctxt->disableSAX = 1;
679 * @ctxt: an XML parser context
680 * @error: the error number
681 * @msg: the error message
682 * @val: a string value
684 * Handle a non fatal parser error
687 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
688 const char *msg, const xmlChar * val)
690 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
691 (ctxt->instate == XML_PARSER_EOF))
695 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
696 XML_FROM_PARSER, error, XML_ERR_ERROR,
697 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
703 * @ctxt: an XML parser context
704 * @error: the error number
706 * @info1: extra information string
707 * @info2: extra information string
709 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
712 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714 const xmlChar * info1, const xmlChar * info2,
715 const xmlChar * info3)
717 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
718 (ctxt->instate == XML_PARSER_EOF))
722 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
723 XML_ERR_ERROR, NULL, 0, (const char *) info1,
724 (const char *) info2, (const char *) info3, 0, 0, msg,
725 info1, info2, info3);
727 ctxt->nsWellFormed = 0;
732 * @ctxt: an XML parser context
733 * @error: the error number
735 * @info1: extra information string
736 * @info2: extra information string
738 * Handle a namespace warning error
741 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
743 const xmlChar * info1, const xmlChar * info2,
744 const xmlChar * info3)
746 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
747 (ctxt->instate == XML_PARSER_EOF))
749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
750 XML_ERR_WARNING, NULL, 0, (const char *) info1,
751 (const char *) info2, (const char *) info3, 0, 0, msg,
752 info1, info2, info3);
755 /************************************************************************
757 * Library wide options *
759 ************************************************************************/
763 * @feature: the feature to be examined
765 * Examines if the library has been compiled with a given feature.
767 * Returns a non-zero value if the feature exist, otherwise zero.
768 * Returns zero (0) if the feature does not exist or an unknown
769 * unknown feature is requested, non-zero otherwise.
772 xmlHasFeature(xmlFeature feature)
775 case XML_WITH_THREAD:
776 #ifdef LIBXML_THREAD_ENABLED
782 #ifdef LIBXML_TREE_ENABLED
787 case XML_WITH_OUTPUT:
788 #ifdef LIBXML_OUTPUT_ENABLED
794 #ifdef LIBXML_PUSH_ENABLED
799 case XML_WITH_READER:
800 #ifdef LIBXML_READER_ENABLED
805 case XML_WITH_PATTERN:
806 #ifdef LIBXML_PATTERN_ENABLED
811 case XML_WITH_WRITER:
812 #ifdef LIBXML_WRITER_ENABLED
818 #ifdef LIBXML_SAX1_ENABLED
824 #ifdef LIBXML_FTP_ENABLED
830 #ifdef LIBXML_HTTP_ENABLED
836 #ifdef LIBXML_VALID_ENABLED
842 #ifdef LIBXML_HTML_ENABLED
847 case XML_WITH_LEGACY:
848 #ifdef LIBXML_LEGACY_ENABLED
854 #ifdef LIBXML_C14N_ENABLED
859 case XML_WITH_CATALOG:
860 #ifdef LIBXML_CATALOG_ENABLED
866 #ifdef LIBXML_XPATH_ENABLED
872 #ifdef LIBXML_XPTR_ENABLED
877 case XML_WITH_XINCLUDE:
878 #ifdef LIBXML_XINCLUDE_ENABLED
884 #ifdef LIBXML_ICONV_ENABLED
889 case XML_WITH_ISO8859X:
890 #ifdef LIBXML_ISO8859X_ENABLED
895 case XML_WITH_UNICODE:
896 #ifdef LIBXML_UNICODE_ENABLED
901 case XML_WITH_REGEXP:
902 #ifdef LIBXML_REGEXP_ENABLED
907 case XML_WITH_AUTOMATA:
908 #ifdef LIBXML_AUTOMATA_ENABLED
914 #ifdef LIBXML_EXPR_ENABLED
919 case XML_WITH_SCHEMAS:
920 #ifdef LIBXML_SCHEMAS_ENABLED
925 case XML_WITH_SCHEMATRON:
926 #ifdef LIBXML_SCHEMATRON_ENABLED
931 case XML_WITH_MODULES:
932 #ifdef LIBXML_MODULES_ENABLED
938 #ifdef LIBXML_DEBUG_ENABLED
943 case XML_WITH_DEBUG_MEM:
944 #ifdef DEBUG_MEMORY_LOCATION
949 case XML_WITH_DEBUG_RUN:
950 #ifdef LIBXML_DEBUG_RUNTIME
956 #ifdef LIBXML_ZLIB_ENABLED
962 #ifdef LIBXML_LZMA_ENABLED
968 #ifdef LIBXML_ICU_ENABLED
979 /************************************************************************
981 * SAX2 defaulted attributes handling *
983 ************************************************************************/
987 * @ctxt: an XML parser context
989 * Do the SAX2 detection and specific intialization
992 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
993 if (ctxt == NULL) return;
994 #ifdef LIBXML_SAX1_ENABLED
995 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
996 ((ctxt->sax->startElementNs != NULL) ||
997 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1000 #endif /* LIBXML_SAX1_ENABLED */
1002 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1003 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1004 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1005 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1006 (ctxt->str_xml_ns == NULL)) {
1007 xmlErrMemory(ctxt, NULL);
1011 typedef struct _xmlDefAttrs xmlDefAttrs;
1012 typedef xmlDefAttrs *xmlDefAttrsPtr;
1013 struct _xmlDefAttrs {
1014 int nbAttrs; /* number of defaulted attributes on that element */
1015 int maxAttrs; /* the size of the array */
1016 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1020 * xmlAttrNormalizeSpace:
1021 * @src: the source string
1022 * @dst: the target string
1024 * Normalize the space in non CDATA attribute values:
1025 * If the attribute type is not CDATA, then the XML processor MUST further
1026 * process the normalized attribute value by discarding any leading and
1027 * trailing space (#x20) characters, and by replacing sequences of space
1028 * (#x20) characters by a single space (#x20) character.
1029 * Note that the size of dst need to be at least src, and if one doesn't need
1030 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1031 * passing src as dst is just fine.
1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1037 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1039 if ((src == NULL) || (dst == NULL))
1042 while (*src == 0x20) src++;
1045 while (*src == 0x20) src++;
1059 * xmlAttrNormalizeSpace2:
1060 * @src: the source string
1062 * Normalize the space in non CDATA attribute values, a slightly more complex
1063 * front end to avoid allocation problems when running on attribute values
1064 * coming from the input.
1066 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1069 static const xmlChar *
1070 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1073 int remove_head = 0;
1074 int need_realloc = 0;
1077 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1084 while (*cur == 0x20) {
1091 if ((*cur == 0x20) || (*cur == 0)) {
1101 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1103 xmlErrMemory(ctxt, NULL);
1106 xmlAttrNormalizeSpace(ret, ret);
1107 *len = (int) strlen((const char *)ret);
1109 } else if (remove_head) {
1110 *len -= remove_head;
1111 memmove(src, src + remove_head, 1 + *len);
1119 * @ctxt: an XML parser context
1120 * @fullname: the element fullname
1121 * @fullattr: the attribute fullname
1122 * @value: the attribute value
1124 * Add a defaulted attribute for an element
1127 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1128 const xmlChar *fullname,
1129 const xmlChar *fullattr,
1130 const xmlChar *value) {
1131 xmlDefAttrsPtr defaults;
1133 const xmlChar *name;
1134 const xmlChar *prefix;
1137 * Allows to detect attribute redefinitions
1139 if (ctxt->attsSpecial != NULL) {
1140 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1144 if (ctxt->attsDefault == NULL) {
1145 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1146 if (ctxt->attsDefault == NULL)
1151 * split the element name into prefix:localname , the string found
1152 * are within the DTD and then not associated to namespace names.
1154 name = xmlSplitQName3(fullname, &len);
1156 name = xmlDictLookup(ctxt->dict, fullname, -1);
1159 name = xmlDictLookup(ctxt->dict, name, -1);
1160 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1164 * make sure there is some storage
1166 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1167 if (defaults == NULL) {
1168 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1169 (4 * 5) * sizeof(const xmlChar *));
1170 if (defaults == NULL)
1172 defaults->nbAttrs = 0;
1173 defaults->maxAttrs = 4;
1174 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1175 defaults, NULL) < 0) {
1179 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1180 xmlDefAttrsPtr temp;
1182 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1183 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1187 defaults->maxAttrs *= 2;
1188 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1189 defaults, NULL) < 0) {
1196 * Split the element name into prefix:localname , the string found
1197 * are within the DTD and hen not associated to namespace names.
1199 name = xmlSplitQName3(fullattr, &len);
1201 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1204 name = xmlDictLookup(ctxt->dict, name, -1);
1205 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1208 defaults->values[5 * defaults->nbAttrs] = name;
1209 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1210 /* intern the string and precompute the end */
1211 len = xmlStrlen(value);
1212 value = xmlDictLookup(ctxt->dict, value, len);
1213 defaults->values[5 * defaults->nbAttrs + 2] = value;
1214 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1216 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1218 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1219 defaults->nbAttrs++;
1224 xmlErrMemory(ctxt, NULL);
1229 * xmlAddSpecialAttr:
1230 * @ctxt: an XML parser context
1231 * @fullname: the element fullname
1232 * @fullattr: the attribute fullname
1233 * @type: the attribute type
1235 * Register this attribute type
1238 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1239 const xmlChar *fullname,
1240 const xmlChar *fullattr,
1243 if (ctxt->attsSpecial == NULL) {
1244 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1245 if (ctxt->attsSpecial == NULL)
1249 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1252 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1253 (void *) (long) type);
1257 xmlErrMemory(ctxt, NULL);
1262 * xmlCleanSpecialAttrCallback:
1264 * Removes CDATA attributes from the special attribute table
1267 xmlCleanSpecialAttrCallback(void *payload, void *data,
1268 const xmlChar *fullname, const xmlChar *fullattr,
1269 const xmlChar *unused ATTRIBUTE_UNUSED) {
1270 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1272 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1273 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1278 * xmlCleanSpecialAttr:
1279 * @ctxt: an XML parser context
1281 * Trim the list of attributes defined to remove all those of type
1282 * CDATA as they are not special. This call should be done when finishing
1283 * to parse the DTD and before starting to parse the document root.
1286 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1288 if (ctxt->attsSpecial == NULL)
1291 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1293 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1294 xmlHashFree(ctxt->attsSpecial, NULL);
1295 ctxt->attsSpecial = NULL;
1301 * xmlCheckLanguageID:
1302 * @lang: pointer to the string value
1304 * Checks that the value conforms to the LanguageID production:
1306 * NOTE: this is somewhat deprecated, those productions were removed from
1307 * the XML Second edition.
1309 * [33] LanguageID ::= Langcode ('-' Subcode)*
1310 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1311 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1312 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1313 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1314 * [38] Subcode ::= ([a-z] | [A-Z])+
1316 * The current REC reference the sucessors of RFC 1766, currently 5646
1318 * http://www.rfc-editor.org/rfc/rfc5646.txt
1319 * langtag = language
1325 * language = 2*3ALPHA ; shortest ISO 639 code
1326 * ["-" extlang] ; sometimes followed by
1327 * ; extended language subtags
1328 * / 4ALPHA ; or reserved for future use
1329 * / 5*8ALPHA ; or registered language subtag
1331 * extlang = 3ALPHA ; selected ISO 639 codes
1332 * *2("-" 3ALPHA) ; permanently reserved
1334 * script = 4ALPHA ; ISO 15924 code
1336 * region = 2ALPHA ; ISO 3166-1 code
1337 * / 3DIGIT ; UN M.49 code
1339 * variant = 5*8alphanum ; registered variants
1340 * / (DIGIT 3alphanum)
1342 * extension = singleton 1*("-" (2*8alphanum))
1344 * ; Single alphanumerics
1345 * ; "x" reserved for private use
1346 * singleton = DIGIT ; 0 - 9
1352 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1353 * The parser below doesn't try to cope with extension or privateuse
1354 * that could be added but that's not interoperable anyway
1356 * Returns 1 if correct 0 otherwise
1359 xmlCheckLanguageID(const xmlChar * lang)
1361 const xmlChar *cur = lang, *nxt;
1365 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1366 ((cur[0] == 'I') && (cur[1] == '-')) ||
1367 ((cur[0] == 'x') && (cur[1] == '-')) ||
1368 ((cur[0] == 'X') && (cur[1] == '-'))) {
1370 * Still allow IANA code and user code which were coming
1371 * from the previous version of the XML-1.0 specification
1372 * it's deprecated but we should not fail
1375 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1376 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1378 return(cur[0] == 0);
1381 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1382 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1384 if (nxt - cur >= 4) {
1388 if ((nxt - cur > 8) || (nxt[0] != 0))
1394 /* we got an ISO 639 code */
1402 /* now we can have extlang or script or region or variant */
1403 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1406 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1407 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1413 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1417 /* we parsed an extlang */
1425 /* now we can have script or region or variant */
1426 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1434 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1438 /* we parsed a script */
1447 /* now we can have region or variant */
1448 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1451 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1452 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1455 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1459 /* we parsed a region */
1468 /* now we can just have a variant */
1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473 if ((nxt - cur < 5) || (nxt - cur > 8))
1476 /* we parsed a variant */
1482 /* extensions and private use subtags not checked */
1486 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1487 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1494 /************************************************************************
1496 * Parser stacks related functions and macros *
1498 ************************************************************************/
1500 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1501 const xmlChar ** str);
1506 * @ctxt: an XML parser context
1507 * @prefix: the namespace prefix or NULL
1508 * @URL: the namespace name
1510 * Pushes a new parser namespace on top of the ns stack
1512 * Returns -1 in case of error, -2 if the namespace should be discarded
1513 * and the index in the stack otherwise.
1516 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1518 if (ctxt->options & XML_PARSE_NSCLEAN) {
1520 for (i = 0;i < ctxt->nsNr;i += 2) {
1521 if (ctxt->nsTab[i] == prefix) {
1523 if (ctxt->nsTab[i + 1] == URL)
1525 /* out of scope keep it */
1530 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1533 ctxt->nsTab = (const xmlChar **)
1534 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1535 if (ctxt->nsTab == NULL) {
1536 xmlErrMemory(ctxt, NULL);
1540 } else if (ctxt->nsNr >= ctxt->nsMax) {
1541 const xmlChar ** tmp;
1543 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1544 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1546 xmlErrMemory(ctxt, NULL);
1552 ctxt->nsTab[ctxt->nsNr++] = prefix;
1553 ctxt->nsTab[ctxt->nsNr++] = URL;
1554 return (ctxt->nsNr);
1558 * @ctxt: an XML parser context
1559 * @nr: the number to pop
1561 * Pops the top @nr parser prefix/namespace from the ns stack
1563 * Returns the number of namespaces removed
1566 nsPop(xmlParserCtxtPtr ctxt, int nr)
1570 if (ctxt->nsTab == NULL) return(0);
1571 if (ctxt->nsNr < nr) {
1572 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1575 if (ctxt->nsNr <= 0)
1578 for (i = 0;i < nr;i++) {
1580 ctxt->nsTab[ctxt->nsNr] = NULL;
1587 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1588 const xmlChar **atts;
1592 if (ctxt->atts == NULL) {
1593 maxatts = 55; /* allow for 10 attrs by default */
1594 atts = (const xmlChar **)
1595 xmlMalloc(maxatts * sizeof(xmlChar *));
1596 if (atts == NULL) goto mem_error;
1598 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1599 if (attallocs == NULL) goto mem_error;
1600 ctxt->attallocs = attallocs;
1601 ctxt->maxatts = maxatts;
1602 } else if (nr + 5 > ctxt->maxatts) {
1603 maxatts = (nr + 5) * 2;
1604 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1605 maxatts * sizeof(const xmlChar *));
1606 if (atts == NULL) goto mem_error;
1608 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1609 (maxatts / 5) * sizeof(int));
1610 if (attallocs == NULL) goto mem_error;
1611 ctxt->attallocs = attallocs;
1612 ctxt->maxatts = maxatts;
1614 return(ctxt->maxatts);
1616 xmlErrMemory(ctxt, NULL);
1622 * @ctxt: an XML parser context
1623 * @value: the parser input
1625 * Pushes a new parser input on top of the input stack
1627 * Returns -1 in case of error, the index in the stack otherwise
1630 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1632 if ((ctxt == NULL) || (value == NULL))
1634 if (ctxt->inputNr >= ctxt->inputMax) {
1635 ctxt->inputMax *= 2;
1637 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1639 sizeof(ctxt->inputTab[0]));
1640 if (ctxt->inputTab == NULL) {
1641 xmlErrMemory(ctxt, NULL);
1642 xmlFreeInputStream(value);
1643 ctxt->inputMax /= 2;
1648 ctxt->inputTab[ctxt->inputNr] = value;
1649 ctxt->input = value;
1650 return (ctxt->inputNr++);
1654 * @ctxt: an XML parser context
1656 * Pops the top parser input from the input stack
1658 * Returns the input just removed
1661 inputPop(xmlParserCtxtPtr ctxt)
1663 xmlParserInputPtr ret;
1667 if (ctxt->inputNr <= 0)
1670 if (ctxt->inputNr > 0)
1671 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1674 ret = ctxt->inputTab[ctxt->inputNr];
1675 ctxt->inputTab[ctxt->inputNr] = NULL;
1680 * @ctxt: an XML parser context
1681 * @value: the element node
1683 * Pushes a new element node on top of the node stack
1685 * Returns -1 in case of error, the index in the stack otherwise
1688 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1690 if (ctxt == NULL) return(0);
1691 if (ctxt->nodeNr >= ctxt->nodeMax) {
1694 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1696 sizeof(ctxt->nodeTab[0]));
1698 xmlErrMemory(ctxt, NULL);
1701 ctxt->nodeTab = tmp;
1704 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1705 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1706 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1707 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1709 ctxt->instate = XML_PARSER_EOF;
1712 ctxt->nodeTab[ctxt->nodeNr] = value;
1714 return (ctxt->nodeNr++);
1719 * @ctxt: an XML parser context
1721 * Pops the top element node from the node stack
1723 * Returns the node just removed
1726 nodePop(xmlParserCtxtPtr ctxt)
1730 if (ctxt == NULL) return(NULL);
1731 if (ctxt->nodeNr <= 0)
1734 if (ctxt->nodeNr > 0)
1735 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1738 ret = ctxt->nodeTab[ctxt->nodeNr];
1739 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1743 #ifdef LIBXML_PUSH_ENABLED
1746 * @ctxt: an XML parser context
1747 * @value: the element name
1748 * @prefix: the element prefix
1749 * @URI: the element namespace name
1751 * Pushes a new element name/prefix/URL on top of the name stack
1753 * Returns -1 in case of error, the index in the stack otherwise
1756 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1757 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1759 if (ctxt->nameNr >= ctxt->nameMax) {
1760 const xmlChar * *tmp;
1763 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1765 sizeof(ctxt->nameTab[0]));
1770 ctxt->nameTab = tmp;
1771 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1773 sizeof(ctxt->pushTab[0]));
1778 ctxt->pushTab = tmp2;
1780 ctxt->nameTab[ctxt->nameNr] = value;
1782 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1783 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1784 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1785 return (ctxt->nameNr++);
1787 xmlErrMemory(ctxt, NULL);
1792 * @ctxt: an XML parser context
1794 * Pops the top element/prefix/URI name from the name stack
1796 * Returns the name just removed
1798 static const xmlChar *
1799 nameNsPop(xmlParserCtxtPtr ctxt)
1803 if (ctxt->nameNr <= 0)
1806 if (ctxt->nameNr > 0)
1807 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1810 ret = ctxt->nameTab[ctxt->nameNr];
1811 ctxt->nameTab[ctxt->nameNr] = NULL;
1814 #endif /* LIBXML_PUSH_ENABLED */
1818 * @ctxt: an XML parser context
1819 * @value: the element name
1821 * Pushes a new element name on top of the name stack
1823 * Returns -1 in case of error, the index in the stack otherwise
1826 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1828 if (ctxt == NULL) return (-1);
1830 if (ctxt->nameNr >= ctxt->nameMax) {
1831 const xmlChar * *tmp;
1832 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1834 sizeof(ctxt->nameTab[0]));
1838 ctxt->nameTab = tmp;
1841 ctxt->nameTab[ctxt->nameNr] = value;
1843 return (ctxt->nameNr++);
1845 xmlErrMemory(ctxt, NULL);
1850 * @ctxt: an XML parser context
1852 * Pops the top element name from the name stack
1854 * Returns the name just removed
1857 namePop(xmlParserCtxtPtr ctxt)
1861 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1864 if (ctxt->nameNr > 0)
1865 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1868 ret = ctxt->nameTab[ctxt->nameNr];
1869 ctxt->nameTab[ctxt->nameNr] = NULL;
1873 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1874 if (ctxt->spaceNr >= ctxt->spaceMax) {
1877 ctxt->spaceMax *= 2;
1878 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1879 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1881 xmlErrMemory(ctxt, NULL);
1885 ctxt->spaceTab = tmp;
1887 ctxt->spaceTab[ctxt->spaceNr] = val;
1888 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1889 return(ctxt->spaceNr++);
1892 static int spacePop(xmlParserCtxtPtr ctxt) {
1894 if (ctxt->spaceNr <= 0) return(0);
1896 if (ctxt->spaceNr > 0)
1897 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1899 ctxt->space = &ctxt->spaceTab[0];
1900 ret = ctxt->spaceTab[ctxt->spaceNr];
1901 ctxt->spaceTab[ctxt->spaceNr] = -1;
1906 * Macros for accessing the content. Those should be used only by the parser,
1909 * Dirty macros, i.e. one often need to make assumption on the context to
1912 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1913 * To be used with extreme caution since operations consuming
1914 * characters may move the input buffer to a different location !
1915 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1916 * This should be used internally by the parser
1917 * only to compare to ASCII values otherwise it would break when
1918 * running with UTF-8 encoding.
1919 * RAW same as CUR but in the input buffer, bypass any token
1920 * extraction that may have been done
1921 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1922 * to compare on ASCII based substring.
1923 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1924 * strings without newlines within the parser.
1925 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1926 * defined char within the parser.
1927 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1929 * NEXT Skip to the next character, this does the proper decoding
1930 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1931 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1932 * CUR_CHAR(l) returns the current unicode character (int), set l
1933 * to the number of xmlChars used for the encoding [0-5].
1934 * CUR_SCHAR same but operate on a string instead of the context
1935 * COPY_BUF copy the current unicode char to the target buffer, increment
1937 * GROW, SHRINK handling of input buffers
1940 #define RAW (*ctxt->input->cur)
1941 #define CUR (*ctxt->input->cur)
1942 #define NXT(val) ctxt->input->cur[(val)]
1943 #define CUR_PTR ctxt->input->cur
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Bytes are read as unsigned char so that values above 0x7F
 * compare as expected. Every macro argument is parenthesized so that
 * compound expressions (pointer arithmetic, ?:) can be passed as s.
 * Note that s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters, then handle a PE reference start ('%')
 * and pop the input if it is exhausted and cannot be grown.
 * val is evaluated exactly once.
 */
#define SKIP(val) do {							\
    int skipn = (val);							\
    ctxt->nbChars += skipn;						\
    ctxt->input->cur += skipn;						\
    ctxt->input->col += skipn;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
/*
 * SKIPL(val): like SKIP but walks the val xmlChars one by one so that
 * newlines update the line/column position correctly.
 * val is evaluated exactly once, before the loop starts.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    int skipl_max = (val);						\
    for (skipl = 0; skipl < skipl_max; skipl++) {			\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1986 #define SHRINK if ((ctxt->progressive == 0) && \
1987 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1988 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1991 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1992 xmlParserInputShrink(ctxt->input);
1993 if ((*ctxt->input->cur == 0) &&
1994 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1998 #define GROW if ((ctxt->progressive == 0) && \
1999 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2002 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2003 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2004 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2005 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one non-newline ASCII xmlChar and try to read more
 * data when the end of the buffer is reached.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping line/col up to date, then handle a PE reference start ('%').
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a single store when l == 1, xmlCopyCharMultiByte otherwise.
 * The expansion is an if/else statement without its final ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2037 * xmlSkipBlankChars:
2038 * @ctxt: the XML parser context
2040 * skip all blanks character found at that point in the input streams.
2041 * It pops up finished entities in the process if allowable at that point.
2043 * Returns the number of space chars skipped
2047 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2051 * It's Okay to use CUR/NEXT here since all the blanks are on
2054 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2057 * if we are in the document content, go really fast
2059 cur = ctxt->input->cur;
2060 while (IS_BLANK_CH(*cur)) {
2062 ctxt->input->line++; ctxt->input->col = 1;
2067 ctxt->input->cur = cur;
2068 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2069 cur = ctxt->input->cur;
2072 ctxt->input->cur = cur;
2077 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2082 while ((cur == 0) && (ctxt->inputNr > 1) &&
2083 (ctxt->instate != XML_PARSER_COMMENT)) {
2088 * Need to handle support of entities branching here
2090 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2091 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2096 /************************************************************************
2098 * Commodity functions to handle entities *
2100 ************************************************************************/
2104 * @ctxt: an XML parser context
2106 * xmlPopInput: the current input pointed by ctxt->input came to an end
2107 * pop it and return the next char.
2109 * Returns the current xmlChar in the parser context
2112 xmlPopInput(xmlParserCtxtPtr ctxt) {
2113 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2114 if (xmlParserDebugEntities)
2115 xmlGenericError(xmlGenericErrorContext,
2116 "Popping input %d\n", ctxt->inputNr);
2117 xmlFreeInputStream(inputPop(ctxt));
2118 if ((*ctxt->input->cur == 0) &&
2119 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2120 return(xmlPopInput(ctxt));
2126 * @ctxt: an XML parser context
2127 * @input: an XML parser input fragment (entity, XML fragment ...).
2129 * xmlPushInput: switch to a new input stream which is stacked on top
2130 * of the previous one(s).
2131 * Returns -1 in case of error or the index in the input stack
2134 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2136 if (input == NULL) return(-1);
2138 if (xmlParserDebugEntities) {
2139 if ((ctxt->input != NULL) && (ctxt->input->filename))
2140 xmlGenericError(xmlGenericErrorContext,
2141 "%s(%d): ", ctxt->input->filename,
2143 xmlGenericError(xmlGenericErrorContext,
2144 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2146 ret = inputPush(ctxt, input);
2153 * @ctxt: an XML parser context
2155 * parse Reference declarations
2157 * [66] CharRef ::= '&#' [0-9]+ ';' |
2158 * '&#x' [0-9a-fA-F]+ ';'
2160 * [ WFC: Legal Character ]
2161 * Characters referred to using character references must match the
2162 * production for Char.
2164 * Returns the value parsed (as an int), 0 in case of error
2167 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2168 unsigned int val = 0;
2170 unsigned int outofrange = 0;
2173 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2175 if ((RAW == '&') && (NXT(1) == '#') &&
2179 while (RAW != ';') { /* loop blocked by count */
2184 if ((RAW >= '0') && (RAW <= '9'))
2185 val = val * 16 + (CUR - '0');
2186 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2187 val = val * 16 + (CUR - 'a') + 10;
2188 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2189 val = val * 16 + (CUR - 'A') + 10;
2191 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2202 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2207 } else if ((RAW == '&') && (NXT(1) == '#')) {
2210 while (RAW != ';') { /* loop blocked by count */
2215 if ((RAW >= '0') && (RAW <= '9'))
2216 val = val * 10 + (CUR - '0');
2218 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2229 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2235 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2243 if ((IS_CHAR(val) && (outofrange == 0))) {
2246 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2247 "xmlParseCharRef: invalid xmlChar value %d\n",
2254 * xmlParseStringCharRef:
2255 * @ctxt: an XML parser context
2256 * @str: a pointer to an index in the string
2258 * parse Reference declarations, variant parsing from a string rather
2259 * than an an input flow.
2261 * [66] CharRef ::= '&#' [0-9]+ ';' |
2262 * '&#x' [0-9a-fA-F]+ ';'
2264 * [ WFC: Legal Character ]
2265 * Characters referred to using character references must match the
2266 * production for Char.
2268 * Returns the value parsed (as an int), 0 in case of error, str will be
2269 * updated to the current value of the index
2272 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2275 unsigned int val = 0;
2276 unsigned int outofrange = 0;
2278 if ((str == NULL) || (*str == NULL)) return(0);
2281 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2284 while (cur != ';') { /* Non input consuming loop */
2285 if ((cur >= '0') && (cur <= '9'))
2286 val = val * 16 + (cur - '0');
2287 else if ((cur >= 'a') && (cur <= 'f'))
2288 val = val * 16 + (cur - 'a') + 10;
2289 else if ((cur >= 'A') && (cur <= 'F'))
2290 val = val * 16 + (cur - 'A') + 10;
2292 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2304 } else if ((cur == '&') && (ptr[1] == '#')){
2307 while (cur != ';') { /* Non input consuming loops */
2308 if ((cur >= '0') && (cur <= '9'))
2309 val = val * 10 + (cur - '0');
2311 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2324 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2330 * [ WFC: Legal Character ]
2331 * Characters referred to using character references must match the
2332 * production for Char.
2334 if ((IS_CHAR(val) && (outofrange == 0))) {
2337 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2338 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2345 * xmlNewBlanksWrapperInputStream:
2346 * @ctxt: an XML parser context
2347 * @entity: an Entity pointer
2349 * Create a new input stream for wrapping
2350 * blanks around a PEReference
2352 * Returns the new input stream or NULL
2355 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2357 static xmlParserInputPtr
2358 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2359 xmlParserInputPtr input;
2362 if (entity == NULL) {
2363 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2364 "xmlNewBlanksWrapperInputStream entity\n");
2367 if (xmlParserDebugEntities)
2368 xmlGenericError(xmlGenericErrorContext,
2369 "new blanks wrapper for entity: %s\n", entity->name);
2370 input = xmlNewInputStream(ctxt);
2371 if (input == NULL) {
2374 length = xmlStrlen(entity->name) + 5;
2375 buffer = xmlMallocAtomic(length);
2376 if (buffer == NULL) {
2377 xmlErrMemory(ctxt, NULL);
2383 buffer [length-3] = ';';
2384 buffer [length-2] = ' ';
2385 buffer [length-1] = 0;
2386 memcpy(buffer + 2, entity->name, length - 5);
2387 input->free = deallocblankswrapper;
2388 input->base = buffer;
2389 input->cur = buffer;
2390 input->length = length;
2391 input->end = &buffer[length];
2396 * xmlParserHandlePEReference:
2397 * @ctxt: the parser context
2399 * [69] PEReference ::= '%' Name ';'
2401 * [ WFC: No Recursion ]
2402 * A parsed entity must not contain a recursive
2403 * reference to itself, either directly or indirectly.
2405 * [ WFC: Entity Declared ]
2406 * In a document without any DTD, a document with only an internal DTD
2407 * subset which contains no parameter entity references, or a document
2408 * with "standalone='yes'", ... ... The declaration of a parameter
2409 * entity must precede any reference to it...
2411 * [ VC: Entity Declared ]
2412 * In a document with an external subset or external parameter entities
2413 * with "standalone='no'", ... ... The declaration of a parameter entity
2414 * must precede any reference to it...
2417 * Parameter-entity references may only appear in the DTD.
2418 * NOTE: misleading but this is handled.
2420 * A PEReference may have been detected in the current input stream
2421 * the handling is done accordingly to
2422 * http://www.w3.org/TR/REC-xml#entproc
2424 * - Included in literal in entity values
2425 * - Included as Parameter Entity reference within DTDs
2428 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2429 const xmlChar *name;
2430 xmlEntityPtr entity = NULL;
2431 xmlParserInputPtr input;
2433 if (RAW != '%') return;
2434 switch(ctxt->instate) {
2435 case XML_PARSER_CDATA_SECTION:
2437 case XML_PARSER_COMMENT:
2439 case XML_PARSER_START_TAG:
2441 case XML_PARSER_END_TAG:
2443 case XML_PARSER_EOF:
2444 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2446 case XML_PARSER_PROLOG:
2447 case XML_PARSER_START:
2448 case XML_PARSER_MISC:
2449 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2451 case XML_PARSER_ENTITY_DECL:
2452 case XML_PARSER_CONTENT:
2453 case XML_PARSER_ATTRIBUTE_VALUE:
2455 case XML_PARSER_SYSTEM_LITERAL:
2456 case XML_PARSER_PUBLIC_LITERAL:
2457 /* we just ignore it there */
2459 case XML_PARSER_EPILOG:
2460 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2462 case XML_PARSER_ENTITY_VALUE:
2464 * NOTE: in the case of entity values, we don't do the
2465 * substitution here since we need the literal
2466 * entity value to be able to save the internal
2467 * subset of the document.
2468 * This will be handled by xmlStringDecodeEntities
2471 case XML_PARSER_DTD:
2473 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2474 * In the internal DTD subset, parameter-entity references
2475 * can occur only where markup declarations can occur, not
2476 * within markup declarations.
2477 * In that case this is handled in xmlParseMarkupDecl
2479 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2481 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2484 case XML_PARSER_IGNORE:
2489 name = xmlParseName(ctxt);
2490 if (xmlParserDebugEntities)
2491 xmlGenericError(xmlGenericErrorContext,
2492 "PEReference: %s\n", name);
2494 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2498 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2499 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2500 if (entity == NULL) {
2503 * [ WFC: Entity Declared ]
2504 * In a document without any DTD, a document with only an
2505 * internal DTD subset which contains no parameter entity
2506 * references, or a document with "standalone='yes'", ...
2507 * ... The declaration of a parameter entity must precede
2508 * any reference to it...
2510 if ((ctxt->standalone == 1) ||
2511 ((ctxt->hasExternalSubset == 0) &&
2512 (ctxt->hasPErefs == 0))) {
2513 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2514 "PEReference: %%%s; not found\n", name);
2517 * [ VC: Entity Declared ]
2518 * In a document with an external subset or external
2519 * parameter entities with "standalone='no'", ...
2520 * ... The declaration of a parameter entity must precede
2521 * any reference to it...
2523 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2524 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2525 "PEReference: %%%s; not found\n",
2528 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529 "PEReference: %%%s; not found\n",
2533 } else if (ctxt->input->free != deallocblankswrapper) {
2534 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2535 if (xmlPushInput(ctxt, input) < 0)
2538 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2539 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2541 xmlCharEncoding enc;
2544 * handle the extra spaces added before and after
2545 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2546 * this is done independently.
2548 input = xmlNewEntityInputStream(ctxt, entity);
2549 if (xmlPushInput(ctxt, input) < 0)
2553 * Get the 4 first bytes and decode the charset
2554 * if enc != XML_CHAR_ENCODING_NONE
2555 * plug some encoding conversion routines.
2556 * Note that, since we may have some non-UTF8
2557 * encoding (like UTF16, bug 135229), the 'length'
2558 * is not known, but we can calculate based upon
2559 * the amount of data in the buffer.
2562 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2567 enc = xmlDetectCharEncoding(start, 4);
2568 if (enc != XML_CHAR_ENCODING_NONE) {
2569 xmlSwitchEncoding(ctxt, enc);
2573 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2574 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2575 (IS_BLANK_CH(NXT(5)))) {
2576 xmlParseTextDecl(ctxt);
2579 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2580 "PEReference: %s is not a parameter entity\n",
2585 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
/*
 * Macro used to grow the current buffer: the companion variable
 * buffer_size is doubled and increased by n, then the buffer is
 * reallocated to that size. Jumps to the enclosing function's
 * mem_error label when the reallocation fails; the old buffer is
 * left untouched in that case.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    buffer##_size += n;							\
    grown = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL) goto mem_error;					\
    buffer = grown;							\
}
2604 * xmlStringLenDecodeEntities:
2605 * @ctxt: the parser context
2606 * @str: the input string
2607 * @len: the string length
2608 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2609 * @end: an end marker xmlChar, 0 if none
2610 * @end2: an end marker xmlChar, 0 if none
2611 * @end3: an end marker xmlChar, 0 if none
2613 * Takes an entity string content and processes it to perform the adequate substitutions.
2615 * [67] Reference ::= EntityRef | CharRef
2617 * [69] PEReference ::= '%' Name ';'
2619 * Returns A newly allocated string with the substitution done. The caller
2620 * must deallocate it !
2623 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2624 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2625 xmlChar *buffer = NULL;
2626 int buffer_size = 0;
2628 xmlChar *current = NULL;
2629 xmlChar *rep = NULL;
2630 const xmlChar *last;
2635 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2639 if (((ctxt->depth > 40) &&
2640 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2641 (ctxt->depth > 1024)) {
2642 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2647 * allocate a translation buffer.
2649 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2650 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2651 if (buffer == NULL) goto mem_error;
2654 * OK loop until we reach one of the ending char or a size limit.
2655 * we are operating on already parsed values.
2658 c = CUR_SCHAR(str, l);
2661 while ((c != 0) && (c != end) && /* non input consuming loop */
2662 (c != end2) && (c != end3)) {
2665 if ((c == '&') && (str[1] == '#')) {
2666 int val = xmlParseStringCharRef(ctxt, &str);
2668 COPY_BUF(0,buffer,nbchars,val);
2670 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2671 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2673 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2674 if (xmlParserDebugEntities)
2675 xmlGenericError(xmlGenericErrorContext,
2676 "String decoding Entity Reference: %.30s\n",
2678 ent = xmlParseStringEntityRef(ctxt, &str);
2679 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2680 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2683 ctxt->nbentities += ent->checked;
2684 if ((ent != NULL) &&
2685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2686 if (ent->content != NULL) {
2687 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2688 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2689 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2692 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2693 "predefined entity has no content\n");
2695 } else if ((ent != NULL) && (ent->content != NULL)) {
2697 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2703 while (*current != 0) { /* non input consuming loop */
2704 buffer[nbchars++] = *current++;
2706 buffer_size - XML_PARSER_BUFFER_SIZE) {
2707 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2709 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2715 } else if (ent != NULL) {
2716 int i = xmlStrlen(ent->name);
2717 const xmlChar *cur = ent->name;
2719 buffer[nbchars++] = '&';
2720 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2721 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724 buffer[nbchars++] = *cur++;
2725 buffer[nbchars++] = ';';
2727 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2728 if (xmlParserDebugEntities)
2729 xmlGenericError(xmlGenericErrorContext,
2730 "String decoding PE Reference: %.30s\n", str);
2731 ent = xmlParseStringPEReference(ctxt, &str);
2732 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2735 ctxt->nbentities += ent->checked;
2737 if (ent->content == NULL) {
2738 xmlLoadEntityContent(ctxt, ent);
2741 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2746 while (*current != 0) { /* non input consuming loop */
2747 buffer[nbchars++] = *current++;
2749 buffer_size - XML_PARSER_BUFFER_SIZE) {
2750 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2752 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2760 COPY_BUF(l,buffer,nbchars,c);
2762 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2763 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2767 c = CUR_SCHAR(str, l);
2771 buffer[nbchars] = 0;
2775 xmlErrMemory(ctxt, NULL);
2785 * xmlStringDecodeEntities:
2786 * @ctxt: the parser context
2787 * @str: the input string
2788 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2789 * @end: an end marker xmlChar, 0 if none
2790 * @end2: an end marker xmlChar, 0 if none
2791 * @end3: an end marker xmlChar, 0 if none
2793 * Takes a entity string content and process to do the adequate substitutions.
2795 * [67] Reference ::= EntityRef | CharRef
2797 * [69] PEReference ::= '%' Name ';'
2799 * Returns A newly allocated string with the substitution done. The caller
2800 * must deallocate it !
2803 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2804 xmlChar end, xmlChar end2, xmlChar end3) {
2805 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2806 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2810 /************************************************************************
2812 * Commodity functions, cleanup needed ? *
2814 ************************************************************************/
2818 * @ctxt: an XML parser context
2820 * @len: the size of @str
2821 * @blank_chars: we know the chars are blanks
2823 * Is this a sequence of blank chars that one can ignore ?
2825 * Returns 1 if ignorable 0 otherwise.
2828 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2831 xmlNodePtr lastChild;
2834 * Don't spend time trying to differentiate them, the same callback is
2837 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2841 * Check for xml:space value.
2843 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2844 (*(ctxt->space) == -2))
2848 * Check that the string is made of blanks
2850 if (blank_chars == 0) {
2851 for (i = 0;i < len;i++)
2852 if (!(IS_BLANK_CH(str[i]))) return(0);
2856 * Look if the element is mixed content in the DTD if available
2858 if (ctxt->node == NULL) return(0);
2859 if (ctxt->myDoc != NULL) {
2860 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2861 if (ret == 0) return(1);
2862 if (ret == 1) return(0);
2866 * Otherwise, heuristic :-\
2868 if ((RAW != '<') && (RAW != 0xD)) return(0);
2869 if ((ctxt->node->children == NULL) &&
2870 (RAW == '<') && (NXT(1) == '/')) return(0);
2872 lastChild = xmlGetLastChild(ctxt->node);
2873 if (lastChild == NULL) {
2874 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2875 (ctxt->node->content != NULL)) return(0);
2876 } else if (xmlNodeIsText(lastChild))
2878 else if ((ctxt->node->children != NULL) &&
2879 (xmlNodeIsText(ctxt->node->children)))
2884 /************************************************************************
2886 * Extra stuff for namespace support *
2887 * Relates to http://www.w3.org/TR/WD-xml-names *
2889 ************************************************************************/
2893 * @ctxt: an XML parser context
2894 * @name: a UTF-8 encoded XML qualified name string
2895 * @prefix: a xmlChar **
2897 * parse a UTF-8 encoded XML qualified name string
2899 * [NS 5] QName ::= (Prefix ':')? LocalPart
2901 * [NS 6] Prefix ::= NCName
2903 * [NS 7] LocalPart ::= NCName
2905 * Returns the local part, and prefix is updated
2906 * to get the Prefix if any.
2910 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2911 xmlChar buf[XML_MAX_NAMELEN + 5];
2912 xmlChar *buffer = NULL;
2914 int max = XML_MAX_NAMELEN;
2915 xmlChar *ret = NULL;
2916 const xmlChar *cur = name;
2919 if (prefix == NULL) return(NULL);
2922 if (cur == NULL) return(NULL);
2924 #ifndef XML_XML_NAMESPACE
2925 /* xml: prefix is not really a namespace */
2926 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2927 (cur[2] == 'l') && (cur[3] == ':'))
2928 return(xmlStrdup(name));
2931 /* nasty but well-formed */
2933 return(xmlStrdup(name));
2936 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2942 * Okay someone managed to make a huge name, so he's ready to pay
2943 * for the processing speed.
2947 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2948 if (buffer == NULL) {
2949 xmlErrMemory(ctxt, NULL);
2952 memcpy(buffer, buf, len);
2953 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2954 if (len + 10 > max) {
2958 tmp = (xmlChar *) xmlRealloc(buffer,
2959 max * sizeof(xmlChar));
2962 xmlErrMemory(ctxt, NULL);
2973 if ((c == ':') && (*cur == 0)) {
2977 return(xmlStrdup(name));
2981 ret = xmlStrndup(buf, len);
2985 max = XML_MAX_NAMELEN;
2993 return(xmlStrndup(BAD_CAST "", 0));
2998 * Check that the first character is proper to start
3001 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3002 ((c >= 0x41) && (c <= 0x5A)) ||
3003 (c == '_') || (c == ':'))) {
3005 int first = CUR_SCHAR(cur, l);
3007 if (!IS_LETTER(first) && (first != '_')) {
3008 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3009 "Name %s is not XML Namespace compliant\n",
3015 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3021 * Okay someone managed to make a huge name, so he's ready to pay
3022 * for the processing speed.
3026 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3027 if (buffer == NULL) {
3028 xmlErrMemory(ctxt, NULL);
3031 memcpy(buffer, buf, len);
3032 while (c != 0) { /* tested bigname2.xml */
3033 if (len + 10 > max) {
3037 tmp = (xmlChar *) xmlRealloc(buffer,
3038 max * sizeof(xmlChar));
3040 xmlErrMemory(ctxt, NULL);
3053 ret = xmlStrndup(buf, len);
3062 /************************************************************************
3064 * The parser itself *
3065 * Relates to http://www.w3.org/TR/REC-xml *
3067 ************************************************************************/
3069 /************************************************************************
3071 * Routines to parse Name, NCName and NmToken *
3073 ************************************************************************/
3075 static unsigned long nbParseName = 0;
3076 static unsigned long nbParseNmToken = 0;
3077 static unsigned long nbParseNCName = 0;
3078 static unsigned long nbParseNCNameComplex = 0;
3079 static unsigned long nbParseNameComplex = 0;
3080 static unsigned long nbParseStringName = 0;
3084 * The two following functions are related to the change of accepted
3085 * characters for Name and NmToken in the Revision 5 of XML-1.0
3086 * They correspond to the modified production [4] and the new production [4a]
3087 * changes in that revision. Also note that the macros used for the
3088 * productions Letter, Digit, CombiningChar and Extender are not needed
3090 * We still keep compatibility to pre-revision5 parsing semantic if the
3091 * new XML_PARSE_OLD10 option is given to the parser.
3094 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3095 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3097 * Use the new checks of production [4] [4a] and [5] of the
3098 * Update 5 of XML-1.0
3100 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3101 (((c >= 'a') && (c <= 'z')) ||
3102 ((c >= 'A') && (c <= 'Z')) ||
3103 (c == '_') || (c == ':') ||
3104 ((c >= 0xC0) && (c <= 0xD6)) ||
3105 ((c >= 0xD8) && (c <= 0xF6)) ||
3106 ((c >= 0xF8) && (c <= 0x2FF)) ||
3107 ((c >= 0x370) && (c <= 0x37D)) ||
3108 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3109 ((c >= 0x200C) && (c <= 0x200D)) ||
3110 ((c >= 0x2070) && (c <= 0x218F)) ||
3111 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3112 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3113 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3114 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3115 ((c >= 0x10000) && (c <= 0xEFFFF))))
3118 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3125 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3126 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3128 * Use the new checks of production [4] [4a] and [5] of the
3129 * Update 5 of XML-1.0
3131 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3132 (((c >= 'a') && (c <= 'z')) ||
3133 ((c >= 'A') && (c <= 'Z')) ||
3134 ((c >= '0') && (c <= '9')) || /* !start */
3135 (c == '_') || (c == ':') ||
3136 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3137 ((c >= 0xC0) && (c <= 0xD6)) ||
3138 ((c >= 0xD8) && (c <= 0xF6)) ||
3139 ((c >= 0xF8) && (c <= 0x2FF)) ||
3140 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3141 ((c >= 0x370) && (c <= 0x37D)) ||
3142 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143 ((c >= 0x200C) && (c <= 0x200D)) ||
3144 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3145 ((c >= 0x2070) && (c <= 0x218F)) ||
3146 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3147 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3148 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3149 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3150 ((c >= 0x10000) && (c <= 0xEFFFF))))
3153 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3154 (c == '.') || (c == '-') ||
3155 (c == '_') || (c == ':') ||
3156 (IS_COMBINING(c)) ||
3163 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3164 int *len, int *alloc, int normalize);
3166 static const xmlChar *
3167 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3173 nbParseNameComplex++;
3177 * Handler for more complex cases
3181 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3183 * Use the new checks of production [4] [4a] and [5] of the
3184 * Update 5 of XML-1.0
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!(((c >= 'a') && (c <= 'z')) ||
3188 ((c >= 'A') && (c <= 'Z')) ||
3189 (c == '_') || (c == ':') ||
3190 ((c >= 0xC0) && (c <= 0xD6)) ||
3191 ((c >= 0xD8) && (c <= 0xF6)) ||
3192 ((c >= 0xF8) && (c <= 0x2FF)) ||
3193 ((c >= 0x370) && (c <= 0x37D)) ||
3194 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195 ((c >= 0x200C) && (c <= 0x200D)) ||
3196 ((c >= 0x2070) && (c <= 0x218F)) ||
3197 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3198 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3199 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3200 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3201 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3207 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 ((c >= '0') && (c <= '9')) || /* !start */
3211 (c == '_') || (c == ':') ||
3212 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3213 ((c >= 0xC0) && (c <= 0xD6)) ||
3214 ((c >= 0xD8) && (c <= 0xF6)) ||
3215 ((c >= 0xF8) && (c <= 0x2FF)) ||
3216 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3217 ((c >= 0x370) && (c <= 0x37D)) ||
3218 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3219 ((c >= 0x200C) && (c <= 0x200D)) ||
3220 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3221 ((c >= 0x2070) && (c <= 0x218F)) ||
3222 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3223 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3224 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3225 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3226 ((c >= 0x10000) && (c <= 0xEFFFF))
3228 if (count++ > 100) {
3237 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238 (!IS_LETTER(c) && (c != '_') &&
3246 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3247 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3248 (c == '.') || (c == '-') ||
3249 (c == '_') || (c == ':') ||
3250 (IS_COMBINING(c)) ||
3251 (IS_EXTENDER(c)))) {
3252 if (count++ > 100) {
3261 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3262 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3263 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3268 * @ctxt: an XML parser context
3270 * parse an XML name.
3272 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3273 * CombiningChar | Extender
3275 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3277 * [6] Names ::= Name (#x20 Name)*
3279 * Returns the Name parsed or NULL
3283 xmlParseName(xmlParserCtxtPtr ctxt) {
3295 * Accelerator for simple ASCII names
3297 in = ctxt->input->cur;
3298 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3299 ((*in >= 0x41) && (*in <= 0x5A)) ||
3300 (*in == '_') || (*in == ':')) {
3302 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3303 ((*in >= 0x41) && (*in <= 0x5A)) ||
3304 ((*in >= 0x30) && (*in <= 0x39)) ||
3305 (*in == '_') || (*in == '-') ||
3306 (*in == ':') || (*in == '.'))
3308 if ((*in > 0) && (*in < 0x80)) {
3309 count = in - ctxt->input->cur;
3310 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3311 ctxt->input->cur = in;
3312 ctxt->nbChars += count;
3313 ctxt->input->col += count;
3315 xmlErrMemory(ctxt, NULL);
3319 /* accelerator for special cases */
3320 return(xmlParseNameComplex(ctxt));
3323 static const xmlChar *
3324 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3330 nbParseNCNameComplex++;
3334 * Handler for more complex cases
3338 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3339 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3343 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3344 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3345 if (count++ > 100) {
3353 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3358 * @ctxt: an XML parser context
3359 * @len: length of the string parsed
3361 * parse an XML name.
3363 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3364 * CombiningChar | Extender
3366 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3368 * Returns the Name parsed or NULL
3371 static const xmlChar *
3372 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3382 * Accelerator for simple ASCII names
3384 in = ctxt->input->cur;
3385 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3386 ((*in >= 0x41) && (*in <= 0x5A)) ||
3389 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3390 ((*in >= 0x41) && (*in <= 0x5A)) ||
3391 ((*in >= 0x30) && (*in <= 0x39)) ||
3392 (*in == '_') || (*in == '-') ||
3395 if ((*in > 0) && (*in < 0x80)) {
3396 count = in - ctxt->input->cur;
3397 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3398 ctxt->input->cur = in;
3399 ctxt->nbChars += count;
3400 ctxt->input->col += count;
3402 xmlErrMemory(ctxt, NULL);
3407 return(xmlParseNCNameComplex(ctxt));
3411 * xmlParseNameAndCompare:
3412 * @ctxt: an XML parser context
3414 * parse an XML name and compares for match
3415 * (specialized for endtag parsing)
3417 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3418 * and the name for mismatch
3421 static const xmlChar *
3422 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3423 register const xmlChar *cmp = other;
3424 register const xmlChar *in;
3429 in = ctxt->input->cur;
3430 while (*in != 0 && *in == *cmp) {
3435 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3437 ctxt->input->cur = in;
3438 return (const xmlChar*) 1;
3440 /* failure (or end of input buffer), check with full function */
3441 ret = xmlParseName (ctxt);
3442 /* strings coming from the dictionary, direct comparison is possible */
3444 return (const xmlChar*) 1;
3450 * xmlParseStringName:
3451 * @ctxt: an XML parser context
3452 * @str: a pointer to the string pointer (IN/OUT)
3454 * parse an XML name.
3456 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3457 * CombiningChar | Extender
3459 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3461 * [6] Names ::= Name (#x20 Name)*
3463 * Returns the Name parsed or NULL. The @str pointer
3464 * is updated to the current location in the string.
3468 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3469 xmlChar buf[XML_MAX_NAMELEN + 5];
3470 const xmlChar *cur = *str;
3475 nbParseStringName++;
3478 c = CUR_SCHAR(cur, l);
3479 if (!xmlIsNameStartChar(ctxt, c)) {
3483 COPY_BUF(l,buf,len,c);
3485 c = CUR_SCHAR(cur, l);
3486 while (xmlIsNameChar(ctxt, c)) {
3487 COPY_BUF(l,buf,len,c);
3489 c = CUR_SCHAR(cur, l);
3490 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3492 * Okay someone managed to make a huge name, so he's ready to pay
3493 * for the processing speed.
3498 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3499 if (buffer == NULL) {
3500 xmlErrMemory(ctxt, NULL);
3503 memcpy(buffer, buf, len);
3504 while (xmlIsNameChar(ctxt, c)) {
3505 if (len + 10 > max) {
3508 tmp = (xmlChar *) xmlRealloc(buffer,
3509 max * sizeof(xmlChar));
3511 xmlErrMemory(ctxt, NULL);
3517 COPY_BUF(l,buffer,len,c);
3519 c = CUR_SCHAR(cur, l);
3527 return(xmlStrndup(buf, len));
3532 * @ctxt: an XML parser context
3534 * parse an XML Nmtoken.
3536 * [7] Nmtoken ::= (NameChar)+
3538 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3540 * Returns the Nmtoken parsed or NULL
3544 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3545 xmlChar buf[XML_MAX_NAMELEN + 5];
3557 while (xmlIsNameChar(ctxt, c)) {
3558 if (count++ > 100) {
3562 COPY_BUF(l,buf,len,c);
3565 if (len >= XML_MAX_NAMELEN) {
3567 * Okay someone managed to make a huge token, so he's ready to pay
3568 * for the processing speed.
3573 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3574 if (buffer == NULL) {
3575 xmlErrMemory(ctxt, NULL);
3578 memcpy(buffer, buf, len);
3579 while (xmlIsNameChar(ctxt, c)) {
3580 if (count++ > 100) {
3584 if (len + 10 > max) {
3588 tmp = (xmlChar *) xmlRealloc(buffer,
3589 max * sizeof(xmlChar));
3591 xmlErrMemory(ctxt, NULL);
3597 COPY_BUF(l,buffer,len,c);
3607 return(xmlStrndup(buf, len));
3611 * xmlParseEntityValue:
3612 * @ctxt: an XML parser context
3613 * @orig: if non-NULL store a copy of the original entity value
3615 * parse a value for ENTITY declarations
3617 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3618 * "'" ([^%&'] | PEReference | Reference)* "'"
3620 * Returns the EntityValue parsed with reference substituted or NULL
3624 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3625 xmlChar *buf = NULL;
3627 int size = XML_PARSER_BUFFER_SIZE;
3630 xmlChar *ret = NULL;
3631 const xmlChar *cur = NULL;
3632 xmlParserInputPtr input;
3634 if (RAW == '"') stop = '"';
3635 else if (RAW == '\'') stop = '\'';
3637 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3640 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3642 xmlErrMemory(ctxt, NULL);
3647 * The content of the entity definition is copied in a buffer.
3650 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3651 input = ctxt->input;
3656 * NOTE: 4.4.5 Included in Literal
3657 * When a parameter entity reference appears in a literal entity
3658 * value, ... a single or double quote character in the replacement
3659 * text is always treated as a normal data character and will not
3660 * terminate the literal.
3661 * In practice it means we stop the loop only when back at parsing
3662 * the initial entity and the quote is found
3664 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3665 (ctxt->input != input))) {
3666 if (len + 5 >= size) {
3670 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3672 xmlErrMemory(ctxt, NULL);
3678 COPY_BUF(l,buf,len,c);
3681 * Pop-up of finished entities.
3683 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3696 * Raise problem w.r.t. '&' and '%' being used in non-entities
3697 * reference constructs. Note Charref will be handled in
3698 * xmlStringDecodeEntities()
3701 while (*cur != 0) { /* non input consuming */
3702 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3707 name = xmlParseStringName(ctxt, &cur);
3708 if ((name == NULL) || (*cur != ';')) {
3709 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3710 "EntityValue: '%c' forbidden except for entities references\n",
3713 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3714 (ctxt->inputNr == 1)) {
3715 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3726 * Then PEReference entities are substituted.
3729 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3734 * NOTE: 4.4.7 Bypassed
3735 * When a general entity reference appears in the EntityValue in
3736 * an entity declaration, it is bypassed and left as is.
3737 * so XML_SUBSTITUTE_REF is not set here.
3739 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3751 * xmlParseAttValueComplex:
3752 * @ctxt: an XML parser context
3753 * @attlen: the resulting attribute length
3754 * @normalize: whether to apply the inner normalization
3756 * parse a value for an attribute, this is the fallback function
3757 * of xmlParseAttValue() when the attribute parsing requires handling
3758 * of non-ASCII characters, or normalization compaction.
3760 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3763 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3765 xmlChar *buf = NULL;
3766 xmlChar *rep = NULL;
3769 int c, l, in_space = 0;
3770 xmlChar *current = NULL;
3773 if (NXT(0) == '"') {
3774 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3777 } else if (NXT(0) == '\'') {
3779 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3782 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3787 * allocate a translation buffer.
3789 buf_size = XML_PARSER_BUFFER_SIZE;
3790 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3791 if (buf == NULL) goto mem_error;
3794 * OK loop until we reach one of the ending char or a size limit.
3797 while ((NXT(0) != limit) && /* checked */
3798 (IS_CHAR(c)) && (c != '<')) {
3802 if (NXT(1) == '#') {
3803 int val = xmlParseCharRef(ctxt);
3806 if (ctxt->replaceEntities) {
3807 if (len > buf_size - 10) {
3808 growBuffer(buf, 10);
3813 * The reparsing will be done in xmlStringGetNodeList()
3814 * called by the attribute() function in SAX.c
3816 if (len > buf_size - 10) {
3817 growBuffer(buf, 10);
3825 } else if (val != 0) {
3826 if (len > buf_size - 10) {
3827 growBuffer(buf, 10);
3829 len += xmlCopyChar(0, &buf[len], val);
3832 ent = xmlParseEntityRef(ctxt);
3835 ctxt->nbentities += ent->owner;
3836 if ((ent != NULL) &&
3837 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3838 if (len > buf_size - 10) {
3839 growBuffer(buf, 10);
3841 if ((ctxt->replaceEntities == 0) &&
3842 (ent->content[0] == '&')) {
3849 buf[len++] = ent->content[0];
3851 } else if ((ent != NULL) &&
3852 (ctxt->replaceEntities != 0)) {
3853 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3854 rep = xmlStringDecodeEntities(ctxt, ent->content,
3859 while (*current != 0) { /* non input consuming */
3860 if ((*current == 0xD) || (*current == 0xA) ||
3861 (*current == 0x9)) {
3865 buf[len++] = *current++;
3866 if (len > buf_size - 10) {
3867 growBuffer(buf, 10);
3874 if (len > buf_size - 10) {
3875 growBuffer(buf, 10);
3877 if (ent->content != NULL)
3878 buf[len++] = ent->content[0];
3880 } else if (ent != NULL) {
3881 int i = xmlStrlen(ent->name);
3882 const xmlChar *cur = ent->name;
3885 * This may look absurd but is needed to detect
3888 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3889 (ent->content != NULL)) {
3890 rep = xmlStringDecodeEntities(ctxt, ent->content,
3891 XML_SUBSTITUTE_REF, 0, 0, 0);
3899 * Just output the reference
3902 while (len > buf_size - i - 10) {
3903 growBuffer(buf, i + 10);
3906 buf[len++] = *cur++;
3911 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3912 if ((len != 0) || (!normalize)) {
3913 if ((!normalize) || (!in_space)) {
3914 COPY_BUF(l,buf,len,0x20);
3915 while (len > buf_size - 10) {
3916 growBuffer(buf, 10);
3923 COPY_BUF(l,buf,len,c);
3924 if (len > buf_size - 10) {
3925 growBuffer(buf, 10);
3933 if ((in_space) && (normalize)) {
3934 while (buf[len - 1] == 0x20) len--;
3938 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3939 } else if (RAW != limit) {
3940 if ((c != 0) && (!IS_CHAR(c))) {
3941 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3942 "invalid character in attribute value\n");
3944 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3945 "AttValue: ' expected\n");
3949 if (attlen != NULL) *attlen = len;
3953 xmlErrMemory(ctxt, NULL);
3963 * @ctxt: an XML parser context
3965 * parse a value for an attribute
3966 * Note: the parser won't do substitution of entities here, this
3967 * will be handled later in xmlStringGetNodeList
3969 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3970 * "'" ([^<&'] | Reference)* "'"
3972 * 3.3.3 Attribute-Value Normalization:
3973 * Before the value of an attribute is passed to the application or
3974 * checked for validity, the XML processor must normalize it as follows:
3975 * - a character reference is processed by appending the referenced
3976 * character to the attribute value
3977 * - an entity reference is processed by recursively processing the
3978 * replacement text of the entity
3979 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3980 * appending #x20 to the normalized value, except that only a single
3981 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3982 * parsed entity or the literal entity value of an internal parsed entity
3983 * - other characters are processed by appending them to the normalized value
3984 * If the declared value is not CDATA, then the XML processor must further
3985 * process the normalized attribute value by discarding any leading and
3986 * trailing space (#x20) characters, and by replacing sequences of space
3987 * (#x20) characters by a single space (#x20) character.
3988 * All attributes for which no declaration has been read should be treated
3989 * by a non-validating parser as if declared CDATA.
3991 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3996 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3997 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3998 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4002 * xmlParseSystemLiteral:
4003 * @ctxt: an XML parser context
4005 * parse an XML Literal
4007 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4009 * Returns the SystemLiteral parsed or NULL
4013 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4014 xmlChar *buf = NULL;
4016 int size = XML_PARSER_BUFFER_SIZE;
4019 int state = ctxt->instate;
4026 } else if (RAW == '\'') {
4030 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4034 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4036 xmlErrMemory(ctxt, NULL);
4039 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4041 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4042 if (len + 5 >= size) {
4046 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4049 xmlErrMemory(ctxt, NULL);
4050 ctxt->instate = (xmlParserInputState) state;
4060 COPY_BUF(l,buf,len,cur);
4070 ctxt->instate = (xmlParserInputState) state;
4071 if (!IS_CHAR(cur)) {
4072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4080 * xmlParsePubidLiteral:
4081 * @ctxt: an XML parser context
4083 * parse an XML public literal
4085 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4087 * Returns the PubidLiteral parsed or NULL.
4091 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4092 xmlChar *buf = NULL;
4094 int size = XML_PARSER_BUFFER_SIZE;
4098 xmlParserInputState oldstate = ctxt->instate;
4104 } else if (RAW == '\'') {
4108 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4111 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4113 xmlErrMemory(ctxt, NULL);
4116 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4118 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4119 if (len + 1 >= size) {
4123 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4125 xmlErrMemory(ctxt, NULL);
4147 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4151 ctxt->instate = oldstate;
4155 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4158 * used for the test in the inner loop of the char data testing
4160 static const unsigned char test_char_data[256] = {
4161 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4162 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4163 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4164 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4165 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4166 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4167 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4168 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4169 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4170 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4171 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4172 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4173 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4174 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4175 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4176 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4177 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4178 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4179 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4180 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4181 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4182 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4183 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4184 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4185 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4186 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4187 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4188 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4189 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4190 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4191 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4192 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4197 * @ctxt: an XML parser context
4198 * @cdata: int indicating whether we are within a CDATA section
4200 * parse a CharData section.
4201 * if we are within a CDATA section ']]>' marks an end of section.
4203 * The right angle bracket (>) may be represented using the string "&gt;",
4204 * and must, for compatibility, be escaped using "&gt;" or a character
4205 * reference when it appears in the string "]]>" in content, when that
4206 * string is not marking the end of a CDATA section.
4208 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4212 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4215 int line = ctxt->input->line;
4216 int col = ctxt->input->col;
4222 * Accelerated common case where input doesn't need to be
4223 * modified before passing it to the handler.
4226 in = ctxt->input->cur;
4229 while (*in == 0x20) { in++; ctxt->input->col++; }
4232 ctxt->input->line++; ctxt->input->col = 1;
4234 } while (*in == 0xA);
4235 goto get_more_space;
4238 nbchar = in - ctxt->input->cur;
4240 const xmlChar *tmp = ctxt->input->cur;
4241 ctxt->input->cur = in;
4243 if ((ctxt->sax != NULL) &&
4244 (ctxt->sax->ignorableWhitespace !=
4245 ctxt->sax->characters)) {
4246 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4247 if (ctxt->sax->ignorableWhitespace != NULL)
4248 ctxt->sax->ignorableWhitespace(ctxt->userData,
4251 if (ctxt->sax->characters != NULL)
4252 ctxt->sax->characters(ctxt->userData,
4254 if (*ctxt->space == -1)
4257 } else if ((ctxt->sax != NULL) &&
4258 (ctxt->sax->characters != NULL)) {
4259 ctxt->sax->characters(ctxt->userData,
4267 ccol = ctxt->input->col;
4268 while (test_char_data[*in]) {
4272 ctxt->input->col = ccol;
4275 ctxt->input->line++; ctxt->input->col = 1;
4277 } while (*in == 0xA);
4281 if ((in[1] == ']') && (in[2] == '>')) {
4282 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4283 ctxt->input->cur = in;
4290 nbchar = in - ctxt->input->cur;
4292 if ((ctxt->sax != NULL) &&
4293 (ctxt->sax->ignorableWhitespace !=
4294 ctxt->sax->characters) &&
4295 (IS_BLANK_CH(*ctxt->input->cur))) {
4296 const xmlChar *tmp = ctxt->input->cur;
4297 ctxt->input->cur = in;
4299 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4300 if (ctxt->sax->ignorableWhitespace != NULL)
4301 ctxt->sax->ignorableWhitespace(ctxt->userData,
4304 if (ctxt->sax->characters != NULL)
4305 ctxt->sax->characters(ctxt->userData,
4307 if (*ctxt->space == -1)
4310 line = ctxt->input->line;
4311 col = ctxt->input->col;
4312 } else if (ctxt->sax != NULL) {
4313 if (ctxt->sax->characters != NULL)
4314 ctxt->sax->characters(ctxt->userData,
4315 ctxt->input->cur, nbchar);
4316 line = ctxt->input->line;
4317 col = ctxt->input->col;
4319 /* something really bad happened in the SAX callback */
4320 if (ctxt->instate != XML_PARSER_CONTENT)
4323 ctxt->input->cur = in;
4327 ctxt->input->cur = in;
4329 ctxt->input->line++; ctxt->input->col = 1;
4330 continue; /* while */
4342 in = ctxt->input->cur;
4343 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4346 ctxt->input->line = line;
4347 ctxt->input->col = col;
4348 xmlParseCharDataComplex(ctxt, cdata);
4352 * xmlParseCharDataComplex:
4353 * @ctxt: an XML parser context
4354 * @cdata: int indicating whether we are within a CDATA section
4356 * parse a CharData section. This is the fallback function
4357 * of xmlParseCharData() when the parsing requires handling
4358 * of non-ASCII characters.
4361 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4362 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4370 while ((cur != '<') && /* checked */
4372 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4373 if ((cur == ']') && (NXT(1) == ']') &&
4377 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4380 COPY_BUF(l,buf,nbchar,cur);
4381 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4385 * OK the segment is to be consumed as chars.
4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4388 if (areBlanks(ctxt, buf, nbchar, 0)) {
4389 if (ctxt->sax->ignorableWhitespace != NULL)
4390 ctxt->sax->ignorableWhitespace(ctxt->userData,
4393 if (ctxt->sax->characters != NULL)
4394 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4395 if ((ctxt->sax->characters !=
4396 ctxt->sax->ignorableWhitespace) &&
4397 (*ctxt->space == -1))
4402 /* something really bad happened in the SAX callback */
4403 if (ctxt->instate != XML_PARSER_CONTENT)
4417 * OK the segment is to be consumed as chars.
4419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4420 if (areBlanks(ctxt, buf, nbchar, 0)) {
4421 if (ctxt->sax->ignorableWhitespace != NULL)
4422 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4424 if (ctxt->sax->characters != NULL)
4425 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4426 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4427 (*ctxt->space == -1))
4432 if ((cur != 0) && (!IS_CHAR(cur))) {
4433 /* Generate the error and skip the offending character */
4434 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4435 "PCDATA invalid Char value %d\n",
4442 * xmlParseExternalID:
4443 * @ctxt: an XML parser context
4444 * @publicID: a xmlChar** receiving PubidLiteral
4445 * @strict: indicate whether we should restrict parsing to only
4446 * production [75], see NOTE below
4448 * Parse an External ID or a Public ID
4450 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4451 * 'PUBLIC' S PubidLiteral S SystemLiteral
4453 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4454 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4456 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4458 * Returns the SystemLiteral; in the second case publicID receives
4459 * the PubidLiteral. If strict is off it is possible to return NULL
4460 * and have publicID set.
/*
 * Parse an external identifier introduced by the SYSTEM or PUBLIC keyword.
 * The system literal is collected in URI (presumably the return value; the
 * return statement is not shown) and the public identifier is stored
 * through publicID.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
4464 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4465 xmlChar *URI = NULL;
/* SYSTEM form: a blank is required, then a system literal. */
4470 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4472 if (!IS_BLANK_CH(CUR)) {
4473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474 "Space required after 'SYSTEM'\n");
4477 URI = xmlParseSystemLiteral(ctxt);
4479 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
/* PUBLIC form: a blank is required, then a public identifier literal. */
4481 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4483 if (!IS_BLANK_CH(CUR)) {
4484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4485 "Space required after 'PUBLIC'\n");
4488 *publicID = xmlParsePubidLiteral(ctxt);
4489 if (*publicID == NULL) {
4490 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4494 * We don't handle [83] so "S SystemLiteral" is required.
4496 if (!IS_BLANK_CH(CUR)) {
4497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4498 "Space required after the Public Identifier\n");
4502 * We handle [83] so we return immediately, if
4503 * "S SystemLiteral" is not detected. From a purely parsing
4504 * point of view that's a nice mess.
/* Lookahead through a raw pointer: return NULL unless blanks followed by a
 * quote character come next. The loop below has no visible bound check
 * (see the TODO on that line). */
4510 if (!IS_BLANK_CH(*ptr)) return(NULL);
4512 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4513 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4516 URI = xmlParseSystemLiteral(ctxt);
4518 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4525 * xmlParseCommentComplex:
4526 * @ctxt: an XML parser context
4527 * @buf: the already parsed part of the buffer
4528 * @len: number of bytes filled in the buffer
4529 * @size: allocated size of the buffer
4531 * Skip an XML (SGML) comment <!-- .... -->
4532 * The spec says that "For compatibility, the string "--" (double-hyphen)
4533 * must not occur within comments. "
4534 * This is the slow routine in case the accelerator for ascii didn't work
4536 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Slow-path comment parser. It continues from a buffer that may already
 * hold part of the comment, copies further characters into it (growing it
 * with xmlRealloc as needed) and finally passes the buffer to the SAX
 * comment callback.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
4539 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
/* Remember the input the comment started in for the boundary check below. */
4546 inputid = ctxt->input->id;
/* Fresh buffer allocation; the guarding test is on a line not shown
 * (presumably taken when the caller passed no buffer - TODO confirm). */
4550 size = XML_PARSER_BUFFER_SIZE;
4551 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4553 xmlErrMemory(ctxt, NULL);
4557 GROW; /* Assure there's enough input data */
4560 goto not_terminated;
4562 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4563 "xmlParseComment: invalid xmlChar value %d\n",
4571 goto not_terminated;
4573 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4574 "xmlParseComment: invalid xmlChar value %d\n",
4582 goto not_terminated;
/* Main loop over a three-character window q, r, cur; it ends when the
 * window matches the comment terminator or cur fails IS_CHAR. */
4583 while (IS_CHAR(cur) && /* checked */
4585 (r != '-') || (q != '-'))) {
/* Two hyphens in a row that are not followed by the closing sign. */
4586 if ((r == '-') && (q == '-')) {
4587 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
/* Keep at least five bytes of headroom before copying q into buf. */
4589 if (len + 5 >= size) {
4592 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4593 if (new_buf == NULL) {
4595 xmlErrMemory(ctxt, NULL);
4600 COPY_BUF(ql,buf,len,q);
4621 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4622 "Comment not terminated \n<!--%.50s\n", buf);
4623 } else if (!IS_CHAR(cur)) {
4624 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4625 "xmlParseComment: invalid xmlChar value %d\n",
/* The comment must end in the same input it started in. */
4628 if (inputid != ctxt->input->id) {
4629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4630 "Comment doesn't start and stop in the same entity\n");
4633 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4634 (!ctxt->disableSAX))
4635 ctxt->sax->comment(ctxt->userData, buf);
4640 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4641 "Comment not terminated\n", NULL);
4648 * @ctxt: an XML parser context
4650 * Skip an XML (SGML) comment <!-- .... -->
4651 * The spec says that "For compatibility, the string "--" (double-hyphen)
4652 * must not occur within comments. "
4654 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Parse a comment. A fast path walks the raw input bytes through the
 * pointer named in, keeps line and column counters up to date and copies
 * runs of plain bytes into buf with memcpy. When the fast path cannot go
 * on, the work done so far is handed to xmlParseCommentComplex.
 * The parser state is switched to XML_PARSER_COMMENT for the duration and
 * restored on the visible exit paths.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
4657 xmlParseComment(xmlParserCtxtPtr ctxt) {
4658 xmlChar *buf = NULL;
4659 int size = XML_PARSER_BUFFER_SIZE;
4661 xmlParserInputState state;
4663 int nbchar = 0, ccol;
4667 * Check that there is a comment right here.
4669 if ((RAW != '<') || (NXT(1) != '!') ||
4670 (NXT(2) != '-') || (NXT(3) != '-')) return;
/* Save the current state so it can be restored once the comment is done. */
4671 state = ctxt->instate;
4672 ctxt->instate = XML_PARSER_COMMENT;
4673 inputid = ctxt->input->id;
4679 * Accelerated common case where input don't need to be
4680 * modified before passing it to the handler.
4682 in = ctxt->input->cur;
4686 ctxt->input->line++; ctxt->input->col = 1;
4688 } while (*in == 0xA);
4691 ccol = ctxt->input->col;
/* Advance over bytes in the 0x20..0x7F range other than the hyphen (a
 * further alternative is on a line not shown). */
4692 while (((*in > '-') && (*in <= 0x7F)) ||
4693 ((*in >= 0x20) && (*in < '-')) ||
4698 ctxt->input->col = ccol;
4701 ctxt->input->line++; ctxt->input->col = 1;
4703 } while (*in == 0xA);
/* Number of bytes scanned since the current input position. */
4706 nbchar = in - ctxt->input->cur;
4708 * save current set of data
4711 if ((ctxt->sax != NULL) &&
4712 (ctxt->sax->comment != NULL)) {
4714 if ((*in == '-') && (in[1] == '-'))
4717 size = XML_PARSER_BUFFER_SIZE + nbchar;
4718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4720 xmlErrMemory(ctxt, NULL);
4721 ctxt->instate = state;
/* Existing buffer too small for the new run plus a terminator: grow it. */
4725 } else if (len + nbchar + 1 >= size) {
4727 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4728 new_buf = (xmlChar *) xmlRealloc(buf,
4729 size * sizeof(xmlChar));
4730 if (new_buf == NULL) {
4732 xmlErrMemory(ctxt, NULL);
4733 ctxt->instate = state;
4738 memcpy(&buf[len], ctxt->input->cur, nbchar);
4743 ctxt->input->cur = in;
4746 ctxt->input->line++; ctxt->input->col = 1;
4751 ctxt->input->cur = in;
4753 ctxt->input->line++; ctxt->input->col = 1;
4754 continue; /* while */
4760 in = ctxt->input->cur;
/* The comment must end in the same input it started in. */
4764 if (ctxt->input->id != inputid) {
4765 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4766 "comment doesn't start and stop in the same entity\n");
/* Deliver the comment text, or an empty string on the other branch (the
 * selecting test is on a line not shown). */
4769 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4770 (!ctxt->disableSAX)) {
4772 ctxt->sax->comment(ctxt->userData, buf);
4774 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4778 ctxt->instate = state;
4782 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4783 "Double hyphen within comment: "
4787 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4788 "Double hyphen within comment\n", NULL);
4796 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* Fast path gave up: continue in the general routine with the partial
 * buffer, then restore the saved parser state. */
4797 xmlParseCommentComplex(ctxt, buf, len, size);
4798 ctxt->instate = state;
4805 * @ctxt: an XML parser context
4807 * parse the name of a PI
4809 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4811 * Returns the PITarget name or NULL
/*
 * Parse the target name of a processing instruction and diagnose names
 * that start with the letters x, m, l in any case, as well as names that
 * contain a colon.
 * NOTE(review): short source lines (including the return statements) are
 * absent from this listing; comments cover the visible statements only.
 */
4815 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4816 const xmlChar *name;
4818 name = xmlParseName(ctxt);
/* Case-insensitive test of the first three characters. */
4819 if ((name != NULL) &&
4820 ((name[0] == 'x') || (name[0] == 'X')) &&
4821 ((name[1] == 'm') || (name[1] == 'M')) &&
4822 ((name[2] == 'l') || (name[2] == 'L'))) {
/* Exactly the lowercase three-letter name: an XML declaration that is not
 * at the start of the document. */
4824 if ((name[0] == 'x') && (name[1] == 'm') &&
4825 (name[2] == 'l') && (name[3] == 0)) {
4826 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4827 "XML declaration allowed only at the start of the document\n");
/* Any other three-letter casing of the same name is reserved too. */
4829 } else if (name[3] == 0) {
4830 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
/* Longer names are compared with the NULL-terminated xmlW3CPIs table; the
 * action taken on a match is on a line not shown. Names that fall through
 * only get a warning. */
4834 if (xmlW3CPIs[i] == NULL) break;
4835 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4838 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4839 "xmlParsePITarget: invalid name prefix 'xml'\n",
/* Namespace rule: a PI target must not contain a colon. */
4842 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4843 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4844 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4849 #ifdef LIBXML_CATALOG_ENABLED
4851 * xmlParseCatalogPI:
4852 * @ctxt: an XML parser context
4853 * @catalog: the PI value string
4855 * parse an XML Catalog Processing Instruction.
4857 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4859 * Occurs only if allowed by the user and if happening in the Misc
4860 * part of the document before any doctype information
4861 * This will add the given catalog to the parsing context in order
4862 * to be used if there is a resolution need further down in the document
/*
 * Scan the data of a catalog processing instruction by hand: skip blanks,
 * expect the word catalog, then a quoted value, duplicate that value and
 * register it with xmlCatalogAddLocal on the parser context. A syntax
 * problem ends in the warning at the bottom.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
4866 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4867 xmlChar *URL = NULL;
4868 const xmlChar *tmp, *base;
4872 while (IS_BLANK_CH(*tmp)) tmp++;
/* xmlStrncmp returns non-zero when the first 7 bytes differ. */
4873 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4876 while (IS_BLANK_CH(*tmp)) tmp++;
4881 while (IS_BLANK_CH(*tmp)) tmp++;
/* The value has to open with a single or a double quote. */
4883 if ((marker != '\'') && (marker != '"'))
/* Walk to the matching quote or the end of the string. */
4887 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4890 URL = xmlStrndup(base, tmp - base);
4892 while (IS_BLANK_CH(*tmp)) tmp++;
4897 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4903 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4904 "Catalog PI syntax error: %s\n",
4913 * @ctxt: an XML parser context
4915 * parse an XML Processing Instruction.
4917 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4919 * The processing is transferred to SAX once parsed.
/*
 * Parse a processing instruction and forward its target and data to the
 * SAX processingInstruction callback. The parser state is switched to
 * XML_PARSER_PI while the instruction is read and restored afterwards
 * unless the parser reached XML_PARSER_EOF.
 *
 * Two shapes are handled: a target immediately followed by the closing
 * marker, and a target followed by a blank and data that is copied into a
 * growable buffer. Both shapes check that the instruction ends in the same
 * input it started in; a violation is reported as XML_ERR_ENTITY_BOUNDARY
 * in both branches (the data branch used to report XML_ERR_SPACE_REQUIRED
 * for the very same message).
 *
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
4923 xmlParsePI(xmlParserCtxtPtr ctxt) {
4924 xmlChar *buf = NULL;
4926 int size = XML_PARSER_BUFFER_SIZE;
4928 const xmlChar *target;
4929 xmlParserInputState state;
4932 if ((RAW == '<') && (NXT(1) == '?')) {
/* Remember the starting input for the entity boundary checks. */
4933 xmlParserInputPtr input = ctxt->input;
4934 state = ctxt->instate;
4935 ctxt->instate = XML_PARSER_PI;
4937 * this is a Processing Instruction.
4943 * Parse the target name and check for special support like
4946 target = xmlParsePITarget(ctxt);
4947 if (target != NULL) {
/* Target directly followed by the closing marker: no data. */
4948 if ((RAW == '?') && (NXT(1) == '>')) {
4949 if (input != ctxt->input) {
4950 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4951 "PI declaration doesn't start and stop in the same entity\n");
4958 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4959 (ctxt->sax->processingInstruction != NULL))
4960 ctxt->sax->processingInstruction(ctxt->userData,
4962 if (ctxt->instate != XML_PARSER_EOF)
4963 ctxt->instate = state;
/* Data follows: allocate the buffer that will receive it. */
4966 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4968 xmlErrMemory(ctxt, NULL);
4969 ctxt->instate = state;
4973 if (!IS_BLANK(cur)) {
4974 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4975 "ParsePI: PI %s space expected\n", target);
/* Copy characters until the closing marker or a character rejected by
 * IS_CHAR, keeping five bytes of headroom in the buffer. */
4979 while (IS_CHAR(cur) && /* checked */
4980 ((cur != '?') || (NXT(1) != '>'))) {
4981 if (len + 5 >= size) {
4985 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4987 xmlErrMemory(ctxt, NULL);
4989 ctxt->instate = state;
4999 COPY_BUF(l,buf,len,cur);
5010 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5011 "ParsePI: PI %s never end ...\n", target);
/* Same boundary rule and same error code as the empty-PI branch above. */
5013 if (input != ctxt->input) {
5014 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5015 "PI declaration doesn't start and stop in the same entity\n");
/* Catalog instructions are honoured only while the saved state is MISC or
 * START and the catalog defaults allow document-supplied catalogs. */
5019 #ifdef LIBXML_CATALOG_ENABLED
5020 if (((state == XML_PARSER_MISC) ||
5021 (state == XML_PARSER_START)) &&
5022 (xmlStrEqual(target, XML_CATALOG_PI))) {
5023 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5024 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5025 (allow == XML_CATA_ALLOW_ALL))
5026 xmlParseCatalogPI(ctxt, buf);
5034 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5035 (ctxt->sax->processingInstruction != NULL))
5036 ctxt->sax->processingInstruction(ctxt->userData,
5041 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5043 if (ctxt->instate != XML_PARSER_EOF)
5044 ctxt->instate = state;
5049 * xmlParseNotationDecl:
5050 * @ctxt: an XML parser context
5052 * parse a notation declaration
5054 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5056 * Hence there are actually 3 choices:
5057 * 'PUBLIC' S PubidLiteral
5058 * 'PUBLIC' S PubidLiteral S SystemLiteral
5059 * and 'SYSTEM' S SystemLiteral
5061 * See the NOTE on xmlParseExternalID().
/*
 * Parse a notation declaration: the keyword, a blank, the notation name, a
 * blank, then an external or public identifier read in non-strict mode.
 * The result is passed to the SAX notationDecl callback and both
 * identifier strings are freed before leaving.
 *
 * A declaration that ends in a different input than it started in is
 * reported as XML_ERR_ENTITY_BOUNDARY, matching the other boundary checks
 * in this file (it used to be reported as XML_ERR_SPACE_REQUIRED).
 *
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
5065 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5066 const xmlChar *name;
5070 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
/* Remember the starting input for the entity boundary check. */
5071 xmlParserInputPtr input = ctxt->input;
5074 if (!IS_BLANK_CH(CUR)) {
5075 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076 "Space required after '<!NOTATION'\n");
5081 name = xmlParseName(ctxt);
5083 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5086 if (!IS_BLANK_CH(CUR)) {
5087 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5088 "Space required after the NOTATION name'\n");
/* Namespace rule: a notation name must not contain a colon. */
5091 if (xmlStrchr(name, ':') != NULL) {
5092 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5093 "colon are forbidden from notation names '%s'\n",
/* strict is 0 so that a public identifier alone is accepted. */
5101 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5105 if (input != ctxt->input) {
5106 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5107 "Notation declaration doesn't start and stop in the same entity\n");
5110 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5111 (ctxt->sax->notationDecl != NULL))
5112 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5114 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
/* Both identifier strings are owned by this function. */
5116 if (Systemid != NULL) xmlFree(Systemid);
5117 if (Pubid != NULL) xmlFree(Pubid);
5122 * xmlParseEntityDecl:
5123 * @ctxt: an XML parser context
5125 * parse <!ENTITY declarations
5127 * [70] EntityDecl ::= GEDecl | PEDecl
5129 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5131 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5133 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5135 * [74] PEDef ::= EntityValue | ExternalID
5137 * [76] NDataDecl ::= S 'NDATA' S Name
5139 * [ VC: Notation Declared ]
5140 * The Name must match the declared name of a notation.
/*
 * Parse an entity declaration, general or parameter. The value is either a
 * quoted entity value or an external identifier, optionally followed by an
 * NDATA notation name for general entities. Each variant is reported
 * through the matching SAX callback (entityDecl or unparsedEntityDecl).
 * The value, URI and literal strings are freed at the end.
 * NOTE(review): short source lines (declarations, else branches, closing
 * braces) are absent from this listing, so which branch a statement sits
 * in is inferred from the entity type constants it uses; comments cover
 * the visible statements only.
 */
5144 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5145 const xmlChar *name = NULL;
5146 xmlChar *value = NULL;
5147 xmlChar *URI = NULL, *literal = NULL;
5148 const xmlChar *ndata = NULL;
5149 int isParameter = 0;
5150 xmlChar *orig = NULL;
5153 /* GROW; done in the caller */
5154 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
/* Remember the starting input for the entity boundary check. */
5155 xmlParserInputPtr input = ctxt->input;
5158 skipped = SKIP_BLANKS;
5160 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5161 "Space required after '<!ENTITY'\n");
5166 skipped = SKIP_BLANKS;
5168 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5169 "Space required after '%'\n");
5174 name = xmlParseName(ctxt);
5176 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5177 "xmlParseEntityDecl: no name\n");
/* Namespace rule: an entity name must not contain a colon. */
5180 if (xmlStrchr(name, ':') != NULL) {
5181 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5182 "colon are forbidden from entities names '%s'\n",
5185 skipped = SKIP_BLANKS;
5187 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5188 "Space required after the entity name\n");
5191 ctxt->instate = XML_PARSER_ENTITY_DECL;
5193 * handle the various case of definitions...
/* Parameter entity with a quoted value. */
5196 if ((RAW == '"') || (RAW == '\'')) {
5197 value = xmlParseEntityValue(ctxt, &orig);
5199 if ((ctxt->sax != NULL) &&
5200 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5201 ctxt->sax->entityDecl(ctxt->userData, name,
5202 XML_INTERNAL_PARAMETER_ENTITY,
/* Parameter entity with an external identifier (strict mode). */
5206 URI = xmlParseExternalID(ctxt, &literal, 1);
5207 if ((URI == NULL) && (literal == NULL)) {
5208 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
/* The system identifier is checked with xmlParseURI; a fragment part is a
 * fatal error. */
5213 uri = xmlParseURI((const char *) URI);
5215 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5216 "Invalid URI: %s\n", URI);
5218 * This really ought to be a well formedness error
5219 * but the XML Core WG decided otherwise c.f. issue
5220 * E26 of the XML erratas.
5223 if (uri->fragment != NULL) {
5225 * Okay this is foolish to block those but not
5228 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5230 if ((ctxt->sax != NULL) &&
5231 (!ctxt->disableSAX) &&
5232 (ctxt->sax->entityDecl != NULL))
5233 ctxt->sax->entityDecl(ctxt->userData, name,
5234 XML_EXTERNAL_PARAMETER_ENTITY,
5235 literal, URI, NULL);
/* General entity with a quoted value. */
5242 if ((RAW == '"') || (RAW == '\'')) {
5243 value = xmlParseEntityValue(ctxt, &orig);
5244 if ((ctxt->sax != NULL) &&
5245 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5246 ctxt->sax->entityDecl(ctxt->userData, name,
5247 XML_INTERNAL_GENERAL_ENTITY,
5250 * For expat compatibility in SAX mode.
/* Without a document, or with one tagged SAX_COMPAT_MODE, a placeholder
 * document and internal subset are created so that the entity can also be
 * recorded through xmlSAX2EntityDecl. */
5252 if ((ctxt->myDoc == NULL) ||
5253 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5254 if (ctxt->myDoc == NULL) {
5255 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5256 if (ctxt->myDoc == NULL) {
5257 xmlErrMemory(ctxt, "New Doc failed");
5260 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5262 if (ctxt->myDoc->intSubset == NULL)
5263 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5264 BAD_CAST "fake", NULL, NULL);
5266 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
/* General entity with an external identifier (strict mode). */
5270 URI = xmlParseExternalID(ctxt, &literal, 1);
5271 if ((URI == NULL) && (literal == NULL)) {
5272 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5277 uri = xmlParseURI((const char *)URI);
5279 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5280 "Invalid URI: %s\n", URI);
5282 * This really ought to be a well formedness error
5283 * but the XML Core WG decided otherwise c.f. issue
5284 * E26 of the XML erratas.
5287 if (uri->fragment != NULL) {
5289 * Okay this is foolish to block those but not
5292 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5297 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5298 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5299 "Space required before 'NDATA'\n");
/* An NDATA clause makes this an unparsed entity. */
5302 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5304 if (!IS_BLANK_CH(CUR)) {
5305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5306 "Space required after 'NDATA'\n");
5309 ndata = xmlParseName(ctxt);
5310 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5311 (ctxt->sax->unparsedEntityDecl != NULL))
5312 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5313 literal, URI, ndata);
/* No NDATA clause: an external parsed general entity. */
5315 if ((ctxt->sax != NULL) &&
5316 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5317 ctxt->sax->entityDecl(ctxt->userData, name,
5318 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5319 literal, URI, NULL);
5321 * For expat compatibility in SAX mode.
5322 * assuming the entity repalcement was asked for
5324 if ((ctxt->replaceEntities != 0) &&
5325 ((ctxt->myDoc == NULL) ||
5326 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5327 if (ctxt->myDoc == NULL) {
5328 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5329 if (ctxt->myDoc == NULL) {
5330 xmlErrMemory(ctxt, "New Doc failed");
5333 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5336 if (ctxt->myDoc->intSubset == NULL)
5337 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5338 BAD_CAST "fake", NULL, NULL);
5339 xmlSAX2EntityDecl(ctxt, name,
5340 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5341 literal, URI, NULL);
5348 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5349 "xmlParseEntityDecl: entity %s not terminated\n", name);
/* The declaration must end in the same input it started in. */
5351 if (input != ctxt->input) {
5352 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5353 "Entity declaration doesn't start and stop in the same entity\n");
5359 * Ugly mechanism to save the raw entity value.
/* Look the freshly declared entity up again through the SAX getters, with
 * a direct xmlSAX2GetEntity fallback when userData is the context itself. */
5361 xmlEntityPtr cur = NULL;
5364 if ((ctxt->sax != NULL) &&
5365 (ctxt->sax->getParameterEntity != NULL))
5366 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5368 if ((ctxt->sax != NULL) &&
5369 (ctxt->sax->getEntity != NULL))
5370 cur = ctxt->sax->getEntity(ctxt->userData, name);
5371 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5372 cur = xmlSAX2GetEntity(ctxt, name);
5376 if (cur->orig != NULL)
/* These three strings are owned by this function. */
5383 if (value != NULL) xmlFree(value);
5384 if (URI != NULL) xmlFree(URI);
5385 if (literal != NULL) xmlFree(literal);
5390 * xmlParseDefaultDecl:
5391 * @ctxt: an XML parser context
5392 * @value: Receive a possible fixed default value for the attribute
5394 * Parse an attribute default declaration
5396 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5398 * [ VC: Required Attribute ]
5399 * if the default declaration is the keyword #REQUIRED, then the
5400 * attribute must be specified for all elements of the type in the
5401 * attribute-list declaration.
5403 * [ VC: Attribute Default Legal ]
5404 * The declared default value must meet the lexical constraints of
5405 * the declared attribute type c.f. xmlValidateAttributeDecl()
5407 * [ VC: Fixed Attribute Default ]
5408 * if an attribute has a default value declared with the #FIXED
5409 * keyword, instances of that attribute must match the default value.
5411 * [ WFC: No < in Attribute Values ]
5412 * handled in xmlParseAttValue()
5414 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5415 * or XML_ATTRIBUTE_FIXED.
/*
 * Parse the default declaration of an attribute definition. The REQUIRED
 * and IMPLIED keywords return their enum value at once. Otherwise an
 * optional FIXED keyword is noted and the default value itself is read
 * with xmlParseAttValue.
 * NOTE(review): short source lines are absent from this listing, including
 * the one that stores the parsed value through the value parameter and the
 * final return; comments cover the visible statements only.
 */
5419 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5424 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5426 return(XML_ATTRIBUTE_REQUIRED);
5428 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5430 return(XML_ATTRIBUTE_IMPLIED);
/* Plain default unless the FIXED keyword is found next. */
5432 val = XML_ATTRIBUTE_NONE;
5433 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5435 val = XML_ATTRIBUTE_FIXED;
5436 if (!IS_BLANK_CH(CUR)) {
5437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438 "Space required after '#FIXED'\n");
5442 ret = xmlParseAttValue(ctxt);
/* Set the parser state to XML_PARSER_DTD after reading the value. */
5443 ctxt->instate = XML_PARSER_DTD;
/* Re-raises the error code already recorded in ctxt->errNo with a message
 * specific to attribute defaults. */
5445 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5446 "Attribute default value declaration error\n");
5453 * xmlParseNotationType:
5454 * @ctxt: an XML parser context
5456 * parse a Notation attribute type.
5458 * Note: the leading 'NOTATION' S part has already been parsed...
5460 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5462 * [ VC: Notation Attributes ]
5463 * Values of this type must match one of the notation names included
5464 * in the declaration; all notation names in the declaration must be declared.
5466 * Returns: the notation attribute tree built while parsing
/*
 * Parse the parenthesised list of notation names of a NOTATION attribute
 * type and build a linked xmlEnumeration list from it. Duplicate names
 * raise a validity error. On the visible error paths the partial list is
 * released with xmlFreeEnumeration.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
5470 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5471 const xmlChar *name;
5472 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5475 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5482 name = xmlParseName(ctxt);
5484 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5485 "Name expected in NOTATION declaration\n");
5486 xmlFreeEnumeration(ret);
/* Linear scan of the names collected so far to detect duplicates. */
5490 while (tmp != NULL) {
5491 if (xmlStrEqual(name, tmp->name)) {
5492 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5493 "standalone: attribute notation value token %s duplicated\n",
/* The duplicate is freed only when the parser dictionary does not own it. */
5495 if (!xmlDictOwns(ctxt->dict, name))
5496 xmlFree((xmlChar *) name);
5502 cur = xmlCreateEnumeration(name);
5504 xmlFreeEnumeration(ret);
/* First element becomes both head and tail of the list. */
5507 if (last == NULL) ret = last = cur;
/* Another name follows each vertical bar. */
5514 } while (RAW == '|');
5516 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5517 xmlFreeEnumeration(ret);
5525 * xmlParseEnumerationType:
5526 * @ctxt: an XML parser context
5528 * parse an Enumeration attribute type.
5530 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5532 * [ VC: Enumeration ]
5533 * Values of this type must match one of the Nmtoken tokens in
5536 * Returns: the enumeration attribute tree built while parsing
/*
 * Parse the parenthesised list of name tokens of an enumeration attribute
 * type and build a linked xmlEnumeration list from it. Duplicate tokens
 * raise a validity error.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only. Unlike the notation variant, no
 * xmlFreeEnumeration call is visible after the two fatal errors at the top
 * and bottom of this function - worth checking in the full source.
 */
5540 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5542 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5545 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5552 name = xmlParseNmtoken(ctxt);
5554 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
/* Linear scan of the tokens collected so far to detect duplicates. */
5558 while (tmp != NULL) {
5559 if (xmlStrEqual(name, tmp->name)) {
5560 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5561 "standalone: attribute enumeration value token %s duplicated\n",
5563 if (!xmlDictOwns(ctxt->dict, name))
5570 cur = xmlCreateEnumeration(name);
/* Dictionary ownership of the token is tested again after the enumeration
 * node has been created; the guarded statement is on a line not shown. */
5571 if (!xmlDictOwns(ctxt->dict, name))
5574 xmlFreeEnumeration(ret);
/* First element becomes both head and tail of the list. */
5577 if (last == NULL) ret = last = cur;
/* Another token follows each vertical bar. */
5584 } while (RAW == '|');
5586 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5594 * xmlParseEnumeratedType:
5595 * @ctxt: an XML parser context
5596 * @tree: the enumeration tree built while parsing
5598 * parse an Enumerated attribute type.
5600 * [57] EnumeratedType ::= NotationType | Enumeration
5602 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5605 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
/*
 * Dispatch between the two enumerated attribute types. When the input
 * starts with the NOTATION keyword the notation list parser is used,
 * otherwise the plain enumeration parser. The list is stored through tree.
 * Returns XML_ATTRIBUTE_NOTATION or XML_ATTRIBUTE_ENUMERATION, or 0 when
 * the selected parser produced no list.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
5609 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5610 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5612 if (!IS_BLANK_CH(CUR)) {
5613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5614 "Space required after 'NOTATION'\n");
5618 *tree = xmlParseNotationType(ctxt);
5619 if (*tree == NULL) return(0);
5620 return(XML_ATTRIBUTE_NOTATION);
5622 *tree = xmlParseEnumerationType(ctxt);
5623 if (*tree == NULL) return(0);
5624 return(XML_ATTRIBUTE_ENUMERATION);
5628 * xmlParseAttributeType:
5629 * @ctxt: an XML parser context
5630 * @tree: the enumeration tree built while parsing
5632 * parse the attribute type of an attribute definition
5634 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5636 * [55] StringType ::= 'CDATA'
5638 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5639 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5641 * Validity constraints for attribute values syntax are checked in
5642 * xmlValidateAttributeValue()
5645 * Values of type ID must match the Name production. A name must not
5646 * appear more than once in an XML document as a value of this type;
5647 * i.e., ID values must uniquely identify the elements which bear them.
5649 * [ VC: One ID per Element Type ]
5650 * No element type may have more than one ID attribute specified.
5652 * [ VC: ID Attribute Default ]
5653 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5656 * Values of type IDREF must match the Name production, and values
5657 * of type IDREFS must match Names; each IDREF Name must match the value
5658 * of an ID attribute on some element in the XML document; i.e. IDREF
5659 * values must match the value of some ID attribute.
5661 * [ VC: Entity Name ]
5662 * Values of type ENTITY must match the Name production, values
5663 * of type ENTITIES must match Names; each Entity Name must match the
5664 * name of an unparsed entity declared in the DTD.
5666 * [ VC: Name Token ]
5667 * Values of type NMTOKEN must match the Nmtoken production; values
5668 * of type NMTOKENS must match Nmtokens.
5670 * Returns the attribute type
/*
 * Recognise the type keyword of an attribute definition and return the
 * matching XML_ATTRIBUTE_* constant. Input that matches none of the
 * keywords is handed to xmlParseEnumeratedType, which may fill tree.
 * Longer keywords are tested before their prefixes (IDREFS before IDREF
 * before ID, NMTOKENS before NMTOKEN) so that the prefix does not win.
 * NOTE(review): short source lines (the statements that consume each
 * keyword) are absent from this listing; comments cover the visible
 * statements only.
 */
5673 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5675 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5677 return(XML_ATTRIBUTE_CDATA);
5678 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5680 return(XML_ATTRIBUTE_IDREFS);
5681 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5683 return(XML_ATTRIBUTE_IDREF);
5684 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5686 return(XML_ATTRIBUTE_ID);
5687 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5689 return(XML_ATTRIBUTE_ENTITY);
5690 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5692 return(XML_ATTRIBUTE_ENTITIES);
5693 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5695 return(XML_ATTRIBUTE_NMTOKENS);
5696 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5698 return(XML_ATTRIBUTE_NMTOKEN);
/* Not a keyword: must be a notation list or an enumeration. */
5700 return(xmlParseEnumeratedType(ctxt, tree));
5704 * xmlParseAttributeListDecl:
5705 * @ctxt: an XML parser context
5707 * Parse the attribute list definition for an element
5709 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5711 * [53] AttDef ::= S Name S AttType S DefaultDecl
/*
 * Parse an attribute list declaration: the element name followed by any
 * number of attribute definitions, each made of a name, a type and a
 * default declaration. Every definition is reported through the SAX
 * attributeDecl callback and, for SAX2, recorded with xmlAddDefAttrs and
 * xmlAddSpecialAttr.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
5715 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5716 const xmlChar *elemName;
5717 const xmlChar *attrName;
5718 xmlEnumerationPtr tree;
5720 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Remember the starting input for the entity boundary check. */
5721 xmlParserInputPtr input = ctxt->input;
5724 if (!IS_BLANK_CH(CUR)) {
5725 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5726 "Space required after '<!ATTLIST'\n");
5729 elemName = xmlParseName(ctxt);
5730 if (elemName == NULL) {
5731 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5732 "ATTLIST: no name for Element\n");
/* One iteration per attribute definition. */
5737 while (RAW != '>') {
/* Input position at the start of the iteration, compared again further
 * down to detect an iteration that consumed nothing. */
5738 const xmlChar *check = CUR_PTR;
5741 xmlChar *defaultValue = NULL;
5745 attrName = xmlParseName(ctxt);
5746 if (attrName == NULL) {
5747 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5748 "ATTLIST: no name for Attribute\n");
5752 if (!IS_BLANK_CH(CUR)) {
5753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5754 "Space required after the attribute name\n");
5759 type = xmlParseAttributeType(ctxt, &tree);
5765 if (!IS_BLANK_CH(CUR)) {
5766 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5767 "Space required after the attribute type\n");
5769 xmlFreeEnumeration(tree);
5774 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5776 if (defaultValue != NULL)
5777 xmlFree(defaultValue);
5779 xmlFreeEnumeration(tree);
/* Default values of every type except CDATA get their white space
 * normalised in place. */
5782 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5783 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5787 if (!IS_BLANK_CH(CUR)) {
5788 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5789 "Space required after the attribute default value\n");
5790 if (defaultValue != NULL)
5791 xmlFree(defaultValue);
5793 xmlFreeEnumeration(tree);
/* No progress since the top of the loop: report an internal error. */
5798 if (check == CUR_PTR) {
5799 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5800 "in xmlParseAttributeListDecl\n");
5801 if (defaultValue != NULL)
5802 xmlFree(defaultValue);
5804 xmlFreeEnumeration(tree);
/* The enumeration list is freed here only when the callback is not
 * invoked - presumably the callback takes ownership of it; TODO confirm. */
5807 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5808 (ctxt->sax->attributeDecl != NULL))
5809 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5810 type, def, defaultValue, tree);
5811 else if (tree != NULL)
5812 xmlFreeEnumeration(tree);
/* SAX2 bookkeeping: a default that is neither IMPLIED nor REQUIRED is
 * recorded as a defaulted attribute. */
5814 if ((ctxt->sax2) && (defaultValue != NULL) &&
5815 (def != XML_ATTRIBUTE_IMPLIED) &&
5816 (def != XML_ATTRIBUTE_REQUIRED)) {
5817 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5820 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5822 if (defaultValue != NULL)
5823 xmlFree(defaultValue);
/* Here the boundary violation is a validity error, not a fatal one. */
5827 if (input != ctxt->input) {
5828 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5829 "Attribute list declaration doesn't start and stop in the same entity\n",
5838 * xmlParseElementMixedContentDecl:
5839 * @ctxt: an XML parser context
5840 * @inputchk: the input used for the current entity, needed for boundary checks
5842 * parse the declaration for a Mixed Element content
5843 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5845 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5846 * '(' S? '#PCDATA' S? ')'
5848 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5850 * [ VC: No Duplicate Types ]
5851 * The same name must not appear more than once in a single
5852 * mixed-content declaration.
5854 * returns: the list of the xmlElementContentPtr describing the element choices
/*
 * Parse a mixed content declaration, that is PCDATA optionally followed
 * by alternative element names. The result is a tree of
 * xmlElementContent nodes: a single PCDATA node, or a chain of OR nodes
 * whose last right-hand child is attached after the loop.
 * NOTE(review): short source lines are absent from this listing; comments
 * cover the visible statements only.
 */
5856 xmlElementContentPtr
5857 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5858 xmlElementContentPtr ret = NULL, cur = NULL, n;
5859 const xmlChar *elem = NULL;
5862 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
/* When validating, the group must close in the input that opened it. */
5867 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5868 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869 "Element content declaration doesn't start and stop in the same entity\n",
/* PCDATA-only content model. */
5873 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5877 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5882 if ((RAW == '(') || (RAW == '|')) {
5883 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5884 if (ret == NULL) return(NULL);
/* One iteration per alternative element name. */
5886 while (RAW == '|') {
/* NOTE(review): the two early returns on allocation failure below show no
 * visible release of the nodes built so far - check for a leak in the
 * full source. */
5889 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5890 if (ret == NULL) return(NULL);
5896 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5897 if (n == NULL) return(NULL);
5898 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5907 elem = xmlParseName(ctxt);
5909 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5910 "xmlParseElementMixedContentDecl : Name expected\n");
/* NOTE(review): this frees cur while the other error path further down
 * frees ret; after the first iteration cur is presumably a descendant of
 * ret, so freeing only cur may leak the upper nodes - TODO confirm. */
5911 xmlFreeDocElementContent(ctxt->myDoc, cur);
5917 if ((RAW == ')') && (NXT(1) == '*')) {
/* Attach the last parsed name as right-hand child of the last node. */
5919 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5920 XML_ELEMENT_CONTENT_ELEMENT);
5921 if (cur->c2 != NULL)
5922 cur->c2->parent = cur;
5925 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5926 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5927 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5928 "Element content declaration doesn't start and stop in the same entity\n",
5933 xmlFreeDocElementContent(ctxt->myDoc, ret);
5934 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5939 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5945 * xmlParseElementChildrenContentDeclPriv:
5946 * @ctxt: an XML parser context
5947 * @inputchk: the input used for the current entity, needed for boundary checks
5948 * @depth: the level of recursion
5950 * parse the declaration for an element-children (choice or seq) content
5951 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5954 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5956 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5958 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5960 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5962 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5963 * TODO Parameter-entity replacement text must be properly nested
5964 * with parenthesized groups. That is to say, if either of the
5965 * opening or closing parentheses in a choice, seq, or Mixed
5966 * construct is contained in the replacement text for a parameter
5967 * entity, both must be contained in the same replacement text. For
5968 * interoperability, if a parameter-entity reference appears in a
5969 * choice, seq, or Mixed construct, its replacement text should not
5970 * be empty, and neither the first nor last non-blank character of
5971 * the replacement text should be a connector (| or ,).
5973 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Recursive worker for the children content model (choice or seq).
 * - Recursion guard: a depth above 128 without XML_PARSE_HUGE in
 *   ctxt->options is one of the conditions reported through
 *   xmlFatalErrMsgInt() with XML_ERR_ELEMCONTENT_NOT_FINISHED.
 * - First particle: either a nested group (the function calls itself with
 *   the current ctxt->input->id) or a name read with xmlParseName() and
 *   wrapped in an XML_ELEMENT_CONTENT_ELEMENT node; its ocur is set to
 *   OPT, MULT, PLUS or ONCE.
 * - Loop until ')': the first separator seen is remembered in type and any
 *   later different separator raises XML_ERR_SEPARATOR_REQUIRED; each
 *   separator allocates an op node of type XML_ELEMENT_CONTENT_SEQ or
 *   XML_ELEMENT_CONTENT_OR, then the next particle is parsed into last the
 *   same way as the first one.
 * - After the group: when ctxt->validate is set, a change of
 *   ctxt->input->id relative to @inputchk is reported as
 *   XML_ERR_ENTITY_BOUNDARY; ret->ocur is then adjusted again, and in the
 *   '*' and '+' branches the OR chain is normalized by resetting OPT/MULT
 *   children to ONCE as described by the inline normalization notes.
 * On the error paths last (when distinct from ret) and ret are released
 * with xmlFreeDocElementContent().
 */
5976 static xmlElementContentPtr
5977 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5979 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5980 const xmlChar *elem;
5983 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5985 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5986 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5993 int inputid = ctxt->input->id;
5995 /* Recurse on first child */
5998 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6003 elem = xmlParseName(ctxt);
6005 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6008 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6010 xmlErrMemory(ctxt, NULL);
6015 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6017 } else if (RAW == '*') {
6018 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6020 } else if (RAW == '+') {
6021 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6024 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6030 while (RAW != ')') {
6032 * Each loop we parse one separator and one element.
6035 if (type == 0) type = CUR;
6038 * Detect "Name | Name , Name" error
6040 else if (type != CUR) {
6041 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6042 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6044 if ((last != NULL) && (last != ret))
6045 xmlFreeDocElementContent(ctxt->myDoc, last);
6047 xmlFreeDocElementContent(ctxt->myDoc, ret);
6052 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6054 if ((last != NULL) && (last != ret))
6055 xmlFreeDocElementContent(ctxt->myDoc, last);
6056 xmlFreeDocElementContent(ctxt->myDoc, ret);
6074 } else if (RAW == '|') {
6075 if (type == 0) type = CUR;
6078 * Detect "Name , Name | Name" error
6080 else if (type != CUR) {
6081 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6082 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6084 if ((last != NULL) && (last != ret))
6085 xmlFreeDocElementContent(ctxt->myDoc, last);
6087 xmlFreeDocElementContent(ctxt->myDoc, ret);
6092 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6094 if ((last != NULL) && (last != ret))
6095 xmlFreeDocElementContent(ctxt->myDoc, last);
6097 xmlFreeDocElementContent(ctxt->myDoc, ret);
6116 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6117 if ((last != NULL) && (last != ret))
6118 xmlFreeDocElementContent(ctxt->myDoc, last);
6120 xmlFreeDocElementContent(ctxt->myDoc, ret);
6127 int inputid = ctxt->input->id;
6128 /* Recurse on second child */
6131 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6135 elem = xmlParseName(ctxt);
6137 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6139 xmlFreeDocElementContent(ctxt->myDoc, ret);
6142 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6145 xmlFreeDocElementContent(ctxt->myDoc, ret);
6149 last->ocur = XML_ELEMENT_CONTENT_OPT;
6151 } else if (RAW == '*') {
6152 last->ocur = XML_ELEMENT_CONTENT_MULT;
6154 } else if (RAW == '+') {
6155 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6158 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6164 if ((cur != NULL) && (last != NULL)) {
6169 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6170 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6171 "Element content declaration doesn't start and stop in the same entity\n",
6177 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6178 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6179 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6181 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6184 } else if (RAW == '*') {
6186 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6189 * Some normalization:
6190 * (a | b* | c?)* == (a | b | c)*
6192 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6193 if ((cur->c1 != NULL) &&
6194 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6195 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6196 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6197 if ((cur->c2 != NULL) &&
6198 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6199 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6200 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6205 } else if (RAW == '+') {
6209 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6211 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6213 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6215 * Some normalization:
6216 * (a | b*)+ == (a | b)*
6217 * (a | b?)+ == (a | b)*
6219 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6220 if ((cur->c1 != NULL) &&
6221 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6222 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6223 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6226 if ((cur->c2 != NULL) &&
6227 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6228 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6229 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6235 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6243 * xmlParseElementChildrenContentDecl:
6244 * @ctxt: an XML parser context
6245 * @inputchk: the input used for the current entity, needed for boundary checks
6247 * parse the declaration for an element-children (choice or seq) content
6248 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6250 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6252 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6254 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6256 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6258 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259 * TODO Parameter-entity replacement text must be properly nested
6260 * with parenthesized groups. That is to say, if either of the
6261 * opening or closing parentheses in a choice, seq, or Mixed
6262 * construct is contained in the replacement text for a parameter
6263 * entity, both must be contained in the same replacement text. For
6264 * interoperability, if a parameter-entity reference appears in a
6265 * choice, seq, or Mixed construct, its replacement text should not
6266 * be empty, and neither the first nor last non-blank character of
6267 * the replacement text should be a connector (| or ,).
6269 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Public entry point kept for API/ABI compatibility. It only forwards
 * @ctxt and @inputchk to xmlParseElementChildrenContentDeclPriv() with an
 * initial recursion depth of 1 and returns that call's result unchanged.
 */
6272 xmlElementContentPtr
6273 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6274 /* stub left for API/ABI compat */
6275 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6279 * xmlParseElementContentDecl:
6280 * @ctxt: an XML parser context
6281 * @name: the name of the element being defined.
6282 * @result: the Element Content pointer will be stored here if any
6284 * parse the declaration for an Element content either Mixed or Children,
6285 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6287 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6289 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/*
 * Chooses between the two content-model parsers for an element declaration.
 * The id of the current input (ctxt->input->id) is captured on entry and
 * handed to the sub-parser as its entity-boundary reference.
 * - Input at "#PCDATA": xmlParseElementMixedContentDecl() is used and res
 *   becomes XML_ELEMENT_TYPE_MIXED.
 * - Otherwise: xmlParseElementChildrenContentDeclPriv() is called with
 *   depth 1 and res becomes XML_ELEMENT_TYPE_ELEMENT.
 * XML_ERR_ELEMCONTENT_NOT_STARTED is raised, quoting @name, when the
 * expected '(' is missing.
 */
6293 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6294 xmlElementContentPtr *result) {
6296 xmlElementContentPtr tree = NULL;
6297 int inputid = ctxt->input->id;
6303 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6304 "xmlParseElementContentDecl : %s '(' expected\n", name);
6310 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6311 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6312 res = XML_ELEMENT_TYPE_MIXED;
6314 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6315 res = XML_ELEMENT_TYPE_ELEMENT;
6323 * xmlParseElementDecl:
6324 * @ctxt: an XML parser context
6326 * parse an Element declaration.
6328 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6330 * [ VC: Unique Element Type Declaration ]
6331 * No element type may be declared more than once
6333 * Returns the type of the element, or -1 in case of error
/*
 * Handles one "<!ELEMENT" declaration.
 * - Requires a blank after the keyword, a name from xmlParseName() and a
 *   blank after the name, raising XML_ERR_SPACE_REQUIRED or
 *   XML_ERR_NAME_REQUIRED otherwise.
 * - Content spec: "EMPTY" gives XML_ELEMENT_TYPE_EMPTY, "ANY" gives
 *   XML_ELEMENT_TYPE_ANY, '(' delegates to xmlParseElementContentDecl()
 *   which fills content. Anything else is an error; a '%' seen while
 *   ctxt->external == 0 and ctxt->inputNr == 1 is reported specifically
 *   as XML_ERR_PEREF_IN_INT_SUBSET.
 * - Closing: XML_ERR_GT_REQUIRED is raised on one path, where content is
 *   freed if set. A change of ctxt->input since entry is reported as
 *   XML_ERR_ENTITY_BOUNDARY.
 * - SAX: when a usable elementDecl handler exists, content->parent is reset
 *   to NULL before the call; if it is still NULL afterwards the tree was not
 *   adopted by the handler and is freed here. Without a handler the content
 *   tree is freed directly.
 */
6336 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6337 const xmlChar *name;
6339 xmlElementContentPtr content = NULL;
6341 /* GROW; done in the caller */
6342 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6343 xmlParserInputPtr input = ctxt->input;
6346 if (!IS_BLANK_CH(CUR)) {
6347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6348 "Space required after 'ELEMENT'\n");
6351 name = xmlParseName(ctxt);
6353 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6354 "xmlParseElementDecl: no name for Element\n");
6357 while ((RAW == 0) && (ctxt->inputNr > 1))
6359 if (!IS_BLANK_CH(CUR)) {
6360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6361 "Space required after the element name\n");
6364 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6367 * Element must always be empty.
6369 ret = XML_ELEMENT_TYPE_EMPTY;
6370 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6374 * Element is a generic container.
6376 ret = XML_ELEMENT_TYPE_ANY;
6377 } else if (RAW == '(') {
6378 ret = xmlParseElementContentDecl(ctxt, name, &content);
6381 * [ WFC: PEs in Internal Subset ] error handling.
6383 if ((RAW == '%') && (ctxt->external == 0) &&
6384 (ctxt->inputNr == 1)) {
6385 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6386 "PEReference: forbidden within markup decl in internal subset\n");
6388 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6389 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6396 * Pop-up of finished entities.
6398 while ((RAW == 0) && (ctxt->inputNr > 1))
6403 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6404 if (content != NULL) {
6405 xmlFreeDocElementContent(ctxt->myDoc, content);
6408 if (input != ctxt->input) {
6409 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410 "Element declaration doesn't start and stop in the same entity\n");
6414 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6415 (ctxt->sax->elementDecl != NULL)) {
6416 if (content != NULL)
6417 content->parent = NULL;
6418 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6420 if ((content != NULL) && (content->parent == NULL)) {
6422 * this is a trick: if xmlAddElementDecl is called,
6423 * instead of copying the full tree it is plugged directly
6424 * if called from the parser. Avoid duplicating the
6425 * interfaces or change the API/ABI
6427 xmlFreeDocElementContent(ctxt->myDoc, content);
6429 } else if (content != NULL) {
6430 xmlFreeDocElementContent(ctxt->myDoc, content);
6438 * xmlParseConditionalSections
6439 * @ctxt: an XML parser context
6441 * [61] conditionalSect ::= includeSect | ignoreSect
6442 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6443 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6444 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6445 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * Handles one conditional section. The id of the input current on entry is
 * kept in id so that later points can report XML_ERR_ENTITY_BOUNDARY
 * through xmlValidityError() when ctxt->input->id has changed.
 * - "INCLUDE": after the keyword checks, loops over the section content,
 *   dispatching to xmlParseConditionalSections() for a nested "<![", to
 *   xmlParsePEReference() for '%', and to xmlParseMarkupDecl() otherwise
 *   (blanks are handled separately). An iteration that moves neither
 *   CUR_PTR nor ctxt->input->consumed raises
 *   XML_ERR_EXT_SUBSET_NOT_FINISHED so the loop cannot spin forever.
 * - "IGNORE": saves ctxt->disableSAX and ctxt->instate, sets disableSAX to
 *   1 unless ctxt->recovery is set, switches instate to XML_PARSER_IGNORE,
 *   then scans while depth >= 0, tracking nested "<![" and "]]>" markers.
 *   The saved values are restored afterwards.
 * - Any other keyword raises XML_ERR_CONDSEC_INVALID_KEYWORD.
 * XML_ERR_CONDSEC_INVALID and XML_ERR_CONDSEC_NOT_FINISHED are the other
 * fatal errors raised here. When xmlParserDebugEntities is set, entry and
 * exit traces are written with xmlGenericError().
 */
6449 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6450 int id = ctxt->input->id;
6454 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6458 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6460 if (ctxt->input->id != id) {
6461 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6462 "All markup of the conditional section is not in the same entity\n",
6467 if (xmlParserDebugEntities) {
6468 if ((ctxt->input != NULL) && (ctxt->input->filename))
6469 xmlGenericError(xmlGenericErrorContext,
6470 "%s(%d): ", ctxt->input->filename,
6472 xmlGenericError(xmlGenericErrorContext,
6473 "Entering INCLUDE Conditional Section\n");
6476 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6478 const xmlChar *check = CUR_PTR;
6479 unsigned int cons = ctxt->input->consumed;
6481 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6482 xmlParseConditionalSections(ctxt);
6483 } else if (IS_BLANK_CH(CUR)) {
6485 } else if (RAW == '%') {
6486 xmlParsePEReference(ctxt);
6488 xmlParseMarkupDecl(ctxt);
6491 * Pop-up of finished entities.
6493 while ((RAW == 0) && (ctxt->inputNr > 1))
6496 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6497 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6501 if (xmlParserDebugEntities) {
6502 if ((ctxt->input != NULL) && (ctxt->input->filename))
6503 xmlGenericError(xmlGenericErrorContext,
6504 "%s(%d): ", ctxt->input->filename,
6506 xmlGenericError(xmlGenericErrorContext,
6507 "Leaving INCLUDE Conditional Section\n");
6510 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6512 xmlParserInputState instate;
6518 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6520 if (ctxt->input->id != id) {
6521 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6522 "All markup of the conditional section is not in the same entity\n",
6527 if (xmlParserDebugEntities) {
6528 if ((ctxt->input != NULL) && (ctxt->input->filename))
6529 xmlGenericError(xmlGenericErrorContext,
6530 "%s(%d): ", ctxt->input->filename,
6532 xmlGenericError(xmlGenericErrorContext,
6533 "Entering IGNORE Conditional Section\n");
6537 * Parse up to the end of the conditional section
6538 * But disable SAX event generating DTD building in the meantime
6540 state = ctxt->disableSAX;
6541 instate = ctxt->instate;
6542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6543 ctxt->instate = XML_PARSER_IGNORE;
6545 while ((depth >= 0) && (RAW != 0)) {
6546 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6551 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6552 if (--depth >= 0) SKIP(3);
6559 ctxt->disableSAX = state;
6560 ctxt->instate = instate;
6562 if (xmlParserDebugEntities) {
6563 if ((ctxt->input != NULL) && (ctxt->input->filename))
6564 xmlGenericError(xmlGenericErrorContext,
6565 "%s(%d): ", ctxt->input->filename,
6567 xmlGenericError(xmlGenericErrorContext,
6568 "Leaving IGNORE Conditional Section\n");
6572 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6579 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6581 if (ctxt->input->id != id) {
6582 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6583 "All markup of the conditional section is not in the same entity\n",
6591 * xmlParseMarkupDecl:
6592 * @ctxt: an XML parser context
6594 * parse Markup declarations
6596 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6597 * NotationDecl | PI | Comment
6599 * [ VC: Proper Declaration/PE Nesting ]
6600 * Parameter-entity replacement text must be properly nested with
6601 * markup declarations. That is to say, if either the first character
6602 * or the last character of a markup declaration (markupdecl above) is
6603 * contained in the replacement text for a parameter-entity reference,
6604 * both must be contained in the same replacement text.
6606 * [ WFC: PEs in Internal Subset ]
6607 * In the internal DTD subset, parameter-entity references can occur
6608 * only where markup declarations can occur, not within markup declarations.
6609 * (This does not apply to references that occur in external parameter
6610 * entities or to the external subset.)
/*
 * Dispatcher for a single markup declaration.
 * - When the second character is '!', further look-ahead (for example
 *   NXT(3) == 'N') selects one of xmlParseElementDecl(),
 *   xmlParseEntityDecl(), xmlParseAttributeListDecl(),
 *   xmlParseNotationDecl() or xmlParseComment(); an unrecognized form is
 *   left for later error detection.
 * - A second character of '?' is handled by a separate branch.
 * - Afterwards, xmlParsePEReference() is called only in the internal subset
 *   proper (ctxt->external == 0 and ctxt->inputNr == 1), and a "<![" seen
 *   while ctxt->external == 0 and ctxt->inputNr > 1 is passed to
 *   xmlParseConditionalSections().
 * ctxt->instate is set to XML_PARSER_DTD before returning.
 */
6613 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6616 if (NXT(1) == '!') {
6620 xmlParseElementDecl(ctxt);
6621 else if (NXT(3) == 'N')
6622 xmlParseEntityDecl(ctxt);
6625 xmlParseAttributeListDecl(ctxt);
6628 xmlParseNotationDecl(ctxt);
6631 xmlParseComment(ctxt);
6634 /* there is an error but it will be detected later */
6637 } else if (NXT(1) == '?') {
6642 * This is only for internal subset. On external entities,
6643 * the replacement is done before parsing stage
6645 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6646 xmlParsePEReference(ctxt);
6649 * Conditional sections are allowed from entities included
6650 * by PE References in the internal subset.
6652 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6653 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6654 xmlParseConditionalSections(ctxt);
6658 ctxt->instate = XML_PARSER_DTD;
6663 * @ctxt: an XML parser context
6665 * parse an XML declaration header for external entities
6667 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
/*
 * Handles the text declaration at the start of an external entity.
 * - Expects "<?xml" followed by a blank; XML_ERR_XMLDECL_NOT_STARTED and
 *   XML_ERR_SPACE_REQUIRED are the errors raised around this check.
 * - Version: taken from xmlParseVersionInfo(); when that returns NULL a
 *   copy of XML_DEFAULT_VERSION made with xmlCharStrdup() is used instead.
 *   The resulting string is stored in ctxt->input->version.
 * - Encoding: xmlParseEncodingDecl() is called; when ctxt->errNo is
 *   XML_ERR_UNSUPPORTED_ENCODING parsing of the declaration stops there.
 *   A NULL encoding with ctxt->errNo still XML_ERR_OK is reported as
 *   XML_ERR_MISSING_ENCODING.
 * - Terminator: "?>" is the accepted form; a lone '>' or anything else
 *   raises XML_ERR_XMLDECL_NOT_FINISHED, the latter also skipping ahead
 *   with MOVETO_ENDTAG().
 */
6671 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6673 const xmlChar *encoding;
6676 * We know that '<?xml' is here.
6678 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6681 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6685 if (!IS_BLANK_CH(CUR)) {
6686 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6687 "Space needed after '<?xml'\n");
6692 * We may have the VersionInfo here.
6694 version = xmlParseVersionInfo(ctxt);
6695 if (version == NULL)
6696 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6698 if (!IS_BLANK_CH(CUR)) {
6699 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6700 "Space needed here\n");
6703 ctxt->input->version = version;
6706 * We must have the encoding declaration
6708 encoding = xmlParseEncodingDecl(ctxt);
6709 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6711 * The XML REC instructs us to stop parsing right here
6715 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6716 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6717 "Missing encoding in text declaration\n");
6721 if ((RAW == '?') && (NXT(1) == '>')) {
6723 } else if (RAW == '>') {
6724 /* Deprecated old WD ... */
6725 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6728 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6729 MOVETO_ENDTAG(CUR_PTR);
6735 * xmlParseExternalSubset:
6736 * @ctxt: an XML parser context
6737 * @ExternalID: the external identifier
6738 * @SystemID: the system identifier (or URL)
6740 * parse Markup declarations from an external subset
6742 * [30] extSubset ::= textDecl? extSubsetDecl
6744 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/*
 * Drives the parsing of an external DTD subset.
 * - Calls xmlDetectSAX2(); when ctxt->encoding is NULL and at least four
 *   bytes are buffered, runs xmlDetectCharEncoding() on them and switches
 *   encoding with xmlSwitchEncoding() unless the result is
 *   XML_CHAR_ENCODING_NONE.
 * - A leading "<?xml" is handed to xmlParseTextDecl(); if that leaves
 *   ctxt->errNo at XML_ERR_UNSUPPORTED_ENCODING the parser state is set to
 *   XML_PARSER_EOF.
 * - Makes sure a document exists: when ctxt->myDoc is NULL a "1.0" document
 *   is created with xmlNewDoc() (xmlErrMemory() on failure) and flagged
 *   XML_DOC_INTERNAL. If the document has no internal subset one is created
 *   from @ExternalID and @SystemID with xmlCreateIntSubset().
 * - Sets ctxt->instate to XML_PARSER_DTD and loops while the input starts
 *   with "<?", "<!", '%' or a blank, dispatching to
 *   xmlParseConditionalSections(), xmlParsePEReference() or
 *   xmlParseMarkupDecl(). An iteration that moves neither CUR_PTR nor
 *   ctxt->input->consumed raises XML_ERR_EXT_SUBSET_NOT_FINISHED, which is
 *   also raised once more after the loop.
 */
6747 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6748 const xmlChar *SystemID) {
6749 xmlDetectSAX2(ctxt);
6752 if ((ctxt->encoding == NULL) &&
6753 (ctxt->input->end - ctxt->input->cur >= 4)) {
6755 xmlCharEncoding enc;
6761 enc = xmlDetectCharEncoding(start, 4);
6762 if (enc != XML_CHAR_ENCODING_NONE)
6763 xmlSwitchEncoding(ctxt, enc);
6766 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6767 xmlParseTextDecl(ctxt);
6768 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6770 * The XML REC instructs us to stop parsing right here
6772 ctxt->instate = XML_PARSER_EOF;
6776 if (ctxt->myDoc == NULL) {
6777 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6778 if (ctxt->myDoc == NULL) {
6779 xmlErrMemory(ctxt, "New Doc failed");
6782 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6784 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6785 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6787 ctxt->instate = XML_PARSER_DTD;
6789 while (((RAW == '<') && (NXT(1) == '?')) ||
6790 ((RAW == '<') && (NXT(1) == '!')) ||
6791 (RAW == '%') || IS_BLANK_CH(CUR)) {
6792 const xmlChar *check = CUR_PTR;
6793 unsigned int cons = ctxt->input->consumed;
6796 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6797 xmlParseConditionalSections(ctxt);
6798 } else if (IS_BLANK_CH(CUR)) {
6800 } else if (RAW == '%') {
6801 xmlParsePEReference(ctxt);
6803 xmlParseMarkupDecl(ctxt);
6806 * Pop-up of finished entities.
6808 while ((RAW == 0) && (ctxt->inputNr > 1))
6811 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6812 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6818 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6824 * xmlParseReference:
6825 * @ctxt: an XML parser context
6827 * parse and handle entity references in content, depending on the SAX
6828 * interface, this may end up in a call to characters() if this is a
6829 * CharRef, a predefined entity, if there is no reference() callback,
6830 * or if the parser was asked to switch to that mode.
6832 * [67] Reference ::= EntityRef | CharRef
/*
 * Handles a reference met in content and delivers it to the application.
 * Character reference (NXT(1) == '#'):
 *   the value comes from xmlParseCharRef(). With a non UTF-8 ctxt->charset,
 *   values up to 0xFF are sent to sax->characters() as one byte and larger
 *   ones are formatted as "#x%X" or "#%d" and sent to sax->reference().
 *   Otherwise the value is encoded with COPY_BUF() and sent to
 *   sax->characters().
 * Entity reference:
 *   the entity comes from xmlParseEntityRef(); NULL ends the call. The
 *   previous ent->checked value is kept in was_checked.
 *   - Predefined entities (ent->name == NULL or etype
 *     XML_INTERNAL_PREDEFINED_ENTITY) are inlined through sax->characters().
 *   - First use (ent->checked == 0): the content is parsed with
 *     xmlParseBalancedChunkMemoryInternal() for internal general entities or
 *     xmlParseExternalEntityPrivate() for external parsed ones; any other
 *     type yields XML_ERR_ENTITY_PE_INTERNAL. ent->checked then records how
 *     many entities that parse consumed (ctxt->nbentities - oldnbent).
 *     XML_ERR_ENTITY_LOOP and a positive xmlParserEntityCheck() both free the
 *     node list. A successful non-empty list becomes ent->children when the
 *     entity had none, and is otherwise freed.
 *   - Later uses with ent->checked != 1 add ent->checked to
 *     ctxt->nbentities.
 *   - ent->children == NULL: when was_checked != 0 the content is parsed
 *     again (list argument NULL) so the SAX callbacks fire, and
 *     sax->reference() is called if ctxt->replaceEntities == 0.
 *   - sax->reference() is also called on a further path when
 *     ctxt->replaceEntities == 0 and SAX is not disabled.
 *   - When substituting into a tree (ctxt->node and ent->children set) there
 *     are three cases: copy every child with xmlDocCopyNode() and add the
 *     copies to ctxt->node (list == NULL and ent->owner == 0, or reader
 *     mode); copy the children back onto the entity while moving the
 *     originals under ctxt->node (list == NULL otherwise); or, on the first
 *     occurrence, rename boundary text nodes to "nbktext" and attach
 *     ent->children with xmlAddChildList().
 *   Copied nodes inherit cur->_private only when their own is NULL.
 * NOTE(review): one inline note below mentions "progressive == 2" while the
 * code it describes tests ctxt->parseMode == XML_PARSE_READER - confirm the
 * note is still accurate.
 */
6835 xmlParseReference(xmlParserCtxtPtr ctxt) {
6839 xmlNodePtr list = NULL;
6840 xmlParserErrors ret = XML_ERR_OK;
6847 * Simple case of a CharRef
6849 if (NXT(1) == '#') {
6853 int value = xmlParseCharRef(ctxt);
6857 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6859 * So we are using non-UTF-8 buffers
6860 * Check that the char fit on 8bits, if not
6861 * generate a CharRef.
6863 if (value <= 0xFF) {
6866 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6867 (!ctxt->disableSAX))
6868 ctxt->sax->characters(ctxt->userData, out, 1);
6870 if ((hex == 'x') || (hex == 'X'))
6871 snprintf((char *)out, sizeof(out), "#x%X", value);
6873 snprintf((char *)out, sizeof(out), "#%d", value);
6874 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6875 (!ctxt->disableSAX))
6876 ctxt->sax->reference(ctxt->userData, out);
6880 * Just encode the value in UTF-8
6882 COPY_BUF(0 ,out, i, value);
6884 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6885 (!ctxt->disableSAX))
6886 ctxt->sax->characters(ctxt->userData, out, i);
6892 * We are seeing an entity reference
6894 ent = xmlParseEntityRef(ctxt);
6895 if (ent == NULL) return;
6896 if (!ctxt->wellFormed)
6898 was_checked = ent->checked;
6900 /* special case of predefined entities */
6901 if ((ent->name == NULL) ||
6902 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6904 if (val == NULL) return;
6906 * inline the entity.
6908 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6909 (!ctxt->disableSAX))
6910 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6915 * The first reference to the entity trigger a parsing phase
6916 * where the ent->children is filled with the result from
6919 if (ent->checked == 0) {
6920 unsigned long oldnbent = ctxt->nbentities;
6923 * This is a bit hackish but this seems the best
6924 * way to make sure both SAX and DOM entity support
6928 if (ctxt->userData == ctxt)
6931 user_data = ctxt->userData;
6934 * Check that this entity is well formed
6935 * 4.3.2: An internal general parsed entity is well-formed
6936 * if its replacement text matches the production labeled
6939 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6941 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6945 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6947 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6948 user_data, ctxt->depth, ent->URI,
6949 ent->ExternalID, &list);
6952 ret = XML_ERR_ENTITY_PE_INTERNAL;
6953 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6954 "invalid entity type found\n", NULL);
6958 * Store the number of entities needing parsing for this entity
6959 * content and do checkings
6961 ent->checked = ctxt->nbentities - oldnbent;
6962 if (ret == XML_ERR_ENTITY_LOOP) {
6963 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6964 xmlFreeNodeList(list);
6967 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6968 xmlFreeNodeList(list);
6972 if ((ret == XML_ERR_OK) && (list != NULL)) {
6973 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6974 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6975 (ent->children == NULL)) {
6976 ent->children = list;
6977 if (ctxt->replaceEntities) {
6979 * Prune it directly in the generated document
6980 * except for single text nodes.
6982 if (((list->type == XML_TEXT_NODE) &&
6983 (list->next == NULL)) ||
6984 (ctxt->parseMode == XML_PARSE_READER)) {
6985 list->parent = (xmlNodePtr) ent;
6990 while (list != NULL) {
6991 list->parent = (xmlNodePtr) ctxt->node;
6992 list->doc = ctxt->myDoc;
6993 if (list->next == NULL)
6997 list = ent->children;
6998 #ifdef LIBXML_LEGACY_ENABLED
6999 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7000 xmlAddEntityReference(ent, list, NULL);
7001 #endif /* LIBXML_LEGACY_ENABLED */
7005 while (list != NULL) {
7006 list->parent = (xmlNodePtr) ent;
7007 xmlSetTreeDoc(list, ent->doc);
7008 if (list->next == NULL)
7014 xmlFreeNodeList(list);
7017 } else if ((ret != XML_ERR_OK) &&
7018 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7019 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7020 "Entity '%s' failed to parse\n", ent->name);
7021 } else if (list != NULL) {
7022 xmlFreeNodeList(list);
7025 if (ent->checked == 0)
7027 } else if (ent->checked != 1) {
7028 ctxt->nbentities += ent->checked;
7032 * Now that the entity content has been gathered
7033 * provide it to the application, this can take different forms based
7034 * on the parsing modes.
7036 if (ent->children == NULL) {
7038 * Probably running in SAX mode and the callbacks don't
7039 * build the entity content. So unless we already went
7040 * though parsing for first checking go though the entity
7041 * content to generate callbacks associated to the entity
7043 if (was_checked != 0) {
7046 * This is a bit hackish but this seems the best
7047 * way to make sure both SAX and DOM entity support
7050 if (ctxt->userData == ctxt)
7053 user_data = ctxt->userData;
7055 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7057 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7058 ent->content, user_data, NULL);
7060 } else if (ent->etype ==
7061 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7063 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7064 ctxt->sax, user_data, ctxt->depth,
7065 ent->URI, ent->ExternalID, NULL);
7068 ret = XML_ERR_ENTITY_PE_INTERNAL;
7069 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7070 "invalid entity type found\n", NULL);
7072 if (ret == XML_ERR_ENTITY_LOOP) {
7073 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7077 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7078 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7080 * Entity reference callback comes second, it's somewhat
7081 * superfluous but a compatibility to historical behaviour
7083 ctxt->sax->reference(ctxt->userData, ent->name);
7089 * If we didn't get any children for the entity being built
7091 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7092 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7096 ctxt->sax->reference(ctxt->userData, ent->name);
7100 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7102 * There is a problem on the handling of _private for entities
7103 * (bug 155816): Should we copy the content of the field from
7104 * the entity (possibly overwriting some value set by the user
7105 * when a copy is created), should we leave it alone, or should
7106 * we try to take care of different situations? The problem
7107 * is exacerbated by the usage of this field by the xmlReader.
7108 * To fix this bug, we look at _private on the created node
7109 * and, if it's NULL, we copy in whatever was in the entity.
7110 * If it's not NULL we leave it alone. This is somewhat of a
7111 * hack - maybe we should have further tests to determine
7114 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7116 * Seems we are generating the DOM content, do
7117 * a simple tree copy for all references except the first
7118 * In the first occurrence list contains the replacement.
7119 * progressive == 2 means we are operating on the Reader
7120 * and since nodes are discarded we must copy all the time.
7122 if (((list == NULL) && (ent->owner == 0)) ||
7123 (ctxt->parseMode == XML_PARSE_READER)) {
7124 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7127 * when operating on a reader, the entities definitions
7128 * are always owning the entities subtree.
7129 if (ctxt->parseMode == XML_PARSE_READER)
7133 cur = ent->children;
7134 while (cur != NULL) {
7135 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7137 if (nw->_private == NULL)
7138 nw->_private = cur->_private;
7139 if (firstChild == NULL){
7142 nw = xmlAddChild(ctxt->node, nw);
7144 if (cur == ent->last) {
7146 * needed to detect some strange empty
7147 * node cases in the reader tests
7149 if ((ctxt->parseMode == XML_PARSE_READER) &&
7151 (nw->type == XML_ELEMENT_NODE) &&
7152 (nw->children == NULL))
7159 #ifdef LIBXML_LEGACY_ENABLED
7160 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7161 xmlAddEntityReference(ent, firstChild, nw);
7162 #endif /* LIBXML_LEGACY_ENABLED */
7163 } else if (list == NULL) {
7164 xmlNodePtr nw = NULL, cur, next, last,
7167 * Copy the entity child list and make it the new
7168 * entity child list. The goal is to make sure any
7169 * ID or REF referenced will be the one from the
7170 * document content and not the entity copy.
7172 cur = ent->children;
7173 ent->children = NULL;
7176 while (cur != NULL) {
7180 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7182 if (nw->_private == NULL)
7183 nw->_private = cur->_private;
7184 if (firstChild == NULL){
7187 xmlAddChild((xmlNodePtr) ent, nw);
7188 xmlAddChild(ctxt->node, cur);
7194 if (ent->owner == 0)
7196 #ifdef LIBXML_LEGACY_ENABLED
7197 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7198 xmlAddEntityReference(ent, firstChild, nw);
7199 #endif /* LIBXML_LEGACY_ENABLED */
7201 const xmlChar *nbktext;
7204 * the name change is to avoid coalescing of the
7205 * node with a possible previous text one which
7206 * would make ent->children a dangling pointer
7208 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7210 if (ent->children->type == XML_TEXT_NODE)
7211 ent->children->name = nbktext;
7212 if ((ent->last != ent->children) &&
7213 (ent->last->type == XML_TEXT_NODE))
7214 ent->last->name = nbktext;
7215 xmlAddChildList(ctxt->node, ent->children);
7219 * This is to avoid a nasty side effect, see
7220 * characters() in SAX.c
7230 * xmlParseEntityRef:
7231 * @ctxt: an XML parser context
7233 * parse an ENTITY reference
7235 * [68] EntityRef ::= '&' Name ';'
7237 * [ WFC: Entity Declared ]
7238 * In a document without any DTD, a document with only an internal DTD
7239 * subset which contains no parameter entity references, or a document
7240 * with "standalone='yes'", the Name given in the entity reference
7241 * must match that in an entity declaration, except that well-formed
7242 * documents need not declare any of the following entities: amp, lt,
7243 * gt, apos, quot. The declaration of a parameter entity must precede
7244 * any reference to it. Similarly, the declaration of a general entity
7245 * must precede any reference to it which appears in a default value in an
7246 * attribute-list declaration. Note that if entities are declared in the
7247 * external subset or in external parameter entities, a non-validating
7248 * processor is not obligated to read and process their declarations;
7249 * for such documents, the rule that an entity must be declared is a
7250 * well-formedness constraint only if standalone='yes'.
7252 * [ WFC: Parsed Entity ]
7253 * An entity reference must not contain the name of an unparsed entity
7255 * Returns the xmlEntityPtr if found, or NULL otherwise.
7258 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7259 const xmlChar *name;
7260 xmlEntityPtr ent = NULL;
7267 name = xmlParseName(ctxt);
7269 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7270 "xmlParseEntityRef: no name\n");
7274 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7280 * Predefined entities override any extra definition
7282 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7283 ent = xmlGetPredefinedEntity(name);
7289 * Increase the number of entity references parsed
7294 * First ask SAX for entity resolution, otherwise try the
7295 * entities which may have been stored in the parser context.
7297 if (ctxt->sax != NULL) {
7298 if (ctxt->sax->getEntity != NULL)
7299 ent = ctxt->sax->getEntity(ctxt->userData, name);
7300 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7301 (ctxt->options & XML_PARSE_OLDSAX))
7302 ent = xmlGetPredefinedEntity(name);
7303 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7304 (ctxt->userData==ctxt)) {
7305 ent = xmlSAX2GetEntity(ctxt, name);
7309 * [ WFC: Entity Declared ]
7310 * In a document without any DTD, a document with only an
7311 * internal DTD subset which contains no parameter entity
7312 * references, or a document with "standalone='yes'", the
7313 * Name given in the entity reference must match that in an
7314 * entity declaration, except that well-formed documents
7315 * need not declare any of the following entities: amp, lt,
7317 * The declaration of a parameter entity must precede any
7319 * Similarly, the declaration of a general entity must
7320 * precede any reference to it which appears in a default
7321 * value in an attribute-list declaration. Note that if
7322 * entities are declared in the external subset or in
7323 * external parameter entities, a non-validating processor
7324 * is not obligated to read and process their declarations;
7325 * for such documents, the rule that an entity must be
7326 * declared is a well-formedness constraint only if
7330 if ((ctxt->standalone == 1) ||
7331 ((ctxt->hasExternalSubset == 0) &&
7332 (ctxt->hasPErefs == 0))) {
7333 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334 "Entity '%s' not defined\n", name);
7336 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7337 "Entity '%s' not defined\n", name);
7338 if ((ctxt->inSubset == 0) &&
7339 (ctxt->sax != NULL) &&
7340 (ctxt->sax->reference != NULL)) {
7341 ctxt->sax->reference(ctxt->userData, name);
7348 * [ WFC: Parsed Entity ]
7349 * An entity reference must not contain the name of an
7352 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7353 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7354 "Entity reference to unparsed entity %s\n", name);
7358 * [ WFC: No External Entity References ]
7359 * Attribute values cannot contain direct or indirect
7360 * entity references to external entities.
7362 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7363 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7364 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7365 "Attribute references external entity '%s'\n", name);
7368 * [ WFC: No < in Attribute Values ]
7369 * The replacement text of any entity referred to directly or
7370 * indirectly in an attribute value (other than "<") must
7373 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7374 (ent != NULL) && (ent->content != NULL) &&
7375 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7376 (xmlStrchr(ent->content, '<'))) {
7377 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7378 "'<' in entity '%s' is not allowed in attributes values\n", name);
7382 * Internal check, no parameter entities here ...
7385 switch (ent->etype) {
7386 case XML_INTERNAL_PARAMETER_ENTITY:
7387 case XML_EXTERNAL_PARAMETER_ENTITY:
7388 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7389 "Attempt to reference the parameter entity '%s'\n",
7398 * [ WFC: No Recursion ]
7399 * A parsed entity must not contain a recursive reference
7400 * to itself, either directly or indirectly.
7401 * Done somewhere else
7407 * xmlParseStringEntityRef:
7408 * @ctxt: an XML parser context
7409 * @str: a pointer to an index in the string
7411 * parse an entity reference, but this version parses it from
7414 * [68] EntityRef ::= '&' Name ';'
7416 * [ WFC: Entity Declared ]
7417 * In a document without any DTD, a document with only an internal DTD
7418 * subset which contains no parameter entity references, or a document
7419 * with "standalone='yes'", the Name given in the entity reference
7420 * must match that in an entity declaration, except that well-formed
7421 * documents need not declare any of the following entities: amp, lt,
7422 * gt, apos, quot. The declaration of a parameter entity must precede
7423 * any reference to it. Similarly, the declaration of a general entity
7424 * must precede any reference to it which appears in a default value in an
7425 * attribute-list declaration. Note that if entities are declared in the
7426 * external subset or in external parameter entities, a non-validating
7427 * processor is not obligated to read and process their declarations;
7428 * for such documents, the rule that an entity must be declared is a
7429 * well-formedness constraint only if standalone='yes'.
7431 * [ WFC: Parsed Entity ]
7432 * An entity reference must not contain the name of an unparsed entity
7434 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7435 * is updated to the current location in the string.
7438 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7442 xmlEntityPtr ent = NULL;
7444 if ((str == NULL) || (*str == NULL))
7452 name = xmlParseStringName(ctxt, &ptr);
7454 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7455 "xmlParseStringEntityRef: no name\n");
7460 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7469 * Predefined entities override any extra definition
7471 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7472 ent = xmlGetPredefinedEntity(name);
7481 * Increase the number of entity references parsed
7486 * First ask SAX for entity resolution, otherwise try the
7487 * entities which may have been stored in the parser context.
7489 if (ctxt->sax != NULL) {
7490 if (ctxt->sax->getEntity != NULL)
7491 ent = ctxt->sax->getEntity(ctxt->userData, name);
7492 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7493 ent = xmlGetPredefinedEntity(name);
7494 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7495 ent = xmlSAX2GetEntity(ctxt, name);
7500 * [ WFC: Entity Declared ]
7501 * In a document without any DTD, a document with only an
7502 * internal DTD subset which contains no parameter entity
7503 * references, or a document with "standalone='yes'", the
7504 * Name given in the entity reference must match that in an
7505 * entity declaration, except that well-formed documents
7506 * need not declare any of the following entities: amp, lt,
7508 * The declaration of a parameter entity must precede any
7510 * Similarly, the declaration of a general entity must
7511 * precede any reference to it which appears in a default
7512 * value in an attribute-list declaration. Note that if
7513 * entities are declared in the external subset or in
7514 * external parameter entities, a non-validating processor
7515 * is not obligated to read and process their declarations;
7516 * for such documents, the rule that an entity must be
7517 * declared is a well-formedness constraint only if
7521 if ((ctxt->standalone == 1) ||
7522 ((ctxt->hasExternalSubset == 0) &&
7523 (ctxt->hasPErefs == 0))) {
7524 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7525 "Entity '%s' not defined\n", name);
7527 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7528 "Entity '%s' not defined\n",
7531 /* TODO ? check regressions ctxt->valid = 0; */
7535 * [ WFC: Parsed Entity ]
7536 * An entity reference must not contain the name of an
7539 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7540 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7541 "Entity reference to unparsed entity %s\n", name);
7545 * [ WFC: No External Entity References ]
7546 * Attribute values cannot contain direct or indirect
7547 * entity references to external entities.
7549 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7550 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7552 "Attribute references external entity '%s'\n", name);
7555 * [ WFC: No < in Attribute Values ]
7556 * The replacement text of any entity referred to directly or
7557 * indirectly in an attribute value (other than "<") must
7560 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561 (ent != NULL) && (ent->content != NULL) &&
7562 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7563 (xmlStrchr(ent->content, '<'))) {
7564 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7565 "'<' in entity '%s' is not allowed in attributes values\n",
7570 * Internal check, no parameter entities here ...
7573 switch (ent->etype) {
7574 case XML_INTERNAL_PARAMETER_ENTITY:
7575 case XML_EXTERNAL_PARAMETER_ENTITY:
7576 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7577 "Attempt to reference the parameter entity '%s'\n",
7586 * [ WFC: No Recursion ]
7587 * A parsed entity must not contain a recursive reference
7588 * to itself, either directly or indirectly.
7589 * Done somewhere else
7598 * xmlParsePEReference:
7599 * @ctxt: an XML parser context
7601 * parse PEReference declarations
7602 * The entity content is handled directly by pushing its content as
7603 * a new input stream.
7605 * [69] PEReference ::= '%' Name ';'
7607 * [ WFC: No Recursion ]
7608 * A parsed entity must not contain a recursive
7609 * reference to itself, either directly or indirectly.
7611 * [ WFC: Entity Declared ]
7612 * In a document without any DTD, a document with only an internal DTD
7613 * subset which contains no parameter entity references, or a document
7614 * with "standalone='yes'", ... ... The declaration of a parameter
7615 * entity must precede any reference to it...
7617 * [ VC: Entity Declared ]
7618 * In a document with an external subset or external parameter entities
7619 * with "standalone='no'", ... ... The declaration of a parameter entity
7620 * must precede any reference to it...
7623 * Parameter-entity references may only appear in the DTD.
7624 * NOTE: misleading but this is handled.
7627 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7629 const xmlChar *name;
7630 xmlEntityPtr entity = NULL;
7631 xmlParserInputPtr input;
7636 name = xmlParseName(ctxt);
7638 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7639 "xmlParsePEReference: no name\n");
7643 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7650 * Increase the number of entity references parsed
7655 * Request the entity from SAX
7657 if ((ctxt->sax != NULL) &&
7658 (ctxt->sax->getParameterEntity != NULL))
7659 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7661 if (entity == NULL) {
7663 * [ WFC: Entity Declared ]
7664 * In a document without any DTD, a document with only an
7665 * internal DTD subset which contains no parameter entity
7666 * references, or a document with "standalone='yes'", ...
7667 * ... The declaration of a parameter entity must precede
7668 * any reference to it...
7670 if ((ctxt->standalone == 1) ||
7671 ((ctxt->hasExternalSubset == 0) &&
7672 (ctxt->hasPErefs == 0))) {
7673 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7674 "PEReference: %%%s; not found\n",
7678 * [ VC: Entity Declared ]
7679 * In a document with an external subset or external
7680 * parameter entities with "standalone='no'", ...
7681 * ... The declaration of a parameter entity must
7682 * precede any reference to it...
7684 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7685 "PEReference: %%%s; not found\n",
7691 * Internal check in case the entity lookup returned something that is not a parameter entity
7693 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7694 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7695 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7696 "Internal: %%%s; is not a parameter entity\n",
7698 } else if (ctxt->input->free != deallocblankswrapper) {
7699 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7700 if (xmlPushInput(ctxt, input) < 0)
7705 * handle the extra spaces added before and after
7706 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7708 input = xmlNewEntityInputStream(ctxt, entity);
7709 if (xmlPushInput(ctxt, input) < 0)
7711 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7712 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7713 (IS_BLANK_CH(NXT(5)))) {
7714 xmlParseTextDecl(ctxt);
7716 XML_ERR_UNSUPPORTED_ENCODING) {
7718 * The XML REC instructs us to stop parsing
7721 ctxt->instate = XML_PARSER_EOF;
7727 ctxt->hasPErefs = 1;
7731 * xmlLoadEntityContent:
7732 * @ctxt: an XML parser context
7733 * @entity: an unloaded system entity
7735 * Load the original content of the given system entity from the
7736 * ExternalID/SystemID given. This is to be used for Included in Literal
7737 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
7739 * Returns 0 in case of success and -1 in case of failure
7742 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7743 xmlParserInputPtr input;
7748 if ((ctxt == NULL) || (entity == NULL) ||
7749 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7750 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7751 (entity->content != NULL)) {
7752 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7753 "xmlLoadEntityContent parameter error");
7757 if (xmlParserDebugEntities)
7758 xmlGenericError(xmlGenericErrorContext,
7759 "Reading %s entity content input\n", entity->name);
7761 buf = xmlBufferCreate();
7763 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7764 "xmlLoadEntityContent parameter error");
7768 input = xmlNewEntityInputStream(ctxt, entity);
7769 if (input == NULL) {
7770 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7771 "xmlLoadEntityContent input error");
7777 * Push the entity as the current input, read char by char
7778 * saving to the buffer until the end of the entity or an error
7780 if (xmlPushInput(ctxt, input) < 0) {
7787 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7789 xmlBufferAdd(buf, ctxt->input->cur, l);
7790 if (count++ > 100) {
7798 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7800 } else if (!IS_CHAR(c)) {
7801 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7802 "xmlLoadEntityContent: invalid char value %d\n",
7807 entity->content = buf->content;
7808 buf->content = NULL;
7815 * xmlParseStringPEReference:
7816 * @ctxt: an XML parser context
7817 * @str: a pointer to an index in the string
7819 * parse PEReference declarations
7821 * [69] PEReference ::= '%' Name ';'
7823 * [ WFC: No Recursion ]
7824 * A parsed entity must not contain a recursive
7825 * reference to itself, either directly or indirectly.
7827 * [ WFC: Entity Declared ]
7828 * In a document without any DTD, a document with only an internal DTD
7829 * subset which contains no parameter entity references, or a document
7830 * with "standalone='yes'", ... ... The declaration of a parameter
7831 * entity must precede any reference to it...
7833 * [ VC: Entity Declared ]
7834 * In a document with an external subset or external parameter entities
7835 * with "standalone='no'", ... ... The declaration of a parameter entity
7836 * must precede any reference to it...
7839 * Parameter-entity references may only appear in the DTD.
7840 * NOTE: misleading but this is handled.
7842 * Returns the string of the entity content.
7843 * str is updated to the current value of the index
7846 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7850 xmlEntityPtr entity = NULL;
7852 if ((str == NULL) || (*str == NULL)) return(NULL);
7858 name = xmlParseStringName(ctxt, &ptr);
7860 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7861 "xmlParseStringPEReference: no name\n");
7867 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7875 * Increase the number of entity references parsed
7880 * Request the entity from SAX
7882 if ((ctxt->sax != NULL) &&
7883 (ctxt->sax->getParameterEntity != NULL))
7884 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7886 if (entity == NULL) {
7888 * [ WFC: Entity Declared ]
7889 * In a document without any DTD, a document with only an
7890 * internal DTD subset which contains no parameter entity
7891 * references, or a document with "standalone='yes'", ...
7892 * ... The declaration of a parameter entity must precede
7893 * any reference to it...
7895 if ((ctxt->standalone == 1) ||
7896 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7897 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7898 "PEReference: %%%s; not found\n", name);
7901 * [ VC: Entity Declared ]
7902 * In a document with an external subset or external
7903 * parameter entities with "standalone='no'", ...
7904 * ... The declaration of a parameter entity must
7905 * precede any reference to it...
7907 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7908 "PEReference: %%%s; not found\n",
7914 * Internal check in case the entity lookup returned something that is not a parameter entity
7916 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7917 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7918 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7919 "%%%s; is not a parameter entity\n",
7923 ctxt->hasPErefs = 1;
7930 * xmlParseDocTypeDecl:
7931 * @ctxt: an XML parser context
7933 * parse a DOCTYPE declaration
7935 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7936 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7938 * [ VC: Root Element Type ]
7939 * The Name in the document type declaration must match the element
7940 * type of the root element.
7944 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7945 const xmlChar *name = NULL;
7946 xmlChar *ExternalID = NULL;
7947 xmlChar *URI = NULL;
7950 * We know that '<!DOCTYPE' has been detected.
7957 * Parse the DOCTYPE name.
7959 name = xmlParseName(ctxt);
7961 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7962 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7964 ctxt->intSubName = name;
7969 * Check for SystemID and ExternalID
7971 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7973 if ((URI != NULL) || (ExternalID != NULL)) {
7974 ctxt->hasExternalSubset = 1;
7976 ctxt->extSubURI = URI;
7977 ctxt->extSubSystem = ExternalID;
7982 * Create and update the internal subset.
7984 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7985 (!ctxt->disableSAX))
7986 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7989 * Are there any internal subset declarations?
7990 * They are handled separately in xmlParseInternalSubset().
7996 * We should be at the end of the DOCTYPE declaration.
7999 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8005 * xmlParseInternalSubset:
8006 * @ctxt: an XML parser context
8008 * parse the internal subset declaration
8010 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8014 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8016 * Is there any DTD definition ?
8019 ctxt->instate = XML_PARSER_DTD;
8022 * Parse the succession of Markup declarations and
8024 * Subsequence (markupdecl | PEReference | S)*
8026 while (RAW != ']') {
8027 const xmlChar *check = CUR_PTR;
8028 unsigned int cons = ctxt->input->consumed;
8031 xmlParseMarkupDecl(ctxt);
8032 xmlParsePEReference(ctxt);
8035 * Pop-up of finished entities.
8037 while ((RAW == 0) && (ctxt->inputNr > 1))
8040 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8041 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8042 "xmlParseInternalSubset: error detected in Markup declaration\n");
8053 * We should be at the end of the DOCTYPE declaration.
8056 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8061 #ifdef LIBXML_SAX1_ENABLED
8063 * xmlParseAttribute:
8064 * @ctxt: an XML parser context
8065 * @value: a xmlChar ** used to store the value of the attribute
8067 * parse an attribute
8069 * [41] Attribute ::= Name Eq AttValue
8071 * [ WFC: No External Entity References ]
8072 * Attribute values cannot contain direct or indirect entity references
8073 * to external entities.
8075 * [ WFC: No < in Attribute Values ]
8076 * The replacement text of any entity referred to directly or indirectly in
8077 * an attribute value (other than "<") must not contain a <.
8079 * [ VC: Attribute Value Type ]
8080 * The attribute must have been declared; the value must be of the type
8083 * [25] Eq ::= S? '=' S?
8087 * [NS 11] Attribute ::= QName Eq AttValue
8089 * Also the case QName == xmlns:??? is handled independently as a namespace
8092 * Returns the attribute name, and the value in *value.
8096 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8097 const xmlChar *name;
8102 name = xmlParseName(ctxt);
8104 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8105 "error parsing attribute name\n");
8116 val = xmlParseAttValue(ctxt);
8117 ctxt->instate = XML_PARSER_CONTENT;
8119 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8120 "Specification mandate value for attribute %s\n", name);
8125 * Check that xml:lang conforms to the specification
8126 * No longer registered as an error, just generate a warning now
8127 * since this was deprecated in XML second edition
8129 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8130 if (!xmlCheckLanguageID(val)) {
8131 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8132 "Malformed value for xml:lang : %s\n",
8138 * Check that xml:space conforms to the specification
8140 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8141 if (xmlStrEqual(val, BAD_CAST "default"))
8143 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8146 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8147 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8158 * @ctxt: an XML parser context
8160 * parse a start of tag either for rule element or
8161 * EmptyElement. In both cases we don't parse the tag closing chars.
8163 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8165 * [ WFC: Unique Att Spec ]
8166 * No attribute name may appear more than once in the same start-tag or
8167 * empty-element tag.
8169 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8171 * [ WFC: Unique Att Spec ]
8172 * No attribute name may appear more than once in the same start-tag or
8173 * empty-element tag.
8177 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8179 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8181 * Returns the element name parsed
8185 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8186 const xmlChar *name;
8187 const xmlChar *attname;
8189 const xmlChar **atts = ctxt->atts;
8191 int maxatts = ctxt->maxatts;
8194 if (RAW != '<') return(NULL);
8197 name = xmlParseName(ctxt);
8199 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8200 "xmlParseStartTag: invalid element name\n");
8205 * Now parse the attributes, it ends up with the ending
8212 while ((RAW != '>') &&
8213 ((RAW != '/') || (NXT(1) != '>')) &&
8214 (IS_BYTE_CHAR(RAW))) {
8215 const xmlChar *q = CUR_PTR;
8216 unsigned int cons = ctxt->input->consumed;
8218 attname = xmlParseAttribute(ctxt, &attvalue);
8219 if ((attname != NULL) && (attvalue != NULL)) {
8221 * [ WFC: Unique Att Spec ]
8222 * No attribute name may appear more than once in the same
8223 * start-tag or empty-element tag.
8225 for (i = 0; i < nbatts;i += 2) {
8226 if (xmlStrEqual(atts[i], attname)) {
8227 xmlErrAttributeDup(ctxt, NULL, attname);
8233 * Add the pair to atts
8236 maxatts = 22; /* allow for 10 attrs by default */
8237 atts = (const xmlChar **)
8238 xmlMalloc(maxatts * sizeof(xmlChar *));
8240 xmlErrMemory(ctxt, NULL);
8241 if (attvalue != NULL)
8246 ctxt->maxatts = maxatts;
8247 } else if (nbatts + 4 > maxatts) {
8251 n = (const xmlChar **) xmlRealloc((void *) atts,
8252 maxatts * sizeof(const xmlChar *));
8254 xmlErrMemory(ctxt, NULL);
8255 if (attvalue != NULL)
8261 ctxt->maxatts = maxatts;
8263 atts[nbatts++] = attname;
8264 atts[nbatts++] = attvalue;
8265 atts[nbatts] = NULL;
8266 atts[nbatts + 1] = NULL;
8268 if (attvalue != NULL)
8275 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8277 if (!IS_BLANK_CH(RAW)) {
8278 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8279 "attributes construct error\n");
8282 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8283 (attname == NULL) && (attvalue == NULL)) {
8284 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8285 "xmlParseStartTag: problem parsing attributes\n");
8293 * SAX: Start of Element !
8295 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8296 (!ctxt->disableSAX)) {
8298 ctxt->sax->startElement(ctxt->userData, name, atts);
8300 ctxt->sax->startElement(ctxt->userData, name, NULL);
8304 /* Free only the content strings */
8305 for (i = 1;i < nbatts;i+=2)
8306 if (atts[i] != NULL)
8307 xmlFree((xmlChar *) atts[i]);
8314 * @ctxt: an XML parser context
8315 * @line: line of the start tag
8316 * @nsNr: number of namespaces on the start tag
8318 * parse an end of tag
8320 * [42] ETag ::= '</' Name S? '>'
8324 * [NS 9] ETag ::= '</' QName S? '>'
8328 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8329 const xmlChar *name;
8332 if ((RAW != '<') || (NXT(1) != '/')) {
8333 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8334 "xmlParseEndTag: '</' not found\n");
8339 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8342 * We should definitely be at the ending "S? '>'" part
8346 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8347 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8352 * [ WFC: Element Type Match ]
8353 * The Name in an element's end-tag must match the element type in the
8357 if (name != (xmlChar*)1) {
8358 if (name == NULL) name = BAD_CAST "unparseable";
8359 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8360 "Opening and ending tag mismatch: %s line %d and %s\n",
8361 ctxt->name, line, name);
8367 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8368 (!ctxt->disableSAX))
8369 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8378 * @ctxt: an XML parser context
8380 * parse an end of tag
8382 * [42] ETag ::= '</' Name S? '>'
8386 * [NS 9] ETag ::= '</' QName S? '>'
8390 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8391 xmlParseEndTag1(ctxt, 0);
8393 #endif /* LIBXML_SAX1_ENABLED */
8395 /************************************************************************
8397 * SAX 2 specific operations *
8399 ************************************************************************/
8403 * @ctxt: an XML parser context
8404 * @prefix: the prefix to lookup
8406 * Look up the namespace name for the @prefix (which can be NULL).
8407 * The prefix must come from the @ctxt->dict dictionary.
8409 * Returns the namespace name or NULL if not bound
8411 static const xmlChar *
8412 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8415 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8416 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8417 if (ctxt->nsTab[i] == prefix) {
8418 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8420 return(ctxt->nsTab[i + 1]);
8427 * @ctxt: an XML parser context
8428 * @prefix: pointer to store the prefix part
8430 * parse an XML Namespace QName
8432 * [6] QName ::= (Prefix ':')? LocalPart
8433 * [7] Prefix ::= NCName
8434 * [8] LocalPart ::= NCName
8436 * Returns the Name parsed or NULL
8439 static const xmlChar *
8440 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8441 const xmlChar *l, *p;
8445 l = xmlParseNCName(ctxt);
8448 l = xmlParseName(ctxt);
8450 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8451 "Failed to parse QName '%s'\n", l, NULL, NULL);
8461 l = xmlParseNCName(ctxt);
8465 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8466 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8467 l = xmlParseNmtoken(ctxt);
8469 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8471 tmp = xmlBuildQName(l, p, NULL, 0);
8474 p = xmlDictLookup(ctxt->dict, tmp, -1);
8475 if (tmp != NULL) xmlFree(tmp);
8482 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8483 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8485 tmp = (xmlChar *) xmlParseName(ctxt);
8487 tmp = xmlBuildQName(tmp, l, NULL, 0);
8488 l = xmlDictLookup(ctxt->dict, tmp, -1);
8489 if (tmp != NULL) xmlFree(tmp);
8493 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8494 l = xmlDictLookup(ctxt->dict, tmp, -1);
8495 if (tmp != NULL) xmlFree(tmp);
8506 * xmlParseQNameAndCompare:
8507 * @ctxt: an XML parser context
8508 * @name: the localname
8509 * @prefix: the prefix, if any.
8511 * parse an XML name and compares for match
8512 * (specialized for endtag parsing)
8514 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8515 * and the name for mismatch
8518 static const xmlChar *
8519 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8520 xmlChar const *prefix) {
8524 const xmlChar *prefix2;
8526 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8529 in = ctxt->input->cur;
8532 while (*in != 0 && *in == *cmp) {
8536 if ((*cmp == 0) && (*in == ':')) {
8539 while (*in != 0 && *in == *cmp) {
8543 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8545 ctxt->input->cur = in;
8546 return((const xmlChar*) 1);
8550 * all strings come from the dictionary, equality can be done directly
8552 ret = xmlParseQName (ctxt, &prefix2);
8553 if ((ret == name) && (prefix == prefix2))
8554 return((const xmlChar*) 1);
8559 * xmlParseAttValueInternal:
8560 * @ctxt: an XML parser context
8561 * @len: attribute len result
8562 * @alloc: whether the attribute was reallocated as a new string
8563 * @normalize: if 1 then further non-CDATA normalization must be done
8565 * parse a value for an attribute.
8566 * NOTE: if no normalization is needed, the routine will return pointers
8567 * directly from the data buffer.
8569 * 3.3.3 Attribute-Value Normalization:
8570 * Before the value of an attribute is passed to the application or
8571 * checked for validity, the XML processor must normalize it as follows:
8572 * - a character reference is processed by appending the referenced
8573 * character to the attribute value
8574 * - an entity reference is processed by recursively processing the
8575 * replacement text of the entity
8576 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8577 * appending #x20 to the normalized value, except that only a single
8578 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8579 * parsed entity or the literal entity value of an internal parsed entity
8580 * - other characters are processed by appending them to the normalized value
8581 * If the declared value is not CDATA, then the XML processor must further
8582 * process the normalized attribute value by discarding any leading and
8583 * trailing space (#x20) characters, and by replacing sequences of space
8584 * (#x20) characters by a single space (#x20) character.
8585 * All attributes for which no declaration has been read should be treated
8586 * by a non-validating parser as if declared CDATA.
8588 * Returns the AttValue parsed or NULL. The value has to be freed by the
8589 * caller if it was copied, this can be detected by val[*len] == 0.
8593 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8597 const xmlChar *in = NULL, *start, *end, *last;
8598 xmlChar *ret = NULL;
8601 in = (xmlChar *) CUR_PTR;
8602 if (*in != '"' && *in != '\'') {
8603 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8606 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8609 * try to handle in this routine the most common case where no
8610 * allocation of a new string is required and where content is
8614 end = ctxt->input->end;
8617 const xmlChar *oldbase = ctxt->input->base;
8619 if (oldbase != ctxt->input->base) {
8620 long delta = ctxt->input->base - oldbase;
8621 start = start + delta;
8624 end = ctxt->input->end;
8628 * Skip any leading spaces
8630 while ((in < end) && (*in != limit) &&
8631 ((*in == 0x20) || (*in == 0x9) ||
8632 (*in == 0xA) || (*in == 0xD))) {
8636 const xmlChar *oldbase = ctxt->input->base;
8638 if (oldbase != ctxt->input->base) {
8639 long delta = ctxt->input->base - oldbase;
8640 start = start + delta;
8643 end = ctxt->input->end;
8646 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8647 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8648 if ((*in++ == 0x20) && (*in == 0x20)) break;
8650 const xmlChar *oldbase = ctxt->input->base;
8652 if (oldbase != ctxt->input->base) {
8653 long delta = ctxt->input->base - oldbase;
8654 start = start + delta;
8657 end = ctxt->input->end;
8662 * skip the trailing blanks
8664 while ((last[-1] == 0x20) && (last > start)) last--;
8665 while ((in < end) && (*in != limit) &&
8666 ((*in == 0x20) || (*in == 0x9) ||
8667 (*in == 0xA) || (*in == 0xD))) {
8670 const xmlChar *oldbase = ctxt->input->base;
8672 if (oldbase != ctxt->input->base) {
8673 long delta = ctxt->input->base - oldbase;
8674 start = start + delta;
8676 last = last + delta;
8678 end = ctxt->input->end;
8681 if (*in != limit) goto need_complex;
8683 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8684 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8687 const xmlChar *oldbase = ctxt->input->base;
8689 if (oldbase != ctxt->input->base) {
8690 long delta = ctxt->input->base - oldbase;
8691 start = start + delta;
8694 end = ctxt->input->end;
8698 if (*in != limit) goto need_complex;
8702 *len = last - start;
8703 ret = (xmlChar *) start;
8705 if (alloc) *alloc = 1;
8706 ret = xmlStrndup(start, last - start);
8709 if (alloc) *alloc = 0;
8712 if (alloc) *alloc = 1;
8713 return xmlParseAttValueComplex(ctxt, len, normalize);
8717 * xmlParseAttribute2:
8718 * @ctxt: an XML parser context
8719 * @pref: the element prefix
8720 * @elem: the element name
8721 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8722 * @value: a xmlChar ** used to store the value of the attribute
8723 * @len: an int * to save the length of the attribute
8724 * @alloc: an int * to indicate if the attribute was allocated
8726 * parse an attribute in the new SAX2 framework.
8728 * Returns the attribute name, and the value in *value.
8731 static const xmlChar *
8732 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8733 const xmlChar * pref, const xmlChar * elem,
8734 const xmlChar ** prefix, xmlChar ** value,
8735 int *len, int *alloc)
8737 const xmlChar *name;
8738 xmlChar *val, *internal_val = NULL;
8743 name = xmlParseQName(ctxt, prefix);
8745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8746 "error parsing attribute name\n");
8751 * get the type if needed
8753 if (ctxt->attsSpecial != NULL) {
8756 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8757 pref, elem, *prefix, name);
8769 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8772 * Sometimes a second normalisation pass for spaces is needed
8773 * but that only happens if charrefs or entity references
8774 * have been used in the attribute value, i.e. the attribute
8775 * value has been extracted in an allocated string already.
8778 const xmlChar *val2;
8780 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8781 if ((val2 != NULL) && (val2 != val)) {
8783 val = (xmlChar *) val2;
8787 ctxt->instate = XML_PARSER_CONTENT;
8789 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8790 "Specification mandate value for attribute %s\n",
8795 if (*prefix == ctxt->str_xml) {
8797 * Check that xml:lang conforms to the specification
8798 * No longer registered as an error, just generate a warning now
8799 * since this was deprecated in XML second edition
8801 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8802 internal_val = xmlStrndup(val, *len);
8803 if (!xmlCheckLanguageID(internal_val)) {
8804 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8805 "Malformed value for xml:lang : %s\n",
8806 internal_val, NULL);
8811 * Check that xml:space conforms to the specification
8813 if (xmlStrEqual(name, BAD_CAST "space")) {
8814 internal_val = xmlStrndup(val, *len);
8815 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8817 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8820 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8821 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8822 internal_val, NULL);
8826 xmlFree(internal_val);
8834 * xmlParseStartTag2:
8835 * @ctxt: an XML parser context
8837 * parse a start of tag either for rule element or
8838 * EmptyElement. In both cases we don't parse the tag closing chars.
8839 * This routine is called when running SAX2 parsing
8841 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8843 * [ WFC: Unique Att Spec ]
8844 * No attribute name may appear more than once in the same start-tag or
8845 * empty-element tag.
8847 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8849 * [ WFC: Unique Att Spec ]
8850 * No attribute name may appear more than once in the same start-tag or
8851 * empty-element tag.
8855 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8857 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8859 * Returns the element name parsed
8862 static const xmlChar *
8863 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8864 const xmlChar **URI, int *tlen) {
8865 const xmlChar *localname;
8866 const xmlChar *prefix;
8867 const xmlChar *attname;
8868 const xmlChar *aprefix;
8869 const xmlChar *nsname;
8871 const xmlChar **atts = ctxt->atts;
8872 int maxatts = ctxt->maxatts;
8873 int nratts, nbatts, nbdef;
8874 int i, j, nbNs, attval, oldline, oldcol;
8875 const xmlChar *base;
8877 int nsNr = ctxt->nsNr;
8879 if (RAW != '<') return(NULL);
8883 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8884 * point since the attribute values may be stored as pointers to
8885 * the buffer and calling SHRINK would destroy them !
8886 * The Shrinking is only possible once the full set of attribute
8887 * callbacks have been done.
8891 base = ctxt->input->base; /* remembered so a moved input buffer can be detected (see the base_changed gotos) */
8892 cur = ctxt->input->cur - ctxt->input->base; /* offset, not pointer: stays meaningful if the buffer moves */
8893 oldline = ctxt->input->line; /* line/col are restored together with cur when the base changed */
8894 oldcol = ctxt->input->col;
8900 /* Forget any namespaces added during an earlier parse of this element. */
8903 localname = xmlParseQName(ctxt, &prefix);
8904 if (localname == NULL) {
8905 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8906 "StartTag: invalid element name\n");
8909 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8912 * Now parse the attributes, it ends up with the ending
8918 if (ctxt->input->base != base) goto base_changed;
8920 while ((RAW != '>') &&
8921 ((RAW != '/') || (NXT(1) != '>')) &&
8922 (IS_BYTE_CHAR(RAW))) {
8923 const xmlChar *q = CUR_PTR;
8924 unsigned int cons = ctxt->input->consumed;
8925 int len = -1, alloc = 0;
8927 attname = xmlParseAttribute2(ctxt, prefix, localname,
8928 &aprefix, &attvalue, &len, &alloc);
8929 if (ctxt->input->base != base) {
8930 if ((attvalue != NULL) && (alloc != 0))
8935 if ((attname != NULL) && (attvalue != NULL)) {
8936 if (len < 0) len = xmlStrlen(attvalue);
8937 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8938 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8942 uri = xmlParseURI((const char *) URL);
8944 xmlNsErr(ctxt, XML_WAR_NS_URI,
8945 "xmlns: '%s' is not a valid URI\n",
8948 if (uri->scheme == NULL) {
8949 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8950 "xmlns: URI %s is not absolute\n",
8955 if (URL == ctxt->str_xml_ns) {
8956 if (attname != ctxt->str_xml) {
8957 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8958 "xml namespace URI cannot be the default namespace\n",
8961 goto skip_default_ns;
8965 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8966 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8967 "reuse of the xmlns namespace name is forbidden\n",
8969 goto skip_default_ns;
8973 * check that it's not a defined namespace
8975 for (j = 1;j <= nbNs;j++)
8976 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8979 xmlErrAttributeDup(ctxt, NULL, attname);
8981 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8983 if (alloc != 0) xmlFree(attvalue);
8987 if (aprefix == ctxt->str_xmlns) {
8988 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8991 if (attname == ctxt->str_xml) {
8992 if (URL != ctxt->str_xml_ns) {
8993 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8994 "xml namespace prefix mapped to wrong URI\n",
8998 * Do not keep a namespace definition node
9002 if (URL == ctxt->str_xml_ns) {
9003 if (attname != ctxt->str_xml) {
9004 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9005 "xml namespace URI mapped to wrong prefix\n",
9010 if (attname == ctxt->str_xmlns) {
9011 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9012 "redefinition of the xmlns prefix is forbidden\n",
9018 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9019 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9020 "reuse of the xmlns namespace name is forbidden\n",
9024 if ((URL == NULL) || (URL[0] == 0)) {
9025 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9026 "xmlns:%s: Empty XML namespace is not allowed\n",
9027 attname, NULL, NULL);
9030 uri = xmlParseURI((const char *) URL);
9032 xmlNsErr(ctxt, XML_WAR_NS_URI,
9033 "xmlns:%s: '%s' is not a valid URI\n",
9034 attname, URL, NULL);
9036 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9037 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9038 "xmlns:%s: URI %s is not absolute\n",
9039 attname, URL, NULL);
9046 * check that it's not a defined namespace
9048 for (j = 1;j <= nbNs;j++)
9049 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9052 xmlErrAttributeDup(ctxt, aprefix, attname);
9054 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9056 if (alloc != 0) xmlFree(attvalue);
9058 if (ctxt->input->base != base) goto base_changed;
9063 * Add the pair to atts
9065 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9066 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9067 if (attvalue[len] == 0)
9071 maxatts = ctxt->maxatts;
9074 ctxt->attallocs[nratts++] = alloc;
9075 atts[nbatts++] = attname;
9076 atts[nbatts++] = aprefix;
9077 atts[nbatts++] = NULL; /* the URI will be fetched later */
9078 atts[nbatts++] = attvalue;
9080 atts[nbatts++] = attvalue;
9082 * tag if some deallocation is needed
9084 if (alloc != 0) attval = 1;
9086 if ((attvalue != NULL) && (attvalue[len] == 0))
9093 if (ctxt->input->base != base) goto base_changed;
9094 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9096 if (!IS_BLANK_CH(RAW)) {
9097 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9098 "attributes construct error\n");
9102 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9103 (attname == NULL) && (attvalue == NULL)) {
9104 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9105 "xmlParseStartTag: problem parsing attributes\n");
9109 if (ctxt->input->base != base) goto base_changed;
9113 * The attributes defaulting
9115 if (ctxt->attsDefault != NULL) {
9116 xmlDefAttrsPtr defaults;
9118 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9119 if (defaults != NULL) {
9120 for (i = 0;i < defaults->nbAttrs;i++) { /* defaults->values is read in groups of 5 slots per attribute */
9121 attname = defaults->values[5 * i];
9122 aprefix = defaults->values[5 * i + 1];
9125 * special work for namespaces defaulted defs
9127 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9129 * check that it's not a defined namespace
9131 for (j = 1;j <= nbNs;j++)
9132 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9134 if (j <= nbNs) continue;
9136 nsname = xmlGetNamespace(ctxt, NULL);
9137 if (nsname != defaults->values[5 * i + 2]) {
9138 if (nsPush(ctxt, NULL,
9139 defaults->values[5 * i + 2]) > 0)
9142 } else if (aprefix == ctxt->str_xmlns) {
9144 * check that it's not a defined namespace
9146 for (j = 1;j <= nbNs;j++)
9147 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9149 if (j <= nbNs) continue;
9151 nsname = xmlGetNamespace(ctxt, attname);
9152 if (nsname != defaults->values[5 * i + 2]) { /* compare with THIS default's value (slot 5*i+2), as the default-namespace branch does; slot 2 alone is the first entry's value */
9153 if (nsPush(ctxt, attname,
9154 defaults->values[5 * i + 2]) > 0)
9159 * check that it's not a defined attribute
9161 for (j = 0;j < nbatts;j+=5) {
9162 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9165 if (j < nbatts) continue;
9167 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9168 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9171 maxatts = ctxt->maxatts;
9174 atts[nbatts++] = attname;
9175 atts[nbatts++] = aprefix;
9176 if (aprefix == NULL)
9177 atts[nbatts++] = NULL;
9179 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9180 atts[nbatts++] = defaults->values[5 * i + 2];
9181 atts[nbatts++] = defaults->values[5 * i + 3];
9182 if ((ctxt->standalone == 1) &&
9183 (defaults->values[5 * i + 4] != NULL)) { /* non-NULL slot 4 is reported below as "defaulted from external subset" */
9184 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9185 "standalone: attribute %s on %s defaulted from external subset\n",
9186 attname, localname);
9195 * The attributes checkings
9197 for (i = 0; i < nbatts;i += 5) {
9199 * The default namespace does not apply to attribute names.
9201 if (atts[i + 1] != NULL) {
9202 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9203 if (nsname == NULL) {
9204 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9205 "Namespace prefix %s for %s on %s is not defined\n",
9206 atts[i + 1], atts[i], localname);
9208 atts[i + 2] = nsname;
9212 * [ WFC: Unique Att Spec ]
9213 * No attribute name may appear more than once in the same
9214 * start-tag or empty-element tag.
9215 * As extended by the Namespace in XML REC.
9217 for (j = 0; j < i;j += 5) {
9218 if (atts[i] == atts[j]) {
9219 if (atts[i+1] == atts[j+1]) {
9220 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9223 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9224 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9225 "Namespaced Attribute %s in '%s' redefined\n",
9226 atts[i], nsname, NULL);
9233 nsname = xmlGetNamespace(ctxt, prefix);
9234 if ((prefix != NULL) && (nsname == NULL)) {
9235 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9236 "Namespace prefix %s on %s is not defined\n",
9237 prefix, localname, NULL);
9243 * SAX: Start of Element !
9245 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9246 (!ctxt->disableSAX)) {
9248 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9249 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9250 nbatts / 5, nbdef, atts);
9252 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9253 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9257 * Free up attribute allocated strings if needed
9260 for (i = 3,j = 0; j < nratts;i += 5,j++)
9261 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9262 xmlFree((xmlChar *) atts[i]);
9269 * the attribute strings are valid iff the base didn't change
9272 for (i = 3,j = 0; j < nratts;i += 5,j++)
9273 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9274 xmlFree((xmlChar *) atts[i]);
9276 ctxt->input->cur = ctxt->input->base + cur;
9277 ctxt->input->line = oldline;
9278 ctxt->input->col = oldcol;
9279 if (ctxt->wellFormed == 1) {
9287 * @ctxt: an XML parser context
9288 * @line: line of the start tag
9289 * @nsNr: number of namespaces on the start tag
9291 * parse an end of tag
9293 * [42] ETag ::= '</' Name S? '>'
9297 * [NS 9] ETag ::= '</' QName S? '>'
9301 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9302 const xmlChar *URI, int line, int nsNr, int tlen) {
9303 const xmlChar *name;
9306 if ((RAW != '<') || (NXT(1) != '/')) {
9307 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9312 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9313 if (ctxt->input->cur[tlen] == '>') {
9314 ctxt->input->cur += tlen + 1;
9317 ctxt->input->cur += tlen;
9321 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9323 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9327 * We should definitely be at the ending "S? '>'" part
9331 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9332 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9337 * [ WFC: Element Type Match ]
9338 * The Name in an element's end-tag must match the element type in the
9342 if (name != (xmlChar*)1) {
9343 if (name == NULL) name = BAD_CAST "unparseable";
9344 if ((line == 0) && (ctxt->node != NULL))
9345 line = ctxt->node->line;
9346 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9347 "Opening and ending tag mismatch: %s line %d and %s\n",
9348 ctxt->name, line, name);
9355 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9356 (!ctxt->disableSAX))
9357 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9367 * @ctxt: an XML parser context
9369 * Parse escaped pure raw content.
9371 * [18] CDSect ::= CDStart CData CDEnd
9373 * [19] CDStart ::= '<![CDATA['
9375 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9377 * [21] CDEnd ::= ']]>'
9380 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9381 xmlChar *buf = NULL;
9383 int size = XML_PARSER_BUFFER_SIZE;
9389 /* Check 2.6.0 was NXT(0) not RAW */
9390 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9395 ctxt->instate = XML_PARSER_CDATA_SECTION;
9398 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9399 ctxt->instate = XML_PARSER_CONTENT;
9405 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9406 ctxt->instate = XML_PARSER_CONTENT;
9411 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9413 xmlErrMemory(ctxt, NULL);
9416 while (IS_CHAR(cur) &&
9417 ((r != ']') || (s != ']') || (cur != '>'))) {
9418 if (len + 5 >= size) {
9422 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9425 xmlErrMemory(ctxt, NULL);
9430 COPY_BUF(rl,buf,len,r);
9444 ctxt->instate = XML_PARSER_CONTENT;
9446 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9447 "CData section not finished\n%.50s\n", buf);
9454 * OK the buffer is to be consumed as cdata.
9456 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9457 if (ctxt->sax->cdataBlock != NULL)
9458 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9459 else if (ctxt->sax->characters != NULL)
9460 ctxt->sax->characters(ctxt->userData, buf, len);
9467 * @ctxt: an XML parser context
9471 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9475 xmlParseContent(xmlParserCtxtPtr ctxt) {
9477 while ((RAW != 0) &&
9478 ((RAW != '<') || (NXT(1) != '/')) &&
9479 (ctxt->instate != XML_PARSER_EOF)) {
9480 const xmlChar *test = CUR_PTR;
9481 unsigned int cons = ctxt->input->consumed;
9482 const xmlChar *cur = ctxt->input->cur;
9485 * First case : a Processing Instruction.
9487 if ((*cur == '<') && (cur[1] == '?')) {
9492 * Second case : a CDSection
9494 /* 2.6.0 test was *cur not RAW */
9495 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9496 xmlParseCDSect(ctxt);
9500 * Third case : a comment
9502 else if ((*cur == '<') && (NXT(1) == '!') &&
9503 (NXT(2) == '-') && (NXT(3) == '-')) {
9504 xmlParseComment(ctxt);
9505 ctxt->instate = XML_PARSER_CONTENT;
9509 * Fourth case : a sub-element.
9511 else if (*cur == '<') {
9512 xmlParseElement(ctxt);
9516 * Fifth case : a reference. If it has not been resolved,
9517 * parsing returns its Name, create the node
9520 else if (*cur == '&') {
9521 xmlParseReference(ctxt);
9525 * Last case, text. Note that References are handled directly.
9528 xmlParseCharData(ctxt, 0);
9533 * Pop-up of finished entities.
9535 while ((RAW == 0) && (ctxt->inputNr > 1))
9539 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9540 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9541 "detected an error in element content\n");
9542 ctxt->instate = XML_PARSER_EOF;
9550 * @ctxt: an XML parser context
9552 * parse an XML element, this is highly recursive
9554 * [39] element ::= EmptyElemTag | STag content ETag
9556 * [ WFC: Element Type Match ]
9557 * The Name in an element's end-tag must match the element type in the
9563 xmlParseElement(xmlParserCtxtPtr ctxt) {
9564 const xmlChar *name;
9565 const xmlChar *prefix = NULL;
9566 const xmlChar *URI = NULL;
9567 xmlParserNodeInfo node_info;
9570 int nsNr = ctxt->nsNr;
9572 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9573 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9574 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9575 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9577 ctxt->instate = XML_PARSER_EOF;
9581 /* Capture start position */
9582 if (ctxt->record_info) {
9583 node_info.begin_pos = ctxt->input->consumed +
9584 (CUR_PTR - ctxt->input->base);
9585 node_info.begin_line = ctxt->input->line;
9588 if (ctxt->spaceNr == 0)
9589 spacePush(ctxt, -1);
9590 else if (*ctxt->space == -2)
9591 spacePush(ctxt, -1);
9593 spacePush(ctxt, *ctxt->space);
9595 line = ctxt->input->line;
9596 #ifdef LIBXML_SAX1_ENABLED
9598 #endif /* LIBXML_SAX1_ENABLED */
9599 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9600 #ifdef LIBXML_SAX1_ENABLED
9602 name = xmlParseStartTag(ctxt);
9603 #endif /* LIBXML_SAX1_ENABLED */
9604 if (ctxt->instate == XML_PARSER_EOF)
9610 namePush(ctxt, name);
9613 #ifdef LIBXML_VALID_ENABLED
9615 * [ VC: Root Element Type ]
9616 * The Name in the document type declaration must match the element
9617 * type of the root element.
9619 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9620 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9621 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9622 #endif /* LIBXML_VALID_ENABLED */
9625 * Check for an Empty Element.
9627 if ((RAW == '/') && (NXT(1) == '>')) {
9630 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9631 (!ctxt->disableSAX))
9632 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9633 #ifdef LIBXML_SAX1_ENABLED
9635 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9636 (!ctxt->disableSAX))
9637 ctxt->sax->endElement(ctxt->userData, name);
9638 #endif /* LIBXML_SAX1_ENABLED */
9642 if (nsNr != ctxt->nsNr)
9643 nsPop(ctxt, ctxt->nsNr - nsNr);
9644 if ( ret != NULL && ctxt->record_info ) {
9645 node_info.end_pos = ctxt->input->consumed +
9646 (CUR_PTR - ctxt->input->base);
9647 node_info.end_line = ctxt->input->line;
9648 node_info.node = ret;
9649 xmlParserAddNodeInfo(ctxt, &node_info);
9656 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9657 "Couldn't find end of Start Tag %s line %d\n",
9661 * end of parsing of this node.
9666 if (nsNr != ctxt->nsNr)
9667 nsPop(ctxt, ctxt->nsNr - nsNr);
9670 * Capture end position and add node
9672 if ( ret != NULL && ctxt->record_info ) {
9673 node_info.end_pos = ctxt->input->consumed +
9674 (CUR_PTR - ctxt->input->base);
9675 node_info.end_line = ctxt->input->line;
9676 node_info.node = ret;
9677 xmlParserAddNodeInfo(ctxt, &node_info);
9683 * Parse the content of the element:
9685 xmlParseContent(ctxt);
9686 if (!IS_BYTE_CHAR(RAW)) {
9687 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9688 "Premature end of data in tag %s line %d\n",
9692 * end of parsing of this node.
9697 if (nsNr != ctxt->nsNr)
9698 nsPop(ctxt, ctxt->nsNr - nsNr);
9703 * parse the end of tag: '</' should be here.
9706 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9709 #ifdef LIBXML_SAX1_ENABLED
9711 xmlParseEndTag1(ctxt, line);
9712 #endif /* LIBXML_SAX1_ENABLED */
9715 * Capture end position and add node
9717 if ( ret != NULL && ctxt->record_info ) {
9718 node_info.end_pos = ctxt->input->consumed +
9719 (CUR_PTR - ctxt->input->base);
9720 node_info.end_line = ctxt->input->line;
9721 node_info.node = ret;
9722 xmlParserAddNodeInfo(ctxt, &node_info);
9727 * xmlParseVersionNum:
9728 * @ctxt: an XML parser context
9730 * parse the XML version value.
9732 * [26] VersionNum ::= '1.' [0-9]+
9734 * In practice allow [0-9].[0-9]+ at that level
9736 * Returns the string giving the XML version number, or NULL
9739 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9740 xmlChar *buf = NULL;
9745 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9747 xmlErrMemory(ctxt, NULL);
9751 if (!((cur >= '0') && (cur <= '9'))) {
9765 while ((cur >= '0') && (cur <= '9')) {
9766 if (len + 1 >= size) {
9770 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9773 xmlErrMemory(ctxt, NULL);
9787 * xmlParseVersionInfo:
9788 * @ctxt: an XML parser context
9790 * parse the XML version.
9792 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9794 * [25] Eq ::= S? '=' S?
9796 * Returns the version string, e.g. "1.0"
9800 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9801 xmlChar *version = NULL;
9803 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9807 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9814 version = xmlParseVersionNum(ctxt);
9816 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9819 } else if (RAW == '\''){
9821 version = xmlParseVersionNum(ctxt);
9823 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9827 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9835 * @ctxt: an XML parser context
9837 * parse the XML encoding name
9839 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9841 * Returns the encoding name value or NULL
9844 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9845 xmlChar *buf = NULL;
9851 if (((cur >= 'a') && (cur <= 'z')) ||
9852 ((cur >= 'A') && (cur <= 'Z'))) {
9853 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9855 xmlErrMemory(ctxt, NULL);
9862 while (((cur >= 'a') && (cur <= 'z')) ||
9863 ((cur >= 'A') && (cur <= 'Z')) ||
9864 ((cur >= '0') && (cur <= '9')) ||
9865 (cur == '.') || (cur == '_') ||
9867 if (len + 1 >= size) {
9871 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9873 xmlErrMemory(ctxt, NULL);
9890 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9896 * xmlParseEncodingDecl:
9897 * @ctxt: an XML parser context
9899 * parse the XML encoding declaration
9901 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9903 * this setups the conversion filters.
9905 * Returns the encoding value or NULL
9909 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9910 xmlChar *encoding = NULL;
9913 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9917 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9924 encoding = xmlParseEncName(ctxt);
9926 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9929 } else if (RAW == '\''){
9931 encoding = xmlParseEncName(ctxt);
9933 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9937 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9941 * Non standard parsing, allowing the user to ignore encoding
9943 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9947 * UTF-16 encoding switch has already taken place at this stage,
9948 * moreover the little-endian/big-endian selection is already done
9950 if ((encoding != NULL) &&
9951 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9952 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9954 * If no encoding was passed to the parser, we are
9955 * using UTF-16, and no decoder is present, i.e. the
9956 * document is apparently UTF-8 compatible, then raise an
9957 * encoding mismatch fatal error
9959 if ((ctxt->encoding == NULL) &&
9960 (ctxt->input->buf != NULL) &&
9961 (ctxt->input->buf->encoder == NULL)) {
9962 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9963 "Document labelled UTF-16 but has UTF-8 content\n");
9965 if (ctxt->encoding != NULL)
9966 xmlFree((xmlChar *) ctxt->encoding);
9967 ctxt->encoding = encoding;
9970 * UTF-8 encoding is handled natively
9972 else if ((encoding != NULL) &&
9973 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9974 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9975 if (ctxt->encoding != NULL)
9976 xmlFree((xmlChar *) ctxt->encoding);
9977 ctxt->encoding = encoding;
9979 else if (encoding != NULL) {
9980 xmlCharEncodingHandlerPtr handler;
9982 if (ctxt->input->encoding != NULL)
9983 xmlFree((xmlChar *) ctxt->input->encoding);
9984 ctxt->input->encoding = encoding;
9986 handler = xmlFindCharEncodingHandler((const char *) encoding);
9987 if (handler != NULL) {
9988 xmlSwitchToEncoding(ctxt, handler);
9990 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9991 "Unsupported encoding %s\n", encoding);
10001 * @ctxt: an XML parser context
10003 * parse the XML standalone declaration
10005 * [32] SDDecl ::= S 'standalone' Eq
10006 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10008 * [ VC: Standalone Document Declaration ]
10009 * TODO The standalone document declaration must have the value "no"
10010 * if any external markup declarations contain declarations of:
10011 * - attributes with default values, if elements to which these
10012 * attributes apply appear in the document without specifications
10013 * of values for these attributes, or
10014 * - entities (other than amp, lt, gt, apos, quot), if references
10015 * to those entities appear in the document, or
10016 * - attributes with values subject to normalization, where the
10017 * attribute appears in the document with a value which will change
10018 * as a result of normalization, or
10019 * - element types with element content, if white space occurs directly
10020 * within any instance of those types.
10023 * 1 if standalone="yes"
10024 * 0 if standalone="no"
10025 * -2 if standalone attribute is missing or invalid
10026 * (A standalone value of -2 means that the XML declaration was found,
10027 * but no value was specified for the standalone attribute).
10031 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10032 int standalone = -2;
10035 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10039 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10040 return(standalone);
10046 if ((RAW == 'n') && (NXT(1) == 'o')) {
10049 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10054 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10057 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10060 } else if (RAW == '"'){
10062 if ((RAW == 'n') && (NXT(1) == 'o')) {
10065 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10070 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10073 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10077 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10080 return(standalone);
10085 * @ctxt: an XML parser context
10087 * parse an XML declaration header
10089 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10093 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10097 * This value for standalone indicates that the document has an
10098 * XML declaration but it does not have a standalone attribute.
10099 * It will be overwritten later if a standalone attribute is found.
10101 ctxt->input->standalone = -2;
10104 * We know that '<?xml' is here.
10108 if (!IS_BLANK_CH(RAW)) {
10109 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10110 "Blank needed after '<?xml'\n");
10115 * We must have the VersionInfo here.
10117 version = xmlParseVersionInfo(ctxt);
10118 if (version == NULL) {
10119 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10121 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10123 * Changed here for XML-1.0 5th edition
10125 if (ctxt->options & XML_PARSE_OLD10) {
10126 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10127 "Unsupported version '%s'\n",
10130 if ((version[0] == '1') && ((version[1] == '.'))) {
10131 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10132 "Unsupported version '%s'\n",
10135 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10136 "Unsupported version '%s'\n",
10141 if (ctxt->version != NULL)
10142 xmlFree((void *) ctxt->version);
10143 ctxt->version = version;
10147 * We may have the encoding declaration
10149 if (!IS_BLANK_CH(RAW)) {
10150 if ((RAW == '?') && (NXT(1) == '>')) {
10154 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10156 xmlParseEncodingDecl(ctxt);
10157 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10159 * The XML REC instructs us to stop parsing right here
10165 * We may have the standalone status.
10167 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10168 if ((RAW == '?') && (NXT(1) == '>')) {
10172 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10176 * We can grow the input buffer freely at that point
10181 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10184 if ((RAW == '?') && (NXT(1) == '>')) {
10186 } else if (RAW == '>') {
10187 /* Deprecated old WD ... */
10188 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10191 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10192 MOVETO_ENDTAG(CUR_PTR);
10199 * @ctxt: an XML parser context
10201 * parse an XML Misc* optional field.
10203 * [27] Misc ::= Comment | PI | S
10207 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10208 while (((RAW == '<') && (NXT(1) == '?')) ||
10209 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10210 IS_BLANK_CH(CUR)) {
10211 if ((RAW == '<') && (NXT(1) == '?')) {
10213 } else if (IS_BLANK_CH(CUR)) {
10216 xmlParseComment(ctxt);
10221 * xmlParseDocument:
10222 * @ctxt: an XML parser context
10224 * parse an XML document (and build a tree if using the standard SAX
10227 * [1] document ::= prolog element Misc*
10229 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10231 * Returns 0, -1 in case of error. the parser context is augmented
10232 * as a result of the parsing.
10236 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10238 xmlCharEncoding enc;
10242 if ((ctxt == NULL) || (ctxt->input == NULL))
10248 * SAX: detecting the level.
10250 xmlDetectSAX2(ctxt);
10253 * SAX: beginning of the document processing.
10255 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10256 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10258 if ((ctxt->encoding == NULL) &&
10259 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10261 * Get the 4 first bytes and decode the charset
10262 * if enc != XML_CHAR_ENCODING_NONE
10263 * plug some encoding conversion routines.
10269 enc = xmlDetectCharEncoding(&start[0], 4);
10270 if (enc != XML_CHAR_ENCODING_NONE) {
10271 xmlSwitchEncoding(ctxt, enc);
10277 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10281 * Check for the XMLDecl in the Prolog.
10282 * do not GROW here to avoid the detected encoder to decode more
10283 * than just the first line, unless the amount of data is really
10284 * too small to hold "<?xml version="1.0" encoding="foo"
10286 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10289 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10292 * Note that we will switch encoding on the fly.
10294 xmlParseXMLDecl(ctxt);
10295 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10297 * The XML REC instructs us to stop parsing right here
10301 ctxt->standalone = ctxt->input->standalone;
10304 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10306 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10307 ctxt->sax->startDocument(ctxt->userData);
10310 * The Misc part of the Prolog
10313 xmlParseMisc(ctxt);
10316 * Then possibly doc type declaration(s) and more Misc
10317 * (doctypedecl Misc*)?
10320 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10322 ctxt->inSubset = 1;
10323 xmlParseDocTypeDecl(ctxt);
10325 ctxt->instate = XML_PARSER_DTD;
10326 xmlParseInternalSubset(ctxt);
10330 * Create and update the external subset.
10332 ctxt->inSubset = 2;
10333 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10334 (!ctxt->disableSAX))
10335 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10336 ctxt->extSubSystem, ctxt->extSubURI);
10337 ctxt->inSubset = 0;
10339 xmlCleanSpecialAttr(ctxt);
10341 ctxt->instate = XML_PARSER_PROLOG;
10342 xmlParseMisc(ctxt);
10346 * Time to start parsing the tree itself
10350 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10351 "Start tag expected, '<' not found\n");
10353 ctxt->instate = XML_PARSER_CONTENT;
10354 xmlParseElement(ctxt);
10355 ctxt->instate = XML_PARSER_EPILOG;
10359 * The Misc part at the end
10361 xmlParseMisc(ctxt);
10364 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10366 ctxt->instate = XML_PARSER_EOF;
10370 * SAX: end of the document processing.
10372 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10373 ctxt->sax->endDocument(ctxt->userData);
10376 * Remove locally kept entity definitions if the tree was not built
10378 if ((ctxt->myDoc != NULL) &&
10379 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10380 xmlFreeDoc(ctxt->myDoc);
10381 ctxt->myDoc = NULL;
10384 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10385 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10387 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10388 if (ctxt->nsWellFormed)
10389 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10390 if (ctxt->options & XML_PARSE_OLD10)
10391 ctxt->myDoc->properties |= XML_DOC_OLD10;
10393 if (! ctxt->wellFormed) {
10401 * xmlParseExtParsedEnt:
10402 * @ctxt: an XML parser context
10404 * parse a general parsed entity
10405 * An external general parsed entity is well-formed if it matches the
10406 * production labeled extParsedEnt.
10408 * [78] extParsedEnt ::= TextDecl? content
10410 * Returns 0, -1 in case of error. the parser context is augmented
10411 * as a result of the parsing.
10415 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10417 xmlCharEncoding enc;
10419 if ((ctxt == NULL) || (ctxt->input == NULL))
10422 xmlDefaultSAXHandlerInit();
10424 xmlDetectSAX2(ctxt);
10429 * SAX: beginning of the document processing.
10431 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10432 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10435 * Get the 4 first bytes and decode the charset
10436 * if enc != XML_CHAR_ENCODING_NONE
10437 * plug some encoding conversion routines.
10439 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10444 enc = xmlDetectCharEncoding(start, 4);
10445 if (enc != XML_CHAR_ENCODING_NONE) {
10446 xmlSwitchEncoding(ctxt, enc);
10452 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10456 * Check for the XMLDecl in the Prolog.
10459 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10462 * Note that we will switch encoding on the fly.
10464 xmlParseXMLDecl(ctxt);
10465 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10467 * The XML REC instructs us to stop parsing right here
10473 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10475 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10476 ctxt->sax->startDocument(ctxt->userData);
10479 * Doing validity checking on chunk doesn't make sense
10481 ctxt->instate = XML_PARSER_CONTENT;
10482 ctxt->validate = 0;
10483 ctxt->loadsubset = 0;
10486 xmlParseContent(ctxt);
10488 if ((RAW == '<') && (NXT(1) == '/')) {
10489 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10490 } else if (RAW != 0) {
10491 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10495 * SAX: end of the document processing.
10497 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10498 ctxt->sax->endDocument(ctxt->userData);
10500 if (! ctxt->wellFormed) return(-1);
10504 #ifdef LIBXML_PUSH_ENABLED
10505 /************************************************************************
10507 * Progressive parsing interfaces *
10509 ************************************************************************/
10512 * xmlParseLookupSequence:
10513 * @ctxt: an XML parser context
10514 * @first: the first char to lookup
10515 * @next: the next char to lookup or zero
10516 * @third: the next char to lookup or zero
10518 * Try to find if a sequence (first, next, third) or just (first next) or
10519 * (first) is available in the input stream.
10520 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10521 * to avoid rescanning sequences of bytes, it DOES change the state of the
10522 * parser, do not use liberally.
10524 * Returns the index to the current parsing point if the full sequence
10525 * is available, -1 otherwise.
10528 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10529 xmlChar next, xmlChar third) {
10531 xmlParserInputPtr in;
10532 const xmlChar *buf;
10535 if (in == NULL) return(-1);
10536 base = in->cur - in->base;
10537 if (base < 0) return(-1);
10538 if (ctxt->checkIndex > base)
10539 base = ctxt->checkIndex;
10540 if (in->buf == NULL) {
10544 buf = in->buf->buffer->content;
10545 len = in->buf->buffer->use;
10547 /* take into account the sequence length */
10548 if (third) len -= 2;
10549 else if (next) len --;
10550 for (;base < len;base++) {
10551 if (buf[base] == first) {
10553 if ((buf[base + 1] != next) ||
10554 (buf[base + 2] != third)) continue;
10555 } else if (next != 0) {
10556 if (buf[base + 1] != next) continue;
10558 ctxt->checkIndex = 0;
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: lookup '%c' found at %d\n",
10564 else if (third == 0)
10565 xmlGenericError(xmlGenericErrorContext,
10566 "PP: lookup '%c%c' found at %d\n",
10567 first, next, base);
10569 xmlGenericError(xmlGenericErrorContext,
10570 "PP: lookup '%c%c%c' found at %d\n",
10571 first, next, third, base);
10573 return(base - (in->cur - in->base));
10576 ctxt->checkIndex = base;
10579 xmlGenericError(xmlGenericErrorContext,
10580 "PP: lookup '%c' failed\n", first);
10581 else if (third == 0)
10582 xmlGenericError(xmlGenericErrorContext,
10583 "PP: lookup '%c%c' failed\n", first, next);
10585 xmlGenericError(xmlGenericErrorContext,
10586 "PP: lookup '%c%c%c' failed\n", first, next, third);
10592 * xmlParseGetLasts:
10593 * @ctxt: an XML parser context
10594 * @lastlt: pointer to store the last '<' from the input
10595 * @lastgt: pointer to store the last '>' from the input
10597 * Lookup the last < and > in the current chunk
10600 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10601 const xmlChar **lastgt) {
10602 const xmlChar *tmp;
10604 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10605 xmlGenericError(xmlGenericErrorContext,
10606 "Internal error: xmlParseGetLasts\n");
10609 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10610 tmp = ctxt->input->end;
10612 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10613 if (tmp < ctxt->input->base) {
10619 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10620 if (*tmp == '\'') {
10622 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10623 if (tmp < ctxt->input->end) tmp++;
10624 } else if (*tmp == '"') {
10626 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10627 if (tmp < ctxt->input->end) tmp++;
10631 if (tmp < ctxt->input->end)
10636 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10637 if (tmp >= ctxt->input->base)
10649 * xmlCheckCdataPush:
10650 * @cur: pointer to the bock of characters
10651 * @len: length of the block in bytes
10653 * Check that the block of characters is okay as SCdata content [20]
10655 * Returns the number of bytes to pass if okay, a negative index where an
10656 * UTF-8 error occured otherwise
10659 xmlCheckCdataPush(const xmlChar *utf, int len) {
10664 if ((utf == NULL) || (len <= 0))
10667 for (ix = 0; ix < len;) { /* string is 0-terminated */
10669 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10672 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10676 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10677 if (ix + 2 > len) return(ix);
10678 if ((utf[ix+1] & 0xc0 ) != 0x80)
10680 codepoint = (utf[ix] & 0x1f) << 6;
10681 codepoint |= utf[ix+1] & 0x3f;
10682 if (!xmlIsCharQ(codepoint))
10685 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10686 if (ix + 3 > len) return(ix);
10687 if (((utf[ix+1] & 0xc0) != 0x80) ||
10688 ((utf[ix+2] & 0xc0) != 0x80))
10690 codepoint = (utf[ix] & 0xf) << 12;
10691 codepoint |= (utf[ix+1] & 0x3f) << 6;
10692 codepoint |= utf[ix+2] & 0x3f;
10693 if (!xmlIsCharQ(codepoint))
10696 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10697 if (ix + 4 > len) return(ix);
10698 if (((utf[ix+1] & 0xc0) != 0x80) ||
10699 ((utf[ix+2] & 0xc0) != 0x80) ||
10700 ((utf[ix+3] & 0xc0) != 0x80))
10702 codepoint = (utf[ix] & 0x7) << 18;
10703 codepoint |= (utf[ix+1] & 0x3f) << 12;
10704 codepoint |= (utf[ix+2] & 0x3f) << 6;
10705 codepoint |= utf[ix+3] & 0x3f;
10706 if (!xmlIsCharQ(codepoint))
10709 } else /* unknown encoding */
10716 * xmlParseTryOrFinish:
10717 * @ctxt: an XML parser context
10718 * @terminate: last chunk indicator
10720 * Try to progress on parsing
10722 * Returns zero if no parsing was possible
10725 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10729 const xmlChar *lastlt, *lastgt;
10731 if (ctxt->input == NULL)
10735 switch (ctxt->instate) {
10736 case XML_PARSER_EOF:
10737 xmlGenericError(xmlGenericErrorContext,
10738 "PP: try EOF\n"); break;
10739 case XML_PARSER_START:
10740 xmlGenericError(xmlGenericErrorContext,
10741 "PP: try START\n"); break;
10742 case XML_PARSER_MISC:
10743 xmlGenericError(xmlGenericErrorContext,
10744 "PP: try MISC\n");break;
10745 case XML_PARSER_COMMENT:
10746 xmlGenericError(xmlGenericErrorContext,
10747 "PP: try COMMENT\n");break;
10748 case XML_PARSER_PROLOG:
10749 xmlGenericError(xmlGenericErrorContext,
10750 "PP: try PROLOG\n");break;
10751 case XML_PARSER_START_TAG:
10752 xmlGenericError(xmlGenericErrorContext,
10753 "PP: try START_TAG\n");break;
10754 case XML_PARSER_CONTENT:
10755 xmlGenericError(xmlGenericErrorContext,
10756 "PP: try CONTENT\n");break;
10757 case XML_PARSER_CDATA_SECTION:
10758 xmlGenericError(xmlGenericErrorContext,
10759 "PP: try CDATA_SECTION\n");break;
10760 case XML_PARSER_END_TAG:
10761 xmlGenericError(xmlGenericErrorContext,
10762 "PP: try END_TAG\n");break;
10763 case XML_PARSER_ENTITY_DECL:
10764 xmlGenericError(xmlGenericErrorContext,
10765 "PP: try ENTITY_DECL\n");break;
10766 case XML_PARSER_ENTITY_VALUE:
10767 xmlGenericError(xmlGenericErrorContext,
10768 "PP: try ENTITY_VALUE\n");break;
10769 case XML_PARSER_ATTRIBUTE_VALUE:
10770 xmlGenericError(xmlGenericErrorContext,
10771 "PP: try ATTRIBUTE_VALUE\n");break;
10772 case XML_PARSER_DTD:
10773 xmlGenericError(xmlGenericErrorContext,
10774 "PP: try DTD\n");break;
10775 case XML_PARSER_EPILOG:
10776 xmlGenericError(xmlGenericErrorContext,
10777 "PP: try EPILOG\n");break;
10778 case XML_PARSER_PI:
10779 xmlGenericError(xmlGenericErrorContext,
10780 "PP: try PI\n");break;
10781 case XML_PARSER_IGNORE:
10782 xmlGenericError(xmlGenericErrorContext,
10783 "PP: try IGNORE\n");break;
10787 if ((ctxt->input != NULL) &&
10788 (ctxt->input->cur - ctxt->input->base > 4096)) {
10790 ctxt->checkIndex = 0;
10792 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10795 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10800 * Pop-up of finished entities.
10802 while ((RAW == 0) && (ctxt->inputNr > 1))
10805 if (ctxt->input == NULL) break;
10806 if (ctxt->input->buf == NULL)
10807 avail = ctxt->input->length -
10808 (ctxt->input->cur - ctxt->input->base);
10811 * If we are operating on converted input, try to flush
10812 * remaining chars to avoid them stalling in the non-converted
10815 if ((ctxt->input->buf->raw != NULL) &&
10816 (ctxt->input->buf->raw->use > 0)) {
10817 int base = ctxt->input->base -
10818 ctxt->input->buf->buffer->content;
10819 int current = ctxt->input->cur - ctxt->input->base;
10821 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10822 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10823 ctxt->input->cur = ctxt->input->base + current;
10825 &ctxt->input->buf->buffer->content[
10826 ctxt->input->buf->buffer->use];
10828 avail = ctxt->input->buf->buffer->use -
10829 (ctxt->input->cur - ctxt->input->base);
10833 switch (ctxt->instate) {
10834 case XML_PARSER_EOF:
10836 * Document parsing is done !
10839 case XML_PARSER_START:
10840 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10842 xmlCharEncoding enc;
10845 * Very first chars read from the document flow.
10851 * Get the 4 first bytes and decode the charset
10852 * if enc != XML_CHAR_ENCODING_NONE
10853 * plug some encoding conversion routines,
10854 * else xmlSwitchEncoding will set to (default)
10861 enc = xmlDetectCharEncoding(start, 4);
10862 xmlSwitchEncoding(ctxt, enc);
10868 cur = ctxt->input->cur[0];
10869 next = ctxt->input->cur[1];
10871 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10872 ctxt->sax->setDocumentLocator(ctxt->userData,
10873 &xmlDefaultSAXLocator);
10874 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10875 ctxt->instate = XML_PARSER_EOF;
10877 xmlGenericError(xmlGenericErrorContext,
10878 "PP: entering EOF\n");
10880 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10881 ctxt->sax->endDocument(ctxt->userData);
10884 if ((cur == '<') && (next == '?')) {
10885 /* PI or XML decl */
10886 if (avail < 5) return(ret);
10887 if ((!terminate) &&
10888 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10890 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10891 ctxt->sax->setDocumentLocator(ctxt->userData,
10892 &xmlDefaultSAXLocator);
10893 if ((ctxt->input->cur[2] == 'x') &&
10894 (ctxt->input->cur[3] == 'm') &&
10895 (ctxt->input->cur[4] == 'l') &&
10896 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10899 xmlGenericError(xmlGenericErrorContext,
10900 "PP: Parsing XML Decl\n");
10902 xmlParseXMLDecl(ctxt);
10903 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10905 * The XML REC instructs us to stop parsing right
10908 ctxt->instate = XML_PARSER_EOF;
10911 ctxt->standalone = ctxt->input->standalone;
10912 if ((ctxt->encoding == NULL) &&
10913 (ctxt->input->encoding != NULL))
10914 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10915 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10916 (!ctxt->disableSAX))
10917 ctxt->sax->startDocument(ctxt->userData);
10918 ctxt->instate = XML_PARSER_MISC;
10920 xmlGenericError(xmlGenericErrorContext,
10921 "PP: entering MISC\n");
10924 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10925 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10926 (!ctxt->disableSAX))
10927 ctxt->sax->startDocument(ctxt->userData);
10928 ctxt->instate = XML_PARSER_MISC;
10930 xmlGenericError(xmlGenericErrorContext,
10931 "PP: entering MISC\n");
10935 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10936 ctxt->sax->setDocumentLocator(ctxt->userData,
10937 &xmlDefaultSAXLocator);
10938 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10939 if (ctxt->version == NULL) {
10940 xmlErrMemory(ctxt, NULL);
10943 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10944 (!ctxt->disableSAX))
10945 ctxt->sax->startDocument(ctxt->userData);
10946 ctxt->instate = XML_PARSER_MISC;
10948 xmlGenericError(xmlGenericErrorContext,
10949 "PP: entering MISC\n");
10953 case XML_PARSER_START_TAG: {
10954 const xmlChar *name;
10955 const xmlChar *prefix = NULL;
10956 const xmlChar *URI = NULL;
10957 int nsNr = ctxt->nsNr;
10959 if ((avail < 2) && (ctxt->inputNr == 1))
10961 cur = ctxt->input->cur[0];
10963 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10964 ctxt->instate = XML_PARSER_EOF;
10965 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10966 ctxt->sax->endDocument(ctxt->userData);
10970 if (ctxt->progressive) {
10971 /* > can be found unescaped in attribute values */
10972 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10974 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10978 if (ctxt->spaceNr == 0)
10979 spacePush(ctxt, -1);
10980 else if (*ctxt->space == -2)
10981 spacePush(ctxt, -1);
10983 spacePush(ctxt, *ctxt->space);
10984 #ifdef LIBXML_SAX1_ENABLED
10986 #endif /* LIBXML_SAX1_ENABLED */
10987 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10988 #ifdef LIBXML_SAX1_ENABLED
10990 name = xmlParseStartTag(ctxt);
10991 #endif /* LIBXML_SAX1_ENABLED */
10992 if (ctxt->instate == XML_PARSER_EOF)
10994 if (name == NULL) {
10996 ctxt->instate = XML_PARSER_EOF;
10997 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10998 ctxt->sax->endDocument(ctxt->userData);
11001 #ifdef LIBXML_VALID_ENABLED
11003 * [ VC: Root Element Type ]
11004 * The Name in the document type declaration must match
11005 * the element type of the root element.
11007 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11008 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11009 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11010 #endif /* LIBXML_VALID_ENABLED */
11013 * Check for an Empty Element.
11015 if ((RAW == '/') && (NXT(1) == '>')) {
11019 if ((ctxt->sax != NULL) &&
11020 (ctxt->sax->endElementNs != NULL) &&
11021 (!ctxt->disableSAX))
11022 ctxt->sax->endElementNs(ctxt->userData, name,
11024 if (ctxt->nsNr - nsNr > 0)
11025 nsPop(ctxt, ctxt->nsNr - nsNr);
11026 #ifdef LIBXML_SAX1_ENABLED
11028 if ((ctxt->sax != NULL) &&
11029 (ctxt->sax->endElement != NULL) &&
11030 (!ctxt->disableSAX))
11031 ctxt->sax->endElement(ctxt->userData, name);
11032 #endif /* LIBXML_SAX1_ENABLED */
11035 if (ctxt->nameNr == 0) {
11036 ctxt->instate = XML_PARSER_EPILOG;
11038 ctxt->instate = XML_PARSER_CONTENT;
11045 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11046 "Couldn't find end of Start Tag %s\n",
11052 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11053 #ifdef LIBXML_SAX1_ENABLED
11055 namePush(ctxt, name);
11056 #endif /* LIBXML_SAX1_ENABLED */
11058 ctxt->instate = XML_PARSER_CONTENT;
11061 case XML_PARSER_CONTENT: {
11062 const xmlChar *test;
11064 if ((avail < 2) && (ctxt->inputNr == 1))
11066 cur = ctxt->input->cur[0];
11067 next = ctxt->input->cur[1];
11070 cons = ctxt->input->consumed;
11071 if ((cur == '<') && (next == '/')) {
11072 ctxt->instate = XML_PARSER_END_TAG;
11074 } else if ((cur == '<') && (next == '?')) {
11075 if ((!terminate) &&
11076 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11079 } else if ((cur == '<') && (next != '!')) {
11080 ctxt->instate = XML_PARSER_START_TAG;
11082 } else if ((cur == '<') && (next == '!') &&
11083 (ctxt->input->cur[2] == '-') &&
11084 (ctxt->input->cur[3] == '-')) {
11089 ctxt->input->cur += 4;
11090 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11091 ctxt->input->cur -= 4;
11092 if ((!terminate) && (term < 0))
11094 xmlParseComment(ctxt);
11095 ctxt->instate = XML_PARSER_CONTENT;
11096 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11097 (ctxt->input->cur[2] == '[') &&
11098 (ctxt->input->cur[3] == 'C') &&
11099 (ctxt->input->cur[4] == 'D') &&
11100 (ctxt->input->cur[5] == 'A') &&
11101 (ctxt->input->cur[6] == 'T') &&
11102 (ctxt->input->cur[7] == 'A') &&
11103 (ctxt->input->cur[8] == '[')) {
11105 ctxt->instate = XML_PARSER_CDATA_SECTION;
11107 } else if ((cur == '<') && (next == '!') &&
11110 } else if (cur == '&') {
11111 if ((!terminate) &&
11112 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11114 xmlParseReference(ctxt);
11116 /* TODO Avoid the extra copy, handle directly !!! */
11118 * Goal of the following test is:
11119 * - minimize calls to the SAX 'character' callback
11120 * when they are mergeable
10121 * - handle a problem for isBlank when we only parse
11122 * a sequence of blank chars and the next one is
11123 * not available to check against '<' presence.
11124 * - tries to homogenize the differences in SAX
11125 * callbacks between the push and pull versions
11128 if ((ctxt->inputNr == 1) &&
11129 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11131 if (ctxt->progressive) {
11132 if ((lastlt == NULL) ||
11133 (ctxt->input->cur > lastlt))
11135 } else if (xmlParseLookupSequence(ctxt,
11141 ctxt->checkIndex = 0;
11142 xmlParseCharData(ctxt, 0);
11145 * Pop-up of finished entities.
11147 while ((RAW == 0) && (ctxt->inputNr > 1))
11149 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11150 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11151 "detected an error in element content\n");
11152 ctxt->instate = XML_PARSER_EOF;
11157 case XML_PARSER_END_TAG:
11161 if (ctxt->progressive) {
11162 /* > can be found unescaped in attribute values */
11163 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11165 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11170 xmlParseEndTag2(ctxt,
11171 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11172 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11173 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11176 #ifdef LIBXML_SAX1_ENABLED
11178 xmlParseEndTag1(ctxt, 0);
11179 #endif /* LIBXML_SAX1_ENABLED */
11180 if (ctxt->instate == XML_PARSER_EOF) {
11182 } else if (ctxt->nameNr == 0) {
11183 ctxt->instate = XML_PARSER_EPILOG;
11185 ctxt->instate = XML_PARSER_CONTENT;
11188 case XML_PARSER_CDATA_SECTION: {
11190 * The Push mode need to have the SAX callback for
11191 * cdataBlock merge back contiguous callbacks.
11195 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11197 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11200 tmp = xmlCheckCdataPush(ctxt->input->cur,
11201 XML_PARSER_BIG_BUFFER_SIZE);
11204 ctxt->input->cur += tmp;
11205 goto encoding_error;
11207 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11208 if (ctxt->sax->cdataBlock != NULL)
11209 ctxt->sax->cdataBlock(ctxt->userData,
11210 ctxt->input->cur, tmp);
11211 else if (ctxt->sax->characters != NULL)
11212 ctxt->sax->characters(ctxt->userData,
11213 ctxt->input->cur, tmp);
11216 ctxt->checkIndex = 0;
11222 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11223 if ((tmp < 0) || (tmp != base)) {
11225 ctxt->input->cur += tmp;
11226 goto encoding_error;
11228 if ((ctxt->sax != NULL) && (base == 0) &&
11229 (ctxt->sax->cdataBlock != NULL) &&
11230 (!ctxt->disableSAX)) {
11232 * Special case to provide identical behaviour
10233 * between pull and push parsers on empty CDATA
11236 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11237 (!strncmp((const char *)&ctxt->input->cur[-9],
11239 ctxt->sax->cdataBlock(ctxt->userData,
11241 } else if ((ctxt->sax != NULL) && (base > 0) &&
11242 (!ctxt->disableSAX)) {
11243 if (ctxt->sax->cdataBlock != NULL)
11244 ctxt->sax->cdataBlock(ctxt->userData,
11245 ctxt->input->cur, base);
11246 else if (ctxt->sax->characters != NULL)
11247 ctxt->sax->characters(ctxt->userData,
11248 ctxt->input->cur, base);
11251 ctxt->checkIndex = 0;
11252 ctxt->instate = XML_PARSER_CONTENT;
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: entering CONTENT\n");
11260 case XML_PARSER_MISC:
11262 if (ctxt->input->buf == NULL)
11263 avail = ctxt->input->length -
11264 (ctxt->input->cur - ctxt->input->base);
11266 avail = ctxt->input->buf->buffer->use -
11267 (ctxt->input->cur - ctxt->input->base);
11270 cur = ctxt->input->cur[0];
11271 next = ctxt->input->cur[1];
11272 if ((cur == '<') && (next == '?')) {
11273 if ((!terminate) &&
11274 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11277 xmlGenericError(xmlGenericErrorContext,
11278 "PP: Parsing PI\n");
11281 ctxt->checkIndex = 0;
11282 } else if ((cur == '<') && (next == '!') &&
11283 (ctxt->input->cur[2] == '-') &&
11284 (ctxt->input->cur[3] == '-')) {
11285 if ((!terminate) &&
11286 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11289 xmlGenericError(xmlGenericErrorContext,
11290 "PP: Parsing Comment\n");
11292 xmlParseComment(ctxt);
11293 ctxt->instate = XML_PARSER_MISC;
11294 ctxt->checkIndex = 0;
11295 } else if ((cur == '<') && (next == '!') &&
11296 (ctxt->input->cur[2] == 'D') &&
11297 (ctxt->input->cur[3] == 'O') &&
11298 (ctxt->input->cur[4] == 'C') &&
11299 (ctxt->input->cur[5] == 'T') &&
11300 (ctxt->input->cur[6] == 'Y') &&
11301 (ctxt->input->cur[7] == 'P') &&
11302 (ctxt->input->cur[8] == 'E')) {
11303 if ((!terminate) &&
11304 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11307 xmlGenericError(xmlGenericErrorContext,
11308 "PP: Parsing internal subset\n");
11310 ctxt->inSubset = 1;
11311 xmlParseDocTypeDecl(ctxt);
11313 ctxt->instate = XML_PARSER_DTD;
11315 xmlGenericError(xmlGenericErrorContext,
11316 "PP: entering DTD\n");
11320 * Create and update the external subset.
11322 ctxt->inSubset = 2;
11323 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11324 (ctxt->sax->externalSubset != NULL))
11325 ctxt->sax->externalSubset(ctxt->userData,
11326 ctxt->intSubName, ctxt->extSubSystem,
11328 ctxt->inSubset = 0;
11329 xmlCleanSpecialAttr(ctxt);
11330 ctxt->instate = XML_PARSER_PROLOG;
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: entering PROLOG\n");
11336 } else if ((cur == '<') && (next == '!') &&
11340 ctxt->instate = XML_PARSER_START_TAG;
11341 ctxt->progressive = 1;
11342 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: entering START_TAG\n");
11349 case XML_PARSER_PROLOG:
11351 if (ctxt->input->buf == NULL)
11352 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11354 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11357 cur = ctxt->input->cur[0];
11358 next = ctxt->input->cur[1];
11359 if ((cur == '<') && (next == '?')) {
11360 if ((!terminate) &&
11361 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11364 xmlGenericError(xmlGenericErrorContext,
11365 "PP: Parsing PI\n");
11368 } else if ((cur == '<') && (next == '!') &&
11369 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11370 if ((!terminate) &&
11371 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: Parsing Comment\n");
11377 xmlParseComment(ctxt);
11378 ctxt->instate = XML_PARSER_PROLOG;
11379 } else if ((cur == '<') && (next == '!') &&
11383 ctxt->instate = XML_PARSER_START_TAG;
11384 if (ctxt->progressive == 0)
11385 ctxt->progressive = 1;
11386 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11388 xmlGenericError(xmlGenericErrorContext,
11389 "PP: entering START_TAG\n");
11393 case XML_PARSER_EPILOG:
11395 if (ctxt->input->buf == NULL)
11396 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11398 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11401 cur = ctxt->input->cur[0];
11402 next = ctxt->input->cur[1];
11403 if ((cur == '<') && (next == '?')) {
11404 if ((!terminate) &&
11405 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11408 xmlGenericError(xmlGenericErrorContext,
11409 "PP: Parsing PI\n");
11412 ctxt->instate = XML_PARSER_EPILOG;
11413 } else if ((cur == '<') && (next == '!') &&
11414 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11415 if ((!terminate) &&
11416 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11419 xmlGenericError(xmlGenericErrorContext,
11420 "PP: Parsing Comment\n");
11422 xmlParseComment(ctxt);
11423 ctxt->instate = XML_PARSER_EPILOG;
11424 } else if ((cur == '<') && (next == '!') &&
11428 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11429 ctxt->instate = XML_PARSER_EOF;
11431 xmlGenericError(xmlGenericErrorContext,
11432 "PP: entering EOF\n");
11434 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11435 ctxt->sax->endDocument(ctxt->userData);
11439 case XML_PARSER_DTD: {
11441 * Sorry but progressive parsing of the internal subset
11442 * is not expected to be supported. We first check that
11443 * the full content of the internal subset is available and
11444 * the parsing is launched only at that point.
11445 * Internal subset ends up with "']' S? '>'" in an unescaped
11446 * section and not in a ']]>' sequence which are conditional
11447 * sections (whoever argued to keep that crap in XML deserve
11448 * a place in hell !).
11454 base = ctxt->input->cur - ctxt->input->base;
11455 if (base < 0) return(0);
11456 if (ctxt->checkIndex > base)
11457 base = ctxt->checkIndex;
11458 buf = ctxt->input->buf->buffer->content;
11459 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11462 if (buf[base] == quote)
11466 if ((quote == 0) && (buf[base] == '<')) {
11468 /* special handling of comments */
11469 if (((unsigned int) base + 4 <
11470 ctxt->input->buf->buffer->use) &&
11471 (buf[base + 1] == '!') &&
11472 (buf[base + 2] == '-') &&
11473 (buf[base + 3] == '-')) {
11474 for (;(unsigned int) base + 3 <
11475 ctxt->input->buf->buffer->use; base++) {
11476 if ((buf[base] == '-') &&
11477 (buf[base + 1] == '-') &&
11478 (buf[base + 2] == '>')) {
11486 fprintf(stderr, "unfinished comment\n");
11493 if (buf[base] == '"') {
11497 if (buf[base] == '\'') {
11501 if (buf[base] == ']') {
11503 fprintf(stderr, "%c%c%c%c: ", buf[base],
11504 buf[base + 1], buf[base + 2], buf[base + 3]);
11506 if ((unsigned int) base +1 >=
11507 ctxt->input->buf->buffer->use)
11509 if (buf[base + 1] == ']') {
11510 /* conditional crap, skip both ']' ! */
11515 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11517 if (buf[base + i] == '>') {
11519 fprintf(stderr, "found\n");
11521 goto found_end_int_subset;
11523 if (!IS_BLANK_CH(buf[base + i])) {
11525 fprintf(stderr, "not found\n");
11527 goto not_end_of_int_subset;
11531 fprintf(stderr, "end of stream\n");
11536 not_end_of_int_subset:
11537 continue; /* for */
10540 * We didn't find the end of the Internal subset
11544 xmlGenericError(xmlGenericErrorContext,
11545 "PP: lookup of int subset end filed\n");
11549 found_end_int_subset:
11550 xmlParseInternalSubset(ctxt);
11551 ctxt->inSubset = 2;
11552 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11553 (ctxt->sax->externalSubset != NULL))
11554 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11555 ctxt->extSubSystem, ctxt->extSubURI);
11556 ctxt->inSubset = 0;
11557 xmlCleanSpecialAttr(ctxt);
11558 ctxt->instate = XML_PARSER_PROLOG;
11559 ctxt->checkIndex = 0;
11561 xmlGenericError(xmlGenericErrorContext,
11562 "PP: entering PROLOG\n");
11566 case XML_PARSER_COMMENT:
11567 xmlGenericError(xmlGenericErrorContext,
11568 "PP: internal error, state == COMMENT\n");
11569 ctxt->instate = XML_PARSER_CONTENT;
11571 xmlGenericError(xmlGenericErrorContext,
11572 "PP: entering CONTENT\n");
11575 case XML_PARSER_IGNORE:
11576 xmlGenericError(xmlGenericErrorContext,
11577 "PP: internal error, state == IGNORE");
11578 ctxt->instate = XML_PARSER_DTD;
11580 xmlGenericError(xmlGenericErrorContext,
11581 "PP: entering DTD\n");
11584 case XML_PARSER_PI:
11585 xmlGenericError(xmlGenericErrorContext,
11586 "PP: internal error, state == PI\n");
11587 ctxt->instate = XML_PARSER_CONTENT;
11589 xmlGenericError(xmlGenericErrorContext,
11590 "PP: entering CONTENT\n");
11593 case XML_PARSER_ENTITY_DECL:
11594 xmlGenericError(xmlGenericErrorContext,
11595 "PP: internal error, state == ENTITY_DECL\n");
11596 ctxt->instate = XML_PARSER_DTD;
11598 xmlGenericError(xmlGenericErrorContext,
11599 "PP: entering DTD\n");
11602 case XML_PARSER_ENTITY_VALUE:
11603 xmlGenericError(xmlGenericErrorContext,
11604 "PP: internal error, state == ENTITY_VALUE\n");
11605 ctxt->instate = XML_PARSER_CONTENT;
11607 xmlGenericError(xmlGenericErrorContext,
11608 "PP: entering DTD\n");
11611 case XML_PARSER_ATTRIBUTE_VALUE:
11612 xmlGenericError(xmlGenericErrorContext,
11613 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11614 ctxt->instate = XML_PARSER_START_TAG;
11616 xmlGenericError(xmlGenericErrorContext,
11617 "PP: entering START_TAG\n");
11620 case XML_PARSER_SYSTEM_LITERAL:
11621 xmlGenericError(xmlGenericErrorContext,
11622 "PP: internal error, state == SYSTEM_LITERAL\n");
11623 ctxt->instate = XML_PARSER_START_TAG;
11625 xmlGenericError(xmlGenericErrorContext,
11626 "PP: entering START_TAG\n");
11629 case XML_PARSER_PUBLIC_LITERAL:
11630 xmlGenericError(xmlGenericErrorContext,
11631 "PP: internal error, state == PUBLIC_LITERAL\n");
11632 ctxt->instate = XML_PARSER_START_TAG;
11634 xmlGenericError(xmlGenericErrorContext,
11635 "PP: entering START_TAG\n");
11642 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11649 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11650 ctxt->input->cur[0], ctxt->input->cur[1],
11651 ctxt->input->cur[2], ctxt->input->cur[3]);
11652 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11653 "Input is not proper UTF-8, indicate encoding !\n%s",
11654 BAD_CAST buffer, NULL);
11661 * @ctxt: an XML parser context
11662 * @chunk: a char array
11663 * @size: the size in bytes of the chunk
11664 * @terminate: last chunk indicator
11666 * Parse a Chunk of memory
11668 * Returns zero if no error, the xmlParserErrors otherwise.
/*
 * xmlParseChunk: push-mode entry point. Appends @chunk (@size bytes) to the
 * current input buffer, runs xmlParseTryOrFinish() on what is buffered and
 * returns ctxt->errNo cast to xmlParserErrors.
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps), so the comments added here describe only statements
 * that are actually visible.
 */
11671 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11677 return(XML_ERR_INTERNAL_ERROR);
/* An earlier error that disabled SAX is reported again; no work is done. */
11678 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11679 return(ctxt->errNo);
11680 if (ctxt->instate == XML_PARSER_START)
11681 xmlDetectSAX2(ctxt);
/*
 * Detects a non-final chunk whose last byte is a carriage return. The body
 * of this test is not visible here; presumably it holds that byte back and
 * sets end_in_lf, since a single CR is pushed further down when
 * end_in_lf == 1 -- TODO confirm.
 */
11682 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11683 (chunk[size - 1] == '\r')) {
11690 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11691 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/*
 * base and cur are saved as offsets; the input pointers are rebuilt from
 * buffer->content after the push below.
 */
11692 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11693 int cur = ctxt->input->cur - ctxt->input->base;
11697 * Specific handling if we autodetected an encoding, we should not
11698 * push more than the first line ... which depend on the encoding
11699 * And only push the rest once the final encoding was detected
11701 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11702 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
/*
 * len starts at 45; the adjustments made for UTF-16 and UCS-4 encoder
 * names are on lines not shown in this listing.
 */
11703 unsigned int len = 45;
11705 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11706 BAD_CAST "UTF-16")) ||
11707 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11708 BAD_CAST "UTF16")))
11710 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11711 BAD_CAST "UCS-4")) ||
11712 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
/* Bytes already consumed from the raw input count against the budget. */
11716 if (ctxt->input->buf->rawconsumed < len)
11717 len -= ctxt->input->buf->rawconsumed;
11720 * Change size for reading the initial declaration only
11721 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11722 * will blindly copy extra bytes from memory.
11724 if ((unsigned int) size > len) {
11725 remain = size - len;
11731 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/*
 * NOTE(review): XML_PARSER_EOF is the value assigned to ctxt->instate at
 * the end of this function, yet here it is stored in errNo and returned as
 * the error code. Confirm this is intended.
 */
11733 ctxt->errNo = XML_PARSER_EOF;
11734 ctxt->disableSAX = 1;
11735 return (XML_PARSER_EOF);
/* Rebuild the input pointers from the saved offsets after the push. */
11737 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11738 ctxt->input->cur = ctxt->input->base + cur;
11740 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11742 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/*
 * Reached when nothing was pushed above and the parser is not at EOF:
 * run the input encoder over in->raw into in->buffer.
 */
11745 } else if (ctxt->instate != XML_PARSER_EOF) {
11746 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11747 xmlParserInputBufferPtr in = ctxt->input->buf;
11748 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11749 (in->raw != NULL)) {
11752 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11755 xmlGenericError(xmlGenericErrorContext,
11756 "xmlParseChunk: encoder error\n");
11757 return(XML_ERR_INVALID_ENCODING);
/*
 * Two parse calls are visible, one with terminate forced to 0; the
 * condition selecting between them is not shown in this listing.
 */
11763 xmlParseTryOrFinish(ctxt, 0);
11765 xmlParseTryOrFinish(ctxt, terminate);
11766 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11767 return(ctxt->errNo);
/* Append a single CR to the input buffer when end_in_lf is set. */
11775 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11776 (ctxt->input->buf != NULL)) {
11777 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11781 * Check for termination
/*
 * avail = bytes left unread in the current input: taken from input->length
 * when there is no buffer, otherwise from the buffer use count.
 */
11785 if (ctxt->input != NULL) {
11786 if (ctxt->input->buf == NULL)
11787 avail = ctxt->input->length -
11788 (ctxt->input->cur - ctxt->input->base);
11790 avail = ctxt->input->buf->buffer->use -
11791 (ctxt->input->cur - ctxt->input->base);
/*
 * Ending in a state other than EOF or EPILOG, or in EPILOG with unread
 * bytes left, raises XML_ERR_DOCUMENT_END.
 */
11794 if ((ctxt->instate != XML_PARSER_EOF) &&
11795 (ctxt->instate != XML_PARSER_EPILOG)) {
11796 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11798 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11799 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* endDocument is only called when the parser is not already at EOF. */
11801 if (ctxt->instate != XML_PARSER_EOF) {
11802 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11803 ctxt->sax->endDocument(ctxt->userData);
11805 ctxt->instate = XML_PARSER_EOF;
11807 return((xmlParserErrors) ctxt->errNo);
11810 /************************************************************************
11812 * I/O front end functions to the parser *
11814 ************************************************************************/
11817 * xmlCreatePushParserCtxt:
11818 * @sax: a SAX handler
11819 * @user_data: The user data returned on SAX callbacks
11820 * @chunk: a pointer to an array of chars
11821 * @size: number of chars in the array
11822 * @filename: an optional file name or URI
11824 * Create a parser context for using the XML parser in push mode.
11825 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
11826 * the encoding. The remaining characters will be parsed so they
11827 * don't need to be fed in again through xmlParseChunk.
11828 * To allow content encoding detection, @size should be >= 4
11829 * The value of @filename is used for fetching external entities
11830 * and error/warning reports.
11832 * Returns the new parser context or NULL
/*
 * xmlCreatePushParserCtxt: build a parser context for push mode.
 * Visible steps: detect the encoding from the first bytes of @chunk,
 * allocate the input buffer and the context, install a private copy of
 * @sax, create the input stream over the buffer, push the initial chunk
 * (recomputing the input pointers from saved offsets) and, when an
 * encoding was detected, call xmlSwitchEncoding().
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps); comments describe only what is visible.
 * NOTE(review): @sax is dereferenced (sax->initialized) in the copy step;
 * any NULL guard would be on a line not shown -- confirm.
 * NOTE(review): when xmlCanonicPath() fails, ctxt and buf are freed but no
 * release of inputStream is visible, and inputStream has not yet been
 * handed to inputPush() at that point -- possible leak, confirm.
 */
11836 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11837 const char *chunk, int size, const char *filename) {
11838 xmlParserCtxtPtr ctxt;
11839 xmlParserInputPtr inputStream;
11840 xmlParserInputBufferPtr buf;
11841 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11844 * plug some encoding conversion routines
/* Encoding detection is only attempted with at least 4 bytes of data. */
11846 if ((chunk != NULL) && (size >= 4))
11847 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11849 buf = xmlAllocParserInputBuffer(enc);
11850 if (buf == NULL) return(NULL);
11852 ctxt = xmlNewParserCtxt();
11853 if (ctxt == NULL) {
11854 xmlErrMemory(NULL, "creating parser: out of memory\n");
11855 xmlFreeParserInputBuffer(buf);
11858 ctxt->dictNames = 1;
/* pushTab holds three pointer-sized slots per entry of the name stack. */
11859 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11860 if (ctxt->pushTab == NULL) {
11861 xmlErrMemory(ctxt, NULL);
11862 xmlFreeParserInputBuffer(buf);
11863 xmlFreeParserCtxt(ctxt);
/*
 * Replace the SAX block of the context with a private copy of the caller
 * handler. With SAX1 enabled the old block is only freed when it is not
 * the static default handler. The copy is the full xmlSAXHandler when the
 * caller block is marked XML_SAX2_MAGIC, else only the V1-sized prefix.
 */
11867 #ifdef LIBXML_SAX1_ENABLED
11868 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11869 #endif /* LIBXML_SAX1_ENABLED */
11870 xmlFree(ctxt->sax);
11871 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11872 if (ctxt->sax == NULL) {
11873 xmlErrMemory(ctxt, NULL);
11874 xmlFreeParserInputBuffer(buf);
11875 xmlFreeParserCtxt(ctxt);
11878 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11879 if (sax->initialized == XML_SAX2_MAGIC)
11880 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11882 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11883 if (user_data != NULL)
11884 ctxt->userData = user_data;
/* The directory is derived from @filename when one is given. */
11886 if (filename == NULL) {
11887 ctxt->directory = NULL;
11889 ctxt->directory = xmlParserGetDirectory(filename);
11892 inputStream = xmlNewInputStream(ctxt);
11893 if (inputStream == NULL) {
11894 xmlFreeParserCtxt(ctxt);
11895 xmlFreeParserInputBuffer(buf);
11899 if (filename == NULL)
11900 inputStream->filename = NULL;
11902 inputStream->filename = (char *)
11903 xmlCanonicPath((const xmlChar *) filename);
11904 if (inputStream->filename == NULL) {
11905 xmlFreeParserCtxt(ctxt);
11906 xmlFreeParserInputBuffer(buf);
/* Attach the buffer; base and cur both start at the buffer content. */
11910 inputStream->buf = buf;
11911 inputStream->base = inputStream->buf->buffer->content;
11912 inputStream->cur = inputStream->buf->buffer->content;
11914 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11916 inputPush(ctxt, inputStream);
11919 * If the caller didn't provide an initial 'chunk' for determining
11920 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11921 * that it can be automatically determined later
11923 if ((size == 0) || (chunk == NULL)) {
11924 ctxt->charset = XML_CHAR_ENCODING_NONE;
11925 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11926 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11927 int cur = ctxt->input->cur - ctxt->input->base;
11929 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11931 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11932 ctxt->input->cur = ctxt->input->base + cur;
11934 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11936 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11940 if (enc != XML_CHAR_ENCODING_NONE) {
11941 xmlSwitchEncoding(ctxt, enc);
11946 #endif /* LIBXML_PUSH_ENABLED */
11950 * @ctxt: an XML parser context
11952 * Blocks further parser processing
/*
 * xmlStopParser: mark the context as finished. Sets the state to
 * XML_PARSER_EOF, disables SAX callbacks and, when there is a current
 * input, points both cur and base at an empty string literal
 * (presumably so that no further data can be read from it).
 *
 * NOTE(review): some lines of this function are not shown in this listing.
 */
11955 xmlStopParser(xmlParserCtxtPtr ctxt) {
11958 ctxt->instate = XML_PARSER_EOF;
11959 ctxt->disableSAX = 1;
11960 if (ctxt->input != NULL) {
11961 ctxt->input->cur = BAD_CAST"";
11962 ctxt->input->base = ctxt->input->cur;
11967 * xmlCreateIOParserCtxt:
11968 * @sax: a SAX handler
11969 * @user_data: The user data returned on SAX callbacks
11970 * @ioread: an I/O read function
11971 * @ioclose: an I/O close function
11972 * @ioctx: an I/O handler
11973 * @enc: the charset encoding if known
11975 * Create a parser context for using the XML parser with an existing
11978 * Returns the new parser context or NULL
/*
 * xmlCreateIOParserCtxt: build a parser context that reads through the
 * caller supplied @ioread / @ioclose callbacks. Visible steps: wrap the
 * callbacks in an input buffer, allocate the context, install a private
 * copy of @sax, create an I/O input stream over the buffer and push it.
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps); comments describe only what is visible.
 * NOTE(review): when the xmlMalloc() of the SAX block fails, only ctxt is
 * freed in the visible lines; buf is released on the ctxt == NULL path but
 * not here -- possible leak, confirm.
 * NOTE(review): @sax is dereferenced (sax->initialized); any NULL guard
 * would be on a line not shown -- confirm.
 */
11981 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11982 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11983 void *ioctx, xmlCharEncoding enc) {
11984 xmlParserCtxtPtr ctxt;
11985 xmlParserInputPtr inputStream;
11986 xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
11988 if (ioread == NULL) return(NULL);
11990 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
/*
 * The statement guarded by this test is not shown; presumably the close
 * callback is invoked when the buffer could not be created -- TODO confirm.
 */
11992 if (ioclose != NULL)
11997 ctxt = xmlNewParserCtxt();
11998 if (ctxt == NULL) {
11999 xmlFreeParserInputBuffer(buf);
/*
 * Replace the SAX block of the context with a private copy of the caller
 * handler: full xmlSAXHandler when marked XML_SAX2_MAGIC, else only the
 * V1-sized prefix.
 */
12003 #ifdef LIBXML_SAX1_ENABLED
12004 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12005 #endif /* LIBXML_SAX1_ENABLED */
12006 xmlFree(ctxt->sax);
12007 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12008 if (ctxt->sax == NULL) {
12009 xmlErrMemory(ctxt, NULL);
12010 xmlFreeParserCtxt(ctxt);
12013 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12014 if (sax->initialized == XML_SAX2_MAGIC)
12015 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12017 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12018 if (user_data != NULL)
12019 ctxt->userData = user_data;
12022 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12023 if (inputStream == NULL) {
12024 xmlFreeParserCtxt(ctxt);
12027 inputPush(ctxt, inputStream);
12032 #ifdef LIBXML_VALID_ENABLED
12033 /************************************************************************
12035 * Front ends when parsing a DTD *
12037 ************************************************************************/
12041 * @sax: the SAX handler block or NULL
12042 * @input: an Input Buffer
12043 * @enc: the charset encoding if known
12045 * Load and parse a DTD
12047 * Returns the resulting xmlDtdPtr or NULL in case of error.
12048 * @input will be freed by the function in any case.
/*
 * xmlIOParseDTD: parse a DTD read from the input buffer @input and return
 * it as an xmlDtdPtr. The DTD is parsed as the external subset of a
 * temporary document; when the result is well formed the subset is
 * detached from that document before the document is freed.
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps); comments describe only what is visible.
 * NOTE(review): when xmlNewDoc() fails only xmlErrMemory() is visible; the
 * matching path in xmlSAXParseDTD also detaches the SAX block and frees
 * ctxt. If nothing else happens on the missing lines, ctxt leaks here --
 * confirm.
 */
12052 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12053 xmlCharEncoding enc) {
12054 xmlDtdPtr ret = NULL;
12055 xmlParserCtxtPtr ctxt;
12056 xmlParserInputPtr pinput = NULL;
12062 ctxt = xmlNewParserCtxt();
12063 if (ctxt == NULL) {
12064 xmlFreeParserInputBuffer(input);
12069 * Set-up the SAX context
12072 if (ctxt->sax != NULL)
12073 xmlFree(ctxt->sax);
12075 ctxt->userData = ctxt;
12077 xmlDetectSAX2(ctxt);
12080 * generate a parser input from the I/O handler
12083 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/*
 * On every exit path below ctxt->sax is reset to NULL when the caller
 * supplied @sax, before xmlFreeParserCtxt() runs (presumably so the caller
 * block is not freed with the context).
 */
12084 if (pinput == NULL) {
12085 if (sax != NULL) ctxt->sax = NULL;
12086 xmlFreeParserInputBuffer(input);
12087 xmlFreeParserCtxt(ctxt);
12092 * plug some encoding conversion routines here.
12094 if (xmlPushInput(ctxt, pinput) < 0) {
12095 if (sax != NULL) ctxt->sax = NULL;
12096 xmlFreeParserCtxt(ctxt);
/* An explicitly requested encoding is applied right away. */
12099 if (enc != XML_CHAR_ENCODING_NONE) {
12100 xmlSwitchEncoding(ctxt, enc);
/* Reset the pushed input: base and cur both sit at the read position. */
12103 pinput->filename = NULL;
12106 pinput->base = ctxt->input->cur;
12107 pinput->cur = ctxt->input->cur;
12108 pinput->free = NULL;
12111 * let's parse that entity knowing it's an external subset.
12113 ctxt->inSubset = 2;
12114 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12115 if (ctxt->myDoc == NULL) {
12116 xmlErrMemory(ctxt, "New Doc failed");
12119 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12120 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12121 BAD_CAST "none", BAD_CAST "none");
/*
 * Without an explicit encoding, detection is attempted when at least
 * 4 bytes are available; the lines filling start are not shown here.
 */
12123 if ((enc == XML_CHAR_ENCODING_NONE) &&
12124 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12126 * Get the 4 first bytes and decode the charset
12127 * if enc != XML_CHAR_ENCODING_NONE
12128 * plug some encoding conversion routines.
12134 enc = xmlDetectCharEncoding(start, 4);
12135 if (enc != XML_CHAR_ENCODING_NONE) {
12136 xmlSwitchEncoding(ctxt, enc);
12140 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
/*
 * Only a well-formed parse yields a result: the external subset is taken
 * out of the temporary document, which is then freed. The body of the
 * loop over the children of the DTD is not shown in this listing.
 */
12142 if (ctxt->myDoc != NULL) {
12143 if (ctxt->wellFormed) {
12144 ret = ctxt->myDoc->extSubset;
12145 ctxt->myDoc->extSubset = NULL;
12150 tmp = ret->children;
12151 while (tmp != NULL) {
12159 xmlFreeDoc(ctxt->myDoc);
12160 ctxt->myDoc = NULL;
12162 if (sax != NULL) ctxt->sax = NULL;
12163 xmlFreeParserCtxt(ctxt);
12170 * @sax: the SAX handler block
12171 * @ExternalID: a NAME* containing the External ID of the DTD
12172 * @SystemID: a NAME* containing the URL to the DTD
12174 * Load and parse an external subset.
12176 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * xmlSAXParseDTD: load the DTD identified by @ExternalID / @SystemID
 * through the resolveEntity SAX callback and parse it as the external
 * subset of a temporary document. When the result is well formed the
 * subset is detached from that document and kept in ret.
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps); comments describe only what is visible.
 * NOTE(review): when xmlCanonicPath() fails, ctxt is freed without the
 * ctxt->sax reset to NULL that every other exit path performs when @sax
 * is given -- confirm the caller block cannot be freed there.
 * NOTE(review): the detected encoding is passed to xmlSwitchEncoding()
 * unconditionally here, whereas the sibling functions first test it
 * against XML_CHAR_ENCODING_NONE -- confirm.
 */
12180 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12181 const xmlChar *SystemID) {
12182 xmlDtdPtr ret = NULL;
12183 xmlParserCtxtPtr ctxt;
12184 xmlParserInputPtr input = NULL;
12185 xmlCharEncoding enc;
12186 xmlChar* systemIdCanonic;
/* At least one of the two identifiers is required. */
12188 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12190 ctxt = xmlNewParserCtxt();
12191 if (ctxt == NULL) {
12196 * Set-up the SAX context
12199 if (ctxt->sax != NULL)
12200 xmlFree(ctxt->sax);
12202 ctxt->userData = ctxt;
12206 * Canonicalise the system ID
12208 systemIdCanonic = xmlCanonicPath(SystemID);
12209 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12210 xmlFreeParserCtxt(ctxt);
12215 * Ask the Entity resolver to load the damn thing
12218 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12219 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
/* Failure paths release the canonical system id along with the context. */
12221 if (input == NULL) {
12222 if (sax != NULL) ctxt->sax = NULL;
12223 xmlFreeParserCtxt(ctxt);
12224 if (systemIdCanonic != NULL)
12225 xmlFree(systemIdCanonic);
12230 * plug some encoding conversion routines here.
12232 if (xmlPushInput(ctxt, input) < 0) {
12233 if (sax != NULL) ctxt->sax = NULL;
12234 xmlFreeParserCtxt(ctxt);
12235 if (systemIdCanonic != NULL)
12236 xmlFree(systemIdCanonic);
12239 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12240 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12241 xmlSwitchEncoding(ctxt, enc);
/*
 * The canonical system id either becomes the filename of the input (when
 * it has none) or is freed here.
 */
12244 if (input->filename == NULL)
12245 input->filename = (char *) systemIdCanonic;
12247 xmlFree(systemIdCanonic);
/* Reset the pushed input: base and cur both sit at the read position. */
12250 input->base = ctxt->input->cur;
12251 input->cur = ctxt->input->cur;
12252 input->free = NULL;
12255 * let's parse that entity knowing it's an external subset.
12257 ctxt->inSubset = 2;
12258 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12259 if (ctxt->myDoc == NULL) {
12260 xmlErrMemory(ctxt, "New Doc failed");
12261 if (sax != NULL) ctxt->sax = NULL;
12262 xmlFreeParserCtxt(ctxt);
12265 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12266 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12267 ExternalID, SystemID);
12268 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
/*
 * Only a well-formed parse yields a result: the external subset is taken
 * out of the temporary document, which is then freed. The body of the
 * loop over the children of the DTD is not shown in this listing.
 */
12270 if (ctxt->myDoc != NULL) {
12271 if (ctxt->wellFormed) {
12272 ret = ctxt->myDoc->extSubset;
12273 ctxt->myDoc->extSubset = NULL;
12278 tmp = ret->children;
12279 while (tmp != NULL) {
12287 xmlFreeDoc(ctxt->myDoc);
12288 ctxt->myDoc = NULL;
12290 if (sax != NULL) ctxt->sax = NULL;
12291 xmlFreeParserCtxt(ctxt);
12299 * @ExternalID: a NAME* containing the External ID of the DTD
12300 * @SystemID: a NAME* containing the URL to the DTD
12302 * Load and parse an external subset.
12304 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * xmlParseDTD: convenience wrapper; forwards to xmlSAXParseDTD() with a
 * NULL SAX handler and returns its result unchanged.
 */
12308 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12309 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12311 #endif /* LIBXML_VALID_ENABLED */
12313 /************************************************************************
12315 * Front ends when parsing an Entity *
12317 ************************************************************************/
12320 * xmlParseCtxtExternalEntity:
12321 * @ctx: the existing parsing context
12322 * @URL: the URL for the entity to load
12323 * @ID: the System ID for the entity to load
12324 * @lst: the return value for the set of parsed nodes
12326 * Parse an external general entity within an existing parsing context
12327 * An external general parsed entity is well-formed if it matches the
12328 * production labeled extParsedEnt.
12330 * [78] extParsedEnt ::= TextDecl? content
12332 * Returns 0 if the entity is well formed, -1 in case of args problem and
12333 * the parser error code otherwise
/*
 * xmlParseCtxtExternalEntity: parse the external entity @URL / @ID in a
 * child context created from the parent context @ctx. The child borrows
 * the SAX block, dictionary and attribute default tables of the parent,
 * parses the entity content under a temporary pseudoroot node, and the
 * borrowed members are detached again before the child is freed.
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps); comments describe only what is visible.
 * NOTE(review): when xmlNewDoc() fails, the child context is freed while
 * ctxt->sax still points at ctx->sax; the later newRoot failure path first
 * restores ctxt->sax = oldsax. Confirm the parent SAX block cannot be
 * freed on the newDoc path.
 * NOTE(review): ctx->myDoc is tested against NULL near the top and then
 * dereferenced unconditionally (ctx->myDoc->dict, ctx->myDoc->URL), so the
 * later ctx->myDoc != NULL and ctx->myDoc == NULL tests look redundant --
 * confirm against the statement guarded by the first test.
 */
12337 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12338 const xmlChar *ID, xmlNodePtr *lst) {
12339 xmlParserCtxtPtr ctxt;
12341 xmlNodePtr newRoot;
12342 xmlSAXHandlerPtr oldsax = NULL;
12345 xmlCharEncoding enc;
12347 if (ctx == NULL) return(-1);
/*
 * Entity nesting guard: deeper than 40 levels is refused unless
 * XML_PARSE_HUGE is set, and deeper than 1024 is always refused.
 */
12349 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12350 (ctx->depth > 1024)) {
12351 return(XML_ERR_ENTITY_LOOP);
12356 if ((URL == NULL) && (ID == NULL))
12358 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12361 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12362 if (ctxt == NULL) {
/* Borrow the parent SAX block; the original one is kept in oldsax. */
12366 oldsax = ctxt->sax;
12367 ctxt->sax = ctx->sax;
12368 xmlDetectSAX2(ctxt);
12369 newDoc = xmlNewDoc(BAD_CAST "1.0");
12370 if (newDoc == NULL) {
12371 xmlFreeParserCtxt(ctxt);
12374 newDoc->properties = XML_DOC_INTERNAL;
/* The temporary document shares the parent dictionary (with a reference). */
12375 if (ctx->myDoc->dict) {
12376 newDoc->dict = ctx->myDoc->dict;
12377 xmlDictReference(newDoc->dict);
/* The subsets are borrowed too; they are reset to NULL before xmlFreeDoc. */
12379 if (ctx->myDoc != NULL) {
12380 newDoc->intSubset = ctx->myDoc->intSubset;
12381 newDoc->extSubset = ctx->myDoc->extSubset;
12383 if (ctx->myDoc->URL != NULL) {
12384 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12386 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12387 if (newRoot == NULL) {
12388 ctxt->sax = oldsax;
12389 xmlFreeParserCtxt(ctxt);
12390 newDoc->intSubset = NULL;
12391 newDoc->extSubset = NULL;
12392 xmlFreeDoc(newDoc);
/* The pseudoroot becomes the current node that parsed content hangs off. */
12395 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12396 nodePush(ctxt, newDoc->children);
12397 if (ctx->myDoc == NULL) {
12398 ctxt->myDoc = newDoc;
12400 ctxt->myDoc = ctx->myDoc;
12401 newDoc->children->doc = ctx->myDoc;
12405 * Get the 4 first bytes and decode the charset
12406 * if enc != XML_CHAR_ENCODING_NONE
12407 * plug some encoding conversion routines.
12410 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12415 enc = xmlDetectCharEncoding(start, 4);
12416 if (enc != XML_CHAR_ENCODING_NONE) {
12417 xmlSwitchEncoding(ctxt, enc);
12422 * Parse a possible text declaration first
12424 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12425 xmlParseTextDecl(ctxt);
12427 * An XML-1.0 document can't reference an entity not XML-1.0
12429 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12430 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12431 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12432 "Version mismatch between document and entity\n");
12437 * If the user provided its own SAX callbacks then reuse the
12438 * useData callback field, otherwise the expected setup in a
12439 * DOM builder is to have userData == ctxt
12441 if (ctx->userData == ctx)
12442 ctxt->userData = ctxt;
12444 ctxt->userData = ctx->userData;
12447 * Doing validity checking on chunk doesn't make sense
/* Copy the parsing options of the parent; depth is one level deeper. */
12449 ctxt->instate = XML_PARSER_CONTENT;
12450 ctxt->validate = ctx->validate;
12451 ctxt->valid = ctx->valid;
12452 ctxt->loadsubset = ctx->loadsubset;
12453 ctxt->depth = ctx->depth + 1;
12454 ctxt->replaceEntities = ctx->replaceEntities;
12455 if (ctxt->validate) {
12456 ctxt->vctxt.error = ctx->vctxt.error;
12457 ctxt->vctxt.warning = ctx->vctxt.warning;
12459 ctxt->vctxt.error = NULL;
12460 ctxt->vctxt.warning = NULL;
12462 ctxt->vctxt.nodeTab = NULL;
12463 ctxt->vctxt.nodeNr = 0;
12464 ctxt->vctxt.nodeMax = 0;
12465 ctxt->vctxt.node = NULL;
/*
 * Drop the dictionary of the child and use the parent one, then look up
 * the three well-known strings again in that dictionary.
 */
12466 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12467 ctxt->dict = ctx->dict;
12468 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12469 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12470 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12471 ctxt->dictNames = ctx->dictNames;
12472 ctxt->attsDefault = ctx->attsDefault;
12473 ctxt->attsSpecial = ctx->attsSpecial;
12474 ctxt->linenumbers = ctx->linenumbers;
12476 xmlParseContent(ctxt);
/* Validation state is copied back to the parent after parsing. */
12478 ctx->validate = ctxt->validate;
12479 ctx->valid = ctxt->valid;
/*
 * Leftover input is an error: an end tag means the chunk is not well
 * balanced, anything else is extra content.
 */
12480 if ((RAW == '<') && (NXT(1) == '/')) {
12481 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12482 } else if (RAW != 0) {
12483 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
/* The node stack must be back at the pseudoroot. */
12485 if (ctxt->node != newDoc->children) {
12486 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12489 if (!ctxt->wellFormed) {
12490 if (ctxt->errNo == 0)
12499 * Return the newly created nodeset after unlinking it from
12500 * they pseudo parent.
12502 cur = newDoc->children->children;
12504 while (cur != NULL) {
12505 cur->parent = NULL;
12508 newDoc->children->children = NULL;
/*
 * Give back everything borrowed from the parent before freeing the child
 * context and the temporary document.
 */
12512 ctxt->sax = oldsax;
12514 ctxt->attsDefault = NULL;
12515 ctxt->attsSpecial = NULL;
12516 xmlFreeParserCtxt(ctxt);
12517 newDoc->intSubset = NULL;
12518 newDoc->extSubset = NULL;
12519 xmlFreeDoc(newDoc);
12525 * xmlParseExternalEntityPrivate:
12526 * @doc: the document the chunk pertains to
12527 * @oldctxt: the previous parser context if available
12528 * @sax: the SAX handler block (possibly NULL)
12529 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12530 * @depth: Used for loop detection, use 0
12531 * @URL: the URL for the entity to load
12532 * @ID: the System ID for the entity to load
12533 * @list: the return value for the set of parsed nodes
12535 * Private version of xmlParseExternalEntity()
12537 * Returns XML_ERR_OK if the entity is well formed, XML_ERR_INTERNAL_ERROR in
12538 * case of args problem and the parser error code otherwise
/*
 * xmlParseExternalEntityPrivate: parse the external entity @URL / @ID in a
 * fresh child context, optionally tied to the parent context @oldctxt.
 * The entity content is parsed under a temporary pseudoroot node in a
 * temporary document that borrows the subsets and dictionary of @doc.
 * Returns XML_ERR_OK on success, XML_ERR_ENTITY_LOOP when @depth is too
 * large, XML_ERR_INTERNAL_ERROR for argument or allocation problems,
 * XML_WAR_UNDECLARED_ENTITY when the child context cannot be created, and
 * otherwise the error number of the child context.
 *
 * After parsing, the visible code adds the entity counters of the child to
 * @oldctxt, copies the last error over, hands the node_seq fields back to
 * @oldctxt and clears them in the child before freeing it.
 *
 * NOTE(review): this listing omits some source lines (the embedded line
 * numbers have gaps); comments describe only what is visible.
 * NOTE(review): near the end, oldctxt->sizeentities, oldctxt->lastError and
 * oldctxt->node_seq are accessed with no visible NULL test, although the
 * nbentities update just before them is guarded by oldctxt != NULL and
 * xmlParseExternalEntity passes NULL for @oldctxt. This looks like a NULL
 * dereference for that caller -- confirm.
 * NOTE(review): the xmlNewDoc() failure path frees ctxt without restoring
 * ctxt->sax = oldsax, which the newRoot failure path does -- confirm
 * whether a caller supplied @sax can be freed there.
 */
12541 static xmlParserErrors
12542 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12543 xmlSAXHandlerPtr sax,
12544 void *user_data, int depth, const xmlChar *URL,
12545 const xmlChar *ID, xmlNodePtr *list) {
12546 xmlParserCtxtPtr ctxt;
12548 xmlNodePtr newRoot;
12549 xmlSAXHandlerPtr oldsax = NULL;
12550 xmlParserErrors ret = XML_ERR_OK;
12552 xmlCharEncoding enc;
/*
 * Entity nesting guard: beyond depth 40 the parse is refused unless the
 * parent context exists and has XML_PARSE_HUGE set.
 */
12554 if (((depth > 40) &&
12555 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12557 return(XML_ERR_ENTITY_LOOP);
12562 if ((URL == NULL) && (ID == NULL))
12563 return(XML_ERR_INTERNAL_ERROR);
12565 return(XML_ERR_INTERNAL_ERROR);
12568 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12569 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12570 ctxt->userData = ctxt;
/*
 * With a parent context, inherit its settings and borrow its node_seq
 * fields; without one, the fixed defaults further down are used.
 */
12571 if (oldctxt != NULL) {
12572 ctxt->_private = oldctxt->_private;
12573 ctxt->loadsubset = oldctxt->loadsubset;
12574 ctxt->validate = oldctxt->validate;
12575 ctxt->external = oldctxt->external;
12576 ctxt->record_info = oldctxt->record_info;
12577 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12578 ctxt->node_seq.length = oldctxt->node_seq.length;
12579 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12582 * Doing validity checking on chunk without context
12583 * doesn't make sense
12585 ctxt->_private = NULL;
12586 ctxt->validate = 0;
12587 ctxt->external = 2;
12588 ctxt->loadsubset = 0;
12591 oldsax = ctxt->sax;
12593 if (user_data != NULL)
12594 ctxt->userData = user_data;
12596 xmlDetectSAX2(ctxt);
12597 newDoc = xmlNewDoc(BAD_CAST "1.0");
12598 if (newDoc == NULL) {
12599 ctxt->node_seq.maximum = 0;
12600 ctxt->node_seq.length = 0;
12601 ctxt->node_seq.buffer = NULL;
12602 xmlFreeParserCtxt(ctxt);
12603 return(XML_ERR_INTERNAL_ERROR);
12605 newDoc->properties = XML_DOC_INTERNAL;
12606 newDoc->intSubset = doc->intSubset;
12607 newDoc->extSubset = doc->extSubset;
12608 newDoc->dict = doc->dict;
12609 xmlDictReference(newDoc->dict);
12611 if (doc->URL != NULL) {
12612 newDoc->URL = xmlStrdup(doc->URL);
12614 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12615 if (newRoot == NULL) {
12617 ctxt->sax = oldsax;
12618 ctxt->node_seq.maximum = 0;
12619 ctxt->node_seq.length = 0;
12620 ctxt->node_seq.buffer = NULL;
12621 xmlFreeParserCtxt(ctxt);
12622 newDoc->intSubset = NULL;
12623 newDoc->extSubset = NULL;
12624 xmlFreeDoc(newDoc);
12625 return(XML_ERR_INTERNAL_ERROR);
12627 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12628 nodePush(ctxt, newDoc->children);
12630 newRoot->doc = doc;
12633 * Get the 4 first bytes and decode the charset
12634 * if enc != XML_CHAR_ENCODING_NONE
12635 * plug some encoding conversion routines.
12638 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12643 enc = xmlDetectCharEncoding(start, 4);
12644 if (enc != XML_CHAR_ENCODING_NONE) {
12645 xmlSwitchEncoding(ctxt, enc);
12650 * Parse a possible text declaration first
12652 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12653 xmlParseTextDecl(ctxt);
12656 ctxt->instate = XML_PARSER_CONTENT;
12657 ctxt->depth = depth;
12659 xmlParseContent(ctxt);
12661 if ((RAW == '<') && (NXT(1) == '/')) {
12662 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12663 } else if (RAW != 0) {
12664 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12666 if (ctxt->node != newDoc->children) {
12667 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12670 if (!ctxt->wellFormed) {
12671 if (ctxt->errNo == 0)
12672 ret = XML_ERR_INTERNAL_ERROR;
12674 ret = (xmlParserErrors)ctxt->errNo;
12676 if (list != NULL) {
12680 * Return the newly created nodeset after unlinking it from
12681 * they pseudo parent.
12683 cur = newDoc->children->children;
12685 while (cur != NULL) {
12686 cur->parent = NULL;
12689 newDoc->children->children = NULL;
12695 * Record in the parent context the number of entities replacement
12696 * done when parsing that reference.
12698 if (oldctxt != NULL)
12699 oldctxt->nbentities += ctxt->nbentities;
12702 * Also record the size of the entity parsed
12704 if (ctxt->input != NULL) {
12705 oldctxt->sizeentities += ctxt->input->consumed;
12706 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12709 * And record the last error if any
12711 if (ctxt->lastError.code != XML_ERR_OK)
12712 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12715 ctxt->sax = oldsax;
12716 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12717 oldctxt->node_seq.length = ctxt->node_seq.length;
12718 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12719 ctxt->node_seq.maximum = 0;
12720 ctxt->node_seq.length = 0;
12721 ctxt->node_seq.buffer = NULL;
12722 xmlFreeParserCtxt(ctxt);
12723 newDoc->intSubset = NULL;
12724 newDoc->extSubset = NULL;
12725 xmlFreeDoc(newDoc);
12730 #ifdef LIBXML_SAX1_ENABLED
12732 * xmlParseExternalEntity:
12733 * @doc: the document the chunk pertains to
12734 * @sax: the SAX handler block (possibly NULL)
12735 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12736 * @depth: Used for loop detection, use 0
12737 * @URL: the URL for the entity to load
12738 * @ID: the System ID for the entity to load
12739 * @lst: the return value for the set of parsed nodes
12741 * Parse an external general entity
12742 * An external general parsed entity is well-formed if it matches the
12743 * production labeled extParsedEnt.
12745 * [78] extParsedEnt ::= TextDecl? content
12747 * Returns 0 if the entity is well formed, -1 in case of args problem and
12748 * the parser error code otherwise
/*
 * xmlParseExternalEntity: public wrapper around
 * xmlParseExternalEntityPrivate(); it forwards its arguments and passes
 * NULL as the parent parser context.
 *
 * NOTE(review): the end of the call is on a line not shown in this listing.
 */
12752 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12753 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12754 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12759 * xmlParseBalancedChunkMemory:
12760 * @doc: the document the chunk pertains to
12761 * @sax: the SAX handler block (possibly NULL)
12762 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12763 * @depth: Used for loop detection, use 0
12764 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12765 * @lst: the return value for the set of parsed nodes
12767 * Parse a well-balanced chunk of an XML document
12768 * called by the parser
12769 * The allowed sequence for the Well Balanced Chunk is the one defined by
12770 * the content production in the XML grammar:
12772 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12774 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12775 * the parser error code otherwise
/*
 * xmlParseBalancedChunkMemory: convenience wrapper; forwards to
 * xmlParseBalancedChunkMemoryRecover() with 0 as the last argument and
 * returns its result unchanged.
 */
12779 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12780 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12781 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12782 depth, string, lst, 0 );
12784 #endif /* LIBXML_SAX1_ENABLED */
12787 * xmlParseBalancedChunkMemoryInternal:
12788 * @oldctxt: the existing parsing context
12789 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12790 * @user_data: the user data field for the parser context
12791 * @lst: the return value for the set of parsed nodes
12794 * Parse a well-balanced chunk of an XML document
12795 * called by the parser
12796 * The allowed sequence for the Well Balanced Chunk is the one defined by
12797 * the content production in the XML grammar:
12799 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12801 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12802 * error code otherwise
12804 * In case recover is set to 1, the nodelist will not be empty even if
12805 * the parsed chunk is not well balanced.
12807 static xmlParserErrors
12808 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12809 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12810 xmlParserCtxtPtr ctxt;
12811 xmlDocPtr newDoc = NULL;
12812 xmlNodePtr newRoot;
12813 xmlSAXHandlerPtr oldsax = NULL;
12814 xmlNodePtr content = NULL;
12815 xmlNodePtr last = NULL;
12817 xmlParserErrors ret = XML_ERR_OK;
12822 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12823 (oldctxt->depth > 1024)) {
12824 return(XML_ERR_ENTITY_LOOP);
12830 if (string == NULL)
12831 return(XML_ERR_INTERNAL_ERROR);
12833 size = xmlStrlen(string);
12835 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12836 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12837 if (user_data != NULL)
12838 ctxt->userData = user_data;
12840 ctxt->userData = ctxt;
12841 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12842 ctxt->dict = oldctxt->dict;
12843 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12844 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12845 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12848 /* propagate namespaces down the entity */
12849 for (i = 0;i < oldctxt->nsNr;i += 2) {
12850 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12854 oldsax = ctxt->sax;
12855 ctxt->sax = oldctxt->sax;
12856 xmlDetectSAX2(ctxt);
12857 ctxt->replaceEntities = oldctxt->replaceEntities;
12858 ctxt->options = oldctxt->options;
12860 ctxt->_private = oldctxt->_private;
12861 if (oldctxt->myDoc == NULL) {
12862 newDoc = xmlNewDoc(BAD_CAST "1.0");
12863 if (newDoc == NULL) {
12864 ctxt->sax = oldsax;
12866 xmlFreeParserCtxt(ctxt);
12867 return(XML_ERR_INTERNAL_ERROR);
12869 newDoc->properties = XML_DOC_INTERNAL;
12870 newDoc->dict = ctxt->dict;
12871 xmlDictReference(newDoc->dict);
12872 ctxt->myDoc = newDoc;
12874 ctxt->myDoc = oldctxt->myDoc;
12875 content = ctxt->myDoc->children;
12876 last = ctxt->myDoc->last;
12878 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12879 if (newRoot == NULL) {
12880 ctxt->sax = oldsax;
12882 xmlFreeParserCtxt(ctxt);
12883 if (newDoc != NULL) {
12884 xmlFreeDoc(newDoc);
12886 return(XML_ERR_INTERNAL_ERROR);
12888 ctxt->myDoc->children = NULL;
12889 ctxt->myDoc->last = NULL;
12890 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12891 nodePush(ctxt, ctxt->myDoc->children);
12892 ctxt->instate = XML_PARSER_CONTENT;
12893 ctxt->depth = oldctxt->depth + 1;
12895 ctxt->validate = 0;
12896 ctxt->loadsubset = oldctxt->loadsubset;
12897 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12899 * ID/IDREF registration will be done in xmlValidateElement below
12901 ctxt->loadsubset |= XML_SKIP_IDS;
12903 ctxt->dictNames = oldctxt->dictNames;
12904 ctxt->attsDefault = oldctxt->attsDefault;
12905 ctxt->attsSpecial = oldctxt->attsSpecial;
12907 xmlParseContent(ctxt);
12908 if ((RAW == '<') && (NXT(1) == '/')) {
12909 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12910 } else if (RAW != 0) {
12911 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12913 if (ctxt->node != ctxt->myDoc->children) {
12914 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12917 if (!ctxt->wellFormed) {
12918 if (ctxt->errNo == 0)
12919 ret = XML_ERR_INTERNAL_ERROR;
12921 ret = (xmlParserErrors)ctxt->errNo;
12926 if ((lst != NULL) && (ret == XML_ERR_OK)) {
12930 * Return the newly created nodeset after unlinking it from
12931 * the pseudo parent.
12933 cur = ctxt->myDoc->children->children;
12935 while (cur != NULL) {
12936 #ifdef LIBXML_VALID_ENABLED
12937 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12938 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12939 (cur->type == XML_ELEMENT_NODE)) {
12940 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12941 oldctxt->myDoc, cur);
12943 #endif /* LIBXML_VALID_ENABLED */
12944 cur->parent = NULL;
12947 ctxt->myDoc->children->children = NULL;
12949 if (ctxt->myDoc != NULL) {
12950 xmlFreeNode(ctxt->myDoc->children);
12951 ctxt->myDoc->children = content;
12952 ctxt->myDoc->last = last;
12956 * Record in the parent context the number of entities replacement
12957 * done when parsing that reference.
12959 if (oldctxt != NULL)
12960 oldctxt->nbentities += ctxt->nbentities;
12963 * Also record the last error if any
12965 if (ctxt->lastError.code != XML_ERR_OK)
12966 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12968 ctxt->sax = oldsax;
12970 ctxt->attsDefault = NULL;
12971 ctxt->attsSpecial = NULL;
12972 xmlFreeParserCtxt(ctxt);
12973 if (newDoc != NULL) {
12974 xmlFreeDoc(newDoc);
12981 * xmlParseInNodeContext:
12982 * @node: the context node
12983 * @data: the input string
12984 * @datalen: the input string length in bytes
12985 * @options: a combination of xmlParserOption
12986 * @lst: the return value for the set of parsed nodes
12988 * Parse a well-balanced chunk of an XML document
12989 * within the context (DTD, namespaces, etc ...) of the given node.
12991 * The allowed sequence for the data is a Well Balanced Chunk defined by
12992 * the content production in the XML grammar:
12994 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12996 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12997 * error code otherwise
13000 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13001 int options, xmlNodePtr *lst) {
13003 xmlParserCtxtPtr ctxt;
13004 xmlDocPtr doc = NULL;
13005 xmlNodePtr fake, cur;
13008 xmlParserErrors ret = XML_ERR_OK;
13011 * check all input parameters, grab the document
13013 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13014 return(XML_ERR_INTERNAL_ERROR);
13015 switch (node->type) {
13016 case XML_ELEMENT_NODE:
13017 case XML_ATTRIBUTE_NODE:
13018 case XML_TEXT_NODE:
13019 case XML_CDATA_SECTION_NODE:
13020 case XML_ENTITY_REF_NODE:
13022 case XML_COMMENT_NODE:
13023 case XML_DOCUMENT_NODE:
13024 case XML_HTML_DOCUMENT_NODE:
13027 return(XML_ERR_INTERNAL_ERROR);
13030 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13031 (node->type != XML_DOCUMENT_NODE) &&
13032 (node->type != XML_HTML_DOCUMENT_NODE))
13033 node = node->parent;
13035 return(XML_ERR_INTERNAL_ERROR);
13036 if (node->type == XML_ELEMENT_NODE)
13039 doc = (xmlDocPtr) node;
13041 return(XML_ERR_INTERNAL_ERROR);
13044 * allocate a context and set-up everything not related to the
13045 * node position in the tree
13047 if (doc->type == XML_DOCUMENT_NODE)
13048 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13049 #ifdef LIBXML_HTML_ENABLED
13050 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13051 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13053 * When parsing in context, it makes no sense to add implied
13054 * elements like html/body/etc...
13056 options |= HTML_PARSE_NOIMPLIED;
13060 return(XML_ERR_INTERNAL_ERROR);
13063 return(XML_ERR_NO_MEMORY);
13066 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13067 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13068 * we must wait until the last moment to free the original one.
13070 if (doc->dict != NULL) {
13071 if (ctxt->dict != NULL)
13072 xmlDictFree(ctxt->dict);
13073 ctxt->dict = doc->dict;
13075 options |= XML_PARSE_NODICT;
13077 if (doc->encoding != NULL) {
13078 xmlCharEncodingHandlerPtr hdlr;
13080 if (ctxt->encoding != NULL)
13081 xmlFree((xmlChar *) ctxt->encoding);
13082 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13084 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13085 if (hdlr != NULL) {
13086 xmlSwitchToEncoding(ctxt, hdlr);
13088 return(XML_ERR_UNSUPPORTED_ENCODING);
13092 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13093 xmlDetectSAX2(ctxt);
13096 fake = xmlNewComment(NULL);
13097 if (fake == NULL) {
13098 xmlFreeParserCtxt(ctxt);
13099 return(XML_ERR_NO_MEMORY);
13101 xmlAddChild(node, fake);
13103 if (node->type == XML_ELEMENT_NODE) {
13104 nodePush(ctxt, node);
13106 * initialize the SAX2 namespaces stack
13109 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13110 xmlNsPtr ns = cur->nsDef;
13111 const xmlChar *iprefix, *ihref;
13113 while (ns != NULL) {
13115 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13116 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13118 iprefix = ns->prefix;
13122 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13123 nsPush(ctxt, iprefix, ihref);
13130 ctxt->instate = XML_PARSER_CONTENT;
13133 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13135 * ID/IDREF registration will be done in xmlValidateElement below
13137 ctxt->loadsubset |= XML_SKIP_IDS;
13140 #ifdef LIBXML_HTML_ENABLED
13141 if (doc->type == XML_HTML_DOCUMENT_NODE)
13142 __htmlParseContent(ctxt);
13145 xmlParseContent(ctxt);
13148 if ((RAW == '<') && (NXT(1) == '/')) {
13149 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13150 } else if (RAW != 0) {
13151 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13153 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13154 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13155 ctxt->wellFormed = 0;
13158 if (!ctxt->wellFormed) {
13159 if (ctxt->errNo == 0)
13160 ret = XML_ERR_INTERNAL_ERROR;
13162 ret = (xmlParserErrors)ctxt->errNo;
13168 * Return the newly created nodeset after unlinking it from
13169 * the pseudo sibling.
13182 while (cur != NULL) {
13183 cur->parent = NULL;
13187 xmlUnlinkNode(fake);
13191 if (ret != XML_ERR_OK) {
13192 xmlFreeNodeList(*lst);
13196 if (doc->dict != NULL)
13198 xmlFreeParserCtxt(ctxt);
13202 return(XML_ERR_INTERNAL_ERROR);
13206 #ifdef LIBXML_SAX1_ENABLED
13208 * xmlParseBalancedChunkMemoryRecover:
13209 * @doc: the document the chunk pertains to
13210 * @sax: the SAX handler block (possibly NULL)
13211 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13212 * @depth: Used for loop detection, use 0
13213 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13214 * @lst: the return value for the set of parsed nodes
13215 * @recover: return nodes even if the data is broken (use 0)
13218 * Parse a well-balanced chunk of an XML document
13219 * called by the parser
13220 * The allowed sequence for the Well Balanced Chunk is the one defined by
13221 * the content production in the XML grammar:
13223 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13225 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13226 * the parser error code otherwise
13228 * In case recover is set to 1, the nodelist will not be empty even if
13229 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13233 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13234 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13236 xmlParserCtxtPtr ctxt;
13238 xmlSAXHandlerPtr oldsax = NULL;
13239 xmlNodePtr content, newRoot;
13244 return(XML_ERR_ENTITY_LOOP);
13250 if (string == NULL)
13253 size = xmlStrlen(string);
13255 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13256 if (ctxt == NULL) return(-1);
13257 ctxt->userData = ctxt;
13259 oldsax = ctxt->sax;
13261 if (user_data != NULL)
13262 ctxt->userData = user_data;
13264 newDoc = xmlNewDoc(BAD_CAST "1.0");
13265 if (newDoc == NULL) {
13266 xmlFreeParserCtxt(ctxt);
13269 newDoc->properties = XML_DOC_INTERNAL;
13270 if ((doc != NULL) && (doc->dict != NULL)) {
13271 xmlDictFree(ctxt->dict);
13272 ctxt->dict = doc->dict;
13273 xmlDictReference(ctxt->dict);
13274 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13275 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13276 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13277 ctxt->dictNames = 1;
13279 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13282 newDoc->intSubset = doc->intSubset;
13283 newDoc->extSubset = doc->extSubset;
13285 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13286 if (newRoot == NULL) {
13288 ctxt->sax = oldsax;
13289 xmlFreeParserCtxt(ctxt);
13290 newDoc->intSubset = NULL;
13291 newDoc->extSubset = NULL;
13292 xmlFreeDoc(newDoc);
13295 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13296 nodePush(ctxt, newRoot);
13298 ctxt->myDoc = newDoc;
13300 ctxt->myDoc = newDoc;
13301 newDoc->children->doc = doc;
13302 /* Ensure that doc has XML spec namespace */
13303 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13304 newDoc->oldNs = doc->oldNs;
13306 ctxt->instate = XML_PARSER_CONTENT;
13307 ctxt->depth = depth;
13310 * Doing validity checking on chunk doesn't make sense
13312 ctxt->validate = 0;
13313 ctxt->loadsubset = 0;
13314 xmlDetectSAX2(ctxt);
13316 if ( doc != NULL ){
13317 content = doc->children;
13318 doc->children = NULL;
13319 xmlParseContent(ctxt);
13320 doc->children = content;
13323 xmlParseContent(ctxt);
13325 if ((RAW == '<') && (NXT(1) == '/')) {
13326 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13327 } else if (RAW != 0) {
13328 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13330 if (ctxt->node != newDoc->children) {
13331 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13334 if (!ctxt->wellFormed) {
13335 if (ctxt->errNo == 0)
13343 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13347 * Return the newly created nodeset after unlinking it from
13348 * the pseudo parent.
13350 cur = newDoc->children->children;
13352 while (cur != NULL) {
13353 xmlSetTreeDoc(cur, doc);
13354 cur->parent = NULL;
13357 newDoc->children->children = NULL;
13361 ctxt->sax = oldsax;
13362 xmlFreeParserCtxt(ctxt);
13363 newDoc->intSubset = NULL;
13364 newDoc->extSubset = NULL;
13365 newDoc->oldNs = NULL;
13366 xmlFreeDoc(newDoc);
13372 * xmlSAXParseEntity:
13373 * @sax: the SAX handler block
13374 * @filename: the filename
13376 * parse an XML external entity out of context and build a tree.
13377 * It uses the given SAX function block to handle the parsing callback.
13378 * If sax is NULL, fallback to the default DOM tree building routines.
13380 * [78] extParsedEnt ::= TextDecl? content
13382 * This corresponds to a "Well Balanced" chunk
13384 * Returns the resulting document tree
13388 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13390 xmlParserCtxtPtr ctxt;
13392 ctxt = xmlCreateFileParserCtxt(filename);
13393 if (ctxt == NULL) {
13397 if (ctxt->sax != NULL)
13398 xmlFree(ctxt->sax);
13400 ctxt->userData = NULL;
13403 xmlParseExtParsedEnt(ctxt);
13405 if (ctxt->wellFormed)
13409 xmlFreeDoc(ctxt->myDoc);
13410 ctxt->myDoc = NULL;
13414 xmlFreeParserCtxt(ctxt);
13421 * @filename: the filename
13423 * parse an XML external entity out of context and build a tree.
13425 * [78] extParsedEnt ::= TextDecl? content
13427 * This corresponds to a "Well Balanced" chunk
13429 * Returns the resulting document tree
13433 xmlParseEntity(const char *filename) {
13434 return(xmlSAXParseEntity(NULL, filename));
13436 #endif /* LIBXML_SAX1_ENABLED */
13439 * xmlCreateEntityParserCtxtInternal:
13440 * @URL: the entity URL
13441 * @ID: the entity PUBLIC ID
13442 * @base: a possible base for the target URI
13443 * @pctx: parser context used to set options on new context
13445 * Create a parser context for an external entity
13446 * Automatic support for ZLIB/Compress compressed document is provided
13447 * by default if found at compile-time.
13449 * Returns the new parser context or NULL
13451 static xmlParserCtxtPtr
13452 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13453 const xmlChar *base, xmlParserCtxtPtr pctx) {
13454 xmlParserCtxtPtr ctxt;
13455 xmlParserInputPtr inputStream;
13456 char *directory = NULL;
13459 ctxt = xmlNewParserCtxt();
13460 if (ctxt == NULL) {
13464 if (pctx != NULL) {
13465 ctxt->options = pctx->options;
13466 ctxt->_private = pctx->_private;
13469 uri = xmlBuildURI(URL, base);
13472 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13473 if (inputStream == NULL) {
13474 xmlFreeParserCtxt(ctxt);
13478 inputPush(ctxt, inputStream);
13480 if ((ctxt->directory == NULL) && (directory == NULL))
13481 directory = xmlParserGetDirectory((char *)URL);
13482 if ((ctxt->directory == NULL) && (directory != NULL))
13483 ctxt->directory = directory;
13485 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13486 if (inputStream == NULL) {
13488 xmlFreeParserCtxt(ctxt);
13492 inputPush(ctxt, inputStream);
13494 if ((ctxt->directory == NULL) && (directory == NULL))
13495 directory = xmlParserGetDirectory((char *)uri);
13496 if ((ctxt->directory == NULL) && (directory != NULL))
13497 ctxt->directory = directory;
13504 * xmlCreateEntityParserCtxt:
13505 * @URL: the entity URL
13506 * @ID: the entity PUBLIC ID
13507 * @base: a possible base for the target URI
13509 * Create a parser context for an external entity
13510 * Automatic support for ZLIB/Compress compressed document is provided
13511 * by default if found at compile-time.
13513 * Returns the new parser context or NULL
13516 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13517 const xmlChar *base) {
13518 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13522 /************************************************************************
13524 * Front ends when parsing from a file *
13526 ************************************************************************/
13529 * xmlCreateURLParserCtxt:
13530 * @filename: the filename or URL
13531 * @options: a combination of xmlParserOption
13533 * Create a parser context for a file or URL content.
13534 * Automatic support for ZLIB/Compress compressed document is provided
13535 * by default if found at compile-time and for file accesses
13537 * Returns the new parser context or NULL
13540 xmlCreateURLParserCtxt(const char *filename, int options)
13542 xmlParserCtxtPtr ctxt;
13543 xmlParserInputPtr inputStream;
13544 char *directory = NULL;
13546 ctxt = xmlNewParserCtxt();
13547 if (ctxt == NULL) {
13548 xmlErrMemory(NULL, "cannot allocate parser context");
13553 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13554 ctxt->linenumbers = 1;
13556 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13557 if (inputStream == NULL) {
13558 xmlFreeParserCtxt(ctxt);
13562 inputPush(ctxt, inputStream);
13563 if ((ctxt->directory == NULL) && (directory == NULL))
13564 directory = xmlParserGetDirectory(filename);
13565 if ((ctxt->directory == NULL) && (directory != NULL))
13566 ctxt->directory = directory;
13572 * xmlCreateFileParserCtxt:
13573 * @filename: the filename
13575 * Create a parser context for a file content.
13576 * Automatic support for ZLIB/Compress compressed document is provided
13577 * by default if found at compile-time.
13579 * Returns the new parser context or NULL
13582 xmlCreateFileParserCtxt(const char *filename)
13584 return(xmlCreateURLParserCtxt(filename, 0));
13587 #ifdef LIBXML_SAX1_ENABLED
13589 * xmlSAXParseFileWithData:
13590 * @sax: the SAX handler block
13591 * @filename: the filename
13592 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13594 * @data: the userdata
13596 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13597 * compressed document is provided by default if found at compile-time.
13598 * It uses the given SAX function block to handle the parsing callback.
13599 * If sax is NULL, fallback to the default DOM tree building routines.
13601 * User data (void *) is stored within the parser context in the
13602 * context's _private member, so it is available nearly everywhere in libxml
13604 * Returns the resulting document tree
13608 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13609 int recovery, void *data) {
13611 xmlParserCtxtPtr ctxt;
13615 ctxt = xmlCreateFileParserCtxt(filename);
13616 if (ctxt == NULL) {
13620 if (ctxt->sax != NULL)
13621 xmlFree(ctxt->sax);
13624 xmlDetectSAX2(ctxt);
13626 ctxt->_private = data;
13629 if (ctxt->directory == NULL)
13630 ctxt->directory = xmlParserGetDirectory(filename);
13632 ctxt->recovery = recovery;
13634 xmlParseDocument(ctxt);
13636 if ((ctxt->wellFormed) || recovery) {
13639 if (ctxt->input->buf->compressed > 0)
13640 ret->compression = 9;
13642 ret->compression = ctxt->input->buf->compressed;
13647 xmlFreeDoc(ctxt->myDoc);
13648 ctxt->myDoc = NULL;
13652 xmlFreeParserCtxt(ctxt);
13659 * @sax: the SAX handler block
13660 * @filename: the filename
13661 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13664 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13665 * compressed document is provided by default if found at compile-time.
13666 * It uses the given SAX function block to handle the parsing callback.
13667 * If sax is NULL, fallback to the default DOM tree building routines.
13669 * Returns the resulting document tree
13673 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13675 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13680 * @cur: a pointer to an array of xmlChar
13682 * parse an XML in-memory document and build a tree.
13683 * In the case the document is not Well Formed, an attempt to build a
13684 * tree is tried anyway
13686 * Returns the resulting document tree or NULL in case of failure
13690 xmlRecoverDoc(const xmlChar *cur) {
13691 return(xmlSAXParseDoc(NULL, cur, 1));
13696 * @filename: the filename
13698 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13699 * compressed document is provided by default if found at compile-time.
13701 * Returns the resulting document tree if the file was wellformed,
13706 xmlParseFile(const char *filename) {
13707 return(xmlSAXParseFile(NULL, filename, 0));
13712 * @filename: the filename
13714 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13715 * compressed document is provided by default if found at compile-time.
13716 * In the case the document is not Well Formed, it attempts to build
13719 * Returns the resulting document tree or NULL in case of failure
13723 xmlRecoverFile(const char *filename) {
13724 return(xmlSAXParseFile(NULL, filename, 1));
13729 * xmlSetupParserForBuffer:
13730 * @ctxt: an XML parser context
13731 * @buffer: a xmlChar * buffer
13732 * @filename: a file name
13734 * Setup the parser context to parse a new buffer; Clears any prior
13735 * contents from the parser context. The buffer parameter must not be
13736 * NULL, but the filename parameter can be
13739 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13740 const char* filename)
13742 xmlParserInputPtr input;
13744 if ((ctxt == NULL) || (buffer == NULL))
13747 input = xmlNewInputStream(ctxt);
13748 if (input == NULL) {
13749 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13750 xmlClearParserCtxt(ctxt);
13754 xmlClearParserCtxt(ctxt);
13755 if (filename != NULL)
13756 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13757 input->base = buffer;
13758 input->cur = buffer;
13759 input->end = &buffer[xmlStrlen(buffer)];
13760 inputPush(ctxt, input);
13764 * xmlSAXUserParseFile:
13765 * @sax: a SAX handler
13766 * @user_data: The user data returned on SAX callbacks
13767 * @filename: a file name
13769 * parse an XML file and call the given SAX handler routines.
13770 * Automatic support for ZLIB/Compress compressed document is provided
13772 * Returns 0 in case of success or an error number otherwise
13775 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13776 const char *filename) {
13778 xmlParserCtxtPtr ctxt;
13780 ctxt = xmlCreateFileParserCtxt(filename);
13781 if (ctxt == NULL) return -1;
13782 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13783 xmlFree(ctxt->sax);
13785 xmlDetectSAX2(ctxt);
13787 if (user_data != NULL)
13788 ctxt->userData = user_data;
13790 xmlParseDocument(ctxt);
13792 if (ctxt->wellFormed)
13795 if (ctxt->errNo != 0)
13802 if (ctxt->myDoc != NULL) {
13803 xmlFreeDoc(ctxt->myDoc);
13804 ctxt->myDoc = NULL;
13806 xmlFreeParserCtxt(ctxt);
13810 #endif /* LIBXML_SAX1_ENABLED */
13812 /************************************************************************
13814 * Front ends when parsing from memory *
13816 ************************************************************************/
13819 * xmlCreateMemoryParserCtxt:
13820 * @buffer: a pointer to a char array
13821 * @size: the size of the array
13823 * Create a parser context for an XML in-memory document.
13825 * Returns the new parser context or NULL
13828 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13829 xmlParserCtxtPtr ctxt;
13830 xmlParserInputPtr input;
13831 xmlParserInputBufferPtr buf;
13833 if (buffer == NULL)
13838 ctxt = xmlNewParserCtxt();
13842 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13843 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13845 xmlFreeParserCtxt(ctxt);
13849 input = xmlNewInputStream(ctxt);
13850 if (input == NULL) {
13851 xmlFreeParserInputBuffer(buf);
13852 xmlFreeParserCtxt(ctxt);
13856 input->filename = NULL;
13858 input->base = input->buf->buffer->content;
13859 input->cur = input->buf->buffer->content;
13860 input->end = &input->buf->buffer->content[input->buf->buffer->use];
13862 inputPush(ctxt, input);
13866 #ifdef LIBXML_SAX1_ENABLED
13868 * xmlSAXParseMemoryWithData:
13869 * @sax: the SAX handler block
13870 * @buffer: a pointer to a char array
13871 * @size: the size of the array
13872 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13874 * @data: the userdata
13876 * parse an XML in-memory block and use the given SAX function block
13877 * to handle the parsing callback. If sax is NULL, fallback to the default
13878 * DOM tree building routines.
13880 * User data (void *) is stored within the parser context in the
13881 * context's _private member, so it is available nearly everywhere in libxml
13883 * Returns the resulting document tree
13887 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13888 int size, int recovery, void *data) {
13890 xmlParserCtxtPtr ctxt;
13894 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13895 if (ctxt == NULL) return(NULL);
13897 if (ctxt->sax != NULL)
13898 xmlFree(ctxt->sax);
13901 xmlDetectSAX2(ctxt);
13903 ctxt->_private=data;
13906 ctxt->recovery = recovery;
13908 xmlParseDocument(ctxt);
13910 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13913 xmlFreeDoc(ctxt->myDoc);
13914 ctxt->myDoc = NULL;
13918 xmlFreeParserCtxt(ctxt);
13924 * xmlSAXParseMemory:
13925 * @sax: the SAX handler block
13926 * @buffer: a pointer to a char array
13927 * @size: the size of the array
13928 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13931 * parse an XML in-memory block and use the given SAX function block
13932 * to handle the parsing callback. If sax is NULL, fallback to the default
13933 * DOM tree building routines.
13935 * Returns the resulting document tree
13938 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13939 int size, int recovery) {
13940 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13945 * @buffer: a pointer to a char array
13946 * @size: the size of the array
13948 * parse an XML in-memory block and build a tree.
13950 * Returns the resulting document tree
13953 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13954 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13958 * xmlRecoverMemory:
13959 * @buffer: a pointer to a char array
13960 * @size: the size of the array
13962 * parse an XML in-memory block and build a tree.
13963 * In the case the document is not Well Formed, an attempt to
13964 * build a tree is tried anyway
13966 * Returns the resulting document tree or NULL in case of error
13969 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13970 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13974 * xmlSAXUserParseMemory:
13975 * @sax: a SAX handler
13976 * @user_data: The user data returned on SAX callbacks
13977 * @buffer: an in-memory XML document input
13978 * @size: the length of the XML document in bytes
13980 * A better SAX parsing routine.
13981 * parse an XML in-memory buffer and call the given SAX handler routines.
13983 * Returns 0 in case of success or an error number otherwise
13985 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13986 const char *buffer, int size) {
13988 xmlParserCtxtPtr ctxt;
13992 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13993 if (ctxt == NULL) return -1;
13994 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13995 xmlFree(ctxt->sax);
13997 xmlDetectSAX2(ctxt);
13999 if (user_data != NULL)
14000 ctxt->userData = user_data;
14002 xmlParseDocument(ctxt);
14004 if (ctxt->wellFormed)
14007 if (ctxt->errNo != 0)
14014 if (ctxt->myDoc != NULL) {
14015 xmlFreeDoc(ctxt->myDoc);
14016 ctxt->myDoc = NULL;
14018 xmlFreeParserCtxt(ctxt);
14022 #endif /* LIBXML_SAX1_ENABLED */
14025 * xmlCreateDocParserCtxt:
14026 * @cur: a pointer to an array of xmlChar
14028 * Creates a parser context for an XML in-memory document.
14030 * Returns the new parser context or NULL
14033 xmlCreateDocParserCtxt(const xmlChar *cur) {
14038 len = xmlStrlen(cur);
14039 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14042 #ifdef LIBXML_SAX1_ENABLED
14045 * @sax: the SAX handler block
14046 * @cur: a pointer to an array of xmlChar
14047 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14050 * parse an XML in-memory document and build a tree.
14051 * It uses the given SAX function block to handle the parsing callback.
14052 * If sax is NULL, fallback to the default DOM tree building routines.
14054 * Returns the resulting document tree
14058 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14060 xmlParserCtxtPtr ctxt;
14061 xmlSAXHandlerPtr oldsax = NULL;
14063 if (cur == NULL) return(NULL);
14066 ctxt = xmlCreateDocParserCtxt(cur);
14067 if (ctxt == NULL) return(NULL);
14069 oldsax = ctxt->sax;
14071 ctxt->userData = NULL;
14073 xmlDetectSAX2(ctxt);
14075 xmlParseDocument(ctxt);
14076 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14079 xmlFreeDoc(ctxt->myDoc);
14080 ctxt->myDoc = NULL;
14083 ctxt->sax = oldsax;
14084 xmlFreeParserCtxt(ctxt);
14091 * @cur: a pointer to an array of xmlChar
14093 * parse an XML in-memory document and build a tree.
14095 * Returns the resulting document tree
14099 xmlParseDoc(const xmlChar *cur) {
14100 return(xmlSAXParseDoc(NULL, cur, 0));
14102 #endif /* LIBXML_SAX1_ENABLED */
14104 #ifdef LIBXML_LEGACY_ENABLED
14105 /************************************************************************
14107 * Specific function to keep track of entities references *
14108 * and used by the XSLT debugger *
14110 ************************************************************************/
14112 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14115 * xmlAddEntityReference:
14116 * @ent : A valid entity
14117 * @firstNode : A valid first node for children of entity
14118 * @lastNode : A valid last node of children entity
14120 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14123 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14124 xmlNodePtr lastNode)
14126 if (xmlEntityRefFunc != NULL) {
14127 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14133 * xmlSetEntityReferenceFunc:
14134 * @func: A valid function
14136 * Set the function to call back when an XML reference has been made
14139 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14141 xmlEntityRefFunc = func;
14143 #endif /* LIBXML_LEGACY_ENABLED */
14145 /************************************************************************
14149 ************************************************************************/
14151 #ifdef LIBXML_XPATH_ENABLED
14152 #include <libxml/xpath.h>
14155 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser() and back to 0 at the end of
 * xmlCleanupParser(); both functions test it before doing any work. */
14156 static int xmlParserInitialized = 0;
14161 * Initialization function for the XML parser.
14162 * This is not reentrant. Call once before processing in case of
14163 * use in multithreaded programs.
14167 xmlInitParser(void) {
    /*
     * One-time set-up of the library's global state, guarded by the
     * xmlParserInitialized flag. The flag is first tested without any lock;
     * when LIBXML_THREAD_ENABLED is defined it is tested again after
     * __xmlGlobalInitMutexLock() has been taken.
     * NOTE(review): the first test is a plain, unsynchronised read of a
     * static int - confirm this is acceptable for the supported threading
     * models.
     */
14168 if (xmlParserInitialized != 0)
14171 #ifdef LIBXML_THREAD_ENABLED
14172 __xmlGlobalInitMutexLock();
14173 if (xmlParserInitialized == 0) {
    /* initGenericErrorDefaultFunc(NULL) is called only while
     * xmlGenericError is unset or still the built-in default function. */
14177 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14178 (xmlGenericError == NULL))
14179 initGenericErrorDefaultFunc(NULL);
    /* Initialise the other subsystems through their own entry points; the
     * output and HTML ones only when compiled in. */
14181 xmlInitializeDict();
14182 xmlInitCharEncodingHandlers();
14183 xmlDefaultSAXHandlerInit();
14184 xmlRegisterDefaultInputCallbacks();
14185 #ifdef LIBXML_OUTPUT_ENABLED
14186 xmlRegisterDefaultOutputCallbacks();
14187 #endif /* LIBXML_OUTPUT_ENABLED */
14188 #ifdef LIBXML_HTML_ENABLED
14189 htmlInitAutoClose();
14190 htmlDefaultSAXHandlerInit();
14192 #ifdef LIBXML_XPATH_ENABLED
    /* From here on the tests at the top of the function see a non-zero
     * flag. */
14195 xmlParserInitialized = 1;
14196 #ifdef LIBXML_THREAD_ENABLED
14198 __xmlGlobalInitMutexUnlock();
14203 * xmlCleanupParser:
14205 * This function name is somewhat misleading. It does not clean up
14206 * parser state, it cleans up memory allocated by the library itself.
14207 * It is a cleanup function for the XML library. It tries to reclaim all
14208 * related global memory allocated for the library processing.
14209 * It doesn't deallocate any document related memory. One should
14210 * call xmlCleanupParser() only when the process has finished using
14211 * the library and all XML/HTML documents built with it.
14212 * See also xmlInitParser() which has the opposite function of preparing
14213 * the library for operations.
14215 * WARNING: if your application is multithreaded or has plugin support
14216 * calling this may crash the application if another thread or
14217 * a plugin is still using libxml2. It's sometimes very hard to
14218 * guess if libxml2 is in use in the application, some libraries
14219 * or plugins may use it without notice. In case of doubt abstain
14220 * from calling this function or do it just before calling exit()
14221 * to avoid leak reports from valgrind !
14225 xmlCleanupParser(void) {
    /*
     * Release the library-wide state set up by xmlInitParser(). The
     * xmlParserInitialized flag is tested first and cleared at the end.
     */
14226 if (!xmlParserInitialized)
    /* Tear down each subsystem through its own cleanup function; the
     * catalog, output and schema ones only when compiled in. */
14229 xmlCleanupCharEncodingHandlers();
14230 #ifdef LIBXML_CATALOG_ENABLED
14231 xmlCatalogCleanup();
14234 xmlCleanupInputCallbacks();
14235 #ifdef LIBXML_OUTPUT_ENABLED
14236 xmlCleanupOutputCallbacks();
14238 #ifdef LIBXML_SCHEMAS_ENABLED
14239 xmlSchemaCleanupTypes();
14240 xmlRelaxNGCleanupTypes();
14242 xmlCleanupGlobals();
14243 xmlResetLastError();
    /* NOTE(review): the trailing comment below says xmlCleanupThreads()
     * must be last, yet xmlCleanupMemory() is called after it - confirm
     * the intended order. */
14244 xmlCleanupThreads(); /* must be last if called not from the main thread */
14245 xmlCleanupMemory();
    /* A later xmlInitParser() call will run the full initialisation again. */
14246 xmlParserInitialized = 0;
14249 /************************************************************************
14251 * New set (2.6.0) of simpler and more flexible APIs *
14253 ************************************************************************/
14259 * Free a string if it is not owned by the "dict" dictionary in the
/*
 * DICT_FREE(str): release @str with xmlFree() unless it is NULL or owned by
 * the dictionary `dict`, a variable that must be in scope where the macro is
 * used. @str is evaluated more than once, so pass a side-effect-free
 * expression. The do { } while (0) wrapper makes the expansion a single
 * statement, so "DICT_FREE(x);" is safe inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14269 * @ctxt: an XML parser context
14271 * Reset a parser context
14274 xmlCtxtReset(xmlParserCtxtPtr ctxt)
    /*
     * Bring @ctxt back to a start-of-parse state so that it can be reused:
     * pending input streams are freed, strings and the document held by the
     * context are released, the state fields are set back to their initial
     * values, and the attribute hash tables, the local catalogs and the last
     * error are dropped.
     */
14276 xmlParserInputPtr input;
    /* Pop and free every input stream still held by the context. */
14284 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14285 xmlFreeInputStream(input);
14288 ctxt->input = NULL;
    /* With a spaceTab present, its first slot is reset to -1 and
     * ctxt->space points at that slot. */
14291 if (ctxt->spaceTab != NULL) {
14292 ctxt->spaceTab[0] = -1;
14293 ctxt->space = &ctxt->spaceTab[0];
14295 ctxt->space = NULL;
    /* Release the strings held by the context. DICT_FREE() consults the
     * `dict` variable in scope and skips strings that dictionary owns; each
     * field is cleared right after. */
14305 DICT_FREE(ctxt->version);
14306 ctxt->version = NULL;
14307 DICT_FREE(ctxt->encoding);
14308 ctxt->encoding = NULL;
14309 DICT_FREE(ctxt->directory);
14310 ctxt->directory = NULL;
14311 DICT_FREE(ctxt->extSubURI);
14312 ctxt->extSubURI = NULL;
14313 DICT_FREE(ctxt->extSubSystem);
14314 ctxt->extSubSystem = NULL;
    /* A document still attached to the context is freed here. */
14315 if (ctxt->myDoc != NULL)
14316 xmlFreeDoc(ctxt->myDoc);
14317 ctxt->myDoc = NULL;
    /* Scalar parsing state goes back to its initial values. */
14319 ctxt->standalone = -1;
14320 ctxt->hasExternalSubset = 0;
14321 ctxt->hasPErefs = 0;
14323 ctxt->external = 0;
14324 ctxt->instate = XML_PARSER_START;
14327 ctxt->wellFormed = 1;
14328 ctxt->nsWellFormed = 1;
14329 ctxt->disableSAX = 0;
14332 ctxt->vctxt.userData = ctxt;
14333 ctxt->vctxt.error = xmlParserValidityError;
14334 ctxt->vctxt.warning = xmlParserValidityWarning;
14336 ctxt->record_info = 0;
14338 ctxt->checkIndex = 0;
14339 ctxt->inSubset = 0;
14340 ctxt->errNo = XML_ERR_OK;
14342 ctxt->charset = XML_CHAR_ENCODING_UTF8;
    /*
     * Free the local catalogs BEFORE the pointer is cleared. The pointer
     * used to be set to NULL here first, which made the later
     * "catalogs != NULL" test always false and leaked the catalogs on
     * every reset.
     */
#ifdef LIBXML_CATALOG_ENABLED
    if (ctxt->catalogs != NULL)
        xmlCatalogFreeLocal(ctxt->catalogs);
#endif
    ctxt->catalogs = NULL;
14344 ctxt->nbentities = 0;
14345 ctxt->sizeentities = 0;
14346 xmlInitNodeInfoSeq(&ctxt->node_seq);
    /* attsDefault entries are released with xmlFree; attsSpecial is freed
     * with no per-entry deallocator. */
14348 if (ctxt->attsDefault != NULL) {
14349 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14350 ctxt->attsDefault = NULL;
14352 if (ctxt->attsSpecial != NULL) {
14353 xmlHashFree(ctxt->attsSpecial, NULL);
14354 ctxt->attsSpecial = NULL;
    /* NOTE(review): redundant now - ctxt->catalogs has already been freed
     * and cleared above, so this test is always false; it can be removed
     * together with its #ifdef/#endif pair. */
14357 #ifdef LIBXML_CATALOG_ENABLED
14358 if (ctxt->catalogs != NULL)
14359 xmlCatalogFreeLocal(ctxt->catalogs);
    /* Only touch lastError when it actually holds an error. */
14361 if (ctxt->lastError.code != XML_ERR_OK)
14362 xmlResetError(&ctxt->lastError);
14366 * xmlCtxtResetPush:
14367 * @ctxt: an XML parser context
14368 * @chunk: a pointer to an array of chars
14369 * @size: number of chars in the array
14370 * @filename: an optional file name or URI
14371 * @encoding: the document encoding, or NULL
14373 * Reset a push parser context
14375 * Returns 0 in case of success and 1 in case of error
14378 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14379 int size, const char *filename, const char *encoding)
    /*
     * Reset @ctxt with xmlCtxtReset() and prepare it for push parsing: a new
     * input buffer and input stream are created and pushed, the optional
     * first @chunk is appended, and the encoding is taken from @encoding or,
     * failing that, from auto-detection on the chunk. Documented to return
     * 0 on success and 1 on error.
     */
14381 xmlParserInputPtr inputStream;
14382 xmlParserInputBufferPtr buf;
14383 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
    /* Auto-detection is attempted only when no encoding name was given and
     * the chunk holds at least 4 bytes. */
14388 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14389 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14391 buf = xmlAllocParserInputBuffer(enc);
    /* @ctxt is validated after the buffer was allocated, so the buffer has
     * to be released on this path. */
14395 if (ctxt == NULL) {
14396 xmlFreeParserInputBuffer(buf);
14400 xmlCtxtReset(ctxt);
    /* pushTab is allocated on first use, sized as nameMax * 3 pointers. */
14402 if (ctxt->pushTab == NULL) {
14403 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14404 sizeof(xmlChar *));
14405 if (ctxt->pushTab == NULL) {
14406 xmlErrMemory(ctxt, NULL);
14407 xmlFreeParserInputBuffer(buf);
    /* ctxt->directory is derived from @filename when one is supplied. */
14412 if (filename == NULL) {
14413 ctxt->directory = NULL;
14415 ctxt->directory = xmlParserGetDirectory(filename);
14418 inputStream = xmlNewInputStream(ctxt);
    /* The stream has not taken the buffer yet; free it on failure. */
14419 if (inputStream == NULL) {
14420 xmlFreeParserInputBuffer(buf);
14424 if (filename == NULL)
14425 inputStream->filename = NULL;
14427 inputStream->filename = (char *)
14428 xmlCanonicPath((const xmlChar *) filename);
    /* Attach the buffer to the stream and point base and cur at the start
     * of the buffer content. */
14429 inputStream->buf = buf;
14430 inputStream->base = inputStream->buf->buffer->content;
14431 inputStream->cur = inputStream->buf->buffer->content;
14433 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14435 inputPush(ctxt, inputStream);
    /* base and cur are saved as offsets and recomputed from the buffer
     * content after the push - presumably because pushing data can move
     * the content; confirm against xmlParserInputBufferPush(). */
14437 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14438 (ctxt->input->buf != NULL)) {
14439 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14440 int cur = ctxt->input->cur - ctxt->input->base;
14442 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14444 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14445 ctxt->input->cur = ctxt->input->base + cur;
14447 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14450 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
    /* An explicit encoding name takes precedence over the detected one. */
14454 if (encoding != NULL) {
14455 xmlCharEncodingHandlerPtr hdlr;
    /* Replace the stored encoding name with a private copy of @encoding. */
14457 if (ctxt->encoding != NULL)
14458 xmlFree((xmlChar *) ctxt->encoding);
14459 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
    /* Switch to the named encoding when a handler exists for it; the
     * unsupported-encoding error below covers the other case. */
14461 hdlr = xmlFindCharEncodingHandler(encoding);
14462 if (hdlr != NULL) {
14463 xmlSwitchToEncoding(ctxt, hdlr);
14465 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14466 "Unsupported encoding %s\n", BAD_CAST encoding);
14468 } else if (enc != XML_CHAR_ENCODING_NONE) {
14469 xmlSwitchEncoding(ctxt, enc);
14477 * xmlCtxtUseOptionsInternal:
14478 * @ctxt: an XML parser context
14479 * @options: a combination of xmlParserOption
14480 * @encoding: the user provided encoding to use
14482 * Applies the options to the parser context
14484 * Returns 0 in case of success, the set of unknown or unimplemented options
14485 * in case of error.
14488 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
    /*
     * Translate the xmlParserOption bits in @options into settings on @ctxt
     * and its SAX handler. Each recognised bit is subtracted from the local
     * @options once handled (safe, since the bit was just tested as set),
     * so what is left over is the set of unrecognised options that the
     * function is documented to return.
     */
    /* A user-supplied encoding name replaces the one stored in the context. */
14492 if (encoding != NULL) {
14493 if (ctxt->encoding != NULL)
14494 xmlFree((xmlChar *) ctxt->encoding);
14495 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
    /* Flags with a dedicated context field: the field follows the flag and
     * the flag is recorded in ctxt->options. */
14497 if (options & XML_PARSE_RECOVER) {
14498 ctxt->recovery = 1;
14499 options -= XML_PARSE_RECOVER;
14500 ctxt->options |= XML_PARSE_RECOVER;
14502 ctxt->recovery = 0;
14503 if (options & XML_PARSE_DTDLOAD) {
14504 ctxt->loadsubset = XML_DETECT_IDS;
14505 options -= XML_PARSE_DTDLOAD;
14506 ctxt->options |= XML_PARSE_DTDLOAD;
14508 ctxt->loadsubset = 0;
    /* DTDATTR adds XML_COMPLETE_ATTRS on top of the loadsubset value
     * chosen just above. */
14509 if (options & XML_PARSE_DTDATTR) {
14510 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14511 options -= XML_PARSE_DTDATTR;
14512 ctxt->options |= XML_PARSE_DTDATTR;
14514 if (options & XML_PARSE_NOENT) {
14515 ctxt->replaceEntities = 1;
14516 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14517 options -= XML_PARSE_NOENT;
14518 ctxt->options |= XML_PARSE_NOENT;
14520 ctxt->replaceEntities = 0;
14521 if (options & XML_PARSE_PEDANTIC) {
14522 ctxt->pedantic = 1;
14523 options -= XML_PARSE_PEDANTIC;
14524 ctxt->options |= XML_PARSE_PEDANTIC;
14526 ctxt->pedantic = 0;
    /* NOBLANKS also installs xmlSAX2IgnorableWhitespace as the handler's
     * ignorableWhitespace callback. */
14527 if (options & XML_PARSE_NOBLANKS) {
14528 ctxt->keepBlanks = 0;
14529 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14530 options -= XML_PARSE_NOBLANKS;
14531 ctxt->options |= XML_PARSE_NOBLANKS;
14533 ctxt->keepBlanks = 1;
    /* When validating, NOWARNING / NOERROR also silence the validation
     * context. These tests must run before the two blocks further down
     * subtract those bits from @options. */
14534 if (options & XML_PARSE_DTDVALID) {
14535 ctxt->validate = 1;
14536 if (options & XML_PARSE_NOWARNING)
14537 ctxt->vctxt.warning = NULL;
14538 if (options & XML_PARSE_NOERROR)
14539 ctxt->vctxt.error = NULL;
14540 options -= XML_PARSE_DTDVALID;
14541 ctxt->options |= XML_PARSE_DTDVALID;
14543 ctxt->validate = 0;
    /* NOTE(review): unlike every other flag here, NOWARNING and NOERROR are
     * consumed without being OR-ed into ctxt->options - confirm this is
     * intentional. */
14544 if (options & XML_PARSE_NOWARNING) {
14545 ctxt->sax->warning = NULL;
14546 options -= XML_PARSE_NOWARNING;
14548 if (options & XML_PARSE_NOERROR) {
14549 ctxt->sax->error = NULL;
14550 ctxt->sax->fatalError = NULL;
14551 options -= XML_PARSE_NOERROR;
14553 #ifdef LIBXML_SAX1_ENABLED
    /* SAX1 mode: install the SAX2 module's startElement/endElement
     * callbacks and clear the namespace-aware (Ns) ones. */
14554 if (options & XML_PARSE_SAX1) {
14555 ctxt->sax->startElement = xmlSAX2StartElement;
14556 ctxt->sax->endElement = xmlSAX2EndElement;
14557 ctxt->sax->startElementNs = NULL;
14558 ctxt->sax->endElementNs = NULL;
14559 ctxt->sax->initialized = 1;
14560 options -= XML_PARSE_SAX1;
14561 ctxt->options |= XML_PARSE_SAX1;
14563 #endif /* LIBXML_SAX1_ENABLED */
14564 if (options & XML_PARSE_NODICT) {
14565 ctxt->dictNames = 0;
14566 options -= XML_PARSE_NODICT;
14567 ctxt->options |= XML_PARSE_NODICT;
14569 ctxt->dictNames = 1;
    /* NOCDATA removes the handler's cdataBlock callback. */
14571 if (options & XML_PARSE_NOCDATA) {
14572 ctxt->sax->cdataBlock = NULL;
14573 options -= XML_PARSE_NOCDATA;
14574 ctxt->options |= XML_PARSE_NOCDATA;
    /* The remaining flags have no dedicated field in this function; they
     * are only recorded in ctxt->options. */
14576 if (options & XML_PARSE_NSCLEAN) {
14577 ctxt->options |= XML_PARSE_NSCLEAN;
14578 options -= XML_PARSE_NSCLEAN;
14580 if (options & XML_PARSE_NONET) {
14581 ctxt->options |= XML_PARSE_NONET;
14582 options -= XML_PARSE_NONET;
14584 if (options & XML_PARSE_COMPACT) {
14585 ctxt->options |= XML_PARSE_COMPACT;
14586 options -= XML_PARSE_COMPACT;
14588 if (options & XML_PARSE_OLD10) {
14589 ctxt->options |= XML_PARSE_OLD10;
14590 options -= XML_PARSE_OLD10;
14592 if (options & XML_PARSE_NOBASEFIX) {
14593 ctxt->options |= XML_PARSE_NOBASEFIX;
14594 options -= XML_PARSE_NOBASEFIX;
14596 if (options & XML_PARSE_HUGE) {
14597 ctxt->options |= XML_PARSE_HUGE;
14598 options -= XML_PARSE_HUGE;
14600 if (options & XML_PARSE_OLDSAX) {
14601 ctxt->options |= XML_PARSE_OLDSAX;
14602 options -= XML_PARSE_OLDSAX;
14604 if (options & XML_PARSE_IGNORE_ENC) {
14605 ctxt->options |= XML_PARSE_IGNORE_ENC;
14606 options -= XML_PARSE_IGNORE_ENC;
    /* Line numbers are switched on regardless of @options. */
14608 ctxt->linenumbers = 1;
14613 * xmlCtxtUseOptions:
14614 * @ctxt: an XML parser context
14615 * @options: a combination of xmlParserOption
14617 * Applies the options to the parser context
14619 * Returns 0 in case of success, the set of unknown or unimplemented options
14620 * in case of error.
14623 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
    /* Public entry point: same as the internal variant with no
     * user-provided encoding (NULL). */
14625 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14630 * @ctxt: an XML parser context
14631 * @URL: the base URL to use for the document
14632 * @encoding: the document encoding, or NULL
14633 * @options: a combination of xmlParserOption
14634 * @reuse: keep the context for reuse
14636 * Common front-end for the xmlRead functions
14638 * Returns the resulting document tree or NULL
14641 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14642 int options, int reuse)
    /*
     * Shared tail of the xmlRead*() and xmlCtxtRead*() functions in this
     * file: apply @options, optionally switch to @encoding, name the input
     * after @URL, then parse. The xmlRead*() callers pass @reuse = 0, the
     * xmlCtxtRead*() callers pass 1.
     */
14646 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14647 if (encoding != NULL) {
14648 xmlCharEncodingHandlerPtr hdlr;
14650 hdlr = xmlFindCharEncodingHandler(encoding);
14652 xmlSwitchToEncoding(ctxt, hdlr);
    /* @URL only names the input when the input has no filename yet; the
     * input receives its own copy of the string. */
14654 if ((URL != NULL) && (ctxt->input != NULL) &&
14655 (ctxt->input->filename == NULL))
14656 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14657 xmlParseDocument(ctxt);
    /* The result is accepted when the document is well-formed or the
     * context is in recovery mode. */
14658 if ((ctxt->wellFormed) || ctxt->recovery)
14662 if (ctxt->myDoc != NULL) {
14663 xmlFreeDoc(ctxt->myDoc);
    /* The context never keeps a reference to the document afterwards. */
14666 ctxt->myDoc = NULL;
    /* NOTE(review): presumably the context is freed only when @reuse is 0 -
     * confirm against the guarding condition. */
14668 xmlFreeParserCtxt(ctxt);
14676 * @cur: a pointer to a zero terminated string
14677 * @URL: the base URL to use for the document
14678 * @encoding: the document encoding, or NULL
14679 * @options: a combination of xmlParserOption
14681 * parse an XML in-memory document and build a tree.
14683 * Returns the resulting document tree
14686 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
    /* Create a context for the string @cur with xmlCreateDocParserCtxt()
     * and hand it to xmlDoRead() with reuse = 0. */
14688 xmlParserCtxtPtr ctxt;
14693 ctxt = xmlCreateDocParserCtxt(cur);
14696 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14701 * @filename: a file or URL
14702 * @encoding: the document encoding, or NULL
14703 * @options: a combination of xmlParserOption
14705 * parse an XML file from the filesystem or the network.
14707 * Returns the resulting document tree
14710 xmlReadFile(const char *filename, const char *encoding, int options)
    /* Create a context from @filename with xmlCreateURLParserCtxt(), which
     * also receives @options, and hand it to xmlDoRead() with reuse = 0.
     * No separate URL is passed on (NULL). */
14712 xmlParserCtxtPtr ctxt;
14714 ctxt = xmlCreateURLParserCtxt(filename, options);
14717 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14722 * @buffer: a pointer to a char array
14723 * @size: the size of the array
14724 * @URL: the base URL to use for the document
14725 * @encoding: the document encoding, or NULL
14726 * @options: a combination of xmlParserOption
14728 * parse an XML in-memory document and build a tree.
14730 * Returns the resulting document tree
14733 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
    /* Create a context over the @size bytes at @buffer with
     * xmlCreateMemoryParserCtxt() and hand it to xmlDoRead() with
     * reuse = 0. */
14735 xmlParserCtxtPtr ctxt;
14737 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14740 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14745 * @fd: an open file descriptor
14746 * @URL: the base URL to use for the document
14747 * @encoding: the document encoding, or NULL
14748 * @options: a combination of xmlParserOption
14750 * parse an XML from a file descriptor and build a tree.
14751 * NOTE that the file descriptor will not be closed when the
14752 * reader is closed or reset.
14754 * Returns the resulting document tree
14757 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
    /*
     * Parse from the open descriptor @fd: wrap it in an input buffer, build
     * a new context and input stream around that buffer, then hand the
     * context to xmlDoRead() with reuse = 0.
     */
14759 xmlParserCtxtPtr ctxt;
14760 xmlParserInputBufferPtr input;
14761 xmlParserInputPtr stream;
14766 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
    /* Clear the buffer's close callback; this matches the documented
     * promise that @fd is not closed by the parser. */
14769 input->closecallback = NULL;
14770 ctxt = xmlNewParserCtxt();
    /* On either failure below the buffer has not been handed to a stream,
     * so it is released here. */
14771 if (ctxt == NULL) {
14772 xmlFreeParserInputBuffer(input);
14775 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14776 if (stream == NULL) {
14777 xmlFreeParserInputBuffer(input);
14778 xmlFreeParserCtxt(ctxt);
14781 inputPush(ctxt, stream);
14782 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14787 * @ioread: an I/O read function
14788 * @ioclose: an I/O close function
14789 * @ioctx: an I/O handler
14790 * @URL: the base URL to use for the document
14791 * @encoding: the document encoding, or NULL
14792 * @options: a combination of xmlParserOption
14794 * parse an XML document from I/O functions and source and build a tree.
14796 * Returns the resulting document tree
14799 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14800 void *ioctx, const char *URL, const char *encoding, int options)
    /*
     * Parse from caller-supplied I/O callbacks: wrap @ioread / @ioclose /
     * @ioctx in an input buffer, build a new context and input stream
     * around it, then hand the context to xmlDoRead() with reuse = 0.
     * A read callback is mandatory; the close callback is optional.
     */
14802 xmlParserCtxtPtr ctxt;
14803 xmlParserInputBufferPtr input;
14804 xmlParserInputPtr stream;
14806 if (ioread == NULL)
14809 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14810 XML_CHAR_ENCODING_NONE);
    /* NOTE(review): no buffer owns @ioctx on this path; presumably it is
     * closed through @ioclose when one was supplied - confirm. */
14811 if (input == NULL) {
14812 if (ioclose != NULL)
14816 ctxt = xmlNewParserCtxt();
    /* On either failure below the buffer has not been handed to a stream,
     * so it is released here. */
14817 if (ctxt == NULL) {
14818 xmlFreeParserInputBuffer(input);
14821 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14822 if (stream == NULL) {
14823 xmlFreeParserInputBuffer(input);
14824 xmlFreeParserCtxt(ctxt);
14827 inputPush(ctxt, stream);
14828 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14833 * @ctxt: an XML parser context
14834 * @cur: a pointer to a zero terminated string
14835 * @URL: the base URL to use for the document
14836 * @encoding: the document encoding, or NULL
14837 * @options: a combination of xmlParserOption
14839 * parse an XML in-memory document and build a tree.
14840 * This reuses the existing @ctxt parser context
14842 * Returns the resulting document tree
14845 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14846 const char *URL, const char *encoding, int options)
    /* Reuse @ctxt for the string @cur: reset the context, push a string
     * input stream and run xmlDoRead() with reuse = 1. */
14848 xmlParserInputPtr stream;
14855 xmlCtxtReset(ctxt);
14857 stream = xmlNewStringInputStream(ctxt, cur);
14858 if (stream == NULL) {
14861 inputPush(ctxt, stream);
14862 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14867 * @ctxt: an XML parser context
14868 * @filename: a file or URL
14869 * @encoding: the document encoding, or NULL
14870 * @options: a combination of xmlParserOption
14872 * parse an XML file from the filesystem or the network.
14873 * This reuses the existing @ctxt parser context
14875 * Returns the resulting document tree
14878 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14879 const char *encoding, int options)
    /* Reuse @ctxt for @filename: reset the context, obtain the input
     * stream through xmlLoadExternalEntity() and run xmlDoRead() with
     * reuse = 1. No separate URL is passed on (NULL). */
14881 xmlParserInputPtr stream;
14883 if (filename == NULL)
14888 xmlCtxtReset(ctxt);
14890 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14891 if (stream == NULL) {
14894 inputPush(ctxt, stream);
14895 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14899 * xmlCtxtReadMemory:
14900 * @ctxt: an XML parser context
14901 * @buffer: a pointer to a char array
14902 * @size: the size of the array
14903 * @URL: the base URL to use for the document
14904 * @encoding: the document encoding, or NULL
14905 * @options: a combination of xmlParserOption
14907 * parse an XML in-memory document and build a tree.
14908 * This reuses the existing @ctxt parser context
14910 * Returns the resulting document tree
14913 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14914 const char *URL, const char *encoding, int options)
    /* Reuse @ctxt for the @size bytes at @buffer: reset the context, wrap
     * the memory in an input buffer and stream, and run xmlDoRead() with
     * reuse = 1. */
14916 xmlParserInputBufferPtr input;
14917 xmlParserInputPtr stream;
14921 if (buffer == NULL)
14924 xmlCtxtReset(ctxt);
14926 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14927 if (input == NULL) {
14931 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
    /* The buffer was not handed to a stream, so it is released here; the
     * caller's context is left alone. */
14932 if (stream == NULL) {
14933 xmlFreeParserInputBuffer(input);
14937 inputPush(ctxt, stream);
14938 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14943 * @ctxt: an XML parser context
14944 * @fd: an open file descriptor
14945 * @URL: the base URL to use for the document
14946 * @encoding: the document encoding, or NULL
14947 * @options: a combination of xmlParserOption
14949 * parse an XML from a file descriptor and build a tree.
14950 * This reuses the existing @ctxt parser context
14951 * NOTE that the file descriptor will not be closed when the
14952 * reader is closed or reset.
14954 * Returns the resulting document tree
14957 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14958 const char *URL, const char *encoding, int options)
    /* Reuse @ctxt for the open descriptor @fd: reset the context, wrap the
     * descriptor in an input buffer and stream, and run xmlDoRead() with
     * reuse = 1. */
14960 xmlParserInputBufferPtr input;
14961 xmlParserInputPtr stream;
14968 xmlCtxtReset(ctxt);
14971 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
    /* Clear the buffer's close callback; this matches the documented
     * promise that @fd is not closed by the parser. */
14974 input->closecallback = NULL;
14975 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
    /* The buffer was not handed to a stream, so it is released here; the
     * caller's context is left alone. */
14976 if (stream == NULL) {
14977 xmlFreeParserInputBuffer(input);
14980 inputPush(ctxt, stream);
14981 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14986 * @ctxt: an XML parser context
14987 * @ioread: an I/O read function
14988 * @ioclose: an I/O close function
14989 * @ioctx: an I/O handler
14990 * @URL: the base URL to use for the document
14991 * @encoding: the document encoding, or NULL
14992 * @options: a combination of xmlParserOption
14994 * parse an XML document from I/O functions and source and build a tree.
14995 * This reuses the existing @ctxt parser context
14997 * Returns the resulting document tree
15000 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15001 xmlInputCloseCallback ioclose, void *ioctx,
15003 const char *encoding, int options)
    /* Reuse @ctxt for caller-supplied I/O callbacks: reset the context,
     * wrap @ioread / @ioclose / @ioctx in an input buffer and stream, and
     * run xmlDoRead() with reuse = 1. A read callback is mandatory. */
15005 xmlParserInputBufferPtr input;
15006 xmlParserInputPtr stream;
15008 if (ioread == NULL)
15013 xmlCtxtReset(ctxt);
15015 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15016 XML_CHAR_ENCODING_NONE);
    /* NOTE(review): no buffer owns @ioctx on this path; presumably it is
     * closed through @ioclose when one was supplied - confirm. */
15017 if (input == NULL) {
15018 if (ioclose != NULL)
15022 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
    /* The buffer was not handed to a stream, so it is released here; the
     * caller's context is left alone. */
15023 if (stream == NULL) {
15024 xmlFreeParserInputBuffer(input);
15027 inputPush(ctxt, stream);
15028 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15031 #define bottom_parser
15032 #include "elfgcchack.h"